{ "best_metric": 0.021544713242252534, "best_model_checkpoint": "./results-cc/code-t5/codet5_qlora_official_0.001/checkpoint-88308", "epoch": 6.0, "eval_steps": 500, "global_step": 88308, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003397200706617747, "grad_norm": 2.207773447036743, "learning_rate": 0.0009999575349911674, "loss": 9.2175, "step": 5 }, { "epoch": 0.0006794401413235494, "grad_norm": 0.8029590249061584, "learning_rate": 0.0009999150699823347, "loss": 6.2854, "step": 10 }, { "epoch": 0.0010191602119853241, "grad_norm": 1.3657054901123047, "learning_rate": 0.0009998726049735018, "loss": 5.4114, "step": 15 }, { "epoch": 0.001358880282647099, "grad_norm": 0.7428395748138428, "learning_rate": 0.0009998301399646691, "loss": 4.4378, "step": 20 }, { "epoch": 0.0016986003533088735, "grad_norm": 0.5788111090660095, "learning_rate": 0.0009997876749558365, "loss": 4.3423, "step": 25 }, { "epoch": 0.0020383204239706482, "grad_norm": 0.5947818160057068, "learning_rate": 0.0009997452099470036, "loss": 4.1356, "step": 30 }, { "epoch": 0.002378040494632423, "grad_norm": 0.7070780992507935, "learning_rate": 0.000999702744938171, "loss": 3.9679, "step": 35 }, { "epoch": 0.002717760565294198, "grad_norm": 1.4084078073501587, "learning_rate": 0.0009996602799293383, "loss": 4.2031, "step": 40 }, { "epoch": 0.0030574806359559724, "grad_norm": 0.570355236530304, "learning_rate": 0.0009996178149205056, "loss": 4.3601, "step": 45 }, { "epoch": 0.003397200706617747, "grad_norm": 9.142182350158691, "learning_rate": 0.0009995753499116727, "loss": 3.9385, "step": 50 }, { "epoch": 0.0037369207772795215, "grad_norm": 0.5780104398727417, "learning_rate": 0.00099953288490284, "loss": 4.0844, "step": 55 }, { "epoch": 0.0040766408479412965, "grad_norm": 1.0214359760284424, "learning_rate": 0.0009994904198940074, "loss": 3.9468, "step": 60 }, { "epoch": 0.0044163609186030715, "grad_norm": 0.4513009488582611, "learning_rate": 0.0009994479548851745, "loss": 4.0269, "step": 65 }, { "epoch": 0.004756080989264846, "grad_norm": 0.4692077040672302, "learning_rate": 0.000999405489876342, "loss": 4.2709, "step": 70 }, { "epoch": 0.005095801059926621, "grad_norm": 0.6555360555648804, "learning_rate": 0.0009993630248675092, "loss": 4.1327, "step": 75 }, { "epoch": 0.005435521130588396, "grad_norm": 1.6717242002487183, "learning_rate": 0.0009993205598586765, "loss": 3.8472, "step": 80 }, { "epoch": 0.00577524120125017, "grad_norm": 0.559617817401886, "learning_rate": 0.0009992780948498438, "loss": 3.9607, "step": 85 }, { "epoch": 0.006114961271911945, "grad_norm": 2.2418723106384277, "learning_rate": 0.000999235629841011, "loss": 4.0371, "step": 90 }, { "epoch": 0.006454681342573719, "grad_norm": 0.4393124282360077, "learning_rate": 0.0009991931648321783, "loss": 4.2313, "step": 95 }, { "epoch": 0.006794401413235494, "grad_norm": 0.7975866794586182, "learning_rate": 0.0009991506998233456, "loss": 4.1234, "step": 100 }, { "epoch": 0.007134121483897269, "grad_norm": 0.7241436243057251, "learning_rate": 0.000999108234814513, "loss": 3.655, "step": 105 }, { "epoch": 0.007473841554559043, "grad_norm": 0.45974311232566833, "learning_rate": 0.00099906576980568, "loss": 4.0754, "step": 110 }, { "epoch": 0.007813561625220818, "grad_norm": 0.42426973581314087, "learning_rate": 0.0009990233047968474, "loss": 4.0885, "step": 115 }, { "epoch": 0.008153281695882593, "grad_norm": 0.7785698771476746, "learning_rate": 0.0009989808397880147, "loss": 4.2543, "step": 120 }, { "epoch": 0.008493001766544368, "grad_norm": 1.1263967752456665, "learning_rate": 0.0009989383747791819, "loss": 4.0638, "step": 125 }, { "epoch": 0.008832721837206143, "grad_norm": 0.5730156302452087, "learning_rate": 0.0009988959097703492, "loss": 3.8552, "step": 130 }, { "epoch": 0.009172441907867916, "grad_norm": 0.5796052813529968, "learning_rate": 0.0009988534447615165, "loss": 3.8604, "step": 135 }, { "epoch": 0.009512161978529691, "grad_norm": 0.6022884249687195, "learning_rate": 0.0009988109797526839, "loss": 3.8499, "step": 140 }, { "epoch": 0.009851882049191466, "grad_norm": 0.8763172030448914, "learning_rate": 0.0009987685147438512, "loss": 3.8815, "step": 145 }, { "epoch": 0.010191602119853241, "grad_norm": 0.7262821197509766, "learning_rate": 0.0009987260497350183, "loss": 4.0481, "step": 150 }, { "epoch": 0.010531322190515016, "grad_norm": 1.2450162172317505, "learning_rate": 0.0009986835847261856, "loss": 3.8825, "step": 155 }, { "epoch": 0.010871042261176791, "grad_norm": 1.4473909139633179, "learning_rate": 0.000998641119717353, "loss": 3.9704, "step": 160 }, { "epoch": 0.011210762331838564, "grad_norm": 0.9646778106689453, "learning_rate": 0.00099859865470852, "loss": 4.0052, "step": 165 }, { "epoch": 0.01155048240250034, "grad_norm": 0.5259531140327454, "learning_rate": 0.0009985561896996874, "loss": 3.9015, "step": 170 }, { "epoch": 0.011890202473162114, "grad_norm": 0.5808979272842407, "learning_rate": 0.0009985137246908548, "loss": 3.9114, "step": 175 }, { "epoch": 0.01222992254382389, "grad_norm": 0.6354779005050659, "learning_rate": 0.000998471259682022, "loss": 3.9704, "step": 180 }, { "epoch": 0.012569642614485664, "grad_norm": 0.5465064644813538, "learning_rate": 0.0009984287946731894, "loss": 3.9919, "step": 185 }, { "epoch": 0.012909362685147438, "grad_norm": 0.7034687399864197, "learning_rate": 0.0009983863296643566, "loss": 3.89, "step": 190 }, { "epoch": 0.013249082755809213, "grad_norm": 0.5259936451911926, "learning_rate": 0.0009983438646555239, "loss": 4.1512, "step": 195 }, { "epoch": 0.013588802826470988, "grad_norm": 0.6321644186973572, "learning_rate": 0.0009983013996466912, "loss": 3.9967, "step": 200 }, { "epoch": 0.013928522897132763, "grad_norm": 0.4751060903072357, "learning_rate": 0.0009982589346378583, "loss": 4.1144, "step": 205 }, { "epoch": 0.014268242967794538, "grad_norm": 2.088681936264038, "learning_rate": 0.0009982164696290257, "loss": 3.8195, "step": 210 }, { "epoch": 0.014607963038456313, "grad_norm": 0.4030695855617523, "learning_rate": 0.000998174004620193, "loss": 3.8732, "step": 215 }, { "epoch": 0.014947683109118086, "grad_norm": 0.492465615272522, "learning_rate": 0.0009981315396113603, "loss": 4.0153, "step": 220 }, { "epoch": 0.015287403179779861, "grad_norm": 0.4808414876461029, "learning_rate": 0.0009980890746025275, "loss": 3.9177, "step": 225 }, { "epoch": 0.015627123250441636, "grad_norm": 0.4167867600917816, "learning_rate": 0.0009980466095936948, "loss": 3.8987, "step": 230 }, { "epoch": 0.01596684332110341, "grad_norm": 0.9164467453956604, "learning_rate": 0.0009980041445848621, "loss": 3.9862, "step": 235 }, { "epoch": 0.016306563391765186, "grad_norm": 0.6926063299179077, "learning_rate": 0.0009979616795760292, "loss": 3.8676, "step": 240 }, { "epoch": 0.01664628346242696, "grad_norm": 0.5375890135765076, "learning_rate": 0.0009979192145671968, "loss": 3.9015, "step": 245 }, { "epoch": 0.016986003533088736, "grad_norm": 0.5443463325500488, "learning_rate": 0.000997876749558364, "loss": 4.0356, "step": 250 }, { "epoch": 0.01732572360375051, "grad_norm": 0.6313775777816772, "learning_rate": 0.0009978342845495313, "loss": 3.8561, "step": 255 }, { "epoch": 0.017665443674412286, "grad_norm": 0.8847453594207764, "learning_rate": 0.0009977918195406986, "loss": 4.0132, "step": 260 }, { "epoch": 0.01800516374507406, "grad_norm": 0.48864248394966125, "learning_rate": 0.0009977493545318657, "loss": 4.0664, "step": 265 }, { "epoch": 0.018344883815735832, "grad_norm": 0.6228508949279785, "learning_rate": 0.000997706889523033, "loss": 3.8738, "step": 270 }, { "epoch": 0.01868460388639761, "grad_norm": 0.7404700517654419, "learning_rate": 0.0009976644245142004, "loss": 3.7775, "step": 275 }, { "epoch": 0.019024323957059382, "grad_norm": 0.47269967198371887, "learning_rate": 0.0009976219595053677, "loss": 3.8748, "step": 280 }, { "epoch": 0.01936404402772116, "grad_norm": 5.355066299438477, "learning_rate": 0.0009975794944965348, "loss": 3.9724, "step": 285 }, { "epoch": 0.019703764098382932, "grad_norm": 0.7765783667564392, "learning_rate": 0.0009975370294877022, "loss": 3.992, "step": 290 }, { "epoch": 0.020043484169044706, "grad_norm": 0.5294786691665649, "learning_rate": 0.0009974945644788695, "loss": 3.9407, "step": 295 }, { "epoch": 0.020383204239706482, "grad_norm": 0.5246041417121887, "learning_rate": 0.0009974520994700366, "loss": 3.6985, "step": 300 }, { "epoch": 0.020722924310368256, "grad_norm": 0.8263882994651794, "learning_rate": 0.000997409634461204, "loss": 3.949, "step": 305 }, { "epoch": 0.021062644381030032, "grad_norm": 0.4506300091743469, "learning_rate": 0.0009973671694523713, "loss": 3.718, "step": 310 }, { "epoch": 0.021402364451691806, "grad_norm": 0.4988076686859131, "learning_rate": 0.0009973247044435386, "loss": 4.163, "step": 315 }, { "epoch": 0.021742084522353582, "grad_norm": 0.5724675059318542, "learning_rate": 0.000997282239434706, "loss": 3.9906, "step": 320 }, { "epoch": 0.022081804593015356, "grad_norm": 0.5702639222145081, "learning_rate": 0.000997239774425873, "loss": 3.8087, "step": 325 }, { "epoch": 0.02242152466367713, "grad_norm": 0.45527079701423645, "learning_rate": 0.0009971973094170404, "loss": 3.6432, "step": 330 }, { "epoch": 0.022761244734338906, "grad_norm": 0.5885451436042786, "learning_rate": 0.0009971548444082077, "loss": 4.0753, "step": 335 }, { "epoch": 0.02310096480500068, "grad_norm": 0.5633436441421509, "learning_rate": 0.0009971123793993748, "loss": 3.9803, "step": 340 }, { "epoch": 0.023440684875662456, "grad_norm": 0.6600042581558228, "learning_rate": 0.0009970699143905422, "loss": 3.9767, "step": 345 }, { "epoch": 0.02378040494632423, "grad_norm": 0.5405495166778564, "learning_rate": 0.0009970274493817095, "loss": 3.75, "step": 350 }, { "epoch": 0.024120125016986002, "grad_norm": 0.6013660430908203, "learning_rate": 0.0009969849843728769, "loss": 4.1565, "step": 355 }, { "epoch": 0.02445984508764778, "grad_norm": 0.6407164335250854, "learning_rate": 0.000996942519364044, "loss": 4.0018, "step": 360 }, { "epoch": 0.024799565158309552, "grad_norm": 0.5490573644638062, "learning_rate": 0.0009969000543552113, "loss": 3.9018, "step": 365 }, { "epoch": 0.02513928522897133, "grad_norm": 0.5638839602470398, "learning_rate": 0.0009968575893463786, "loss": 4.0383, "step": 370 }, { "epoch": 0.025479005299633102, "grad_norm": 1.9583743810653687, "learning_rate": 0.0009968151243375458, "loss": 3.8543, "step": 375 }, { "epoch": 0.025818725370294875, "grad_norm": 1.9940112829208374, "learning_rate": 0.000996772659328713, "loss": 4.0845, "step": 380 }, { "epoch": 0.026158445440956652, "grad_norm": 0.5722443461418152, "learning_rate": 0.0009967301943198804, "loss": 4.0051, "step": 385 }, { "epoch": 0.026498165511618425, "grad_norm": 0.6977593898773193, "learning_rate": 0.0009966877293110478, "loss": 3.7642, "step": 390 }, { "epoch": 0.026837885582280202, "grad_norm": 0.54580157995224, "learning_rate": 0.000996645264302215, "loss": 3.5164, "step": 395 }, { "epoch": 0.027177605652941975, "grad_norm": 8.007853507995605, "learning_rate": 0.0009966027992933822, "loss": 3.8012, "step": 400 }, { "epoch": 0.027517325723603752, "grad_norm": 0.5816832780838013, "learning_rate": 0.0009965603342845495, "loss": 3.6734, "step": 405 }, { "epoch": 0.027857045794265525, "grad_norm": 0.5928305387496948, "learning_rate": 0.0009965178692757169, "loss": 3.9956, "step": 410 }, { "epoch": 0.0281967658649273, "grad_norm": 0.8828011155128479, "learning_rate": 0.000996475404266884, "loss": 4.1078, "step": 415 }, { "epoch": 0.028536485935589075, "grad_norm": 0.6904022693634033, "learning_rate": 0.0009964329392580513, "loss": 3.8418, "step": 420 }, { "epoch": 0.02887620600625085, "grad_norm": 0.5612075328826904, "learning_rate": 0.0009963904742492187, "loss": 4.1817, "step": 425 }, { "epoch": 0.029215926076912625, "grad_norm": 0.5470679402351379, "learning_rate": 0.000996348009240386, "loss": 3.9428, "step": 430 }, { "epoch": 0.0295556461475744, "grad_norm": 0.8141860961914062, "learning_rate": 0.0009963055442315531, "loss": 3.9853, "step": 435 }, { "epoch": 0.029895366218236172, "grad_norm": 0.5441884994506836, "learning_rate": 0.0009962630792227205, "loss": 3.8899, "step": 440 }, { "epoch": 0.03023508628889795, "grad_norm": 0.8587472438812256, "learning_rate": 0.0009962206142138878, "loss": 3.7975, "step": 445 }, { "epoch": 0.030574806359559722, "grad_norm": 0.5279538035392761, "learning_rate": 0.000996178149205055, "loss": 3.5993, "step": 450 }, { "epoch": 0.0309145264302215, "grad_norm": 0.504540205001831, "learning_rate": 0.0009961356841962225, "loss": 3.8558, "step": 455 }, { "epoch": 0.03125424650088327, "grad_norm": 0.5363671183586121, "learning_rate": 0.0009960932191873896, "loss": 4.1573, "step": 460 }, { "epoch": 0.03159396657154505, "grad_norm": 2.302508592605591, "learning_rate": 0.000996050754178557, "loss": 3.8831, "step": 465 }, { "epoch": 0.03193368664220682, "grad_norm": 0.7786630392074585, "learning_rate": 0.0009960082891697242, "loss": 3.885, "step": 470 }, { "epoch": 0.032273406712868595, "grad_norm": 0.5628072023391724, "learning_rate": 0.0009959658241608914, "loss": 4.0699, "step": 475 }, { "epoch": 0.03261312678353037, "grad_norm": 0.8729355931282043, "learning_rate": 0.0009959233591520587, "loss": 3.7228, "step": 480 }, { "epoch": 0.03295284685419215, "grad_norm": 0.5414428114891052, "learning_rate": 0.000995880894143226, "loss": 3.6357, "step": 485 }, { "epoch": 0.03329256692485392, "grad_norm": 0.6227617263793945, "learning_rate": 0.0009958384291343934, "loss": 3.8537, "step": 490 }, { "epoch": 0.033632286995515695, "grad_norm": 0.6045665144920349, "learning_rate": 0.0009957959641255605, "loss": 3.9296, "step": 495 }, { "epoch": 0.03397200706617747, "grad_norm": 1.6914665699005127, "learning_rate": 0.0009957534991167278, "loss": 3.8143, "step": 500 }, { "epoch": 0.03431172713683924, "grad_norm": 0.559988260269165, "learning_rate": 0.0009957110341078951, "loss": 3.829, "step": 505 }, { "epoch": 0.03465144720750102, "grad_norm": 0.6654518842697144, "learning_rate": 0.0009956685690990623, "loss": 3.851, "step": 510 }, { "epoch": 0.034991167278162795, "grad_norm": 0.5775548219680786, "learning_rate": 0.0009956261040902296, "loss": 3.7705, "step": 515 }, { "epoch": 0.03533088734882457, "grad_norm": 0.5970734357833862, "learning_rate": 0.000995583639081397, "loss": 3.9808, "step": 520 }, { "epoch": 0.03567060741948634, "grad_norm": 0.6709187030792236, "learning_rate": 0.0009955411740725643, "loss": 3.7881, "step": 525 }, { "epoch": 0.03601032749014812, "grad_norm": 1.4802911281585693, "learning_rate": 0.0009954987090637316, "loss": 3.6446, "step": 530 }, { "epoch": 0.036350047560809895, "grad_norm": 0.6298632621765137, "learning_rate": 0.0009954562440548987, "loss": 3.84, "step": 535 }, { "epoch": 0.036689767631471665, "grad_norm": 1.1104154586791992, "learning_rate": 0.000995413779046066, "loss": 3.8281, "step": 540 }, { "epoch": 0.03702948770213344, "grad_norm": 3.1669414043426514, "learning_rate": 0.0009953713140372334, "loss": 3.8184, "step": 545 }, { "epoch": 0.03736920777279522, "grad_norm": 0.643075704574585, "learning_rate": 0.0009953288490284005, "loss": 4.1517, "step": 550 }, { "epoch": 0.03770892784345699, "grad_norm": 0.5925756096839905, "learning_rate": 0.000995286384019568, "loss": 3.8738, "step": 555 }, { "epoch": 0.038048647914118765, "grad_norm": 0.5645868182182312, "learning_rate": 0.0009952439190107352, "loss": 3.9242, "step": 560 }, { "epoch": 0.03838836798478054, "grad_norm": 2.423346996307373, "learning_rate": 0.0009952014540019025, "loss": 3.9295, "step": 565 }, { "epoch": 0.03872808805544232, "grad_norm": 0.5646160840988159, "learning_rate": 0.0009951589889930698, "loss": 4.0769, "step": 570 }, { "epoch": 0.03906780812610409, "grad_norm": 0.6235633492469788, "learning_rate": 0.000995116523984237, "loss": 4.0743, "step": 575 }, { "epoch": 0.039407528196765865, "grad_norm": 0.9926804900169373, "learning_rate": 0.0009950740589754043, "loss": 3.7669, "step": 580 }, { "epoch": 0.03974724826742764, "grad_norm": 0.5877676606178284, "learning_rate": 0.0009950315939665716, "loss": 3.7585, "step": 585 }, { "epoch": 0.04008696833808941, "grad_norm": 0.6339868307113647, "learning_rate": 0.000994989128957739, "loss": 3.6846, "step": 590 }, { "epoch": 0.04042668840875119, "grad_norm": 0.7069829106330872, "learning_rate": 0.000994946663948906, "loss": 3.725, "step": 595 }, { "epoch": 0.040766408479412965, "grad_norm": 0.575941264629364, "learning_rate": 0.0009949041989400734, "loss": 3.9028, "step": 600 }, { "epoch": 0.04110612855007474, "grad_norm": 0.7453792095184326, "learning_rate": 0.0009948617339312407, "loss": 3.7812, "step": 605 }, { "epoch": 0.04144584862073651, "grad_norm": 0.5961312055587769, "learning_rate": 0.0009948192689224079, "loss": 3.9243, "step": 610 }, { "epoch": 0.04178556869139829, "grad_norm": 0.5527133941650391, "learning_rate": 0.0009947768039135752, "loss": 3.8414, "step": 615 }, { "epoch": 0.042125288762060065, "grad_norm": 0.4514964818954468, "learning_rate": 0.0009947343389047425, "loss": 3.6732, "step": 620 }, { "epoch": 0.042465008832721834, "grad_norm": 0.42869192361831665, "learning_rate": 0.0009946918738959099, "loss": 3.4291, "step": 625 }, { "epoch": 0.04280472890338361, "grad_norm": 0.5432568788528442, "learning_rate": 0.0009946494088870772, "loss": 3.6035, "step": 630 }, { "epoch": 0.04314444897404539, "grad_norm": 0.5461887121200562, "learning_rate": 0.0009946069438782443, "loss": 3.8103, "step": 635 }, { "epoch": 0.043484169044707165, "grad_norm": 0.6913933157920837, "learning_rate": 0.0009945644788694117, "loss": 3.6833, "step": 640 }, { "epoch": 0.043823889115368934, "grad_norm": 0.5428763628005981, "learning_rate": 0.000994522013860579, "loss": 4.0695, "step": 645 }, { "epoch": 0.04416360918603071, "grad_norm": 0.4568764865398407, "learning_rate": 0.000994479548851746, "loss": 3.9515, "step": 650 }, { "epoch": 0.04450332925669249, "grad_norm": 0.4596848785877228, "learning_rate": 0.0009944370838429134, "loss": 3.8036, "step": 655 }, { "epoch": 0.04484304932735426, "grad_norm": 0.5661909580230713, "learning_rate": 0.0009943946188340808, "loss": 3.7296, "step": 660 }, { "epoch": 0.045182769398016034, "grad_norm": 0.5530750751495361, "learning_rate": 0.0009943521538252481, "loss": 3.7028, "step": 665 }, { "epoch": 0.04552248946867781, "grad_norm": 0.5476675629615784, "learning_rate": 0.0009943096888164152, "loss": 3.9626, "step": 670 }, { "epoch": 0.04586220953933958, "grad_norm": 1.141539216041565, "learning_rate": 0.0009942672238075826, "loss": 3.7415, "step": 675 }, { "epoch": 0.04620192961000136, "grad_norm": 0.5888988971710205, "learning_rate": 0.00099422475879875, "loss": 3.8676, "step": 680 }, { "epoch": 0.046541649680663134, "grad_norm": 0.5741140246391296, "learning_rate": 0.000994182293789917, "loss": 3.9568, "step": 685 }, { "epoch": 0.04688136975132491, "grad_norm": 1.4437280893325806, "learning_rate": 0.0009941398287810843, "loss": 3.9491, "step": 690 }, { "epoch": 0.04722108982198668, "grad_norm": 0.5097208619117737, "learning_rate": 0.0009940973637722517, "loss": 3.8049, "step": 695 }, { "epoch": 0.04756080989264846, "grad_norm": 0.43953296542167664, "learning_rate": 0.000994054898763419, "loss": 3.969, "step": 700 }, { "epoch": 0.047900529963310234, "grad_norm": 1.2057006359100342, "learning_rate": 0.0009940124337545863, "loss": 3.9099, "step": 705 }, { "epoch": 0.048240250033972004, "grad_norm": 0.5880506038665771, "learning_rate": 0.0009939699687457535, "loss": 3.8779, "step": 710 }, { "epoch": 0.04857997010463378, "grad_norm": 0.47119975090026855, "learning_rate": 0.0009939275037369208, "loss": 3.7105, "step": 715 }, { "epoch": 0.04891969017529556, "grad_norm": 0.549010694026947, "learning_rate": 0.0009938850387280881, "loss": 3.9291, "step": 720 }, { "epoch": 0.049259410245957334, "grad_norm": 0.5610355138778687, "learning_rate": 0.0009938425737192553, "loss": 3.7261, "step": 725 }, { "epoch": 0.049599130316619104, "grad_norm": 1.5965944528579712, "learning_rate": 0.0009938001087104226, "loss": 3.824, "step": 730 }, { "epoch": 0.04993885038728088, "grad_norm": 0.6607491970062256, "learning_rate": 0.00099375764370159, "loss": 3.7982, "step": 735 }, { "epoch": 0.05027857045794266, "grad_norm": 1.4106281995773315, "learning_rate": 0.0009937151786927573, "loss": 3.9767, "step": 740 }, { "epoch": 0.05061829052860443, "grad_norm": 0.5237374305725098, "learning_rate": 0.0009936727136839244, "loss": 3.6383, "step": 745 }, { "epoch": 0.050958010599266204, "grad_norm": 0.6122420430183411, "learning_rate": 0.0009936302486750917, "loss": 3.8833, "step": 750 }, { "epoch": 0.05129773066992798, "grad_norm": 0.5208083987236023, "learning_rate": 0.000993587783666259, "loss": 3.8577, "step": 755 }, { "epoch": 0.05163745074058975, "grad_norm": 0.6950027346611023, "learning_rate": 0.0009935453186574262, "loss": 4.0022, "step": 760 }, { "epoch": 0.05197717081125153, "grad_norm": 0.5676352381706238, "learning_rate": 0.0009935028536485937, "loss": 3.7205, "step": 765 }, { "epoch": 0.052316890881913304, "grad_norm": 0.5784592032432556, "learning_rate": 0.0009934603886397608, "loss": 3.762, "step": 770 }, { "epoch": 0.05265661095257508, "grad_norm": 1.3643052577972412, "learning_rate": 0.0009934179236309282, "loss": 3.9225, "step": 775 }, { "epoch": 0.05299633102323685, "grad_norm": 0.552307665348053, "learning_rate": 0.0009933754586220955, "loss": 3.9288, "step": 780 }, { "epoch": 0.05333605109389863, "grad_norm": 0.6428114175796509, "learning_rate": 0.0009933329936132626, "loss": 3.6841, "step": 785 }, { "epoch": 0.053675771164560404, "grad_norm": 0.6585874557495117, "learning_rate": 0.00099329052860443, "loss": 3.9182, "step": 790 }, { "epoch": 0.054015491235222174, "grad_norm": 0.6242063045501709, "learning_rate": 0.0009932480635955973, "loss": 3.7315, "step": 795 }, { "epoch": 0.05435521130588395, "grad_norm": 1.172575831413269, "learning_rate": 0.0009932055985867646, "loss": 4.0225, "step": 800 }, { "epoch": 0.05469493137654573, "grad_norm": 0.5162668228149414, "learning_rate": 0.0009931631335779317, "loss": 4.0547, "step": 805 }, { "epoch": 0.055034651447207504, "grad_norm": 0.5157238841056824, "learning_rate": 0.000993120668569099, "loss": 3.9932, "step": 810 }, { "epoch": 0.055374371517869274, "grad_norm": 0.7152529954910278, "learning_rate": 0.0009930782035602664, "loss": 3.8642, "step": 815 }, { "epoch": 0.05571409158853105, "grad_norm": 0.6313608288764954, "learning_rate": 0.0009930357385514335, "loss": 3.7289, "step": 820 }, { "epoch": 0.05605381165919283, "grad_norm": 0.5713692903518677, "learning_rate": 0.0009929932735426009, "loss": 3.6429, "step": 825 }, { "epoch": 0.0563935317298546, "grad_norm": 0.6273418068885803, "learning_rate": 0.0009929508085337682, "loss": 3.695, "step": 830 }, { "epoch": 0.056733251800516374, "grad_norm": 1.2013376951217651, "learning_rate": 0.0009929083435249355, "loss": 3.896, "step": 835 }, { "epoch": 0.05707297187117815, "grad_norm": 0.6832719445228577, "learning_rate": 0.0009928658785161029, "loss": 4.0995, "step": 840 }, { "epoch": 0.05741269194183993, "grad_norm": 0.6568312048912048, "learning_rate": 0.00099282341350727, "loss": 3.9602, "step": 845 }, { "epoch": 0.0577524120125017, "grad_norm": 0.7554712891578674, "learning_rate": 0.0009927809484984373, "loss": 3.8707, "step": 850 }, { "epoch": 0.058092132083163474, "grad_norm": 0.5548936128616333, "learning_rate": 0.0009927384834896046, "loss": 3.708, "step": 855 }, { "epoch": 0.05843185215382525, "grad_norm": 0.5560023188591003, "learning_rate": 0.0009926960184807718, "loss": 3.5404, "step": 860 }, { "epoch": 0.05877157222448702, "grad_norm": 0.7071141004562378, "learning_rate": 0.000992653553471939, "loss": 3.8469, "step": 865 }, { "epoch": 0.0591112922951488, "grad_norm": 0.5891933441162109, "learning_rate": 0.0009926110884631064, "loss": 3.9003, "step": 870 }, { "epoch": 0.059451012365810574, "grad_norm": 0.6138566732406616, "learning_rate": 0.0009925686234542738, "loss": 3.6245, "step": 875 }, { "epoch": 0.059790732436472344, "grad_norm": 0.6222836971282959, "learning_rate": 0.000992526158445441, "loss": 3.733, "step": 880 }, { "epoch": 0.06013045250713412, "grad_norm": 0.6209458708763123, "learning_rate": 0.0009924836934366082, "loss": 3.8125, "step": 885 }, { "epoch": 0.0604701725777959, "grad_norm": 0.8434022068977356, "learning_rate": 0.0009924412284277755, "loss": 3.9784, "step": 890 }, { "epoch": 0.060809892648457674, "grad_norm": 0.5856556296348572, "learning_rate": 0.0009923987634189429, "loss": 3.8822, "step": 895 }, { "epoch": 0.061149612719119444, "grad_norm": 0.6009406447410583, "learning_rate": 0.00099235629841011, "loss": 3.8985, "step": 900 }, { "epoch": 0.06148933278978122, "grad_norm": 0.6219965815544128, "learning_rate": 0.0009923138334012773, "loss": 3.7295, "step": 905 }, { "epoch": 0.061829052860443, "grad_norm": 0.7353580594062805, "learning_rate": 0.0009922713683924447, "loss": 3.6831, "step": 910 }, { "epoch": 0.06216877293110477, "grad_norm": 0.7379398345947266, "learning_rate": 0.000992228903383612, "loss": 4.0579, "step": 915 }, { "epoch": 0.06250849300176654, "grad_norm": 0.6934113502502441, "learning_rate": 0.0009921864383747791, "loss": 3.8067, "step": 920 }, { "epoch": 0.06284821307242831, "grad_norm": 0.6656597852706909, "learning_rate": 0.0009921439733659465, "loss": 3.908, "step": 925 }, { "epoch": 0.0631879331430901, "grad_norm": 2.1865758895874023, "learning_rate": 0.0009921015083571138, "loss": 3.9016, "step": 930 }, { "epoch": 0.06352765321375187, "grad_norm": 0.6319058537483215, "learning_rate": 0.000992059043348281, "loss": 4.0054, "step": 935 }, { "epoch": 0.06386737328441364, "grad_norm": 0.5362616777420044, "learning_rate": 0.0009920165783394485, "loss": 3.6477, "step": 940 }, { "epoch": 0.06420709335507542, "grad_norm": 0.5525217652320862, "learning_rate": 0.0009919741133306156, "loss": 3.7794, "step": 945 }, { "epoch": 0.06454681342573719, "grad_norm": 0.7375221252441406, "learning_rate": 0.000991931648321783, "loss": 3.8622, "step": 950 }, { "epoch": 0.06488653349639897, "grad_norm": 0.6783074140548706, "learning_rate": 0.0009918891833129502, "loss": 3.7648, "step": 955 }, { "epoch": 0.06522625356706074, "grad_norm": 0.7984088063240051, "learning_rate": 0.0009918467183041174, "loss": 3.894, "step": 960 }, { "epoch": 0.06556597363772251, "grad_norm": 0.48116791248321533, "learning_rate": 0.0009918042532952847, "loss": 3.7698, "step": 965 }, { "epoch": 0.0659056937083843, "grad_norm": 0.9424999356269836, "learning_rate": 0.000991761788286452, "loss": 4.0673, "step": 970 }, { "epoch": 0.06624541377904607, "grad_norm": 0.5739089250564575, "learning_rate": 0.0009917193232776194, "loss": 4.0236, "step": 975 }, { "epoch": 0.06658513384970784, "grad_norm": 0.5319967269897461, "learning_rate": 0.0009916768582687865, "loss": 3.8909, "step": 980 }, { "epoch": 0.06692485392036962, "grad_norm": 0.6642907857894897, "learning_rate": 0.0009916343932599538, "loss": 3.8548, "step": 985 }, { "epoch": 0.06726457399103139, "grad_norm": 0.5319867134094238, "learning_rate": 0.0009915919282511212, "loss": 3.8652, "step": 990 }, { "epoch": 0.06760429406169316, "grad_norm": 0.5827558636665344, "learning_rate": 0.0009915494632422883, "loss": 3.7097, "step": 995 }, { "epoch": 0.06794401413235494, "grad_norm": 0.6750186681747437, "learning_rate": 0.0009915069982334556, "loss": 3.8518, "step": 1000 }, { "epoch": 0.06828373420301671, "grad_norm": 1.1676689386367798, "learning_rate": 0.000991464533224623, "loss": 3.7971, "step": 1005 }, { "epoch": 0.06862345427367848, "grad_norm": 0.5687863826751709, "learning_rate": 0.0009914220682157903, "loss": 3.6428, "step": 1010 }, { "epoch": 0.06896317434434027, "grad_norm": 0.6025897860527039, "learning_rate": 0.0009913796032069576, "loss": 3.9043, "step": 1015 }, { "epoch": 0.06930289441500204, "grad_norm": 0.5458235740661621, "learning_rate": 0.0009913371381981247, "loss": 3.7681, "step": 1020 }, { "epoch": 0.0696426144856638, "grad_norm": 0.505095899105072, "learning_rate": 0.000991294673189292, "loss": 3.5802, "step": 1025 }, { "epoch": 0.06998233455632559, "grad_norm": 0.7556877136230469, "learning_rate": 0.0009912522081804594, "loss": 4.0027, "step": 1030 }, { "epoch": 0.07032205462698736, "grad_norm": 0.7739489674568176, "learning_rate": 0.0009912097431716265, "loss": 3.7581, "step": 1035 }, { "epoch": 0.07066177469764914, "grad_norm": 0.7745015621185303, "learning_rate": 0.0009911672781627938, "loss": 3.7831, "step": 1040 }, { "epoch": 0.07100149476831091, "grad_norm": 1.3215758800506592, "learning_rate": 0.0009911248131539612, "loss": 3.7766, "step": 1045 }, { "epoch": 0.07134121483897268, "grad_norm": 0.6260070204734802, "learning_rate": 0.0009910823481451285, "loss": 4.033, "step": 1050 }, { "epoch": 0.07168093490963447, "grad_norm": 0.9822431802749634, "learning_rate": 0.0009910398831362956, "loss": 3.6278, "step": 1055 }, { "epoch": 0.07202065498029624, "grad_norm": 0.6256703734397888, "learning_rate": 0.000990997418127463, "loss": 3.6758, "step": 1060 }, { "epoch": 0.072360375050958, "grad_norm": 0.5993952751159668, "learning_rate": 0.0009909549531186303, "loss": 3.5871, "step": 1065 }, { "epoch": 0.07270009512161979, "grad_norm": 0.6881741881370544, "learning_rate": 0.0009909124881097974, "loss": 3.9959, "step": 1070 }, { "epoch": 0.07303981519228156, "grad_norm": 2.6121256351470947, "learning_rate": 0.0009908700231009647, "loss": 3.6342, "step": 1075 }, { "epoch": 0.07337953526294333, "grad_norm": 0.6979096531867981, "learning_rate": 0.000990827558092132, "loss": 4.0179, "step": 1080 }, { "epoch": 0.07371925533360511, "grad_norm": 0.5363898873329163, "learning_rate": 0.0009907850930832994, "loss": 3.7014, "step": 1085 }, { "epoch": 0.07405897540426688, "grad_norm": 0.6762584447860718, "learning_rate": 0.0009907426280744668, "loss": 3.8049, "step": 1090 }, { "epoch": 0.07439869547492865, "grad_norm": 0.7676011919975281, "learning_rate": 0.0009907001630656339, "loss": 3.8933, "step": 1095 }, { "epoch": 0.07473841554559044, "grad_norm": 0.6409669518470764, "learning_rate": 0.0009906576980568012, "loss": 3.378, "step": 1100 }, { "epoch": 0.0750781356162522, "grad_norm": 0.8464885354042053, "learning_rate": 0.0009906152330479685, "loss": 4.0465, "step": 1105 }, { "epoch": 0.07541785568691398, "grad_norm": 0.9416410326957703, "learning_rate": 0.0009905727680391357, "loss": 3.7612, "step": 1110 }, { "epoch": 0.07575757575757576, "grad_norm": 1.8878381252288818, "learning_rate": 0.000990530303030303, "loss": 3.8833, "step": 1115 }, { "epoch": 0.07609729582823753, "grad_norm": 0.6034110188484192, "learning_rate": 0.0009904878380214703, "loss": 3.7747, "step": 1120 }, { "epoch": 0.07643701589889931, "grad_norm": 0.848849356174469, "learning_rate": 0.0009904453730126377, "loss": 3.9122, "step": 1125 }, { "epoch": 0.07677673596956108, "grad_norm": 0.7755124568939209, "learning_rate": 0.0009904029080038048, "loss": 3.776, "step": 1130 }, { "epoch": 0.07711645604022285, "grad_norm": 1.3329683542251587, "learning_rate": 0.0009903604429949721, "loss": 3.9555, "step": 1135 }, { "epoch": 0.07745617611088464, "grad_norm": 0.7614222168922424, "learning_rate": 0.0009903179779861394, "loss": 3.5661, "step": 1140 }, { "epoch": 0.0777958961815464, "grad_norm": 0.8013120889663696, "learning_rate": 0.0009902755129773066, "loss": 3.9451, "step": 1145 }, { "epoch": 0.07813561625220818, "grad_norm": 0.7201833128929138, "learning_rate": 0.0009902330479684741, "loss": 3.7353, "step": 1150 }, { "epoch": 0.07847533632286996, "grad_norm": 0.6137555837631226, "learning_rate": 0.0009901905829596412, "loss": 3.8554, "step": 1155 }, { "epoch": 0.07881505639353173, "grad_norm": 0.5318056344985962, "learning_rate": 0.0009901481179508086, "loss": 3.7185, "step": 1160 }, { "epoch": 0.0791547764641935, "grad_norm": 0.6779008507728577, "learning_rate": 0.000990105652941976, "loss": 3.9978, "step": 1165 }, { "epoch": 0.07949449653485528, "grad_norm": 0.5661518573760986, "learning_rate": 0.000990063187933143, "loss": 3.8424, "step": 1170 }, { "epoch": 0.07983421660551705, "grad_norm": 0.6510219573974609, "learning_rate": 0.0009900207229243104, "loss": 3.7111, "step": 1175 }, { "epoch": 0.08017393667617882, "grad_norm": 0.6256716847419739, "learning_rate": 0.0009899782579154777, "loss": 4.2326, "step": 1180 }, { "epoch": 0.0805136567468406, "grad_norm": 0.5679534077644348, "learning_rate": 0.000989935792906645, "loss": 3.897, "step": 1185 }, { "epoch": 0.08085337681750238, "grad_norm": 0.5974684953689575, "learning_rate": 0.0009898933278978121, "loss": 4.1923, "step": 1190 }, { "epoch": 0.08119309688816416, "grad_norm": 0.8721858263015747, "learning_rate": 0.0009898508628889795, "loss": 4.3006, "step": 1195 }, { "epoch": 0.08153281695882593, "grad_norm": 112.88106536865234, "learning_rate": 0.0009898083978801468, "loss": 5.4792, "step": 1200 }, { "epoch": 0.0818725370294877, "grad_norm": 2.139004707336426, "learning_rate": 0.0009897659328713141, "loss": 5.7927, "step": 1205 }, { "epoch": 0.08221225710014948, "grad_norm": 2.9734630584716797, "learning_rate": 0.0009897234678624813, "loss": 4.1369, "step": 1210 }, { "epoch": 0.08255197717081125, "grad_norm": 0.7371381521224976, "learning_rate": 0.0009896810028536486, "loss": 3.8499, "step": 1215 }, { "epoch": 0.08289169724147302, "grad_norm": 0.7136930823326111, "learning_rate": 0.000989638537844816, "loss": 3.7277, "step": 1220 }, { "epoch": 0.0832314173121348, "grad_norm": 0.6129528880119324, "learning_rate": 0.0009895960728359833, "loss": 3.7181, "step": 1225 }, { "epoch": 0.08357113738279658, "grad_norm": 0.7247201204299927, "learning_rate": 0.0009895536078271504, "loss": 3.9597, "step": 1230 }, { "epoch": 0.08391085745345835, "grad_norm": 0.6700114011764526, "learning_rate": 0.0009895111428183177, "loss": 3.7295, "step": 1235 }, { "epoch": 0.08425057752412013, "grad_norm": 0.8911071419715881, "learning_rate": 0.000989468677809485, "loss": 3.8105, "step": 1240 }, { "epoch": 0.0845902975947819, "grad_norm": 0.6714710593223572, "learning_rate": 0.0009894262128006522, "loss": 3.5437, "step": 1245 }, { "epoch": 0.08493001766544367, "grad_norm": 0.670673668384552, "learning_rate": 0.0009893837477918197, "loss": 3.8434, "step": 1250 }, { "epoch": 0.08526973773610545, "grad_norm": 0.6780371069908142, "learning_rate": 0.0009893412827829868, "loss": 3.984, "step": 1255 }, { "epoch": 0.08560945780676722, "grad_norm": 0.6912269592285156, "learning_rate": 0.0009892988177741542, "loss": 3.6702, "step": 1260 }, { "epoch": 0.08594917787742899, "grad_norm": 0.7322074770927429, "learning_rate": 0.0009892563527653215, "loss": 3.7321, "step": 1265 }, { "epoch": 0.08628889794809078, "grad_norm": 0.7525180578231812, "learning_rate": 0.0009892138877564886, "loss": 3.725, "step": 1270 }, { "epoch": 0.08662861801875255, "grad_norm": 0.6995285153388977, "learning_rate": 0.000989171422747656, "loss": 3.7849, "step": 1275 }, { "epoch": 0.08696833808941433, "grad_norm": 0.514626681804657, "learning_rate": 0.0009891289577388233, "loss": 3.9906, "step": 1280 }, { "epoch": 0.0873080581600761, "grad_norm": 0.5982587933540344, "learning_rate": 0.0009890864927299906, "loss": 4.3071, "step": 1285 }, { "epoch": 0.08764777823073787, "grad_norm": 1.2551847696304321, "learning_rate": 0.0009890440277211577, "loss": 3.8248, "step": 1290 }, { "epoch": 0.08798749830139965, "grad_norm": 0.5657306909561157, "learning_rate": 0.000989001562712325, "loss": 3.765, "step": 1295 }, { "epoch": 0.08832721837206142, "grad_norm": 0.5734697580337524, "learning_rate": 0.0009889590977034924, "loss": 4.0299, "step": 1300 }, { "epoch": 0.08866693844272319, "grad_norm": 0.5558915734291077, "learning_rate": 0.0009889166326946595, "loss": 3.8244, "step": 1305 }, { "epoch": 0.08900665851338498, "grad_norm": 0.556370735168457, "learning_rate": 0.0009888741676858269, "loss": 3.8871, "step": 1310 }, { "epoch": 0.08934637858404675, "grad_norm": 0.6792877316474915, "learning_rate": 0.0009888317026769942, "loss": 3.7253, "step": 1315 }, { "epoch": 0.08968609865470852, "grad_norm": 0.6492648720741272, "learning_rate": 0.0009887892376681615, "loss": 3.4992, "step": 1320 }, { "epoch": 0.0900258187253703, "grad_norm": 0.652964174747467, "learning_rate": 0.0009887467726593289, "loss": 3.5441, "step": 1325 }, { "epoch": 0.09036553879603207, "grad_norm": 0.6389694213867188, "learning_rate": 0.000988704307650496, "loss": 3.815, "step": 1330 }, { "epoch": 0.09070525886669384, "grad_norm": 0.7050741910934448, "learning_rate": 0.0009886618426416633, "loss": 3.8492, "step": 1335 }, { "epoch": 0.09104497893735562, "grad_norm": 0.5823991894721985, "learning_rate": 0.0009886193776328306, "loss": 3.9839, "step": 1340 }, { "epoch": 0.09138469900801739, "grad_norm": 0.7883529663085938, "learning_rate": 0.0009885769126239978, "loss": 3.5452, "step": 1345 }, { "epoch": 0.09172441907867916, "grad_norm": 1.3914811611175537, "learning_rate": 0.000988534447615165, "loss": 3.6251, "step": 1350 }, { "epoch": 0.09206413914934095, "grad_norm": 0.6352505087852478, "learning_rate": 0.0009884919826063324, "loss": 3.8847, "step": 1355 }, { "epoch": 0.09240385922000272, "grad_norm": 1.3413283824920654, "learning_rate": 0.0009884495175974998, "loss": 3.7842, "step": 1360 }, { "epoch": 0.0927435792906645, "grad_norm": 0.5921688079833984, "learning_rate": 0.0009884070525886669, "loss": 3.7323, "step": 1365 }, { "epoch": 0.09308329936132627, "grad_norm": 0.5520976185798645, "learning_rate": 0.0009883645875798342, "loss": 3.828, "step": 1370 }, { "epoch": 0.09342301943198804, "grad_norm": 0.5818116068840027, "learning_rate": 0.0009883221225710016, "loss": 3.8957, "step": 1375 }, { "epoch": 0.09376273950264982, "grad_norm": 0.6413286328315735, "learning_rate": 0.0009882796575621687, "loss": 3.6669, "step": 1380 }, { "epoch": 0.09410245957331159, "grad_norm": 0.7118954658508301, "learning_rate": 0.000988237192553336, "loss": 3.7792, "step": 1385 }, { "epoch": 0.09444217964397336, "grad_norm": 0.6792705059051514, "learning_rate": 0.0009881947275445033, "loss": 3.7641, "step": 1390 }, { "epoch": 0.09478189971463515, "grad_norm": 0.7626094222068787, "learning_rate": 0.0009881522625356707, "loss": 3.7633, "step": 1395 }, { "epoch": 0.09512161978529692, "grad_norm": 0.5472246408462524, "learning_rate": 0.000988109797526838, "loss": 3.8334, "step": 1400 }, { "epoch": 0.09546133985595869, "grad_norm": 0.5271095037460327, "learning_rate": 0.0009880673325180051, "loss": 3.9326, "step": 1405 }, { "epoch": 0.09580105992662047, "grad_norm": 0.8590952157974243, "learning_rate": 0.0009880248675091725, "loss": 3.8794, "step": 1410 }, { "epoch": 0.09614077999728224, "grad_norm": 0.5687350630760193, "learning_rate": 0.0009879824025003398, "loss": 3.8372, "step": 1415 }, { "epoch": 0.09648050006794401, "grad_norm": 0.9411936402320862, "learning_rate": 0.000987939937491507, "loss": 3.9607, "step": 1420 }, { "epoch": 0.09682022013860579, "grad_norm": 0.7167156934738159, "learning_rate": 0.0009878974724826742, "loss": 3.8414, "step": 1425 }, { "epoch": 0.09715994020926756, "grad_norm": 0.5776757001876831, "learning_rate": 0.0009878550074738416, "loss": 3.7853, "step": 1430 }, { "epoch": 0.09749966027992933, "grad_norm": 1.1498574018478394, "learning_rate": 0.000987812542465009, "loss": 3.7495, "step": 1435 }, { "epoch": 0.09783938035059112, "grad_norm": 0.8664551377296448, "learning_rate": 0.000987770077456176, "loss": 3.8079, "step": 1440 }, { "epoch": 0.09817910042125289, "grad_norm": 0.5510807633399963, "learning_rate": 0.0009877276124473434, "loss": 3.6465, "step": 1445 }, { "epoch": 0.09851882049191467, "grad_norm": 0.5712562203407288, "learning_rate": 0.0009876851474385107, "loss": 3.7579, "step": 1450 }, { "epoch": 0.09885854056257644, "grad_norm": 0.4920804798603058, "learning_rate": 0.0009876426824296778, "loss": 3.7946, "step": 1455 }, { "epoch": 0.09919826063323821, "grad_norm": 0.5596398115158081, "learning_rate": 0.0009876002174208454, "loss": 3.8949, "step": 1460 }, { "epoch": 0.09953798070389999, "grad_norm": 0.9163932204246521, "learning_rate": 0.0009875577524120125, "loss": 3.789, "step": 1465 }, { "epoch": 0.09987770077456176, "grad_norm": 0.6597535610198975, "learning_rate": 0.0009875152874031798, "loss": 3.9565, "step": 1470 }, { "epoch": 0.10021742084522353, "grad_norm": 0.6468210220336914, "learning_rate": 0.0009874728223943472, "loss": 3.5579, "step": 1475 }, { "epoch": 0.10055714091588532, "grad_norm": 0.7370440363883972, "learning_rate": 0.0009874303573855143, "loss": 3.8719, "step": 1480 }, { "epoch": 0.10089686098654709, "grad_norm": 0.7131441831588745, "learning_rate": 0.0009873878923766816, "loss": 3.6775, "step": 1485 }, { "epoch": 0.10123658105720885, "grad_norm": 0.6849778294563293, "learning_rate": 0.000987345427367849, "loss": 3.7835, "step": 1490 }, { "epoch": 0.10157630112787064, "grad_norm": 0.5652458071708679, "learning_rate": 0.0009873029623590163, "loss": 3.7406, "step": 1495 }, { "epoch": 0.10191602119853241, "grad_norm": 0.6087997555732727, "learning_rate": 0.0009872604973501834, "loss": 3.9017, "step": 1500 }, { "epoch": 0.10225574126919418, "grad_norm": 0.6391319632530212, "learning_rate": 0.0009872180323413507, "loss": 3.8589, "step": 1505 }, { "epoch": 0.10259546133985596, "grad_norm": 0.7061350345611572, "learning_rate": 0.000987175567332518, "loss": 3.7486, "step": 1510 }, { "epoch": 0.10293518141051773, "grad_norm": 0.5502421259880066, "learning_rate": 0.0009871331023236852, "loss": 3.8157, "step": 1515 }, { "epoch": 0.1032749014811795, "grad_norm": 0.5989504456520081, "learning_rate": 0.0009870906373148525, "loss": 3.6464, "step": 1520 }, { "epoch": 0.10361462155184128, "grad_norm": 0.8456800580024719, "learning_rate": 0.0009870481723060198, "loss": 3.8163, "step": 1525 }, { "epoch": 0.10395434162250305, "grad_norm": 0.6394452452659607, "learning_rate": 0.0009870057072971872, "loss": 3.8105, "step": 1530 }, { "epoch": 0.10429406169316484, "grad_norm": 0.8114145994186401, "learning_rate": 0.0009869632422883545, "loss": 3.9587, "step": 1535 }, { "epoch": 0.10463378176382661, "grad_norm": 0.6726270914077759, "learning_rate": 0.0009869207772795216, "loss": 3.7907, "step": 1540 }, { "epoch": 0.10497350183448838, "grad_norm": 0.6676947474479675, "learning_rate": 0.000986878312270689, "loss": 3.8472, "step": 1545 }, { "epoch": 0.10531322190515016, "grad_norm": 0.8118389248847961, "learning_rate": 0.0009868358472618563, "loss": 3.7511, "step": 1550 }, { "epoch": 0.10565294197581193, "grad_norm": 0.6756294369697571, "learning_rate": 0.0009867933822530234, "loss": 3.6921, "step": 1555 }, { "epoch": 0.1059926620464737, "grad_norm": 0.6851464509963989, "learning_rate": 0.0009867509172441908, "loss": 3.7618, "step": 1560 }, { "epoch": 0.10633238211713548, "grad_norm": 0.7778677344322205, "learning_rate": 0.000986708452235358, "loss": 3.896, "step": 1565 }, { "epoch": 0.10667210218779725, "grad_norm": 0.6365407705307007, "learning_rate": 0.0009866659872265254, "loss": 3.8857, "step": 1570 }, { "epoch": 0.10701182225845902, "grad_norm": 0.6037127375602722, "learning_rate": 0.0009866235222176928, "loss": 3.967, "step": 1575 }, { "epoch": 0.10735154232912081, "grad_norm": 0.5145928263664246, "learning_rate": 0.0009865810572088599, "loss": 3.8159, "step": 1580 }, { "epoch": 0.10769126239978258, "grad_norm": 0.5643739700317383, "learning_rate": 0.0009865385922000272, "loss": 3.7708, "step": 1585 }, { "epoch": 0.10803098247044435, "grad_norm": 0.6068370938301086, "learning_rate": 0.0009864961271911945, "loss": 3.7037, "step": 1590 }, { "epoch": 0.10837070254110613, "grad_norm": 0.500476598739624, "learning_rate": 0.0009864536621823617, "loss": 3.8879, "step": 1595 }, { "epoch": 0.1087104226117679, "grad_norm": 0.488675981760025, "learning_rate": 0.000986411197173529, "loss": 3.7898, "step": 1600 }, { "epoch": 0.10905014268242967, "grad_norm": 0.5935916304588318, "learning_rate": 0.0009863687321646963, "loss": 3.7842, "step": 1605 }, { "epoch": 0.10938986275309145, "grad_norm": 0.7996106147766113, "learning_rate": 0.0009863262671558637, "loss": 3.9069, "step": 1610 }, { "epoch": 0.10972958282375322, "grad_norm": 0.6345498561859131, "learning_rate": 0.0009862838021470308, "loss": 3.7557, "step": 1615 }, { "epoch": 0.11006930289441501, "grad_norm": 0.592496395111084, "learning_rate": 0.0009862413371381981, "loss": 3.6109, "step": 1620 }, { "epoch": 0.11040902296507678, "grad_norm": 0.5481966137886047, "learning_rate": 0.0009861988721293654, "loss": 3.7252, "step": 1625 }, { "epoch": 0.11074874303573855, "grad_norm": 0.6437157392501831, "learning_rate": 0.0009861564071205326, "loss": 3.8171, "step": 1630 }, { "epoch": 0.11108846310640033, "grad_norm": 0.6112599968910217, "learning_rate": 0.0009861139421117001, "loss": 3.788, "step": 1635 }, { "epoch": 0.1114281831770621, "grad_norm": 0.595553457736969, "learning_rate": 0.0009860714771028672, "loss": 3.8753, "step": 1640 }, { "epoch": 0.11176790324772387, "grad_norm": 0.6088777780532837, "learning_rate": 0.0009860290120940346, "loss": 3.9003, "step": 1645 }, { "epoch": 0.11210762331838565, "grad_norm": 0.5482221841812134, "learning_rate": 0.000985986547085202, "loss": 3.6909, "step": 1650 }, { "epoch": 0.11244734338904742, "grad_norm": 0.6865453720092773, "learning_rate": 0.000985944082076369, "loss": 3.8776, "step": 1655 }, { "epoch": 0.1127870634597092, "grad_norm": 0.7188596129417419, "learning_rate": 0.0009859016170675364, "loss": 3.8061, "step": 1660 }, { "epoch": 0.11312678353037098, "grad_norm": 0.8115466237068176, "learning_rate": 0.0009858591520587037, "loss": 3.8013, "step": 1665 }, { "epoch": 0.11346650360103275, "grad_norm": 0.647230863571167, "learning_rate": 0.000985816687049871, "loss": 3.7771, "step": 1670 }, { "epoch": 0.11380622367169452, "grad_norm": 0.5701841711997986, "learning_rate": 0.0009857742220410381, "loss": 3.8349, "step": 1675 }, { "epoch": 0.1141459437423563, "grad_norm": 0.6241211891174316, "learning_rate": 0.0009857317570322055, "loss": 3.8027, "step": 1680 }, { "epoch": 0.11448566381301807, "grad_norm": 0.521457314491272, "learning_rate": 0.0009856892920233728, "loss": 3.645, "step": 1685 }, { "epoch": 0.11482538388367985, "grad_norm": 0.5666539072990417, "learning_rate": 0.00098564682701454, "loss": 3.4762, "step": 1690 }, { "epoch": 0.11516510395434162, "grad_norm": 0.5097023844718933, "learning_rate": 0.0009856043620057073, "loss": 3.6349, "step": 1695 }, { "epoch": 0.1155048240250034, "grad_norm": 0.6535791754722595, "learning_rate": 0.0009855618969968746, "loss": 3.8198, "step": 1700 }, { "epoch": 0.11584454409566518, "grad_norm": 0.5113033056259155, "learning_rate": 0.000985519431988042, "loss": 3.5389, "step": 1705 }, { "epoch": 0.11618426416632695, "grad_norm": 0.4774572253227234, "learning_rate": 0.0009854769669792093, "loss": 3.983, "step": 1710 }, { "epoch": 0.11652398423698872, "grad_norm": 0.6864311695098877, "learning_rate": 0.0009854345019703764, "loss": 3.5762, "step": 1715 }, { "epoch": 0.1168637043076505, "grad_norm": 0.6275101900100708, "learning_rate": 0.0009853920369615437, "loss": 3.8248, "step": 1720 }, { "epoch": 0.11720342437831227, "grad_norm": 0.673237144947052, "learning_rate": 0.000985349571952711, "loss": 3.593, "step": 1725 }, { "epoch": 0.11754314444897404, "grad_norm": 0.6461753249168396, "learning_rate": 0.0009853071069438782, "loss": 3.6515, "step": 1730 }, { "epoch": 0.11788286451963582, "grad_norm": 0.9219866991043091, "learning_rate": 0.0009852646419350455, "loss": 3.6874, "step": 1735 }, { "epoch": 0.1182225845902976, "grad_norm": 0.6653991937637329, "learning_rate": 0.0009852221769262128, "loss": 3.8494, "step": 1740 }, { "epoch": 0.11856230466095936, "grad_norm": 0.6028766632080078, "learning_rate": 0.0009851797119173802, "loss": 3.9718, "step": 1745 }, { "epoch": 0.11890202473162115, "grad_norm": 0.5921105146408081, "learning_rate": 0.0009851372469085473, "loss": 3.8311, "step": 1750 }, { "epoch": 0.11924174480228292, "grad_norm": 0.6015687584877014, "learning_rate": 0.0009850947818997146, "loss": 3.8485, "step": 1755 }, { "epoch": 0.11958146487294469, "grad_norm": 0.5676931738853455, "learning_rate": 0.000985052316890882, "loss": 3.9177, "step": 1760 }, { "epoch": 0.11992118494360647, "grad_norm": 0.6627026200294495, "learning_rate": 0.000985009851882049, "loss": 3.84, "step": 1765 }, { "epoch": 0.12026090501426824, "grad_norm": 0.6077857613563538, "learning_rate": 0.0009849673868732166, "loss": 3.8357, "step": 1770 }, { "epoch": 0.12060062508493002, "grad_norm": 0.7264236807823181, "learning_rate": 0.0009849249218643837, "loss": 3.749, "step": 1775 }, { "epoch": 0.1209403451555918, "grad_norm": 0.5790697336196899, "learning_rate": 0.000984882456855551, "loss": 3.8881, "step": 1780 }, { "epoch": 0.12128006522625356, "grad_norm": 0.6634170413017273, "learning_rate": 0.0009848399918467184, "loss": 3.9171, "step": 1785 }, { "epoch": 0.12161978529691535, "grad_norm": 0.7707322835922241, "learning_rate": 0.0009847975268378855, "loss": 3.8622, "step": 1790 }, { "epoch": 0.12195950536757712, "grad_norm": 0.6475215554237366, "learning_rate": 0.0009847550618290529, "loss": 3.6067, "step": 1795 }, { "epoch": 0.12229922543823889, "grad_norm": 0.6176405549049377, "learning_rate": 0.0009847125968202202, "loss": 3.6783, "step": 1800 }, { "epoch": 0.12263894550890067, "grad_norm": 0.6158005595207214, "learning_rate": 0.0009846701318113875, "loss": 3.7764, "step": 1805 }, { "epoch": 0.12297866557956244, "grad_norm": 0.5201879739761353, "learning_rate": 0.0009846276668025546, "loss": 3.6984, "step": 1810 }, { "epoch": 0.12331838565022421, "grad_norm": 0.551987886428833, "learning_rate": 0.000984585201793722, "loss": 3.7062, "step": 1815 }, { "epoch": 0.123658105720886, "grad_norm": 0.6607746481895447, "learning_rate": 0.0009845427367848893, "loss": 3.7366, "step": 1820 }, { "epoch": 0.12399782579154776, "grad_norm": 0.963157057762146, "learning_rate": 0.0009845002717760564, "loss": 3.6742, "step": 1825 }, { "epoch": 0.12433754586220953, "grad_norm": 0.5994268655776978, "learning_rate": 0.0009844578067672238, "loss": 3.846, "step": 1830 }, { "epoch": 0.12467726593287132, "grad_norm": 0.6295331120491028, "learning_rate": 0.000984415341758391, "loss": 3.7987, "step": 1835 }, { "epoch": 0.1250169860035331, "grad_norm": 0.6552512645721436, "learning_rate": 0.0009843728767495584, "loss": 3.8192, "step": 1840 }, { "epoch": 0.12535670607419486, "grad_norm": 0.8361065983772278, "learning_rate": 0.0009843304117407258, "loss": 3.7101, "step": 1845 }, { "epoch": 0.12569642614485663, "grad_norm": 1.1641591787338257, "learning_rate": 0.0009842879467318929, "loss": 3.8828, "step": 1850 }, { "epoch": 0.12603614621551842, "grad_norm": 0.6541483402252197, "learning_rate": 0.0009842454817230602, "loss": 3.8318, "step": 1855 }, { "epoch": 0.1263758662861802, "grad_norm": 0.6286070346832275, "learning_rate": 0.0009842030167142276, "loss": 3.7528, "step": 1860 }, { "epoch": 0.12671558635684196, "grad_norm": 0.6533440947532654, "learning_rate": 0.0009841605517053947, "loss": 3.5678, "step": 1865 }, { "epoch": 0.12705530642750373, "grad_norm": 0.6015164256095886, "learning_rate": 0.000984118086696562, "loss": 3.8394, "step": 1870 }, { "epoch": 0.1273950264981655, "grad_norm": 0.5898406505584717, "learning_rate": 0.0009840756216877293, "loss": 3.6599, "step": 1875 }, { "epoch": 0.12773474656882727, "grad_norm": 0.7372815012931824, "learning_rate": 0.0009840331566788967, "loss": 3.7581, "step": 1880 }, { "epoch": 0.12807446663948907, "grad_norm": 0.5894709825515747, "learning_rate": 0.000983990691670064, "loss": 3.8598, "step": 1885 }, { "epoch": 0.12841418671015084, "grad_norm": 0.7415690422058105, "learning_rate": 0.0009839482266612311, "loss": 3.3445, "step": 1890 }, { "epoch": 0.1287539067808126, "grad_norm": 0.669512152671814, "learning_rate": 0.0009839057616523985, "loss": 3.8245, "step": 1895 }, { "epoch": 0.12909362685147438, "grad_norm": 0.6851697564125061, "learning_rate": 0.0009838632966435658, "loss": 3.6915, "step": 1900 }, { "epoch": 0.12943334692213615, "grad_norm": 0.6504551768302917, "learning_rate": 0.000983820831634733, "loss": 3.5653, "step": 1905 }, { "epoch": 0.12977306699279795, "grad_norm": 0.6741543412208557, "learning_rate": 0.0009837783666259003, "loss": 3.844, "step": 1910 }, { "epoch": 0.13011278706345972, "grad_norm": 0.6617727875709534, "learning_rate": 0.0009837359016170676, "loss": 3.9249, "step": 1915 }, { "epoch": 0.1304525071341215, "grad_norm": 0.6049027442932129, "learning_rate": 0.000983693436608235, "loss": 3.4405, "step": 1920 }, { "epoch": 0.13079222720478326, "grad_norm": 0.6029608845710754, "learning_rate": 0.000983650971599402, "loss": 3.823, "step": 1925 }, { "epoch": 0.13113194727544503, "grad_norm": 0.6659353375434875, "learning_rate": 0.0009836085065905694, "loss": 3.7454, "step": 1930 }, { "epoch": 0.1314716673461068, "grad_norm": 0.8009145855903625, "learning_rate": 0.0009835660415817367, "loss": 3.7289, "step": 1935 }, { "epoch": 0.1318113874167686, "grad_norm": 0.5986393094062805, "learning_rate": 0.0009835235765729038, "loss": 3.3837, "step": 1940 }, { "epoch": 0.13215110748743036, "grad_norm": 0.8296811580657959, "learning_rate": 0.0009834811115640714, "loss": 3.6203, "step": 1945 }, { "epoch": 0.13249082755809213, "grad_norm": 0.5641279816627502, "learning_rate": 0.0009834386465552385, "loss": 3.8938, "step": 1950 }, { "epoch": 0.1328305476287539, "grad_norm": 0.6389486789703369, "learning_rate": 0.0009833961815464058, "loss": 3.7309, "step": 1955 }, { "epoch": 0.13317026769941567, "grad_norm": 0.6988972425460815, "learning_rate": 0.0009833537165375732, "loss": 3.7553, "step": 1960 }, { "epoch": 0.13350998777007744, "grad_norm": 2.3557167053222656, "learning_rate": 0.0009833112515287403, "loss": 3.4593, "step": 1965 }, { "epoch": 0.13384970784073924, "grad_norm": 0.8683882355690002, "learning_rate": 0.0009832687865199076, "loss": 3.6475, "step": 1970 }, { "epoch": 0.134189427911401, "grad_norm": 0.6037503480911255, "learning_rate": 0.000983226321511075, "loss": 3.9733, "step": 1975 }, { "epoch": 0.13452914798206278, "grad_norm": 0.6086533069610596, "learning_rate": 0.0009831838565022423, "loss": 3.6101, "step": 1980 }, { "epoch": 0.13486886805272455, "grad_norm": 0.5632864236831665, "learning_rate": 0.0009831413914934094, "loss": 3.7772, "step": 1985 }, { "epoch": 0.13520858812338632, "grad_norm": 0.621701717376709, "learning_rate": 0.0009830989264845767, "loss": 3.7126, "step": 1990 }, { "epoch": 0.13554830819404812, "grad_norm": 0.6236476302146912, "learning_rate": 0.000983056461475744, "loss": 3.8092, "step": 1995 }, { "epoch": 0.1358880282647099, "grad_norm": 0.6200979351997375, "learning_rate": 0.0009830139964669112, "loss": 3.7179, "step": 2000 }, { "epoch": 0.13622774833537166, "grad_norm": 0.778945803642273, "learning_rate": 0.0009829715314580785, "loss": 3.6423, "step": 2005 }, { "epoch": 0.13656746840603343, "grad_norm": 0.583135724067688, "learning_rate": 0.0009829290664492459, "loss": 4.018, "step": 2010 }, { "epoch": 0.1369071884766952, "grad_norm": 0.48970645666122437, "learning_rate": 0.0009828866014404132, "loss": 3.799, "step": 2015 }, { "epoch": 0.13724690854735697, "grad_norm": 0.7138967514038086, "learning_rate": 0.0009828441364315805, "loss": 3.9335, "step": 2020 }, { "epoch": 0.13758662861801876, "grad_norm": 0.6615206599235535, "learning_rate": 0.0009828016714227476, "loss": 3.645, "step": 2025 }, { "epoch": 0.13792634868868053, "grad_norm": 0.7684316635131836, "learning_rate": 0.000982759206413915, "loss": 3.7033, "step": 2030 }, { "epoch": 0.1382660687593423, "grad_norm": 0.6158739924430847, "learning_rate": 0.0009827167414050823, "loss": 3.9084, "step": 2035 }, { "epoch": 0.13860578883000407, "grad_norm": 0.5423030257225037, "learning_rate": 0.0009826742763962494, "loss": 3.7914, "step": 2040 }, { "epoch": 0.13894550890066584, "grad_norm": 0.501213550567627, "learning_rate": 0.0009826318113874168, "loss": 3.9085, "step": 2045 }, { "epoch": 0.1392852289713276, "grad_norm": 0.6407187581062317, "learning_rate": 0.000982589346378584, "loss": 3.7081, "step": 2050 }, { "epoch": 0.1396249490419894, "grad_norm": 0.6124802231788635, "learning_rate": 0.0009825468813697514, "loss": 3.7414, "step": 2055 }, { "epoch": 0.13996466911265118, "grad_norm": 0.543357789516449, "learning_rate": 0.0009825044163609185, "loss": 3.8518, "step": 2060 }, { "epoch": 0.14030438918331295, "grad_norm": 0.6752829551696777, "learning_rate": 0.0009824619513520859, "loss": 3.9744, "step": 2065 }, { "epoch": 0.14064410925397472, "grad_norm": 0.6038799285888672, "learning_rate": 0.0009824194863432532, "loss": 3.6754, "step": 2070 }, { "epoch": 0.1409838293246365, "grad_norm": 0.717085599899292, "learning_rate": 0.0009823770213344203, "loss": 3.5664, "step": 2075 }, { "epoch": 0.1413235493952983, "grad_norm": 0.4654978811740875, "learning_rate": 0.0009823345563255877, "loss": 3.8664, "step": 2080 }, { "epoch": 0.14166326946596006, "grad_norm": 0.5906335711479187, "learning_rate": 0.000982292091316755, "loss": 3.893, "step": 2085 }, { "epoch": 0.14200298953662183, "grad_norm": 0.5542263388633728, "learning_rate": 0.0009822496263079223, "loss": 3.8011, "step": 2090 }, { "epoch": 0.1423427096072836, "grad_norm": 0.6158429980278015, "learning_rate": 0.0009822071612990897, "loss": 3.8606, "step": 2095 }, { "epoch": 0.14268242967794537, "grad_norm": 0.5313276052474976, "learning_rate": 0.0009821646962902568, "loss": 3.6783, "step": 2100 }, { "epoch": 0.14302214974860714, "grad_norm": 0.6322357654571533, "learning_rate": 0.0009821222312814241, "loss": 3.8702, "step": 2105 }, { "epoch": 0.14336186981926893, "grad_norm": 0.5938016176223755, "learning_rate": 0.0009820797662725915, "loss": 3.8795, "step": 2110 }, { "epoch": 0.1437015898899307, "grad_norm": 0.7242531776428223, "learning_rate": 0.0009820373012637586, "loss": 3.4051, "step": 2115 }, { "epoch": 0.14404130996059247, "grad_norm": 0.5325323939323425, "learning_rate": 0.000981994836254926, "loss": 3.9024, "step": 2120 }, { "epoch": 0.14438103003125424, "grad_norm": 0.6194315552711487, "learning_rate": 0.0009819523712460932, "loss": 3.7453, "step": 2125 }, { "epoch": 0.144720750101916, "grad_norm": 0.7634878754615784, "learning_rate": 0.0009819099062372606, "loss": 3.7032, "step": 2130 }, { "epoch": 0.14506047017257778, "grad_norm": 0.8421163558959961, "learning_rate": 0.0009818674412284277, "loss": 3.7392, "step": 2135 }, { "epoch": 0.14540019024323958, "grad_norm": 0.5848629474639893, "learning_rate": 0.000981824976219595, "loss": 3.7481, "step": 2140 }, { "epoch": 0.14573991031390135, "grad_norm": 0.640381395816803, "learning_rate": 0.0009817825112107624, "loss": 3.8533, "step": 2145 }, { "epoch": 0.14607963038456312, "grad_norm": 2.21744441986084, "learning_rate": 0.0009817400462019295, "loss": 3.627, "step": 2150 }, { "epoch": 0.1464193504552249, "grad_norm": 0.7434030771255493, "learning_rate": 0.000981697581193097, "loss": 4.1559, "step": 2155 }, { "epoch": 0.14675907052588666, "grad_norm": 0.6058130860328674, "learning_rate": 0.0009816551161842641, "loss": 3.7676, "step": 2160 }, { "epoch": 0.14709879059654846, "grad_norm": 0.7785553932189941, "learning_rate": 0.0009816126511754315, "loss": 3.7069, "step": 2165 }, { "epoch": 0.14743851066721023, "grad_norm": 0.664575457572937, "learning_rate": 0.0009815701861665988, "loss": 3.4932, "step": 2170 }, { "epoch": 0.147778230737872, "grad_norm": 0.6050861477851868, "learning_rate": 0.000981527721157766, "loss": 3.9172, "step": 2175 }, { "epoch": 0.14811795080853377, "grad_norm": 0.6301407217979431, "learning_rate": 0.0009814852561489333, "loss": 3.6907, "step": 2180 }, { "epoch": 0.14845767087919554, "grad_norm": 0.6113725304603577, "learning_rate": 0.0009814427911401006, "loss": 3.7832, "step": 2185 }, { "epoch": 0.1487973909498573, "grad_norm": 0.6586499214172363, "learning_rate": 0.000981400326131268, "loss": 3.7312, "step": 2190 }, { "epoch": 0.1491371110205191, "grad_norm": 0.6039511561393738, "learning_rate": 0.000981357861122435, "loss": 3.7128, "step": 2195 }, { "epoch": 0.14947683109118087, "grad_norm": 0.6801586747169495, "learning_rate": 0.0009813153961136024, "loss": 3.7563, "step": 2200 }, { "epoch": 0.14981655116184264, "grad_norm": 0.5684835314750671, "learning_rate": 0.0009812729311047697, "loss": 3.7606, "step": 2205 }, { "epoch": 0.1501562712325044, "grad_norm": 0.5985090732574463, "learning_rate": 0.0009812304660959368, "loss": 3.8605, "step": 2210 }, { "epoch": 0.15049599130316618, "grad_norm": 0.6146273016929626, "learning_rate": 0.0009811880010871042, "loss": 3.735, "step": 2215 }, { "epoch": 0.15083571137382795, "grad_norm": 0.6372639536857605, "learning_rate": 0.0009811455360782715, "loss": 3.8889, "step": 2220 }, { "epoch": 0.15117543144448975, "grad_norm": 0.6332539916038513, "learning_rate": 0.0009811030710694388, "loss": 3.9223, "step": 2225 }, { "epoch": 0.15151515151515152, "grad_norm": 0.6205673217773438, "learning_rate": 0.0009810606060606062, "loss": 3.4679, "step": 2230 }, { "epoch": 0.1518548715858133, "grad_norm": 0.5693390965461731, "learning_rate": 0.0009810181410517733, "loss": 3.8416, "step": 2235 }, { "epoch": 0.15219459165647506, "grad_norm": 0.5709364414215088, "learning_rate": 0.0009809756760429406, "loss": 3.8709, "step": 2240 }, { "epoch": 0.15253431172713683, "grad_norm": 0.47355231642723083, "learning_rate": 0.000980933211034108, "loss": 3.8758, "step": 2245 }, { "epoch": 0.15287403179779863, "grad_norm": 0.7836024165153503, "learning_rate": 0.000980890746025275, "loss": 3.576, "step": 2250 }, { "epoch": 0.1532137518684604, "grad_norm": 0.6559258699417114, "learning_rate": 0.0009808482810164426, "loss": 3.8208, "step": 2255 }, { "epoch": 0.15355347193912217, "grad_norm": 0.7773926854133606, "learning_rate": 0.0009808058160076097, "loss": 3.8461, "step": 2260 }, { "epoch": 0.15389319200978394, "grad_norm": 0.6133778691291809, "learning_rate": 0.000980763350998777, "loss": 3.767, "step": 2265 }, { "epoch": 0.1542329120804457, "grad_norm": 0.6255161166191101, "learning_rate": 0.0009807208859899444, "loss": 3.8351, "step": 2270 }, { "epoch": 0.15457263215110748, "grad_norm": 0.9302341341972351, "learning_rate": 0.0009806784209811115, "loss": 3.7139, "step": 2275 }, { "epoch": 0.15491235222176927, "grad_norm": 0.7181994915008545, "learning_rate": 0.0009806359559722789, "loss": 3.6355, "step": 2280 }, { "epoch": 0.15525207229243104, "grad_norm": 0.6309781670570374, "learning_rate": 0.0009805934909634462, "loss": 3.6095, "step": 2285 }, { "epoch": 0.1555917923630928, "grad_norm": 0.702839195728302, "learning_rate": 0.0009805510259546135, "loss": 3.6753, "step": 2290 }, { "epoch": 0.15593151243375458, "grad_norm": 0.677828848361969, "learning_rate": 0.0009805085609457807, "loss": 3.5938, "step": 2295 }, { "epoch": 0.15627123250441635, "grad_norm": 1.157195806503296, "learning_rate": 0.000980466095936948, "loss": 3.7628, "step": 2300 }, { "epoch": 0.15661095257507815, "grad_norm": 0.6162868142127991, "learning_rate": 0.0009804236309281153, "loss": 3.7932, "step": 2305 }, { "epoch": 0.15695067264573992, "grad_norm": 0.6598204970359802, "learning_rate": 0.0009803811659192824, "loss": 3.6078, "step": 2310 }, { "epoch": 0.1572903927164017, "grad_norm": 0.5610998272895813, "learning_rate": 0.0009803387009104498, "loss": 3.8062, "step": 2315 }, { "epoch": 0.15763011278706346, "grad_norm": 0.6655665636062622, "learning_rate": 0.0009802962359016171, "loss": 3.7769, "step": 2320 }, { "epoch": 0.15796983285772523, "grad_norm": 0.6042912006378174, "learning_rate": 0.0009802537708927844, "loss": 3.9204, "step": 2325 }, { "epoch": 0.158309552928387, "grad_norm": 0.5904920697212219, "learning_rate": 0.0009802113058839518, "loss": 3.6929, "step": 2330 }, { "epoch": 0.1586492729990488, "grad_norm": 0.5653850436210632, "learning_rate": 0.000980168840875119, "loss": 3.7477, "step": 2335 }, { "epoch": 0.15898899306971057, "grad_norm": 0.5770355463027954, "learning_rate": 0.0009801263758662862, "loss": 3.7471, "step": 2340 }, { "epoch": 0.15932871314037234, "grad_norm": 0.6086682081222534, "learning_rate": 0.0009800839108574536, "loss": 3.8202, "step": 2345 }, { "epoch": 0.1596684332110341, "grad_norm": 0.6483132839202881, "learning_rate": 0.0009800414458486207, "loss": 3.9549, "step": 2350 }, { "epoch": 0.16000815328169588, "grad_norm": 0.5497443675994873, "learning_rate": 0.000979998980839788, "loss": 3.7957, "step": 2355 }, { "epoch": 0.16034787335235764, "grad_norm": 0.6367260813713074, "learning_rate": 0.0009799565158309553, "loss": 3.8473, "step": 2360 }, { "epoch": 0.16068759342301944, "grad_norm": 0.6136810183525085, "learning_rate": 0.0009799140508221227, "loss": 3.5634, "step": 2365 }, { "epoch": 0.1610273134936812, "grad_norm": 0.8666849136352539, "learning_rate": 0.0009798715858132898, "loss": 3.7025, "step": 2370 }, { "epoch": 0.16136703356434298, "grad_norm": 0.6140803694725037, "learning_rate": 0.0009798291208044571, "loss": 3.7509, "step": 2375 }, { "epoch": 0.16170675363500475, "grad_norm": 0.8208211660385132, "learning_rate": 0.0009797866557956245, "loss": 3.6264, "step": 2380 }, { "epoch": 0.16204647370566652, "grad_norm": 0.6705807447433472, "learning_rate": 0.0009797441907867916, "loss": 3.7361, "step": 2385 }, { "epoch": 0.16238619377632832, "grad_norm": 0.7236992120742798, "learning_rate": 0.000979701725777959, "loss": 3.8665, "step": 2390 }, { "epoch": 0.1627259138469901, "grad_norm": 1.1335411071777344, "learning_rate": 0.0009796592607691263, "loss": 3.5574, "step": 2395 }, { "epoch": 0.16306563391765186, "grad_norm": 0.565281331539154, "learning_rate": 0.0009796167957602936, "loss": 3.7638, "step": 2400 }, { "epoch": 0.16340535398831363, "grad_norm": 0.5212329626083374, "learning_rate": 0.000979574330751461, "loss": 3.5795, "step": 2405 }, { "epoch": 0.1637450740589754, "grad_norm": 0.5030485391616821, "learning_rate": 0.000979531865742628, "loss": 3.8794, "step": 2410 }, { "epoch": 0.16408479412963717, "grad_norm": 0.609352171421051, "learning_rate": 0.0009794894007337954, "loss": 3.9417, "step": 2415 }, { "epoch": 0.16442451420029897, "grad_norm": 0.7634018659591675, "learning_rate": 0.0009794469357249627, "loss": 3.8014, "step": 2420 }, { "epoch": 0.16476423427096074, "grad_norm": 0.7297620177268982, "learning_rate": 0.0009794044707161298, "loss": 3.733, "step": 2425 }, { "epoch": 0.1651039543416225, "grad_norm": 0.6963150501251221, "learning_rate": 0.0009793620057072972, "loss": 3.7993, "step": 2430 }, { "epoch": 0.16544367441228428, "grad_norm": 0.6455408930778503, "learning_rate": 0.0009793195406984645, "loss": 3.8651, "step": 2435 }, { "epoch": 0.16578339448294604, "grad_norm": 0.587772011756897, "learning_rate": 0.0009792770756896318, "loss": 3.7844, "step": 2440 }, { "epoch": 0.16612311455360781, "grad_norm": 1.017780065536499, "learning_rate": 0.000979234610680799, "loss": 3.8287, "step": 2445 }, { "epoch": 0.1664628346242696, "grad_norm": 0.5755481719970703, "learning_rate": 0.0009791921456719663, "loss": 3.6867, "step": 2450 }, { "epoch": 0.16680255469493138, "grad_norm": 0.8737612366676331, "learning_rate": 0.0009791496806631336, "loss": 3.7362, "step": 2455 }, { "epoch": 0.16714227476559315, "grad_norm": 0.6732498407363892, "learning_rate": 0.0009791072156543007, "loss": 3.8185, "step": 2460 }, { "epoch": 0.16748199483625492, "grad_norm": 0.6353683471679688, "learning_rate": 0.0009790647506454683, "loss": 3.6812, "step": 2465 }, { "epoch": 0.1678217149069167, "grad_norm": 0.7803769111633301, "learning_rate": 0.0009790222856366354, "loss": 3.8419, "step": 2470 }, { "epoch": 0.1681614349775785, "grad_norm": 0.8309979438781738, "learning_rate": 0.0009789798206278027, "loss": 3.8639, "step": 2475 }, { "epoch": 0.16850115504824026, "grad_norm": 0.6595731973648071, "learning_rate": 0.00097893735561897, "loss": 3.7646, "step": 2480 }, { "epoch": 0.16884087511890203, "grad_norm": 0.664367139339447, "learning_rate": 0.0009788948906101372, "loss": 3.8229, "step": 2485 }, { "epoch": 0.1691805951895638, "grad_norm": 0.482677161693573, "learning_rate": 0.0009788524256013045, "loss": 3.8088, "step": 2490 }, { "epoch": 0.16952031526022557, "grad_norm": 0.691443681716919, "learning_rate": 0.0009788099605924719, "loss": 3.7539, "step": 2495 }, { "epoch": 0.16986003533088734, "grad_norm": 0.5566770434379578, "learning_rate": 0.0009787674955836392, "loss": 3.8654, "step": 2500 }, { "epoch": 0.17019975540154914, "grad_norm": 0.6081881523132324, "learning_rate": 0.0009787250305748063, "loss": 3.9523, "step": 2505 }, { "epoch": 0.1705394754722109, "grad_norm": 0.5837857723236084, "learning_rate": 0.0009786825655659736, "loss": 3.9644, "step": 2510 }, { "epoch": 0.17087919554287267, "grad_norm": 0.5104924440383911, "learning_rate": 0.000978640100557141, "loss": 3.7676, "step": 2515 }, { "epoch": 0.17121891561353444, "grad_norm": 0.511321485042572, "learning_rate": 0.000978597635548308, "loss": 3.9301, "step": 2520 }, { "epoch": 0.17155863568419621, "grad_norm": 0.5427649021148682, "learning_rate": 0.0009785551705394754, "loss": 3.8802, "step": 2525 }, { "epoch": 0.17189835575485798, "grad_norm": 0.6896429657936096, "learning_rate": 0.0009785127055306428, "loss": 3.7196, "step": 2530 }, { "epoch": 0.17223807582551978, "grad_norm": 0.7047936320304871, "learning_rate": 0.00097847024052181, "loss": 3.9621, "step": 2535 }, { "epoch": 0.17257779589618155, "grad_norm": 0.6143273115158081, "learning_rate": 0.0009784277755129774, "loss": 3.6378, "step": 2540 }, { "epoch": 0.17291751596684332, "grad_norm": 0.6251726746559143, "learning_rate": 0.0009783853105041446, "loss": 3.7208, "step": 2545 }, { "epoch": 0.1732572360375051, "grad_norm": 0.5483553409576416, "learning_rate": 0.0009783428454953119, "loss": 3.7922, "step": 2550 }, { "epoch": 0.17359695610816686, "grad_norm": 0.6072860360145569, "learning_rate": 0.0009783003804864792, "loss": 3.7269, "step": 2555 }, { "epoch": 0.17393667617882866, "grad_norm": 0.5628990530967712, "learning_rate": 0.0009782579154776463, "loss": 3.5317, "step": 2560 }, { "epoch": 0.17427639624949043, "grad_norm": 0.8806532621383667, "learning_rate": 0.0009782154504688137, "loss": 3.6866, "step": 2565 }, { "epoch": 0.1746161163201522, "grad_norm": 0.7904943227767944, "learning_rate": 0.000978172985459981, "loss": 3.8671, "step": 2570 }, { "epoch": 0.17495583639081397, "grad_norm": 0.9674472808837891, "learning_rate": 0.0009781305204511483, "loss": 3.9379, "step": 2575 }, { "epoch": 0.17529555646147574, "grad_norm": 0.6988072991371155, "learning_rate": 0.0009780880554423157, "loss": 3.9829, "step": 2580 }, { "epoch": 0.1756352765321375, "grad_norm": 0.5407254099845886, "learning_rate": 0.0009780455904334828, "loss": 3.6546, "step": 2585 }, { "epoch": 0.1759749966027993, "grad_norm": 0.5559465885162354, "learning_rate": 0.0009780031254246501, "loss": 3.6391, "step": 2590 }, { "epoch": 0.17631471667346107, "grad_norm": 0.8155473470687866, "learning_rate": 0.0009779606604158175, "loss": 3.7127, "step": 2595 }, { "epoch": 0.17665443674412284, "grad_norm": 0.7560978531837463, "learning_rate": 0.0009779181954069846, "loss": 3.5354, "step": 2600 }, { "epoch": 0.17699415681478461, "grad_norm": 1.3758387565612793, "learning_rate": 0.000977875730398152, "loss": 4.0057, "step": 2605 }, { "epoch": 0.17733387688544638, "grad_norm": 0.6253563761711121, "learning_rate": 0.0009778332653893192, "loss": 3.7131, "step": 2610 }, { "epoch": 0.17767359695610815, "grad_norm": 0.5841245055198669, "learning_rate": 0.0009777908003804866, "loss": 3.4909, "step": 2615 }, { "epoch": 0.17801331702676995, "grad_norm": 0.7301042079925537, "learning_rate": 0.0009777483353716537, "loss": 3.7663, "step": 2620 }, { "epoch": 0.17835303709743172, "grad_norm": 0.6407013535499573, "learning_rate": 0.000977705870362821, "loss": 3.6259, "step": 2625 }, { "epoch": 0.1786927571680935, "grad_norm": 0.6893696784973145, "learning_rate": 0.0009776634053539884, "loss": 3.633, "step": 2630 }, { "epoch": 0.17903247723875526, "grad_norm": 0.5852348208427429, "learning_rate": 0.0009776209403451555, "loss": 3.791, "step": 2635 }, { "epoch": 0.17937219730941703, "grad_norm": 0.5743594765663147, "learning_rate": 0.000977578475336323, "loss": 3.8689, "step": 2640 }, { "epoch": 0.17971191738007883, "grad_norm": 0.5142814517021179, "learning_rate": 0.0009775360103274902, "loss": 3.7479, "step": 2645 }, { "epoch": 0.1800516374507406, "grad_norm": 0.6178168654441833, "learning_rate": 0.0009774935453186575, "loss": 3.9161, "step": 2650 }, { "epoch": 0.18039135752140237, "grad_norm": 0.6452828645706177, "learning_rate": 0.0009774510803098248, "loss": 3.8226, "step": 2655 }, { "epoch": 0.18073107759206414, "grad_norm": 0.5602723360061646, "learning_rate": 0.000977408615300992, "loss": 3.6176, "step": 2660 }, { "epoch": 0.1810707976627259, "grad_norm": 0.6433743834495544, "learning_rate": 0.0009773661502921593, "loss": 3.6957, "step": 2665 }, { "epoch": 0.18141051773338768, "grad_norm": 0.7300789952278137, "learning_rate": 0.0009773236852833266, "loss": 3.7096, "step": 2670 }, { "epoch": 0.18175023780404947, "grad_norm": 1.0117199420928955, "learning_rate": 0.000977281220274494, "loss": 3.6927, "step": 2675 }, { "epoch": 0.18208995787471124, "grad_norm": 0.7773440480232239, "learning_rate": 0.000977238755265661, "loss": 3.8849, "step": 2680 }, { "epoch": 0.18242967794537301, "grad_norm": 0.48008543252944946, "learning_rate": 0.0009771962902568284, "loss": 3.7642, "step": 2685 }, { "epoch": 0.18276939801603478, "grad_norm": 0.5967749953269958, "learning_rate": 0.0009771538252479957, "loss": 3.7564, "step": 2690 }, { "epoch": 0.18310911808669655, "grad_norm": 0.6126387715339661, "learning_rate": 0.0009771113602391628, "loss": 3.3185, "step": 2695 }, { "epoch": 0.18344883815735832, "grad_norm": 0.6584519147872925, "learning_rate": 0.0009770688952303302, "loss": 3.8923, "step": 2700 }, { "epoch": 0.18378855822802012, "grad_norm": 0.6974896192550659, "learning_rate": 0.0009770264302214975, "loss": 3.6187, "step": 2705 }, { "epoch": 0.1841282782986819, "grad_norm": 0.6637903451919556, "learning_rate": 0.0009769839652126648, "loss": 3.677, "step": 2710 }, { "epoch": 0.18446799836934366, "grad_norm": 0.9200413227081299, "learning_rate": 0.0009769415002038322, "loss": 3.7962, "step": 2715 }, { "epoch": 0.18480771844000543, "grad_norm": 0.69618159532547, "learning_rate": 0.0009768990351949993, "loss": 3.7602, "step": 2720 }, { "epoch": 0.1851474385106672, "grad_norm": 0.6195826530456543, "learning_rate": 0.0009768565701861666, "loss": 3.8316, "step": 2725 }, { "epoch": 0.185487158581329, "grad_norm": 0.6163364052772522, "learning_rate": 0.000976814105177334, "loss": 3.9855, "step": 2730 }, { "epoch": 0.18582687865199077, "grad_norm": 0.6414899230003357, "learning_rate": 0.000976771640168501, "loss": 3.8684, "step": 2735 }, { "epoch": 0.18616659872265254, "grad_norm": 0.5263423323631287, "learning_rate": 0.0009767291751596684, "loss": 3.5558, "step": 2740 }, { "epoch": 0.1865063187933143, "grad_norm": 0.49367937445640564, "learning_rate": 0.0009766867101508358, "loss": 3.6349, "step": 2745 }, { "epoch": 0.18684603886397608, "grad_norm": 0.691084623336792, "learning_rate": 0.000976644245142003, "loss": 3.8187, "step": 2750 }, { "epoch": 0.18718575893463785, "grad_norm": 0.6548663377761841, "learning_rate": 0.0009766017801331702, "loss": 4.0068, "step": 2755 }, { "epoch": 0.18752547900529964, "grad_norm": 0.6146909594535828, "learning_rate": 0.0009765593151243376, "loss": 3.7553, "step": 2760 }, { "epoch": 0.18786519907596141, "grad_norm": 0.5466817617416382, "learning_rate": 0.0009765168501155049, "loss": 3.8668, "step": 2765 }, { "epoch": 0.18820491914662318, "grad_norm": 0.4874761402606964, "learning_rate": 0.0009764743851066721, "loss": 3.8772, "step": 2770 }, { "epoch": 0.18854463921728495, "grad_norm": 0.6687860488891602, "learning_rate": 0.0009764319200978394, "loss": 3.9953, "step": 2775 }, { "epoch": 0.18888435928794672, "grad_norm": 0.5901328325271606, "learning_rate": 0.0009763894550890067, "loss": 3.8041, "step": 2780 }, { "epoch": 0.1892240793586085, "grad_norm": 0.5916528701782227, "learning_rate": 0.0009763469900801739, "loss": 3.428, "step": 2785 }, { "epoch": 0.1895637994292703, "grad_norm": 0.6788733601570129, "learning_rate": 0.0009763045250713412, "loss": 3.7871, "step": 2790 }, { "epoch": 0.18990351949993206, "grad_norm": 0.6555401086807251, "learning_rate": 0.0009762620600625086, "loss": 3.5644, "step": 2795 }, { "epoch": 0.19024323957059383, "grad_norm": 0.7433831691741943, "learning_rate": 0.0009762195950536758, "loss": 3.718, "step": 2800 }, { "epoch": 0.1905829596412556, "grad_norm": 0.7285386919975281, "learning_rate": 0.0009761771300448431, "loss": 3.7351, "step": 2805 }, { "epoch": 0.19092267971191737, "grad_norm": 0.5669515132904053, "learning_rate": 0.0009761346650360103, "loss": 3.2801, "step": 2810 }, { "epoch": 0.19126239978257917, "grad_norm": 0.8962259888648987, "learning_rate": 0.0009760922000271776, "loss": 3.7908, "step": 2815 }, { "epoch": 0.19160211985324094, "grad_norm": 0.6690675020217896, "learning_rate": 0.0009760497350183449, "loss": 3.8255, "step": 2820 }, { "epoch": 0.1919418399239027, "grad_norm": 0.5441779494285583, "learning_rate": 0.0009760072700095121, "loss": 3.6706, "step": 2825 }, { "epoch": 0.19228155999456448, "grad_norm": 0.5559229850769043, "learning_rate": 0.0009759648050006795, "loss": 3.8617, "step": 2830 }, { "epoch": 0.19262128006522625, "grad_norm": 0.73357093334198, "learning_rate": 0.0009759223399918468, "loss": 3.7379, "step": 2835 }, { "epoch": 0.19296100013588802, "grad_norm": 0.7806567549705505, "learning_rate": 0.000975879874983014, "loss": 3.7481, "step": 2840 }, { "epoch": 0.19330072020654981, "grad_norm": 0.4998747706413269, "learning_rate": 0.0009758374099741812, "loss": 3.8569, "step": 2845 }, { "epoch": 0.19364044027721158, "grad_norm": 0.5296024680137634, "learning_rate": 0.0009757949449653486, "loss": 3.7117, "step": 2850 }, { "epoch": 0.19398016034787335, "grad_norm": 0.867391049861908, "learning_rate": 0.0009757524799565158, "loss": 3.8734, "step": 2855 }, { "epoch": 0.19431988041853512, "grad_norm": 0.6658104062080383, "learning_rate": 0.000975710014947683, "loss": 3.6793, "step": 2860 }, { "epoch": 0.1946596004891969, "grad_norm": 0.8781140446662903, "learning_rate": 0.0009756675499388505, "loss": 3.7553, "step": 2865 }, { "epoch": 0.19499932055985866, "grad_norm": 0.849490761756897, "learning_rate": 0.0009756250849300177, "loss": 3.5455, "step": 2870 }, { "epoch": 0.19533904063052046, "grad_norm": 0.8140732645988464, "learning_rate": 0.0009755826199211849, "loss": 3.7532, "step": 2875 }, { "epoch": 0.19567876070118223, "grad_norm": 0.6011113524436951, "learning_rate": 0.0009755401549123523, "loss": 3.8898, "step": 2880 }, { "epoch": 0.196018480771844, "grad_norm": 0.6402468085289001, "learning_rate": 0.0009754976899035195, "loss": 3.7352, "step": 2885 }, { "epoch": 0.19635820084250577, "grad_norm": 0.6490626931190491, "learning_rate": 0.0009754552248946867, "loss": 3.4159, "step": 2890 }, { "epoch": 0.19669792091316754, "grad_norm": 0.6855554580688477, "learning_rate": 0.000975412759885854, "loss": 3.5553, "step": 2895 }, { "epoch": 0.19703764098382934, "grad_norm": 0.7514801621437073, "learning_rate": 0.0009753702948770214, "loss": 3.8005, "step": 2900 }, { "epoch": 0.1973773610544911, "grad_norm": 2.0744807720184326, "learning_rate": 0.0009753278298681887, "loss": 3.5033, "step": 2905 }, { "epoch": 0.19771708112515288, "grad_norm": 1.0016511678695679, "learning_rate": 0.0009752853648593559, "loss": 3.6079, "step": 2910 }, { "epoch": 0.19805680119581465, "grad_norm": 0.7487754225730896, "learning_rate": 0.0009752428998505232, "loss": 3.7548, "step": 2915 }, { "epoch": 0.19839652126647642, "grad_norm": 0.6639905571937561, "learning_rate": 0.0009752004348416905, "loss": 3.7385, "step": 2920 }, { "epoch": 0.1987362413371382, "grad_norm": 0.6206386089324951, "learning_rate": 0.0009751579698328577, "loss": 3.7201, "step": 2925 }, { "epoch": 0.19907596140779998, "grad_norm": 0.6712602376937866, "learning_rate": 0.000975115504824025, "loss": 3.5394, "step": 2930 }, { "epoch": 0.19941568147846175, "grad_norm": 0.620564877986908, "learning_rate": 0.0009750730398151924, "loss": 3.8665, "step": 2935 }, { "epoch": 0.19975540154912352, "grad_norm": 1.075077772140503, "learning_rate": 0.0009750305748063596, "loss": 3.5622, "step": 2940 }, { "epoch": 0.2000951216197853, "grad_norm": 0.7147380709648132, "learning_rate": 0.0009749881097975268, "loss": 3.7676, "step": 2945 }, { "epoch": 0.20043484169044706, "grad_norm": 0.7028825283050537, "learning_rate": 0.0009749456447886942, "loss": 3.8291, "step": 2950 }, { "epoch": 0.20077456176110883, "grad_norm": 0.6004649996757507, "learning_rate": 0.0009749031797798614, "loss": 4.0362, "step": 2955 }, { "epoch": 0.20111428183177063, "grad_norm": 0.7003999948501587, "learning_rate": 0.0009748607147710286, "loss": 3.9086, "step": 2960 }, { "epoch": 0.2014540019024324, "grad_norm": 0.483829528093338, "learning_rate": 0.000974818249762196, "loss": 3.5937, "step": 2965 }, { "epoch": 0.20179372197309417, "grad_norm": 0.5830460786819458, "learning_rate": 0.0009747757847533633, "loss": 3.709, "step": 2970 }, { "epoch": 0.20213344204375594, "grad_norm": 0.6097733974456787, "learning_rate": 0.0009747333197445305, "loss": 3.6495, "step": 2975 }, { "epoch": 0.2024731621144177, "grad_norm": 0.6874327659606934, "learning_rate": 0.0009746908547356979, "loss": 3.7219, "step": 2980 }, { "epoch": 0.2028128821850795, "grad_norm": 0.7092781662940979, "learning_rate": 0.0009746483897268651, "loss": 3.6287, "step": 2985 }, { "epoch": 0.20315260225574128, "grad_norm": 0.5750575065612793, "learning_rate": 0.0009746059247180323, "loss": 3.759, "step": 2990 }, { "epoch": 0.20349232232640305, "grad_norm": 0.8717525601387024, "learning_rate": 0.0009745634597091996, "loss": 3.7773, "step": 2995 }, { "epoch": 0.20383204239706482, "grad_norm": 0.5540760159492493, "learning_rate": 0.0009745209947003669, "loss": 3.627, "step": 3000 }, { "epoch": 0.2041717624677266, "grad_norm": 0.9130904674530029, "learning_rate": 0.0009744785296915342, "loss": 3.6937, "step": 3005 }, { "epoch": 0.20451148253838836, "grad_norm": 0.6381457448005676, "learning_rate": 0.0009744360646827015, "loss": 3.8555, "step": 3010 }, { "epoch": 0.20485120260905015, "grad_norm": 0.6486433744430542, "learning_rate": 0.0009743935996738688, "loss": 3.8618, "step": 3015 }, { "epoch": 0.20519092267971192, "grad_norm": 0.6014708280563354, "learning_rate": 0.000974351134665036, "loss": 3.8432, "step": 3020 }, { "epoch": 0.2055306427503737, "grad_norm": 0.5839025378227234, "learning_rate": 0.0009743086696562033, "loss": 3.7629, "step": 3025 }, { "epoch": 0.20587036282103546, "grad_norm": 0.7098844051361084, "learning_rate": 0.0009742662046473706, "loss": 3.75, "step": 3030 }, { "epoch": 0.20621008289169723, "grad_norm": 0.5805578827857971, "learning_rate": 0.0009742237396385378, "loss": 3.6937, "step": 3035 }, { "epoch": 0.206549802962359, "grad_norm": 0.6617678999900818, "learning_rate": 0.0009741812746297052, "loss": 3.7325, "step": 3040 }, { "epoch": 0.2068895230330208, "grad_norm": 0.7151147127151489, "learning_rate": 0.0009741388096208724, "loss": 3.6406, "step": 3045 }, { "epoch": 0.20722924310368257, "grad_norm": 0.588606059551239, "learning_rate": 0.0009740963446120397, "loss": 3.6841, "step": 3050 }, { "epoch": 0.20756896317434434, "grad_norm": 0.6761735081672668, "learning_rate": 0.000974053879603207, "loss": 3.5365, "step": 3055 }, { "epoch": 0.2079086832450061, "grad_norm": 0.8985639810562134, "learning_rate": 0.0009740114145943742, "loss": 3.8136, "step": 3060 }, { "epoch": 0.20824840331566788, "grad_norm": 0.5298454761505127, "learning_rate": 0.0009739689495855415, "loss": 3.5731, "step": 3065 }, { "epoch": 0.20858812338632968, "grad_norm": 0.6606918573379517, "learning_rate": 0.0009739264845767088, "loss": 3.858, "step": 3070 }, { "epoch": 0.20892784345699145, "grad_norm": 0.5564335584640503, "learning_rate": 0.0009738840195678761, "loss": 3.653, "step": 3075 }, { "epoch": 0.20926756352765322, "grad_norm": 0.6696929335594177, "learning_rate": 0.0009738415545590434, "loss": 3.6613, "step": 3080 }, { "epoch": 0.209607283598315, "grad_norm": 2.227457046508789, "learning_rate": 0.0009737990895502107, "loss": 3.9475, "step": 3085 }, { "epoch": 0.20994700366897676, "grad_norm": 0.795903742313385, "learning_rate": 0.0009737566245413779, "loss": 3.7129, "step": 3090 }, { "epoch": 0.21028672373963853, "grad_norm": 0.6205352544784546, "learning_rate": 0.0009737141595325451, "loss": 3.8524, "step": 3095 }, { "epoch": 0.21062644381030032, "grad_norm": 0.6212173104286194, "learning_rate": 0.0009736716945237125, "loss": 3.9522, "step": 3100 }, { "epoch": 0.2109661638809621, "grad_norm": 1.2397836446762085, "learning_rate": 0.0009736292295148797, "loss": 3.6891, "step": 3105 }, { "epoch": 0.21130588395162386, "grad_norm": 0.7002935409545898, "learning_rate": 0.000973586764506047, "loss": 3.9087, "step": 3110 }, { "epoch": 0.21164560402228563, "grad_norm": 0.6223004460334778, "learning_rate": 0.0009735442994972144, "loss": 3.8046, "step": 3115 }, { "epoch": 0.2119853240929474, "grad_norm": 1.711371898651123, "learning_rate": 0.0009735018344883816, "loss": 3.8441, "step": 3120 }, { "epoch": 0.21232504416360917, "grad_norm": 0.7471145391464233, "learning_rate": 0.0009734593694795488, "loss": 3.4484, "step": 3125 }, { "epoch": 0.21266476423427097, "grad_norm": 0.700401246547699, "learning_rate": 0.0009734169044707162, "loss": 3.9248, "step": 3130 }, { "epoch": 0.21300448430493274, "grad_norm": 3.3507747650146484, "learning_rate": 0.0009733744394618834, "loss": 3.7096, "step": 3135 }, { "epoch": 0.2133442043755945, "grad_norm": 0.6101709008216858, "learning_rate": 0.0009733319744530506, "loss": 3.6657, "step": 3140 }, { "epoch": 0.21368392444625628, "grad_norm": 0.6883925795555115, "learning_rate": 0.000973289509444218, "loss": 3.9038, "step": 3145 }, { "epoch": 0.21402364451691805, "grad_norm": 0.6930251121520996, "learning_rate": 0.0009732470444353853, "loss": 3.3827, "step": 3150 }, { "epoch": 0.21436336458757985, "grad_norm": 1.6364690065383911, "learning_rate": 0.0009732045794265525, "loss": 3.7638, "step": 3155 }, { "epoch": 0.21470308465824162, "grad_norm": 0.5960918068885803, "learning_rate": 0.0009731621144177198, "loss": 3.7745, "step": 3160 }, { "epoch": 0.2150428047289034, "grad_norm": 0.48007920384407043, "learning_rate": 0.0009731196494088871, "loss": 3.7569, "step": 3165 }, { "epoch": 0.21538252479956516, "grad_norm": 0.8826533555984497, "learning_rate": 0.0009730771844000543, "loss": 3.5113, "step": 3170 }, { "epoch": 0.21572224487022693, "grad_norm": 0.9135482311248779, "learning_rate": 0.0009730347193912217, "loss": 3.7115, "step": 3175 }, { "epoch": 0.2160619649408887, "grad_norm": 0.5753097534179688, "learning_rate": 0.000972992254382389, "loss": 3.7479, "step": 3180 }, { "epoch": 0.2164016850115505, "grad_norm": 0.7939460873603821, "learning_rate": 0.0009729497893735562, "loss": 3.7569, "step": 3185 }, { "epoch": 0.21674140508221226, "grad_norm": 0.8315974473953247, "learning_rate": 0.0009729073243647235, "loss": 4.0186, "step": 3190 }, { "epoch": 0.21708112515287403, "grad_norm": 0.613703727722168, "learning_rate": 0.0009728648593558907, "loss": 3.7061, "step": 3195 }, { "epoch": 0.2174208452235358, "grad_norm": 0.5988765358924866, "learning_rate": 0.000972822394347058, "loss": 3.8294, "step": 3200 }, { "epoch": 0.21776056529419757, "grad_norm": 0.8050014972686768, "learning_rate": 0.0009727799293382253, "loss": 3.9711, "step": 3205 }, { "epoch": 0.21810028536485934, "grad_norm": 0.6768515110015869, "learning_rate": 0.0009727374643293926, "loss": 3.9341, "step": 3210 }, { "epoch": 0.21844000543552114, "grad_norm": 0.6579541563987732, "learning_rate": 0.0009726949993205599, "loss": 3.7913, "step": 3215 }, { "epoch": 0.2187797255061829, "grad_norm": 0.7019892334938049, "learning_rate": 0.0009726525343117272, "loss": 3.6519, "step": 3220 }, { "epoch": 0.21911944557684468, "grad_norm": 0.67347651720047, "learning_rate": 0.0009726100693028944, "loss": 3.6208, "step": 3225 }, { "epoch": 0.21945916564750645, "grad_norm": 0.5936116576194763, "learning_rate": 0.0009725676042940616, "loss": 3.7613, "step": 3230 }, { "epoch": 0.21979888571816822, "grad_norm": 0.6363143920898438, "learning_rate": 0.000972525139285229, "loss": 3.6958, "step": 3235 }, { "epoch": 0.22013860578883002, "grad_norm": 0.653922975063324, "learning_rate": 0.0009724826742763962, "loss": 3.6718, "step": 3240 }, { "epoch": 0.22047832585949179, "grad_norm": 0.5786622166633606, "learning_rate": 0.0009724402092675637, "loss": 3.6836, "step": 3245 }, { "epoch": 0.22081804593015356, "grad_norm": 0.6533204913139343, "learning_rate": 0.0009723977442587309, "loss": 3.7397, "step": 3250 }, { "epoch": 0.22115776600081533, "grad_norm": 0.5819668173789978, "learning_rate": 0.0009723552792498981, "loss": 3.6297, "step": 3255 }, { "epoch": 0.2214974860714771, "grad_norm": 0.6860673427581787, "learning_rate": 0.0009723128142410654, "loss": 3.7599, "step": 3260 }, { "epoch": 0.22183720614213887, "grad_norm": 0.5516849756240845, "learning_rate": 0.0009722703492322327, "loss": 3.7819, "step": 3265 }, { "epoch": 0.22217692621280066, "grad_norm": 0.7628173232078552, "learning_rate": 0.0009722278842233999, "loss": 3.8201, "step": 3270 }, { "epoch": 0.22251664628346243, "grad_norm": 0.6516159176826477, "learning_rate": 0.0009721854192145672, "loss": 3.8293, "step": 3275 }, { "epoch": 0.2228563663541242, "grad_norm": 0.5998335480690002, "learning_rate": 0.0009721429542057346, "loss": 3.6546, "step": 3280 }, { "epoch": 0.22319608642478597, "grad_norm": 0.6493220329284668, "learning_rate": 0.0009721004891969018, "loss": 3.999, "step": 3285 }, { "epoch": 0.22353580649544774, "grad_norm": 0.9815689921379089, "learning_rate": 0.0009720580241880691, "loss": 3.7525, "step": 3290 }, { "epoch": 0.22387552656610954, "grad_norm": 0.7050333023071289, "learning_rate": 0.0009720155591792363, "loss": 3.4007, "step": 3295 }, { "epoch": 0.2242152466367713, "grad_norm": 0.8392935395240784, "learning_rate": 0.0009719730941704036, "loss": 3.6176, "step": 3300 }, { "epoch": 0.22455496670743308, "grad_norm": 0.7056354284286499, "learning_rate": 0.0009719306291615709, "loss": 3.8879, "step": 3305 }, { "epoch": 0.22489468677809485, "grad_norm": 0.5982909202575684, "learning_rate": 0.0009718881641527381, "loss": 3.4792, "step": 3310 }, { "epoch": 0.22523440684875662, "grad_norm": 0.7409027218818665, "learning_rate": 0.0009718456991439055, "loss": 3.6163, "step": 3315 }, { "epoch": 0.2255741269194184, "grad_norm": 0.6308236718177795, "learning_rate": 0.0009718032341350728, "loss": 3.6235, "step": 3320 }, { "epoch": 0.22591384699008019, "grad_norm": 0.7042643427848816, "learning_rate": 0.00097176076912624, "loss": 3.6881, "step": 3325 }, { "epoch": 0.22625356706074196, "grad_norm": 0.593842625617981, "learning_rate": 0.0009717183041174073, "loss": 3.919, "step": 3330 }, { "epoch": 0.22659328713140373, "grad_norm": 0.5943030714988708, "learning_rate": 0.0009716758391085746, "loss": 3.6208, "step": 3335 }, { "epoch": 0.2269330072020655, "grad_norm": 1.3144179582595825, "learning_rate": 0.0009716333740997418, "loss": 3.4317, "step": 3340 }, { "epoch": 0.22727272727272727, "grad_norm": 0.6511430740356445, "learning_rate": 0.000971590909090909, "loss": 3.7321, "step": 3345 }, { "epoch": 0.22761244734338903, "grad_norm": 0.6584237217903137, "learning_rate": 0.0009715484440820765, "loss": 3.803, "step": 3350 }, { "epoch": 0.22795216741405083, "grad_norm": 0.6386221647262573, "learning_rate": 0.0009715059790732437, "loss": 3.6276, "step": 3355 }, { "epoch": 0.2282918874847126, "grad_norm": 0.5515703558921814, "learning_rate": 0.0009714635140644109, "loss": 3.7117, "step": 3360 }, { "epoch": 0.22863160755537437, "grad_norm": 0.6941877007484436, "learning_rate": 0.0009714210490555783, "loss": 3.8806, "step": 3365 }, { "epoch": 0.22897132762603614, "grad_norm": 1.0481723546981812, "learning_rate": 0.0009713785840467455, "loss": 3.771, "step": 3370 }, { "epoch": 0.2293110476966979, "grad_norm": 0.9220503568649292, "learning_rate": 0.0009713361190379127, "loss": 3.7536, "step": 3375 }, { "epoch": 0.2296507677673597, "grad_norm": 0.7632156014442444, "learning_rate": 0.00097129365402908, "loss": 3.6445, "step": 3380 }, { "epoch": 0.22999048783802148, "grad_norm": 0.7810511589050293, "learning_rate": 0.0009712511890202474, "loss": 3.8208, "step": 3385 }, { "epoch": 0.23033020790868325, "grad_norm": 0.5559766888618469, "learning_rate": 0.0009712087240114146, "loss": 3.6546, "step": 3390 }, { "epoch": 0.23066992797934502, "grad_norm": 0.635195791721344, "learning_rate": 0.000971166259002582, "loss": 3.8679, "step": 3395 }, { "epoch": 0.2310096480500068, "grad_norm": 0.7121466994285583, "learning_rate": 0.0009711237939937492, "loss": 3.565, "step": 3400 }, { "epoch": 0.23134936812066856, "grad_norm": 2.192315101623535, "learning_rate": 0.0009710813289849164, "loss": 3.8571, "step": 3405 }, { "epoch": 0.23168908819133036, "grad_norm": 0.643825352191925, "learning_rate": 0.0009710388639760837, "loss": 3.7185, "step": 3410 }, { "epoch": 0.23202880826199213, "grad_norm": 0.7523700594902039, "learning_rate": 0.000970996398967251, "loss": 3.798, "step": 3415 }, { "epoch": 0.2323685283326539, "grad_norm": 0.7138873934745789, "learning_rate": 0.0009709539339584183, "loss": 3.8475, "step": 3420 }, { "epoch": 0.23270824840331567, "grad_norm": 0.7480379939079285, "learning_rate": 0.0009709114689495856, "loss": 3.8934, "step": 3425 }, { "epoch": 0.23304796847397743, "grad_norm": 0.681869387626648, "learning_rate": 0.0009708690039407529, "loss": 3.8174, "step": 3430 }, { "epoch": 0.2333876885446392, "grad_norm": 0.5060685873031616, "learning_rate": 0.0009708265389319201, "loss": 3.8649, "step": 3435 }, { "epoch": 0.233727408615301, "grad_norm": 0.7572314143180847, "learning_rate": 0.0009707840739230874, "loss": 3.5466, "step": 3440 }, { "epoch": 0.23406712868596277, "grad_norm": 1.0155407190322876, "learning_rate": 0.0009707416089142546, "loss": 3.6435, "step": 3445 }, { "epoch": 0.23440684875662454, "grad_norm": 0.7484281063079834, "learning_rate": 0.0009706991439054219, "loss": 3.718, "step": 3450 }, { "epoch": 0.2347465688272863, "grad_norm": 0.711729109287262, "learning_rate": 0.0009706566788965893, "loss": 3.749, "step": 3455 }, { "epoch": 0.23508628889794808, "grad_norm": 0.887844979763031, "learning_rate": 0.0009706142138877565, "loss": 3.6235, "step": 3460 }, { "epoch": 0.23542600896860988, "grad_norm": 0.5965444445610046, "learning_rate": 0.0009705717488789238, "loss": 3.8136, "step": 3465 }, { "epoch": 0.23576572903927165, "grad_norm": 0.7257551550865173, "learning_rate": 0.0009705292838700911, "loss": 3.5925, "step": 3470 }, { "epoch": 0.23610544910993342, "grad_norm": 0.7158923149108887, "learning_rate": 0.0009704868188612583, "loss": 3.6457, "step": 3475 }, { "epoch": 0.2364451691805952, "grad_norm": 0.8055842518806458, "learning_rate": 0.0009704443538524255, "loss": 3.8343, "step": 3480 }, { "epoch": 0.23678488925125696, "grad_norm": 0.6616836786270142, "learning_rate": 0.0009704018888435929, "loss": 3.808, "step": 3485 }, { "epoch": 0.23712460932191873, "grad_norm": 0.6936202049255371, "learning_rate": 0.0009703594238347602, "loss": 3.4899, "step": 3490 }, { "epoch": 0.23746432939258053, "grad_norm": 0.604406476020813, "learning_rate": 0.0009703169588259274, "loss": 3.6836, "step": 3495 }, { "epoch": 0.2378040494632423, "grad_norm": 0.740138053894043, "learning_rate": 0.0009702744938170948, "loss": 3.8944, "step": 3500 }, { "epoch": 0.23814376953390406, "grad_norm": 0.7477846145629883, "learning_rate": 0.000970232028808262, "loss": 3.84, "step": 3505 }, { "epoch": 0.23848348960456583, "grad_norm": 0.8168776631355286, "learning_rate": 0.0009701895637994292, "loss": 3.6887, "step": 3510 }, { "epoch": 0.2388232096752276, "grad_norm": 0.6762919425964355, "learning_rate": 0.0009701470987905966, "loss": 3.7436, "step": 3515 }, { "epoch": 0.23916292974588937, "grad_norm": 0.6656241416931152, "learning_rate": 0.0009701046337817638, "loss": 3.6114, "step": 3520 }, { "epoch": 0.23950264981655117, "grad_norm": 0.6061438918113708, "learning_rate": 0.0009700621687729311, "loss": 3.8649, "step": 3525 }, { "epoch": 0.23984236988721294, "grad_norm": 1.1483242511749268, "learning_rate": 0.0009700197037640985, "loss": 3.7656, "step": 3530 }, { "epoch": 0.2401820899578747, "grad_norm": 0.6067122220993042, "learning_rate": 0.0009699772387552657, "loss": 3.4087, "step": 3535 }, { "epoch": 0.24052181002853648, "grad_norm": 0.6878045797348022, "learning_rate": 0.0009699347737464329, "loss": 3.7125, "step": 3540 }, { "epoch": 0.24086153009919825, "grad_norm": 0.8297713994979858, "learning_rate": 0.0009698923087376002, "loss": 3.8188, "step": 3545 }, { "epoch": 0.24120125016986005, "grad_norm": 0.7606715559959412, "learning_rate": 0.0009698498437287675, "loss": 3.9155, "step": 3550 }, { "epoch": 0.24154097024052182, "grad_norm": 0.5454121232032776, "learning_rate": 0.0009698073787199347, "loss": 3.8156, "step": 3555 }, { "epoch": 0.2418806903111836, "grad_norm": 0.7684235572814941, "learning_rate": 0.0009697649137111021, "loss": 3.538, "step": 3560 }, { "epoch": 0.24222041038184536, "grad_norm": 0.6121727228164673, "learning_rate": 0.0009697224487022694, "loss": 3.6914, "step": 3565 }, { "epoch": 0.24256013045250713, "grad_norm": 0.7470285892486572, "learning_rate": 0.0009696799836934366, "loss": 3.5258, "step": 3570 }, { "epoch": 0.2428998505231689, "grad_norm": 0.6992287039756775, "learning_rate": 0.0009696375186846039, "loss": 3.8434, "step": 3575 }, { "epoch": 0.2432395705938307, "grad_norm": 0.7074785232543945, "learning_rate": 0.0009695950536757711, "loss": 3.9296, "step": 3580 }, { "epoch": 0.24357929066449246, "grad_norm": 0.5813422799110413, "learning_rate": 0.0009695525886669385, "loss": 3.8766, "step": 3585 }, { "epoch": 0.24391901073515423, "grad_norm": 2.3182342052459717, "learning_rate": 0.0009695101236581057, "loss": 3.605, "step": 3590 }, { "epoch": 0.244258730805816, "grad_norm": 1.234378695487976, "learning_rate": 0.000969467658649273, "loss": 3.6938, "step": 3595 }, { "epoch": 0.24459845087647777, "grad_norm": 0.6325404047966003, "learning_rate": 0.0009694251936404404, "loss": 3.694, "step": 3600 }, { "epoch": 0.24493817094713954, "grad_norm": 0.8209719061851501, "learning_rate": 0.0009693827286316076, "loss": 3.6918, "step": 3605 }, { "epoch": 0.24527789101780134, "grad_norm": 1.4580378532409668, "learning_rate": 0.0009693402636227748, "loss": 3.7744, "step": 3610 }, { "epoch": 0.2456176110884631, "grad_norm": 0.6046342253684998, "learning_rate": 0.0009692977986139422, "loss": 3.8888, "step": 3615 }, { "epoch": 0.24595733115912488, "grad_norm": 0.599679708480835, "learning_rate": 0.0009692553336051094, "loss": 3.576, "step": 3620 }, { "epoch": 0.24629705122978665, "grad_norm": 0.8095775842666626, "learning_rate": 0.0009692128685962766, "loss": 3.7028, "step": 3625 }, { "epoch": 0.24663677130044842, "grad_norm": 0.6189206838607788, "learning_rate": 0.0009691704035874441, "loss": 3.596, "step": 3630 }, { "epoch": 0.24697649137111022, "grad_norm": 0.7269278168678284, "learning_rate": 0.0009691279385786113, "loss": 3.794, "step": 3635 }, { "epoch": 0.247316211441772, "grad_norm": 0.6310652494430542, "learning_rate": 0.0009690854735697785, "loss": 3.81, "step": 3640 }, { "epoch": 0.24765593151243376, "grad_norm": 0.7167465090751648, "learning_rate": 0.0009690430085609458, "loss": 3.9076, "step": 3645 }, { "epoch": 0.24799565158309553, "grad_norm": 0.5810459852218628, "learning_rate": 0.0009690005435521131, "loss": 3.6931, "step": 3650 }, { "epoch": 0.2483353716537573, "grad_norm": 0.730108380317688, "learning_rate": 0.0009689580785432803, "loss": 3.5263, "step": 3655 }, { "epoch": 0.24867509172441907, "grad_norm": 0.7339727282524109, "learning_rate": 0.0009689156135344476, "loss": 3.6601, "step": 3660 }, { "epoch": 0.24901481179508086, "grad_norm": 1.1305773258209229, "learning_rate": 0.000968873148525615, "loss": 3.6521, "step": 3665 }, { "epoch": 0.24935453186574263, "grad_norm": 1.4087789058685303, "learning_rate": 0.0009688306835167822, "loss": 3.7739, "step": 3670 }, { "epoch": 0.2496942519364044, "grad_norm": 0.9541872143745422, "learning_rate": 0.0009687882185079495, "loss": 3.8882, "step": 3675 }, { "epoch": 0.2500339720070662, "grad_norm": 0.6053314805030823, "learning_rate": 0.0009687457534991167, "loss": 3.7305, "step": 3680 }, { "epoch": 0.25037369207772797, "grad_norm": 0.7858479619026184, "learning_rate": 0.000968703288490284, "loss": 3.6703, "step": 3685 }, { "epoch": 0.2507134121483897, "grad_norm": 0.5707651972770691, "learning_rate": 0.0009686608234814513, "loss": 3.5633, "step": 3690 }, { "epoch": 0.2510531322190515, "grad_norm": 0.6128956079483032, "learning_rate": 0.0009686183584726185, "loss": 3.8556, "step": 3695 }, { "epoch": 0.25139285228971325, "grad_norm": 0.753255307674408, "learning_rate": 0.0009685758934637859, "loss": 3.8814, "step": 3700 }, { "epoch": 0.25173257236037505, "grad_norm": 0.6241249442100525, "learning_rate": 0.0009685334284549532, "loss": 3.824, "step": 3705 }, { "epoch": 0.25207229243103685, "grad_norm": 0.7240098118782043, "learning_rate": 0.0009684909634461204, "loss": 3.6703, "step": 3710 }, { "epoch": 0.2524120125016986, "grad_norm": 0.8464183807373047, "learning_rate": 0.0009684484984372877, "loss": 3.6528, "step": 3715 }, { "epoch": 0.2527517325723604, "grad_norm": 0.5208722352981567, "learning_rate": 0.000968406033428455, "loss": 3.6292, "step": 3720 }, { "epoch": 0.25309145264302213, "grad_norm": 0.5412998795509338, "learning_rate": 0.0009683635684196222, "loss": 3.775, "step": 3725 }, { "epoch": 0.2534311727136839, "grad_norm": 0.709400475025177, "learning_rate": 0.0009683211034107894, "loss": 3.9485, "step": 3730 }, { "epoch": 0.2537708927843457, "grad_norm": 0.7480413913726807, "learning_rate": 0.0009682786384019569, "loss": 3.6651, "step": 3735 }, { "epoch": 0.25411061285500747, "grad_norm": 0.6629950404167175, "learning_rate": 0.0009682361733931241, "loss": 3.7367, "step": 3740 }, { "epoch": 0.25445033292566926, "grad_norm": 0.7563631534576416, "learning_rate": 0.0009681937083842913, "loss": 3.5439, "step": 3745 }, { "epoch": 0.254790052996331, "grad_norm": 0.5678558945655823, "learning_rate": 0.0009681512433754587, "loss": 3.5644, "step": 3750 }, { "epoch": 0.2551297730669928, "grad_norm": 1.0968127250671387, "learning_rate": 0.0009681087783666259, "loss": 3.9246, "step": 3755 }, { "epoch": 0.25546949313765455, "grad_norm": 0.7039735913276672, "learning_rate": 0.0009680663133577931, "loss": 3.6756, "step": 3760 }, { "epoch": 0.25580921320831634, "grad_norm": 3.6075406074523926, "learning_rate": 0.0009680238483489606, "loss": 3.7122, "step": 3765 }, { "epoch": 0.25614893327897814, "grad_norm": 0.7793748378753662, "learning_rate": 0.0009679813833401278, "loss": 3.7305, "step": 3770 }, { "epoch": 0.2564886533496399, "grad_norm": 0.6226781606674194, "learning_rate": 0.000967938918331295, "loss": 3.6611, "step": 3775 }, { "epoch": 0.2568283734203017, "grad_norm": 0.9450697302818298, "learning_rate": 0.0009678964533224623, "loss": 3.8694, "step": 3780 }, { "epoch": 0.2571680934909634, "grad_norm": 0.6313878297805786, "learning_rate": 0.0009678539883136296, "loss": 3.7633, "step": 3785 }, { "epoch": 0.2575078135616252, "grad_norm": 0.5901970267295837, "learning_rate": 0.0009678115233047968, "loss": 3.7798, "step": 3790 }, { "epoch": 0.257847533632287, "grad_norm": 0.7463569641113281, "learning_rate": 0.0009677690582959641, "loss": 3.7769, "step": 3795 }, { "epoch": 0.25818725370294876, "grad_norm": 0.6924563050270081, "learning_rate": 0.0009677265932871315, "loss": 3.6305, "step": 3800 }, { "epoch": 0.25852697377361056, "grad_norm": 0.8145256638526917, "learning_rate": 0.0009676841282782987, "loss": 3.6811, "step": 3805 }, { "epoch": 0.2588666938442723, "grad_norm": 0.7224252820014954, "learning_rate": 0.000967641663269466, "loss": 3.7228, "step": 3810 }, { "epoch": 0.2592064139149341, "grad_norm": 0.6247282028198242, "learning_rate": 0.0009675991982606333, "loss": 3.7572, "step": 3815 }, { "epoch": 0.2595461339855959, "grad_norm": 0.6958258152008057, "learning_rate": 0.0009675567332518005, "loss": 3.5988, "step": 3820 }, { "epoch": 0.25988585405625764, "grad_norm": 0.6718140244483948, "learning_rate": 0.0009675142682429678, "loss": 3.7994, "step": 3825 }, { "epoch": 0.26022557412691943, "grad_norm": 0.8125585317611694, "learning_rate": 0.000967471803234135, "loss": 3.4968, "step": 3830 }, { "epoch": 0.2605652941975812, "grad_norm": 0.6540567874908447, "learning_rate": 0.0009674293382253024, "loss": 3.5676, "step": 3835 }, { "epoch": 0.260905014268243, "grad_norm": 0.5872313976287842, "learning_rate": 0.0009673868732164697, "loss": 3.6956, "step": 3840 }, { "epoch": 0.2612447343389047, "grad_norm": 0.6185137629508972, "learning_rate": 0.0009673444082076369, "loss": 3.8481, "step": 3845 }, { "epoch": 0.2615844544095665, "grad_norm": 0.7442803978919983, "learning_rate": 0.0009673019431988042, "loss": 3.8018, "step": 3850 }, { "epoch": 0.2619241744802283, "grad_norm": 0.702149510383606, "learning_rate": 0.0009672594781899715, "loss": 3.5721, "step": 3855 }, { "epoch": 0.26226389455089005, "grad_norm": 0.7930136919021606, "learning_rate": 0.0009672170131811387, "loss": 3.8332, "step": 3860 }, { "epoch": 0.26260361462155185, "grad_norm": 0.6575914025306702, "learning_rate": 0.000967174548172306, "loss": 3.6584, "step": 3865 }, { "epoch": 0.2629433346922136, "grad_norm": 0.6223536729812622, "learning_rate": 0.0009671320831634734, "loss": 3.4476, "step": 3870 }, { "epoch": 0.2632830547628754, "grad_norm": 0.6894667148590088, "learning_rate": 0.0009670896181546406, "loss": 3.7942, "step": 3875 }, { "epoch": 0.2636227748335372, "grad_norm": 0.5753107070922852, "learning_rate": 0.0009670471531458078, "loss": 3.8189, "step": 3880 }, { "epoch": 0.26396249490419893, "grad_norm": 0.7406034469604492, "learning_rate": 0.0009670046881369752, "loss": 3.6759, "step": 3885 }, { "epoch": 0.2643022149748607, "grad_norm": 0.5585180521011353, "learning_rate": 0.0009669622231281424, "loss": 3.7618, "step": 3890 }, { "epoch": 0.26464193504552247, "grad_norm": 0.7381715774536133, "learning_rate": 0.0009669197581193096, "loss": 3.9587, "step": 3895 }, { "epoch": 0.26498165511618427, "grad_norm": 0.7363392114639282, "learning_rate": 0.000966877293110477, "loss": 3.4923, "step": 3900 }, { "epoch": 0.26532137518684606, "grad_norm": 0.9010974764823914, "learning_rate": 0.0009668348281016443, "loss": 4.0328, "step": 3905 }, { "epoch": 0.2656610952575078, "grad_norm": 0.7546082735061646, "learning_rate": 0.0009667923630928115, "loss": 3.6062, "step": 3910 }, { "epoch": 0.2660008153281696, "grad_norm": 0.6980230808258057, "learning_rate": 0.0009667498980839789, "loss": 3.9225, "step": 3915 }, { "epoch": 0.26634053539883135, "grad_norm": 0.7920522689819336, "learning_rate": 0.0009667074330751461, "loss": 3.9315, "step": 3920 }, { "epoch": 0.26668025546949314, "grad_norm": 0.7503193020820618, "learning_rate": 0.0009666649680663134, "loss": 3.7232, "step": 3925 }, { "epoch": 0.2670199755401549, "grad_norm": 0.5713961124420166, "learning_rate": 0.0009666225030574806, "loss": 3.4191, "step": 3930 }, { "epoch": 0.2673596956108167, "grad_norm": 0.6793795228004456, "learning_rate": 0.0009665800380486479, "loss": 3.8886, "step": 3935 }, { "epoch": 0.2676994156814785, "grad_norm": 0.5702452659606934, "learning_rate": 0.0009665375730398153, "loss": 3.8435, "step": 3940 }, { "epoch": 0.2680391357521402, "grad_norm": 0.7704566121101379, "learning_rate": 0.0009664951080309825, "loss": 3.5918, "step": 3945 }, { "epoch": 0.268378855822802, "grad_norm": 0.6938804984092712, "learning_rate": 0.0009664526430221498, "loss": 4.0113, "step": 3950 }, { "epoch": 0.26871857589346376, "grad_norm": 0.74507737159729, "learning_rate": 0.0009664101780133171, "loss": 3.5163, "step": 3955 }, { "epoch": 0.26905829596412556, "grad_norm": 0.5978330969810486, "learning_rate": 0.0009663677130044843, "loss": 3.8, "step": 3960 }, { "epoch": 0.26939801603478736, "grad_norm": 0.9157208204269409, "learning_rate": 0.0009663252479956515, "loss": 3.9161, "step": 3965 }, { "epoch": 0.2697377361054491, "grad_norm": 0.7541576623916626, "learning_rate": 0.0009662827829868189, "loss": 3.5579, "step": 3970 }, { "epoch": 0.2700774561761109, "grad_norm": 0.6681669354438782, "learning_rate": 0.0009662403179779862, "loss": 3.9983, "step": 3975 }, { "epoch": 0.27041717624677264, "grad_norm": 0.6837373971939087, "learning_rate": 0.0009661978529691534, "loss": 3.7978, "step": 3980 }, { "epoch": 0.27075689631743444, "grad_norm": 0.7061611413955688, "learning_rate": 0.0009661553879603208, "loss": 3.9587, "step": 3985 }, { "epoch": 0.27109661638809623, "grad_norm": 0.6160033345222473, "learning_rate": 0.000966112922951488, "loss": 3.7788, "step": 3990 }, { "epoch": 0.271436336458758, "grad_norm": 0.7030746340751648, "learning_rate": 0.0009660704579426552, "loss": 3.6304, "step": 3995 }, { "epoch": 0.2717760565294198, "grad_norm": 0.7516574263572693, "learning_rate": 0.0009660279929338226, "loss": 3.9709, "step": 4000 }, { "epoch": 0.2721157766000815, "grad_norm": 0.5817821621894836, "learning_rate": 0.0009659855279249898, "loss": 3.6713, "step": 4005 }, { "epoch": 0.2724554966707433, "grad_norm": 0.5100502967834473, "learning_rate": 0.0009659430629161571, "loss": 3.6554, "step": 4010 }, { "epoch": 0.27279521674140506, "grad_norm": 0.7973607182502747, "learning_rate": 0.0009659005979073245, "loss": 3.9906, "step": 4015 }, { "epoch": 0.27313493681206685, "grad_norm": 0.6986095905303955, "learning_rate": 0.0009658581328984917, "loss": 3.5992, "step": 4020 }, { "epoch": 0.27347465688272865, "grad_norm": 0.7145535945892334, "learning_rate": 0.0009658156678896589, "loss": 3.7215, "step": 4025 }, { "epoch": 0.2738143769533904, "grad_norm": 0.644110381603241, "learning_rate": 0.0009657732028808262, "loss": 3.4877, "step": 4030 }, { "epoch": 0.2741540970240522, "grad_norm": 0.6192665696144104, "learning_rate": 0.0009657307378719935, "loss": 3.7874, "step": 4035 }, { "epoch": 0.27449381709471393, "grad_norm": 0.5692789554595947, "learning_rate": 0.0009656882728631607, "loss": 3.6267, "step": 4040 }, { "epoch": 0.27483353716537573, "grad_norm": 0.6869579553604126, "learning_rate": 0.0009656458078543281, "loss": 4.0107, "step": 4045 }, { "epoch": 0.2751732572360375, "grad_norm": 0.7522791624069214, "learning_rate": 0.0009656033428454954, "loss": 3.8558, "step": 4050 }, { "epoch": 0.27551297730669927, "grad_norm": 0.5839440822601318, "learning_rate": 0.0009655608778366626, "loss": 3.7982, "step": 4055 }, { "epoch": 0.27585269737736107, "grad_norm": 0.6837066411972046, "learning_rate": 0.0009655184128278299, "loss": 3.7102, "step": 4060 }, { "epoch": 0.2761924174480228, "grad_norm": 0.7138178944587708, "learning_rate": 0.0009654759478189972, "loss": 3.622, "step": 4065 }, { "epoch": 0.2765321375186846, "grad_norm": 0.6989341378211975, "learning_rate": 0.0009654334828101644, "loss": 3.5083, "step": 4070 }, { "epoch": 0.2768718575893464, "grad_norm": 0.6958532929420471, "learning_rate": 0.0009653910178013317, "loss": 3.8488, "step": 4075 }, { "epoch": 0.27721157766000815, "grad_norm": 0.5760381817817688, "learning_rate": 0.000965348552792499, "loss": 3.731, "step": 4080 }, { "epoch": 0.27755129773066994, "grad_norm": 0.6913160085678101, "learning_rate": 0.0009653060877836663, "loss": 3.4657, "step": 4085 }, { "epoch": 0.2778910178013317, "grad_norm": 0.6547573208808899, "learning_rate": 0.0009652636227748336, "loss": 3.6995, "step": 4090 }, { "epoch": 0.2782307378719935, "grad_norm": 0.7820612192153931, "learning_rate": 0.0009652211577660008, "loss": 3.7452, "step": 4095 }, { "epoch": 0.2785704579426552, "grad_norm": 0.6629460453987122, "learning_rate": 0.0009651786927571681, "loss": 3.7866, "step": 4100 }, { "epoch": 0.278910178013317, "grad_norm": 0.6040059924125671, "learning_rate": 0.0009651362277483354, "loss": 3.5589, "step": 4105 }, { "epoch": 0.2792498980839788, "grad_norm": 1.0010850429534912, "learning_rate": 0.0009650937627395026, "loss": 3.802, "step": 4110 }, { "epoch": 0.27958961815464056, "grad_norm": 0.6360946893692017, "learning_rate": 0.00096505129773067, "loss": 3.7197, "step": 4115 }, { "epoch": 0.27992933822530236, "grad_norm": 2.3139994144439697, "learning_rate": 0.0009650088327218373, "loss": 3.6895, "step": 4120 }, { "epoch": 0.2802690582959641, "grad_norm": 0.8118367195129395, "learning_rate": 0.0009649663677130045, "loss": 3.5454, "step": 4125 }, { "epoch": 0.2806087783666259, "grad_norm": 0.702793538570404, "learning_rate": 0.0009649239027041717, "loss": 3.6917, "step": 4130 }, { "epoch": 0.2809484984372877, "grad_norm": 0.5384190678596497, "learning_rate": 0.0009648814376953391, "loss": 3.6009, "step": 4135 }, { "epoch": 0.28128821850794944, "grad_norm": 0.6091647148132324, "learning_rate": 0.0009648389726865063, "loss": 3.6381, "step": 4140 }, { "epoch": 0.28162793857861124, "grad_norm": 0.6185944080352783, "learning_rate": 0.0009647965076776735, "loss": 3.7377, "step": 4145 }, { "epoch": 0.281967658649273, "grad_norm": 0.8071286678314209, "learning_rate": 0.000964754042668841, "loss": 3.9584, "step": 4150 }, { "epoch": 0.2823073787199348, "grad_norm": 0.6260259747505188, "learning_rate": 0.0009647115776600082, "loss": 3.8657, "step": 4155 }, { "epoch": 0.2826470987905966, "grad_norm": 0.7029274702072144, "learning_rate": 0.0009646691126511754, "loss": 3.8413, "step": 4160 }, { "epoch": 0.2829868188612583, "grad_norm": 0.6549016833305359, "learning_rate": 0.0009646266476423428, "loss": 3.5741, "step": 4165 }, { "epoch": 0.2833265389319201, "grad_norm": 0.6251670718193054, "learning_rate": 0.00096458418263351, "loss": 3.6524, "step": 4170 }, { "epoch": 0.28366625900258186, "grad_norm": 0.6982675194740295, "learning_rate": 0.0009645417176246772, "loss": 3.7666, "step": 4175 }, { "epoch": 0.28400597907324365, "grad_norm": 0.8168516159057617, "learning_rate": 0.0009644992526158445, "loss": 3.4167, "step": 4180 }, { "epoch": 0.2843456991439054, "grad_norm": 0.6430438756942749, "learning_rate": 0.0009644567876070119, "loss": 3.8558, "step": 4185 }, { "epoch": 0.2846854192145672, "grad_norm": 0.8260819911956787, "learning_rate": 0.0009644143225981791, "loss": 3.4549, "step": 4190 }, { "epoch": 0.285025139285229, "grad_norm": 0.6831293702125549, "learning_rate": 0.0009643718575893464, "loss": 3.7823, "step": 4195 }, { "epoch": 0.28536485935589073, "grad_norm": 0.6138635873794556, "learning_rate": 0.0009643293925805137, "loss": 3.7056, "step": 4200 }, { "epoch": 0.28570457942655253, "grad_norm": 0.5763000249862671, "learning_rate": 0.0009642869275716809, "loss": 3.754, "step": 4205 }, { "epoch": 0.28604429949721427, "grad_norm": 0.8776401877403259, "learning_rate": 0.0009642444625628482, "loss": 3.8041, "step": 4210 }, { "epoch": 0.28638401956787607, "grad_norm": 1.109387993812561, "learning_rate": 0.0009642019975540154, "loss": 3.7585, "step": 4215 }, { "epoch": 0.28672373963853787, "grad_norm": 0.6992509365081787, "learning_rate": 0.0009641595325451828, "loss": 3.9185, "step": 4220 }, { "epoch": 0.2870634597091996, "grad_norm": 0.7073003053665161, "learning_rate": 0.0009641170675363501, "loss": 3.9487, "step": 4225 }, { "epoch": 0.2874031797798614, "grad_norm": 0.6866264939308167, "learning_rate": 0.0009640746025275173, "loss": 3.8364, "step": 4230 }, { "epoch": 0.28774289985052315, "grad_norm": 0.5466263890266418, "learning_rate": 0.0009640321375186846, "loss": 3.5596, "step": 4235 }, { "epoch": 0.28808261992118495, "grad_norm": 0.6644207239151001, "learning_rate": 0.0009639896725098519, "loss": 3.8898, "step": 4240 }, { "epoch": 0.28842233999184674, "grad_norm": 0.6112736463546753, "learning_rate": 0.0009639472075010191, "loss": 3.8798, "step": 4245 }, { "epoch": 0.2887620600625085, "grad_norm": 0.7702374458312988, "learning_rate": 0.0009639047424921864, "loss": 3.6329, "step": 4250 }, { "epoch": 0.2891017801331703, "grad_norm": 0.5818788409233093, "learning_rate": 0.0009638622774833538, "loss": 3.5738, "step": 4255 }, { "epoch": 0.289441500203832, "grad_norm": 0.7824899554252625, "learning_rate": 0.000963819812474521, "loss": 3.9873, "step": 4260 }, { "epoch": 0.2897812202744938, "grad_norm": 1.1394249200820923, "learning_rate": 0.0009637773474656884, "loss": 3.6349, "step": 4265 }, { "epoch": 0.29012094034515556, "grad_norm": 0.6994426846504211, "learning_rate": 0.0009637348824568556, "loss": 3.7287, "step": 4270 }, { "epoch": 0.29046066041581736, "grad_norm": 0.6484062671661377, "learning_rate": 0.0009636924174480228, "loss": 3.7825, "step": 4275 }, { "epoch": 0.29080038048647916, "grad_norm": 1.0661653280258179, "learning_rate": 0.0009636499524391901, "loss": 3.6908, "step": 4280 }, { "epoch": 0.2911401005571409, "grad_norm": 0.5649815201759338, "learning_rate": 0.0009636074874303575, "loss": 3.8531, "step": 4285 }, { "epoch": 0.2914798206278027, "grad_norm": 0.7183372974395752, "learning_rate": 0.0009635650224215247, "loss": 3.5815, "step": 4290 }, { "epoch": 0.29181954069846444, "grad_norm": 0.6840630173683167, "learning_rate": 0.000963522557412692, "loss": 3.955, "step": 4295 }, { "epoch": 0.29215926076912624, "grad_norm": 0.6997886300086975, "learning_rate": 0.0009634800924038593, "loss": 3.6086, "step": 4300 }, { "epoch": 0.29249898083978804, "grad_norm": 0.7394000291824341, "learning_rate": 0.0009634376273950265, "loss": 3.4845, "step": 4305 }, { "epoch": 0.2928387009104498, "grad_norm": 0.6880769729614258, "learning_rate": 0.0009633951623861938, "loss": 3.8555, "step": 4310 }, { "epoch": 0.2931784209811116, "grad_norm": 0.9616801142692566, "learning_rate": 0.000963352697377361, "loss": 3.6079, "step": 4315 }, { "epoch": 0.2935181410517733, "grad_norm": 0.7349090576171875, "learning_rate": 0.0009633102323685284, "loss": 3.6728, "step": 4320 }, { "epoch": 0.2938578611224351, "grad_norm": 0.6582932472229004, "learning_rate": 0.0009632677673596957, "loss": 3.5777, "step": 4325 }, { "epoch": 0.2941975811930969, "grad_norm": 0.6528509855270386, "learning_rate": 0.0009632253023508629, "loss": 3.7702, "step": 4330 }, { "epoch": 0.29453730126375866, "grad_norm": 0.6171696782112122, "learning_rate": 0.0009631828373420302, "loss": 3.8234, "step": 4335 }, { "epoch": 0.29487702133442045, "grad_norm": 0.9008753299713135, "learning_rate": 0.0009631403723331975, "loss": 3.6927, "step": 4340 }, { "epoch": 0.2952167414050822, "grad_norm": 0.6457626819610596, "learning_rate": 0.0009630979073243647, "loss": 3.2905, "step": 4345 }, { "epoch": 0.295556461475744, "grad_norm": 0.7185888886451721, "learning_rate": 0.000963055442315532, "loss": 3.6635, "step": 4350 }, { "epoch": 0.29589618154640573, "grad_norm": 0.7425805330276489, "learning_rate": 0.0009630129773066994, "loss": 3.7653, "step": 4355 }, { "epoch": 0.29623590161706753, "grad_norm": 0.6147916913032532, "learning_rate": 0.0009629705122978666, "loss": 3.5351, "step": 4360 }, { "epoch": 0.29657562168772933, "grad_norm": 0.6081828474998474, "learning_rate": 0.0009629280472890338, "loss": 3.8761, "step": 4365 }, { "epoch": 0.29691534175839107, "grad_norm": 0.7161690592765808, "learning_rate": 0.0009628855822802012, "loss": 3.8387, "step": 4370 }, { "epoch": 0.29725506182905287, "grad_norm": 0.7499025464057922, "learning_rate": 0.0009628431172713684, "loss": 3.5769, "step": 4375 }, { "epoch": 0.2975947818997146, "grad_norm": 0.7925812602043152, "learning_rate": 0.0009628006522625356, "loss": 3.7181, "step": 4380 }, { "epoch": 0.2979345019703764, "grad_norm": 0.6369455456733704, "learning_rate": 0.000962758187253703, "loss": 3.7408, "step": 4385 }, { "epoch": 0.2982742220410382, "grad_norm": 0.7347651720046997, "learning_rate": 0.0009627157222448703, "loss": 3.9034, "step": 4390 }, { "epoch": 0.29861394211169995, "grad_norm": 0.6870957016944885, "learning_rate": 0.0009626732572360375, "loss": 3.9102, "step": 4395 }, { "epoch": 0.29895366218236175, "grad_norm": 0.717404305934906, "learning_rate": 0.0009626307922272049, "loss": 3.6934, "step": 4400 }, { "epoch": 0.2992933822530235, "grad_norm": 0.7716221809387207, "learning_rate": 0.0009625883272183721, "loss": 3.6996, "step": 4405 }, { "epoch": 0.2996331023236853, "grad_norm": 0.8365433216094971, "learning_rate": 0.0009625458622095393, "loss": 3.9056, "step": 4410 }, { "epoch": 0.2999728223943471, "grad_norm": 0.7925680875778198, "learning_rate": 0.0009625033972007066, "loss": 3.9172, "step": 4415 }, { "epoch": 0.3003125424650088, "grad_norm": 0.8062108159065247, "learning_rate": 0.0009624609321918739, "loss": 3.7353, "step": 4420 }, { "epoch": 0.3006522625356706, "grad_norm": 0.5405961871147156, "learning_rate": 0.0009624184671830412, "loss": 3.8695, "step": 4425 }, { "epoch": 0.30099198260633236, "grad_norm": 0.6823050379753113, "learning_rate": 0.0009623760021742085, "loss": 3.9127, "step": 4430 }, { "epoch": 0.30133170267699416, "grad_norm": 0.9308080077171326, "learning_rate": 0.0009623335371653758, "loss": 3.9683, "step": 4435 }, { "epoch": 0.3016714227476559, "grad_norm": 0.7651321291923523, "learning_rate": 0.000962291072156543, "loss": 3.6044, "step": 4440 }, { "epoch": 0.3020111428183177, "grad_norm": 0.6551792025566101, "learning_rate": 0.0009622486071477103, "loss": 3.7825, "step": 4445 }, { "epoch": 0.3023508628889795, "grad_norm": 0.6831545233726501, "learning_rate": 0.0009622061421388776, "loss": 3.6857, "step": 4450 }, { "epoch": 0.30269058295964124, "grad_norm": 0.6022977828979492, "learning_rate": 0.0009621636771300448, "loss": 3.5084, "step": 4455 }, { "epoch": 0.30303030303030304, "grad_norm": 0.7112560868263245, "learning_rate": 0.0009621212121212122, "loss": 3.5855, "step": 4460 }, { "epoch": 0.3033700231009648, "grad_norm": 0.6755446791648865, "learning_rate": 0.0009620787471123794, "loss": 3.7325, "step": 4465 }, { "epoch": 0.3037097431716266, "grad_norm": 1.4436894655227661, "learning_rate": 0.0009620362821035467, "loss": 3.5141, "step": 4470 }, { "epoch": 0.3040494632422884, "grad_norm": 0.6700942516326904, "learning_rate": 0.000961993817094714, "loss": 3.7589, "step": 4475 }, { "epoch": 0.3043891833129501, "grad_norm": 0.6899858713150024, "learning_rate": 0.0009619513520858812, "loss": 3.9016, "step": 4480 }, { "epoch": 0.3047289033836119, "grad_norm": 0.6155400276184082, "learning_rate": 0.0009619088870770485, "loss": 3.592, "step": 4485 }, { "epoch": 0.30506862345427366, "grad_norm": 0.578019917011261, "learning_rate": 0.0009618664220682158, "loss": 4.0385, "step": 4490 }, { "epoch": 0.30540834352493546, "grad_norm": 1.3838962316513062, "learning_rate": 0.0009618239570593831, "loss": 3.7364, "step": 4495 }, { "epoch": 0.30574806359559725, "grad_norm": 0.6865847706794739, "learning_rate": 0.0009617814920505504, "loss": 3.7875, "step": 4500 }, { "epoch": 0.306087783666259, "grad_norm": 0.698574960231781, "learning_rate": 0.0009617390270417177, "loss": 3.5852, "step": 4505 }, { "epoch": 0.3064275037369208, "grad_norm": 0.6817865371704102, "learning_rate": 0.0009616965620328849, "loss": 4.033, "step": 4510 }, { "epoch": 0.30676722380758253, "grad_norm": 0.7040066719055176, "learning_rate": 0.0009616540970240521, "loss": 3.8144, "step": 4515 }, { "epoch": 0.30710694387824433, "grad_norm": 0.6947503685951233, "learning_rate": 0.0009616116320152195, "loss": 3.7897, "step": 4520 }, { "epoch": 0.3074466639489061, "grad_norm": 0.7446925640106201, "learning_rate": 0.0009615691670063867, "loss": 3.5158, "step": 4525 }, { "epoch": 0.30778638401956787, "grad_norm": 0.9600127339363098, "learning_rate": 0.000961526701997554, "loss": 3.8045, "step": 4530 }, { "epoch": 0.30812610409022967, "grad_norm": 0.7573233246803284, "learning_rate": 0.0009614842369887214, "loss": 3.5216, "step": 4535 }, { "epoch": 0.3084658241608914, "grad_norm": 0.7382401823997498, "learning_rate": 0.0009614417719798886, "loss": 3.6634, "step": 4540 }, { "epoch": 0.3088055442315532, "grad_norm": 0.9753466844558716, "learning_rate": 0.0009613993069710558, "loss": 3.4437, "step": 4545 }, { "epoch": 0.30914526430221495, "grad_norm": 0.6215182542800903, "learning_rate": 0.0009613568419622232, "loss": 3.5727, "step": 4550 }, { "epoch": 0.30948498437287675, "grad_norm": 0.7607511878013611, "learning_rate": 0.0009613143769533904, "loss": 3.8892, "step": 4555 }, { "epoch": 0.30982470444353855, "grad_norm": 0.5921122431755066, "learning_rate": 0.0009612719119445576, "loss": 3.7165, "step": 4560 }, { "epoch": 0.3101644245142003, "grad_norm": 0.8299614191055298, "learning_rate": 0.000961229446935725, "loss": 3.8243, "step": 4565 }, { "epoch": 0.3105041445848621, "grad_norm": 1.279056429862976, "learning_rate": 0.0009611869819268923, "loss": 3.8217, "step": 4570 }, { "epoch": 0.3108438646555238, "grad_norm": 0.7296881675720215, "learning_rate": 0.0009611445169180595, "loss": 3.5245, "step": 4575 }, { "epoch": 0.3111835847261856, "grad_norm": 0.9374946355819702, "learning_rate": 0.0009611020519092268, "loss": 4.0029, "step": 4580 }, { "epoch": 0.3115233047968474, "grad_norm": 0.6455745100975037, "learning_rate": 0.0009610595869003941, "loss": 3.7578, "step": 4585 }, { "epoch": 0.31186302486750916, "grad_norm": 0.7967641353607178, "learning_rate": 0.0009610171218915613, "loss": 3.7079, "step": 4590 }, { "epoch": 0.31220274493817096, "grad_norm": 0.6406934857368469, "learning_rate": 0.0009609746568827286, "loss": 3.7325, "step": 4595 }, { "epoch": 0.3125424650088327, "grad_norm": 0.6590662002563477, "learning_rate": 0.000960932191873896, "loss": 3.8455, "step": 4600 }, { "epoch": 0.3128821850794945, "grad_norm": 0.584558367729187, "learning_rate": 0.0009608897268650633, "loss": 3.7943, "step": 4605 }, { "epoch": 0.3132219051501563, "grad_norm": 0.7745699882507324, "learning_rate": 0.0009608472618562305, "loss": 3.6923, "step": 4610 }, { "epoch": 0.31356162522081804, "grad_norm": 0.5839930772781372, "learning_rate": 0.0009608047968473977, "loss": 3.7063, "step": 4615 }, { "epoch": 0.31390134529147984, "grad_norm": 0.6579026579856873, "learning_rate": 0.0009607623318385651, "loss": 3.9349, "step": 4620 }, { "epoch": 0.3142410653621416, "grad_norm": 0.7334092855453491, "learning_rate": 0.0009607198668297323, "loss": 3.7986, "step": 4625 }, { "epoch": 0.3145807854328034, "grad_norm": 0.5687442421913147, "learning_rate": 0.0009606774018208995, "loss": 3.5316, "step": 4630 }, { "epoch": 0.3149205055034651, "grad_norm": 0.8066163063049316, "learning_rate": 0.000960634936812067, "loss": 3.6936, "step": 4635 }, { "epoch": 0.3152602255741269, "grad_norm": 0.6861526370048523, "learning_rate": 0.0009605924718032342, "loss": 3.8663, "step": 4640 }, { "epoch": 0.3155999456447887, "grad_norm": 0.6165646910667419, "learning_rate": 0.0009605500067944014, "loss": 3.7368, "step": 4645 }, { "epoch": 0.31593966571545046, "grad_norm": 0.6109422445297241, "learning_rate": 0.0009605075417855688, "loss": 3.6905, "step": 4650 }, { "epoch": 0.31627938578611225, "grad_norm": 0.8834192156791687, "learning_rate": 0.000960465076776736, "loss": 3.604, "step": 4655 }, { "epoch": 0.316619105856774, "grad_norm": 0.5666911602020264, "learning_rate": 0.0009604226117679032, "loss": 3.9428, "step": 4660 }, { "epoch": 0.3169588259274358, "grad_norm": 0.922512412071228, "learning_rate": 0.0009603801467590705, "loss": 3.7212, "step": 4665 }, { "epoch": 0.3172985459980976, "grad_norm": 0.6613696813583374, "learning_rate": 0.0009603376817502379, "loss": 3.9531, "step": 4670 }, { "epoch": 0.31763826606875933, "grad_norm": 0.6953523755073547, "learning_rate": 0.0009602952167414051, "loss": 3.8304, "step": 4675 }, { "epoch": 0.31797798613942113, "grad_norm": 0.6979256868362427, "learning_rate": 0.0009602527517325724, "loss": 3.6581, "step": 4680 }, { "epoch": 0.3183177062100829, "grad_norm": 0.5268025994300842, "learning_rate": 0.0009602102867237397, "loss": 3.8971, "step": 4685 }, { "epoch": 0.31865742628074467, "grad_norm": 0.6296973824501038, "learning_rate": 0.0009601678217149069, "loss": 3.8694, "step": 4690 }, { "epoch": 0.31899714635140647, "grad_norm": 0.6151579022407532, "learning_rate": 0.0009601253567060742, "loss": 3.8435, "step": 4695 }, { "epoch": 0.3193368664220682, "grad_norm": 0.6179822087287903, "learning_rate": 0.0009600828916972415, "loss": 3.7191, "step": 4700 }, { "epoch": 0.31967658649273, "grad_norm": 0.5826281309127808, "learning_rate": 0.0009600404266884088, "loss": 3.5952, "step": 4705 }, { "epoch": 0.32001630656339175, "grad_norm": 0.8003848791122437, "learning_rate": 0.0009599979616795761, "loss": 3.5842, "step": 4710 }, { "epoch": 0.32035602663405355, "grad_norm": 0.6263446807861328, "learning_rate": 0.0009599554966707433, "loss": 3.7243, "step": 4715 }, { "epoch": 0.3206957467047153, "grad_norm": 0.6043445467948914, "learning_rate": 0.0009599130316619106, "loss": 3.912, "step": 4720 }, { "epoch": 0.3210354667753771, "grad_norm": 0.6658505797386169, "learning_rate": 0.0009598705666530779, "loss": 3.8563, "step": 4725 }, { "epoch": 0.3213751868460389, "grad_norm": 0.7094211578369141, "learning_rate": 0.0009598281016442451, "loss": 3.6251, "step": 4730 }, { "epoch": 0.3217149069167006, "grad_norm": 0.6801108717918396, "learning_rate": 0.0009597856366354124, "loss": 3.4148, "step": 4735 }, { "epoch": 0.3220546269873624, "grad_norm": 0.9091446399688721, "learning_rate": 0.0009597431716265798, "loss": 3.6064, "step": 4740 }, { "epoch": 0.32239434705802417, "grad_norm": 0.609541654586792, "learning_rate": 0.000959700706617747, "loss": 3.5285, "step": 4745 }, { "epoch": 0.32273406712868596, "grad_norm": 0.6929119825363159, "learning_rate": 0.0009596582416089143, "loss": 3.677, "step": 4750 }, { "epoch": 0.32307378719934776, "grad_norm": 0.7696729898452759, "learning_rate": 0.0009596157766000816, "loss": 3.907, "step": 4755 }, { "epoch": 0.3234135072700095, "grad_norm": 0.734379768371582, "learning_rate": 0.0009595733115912488, "loss": 3.5694, "step": 4760 }, { "epoch": 0.3237532273406713, "grad_norm": 0.670070469379425, "learning_rate": 0.000959530846582416, "loss": 3.7851, "step": 4765 }, { "epoch": 0.32409294741133304, "grad_norm": 0.7582732439041138, "learning_rate": 0.0009594883815735834, "loss": 3.7556, "step": 4770 }, { "epoch": 0.32443266748199484, "grad_norm": 0.5920730829238892, "learning_rate": 0.0009594459165647507, "loss": 3.5348, "step": 4775 }, { "epoch": 0.32477238755265664, "grad_norm": 0.6227866411209106, "learning_rate": 0.0009594034515559179, "loss": 3.8472, "step": 4780 }, { "epoch": 0.3251121076233184, "grad_norm": 0.6639100909233093, "learning_rate": 0.0009593609865470853, "loss": 3.732, "step": 4785 }, { "epoch": 0.3254518276939802, "grad_norm": 0.6550115942955017, "learning_rate": 0.0009593185215382525, "loss": 3.5549, "step": 4790 }, { "epoch": 0.3257915477646419, "grad_norm": 0.6169217824935913, "learning_rate": 0.0009592760565294197, "loss": 3.7479, "step": 4795 }, { "epoch": 0.3261312678353037, "grad_norm": 0.8241134285926819, "learning_rate": 0.000959233591520587, "loss": 3.4747, "step": 4800 }, { "epoch": 0.32647098790596546, "grad_norm": 0.8893460631370544, "learning_rate": 0.0009591911265117543, "loss": 3.6325, "step": 4805 }, { "epoch": 0.32681070797662726, "grad_norm": 0.6666406989097595, "learning_rate": 0.0009591486615029216, "loss": 3.9512, "step": 4810 }, { "epoch": 0.32715042804728905, "grad_norm": 0.8277883529663086, "learning_rate": 0.000959106196494089, "loss": 3.556, "step": 4815 }, { "epoch": 0.3274901481179508, "grad_norm": 0.8062562942504883, "learning_rate": 0.0009590637314852562, "loss": 3.737, "step": 4820 }, { "epoch": 0.3278298681886126, "grad_norm": 0.5659600496292114, "learning_rate": 0.0009590212664764234, "loss": 3.8212, "step": 4825 }, { "epoch": 0.32816958825927434, "grad_norm": 0.7723137736320496, "learning_rate": 0.0009589788014675907, "loss": 3.7699, "step": 4830 }, { "epoch": 0.32850930832993613, "grad_norm": 0.8332376480102539, "learning_rate": 0.000958936336458758, "loss": 3.733, "step": 4835 }, { "epoch": 0.32884902840059793, "grad_norm": 0.6778247952461243, "learning_rate": 0.0009588938714499252, "loss": 3.9619, "step": 4840 }, { "epoch": 0.3291887484712597, "grad_norm": 0.7050536274909973, "learning_rate": 0.0009588514064410926, "loss": 3.8264, "step": 4845 }, { "epoch": 0.32952846854192147, "grad_norm": 0.9085682034492493, "learning_rate": 0.0009588089414322599, "loss": 3.7086, "step": 4850 }, { "epoch": 0.3298681886125832, "grad_norm": 0.8574063181877136, "learning_rate": 0.0009587664764234271, "loss": 3.8815, "step": 4855 }, { "epoch": 0.330207908683245, "grad_norm": 0.7238953709602356, "learning_rate": 0.0009587240114145944, "loss": 3.4002, "step": 4860 }, { "epoch": 0.3305476287539068, "grad_norm": 0.7760982513427734, "learning_rate": 0.0009586815464057616, "loss": 3.8384, "step": 4865 }, { "epoch": 0.33088734882456855, "grad_norm": 0.8978222608566284, "learning_rate": 0.0009586390813969289, "loss": 4.007, "step": 4870 }, { "epoch": 0.33122706889523035, "grad_norm": 0.6592036485671997, "learning_rate": 0.0009585966163880963, "loss": 3.4802, "step": 4875 }, { "epoch": 0.3315667889658921, "grad_norm": 0.7717464566230774, "learning_rate": 0.0009585541513792635, "loss": 3.4183, "step": 4880 }, { "epoch": 0.3319065090365539, "grad_norm": 0.5275126695632935, "learning_rate": 0.0009585116863704308, "loss": 3.8552, "step": 4885 }, { "epoch": 0.33224622910721563, "grad_norm": 0.7978081703186035, "learning_rate": 0.0009584692213615981, "loss": 3.8261, "step": 4890 }, { "epoch": 0.3325859491778774, "grad_norm": 0.5648220777511597, "learning_rate": 0.0009584267563527653, "loss": 4.015, "step": 4895 }, { "epoch": 0.3329256692485392, "grad_norm": 0.7004973888397217, "learning_rate": 0.0009583842913439325, "loss": 3.7186, "step": 4900 }, { "epoch": 0.33326538931920097, "grad_norm": 0.7414132952690125, "learning_rate": 0.0009583418263350999, "loss": 3.626, "step": 4905 }, { "epoch": 0.33360510938986276, "grad_norm": 0.6210420727729797, "learning_rate": 0.0009582993613262672, "loss": 3.8391, "step": 4910 }, { "epoch": 0.3339448294605245, "grad_norm": 0.7563833594322205, "learning_rate": 0.0009582568963174344, "loss": 3.824, "step": 4915 }, { "epoch": 0.3342845495311863, "grad_norm": 0.7307597398757935, "learning_rate": 0.0009582144313086018, "loss": 3.5793, "step": 4920 }, { "epoch": 0.3346242696018481, "grad_norm": 0.6148789525032043, "learning_rate": 0.000958171966299769, "loss": 3.8081, "step": 4925 }, { "epoch": 0.33496398967250984, "grad_norm": 0.6317515969276428, "learning_rate": 0.0009581295012909362, "loss": 4.1875, "step": 4930 }, { "epoch": 0.33530370974317164, "grad_norm": 0.7363927960395813, "learning_rate": 0.0009580870362821036, "loss": 3.7434, "step": 4935 }, { "epoch": 0.3356434298138334, "grad_norm": 0.5728069543838501, "learning_rate": 0.0009580445712732708, "loss": 3.6144, "step": 4940 }, { "epoch": 0.3359831498844952, "grad_norm": 0.516986072063446, "learning_rate": 0.0009580021062644382, "loss": 3.8537, "step": 4945 }, { "epoch": 0.336322869955157, "grad_norm": 0.7671200037002563, "learning_rate": 0.0009579596412556055, "loss": 3.8065, "step": 4950 }, { "epoch": 0.3366625900258187, "grad_norm": 0.5155797600746155, "learning_rate": 0.0009579171762467727, "loss": 3.7821, "step": 4955 }, { "epoch": 0.3370023100964805, "grad_norm": 0.5971325635910034, "learning_rate": 0.00095787471123794, "loss": 3.6163, "step": 4960 }, { "epoch": 0.33734203016714226, "grad_norm": 0.5434179306030273, "learning_rate": 0.0009578322462291072, "loss": 3.79, "step": 4965 }, { "epoch": 0.33768175023780406, "grad_norm": 0.9440898299217224, "learning_rate": 0.0009577897812202745, "loss": 3.505, "step": 4970 }, { "epoch": 0.3380214703084658, "grad_norm": 0.7863972187042236, "learning_rate": 0.0009577473162114418, "loss": 3.6921, "step": 4975 }, { "epoch": 0.3383611903791276, "grad_norm": 0.6850526928901672, "learning_rate": 0.0009577048512026091, "loss": 3.7286, "step": 4980 }, { "epoch": 0.3387009104497894, "grad_norm": 0.6525793671607971, "learning_rate": 0.0009576623861937764, "loss": 3.7199, "step": 4985 }, { "epoch": 0.33904063052045114, "grad_norm": 1.2761176824569702, "learning_rate": 0.0009576199211849437, "loss": 3.7743, "step": 4990 }, { "epoch": 0.33938035059111293, "grad_norm": 0.6671357154846191, "learning_rate": 0.0009575774561761109, "loss": 3.8975, "step": 4995 }, { "epoch": 0.3397200706617747, "grad_norm": 0.7961123585700989, "learning_rate": 0.0009575349911672781, "loss": 3.7009, "step": 5000 }, { "epoch": 0.3400597907324365, "grad_norm": 1.0096056461334229, "learning_rate": 0.0009574925261584455, "loss": 3.9044, "step": 5005 }, { "epoch": 0.34039951080309827, "grad_norm": 0.7832284569740295, "learning_rate": 0.0009574500611496127, "loss": 3.6719, "step": 5010 }, { "epoch": 0.34073923087376, "grad_norm": 0.738598644733429, "learning_rate": 0.00095740759614078, "loss": 3.5275, "step": 5015 }, { "epoch": 0.3410789509444218, "grad_norm": 0.7370604276657104, "learning_rate": 0.0009573651311319474, "loss": 3.5247, "step": 5020 }, { "epoch": 0.34141867101508355, "grad_norm": 0.8083518743515015, "learning_rate": 0.0009573226661231146, "loss": 3.4899, "step": 5025 }, { "epoch": 0.34175839108574535, "grad_norm": 0.7787097692489624, "learning_rate": 0.0009572802011142818, "loss": 3.6516, "step": 5030 }, { "epoch": 0.34209811115640715, "grad_norm": 0.762360155582428, "learning_rate": 0.0009572377361054492, "loss": 3.5661, "step": 5035 }, { "epoch": 0.3424378312270689, "grad_norm": 0.8215653300285339, "learning_rate": 0.0009571952710966164, "loss": 3.5784, "step": 5040 }, { "epoch": 0.3427775512977307, "grad_norm": 0.7317585349082947, "learning_rate": 0.0009571528060877836, "loss": 3.7503, "step": 5045 }, { "epoch": 0.34311727136839243, "grad_norm": 0.8211789727210999, "learning_rate": 0.0009571103410789511, "loss": 3.7904, "step": 5050 }, { "epoch": 0.3434569914390542, "grad_norm": 0.49874982237815857, "learning_rate": 0.0009570678760701183, "loss": 3.9742, "step": 5055 }, { "epoch": 0.34379671150971597, "grad_norm": 0.8592227697372437, "learning_rate": 0.0009570254110612855, "loss": 3.5196, "step": 5060 }, { "epoch": 0.34413643158037777, "grad_norm": 0.746721625328064, "learning_rate": 0.0009569829460524528, "loss": 3.4566, "step": 5065 }, { "epoch": 0.34447615165103956, "grad_norm": 0.6125968098640442, "learning_rate": 0.0009569404810436201, "loss": 3.6418, "step": 5070 }, { "epoch": 0.3448158717217013, "grad_norm": 0.7079349756240845, "learning_rate": 0.0009568980160347873, "loss": 3.5957, "step": 5075 }, { "epoch": 0.3451555917923631, "grad_norm": 0.77970290184021, "learning_rate": 0.0009568555510259546, "loss": 3.6652, "step": 5080 }, { "epoch": 0.34549531186302485, "grad_norm": 1.1659437417984009, "learning_rate": 0.000956813086017122, "loss": 3.6127, "step": 5085 }, { "epoch": 0.34583503193368664, "grad_norm": 0.8484855890274048, "learning_rate": 0.0009567706210082892, "loss": 3.5648, "step": 5090 }, { "epoch": 0.34617475200434844, "grad_norm": 1.0683693885803223, "learning_rate": 0.0009567281559994565, "loss": 3.8022, "step": 5095 }, { "epoch": 0.3465144720750102, "grad_norm": 0.6394113898277283, "learning_rate": 0.0009566856909906237, "loss": 3.4618, "step": 5100 }, { "epoch": 0.346854192145672, "grad_norm": 0.6517549753189087, "learning_rate": 0.000956643225981791, "loss": 3.7026, "step": 5105 }, { "epoch": 0.3471939122163337, "grad_norm": 0.6326296329498291, "learning_rate": 0.0009566007609729583, "loss": 3.7764, "step": 5110 }, { "epoch": 0.3475336322869955, "grad_norm": 0.7380351424217224, "learning_rate": 0.0009565582959641255, "loss": 3.7525, "step": 5115 }, { "epoch": 0.3478733523576573, "grad_norm": 0.6437528133392334, "learning_rate": 0.0009565158309552929, "loss": 3.6299, "step": 5120 }, { "epoch": 0.34821307242831906, "grad_norm": 0.5986687541007996, "learning_rate": 0.0009564733659464602, "loss": 3.8119, "step": 5125 }, { "epoch": 0.34855279249898086, "grad_norm": 0.5814089775085449, "learning_rate": 0.0009564309009376274, "loss": 3.5218, "step": 5130 }, { "epoch": 0.3488925125696426, "grad_norm": 0.677362322807312, "learning_rate": 0.0009563884359287947, "loss": 3.5475, "step": 5135 }, { "epoch": 0.3492322326403044, "grad_norm": 0.6157347559928894, "learning_rate": 0.000956345970919962, "loss": 3.6541, "step": 5140 }, { "epoch": 0.34957195271096614, "grad_norm": 0.7887200117111206, "learning_rate": 0.0009563035059111292, "loss": 3.4216, "step": 5145 }, { "epoch": 0.34991167278162794, "grad_norm": 0.7251083254814148, "learning_rate": 0.0009562610409022964, "loss": 3.8344, "step": 5150 }, { "epoch": 0.35025139285228973, "grad_norm": 0.9756469130516052, "learning_rate": 0.0009562185758934639, "loss": 3.5152, "step": 5155 }, { "epoch": 0.3505911129229515, "grad_norm": 0.6757463216781616, "learning_rate": 0.0009561761108846311, "loss": 3.9051, "step": 5160 }, { "epoch": 0.3509308329936133, "grad_norm": 0.5955451130867004, "learning_rate": 0.0009561336458757983, "loss": 3.7723, "step": 5165 }, { "epoch": 0.351270553064275, "grad_norm": 0.59389328956604, "learning_rate": 0.0009560911808669657, "loss": 3.6294, "step": 5170 }, { "epoch": 0.3516102731349368, "grad_norm": 0.6841652989387512, "learning_rate": 0.0009560487158581329, "loss": 3.485, "step": 5175 }, { "epoch": 0.3519499932055986, "grad_norm": 0.8480034470558167, "learning_rate": 0.0009560062508493001, "loss": 3.8257, "step": 5180 }, { "epoch": 0.35228971327626035, "grad_norm": 0.8432101011276245, "learning_rate": 0.0009559637858404675, "loss": 3.6208, "step": 5185 }, { "epoch": 0.35262943334692215, "grad_norm": 0.9456750750541687, "learning_rate": 0.0009559213208316348, "loss": 3.809, "step": 5190 }, { "epoch": 0.3529691534175839, "grad_norm": 0.6838685274124146, "learning_rate": 0.000955878855822802, "loss": 3.6456, "step": 5195 }, { "epoch": 0.3533088734882457, "grad_norm": 0.716590940952301, "learning_rate": 0.0009558363908139693, "loss": 3.7528, "step": 5200 }, { "epoch": 0.3536485935589075, "grad_norm": 0.7408686876296997, "learning_rate": 0.0009557939258051366, "loss": 3.5792, "step": 5205 }, { "epoch": 0.35398831362956923, "grad_norm": 0.8332806825637817, "learning_rate": 0.0009557514607963038, "loss": 3.797, "step": 5210 }, { "epoch": 0.354328033700231, "grad_norm": 0.6005328893661499, "learning_rate": 0.0009557089957874711, "loss": 3.8753, "step": 5215 }, { "epoch": 0.35466775377089277, "grad_norm": 0.8470065593719482, "learning_rate": 0.0009556665307786384, "loss": 3.8303, "step": 5220 }, { "epoch": 0.35500747384155457, "grad_norm": 0.615916907787323, "learning_rate": 0.0009556240657698057, "loss": 3.6057, "step": 5225 }, { "epoch": 0.3553471939122163, "grad_norm": 0.5892014503479004, "learning_rate": 0.000955581600760973, "loss": 3.6605, "step": 5230 }, { "epoch": 0.3556869139828781, "grad_norm": 0.6659533977508545, "learning_rate": 0.0009555391357521403, "loss": 3.5359, "step": 5235 }, { "epoch": 0.3560266340535399, "grad_norm": 0.5987432599067688, "learning_rate": 0.0009554966707433075, "loss": 3.7816, "step": 5240 }, { "epoch": 0.35636635412420165, "grad_norm": 0.6748421788215637, "learning_rate": 0.0009554542057344748, "loss": 3.5962, "step": 5245 }, { "epoch": 0.35670607419486344, "grad_norm": 0.6741753816604614, "learning_rate": 0.000955411740725642, "loss": 3.9455, "step": 5250 }, { "epoch": 0.3570457942655252, "grad_norm": 0.6069574952125549, "learning_rate": 0.0009553692757168093, "loss": 3.5491, "step": 5255 }, { "epoch": 0.357385514336187, "grad_norm": 0.6320086717605591, "learning_rate": 0.0009553268107079767, "loss": 3.6434, "step": 5260 }, { "epoch": 0.3577252344068488, "grad_norm": 0.6192067861557007, "learning_rate": 0.0009552843456991439, "loss": 3.4161, "step": 5265 }, { "epoch": 0.3580649544775105, "grad_norm": 0.6193461418151855, "learning_rate": 0.0009552418806903112, "loss": 3.5882, "step": 5270 }, { "epoch": 0.3584046745481723, "grad_norm": 0.6376737952232361, "learning_rate": 0.0009551994156814785, "loss": 3.8481, "step": 5275 }, { "epoch": 0.35874439461883406, "grad_norm": 0.5878989100456238, "learning_rate": 0.0009551569506726457, "loss": 3.6245, "step": 5280 }, { "epoch": 0.35908411468949586, "grad_norm": 0.6927847266197205, "learning_rate": 0.0009551144856638131, "loss": 3.7532, "step": 5285 }, { "epoch": 0.35942383476015766, "grad_norm": 0.8402261137962341, "learning_rate": 0.0009550720206549803, "loss": 3.7789, "step": 5290 }, { "epoch": 0.3597635548308194, "grad_norm": 0.7300997972488403, "learning_rate": 0.0009550295556461476, "loss": 3.9195, "step": 5295 }, { "epoch": 0.3601032749014812, "grad_norm": 0.6109755039215088, "learning_rate": 0.000954987090637315, "loss": 3.8455, "step": 5300 }, { "epoch": 0.36044299497214294, "grad_norm": 0.5649875998497009, "learning_rate": 0.0009549446256284822, "loss": 3.9588, "step": 5305 }, { "epoch": 0.36078271504280474, "grad_norm": 0.7442036867141724, "learning_rate": 0.0009549021606196494, "loss": 3.8603, "step": 5310 }, { "epoch": 0.3611224351134665, "grad_norm": 0.5968815088272095, "learning_rate": 0.0009548596956108167, "loss": 3.9566, "step": 5315 }, { "epoch": 0.3614621551841283, "grad_norm": 0.6159237623214722, "learning_rate": 0.000954817230601984, "loss": 3.6103, "step": 5320 }, { "epoch": 0.3618018752547901, "grad_norm": 1.258449673652649, "learning_rate": 0.0009547747655931512, "loss": 3.9607, "step": 5325 }, { "epoch": 0.3621415953254518, "grad_norm": 0.5789459347724915, "learning_rate": 0.0009547323005843186, "loss": 3.7212, "step": 5330 }, { "epoch": 0.3624813153961136, "grad_norm": 0.9337835907936096, "learning_rate": 0.0009546898355754859, "loss": 4.0227, "step": 5335 }, { "epoch": 0.36282103546677535, "grad_norm": 0.8357377648353577, "learning_rate": 0.0009546473705666531, "loss": 3.6377, "step": 5340 }, { "epoch": 0.36316075553743715, "grad_norm": 0.7306584715843201, "learning_rate": 0.0009546049055578204, "loss": 3.6089, "step": 5345 }, { "epoch": 0.36350047560809895, "grad_norm": 12.268057823181152, "learning_rate": 0.0009545624405489876, "loss": 3.7759, "step": 5350 }, { "epoch": 0.3638401956787607, "grad_norm": 0.5472047924995422, "learning_rate": 0.0009545199755401549, "loss": 3.3878, "step": 5355 }, { "epoch": 0.3641799157494225, "grad_norm": 0.7283962965011597, "learning_rate": 0.0009544775105313222, "loss": 3.5856, "step": 5360 }, { "epoch": 0.36451963582008423, "grad_norm": 0.9926034212112427, "learning_rate": 0.0009544350455224895, "loss": 3.8012, "step": 5365 }, { "epoch": 0.36485935589074603, "grad_norm": 0.7117539644241333, "learning_rate": 0.0009543925805136568, "loss": 3.441, "step": 5370 }, { "epoch": 0.3651990759614078, "grad_norm": 0.6438385248184204, "learning_rate": 0.0009543501155048241, "loss": 3.6433, "step": 5375 }, { "epoch": 0.36553879603206957, "grad_norm": 0.7695574760437012, "learning_rate": 0.0009543076504959913, "loss": 3.5963, "step": 5380 }, { "epoch": 0.36587851610273137, "grad_norm": 0.5929762125015259, "learning_rate": 0.0009542651854871585, "loss": 3.8106, "step": 5385 }, { "epoch": 0.3662182361733931, "grad_norm": 0.6758908629417419, "learning_rate": 0.0009542227204783259, "loss": 3.6554, "step": 5390 }, { "epoch": 0.3665579562440549, "grad_norm": 0.621306300163269, "learning_rate": 0.0009541802554694931, "loss": 3.5419, "step": 5395 }, { "epoch": 0.36689767631471665, "grad_norm": 0.6142150163650513, "learning_rate": 0.0009541377904606604, "loss": 3.6943, "step": 5400 }, { "epoch": 0.36723739638537845, "grad_norm": 0.8195553421974182, "learning_rate": 0.0009540953254518278, "loss": 3.4791, "step": 5405 }, { "epoch": 0.36757711645604024, "grad_norm": 0.7049173712730408, "learning_rate": 0.000954052860442995, "loss": 3.5835, "step": 5410 }, { "epoch": 0.367916836526702, "grad_norm": 0.6227113008499146, "learning_rate": 0.0009540103954341622, "loss": 3.7407, "step": 5415 }, { "epoch": 0.3682565565973638, "grad_norm": 0.5784620642662048, "learning_rate": 0.0009539679304253296, "loss": 3.6682, "step": 5420 }, { "epoch": 0.3685962766680255, "grad_norm": 0.8935307860374451, "learning_rate": 0.0009539254654164968, "loss": 3.4359, "step": 5425 }, { "epoch": 0.3689359967386873, "grad_norm": 0.5990920662879944, "learning_rate": 0.000953883000407664, "loss": 3.7232, "step": 5430 }, { "epoch": 0.3692757168093491, "grad_norm": 0.8034440875053406, "learning_rate": 0.0009538405353988315, "loss": 3.6245, "step": 5435 }, { "epoch": 0.36961543688001086, "grad_norm": 0.765920877456665, "learning_rate": 0.0009537980703899987, "loss": 3.4965, "step": 5440 }, { "epoch": 0.36995515695067266, "grad_norm": 0.6728287935256958, "learning_rate": 0.0009537556053811659, "loss": 3.809, "step": 5445 }, { "epoch": 0.3702948770213344, "grad_norm": 0.7797513604164124, "learning_rate": 0.0009537131403723332, "loss": 3.5527, "step": 5450 }, { "epoch": 0.3706345970919962, "grad_norm": 0.7652395963668823, "learning_rate": 0.0009536706753635005, "loss": 3.8327, "step": 5455 }, { "epoch": 0.370974317162658, "grad_norm": 0.6470797657966614, "learning_rate": 0.0009536282103546677, "loss": 3.6113, "step": 5460 }, { "epoch": 0.37131403723331974, "grad_norm": 0.8430959582328796, "learning_rate": 0.0009535857453458351, "loss": 3.5643, "step": 5465 }, { "epoch": 0.37165375730398154, "grad_norm": 0.8380835652351379, "learning_rate": 0.0009535432803370024, "loss": 3.7893, "step": 5470 }, { "epoch": 0.3719934773746433, "grad_norm": 0.6937934160232544, "learning_rate": 0.0009535008153281696, "loss": 3.8825, "step": 5475 }, { "epoch": 0.3723331974453051, "grad_norm": 0.6520200371742249, "learning_rate": 0.0009534583503193369, "loss": 3.8422, "step": 5480 }, { "epoch": 0.3726729175159668, "grad_norm": 0.7678138613700867, "learning_rate": 0.0009534158853105042, "loss": 4.0005, "step": 5485 }, { "epoch": 0.3730126375866286, "grad_norm": 0.7923406362533569, "learning_rate": 0.0009533734203016714, "loss": 3.6193, "step": 5490 }, { "epoch": 0.3733523576572904, "grad_norm": 0.674468457698822, "learning_rate": 0.0009533309552928387, "loss": 3.6924, "step": 5495 }, { "epoch": 0.37369207772795215, "grad_norm": 0.6791908740997314, "learning_rate": 0.000953288490284006, "loss": 3.8278, "step": 5500 }, { "epoch": 0.37403179779861395, "grad_norm": 0.7335380911827087, "learning_rate": 0.0009532460252751733, "loss": 3.8782, "step": 5505 }, { "epoch": 0.3743715178692757, "grad_norm": 0.6129330396652222, "learning_rate": 0.0009532035602663406, "loss": 3.5498, "step": 5510 }, { "epoch": 0.3747112379399375, "grad_norm": 0.8125602602958679, "learning_rate": 0.0009531610952575078, "loss": 3.8319, "step": 5515 }, { "epoch": 0.3750509580105993, "grad_norm": 0.7539558410644531, "learning_rate": 0.0009531186302486751, "loss": 3.4676, "step": 5520 }, { "epoch": 0.37539067808126103, "grad_norm": 0.6741296052932739, "learning_rate": 0.0009530761652398424, "loss": 3.5679, "step": 5525 }, { "epoch": 0.37573039815192283, "grad_norm": 0.762295663356781, "learning_rate": 0.0009530337002310096, "loss": 3.6386, "step": 5530 }, { "epoch": 0.37607011822258457, "grad_norm": 0.5959886312484741, "learning_rate": 0.000952991235222177, "loss": 4.0023, "step": 5535 }, { "epoch": 0.37640983829324637, "grad_norm": 0.6451983451843262, "learning_rate": 0.0009529487702133443, "loss": 3.7566, "step": 5540 }, { "epoch": 0.37674955836390817, "grad_norm": 0.6125621795654297, "learning_rate": 0.0009529063052045115, "loss": 4.0503, "step": 5545 }, { "epoch": 0.3770892784345699, "grad_norm": 0.5408923029899597, "learning_rate": 0.0009528638401956787, "loss": 3.882, "step": 5550 }, { "epoch": 0.3774289985052317, "grad_norm": 0.7112954258918762, "learning_rate": 0.0009528213751868461, "loss": 3.9079, "step": 5555 }, { "epoch": 0.37776871857589345, "grad_norm": 0.9264182448387146, "learning_rate": 0.0009527789101780133, "loss": 3.8527, "step": 5560 }, { "epoch": 0.37810843864655524, "grad_norm": 0.8550432920455933, "learning_rate": 0.0009527364451691805, "loss": 3.5968, "step": 5565 }, { "epoch": 0.378448158717217, "grad_norm": 0.7274702787399292, "learning_rate": 0.000952693980160348, "loss": 3.7561, "step": 5570 }, { "epoch": 0.3787878787878788, "grad_norm": 0.6404582262039185, "learning_rate": 0.0009526515151515152, "loss": 3.5176, "step": 5575 }, { "epoch": 0.3791275988585406, "grad_norm": 0.7663081288337708, "learning_rate": 0.0009526090501426824, "loss": 3.5577, "step": 5580 }, { "epoch": 0.3794673189292023, "grad_norm": 0.6774986386299133, "learning_rate": 0.0009525665851338498, "loss": 3.6109, "step": 5585 }, { "epoch": 0.3798070389998641, "grad_norm": 0.6545806527137756, "learning_rate": 0.000952524120125017, "loss": 3.506, "step": 5590 }, { "epoch": 0.38014675907052586, "grad_norm": 0.6958641409873962, "learning_rate": 0.0009524816551161842, "loss": 3.5987, "step": 5595 }, { "epoch": 0.38048647914118766, "grad_norm": 0.5957985520362854, "learning_rate": 0.0009524391901073515, "loss": 3.901, "step": 5600 }, { "epoch": 0.38082619921184946, "grad_norm": 0.67305988073349, "learning_rate": 0.0009523967250985189, "loss": 3.7973, "step": 5605 }, { "epoch": 0.3811659192825112, "grad_norm": 0.7090798020362854, "learning_rate": 0.0009523542600896861, "loss": 3.5519, "step": 5610 }, { "epoch": 0.381505639353173, "grad_norm": 0.6447020769119263, "learning_rate": 0.0009523117950808534, "loss": 3.6189, "step": 5615 }, { "epoch": 0.38184535942383474, "grad_norm": 0.6408648490905762, "learning_rate": 0.0009522693300720207, "loss": 3.7404, "step": 5620 }, { "epoch": 0.38218507949449654, "grad_norm": 0.7665295004844666, "learning_rate": 0.000952226865063188, "loss": 3.6038, "step": 5625 }, { "epoch": 0.38252479956515834, "grad_norm": 0.7357596755027771, "learning_rate": 0.0009521844000543552, "loss": 3.8106, "step": 5630 }, { "epoch": 0.3828645196358201, "grad_norm": 0.7217463850975037, "learning_rate": 0.0009521419350455224, "loss": 3.5601, "step": 5635 }, { "epoch": 0.3832042397064819, "grad_norm": 0.6828171014785767, "learning_rate": 0.0009520994700366899, "loss": 3.8198, "step": 5640 }, { "epoch": 0.3835439597771436, "grad_norm": 0.7195796966552734, "learning_rate": 0.0009520570050278571, "loss": 3.7945, "step": 5645 }, { "epoch": 0.3838836798478054, "grad_norm": 0.8151988983154297, "learning_rate": 0.0009520145400190243, "loss": 3.6095, "step": 5650 }, { "epoch": 0.38422339991846716, "grad_norm": 0.676535964012146, "learning_rate": 0.0009519720750101917, "loss": 3.8636, "step": 5655 }, { "epoch": 0.38456311998912895, "grad_norm": 0.59398353099823, "learning_rate": 0.0009519296100013589, "loss": 3.6462, "step": 5660 }, { "epoch": 0.38490284005979075, "grad_norm": 0.7401514053344727, "learning_rate": 0.0009518871449925261, "loss": 3.6619, "step": 5665 }, { "epoch": 0.3852425601304525, "grad_norm": 0.7680660486221313, "learning_rate": 0.0009518446799836935, "loss": 3.5895, "step": 5670 }, { "epoch": 0.3855822802011143, "grad_norm": 0.6434188485145569, "learning_rate": 0.0009518022149748608, "loss": 3.6502, "step": 5675 }, { "epoch": 0.38592200027177603, "grad_norm": 0.735846996307373, "learning_rate": 0.000951759749966028, "loss": 3.5815, "step": 5680 }, { "epoch": 0.38626172034243783, "grad_norm": 0.623301088809967, "learning_rate": 0.0009517172849571954, "loss": 3.5788, "step": 5685 }, { "epoch": 0.38660144041309963, "grad_norm": 0.7407659292221069, "learning_rate": 0.0009516748199483626, "loss": 3.8412, "step": 5690 }, { "epoch": 0.38694116048376137, "grad_norm": 0.6278685331344604, "learning_rate": 0.0009516323549395298, "loss": 3.5552, "step": 5695 }, { "epoch": 0.38728088055442317, "grad_norm": 0.720552921295166, "learning_rate": 0.0009515898899306971, "loss": 3.7116, "step": 5700 }, { "epoch": 0.3876206006250849, "grad_norm": 0.7158149480819702, "learning_rate": 0.0009515474249218644, "loss": 3.6758, "step": 5705 }, { "epoch": 0.3879603206957467, "grad_norm": 0.6728513240814209, "learning_rate": 0.0009515049599130317, "loss": 3.7136, "step": 5710 }, { "epoch": 0.3883000407664085, "grad_norm": 0.6863958239555359, "learning_rate": 0.000951462494904199, "loss": 3.8059, "step": 5715 }, { "epoch": 0.38863976083707025, "grad_norm": 0.6744656562805176, "learning_rate": 0.0009514200298953663, "loss": 3.7461, "step": 5720 }, { "epoch": 0.38897948090773204, "grad_norm": 0.6520145535469055, "learning_rate": 0.0009513775648865335, "loss": 3.7768, "step": 5725 }, { "epoch": 0.3893192009783938, "grad_norm": 0.6971132755279541, "learning_rate": 0.0009513350998777008, "loss": 3.7232, "step": 5730 }, { "epoch": 0.3896589210490556, "grad_norm": 0.8385533690452576, "learning_rate": 0.000951292634868868, "loss": 3.6967, "step": 5735 }, { "epoch": 0.3899986411197173, "grad_norm": 0.6962178945541382, "learning_rate": 0.0009512501698600353, "loss": 3.9476, "step": 5740 }, { "epoch": 0.3903383611903791, "grad_norm": 0.612159788608551, "learning_rate": 0.0009512077048512027, "loss": 3.726, "step": 5745 }, { "epoch": 0.3906780812610409, "grad_norm": 0.6508966088294983, "learning_rate": 0.0009511652398423699, "loss": 3.7884, "step": 5750 }, { "epoch": 0.39101780133170266, "grad_norm": 0.6837339997291565, "learning_rate": 0.0009511227748335372, "loss": 3.8072, "step": 5755 }, { "epoch": 0.39135752140236446, "grad_norm": 0.7513009905815125, "learning_rate": 0.0009510803098247045, "loss": 3.8404, "step": 5760 }, { "epoch": 0.3916972414730262, "grad_norm": 0.6866264343261719, "learning_rate": 0.0009510378448158717, "loss": 3.5156, "step": 5765 }, { "epoch": 0.392036961543688, "grad_norm": 0.7577829957008362, "learning_rate": 0.000950995379807039, "loss": 3.9027, "step": 5770 }, { "epoch": 0.3923766816143498, "grad_norm": 0.8748649954795837, "learning_rate": 0.0009509529147982063, "loss": 3.8692, "step": 5775 }, { "epoch": 0.39271640168501154, "grad_norm": 0.7105889916419983, "learning_rate": 0.0009509104497893736, "loss": 3.531, "step": 5780 }, { "epoch": 0.39305612175567334, "grad_norm": 0.7299677133560181, "learning_rate": 0.0009508679847805408, "loss": 3.8598, "step": 5785 }, { "epoch": 0.3933958418263351, "grad_norm": 0.7281935811042786, "learning_rate": 0.0009508255197717082, "loss": 3.8852, "step": 5790 }, { "epoch": 0.3937355618969969, "grad_norm": 0.6584429740905762, "learning_rate": 0.0009507830547628754, "loss": 3.7188, "step": 5795 }, { "epoch": 0.3940752819676587, "grad_norm": 0.9629406332969666, "learning_rate": 0.0009507405897540426, "loss": 3.8586, "step": 5800 }, { "epoch": 0.3944150020383204, "grad_norm": 0.7765294909477234, "learning_rate": 0.00095069812474521, "loss": 3.842, "step": 5805 }, { "epoch": 0.3947547221089822, "grad_norm": 0.7289807200431824, "learning_rate": 0.0009506556597363772, "loss": 3.7421, "step": 5810 }, { "epoch": 0.39509444217964396, "grad_norm": 0.8604475855827332, "learning_rate": 0.0009506131947275445, "loss": 3.434, "step": 5815 }, { "epoch": 0.39543416225030575, "grad_norm": 0.676457405090332, "learning_rate": 0.0009505707297187119, "loss": 3.8626, "step": 5820 }, { "epoch": 0.3957738823209675, "grad_norm": 2.1074578762054443, "learning_rate": 0.0009505282647098791, "loss": 3.8085, "step": 5825 }, { "epoch": 0.3961136023916293, "grad_norm": 0.8767901062965393, "learning_rate": 0.0009504857997010463, "loss": 3.5175, "step": 5830 }, { "epoch": 0.3964533224622911, "grad_norm": 0.6856021285057068, "learning_rate": 0.0009504433346922136, "loss": 3.8532, "step": 5835 }, { "epoch": 0.39679304253295283, "grad_norm": 0.6957550644874573, "learning_rate": 0.0009504008696833809, "loss": 3.6103, "step": 5840 }, { "epoch": 0.39713276260361463, "grad_norm": 1.1055721044540405, "learning_rate": 0.0009503584046745481, "loss": 3.6346, "step": 5845 }, { "epoch": 0.3974724826742764, "grad_norm": 0.6748464703559875, "learning_rate": 0.0009503159396657155, "loss": 3.5869, "step": 5850 }, { "epoch": 0.39781220274493817, "grad_norm": 0.754275381565094, "learning_rate": 0.0009502734746568828, "loss": 3.7318, "step": 5855 }, { "epoch": 0.39815192281559997, "grad_norm": 0.5751346945762634, "learning_rate": 0.00095023100964805, "loss": 3.6952, "step": 5860 }, { "epoch": 0.3984916428862617, "grad_norm": 0.571988582611084, "learning_rate": 0.0009501885446392173, "loss": 3.8725, "step": 5865 }, { "epoch": 0.3988313629569235, "grad_norm": 0.7949030995368958, "learning_rate": 0.0009501460796303846, "loss": 3.6394, "step": 5870 }, { "epoch": 0.39917108302758525, "grad_norm": 0.5735267996788025, "learning_rate": 0.0009501036146215518, "loss": 3.4689, "step": 5875 }, { "epoch": 0.39951080309824705, "grad_norm": 0.8305995464324951, "learning_rate": 0.0009500611496127191, "loss": 3.7559, "step": 5880 }, { "epoch": 0.39985052316890884, "grad_norm": 0.6194843053817749, "learning_rate": 0.0009500186846038864, "loss": 3.902, "step": 5885 }, { "epoch": 0.4001902432395706, "grad_norm": 0.6379867792129517, "learning_rate": 0.0009499762195950537, "loss": 3.7771, "step": 5890 }, { "epoch": 0.4005299633102324, "grad_norm": 0.6605093479156494, "learning_rate": 0.000949933754586221, "loss": 3.5102, "step": 5895 }, { "epoch": 0.4008696833808941, "grad_norm": 0.9110813736915588, "learning_rate": 0.0009498912895773882, "loss": 3.662, "step": 5900 }, { "epoch": 0.4012094034515559, "grad_norm": 0.6358461976051331, "learning_rate": 0.0009498488245685555, "loss": 3.8034, "step": 5905 }, { "epoch": 0.40154912352221767, "grad_norm": 0.7812647819519043, "learning_rate": 0.0009498063595597228, "loss": 3.7213, "step": 5910 }, { "epoch": 0.40188884359287946, "grad_norm": 0.7158457636833191, "learning_rate": 0.00094976389455089, "loss": 3.7999, "step": 5915 }, { "epoch": 0.40222856366354126, "grad_norm": 1.4320002794265747, "learning_rate": 0.0009497214295420574, "loss": 3.8032, "step": 5920 }, { "epoch": 0.402568283734203, "grad_norm": 0.6569749712944031, "learning_rate": 0.0009496789645332247, "loss": 3.6588, "step": 5925 }, { "epoch": 0.4029080038048648, "grad_norm": 0.7718832492828369, "learning_rate": 0.0009496364995243919, "loss": 3.6084, "step": 5930 }, { "epoch": 0.40324772387552654, "grad_norm": 0.6366055011749268, "learning_rate": 0.0009495940345155591, "loss": 3.8482, "step": 5935 }, { "epoch": 0.40358744394618834, "grad_norm": 0.6743378639221191, "learning_rate": 0.0009495515695067265, "loss": 3.6285, "step": 5940 }, { "epoch": 0.40392716401685014, "grad_norm": 0.7103203535079956, "learning_rate": 0.0009495091044978937, "loss": 3.728, "step": 5945 }, { "epoch": 0.4042668840875119, "grad_norm": 0.6597638130187988, "learning_rate": 0.0009494666394890609, "loss": 3.9413, "step": 5950 }, { "epoch": 0.4046066041581737, "grad_norm": 0.6881067752838135, "learning_rate": 0.0009494241744802284, "loss": 3.8261, "step": 5955 }, { "epoch": 0.4049463242288354, "grad_norm": 0.696105420589447, "learning_rate": 0.0009493817094713956, "loss": 3.7296, "step": 5960 }, { "epoch": 0.4052860442994972, "grad_norm": 0.6859625577926636, "learning_rate": 0.0009493392444625629, "loss": 3.8281, "step": 5965 }, { "epoch": 0.405625764370159, "grad_norm": 0.7748442888259888, "learning_rate": 0.0009492967794537302, "loss": 4.0015, "step": 5970 }, { "epoch": 0.40596548444082076, "grad_norm": 0.6900736093521118, "learning_rate": 0.0009492543144448974, "loss": 3.4926, "step": 5975 }, { "epoch": 0.40630520451148255, "grad_norm": 0.7808459401130676, "learning_rate": 0.0009492118494360647, "loss": 3.6567, "step": 5980 }, { "epoch": 0.4066449245821443, "grad_norm": 1.1225916147232056, "learning_rate": 0.000949169384427232, "loss": 3.5888, "step": 5985 }, { "epoch": 0.4069846446528061, "grad_norm": 0.8027675151824951, "learning_rate": 0.0009491269194183993, "loss": 4.0031, "step": 5990 }, { "epoch": 0.40732436472346784, "grad_norm": 0.6575860977172852, "learning_rate": 0.0009490844544095666, "loss": 3.8601, "step": 5995 }, { "epoch": 0.40766408479412963, "grad_norm": 0.6641314029693604, "learning_rate": 0.0009490419894007338, "loss": 3.6208, "step": 6000 }, { "epoch": 0.40800380486479143, "grad_norm": 0.7412399053573608, "learning_rate": 0.0009489995243919011, "loss": 3.7681, "step": 6005 }, { "epoch": 0.4083435249354532, "grad_norm": 0.8145126104354858, "learning_rate": 0.0009489570593830684, "loss": 3.7507, "step": 6010 }, { "epoch": 0.40868324500611497, "grad_norm": 0.6852948665618896, "learning_rate": 0.0009489145943742356, "loss": 3.6825, "step": 6015 }, { "epoch": 0.4090229650767767, "grad_norm": 0.7576074600219727, "learning_rate": 0.000948872129365403, "loss": 3.7356, "step": 6020 }, { "epoch": 0.4093626851474385, "grad_norm": 0.7892106771469116, "learning_rate": 0.0009488296643565703, "loss": 3.5851, "step": 6025 }, { "epoch": 0.4097024052181003, "grad_norm": 0.6661109924316406, "learning_rate": 0.0009487871993477375, "loss": 3.5746, "step": 6030 }, { "epoch": 0.41004212528876205, "grad_norm": 0.7307931184768677, "learning_rate": 0.0009487447343389047, "loss": 3.6414, "step": 6035 }, { "epoch": 0.41038184535942385, "grad_norm": 0.9471137523651123, "learning_rate": 0.0009487022693300721, "loss": 3.7123, "step": 6040 }, { "epoch": 0.4107215654300856, "grad_norm": 0.7316438555717468, "learning_rate": 0.0009486598043212393, "loss": 3.7229, "step": 6045 }, { "epoch": 0.4110612855007474, "grad_norm": 0.7826873660087585, "learning_rate": 0.0009486173393124065, "loss": 3.7605, "step": 6050 }, { "epoch": 0.4114010055714092, "grad_norm": 0.6444186568260193, "learning_rate": 0.000948574874303574, "loss": 3.5857, "step": 6055 }, { "epoch": 0.4117407256420709, "grad_norm": 0.7916932702064514, "learning_rate": 0.0009485324092947412, "loss": 3.5616, "step": 6060 }, { "epoch": 0.4120804457127327, "grad_norm": 0.9611313343048096, "learning_rate": 0.0009484899442859084, "loss": 3.832, "step": 6065 }, { "epoch": 0.41242016578339447, "grad_norm": 0.8715277314186096, "learning_rate": 0.0009484474792770758, "loss": 3.9989, "step": 6070 }, { "epoch": 0.41275988585405626, "grad_norm": 0.7321308255195618, "learning_rate": 0.000948405014268243, "loss": 3.2912, "step": 6075 }, { "epoch": 0.413099605924718, "grad_norm": 0.9948475360870361, "learning_rate": 0.0009483625492594102, "loss": 3.9157, "step": 6080 }, { "epoch": 0.4134393259953798, "grad_norm": 0.9011125564575195, "learning_rate": 0.0009483200842505775, "loss": 3.6813, "step": 6085 }, { "epoch": 0.4137790460660416, "grad_norm": 0.6323263645172119, "learning_rate": 0.0009482776192417449, "loss": 3.9186, "step": 6090 }, { "epoch": 0.41411876613670334, "grad_norm": 0.9080272912979126, "learning_rate": 0.0009482351542329121, "loss": 3.6284, "step": 6095 }, { "epoch": 0.41445848620736514, "grad_norm": 0.6488545536994934, "learning_rate": 0.0009481926892240794, "loss": 3.5882, "step": 6100 }, { "epoch": 0.4147982062780269, "grad_norm": 0.6264088153839111, "learning_rate": 0.0009481502242152467, "loss": 3.4939, "step": 6105 }, { "epoch": 0.4151379263486887, "grad_norm": 0.7170696258544922, "learning_rate": 0.0009481077592064139, "loss": 3.6584, "step": 6110 }, { "epoch": 0.4154776464193505, "grad_norm": 1.0384855270385742, "learning_rate": 0.0009480652941975812, "loss": 3.631, "step": 6115 }, { "epoch": 0.4158173664900122, "grad_norm": 0.7056418657302856, "learning_rate": 0.0009480228291887485, "loss": 3.9131, "step": 6120 }, { "epoch": 0.416157086560674, "grad_norm": 0.638219952583313, "learning_rate": 0.0009479803641799158, "loss": 3.7226, "step": 6125 }, { "epoch": 0.41649680663133576, "grad_norm": 0.6111626625061035, "learning_rate": 0.0009479378991710831, "loss": 3.5262, "step": 6130 }, { "epoch": 0.41683652670199756, "grad_norm": 0.886923611164093, "learning_rate": 0.0009478954341622503, "loss": 3.5885, "step": 6135 }, { "epoch": 0.41717624677265935, "grad_norm": 0.6608405113220215, "learning_rate": 0.0009478529691534176, "loss": 3.7589, "step": 6140 }, { "epoch": 0.4175159668433211, "grad_norm": 0.7782901525497437, "learning_rate": 0.0009478105041445849, "loss": 3.8576, "step": 6145 }, { "epoch": 0.4178556869139829, "grad_norm": 1.1632819175720215, "learning_rate": 0.0009477680391357521, "loss": 3.8477, "step": 6150 }, { "epoch": 0.41819540698464464, "grad_norm": 0.7602625489234924, "learning_rate": 0.0009477255741269194, "loss": 3.639, "step": 6155 }, { "epoch": 0.41853512705530643, "grad_norm": 0.7379399538040161, "learning_rate": 0.0009476831091180868, "loss": 3.7218, "step": 6160 }, { "epoch": 0.4188748471259682, "grad_norm": 0.7413083910942078, "learning_rate": 0.000947640644109254, "loss": 3.9456, "step": 6165 }, { "epoch": 0.41921456719663, "grad_norm": 0.755257785320282, "learning_rate": 0.0009475981791004213, "loss": 3.7673, "step": 6170 }, { "epoch": 0.41955428726729177, "grad_norm": 0.8536903262138367, "learning_rate": 0.0009475557140915886, "loss": 3.8075, "step": 6175 }, { "epoch": 0.4198940073379535, "grad_norm": 0.6336054801940918, "learning_rate": 0.0009475132490827558, "loss": 3.6452, "step": 6180 }, { "epoch": 0.4202337274086153, "grad_norm": 0.5919987559318542, "learning_rate": 0.000947470784073923, "loss": 3.693, "step": 6185 }, { "epoch": 0.42057344747927705, "grad_norm": 0.7092812657356262, "learning_rate": 0.0009474283190650904, "loss": 3.5212, "step": 6190 }, { "epoch": 0.42091316754993885, "grad_norm": 0.5136128664016724, "learning_rate": 0.0009473858540562577, "loss": 3.742, "step": 6195 }, { "epoch": 0.42125288762060065, "grad_norm": 0.710419774055481, "learning_rate": 0.0009473433890474249, "loss": 3.6801, "step": 6200 }, { "epoch": 0.4215926076912624, "grad_norm": 0.74549400806427, "learning_rate": 0.0009473009240385923, "loss": 3.7057, "step": 6205 }, { "epoch": 0.4219323277619242, "grad_norm": 0.7171955108642578, "learning_rate": 0.0009472584590297595, "loss": 3.6453, "step": 6210 }, { "epoch": 0.42227204783258593, "grad_norm": 0.776326060295105, "learning_rate": 0.0009472159940209267, "loss": 3.5003, "step": 6215 }, { "epoch": 0.4226117679032477, "grad_norm": 0.7289601564407349, "learning_rate": 0.000947173529012094, "loss": 3.6845, "step": 6220 }, { "epoch": 0.4229514879739095, "grad_norm": 0.694192111492157, "learning_rate": 0.0009471310640032613, "loss": 3.5497, "step": 6225 }, { "epoch": 0.42329120804457127, "grad_norm": 0.7362591028213501, "learning_rate": 0.0009470885989944286, "loss": 3.7047, "step": 6230 }, { "epoch": 0.42363092811523306, "grad_norm": 0.645523190498352, "learning_rate": 0.000947046133985596, "loss": 3.7183, "step": 6235 }, { "epoch": 0.4239706481858948, "grad_norm": 0.7591707110404968, "learning_rate": 0.0009470036689767632, "loss": 3.6192, "step": 6240 }, { "epoch": 0.4243103682565566, "grad_norm": 2.7486765384674072, "learning_rate": 0.0009469612039679304, "loss": 3.6772, "step": 6245 }, { "epoch": 0.42465008832721834, "grad_norm": 0.7472816109657288, "learning_rate": 0.0009469187389590977, "loss": 3.621, "step": 6250 }, { "epoch": 0.42498980839788014, "grad_norm": 0.6765187978744507, "learning_rate": 0.000946876273950265, "loss": 3.7256, "step": 6255 }, { "epoch": 0.42532952846854194, "grad_norm": 1.0844764709472656, "learning_rate": 0.0009468338089414322, "loss": 3.3585, "step": 6260 }, { "epoch": 0.4256692485392037, "grad_norm": 0.5765259861946106, "learning_rate": 0.0009467913439325996, "loss": 3.548, "step": 6265 }, { "epoch": 0.4260089686098655, "grad_norm": 1.046905279159546, "learning_rate": 0.0009467488789237669, "loss": 3.8785, "step": 6270 }, { "epoch": 0.4263486886805272, "grad_norm": 0.661917507648468, "learning_rate": 0.0009467064139149341, "loss": 3.5404, "step": 6275 }, { "epoch": 0.426688408751189, "grad_norm": 1.0002559423446655, "learning_rate": 0.0009466639489061014, "loss": 3.6421, "step": 6280 }, { "epoch": 0.4270281288218508, "grad_norm": 0.9296825528144836, "learning_rate": 0.0009466214838972686, "loss": 3.8708, "step": 6285 }, { "epoch": 0.42736784889251256, "grad_norm": 1.198769450187683, "learning_rate": 0.0009465790188884359, "loss": 3.618, "step": 6290 }, { "epoch": 0.42770756896317436, "grad_norm": 0.6735273003578186, "learning_rate": 0.0009465365538796032, "loss": 3.8058, "step": 6295 }, { "epoch": 0.4280472890338361, "grad_norm": 0.6104165315628052, "learning_rate": 0.0009464940888707705, "loss": 3.7062, "step": 6300 }, { "epoch": 0.4283870091044979, "grad_norm": 0.6903350949287415, "learning_rate": 0.0009464516238619379, "loss": 3.3921, "step": 6305 }, { "epoch": 0.4287267291751597, "grad_norm": 0.6779121160507202, "learning_rate": 0.0009464091588531051, "loss": 4.0328, "step": 6310 }, { "epoch": 0.42906644924582144, "grad_norm": 0.6903079748153687, "learning_rate": 0.0009463666938442723, "loss": 3.7839, "step": 6315 }, { "epoch": 0.42940616931648323, "grad_norm": 0.7634524703025818, "learning_rate": 0.0009463242288354397, "loss": 3.7203, "step": 6320 }, { "epoch": 0.429745889387145, "grad_norm": 0.8356140851974487, "learning_rate": 0.0009462817638266069, "loss": 3.5624, "step": 6325 }, { "epoch": 0.4300856094578068, "grad_norm": 1.1918925046920776, "learning_rate": 0.0009462392988177741, "loss": 3.597, "step": 6330 }, { "epoch": 0.4304253295284685, "grad_norm": 0.7054935097694397, "learning_rate": 0.0009461968338089415, "loss": 3.7081, "step": 6335 }, { "epoch": 0.4307650495991303, "grad_norm": 0.7931617498397827, "learning_rate": 0.0009461543688001088, "loss": 3.7453, "step": 6340 }, { "epoch": 0.4311047696697921, "grad_norm": 0.7819299101829529, "learning_rate": 0.000946111903791276, "loss": 3.5882, "step": 6345 }, { "epoch": 0.43144448974045385, "grad_norm": 0.7842747569084167, "learning_rate": 0.0009460694387824433, "loss": 3.6094, "step": 6350 }, { "epoch": 0.43178420981111565, "grad_norm": 0.7885016798973083, "learning_rate": 0.0009460269737736106, "loss": 3.6641, "step": 6355 }, { "epoch": 0.4321239298817774, "grad_norm": 0.7479512095451355, "learning_rate": 0.0009459845087647778, "loss": 3.4524, "step": 6360 }, { "epoch": 0.4324636499524392, "grad_norm": 0.6265522837638855, "learning_rate": 0.0009459420437559451, "loss": 3.7072, "step": 6365 }, { "epoch": 0.432803370023101, "grad_norm": 0.9189678430557251, "learning_rate": 0.0009458995787471125, "loss": 3.4026, "step": 6370 }, { "epoch": 0.43314309009376273, "grad_norm": 0.8275874853134155, "learning_rate": 0.0009458571137382797, "loss": 3.6262, "step": 6375 }, { "epoch": 0.4334828101644245, "grad_norm": 0.7689844965934753, "learning_rate": 0.000945814648729447, "loss": 3.7545, "step": 6380 }, { "epoch": 0.43382253023508627, "grad_norm": 0.9868364930152893, "learning_rate": 0.0009457721837206142, "loss": 3.7024, "step": 6385 }, { "epoch": 0.43416225030574807, "grad_norm": 0.8647677302360535, "learning_rate": 0.0009457297187117815, "loss": 3.6294, "step": 6390 }, { "epoch": 0.43450197037640986, "grad_norm": 0.8021628856658936, "learning_rate": 0.0009456872537029488, "loss": 3.8862, "step": 6395 }, { "epoch": 0.4348416904470716, "grad_norm": 0.8168643116950989, "learning_rate": 0.000945644788694116, "loss": 3.6425, "step": 6400 }, { "epoch": 0.4351814105177334, "grad_norm": 0.653694212436676, "learning_rate": 0.0009456023236852834, "loss": 3.7277, "step": 6405 }, { "epoch": 0.43552113058839514, "grad_norm": 0.7840037941932678, "learning_rate": 0.0009455598586764507, "loss": 3.6298, "step": 6410 }, { "epoch": 0.43586085065905694, "grad_norm": 0.8717862367630005, "learning_rate": 0.0009455173936676179, "loss": 3.6109, "step": 6415 }, { "epoch": 0.4362005707297187, "grad_norm": 0.587716281414032, "learning_rate": 0.0009454749286587851, "loss": 3.569, "step": 6420 }, { "epoch": 0.4365402908003805, "grad_norm": 0.7357481122016907, "learning_rate": 0.0009454324636499525, "loss": 3.5375, "step": 6425 }, { "epoch": 0.4368800108710423, "grad_norm": 0.8100959062576294, "learning_rate": 0.0009453899986411197, "loss": 3.8886, "step": 6430 }, { "epoch": 0.437219730941704, "grad_norm": 1.7049295902252197, "learning_rate": 0.0009453475336322869, "loss": 3.757, "step": 6435 }, { "epoch": 0.4375594510123658, "grad_norm": 0.7924312949180603, "learning_rate": 0.0009453050686234544, "loss": 3.5864, "step": 6440 }, { "epoch": 0.43789917108302756, "grad_norm": 1.9161287546157837, "learning_rate": 0.0009452626036146216, "loss": 3.6795, "step": 6445 }, { "epoch": 0.43823889115368936, "grad_norm": 0.7814688682556152, "learning_rate": 0.0009452201386057888, "loss": 3.6705, "step": 6450 }, { "epoch": 0.43857861122435116, "grad_norm": 0.7019619941711426, "learning_rate": 0.0009451776735969562, "loss": 3.9125, "step": 6455 }, { "epoch": 0.4389183312950129, "grad_norm": 0.7521041035652161, "learning_rate": 0.0009451352085881234, "loss": 3.5766, "step": 6460 }, { "epoch": 0.4392580513656747, "grad_norm": 0.9290032982826233, "learning_rate": 0.0009450927435792906, "loss": 3.6903, "step": 6465 }, { "epoch": 0.43959777143633644, "grad_norm": 0.8511236906051636, "learning_rate": 0.000945050278570458, "loss": 3.4806, "step": 6470 }, { "epoch": 0.43993749150699824, "grad_norm": 0.6680617928504944, "learning_rate": 0.0009450078135616253, "loss": 3.86, "step": 6475 }, { "epoch": 0.44027721157766003, "grad_norm": 0.6665207147598267, "learning_rate": 0.0009449653485527925, "loss": 3.8806, "step": 6480 }, { "epoch": 0.4406169316483218, "grad_norm": 0.7572169899940491, "learning_rate": 0.0009449228835439598, "loss": 3.4571, "step": 6485 }, { "epoch": 0.44095665171898357, "grad_norm": 0.7516233921051025, "learning_rate": 0.0009448804185351271, "loss": 3.9447, "step": 6490 }, { "epoch": 0.4412963717896453, "grad_norm": 0.9311526417732239, "learning_rate": 0.0009448379535262943, "loss": 3.6585, "step": 6495 }, { "epoch": 0.4416360918603071, "grad_norm": 0.9275510311126709, "learning_rate": 0.0009447954885174616, "loss": 3.9472, "step": 6500 }, { "epoch": 0.4419758119309689, "grad_norm": 0.8019735813140869, "learning_rate": 0.0009447530235086289, "loss": 3.5139, "step": 6505 }, { "epoch": 0.44231553200163065, "grad_norm": 1.6833178997039795, "learning_rate": 0.0009447105584997962, "loss": 3.8665, "step": 6510 }, { "epoch": 0.44265525207229245, "grad_norm": 0.7209401726722717, "learning_rate": 0.0009446680934909635, "loss": 3.5843, "step": 6515 }, { "epoch": 0.4429949721429542, "grad_norm": 0.8485277891159058, "learning_rate": 0.0009446256284821307, "loss": 3.442, "step": 6520 }, { "epoch": 0.443334692213616, "grad_norm": 0.8769036531448364, "learning_rate": 0.000944583163473298, "loss": 3.8422, "step": 6525 }, { "epoch": 0.44367441228427773, "grad_norm": 0.7882018685340881, "learning_rate": 0.0009445406984644653, "loss": 3.656, "step": 6530 }, { "epoch": 0.44401413235493953, "grad_norm": 0.7847372889518738, "learning_rate": 0.0009444982334556325, "loss": 3.5124, "step": 6535 }, { "epoch": 0.4443538524256013, "grad_norm": 0.7315607070922852, "learning_rate": 0.0009444557684467998, "loss": 3.5742, "step": 6540 }, { "epoch": 0.44469357249626307, "grad_norm": 0.6868340969085693, "learning_rate": 0.0009444133034379672, "loss": 3.8345, "step": 6545 }, { "epoch": 0.44503329256692487, "grad_norm": 0.8444815278053284, "learning_rate": 0.0009443708384291344, "loss": 3.4333, "step": 6550 }, { "epoch": 0.4453730126375866, "grad_norm": 0.7547391653060913, "learning_rate": 0.0009443283734203017, "loss": 3.8328, "step": 6555 }, { "epoch": 0.4457127327082484, "grad_norm": 0.655636727809906, "learning_rate": 0.000944285908411469, "loss": 3.942, "step": 6560 }, { "epoch": 0.4460524527789102, "grad_norm": 0.6583059430122375, "learning_rate": 0.0009442434434026362, "loss": 3.3732, "step": 6565 }, { "epoch": 0.44639217284957194, "grad_norm": 0.6351702809333801, "learning_rate": 0.0009442009783938034, "loss": 3.5253, "step": 6570 }, { "epoch": 0.44673189292023374, "grad_norm": 0.9433460831642151, "learning_rate": 0.0009441585133849709, "loss": 3.7905, "step": 6575 }, { "epoch": 0.4470716129908955, "grad_norm": 0.7520065307617188, "learning_rate": 0.0009441160483761381, "loss": 3.5835, "step": 6580 }, { "epoch": 0.4474113330615573, "grad_norm": 1.069012999534607, "learning_rate": 0.0009440735833673053, "loss": 3.5903, "step": 6585 }, { "epoch": 0.4477510531322191, "grad_norm": 0.6990821957588196, "learning_rate": 0.0009440311183584727, "loss": 3.6411, "step": 6590 }, { "epoch": 0.4480907732028808, "grad_norm": 0.6778050065040588, "learning_rate": 0.0009439886533496399, "loss": 3.6482, "step": 6595 }, { "epoch": 0.4484304932735426, "grad_norm": 0.6824806332588196, "learning_rate": 0.0009439461883408071, "loss": 3.8699, "step": 6600 }, { "epoch": 0.44877021334420436, "grad_norm": 0.6217116117477417, "learning_rate": 0.0009439037233319745, "loss": 3.5637, "step": 6605 }, { "epoch": 0.44910993341486616, "grad_norm": 0.783353328704834, "learning_rate": 0.0009438612583231418, "loss": 3.5869, "step": 6610 }, { "epoch": 0.4494496534855279, "grad_norm": 0.6910473108291626, "learning_rate": 0.000943818793314309, "loss": 3.8893, "step": 6615 }, { "epoch": 0.4497893735561897, "grad_norm": 0.6780043840408325, "learning_rate": 0.0009437763283054763, "loss": 3.6721, "step": 6620 }, { "epoch": 0.4501290936268515, "grad_norm": 0.7951787710189819, "learning_rate": 0.0009437338632966436, "loss": 3.7806, "step": 6625 }, { "epoch": 0.45046881369751324, "grad_norm": 0.7661988735198975, "learning_rate": 0.0009436913982878108, "loss": 3.8586, "step": 6630 }, { "epoch": 0.45080853376817503, "grad_norm": 0.9542354345321655, "learning_rate": 0.0009436489332789781, "loss": 3.6989, "step": 6635 }, { "epoch": 0.4511482538388368, "grad_norm": 0.8352494239807129, "learning_rate": 0.0009436064682701454, "loss": 3.7621, "step": 6640 }, { "epoch": 0.4514879739094986, "grad_norm": 0.7874074578285217, "learning_rate": 0.0009435640032613128, "loss": 3.4904, "step": 6645 }, { "epoch": 0.45182769398016037, "grad_norm": 0.7971878051757812, "learning_rate": 0.00094352153825248, "loss": 3.8819, "step": 6650 }, { "epoch": 0.4521674140508221, "grad_norm": 0.8122754693031311, "learning_rate": 0.0009434790732436473, "loss": 3.6617, "step": 6655 }, { "epoch": 0.4525071341214839, "grad_norm": 0.9281842112541199, "learning_rate": 0.0009434366082348146, "loss": 3.4884, "step": 6660 }, { "epoch": 0.45284685419214565, "grad_norm": 0.6347576975822449, "learning_rate": 0.0009433941432259818, "loss": 3.5103, "step": 6665 }, { "epoch": 0.45318657426280745, "grad_norm": 0.7310206294059753, "learning_rate": 0.000943351678217149, "loss": 3.7996, "step": 6670 }, { "epoch": 0.45352629433346925, "grad_norm": 0.7240384221076965, "learning_rate": 0.0009433092132083164, "loss": 3.9415, "step": 6675 }, { "epoch": 0.453866014404131, "grad_norm": 1.0311484336853027, "learning_rate": 0.0009432667481994837, "loss": 3.7467, "step": 6680 }, { "epoch": 0.4542057344747928, "grad_norm": 0.8163008689880371, "learning_rate": 0.0009432242831906509, "loss": 3.5392, "step": 6685 }, { "epoch": 0.45454545454545453, "grad_norm": 0.7271761894226074, "learning_rate": 0.0009431818181818183, "loss": 3.5076, "step": 6690 }, { "epoch": 0.45488517461611633, "grad_norm": 1.3560280799865723, "learning_rate": 0.0009431393531729855, "loss": 3.7702, "step": 6695 }, { "epoch": 0.45522489468677807, "grad_norm": 0.8395495414733887, "learning_rate": 0.0009430968881641527, "loss": 3.405, "step": 6700 }, { "epoch": 0.45556461475743987, "grad_norm": 0.9616559743881226, "learning_rate": 0.0009430544231553201, "loss": 3.593, "step": 6705 }, { "epoch": 0.45590433482810166, "grad_norm": 0.791356086730957, "learning_rate": 0.0009430119581464873, "loss": 3.7463, "step": 6710 }, { "epoch": 0.4562440548987634, "grad_norm": 0.7021889686584473, "learning_rate": 0.0009429694931376546, "loss": 3.6889, "step": 6715 }, { "epoch": 0.4565837749694252, "grad_norm": 0.7087218761444092, "learning_rate": 0.000942927028128822, "loss": 3.6237, "step": 6720 }, { "epoch": 0.45692349504008695, "grad_norm": 1.0868803262710571, "learning_rate": 0.0009428845631199892, "loss": 3.56, "step": 6725 }, { "epoch": 0.45726321511074874, "grad_norm": 0.7047871947288513, "learning_rate": 0.0009428420981111564, "loss": 3.7595, "step": 6730 }, { "epoch": 0.45760293518141054, "grad_norm": 1.0414479970932007, "learning_rate": 0.0009427996331023237, "loss": 3.7527, "step": 6735 }, { "epoch": 0.4579426552520723, "grad_norm": 0.801582932472229, "learning_rate": 0.000942757168093491, "loss": 3.7045, "step": 6740 }, { "epoch": 0.4582823753227341, "grad_norm": 0.8411511778831482, "learning_rate": 0.0009427147030846582, "loss": 3.697, "step": 6745 }, { "epoch": 0.4586220953933958, "grad_norm": 0.6610255837440491, "learning_rate": 0.0009426722380758256, "loss": 3.7951, "step": 6750 }, { "epoch": 0.4589618154640576, "grad_norm": 0.6865373849868774, "learning_rate": 0.0009426297730669929, "loss": 3.7947, "step": 6755 }, { "epoch": 0.4593015355347194, "grad_norm": 0.7824287414550781, "learning_rate": 0.0009425873080581601, "loss": 3.417, "step": 6760 }, { "epoch": 0.45964125560538116, "grad_norm": 0.8662406802177429, "learning_rate": 0.0009425448430493274, "loss": 3.7397, "step": 6765 }, { "epoch": 0.45998097567604296, "grad_norm": 0.772973358631134, "learning_rate": 0.0009425023780404946, "loss": 3.8287, "step": 6770 }, { "epoch": 0.4603206957467047, "grad_norm": 0.681059718132019, "learning_rate": 0.0009424599130316619, "loss": 3.8886, "step": 6775 }, { "epoch": 0.4606604158173665, "grad_norm": 0.8197239637374878, "learning_rate": 0.0009424174480228292, "loss": 3.8622, "step": 6780 }, { "epoch": 0.46100013588802824, "grad_norm": 0.6595356464385986, "learning_rate": 0.0009423749830139965, "loss": 3.7606, "step": 6785 }, { "epoch": 0.46133985595869004, "grad_norm": 0.9089701771736145, "learning_rate": 0.0009423325180051638, "loss": 3.5524, "step": 6790 }, { "epoch": 0.46167957602935183, "grad_norm": 0.8775517344474792, "learning_rate": 0.0009422900529963311, "loss": 3.5668, "step": 6795 }, { "epoch": 0.4620192961000136, "grad_norm": 0.7642073035240173, "learning_rate": 0.0009422475879874983, "loss": 3.7652, "step": 6800 }, { "epoch": 0.4623590161706754, "grad_norm": 0.8666463494300842, "learning_rate": 0.0009422051229786655, "loss": 3.5679, "step": 6805 }, { "epoch": 0.4626987362413371, "grad_norm": 0.8861477375030518, "learning_rate": 0.0009421626579698329, "loss": 3.5925, "step": 6810 }, { "epoch": 0.4630384563119989, "grad_norm": 0.6292643547058105, "learning_rate": 0.0009421201929610001, "loss": 3.427, "step": 6815 }, { "epoch": 0.4633781763826607, "grad_norm": 0.7941412925720215, "learning_rate": 0.0009420777279521674, "loss": 4.0807, "step": 6820 }, { "epoch": 0.46371789645332245, "grad_norm": 0.8130123019218445, "learning_rate": 0.0009420352629433348, "loss": 3.8395, "step": 6825 }, { "epoch": 0.46405761652398425, "grad_norm": 0.7854185104370117, "learning_rate": 0.000941992797934502, "loss": 3.6002, "step": 6830 }, { "epoch": 0.464397336594646, "grad_norm": 0.7315665483474731, "learning_rate": 0.0009419503329256692, "loss": 3.5257, "step": 6835 }, { "epoch": 0.4647370566653078, "grad_norm": 0.6648357510566711, "learning_rate": 0.0009419078679168366, "loss": 3.829, "step": 6840 }, { "epoch": 0.4650767767359696, "grad_norm": 0.684614360332489, "learning_rate": 0.0009418654029080038, "loss": 3.6271, "step": 6845 }, { "epoch": 0.46541649680663133, "grad_norm": 0.7598673701286316, "learning_rate": 0.000941822937899171, "loss": 3.8068, "step": 6850 }, { "epoch": 0.4657562168772931, "grad_norm": 0.8031312227249146, "learning_rate": 0.0009417804728903385, "loss": 3.6739, "step": 6855 }, { "epoch": 0.46609593694795487, "grad_norm": 0.7127792239189148, "learning_rate": 0.0009417380078815057, "loss": 3.7733, "step": 6860 }, { "epoch": 0.46643565701861667, "grad_norm": 0.6540017127990723, "learning_rate": 0.0009416955428726729, "loss": 3.5903, "step": 6865 }, { "epoch": 0.4667753770892784, "grad_norm": 0.8222087621688843, "learning_rate": 0.0009416530778638402, "loss": 3.5829, "step": 6870 }, { "epoch": 0.4671150971599402, "grad_norm": 0.786706805229187, "learning_rate": 0.0009416106128550075, "loss": 3.3733, "step": 6875 }, { "epoch": 0.467454817230602, "grad_norm": 0.7892522215843201, "learning_rate": 0.0009415681478461747, "loss": 3.4567, "step": 6880 }, { "epoch": 0.46779453730126375, "grad_norm": 0.7317526340484619, "learning_rate": 0.000941525682837342, "loss": 3.6438, "step": 6885 }, { "epoch": 0.46813425737192554, "grad_norm": 0.6503486037254333, "learning_rate": 0.0009414832178285094, "loss": 3.7529, "step": 6890 }, { "epoch": 0.4684739774425873, "grad_norm": 0.7175481915473938, "learning_rate": 0.0009414407528196766, "loss": 3.7208, "step": 6895 }, { "epoch": 0.4688136975132491, "grad_norm": 0.8006057143211365, "learning_rate": 0.0009413982878108439, "loss": 3.8677, "step": 6900 }, { "epoch": 0.4691534175839109, "grad_norm": 0.7676239013671875, "learning_rate": 0.0009413558228020112, "loss": 3.8635, "step": 6905 }, { "epoch": 0.4694931376545726, "grad_norm": 0.8925326466560364, "learning_rate": 0.0009413133577931784, "loss": 3.7591, "step": 6910 }, { "epoch": 0.4698328577252344, "grad_norm": 0.7252369523048401, "learning_rate": 0.0009412708927843457, "loss": 3.7022, "step": 6915 }, { "epoch": 0.47017257779589616, "grad_norm": 0.8102614879608154, "learning_rate": 0.0009412284277755129, "loss": 3.5695, "step": 6920 }, { "epoch": 0.47051229786655796, "grad_norm": 0.6476667523384094, "learning_rate": 0.0009411859627666803, "loss": 3.6827, "step": 6925 }, { "epoch": 0.47085201793721976, "grad_norm": 1.0310362577438354, "learning_rate": 0.0009411434977578476, "loss": 3.7261, "step": 6930 }, { "epoch": 0.4711917380078815, "grad_norm": 0.7481087446212769, "learning_rate": 0.0009411010327490148, "loss": 3.62, "step": 6935 }, { "epoch": 0.4715314580785433, "grad_norm": 0.6489163637161255, "learning_rate": 0.0009410585677401821, "loss": 3.685, "step": 6940 }, { "epoch": 0.47187117814920504, "grad_norm": 0.7797324657440186, "learning_rate": 0.0009410161027313494, "loss": 3.9003, "step": 6945 }, { "epoch": 0.47221089821986684, "grad_norm": 0.825636625289917, "learning_rate": 0.0009409736377225166, "loss": 3.601, "step": 6950 }, { "epoch": 0.4725506182905286, "grad_norm": 1.3893637657165527, "learning_rate": 0.0009409311727136838, "loss": 3.9514, "step": 6955 }, { "epoch": 0.4728903383611904, "grad_norm": 1.127408504486084, "learning_rate": 0.0009408887077048513, "loss": 3.8569, "step": 6960 }, { "epoch": 0.4732300584318522, "grad_norm": 1.2677662372589111, "learning_rate": 0.0009408462426960185, "loss": 3.7982, "step": 6965 }, { "epoch": 0.4735697785025139, "grad_norm": 0.7119567394256592, "learning_rate": 0.0009408037776871857, "loss": 3.5079, "step": 6970 }, { "epoch": 0.4739094985731757, "grad_norm": 0.7485243082046509, "learning_rate": 0.0009407613126783531, "loss": 3.7139, "step": 6975 }, { "epoch": 0.47424921864383746, "grad_norm": 1.8985931873321533, "learning_rate": 0.0009407188476695203, "loss": 3.9208, "step": 6980 }, { "epoch": 0.47458893871449925, "grad_norm": 0.6684504747390747, "learning_rate": 0.0009406763826606876, "loss": 3.6739, "step": 6985 }, { "epoch": 0.47492865878516105, "grad_norm": 0.8101637959480286, "learning_rate": 0.0009406339176518549, "loss": 3.6928, "step": 6990 }, { "epoch": 0.4752683788558228, "grad_norm": 1.3333851099014282, "learning_rate": 0.0009405914526430222, "loss": 3.3483, "step": 6995 }, { "epoch": 0.4756080989264846, "grad_norm": 0.8930231928825378, "learning_rate": 0.0009405489876341895, "loss": 3.3347, "step": 7000 }, { "epoch": 0.47594781899714633, "grad_norm": 0.9755032062530518, "learning_rate": 0.0009405065226253568, "loss": 4.0481, "step": 7005 }, { "epoch": 0.47628753906780813, "grad_norm": 0.8229382038116455, "learning_rate": 0.000940464057616524, "loss": 3.8443, "step": 7010 }, { "epoch": 0.4766272591384699, "grad_norm": 0.8849020600318909, "learning_rate": 0.0009404215926076913, "loss": 3.7291, "step": 7015 }, { "epoch": 0.47696697920913167, "grad_norm": 0.6264156699180603, "learning_rate": 0.0009403791275988585, "loss": 3.3264, "step": 7020 }, { "epoch": 0.47730669927979347, "grad_norm": 0.765312910079956, "learning_rate": 0.0009403366625900258, "loss": 3.7668, "step": 7025 }, { "epoch": 0.4776464193504552, "grad_norm": 0.8168578743934631, "learning_rate": 0.0009402941975811932, "loss": 3.7237, "step": 7030 }, { "epoch": 0.477986139421117, "grad_norm": 0.7400926351547241, "learning_rate": 0.0009402517325723604, "loss": 3.5315, "step": 7035 }, { "epoch": 0.47832585949177875, "grad_norm": 0.657805323600769, "learning_rate": 0.0009402092675635277, "loss": 3.5353, "step": 7040 }, { "epoch": 0.47866557956244055, "grad_norm": 1.1266076564788818, "learning_rate": 0.000940166802554695, "loss": 3.7509, "step": 7045 }, { "epoch": 0.47900529963310234, "grad_norm": 0.6874140501022339, "learning_rate": 0.0009401243375458622, "loss": 3.6881, "step": 7050 }, { "epoch": 0.4793450197037641, "grad_norm": 0.7892255187034607, "learning_rate": 0.0009400818725370294, "loss": 3.8323, "step": 7055 }, { "epoch": 0.4796847397744259, "grad_norm": 0.6673566699028015, "learning_rate": 0.0009400394075281968, "loss": 3.7398, "step": 7060 }, { "epoch": 0.4800244598450876, "grad_norm": 0.7166375517845154, "learning_rate": 0.0009399969425193641, "loss": 3.6002, "step": 7065 }, { "epoch": 0.4803641799157494, "grad_norm": 1.9668477773666382, "learning_rate": 0.0009399544775105313, "loss": 3.4477, "step": 7070 }, { "epoch": 0.4807038999864112, "grad_norm": 1.105467677116394, "learning_rate": 0.0009399120125016987, "loss": 3.7769, "step": 7075 }, { "epoch": 0.48104362005707296, "grad_norm": 0.8100324273109436, "learning_rate": 0.0009398695474928659, "loss": 3.8853, "step": 7080 }, { "epoch": 0.48138334012773476, "grad_norm": 0.855181097984314, "learning_rate": 0.0009398270824840331, "loss": 3.9055, "step": 7085 }, { "epoch": 0.4817230601983965, "grad_norm": 0.8410840630531311, "learning_rate": 0.0009397846174752005, "loss": 3.6061, "step": 7090 }, { "epoch": 0.4820627802690583, "grad_norm": 0.6882803440093994, "learning_rate": 0.0009397421524663677, "loss": 3.7707, "step": 7095 }, { "epoch": 0.4824025003397201, "grad_norm": 0.5635659098625183, "learning_rate": 0.000939699687457535, "loss": 3.6181, "step": 7100 }, { "epoch": 0.48274222041038184, "grad_norm": 0.960499107837677, "learning_rate": 0.0009396572224487024, "loss": 3.7527, "step": 7105 }, { "epoch": 0.48308194048104364, "grad_norm": 1.3995314836502075, "learning_rate": 0.0009396147574398696, "loss": 3.8983, "step": 7110 }, { "epoch": 0.4834216605517054, "grad_norm": 0.8181443214416504, "learning_rate": 0.0009395722924310368, "loss": 3.6004, "step": 7115 }, { "epoch": 0.4837613806223672, "grad_norm": 0.6821444630622864, "learning_rate": 0.0009395298274222041, "loss": 3.7091, "step": 7120 }, { "epoch": 0.4841011006930289, "grad_norm": 1.4712451696395874, "learning_rate": 0.0009394873624133714, "loss": 3.6254, "step": 7125 }, { "epoch": 0.4844408207636907, "grad_norm": 0.9078455567359924, "learning_rate": 0.0009394448974045386, "loss": 3.6762, "step": 7130 }, { "epoch": 0.4847805408343525, "grad_norm": 0.9781457185745239, "learning_rate": 0.000939402432395706, "loss": 3.5707, "step": 7135 }, { "epoch": 0.48512026090501426, "grad_norm": 0.8346604108810425, "learning_rate": 0.0009393599673868733, "loss": 3.8072, "step": 7140 }, { "epoch": 0.48545998097567605, "grad_norm": 0.9316626787185669, "learning_rate": 0.0009393175023780405, "loss": 3.7848, "step": 7145 }, { "epoch": 0.4857997010463378, "grad_norm": 0.74868243932724, "learning_rate": 0.0009392750373692078, "loss": 3.7603, "step": 7150 }, { "epoch": 0.4861394211169996, "grad_norm": 0.7628958821296692, "learning_rate": 0.000939232572360375, "loss": 3.8041, "step": 7155 }, { "epoch": 0.4864791411876614, "grad_norm": 0.7317331433296204, "learning_rate": 0.0009391901073515423, "loss": 3.7645, "step": 7160 }, { "epoch": 0.48681886125832313, "grad_norm": 0.8249421119689941, "learning_rate": 0.0009391476423427097, "loss": 3.7042, "step": 7165 }, { "epoch": 0.48715858132898493, "grad_norm": 0.9656422138214111, "learning_rate": 0.0009391051773338769, "loss": 3.361, "step": 7170 }, { "epoch": 0.48749830139964667, "grad_norm": 0.849381148815155, "learning_rate": 0.0009390627123250442, "loss": 3.7454, "step": 7175 }, { "epoch": 0.48783802147030847, "grad_norm": 0.7788578867912292, "learning_rate": 0.0009390202473162115, "loss": 3.7689, "step": 7180 }, { "epoch": 0.48817774154097027, "grad_norm": 0.7999100685119629, "learning_rate": 0.0009389777823073787, "loss": 3.5582, "step": 7185 }, { "epoch": 0.488517461611632, "grad_norm": 0.6108559966087341, "learning_rate": 0.000938935317298546, "loss": 3.7503, "step": 7190 }, { "epoch": 0.4888571816822938, "grad_norm": 0.8938859701156616, "learning_rate": 0.0009388928522897133, "loss": 3.8461, "step": 7195 }, { "epoch": 0.48919690175295555, "grad_norm": 0.8030597567558289, "learning_rate": 0.0009388503872808806, "loss": 3.6002, "step": 7200 }, { "epoch": 0.48953662182361735, "grad_norm": 0.7490113973617554, "learning_rate": 0.0009388079222720478, "loss": 3.6717, "step": 7205 }, { "epoch": 0.4898763418942791, "grad_norm": 0.8324754238128662, "learning_rate": 0.0009387654572632152, "loss": 3.9046, "step": 7210 }, { "epoch": 0.4902160619649409, "grad_norm": 0.6378701329231262, "learning_rate": 0.0009387229922543824, "loss": 3.7435, "step": 7215 }, { "epoch": 0.4905557820356027, "grad_norm": 0.6696974635124207, "learning_rate": 0.0009386805272455496, "loss": 3.6896, "step": 7220 }, { "epoch": 0.4908955021062644, "grad_norm": 0.8677157163619995, "learning_rate": 0.000938638062236717, "loss": 3.8479, "step": 7225 }, { "epoch": 0.4912352221769262, "grad_norm": 0.7982289791107178, "learning_rate": 0.0009385955972278842, "loss": 3.4634, "step": 7230 }, { "epoch": 0.49157494224758796, "grad_norm": 1.0052379369735718, "learning_rate": 0.0009385531322190515, "loss": 3.7482, "step": 7235 }, { "epoch": 0.49191466231824976, "grad_norm": 0.8542678952217102, "learning_rate": 0.0009385106672102189, "loss": 3.7788, "step": 7240 }, { "epoch": 0.49225438238891156, "grad_norm": 0.9654294848442078, "learning_rate": 0.0009384682022013861, "loss": 3.8142, "step": 7245 }, { "epoch": 0.4925941024595733, "grad_norm": 0.8354349732398987, "learning_rate": 0.0009384257371925533, "loss": 3.8907, "step": 7250 }, { "epoch": 0.4929338225302351, "grad_norm": 0.7875881195068359, "learning_rate": 0.0009383832721837206, "loss": 3.3389, "step": 7255 }, { "epoch": 0.49327354260089684, "grad_norm": 0.8593769669532776, "learning_rate": 0.0009383408071748879, "loss": 3.8246, "step": 7260 }, { "epoch": 0.49361326267155864, "grad_norm": 0.8465110063552856, "learning_rate": 0.0009382983421660551, "loss": 3.7153, "step": 7265 }, { "epoch": 0.49395298274222044, "grad_norm": 0.7789784073829651, "learning_rate": 0.0009382558771572225, "loss": 3.6484, "step": 7270 }, { "epoch": 0.4942927028128822, "grad_norm": 0.8830150365829468, "learning_rate": 0.0009382134121483898, "loss": 3.6133, "step": 7275 }, { "epoch": 0.494632422883544, "grad_norm": 0.7771656513214111, "learning_rate": 0.000938170947139557, "loss": 3.7278, "step": 7280 }, { "epoch": 0.4949721429542057, "grad_norm": 0.6703338027000427, "learning_rate": 0.0009381284821307243, "loss": 3.9752, "step": 7285 }, { "epoch": 0.4953118630248675, "grad_norm": 0.7846285700798035, "learning_rate": 0.0009380860171218916, "loss": 3.4729, "step": 7290 }, { "epoch": 0.49565158309552926, "grad_norm": 0.948172390460968, "learning_rate": 0.0009380435521130588, "loss": 3.7144, "step": 7295 }, { "epoch": 0.49599130316619106, "grad_norm": 1.0101864337921143, "learning_rate": 0.0009380010871042261, "loss": 3.7646, "step": 7300 }, { "epoch": 0.49633102323685285, "grad_norm": 0.7544894814491272, "learning_rate": 0.0009379586220953934, "loss": 3.6367, "step": 7305 }, { "epoch": 0.4966707433075146, "grad_norm": 0.7571282982826233, "learning_rate": 0.0009379161570865607, "loss": 3.6955, "step": 7310 }, { "epoch": 0.4970104633781764, "grad_norm": 3.134646415710449, "learning_rate": 0.000937873692077728, "loss": 3.5329, "step": 7315 }, { "epoch": 0.49735018344883813, "grad_norm": 0.8307409882545471, "learning_rate": 0.0009378312270688952, "loss": 3.8784, "step": 7320 }, { "epoch": 0.49768990351949993, "grad_norm": 0.8937415480613708, "learning_rate": 0.0009377887620600626, "loss": 3.6707, "step": 7325 }, { "epoch": 0.49802962359016173, "grad_norm": 0.5931734442710876, "learning_rate": 0.0009377462970512298, "loss": 3.599, "step": 7330 }, { "epoch": 0.49836934366082347, "grad_norm": 0.6827585101127625, "learning_rate": 0.000937703832042397, "loss": 3.8399, "step": 7335 }, { "epoch": 0.49870906373148527, "grad_norm": 0.8192744851112366, "learning_rate": 0.0009376613670335645, "loss": 3.9087, "step": 7340 }, { "epoch": 0.499048783802147, "grad_norm": 1.973146677017212, "learning_rate": 0.0009376189020247317, "loss": 3.6978, "step": 7345 }, { "epoch": 0.4993885038728088, "grad_norm": 0.6945332884788513, "learning_rate": 0.0009375764370158989, "loss": 3.391, "step": 7350 }, { "epoch": 0.4997282239434706, "grad_norm": 0.7670307755470276, "learning_rate": 0.0009375339720070662, "loss": 3.7131, "step": 7355 }, { "epoch": 0.5000679440141323, "grad_norm": 0.8199055790901184, "learning_rate": 0.0009374915069982335, "loss": 3.7655, "step": 7360 }, { "epoch": 0.5004076640847941, "grad_norm": 0.9656381607055664, "learning_rate": 0.0009374490419894007, "loss": 3.7649, "step": 7365 }, { "epoch": 0.5007473841554559, "grad_norm": 0.9740866422653198, "learning_rate": 0.000937406576980568, "loss": 3.9114, "step": 7370 }, { "epoch": 0.5010871042261177, "grad_norm": 0.948676347732544, "learning_rate": 0.0009373641119717354, "loss": 3.7265, "step": 7375 }, { "epoch": 0.5014268242967794, "grad_norm": 0.7805066108703613, "learning_rate": 0.0009373216469629026, "loss": 3.9951, "step": 7380 }, { "epoch": 0.5017665443674413, "grad_norm": 0.7733291387557983, "learning_rate": 0.0009372791819540699, "loss": 4.0057, "step": 7385 }, { "epoch": 0.502106264438103, "grad_norm": 0.8172867894172668, "learning_rate": 0.0009372367169452372, "loss": 3.4659, "step": 7390 }, { "epoch": 0.5024459845087648, "grad_norm": 1.0584605932235718, "learning_rate": 0.0009371942519364044, "loss": 3.6907, "step": 7395 }, { "epoch": 0.5027857045794265, "grad_norm": 0.8395977020263672, "learning_rate": 0.0009371517869275717, "loss": 3.8193, "step": 7400 }, { "epoch": 0.5031254246500884, "grad_norm": 0.9776257872581482, "learning_rate": 0.0009371093219187389, "loss": 3.6174, "step": 7405 }, { "epoch": 0.5034651447207501, "grad_norm": 3.4432694911956787, "learning_rate": 0.0009370668569099063, "loss": 3.6921, "step": 7410 }, { "epoch": 0.5038048647914118, "grad_norm": 0.7370182871818542, "learning_rate": 0.0009370243919010736, "loss": 3.6347, "step": 7415 }, { "epoch": 0.5041445848620737, "grad_norm": 0.8768668174743652, "learning_rate": 0.0009369819268922408, "loss": 3.8462, "step": 7420 }, { "epoch": 0.5044843049327354, "grad_norm": 0.9089639186859131, "learning_rate": 0.0009369394618834081, "loss": 3.5258, "step": 7425 }, { "epoch": 0.5048240250033972, "grad_norm": 0.7319662570953369, "learning_rate": 0.0009368969968745754, "loss": 3.6741, "step": 7430 }, { "epoch": 0.5051637450740589, "grad_norm": 0.5772501230239868, "learning_rate": 0.0009368545318657426, "loss": 3.7717, "step": 7435 }, { "epoch": 0.5055034651447208, "grad_norm": 0.7919349670410156, "learning_rate": 0.0009368120668569098, "loss": 3.6611, "step": 7440 }, { "epoch": 0.5058431852153825, "grad_norm": 0.6962769627571106, "learning_rate": 0.0009367696018480773, "loss": 3.6246, "step": 7445 }, { "epoch": 0.5061829052860443, "grad_norm": 0.6427191495895386, "learning_rate": 0.0009367271368392445, "loss": 3.7291, "step": 7450 }, { "epoch": 0.5065226253567061, "grad_norm": 0.9956004023551941, "learning_rate": 0.0009366846718304117, "loss": 3.6286, "step": 7455 }, { "epoch": 0.5068623454273679, "grad_norm": 0.9007059335708618, "learning_rate": 0.0009366422068215791, "loss": 3.8041, "step": 7460 }, { "epoch": 0.5072020654980296, "grad_norm": 0.9285355806350708, "learning_rate": 0.0009365997418127463, "loss": 3.7582, "step": 7465 }, { "epoch": 0.5075417855686915, "grad_norm": 0.877824068069458, "learning_rate": 0.0009365572768039135, "loss": 3.3837, "step": 7470 }, { "epoch": 0.5078815056393532, "grad_norm": 0.666251540184021, "learning_rate": 0.0009365148117950809, "loss": 4.0161, "step": 7475 }, { "epoch": 0.5082212257100149, "grad_norm": 0.811247706413269, "learning_rate": 0.0009364723467862482, "loss": 3.4287, "step": 7480 }, { "epoch": 0.5085609457806767, "grad_norm": 0.7683402299880981, "learning_rate": 0.0009364298817774154, "loss": 4.0752, "step": 7485 }, { "epoch": 0.5089006658513385, "grad_norm": 0.7946499586105347, "learning_rate": 0.0009363874167685828, "loss": 3.5025, "step": 7490 }, { "epoch": 0.5092403859220003, "grad_norm": 0.8931328058242798, "learning_rate": 0.00093634495175975, "loss": 3.775, "step": 7495 }, { "epoch": 0.509580105992662, "grad_norm": 0.7139407992362976, "learning_rate": 0.0009363024867509172, "loss": 3.76, "step": 7500 }, { "epoch": 0.5099198260633239, "grad_norm": 6.0580525398254395, "learning_rate": 0.0009362600217420845, "loss": 3.7834, "step": 7505 }, { "epoch": 0.5102595461339856, "grad_norm": 0.7540703415870667, "learning_rate": 0.0009362175567332518, "loss": 3.8597, "step": 7510 }, { "epoch": 0.5105992662046474, "grad_norm": 1.1744097471237183, "learning_rate": 0.0009361750917244191, "loss": 3.775, "step": 7515 }, { "epoch": 0.5109389862753091, "grad_norm": 0.7168186902999878, "learning_rate": 0.0009361326267155864, "loss": 3.7443, "step": 7520 }, { "epoch": 0.511278706345971, "grad_norm": 1.5193027257919312, "learning_rate": 0.0009360901617067537, "loss": 3.7862, "step": 7525 }, { "epoch": 0.5116184264166327, "grad_norm": 1.029643177986145, "learning_rate": 0.0009360476966979209, "loss": 3.8337, "step": 7530 }, { "epoch": 0.5119581464872944, "grad_norm": 1.1100542545318604, "learning_rate": 0.0009360052316890882, "loss": 3.7029, "step": 7535 }, { "epoch": 0.5122978665579563, "grad_norm": 1.0867546796798706, "learning_rate": 0.0009359627666802554, "loss": 3.855, "step": 7540 }, { "epoch": 0.512637586628618, "grad_norm": 0.8331971168518066, "learning_rate": 0.0009359203016714227, "loss": 3.4543, "step": 7545 }, { "epoch": 0.5129773066992798, "grad_norm": 0.7595094442367554, "learning_rate": 0.0009358778366625901, "loss": 3.6165, "step": 7550 }, { "epoch": 0.5133170267699416, "grad_norm": 2.122424364089966, "learning_rate": 0.0009358353716537573, "loss": 3.8286, "step": 7555 }, { "epoch": 0.5136567468406034, "grad_norm": 0.9272922873497009, "learning_rate": 0.0009357929066449246, "loss": 3.8261, "step": 7560 }, { "epoch": 0.5139964669112651, "grad_norm": 0.9774302244186401, "learning_rate": 0.0009357504416360919, "loss": 3.7822, "step": 7565 }, { "epoch": 0.5143361869819268, "grad_norm": 0.6903049349784851, "learning_rate": 0.0009357079766272591, "loss": 3.7987, "step": 7570 }, { "epoch": 0.5146759070525887, "grad_norm": 0.8013500571250916, "learning_rate": 0.0009356655116184264, "loss": 3.575, "step": 7575 }, { "epoch": 0.5150156271232504, "grad_norm": 1.009568452835083, "learning_rate": 0.0009356230466095937, "loss": 3.7885, "step": 7580 }, { "epoch": 0.5153553471939122, "grad_norm": 0.7884366512298584, "learning_rate": 0.000935580581600761, "loss": 3.8031, "step": 7585 }, { "epoch": 0.515695067264574, "grad_norm": 0.8781176209449768, "learning_rate": 0.0009355381165919283, "loss": 3.573, "step": 7590 }, { "epoch": 0.5160347873352358, "grad_norm": 0.9837929010391235, "learning_rate": 0.0009354956515830956, "loss": 3.6799, "step": 7595 }, { "epoch": 0.5163745074058975, "grad_norm": 0.6819114685058594, "learning_rate": 0.0009354531865742628, "loss": 3.7495, "step": 7600 }, { "epoch": 0.5167142274765593, "grad_norm": 0.6864736080169678, "learning_rate": 0.00093541072156543, "loss": 3.6115, "step": 7605 }, { "epoch": 0.5170539475472211, "grad_norm": 1.046215295791626, "learning_rate": 0.0009353682565565974, "loss": 3.6609, "step": 7610 }, { "epoch": 0.5173936676178829, "grad_norm": 1.337174415588379, "learning_rate": 0.0009353257915477646, "loss": 3.3938, "step": 7615 }, { "epoch": 0.5177333876885446, "grad_norm": 0.8165753483772278, "learning_rate": 0.0009352833265389319, "loss": 3.8146, "step": 7620 }, { "epoch": 0.5180731077592065, "grad_norm": 0.6793105602264404, "learning_rate": 0.0009352408615300993, "loss": 3.8535, "step": 7625 }, { "epoch": 0.5184128278298682, "grad_norm": 0.9664869904518127, "learning_rate": 0.0009351983965212665, "loss": 3.4694, "step": 7630 }, { "epoch": 0.5187525479005299, "grad_norm": 1.0473023653030396, "learning_rate": 0.0009351559315124337, "loss": 3.8503, "step": 7635 }, { "epoch": 0.5190922679711918, "grad_norm": 0.6371603012084961, "learning_rate": 0.000935113466503601, "loss": 3.812, "step": 7640 }, { "epoch": 0.5194319880418535, "grad_norm": 0.8721631169319153, "learning_rate": 0.0009350710014947683, "loss": 3.6361, "step": 7645 }, { "epoch": 0.5197717081125153, "grad_norm": 0.7898417711257935, "learning_rate": 0.0009350285364859355, "loss": 3.7272, "step": 7650 }, { "epoch": 0.520111428183177, "grad_norm": 0.8976787328720093, "learning_rate": 0.000934986071477103, "loss": 3.5403, "step": 7655 }, { "epoch": 0.5204511482538389, "grad_norm": 1.6082552671432495, "learning_rate": 0.0009349436064682702, "loss": 3.6175, "step": 7660 }, { "epoch": 0.5207908683245006, "grad_norm": 0.830260694026947, "learning_rate": 0.0009349011414594374, "loss": 3.4931, "step": 7665 }, { "epoch": 0.5211305883951624, "grad_norm": 0.9476523399353027, "learning_rate": 0.0009348586764506047, "loss": 3.6765, "step": 7670 }, { "epoch": 0.5214703084658242, "grad_norm": 0.7993727326393127, "learning_rate": 0.000934816211441772, "loss": 3.5528, "step": 7675 }, { "epoch": 0.521810028536486, "grad_norm": 0.6129350662231445, "learning_rate": 0.0009347737464329393, "loss": 3.8175, "step": 7680 }, { "epoch": 0.5221497486071477, "grad_norm": 1.0025638341903687, "learning_rate": 0.0009347312814241065, "loss": 3.7148, "step": 7685 }, { "epoch": 0.5224894686778094, "grad_norm": 0.7446555495262146, "learning_rate": 0.0009346888164152739, "loss": 3.8676, "step": 7690 }, { "epoch": 0.5228291887484713, "grad_norm": 3.560454845428467, "learning_rate": 0.0009346463514064412, "loss": 3.4559, "step": 7695 }, { "epoch": 0.523168908819133, "grad_norm": 0.6714736819267273, "learning_rate": 0.0009346038863976084, "loss": 3.794, "step": 7700 }, { "epoch": 0.5235086288897948, "grad_norm": 0.85779869556427, "learning_rate": 0.0009345614213887756, "loss": 3.895, "step": 7705 }, { "epoch": 0.5238483489604566, "grad_norm": 0.8376686573028564, "learning_rate": 0.000934518956379943, "loss": 3.6197, "step": 7710 }, { "epoch": 0.5241880690311184, "grad_norm": 1.0659286975860596, "learning_rate": 0.0009344764913711102, "loss": 3.7913, "step": 7715 }, { "epoch": 0.5245277891017801, "grad_norm": 6.548509120941162, "learning_rate": 0.0009344340263622774, "loss": 3.6373, "step": 7720 }, { "epoch": 0.524867509172442, "grad_norm": 5.886991024017334, "learning_rate": 0.0009343915613534449, "loss": 3.8122, "step": 7725 }, { "epoch": 0.5252072292431037, "grad_norm": 0.7771844863891602, "learning_rate": 0.0009343490963446121, "loss": 3.6466, "step": 7730 }, { "epoch": 0.5255469493137654, "grad_norm": 0.7829844355583191, "learning_rate": 0.0009343066313357793, "loss": 3.5896, "step": 7735 }, { "epoch": 0.5258866693844272, "grad_norm": 0.7606650590896606, "learning_rate": 0.0009342641663269467, "loss": 3.7448, "step": 7740 }, { "epoch": 0.526226389455089, "grad_norm": 0.6309944987297058, "learning_rate": 0.0009342217013181139, "loss": 3.6158, "step": 7745 }, { "epoch": 0.5265661095257508, "grad_norm": 0.7991791367530823, "learning_rate": 0.0009341792363092811, "loss": 3.9482, "step": 7750 }, { "epoch": 0.5269058295964125, "grad_norm": 1.166977882385254, "learning_rate": 0.0009341367713004485, "loss": 3.8754, "step": 7755 }, { "epoch": 0.5272455496670744, "grad_norm": 0.951145589351654, "learning_rate": 0.0009340943062916158, "loss": 3.1355, "step": 7760 }, { "epoch": 0.5275852697377361, "grad_norm": 1.6882327795028687, "learning_rate": 0.000934051841282783, "loss": 3.8167, "step": 7765 }, { "epoch": 0.5279249898083979, "grad_norm": 1.015091896057129, "learning_rate": 0.0009340093762739503, "loss": 3.6939, "step": 7770 }, { "epoch": 0.5282647098790596, "grad_norm": 0.8167543411254883, "learning_rate": 0.0009339669112651176, "loss": 3.9178, "step": 7775 }, { "epoch": 0.5286044299497215, "grad_norm": 0.8711304068565369, "learning_rate": 0.0009339244462562848, "loss": 3.3895, "step": 7780 }, { "epoch": 0.5289441500203832, "grad_norm": 1.0086500644683838, "learning_rate": 0.0009338819812474521, "loss": 3.8921, "step": 7785 }, { "epoch": 0.5292838700910449, "grad_norm": 0.8425483703613281, "learning_rate": 0.0009338395162386195, "loss": 3.8591, "step": 7790 }, { "epoch": 0.5296235901617068, "grad_norm": 1.2312175035476685, "learning_rate": 0.0009337970512297867, "loss": 3.912, "step": 7795 }, { "epoch": 0.5299633102323685, "grad_norm": 1.2033361196517944, "learning_rate": 0.000933754586220954, "loss": 3.5202, "step": 7800 }, { "epoch": 0.5303030303030303, "grad_norm": 0.7065937519073486, "learning_rate": 0.0009337121212121212, "loss": 3.6259, "step": 7805 }, { "epoch": 0.5306427503736921, "grad_norm": 1.1824893951416016, "learning_rate": 0.0009336696562032885, "loss": 3.6446, "step": 7810 }, { "epoch": 0.5309824704443539, "grad_norm": 1.3385603427886963, "learning_rate": 0.0009336271911944558, "loss": 3.6275, "step": 7815 }, { "epoch": 0.5313221905150156, "grad_norm": 0.6545684337615967, "learning_rate": 0.000933584726185623, "loss": 3.6846, "step": 7820 }, { "epoch": 0.5316619105856774, "grad_norm": 0.7343723773956299, "learning_rate": 0.0009335422611767904, "loss": 3.7886, "step": 7825 }, { "epoch": 0.5320016306563392, "grad_norm": 1.327511191368103, "learning_rate": 0.0009334997961679577, "loss": 3.5956, "step": 7830 }, { "epoch": 0.532341350727001, "grad_norm": 0.8653922080993652, "learning_rate": 0.0009334573311591249, "loss": 3.8504, "step": 7835 }, { "epoch": 0.5326810707976627, "grad_norm": 0.9837885499000549, "learning_rate": 0.0009334148661502921, "loss": 3.9316, "step": 7840 }, { "epoch": 0.5330207908683245, "grad_norm": 0.8776041269302368, "learning_rate": 0.0009333724011414595, "loss": 3.6845, "step": 7845 }, { "epoch": 0.5333605109389863, "grad_norm": 0.7139930129051208, "learning_rate": 0.0009333299361326267, "loss": 3.7757, "step": 7850 }, { "epoch": 0.533700231009648, "grad_norm": 0.5540434718132019, "learning_rate": 0.0009332874711237939, "loss": 3.8681, "step": 7855 }, { "epoch": 0.5340399510803098, "grad_norm": 0.7290138602256775, "learning_rate": 0.0009332450061149614, "loss": 3.7144, "step": 7860 }, { "epoch": 0.5343796711509716, "grad_norm": 1.5798171758651733, "learning_rate": 0.0009332025411061286, "loss": 3.6538, "step": 7865 }, { "epoch": 0.5347193912216334, "grad_norm": 0.6140821576118469, "learning_rate": 0.0009331600760972958, "loss": 3.8139, "step": 7870 }, { "epoch": 0.5350591112922951, "grad_norm": 0.6630499958992004, "learning_rate": 0.0009331176110884632, "loss": 3.7556, "step": 7875 }, { "epoch": 0.535398831362957, "grad_norm": 0.8805527091026306, "learning_rate": 0.0009330751460796304, "loss": 3.8418, "step": 7880 }, { "epoch": 0.5357385514336187, "grad_norm": 0.7567583918571472, "learning_rate": 0.0009330326810707976, "loss": 3.6942, "step": 7885 }, { "epoch": 0.5360782715042804, "grad_norm": 0.8429593443870544, "learning_rate": 0.000932990216061965, "loss": 3.9365, "step": 7890 }, { "epoch": 0.5364179915749423, "grad_norm": 0.7898145914077759, "learning_rate": 0.0009329477510531323, "loss": 3.4877, "step": 7895 }, { "epoch": 0.536757711645604, "grad_norm": 0.6723822355270386, "learning_rate": 0.0009329052860442995, "loss": 3.4952, "step": 7900 }, { "epoch": 0.5370974317162658, "grad_norm": 0.8021454215049744, "learning_rate": 0.0009328628210354668, "loss": 3.8208, "step": 7905 }, { "epoch": 0.5374371517869275, "grad_norm": 1.1370452642440796, "learning_rate": 0.0009328203560266341, "loss": 3.798, "step": 7910 }, { "epoch": 0.5377768718575894, "grad_norm": 0.6277890205383301, "learning_rate": 0.0009327778910178013, "loss": 3.8627, "step": 7915 }, { "epoch": 0.5381165919282511, "grad_norm": 1.3399752378463745, "learning_rate": 0.0009327354260089686, "loss": 3.5943, "step": 7920 }, { "epoch": 0.5384563119989129, "grad_norm": 0.8896576166152954, "learning_rate": 0.0009326929610001359, "loss": 3.8576, "step": 7925 }, { "epoch": 0.5387960320695747, "grad_norm": 0.6801732182502747, "learning_rate": 0.0009326504959913032, "loss": 3.8824, "step": 7930 }, { "epoch": 0.5391357521402365, "grad_norm": 0.6704099178314209, "learning_rate": 0.0009326080309824705, "loss": 3.8018, "step": 7935 }, { "epoch": 0.5394754722108982, "grad_norm": 0.9045189619064331, "learning_rate": 0.0009325655659736377, "loss": 3.7072, "step": 7940 }, { "epoch": 0.5398151922815599, "grad_norm": 1.1468932628631592, "learning_rate": 0.000932523100964805, "loss": 3.6685, "step": 7945 }, { "epoch": 0.5401549123522218, "grad_norm": 0.8054346442222595, "learning_rate": 0.0009324806359559723, "loss": 3.8168, "step": 7950 }, { "epoch": 0.5404946324228835, "grad_norm": 0.6940960884094238, "learning_rate": 0.0009324381709471395, "loss": 3.5268, "step": 7955 }, { "epoch": 0.5408343524935453, "grad_norm": 0.7670547962188721, "learning_rate": 0.0009323957059383068, "loss": 3.7249, "step": 7960 }, { "epoch": 0.5411740725642071, "grad_norm": 0.9753726720809937, "learning_rate": 0.0009323532409294742, "loss": 3.6703, "step": 7965 }, { "epoch": 0.5415137926348689, "grad_norm": 0.9470634460449219, "learning_rate": 0.0009323107759206414, "loss": 3.6562, "step": 7970 }, { "epoch": 0.5418535127055306, "grad_norm": 0.7623605728149414, "learning_rate": 0.0009322683109118087, "loss": 3.8138, "step": 7975 }, { "epoch": 0.5421932327761925, "grad_norm": 0.7771326899528503, "learning_rate": 0.000932225845902976, "loss": 3.7095, "step": 7980 }, { "epoch": 0.5425329528468542, "grad_norm": 0.7347263693809509, "learning_rate": 0.0009321833808941432, "loss": 3.6734, "step": 7985 }, { "epoch": 0.542872672917516, "grad_norm": 0.909206211566925, "learning_rate": 0.0009321409158853104, "loss": 3.7041, "step": 7990 }, { "epoch": 0.5432123929881777, "grad_norm": 1.1828787326812744, "learning_rate": 0.0009320984508764778, "loss": 3.7721, "step": 7995 }, { "epoch": 0.5435521130588395, "grad_norm": 0.8316271901130676, "learning_rate": 0.0009320559858676451, "loss": 3.8443, "step": 8000 }, { "epoch": 0.5438918331295013, "grad_norm": 0.7310352921485901, "learning_rate": 0.0009320135208588123, "loss": 3.6048, "step": 8005 }, { "epoch": 0.544231553200163, "grad_norm": 0.7386995553970337, "learning_rate": 0.0009319710558499797, "loss": 3.779, "step": 8010 }, { "epoch": 0.5445712732708249, "grad_norm": 0.6649513244628906, "learning_rate": 0.0009319285908411469, "loss": 3.6275, "step": 8015 }, { "epoch": 0.5449109933414866, "grad_norm": 0.7939846515655518, "learning_rate": 0.0009318861258323142, "loss": 3.7219, "step": 8020 }, { "epoch": 0.5452507134121484, "grad_norm": 0.9218465089797974, "learning_rate": 0.0009318436608234815, "loss": 3.9361, "step": 8025 }, { "epoch": 0.5455904334828101, "grad_norm": 1.0145055055618286, "learning_rate": 0.0009318011958146487, "loss": 3.7554, "step": 8030 }, { "epoch": 0.545930153553472, "grad_norm": 0.793509304523468, "learning_rate": 0.0009317587308058161, "loss": 3.7625, "step": 8035 }, { "epoch": 0.5462698736241337, "grad_norm": 0.6704797148704529, "learning_rate": 0.0009317162657969833, "loss": 3.4795, "step": 8040 }, { "epoch": 0.5466095936947954, "grad_norm": 0.7801114320755005, "learning_rate": 0.0009316738007881506, "loss": 3.6944, "step": 8045 }, { "epoch": 0.5469493137654573, "grad_norm": 0.9635103940963745, "learning_rate": 0.0009316313357793179, "loss": 3.5057, "step": 8050 }, { "epoch": 0.547289033836119, "grad_norm": 1.3907709121704102, "learning_rate": 0.0009315888707704851, "loss": 3.6027, "step": 8055 }, { "epoch": 0.5476287539067808, "grad_norm": 0.9257709383964539, "learning_rate": 0.0009315464057616524, "loss": 3.6457, "step": 8060 }, { "epoch": 0.5479684739774426, "grad_norm": 0.8470078706741333, "learning_rate": 0.0009315039407528197, "loss": 3.6378, "step": 8065 }, { "epoch": 0.5483081940481044, "grad_norm": 0.9487181305885315, "learning_rate": 0.000931461475743987, "loss": 3.7572, "step": 8070 }, { "epoch": 0.5486479141187661, "grad_norm": 0.6114799380302429, "learning_rate": 0.0009314190107351543, "loss": 3.7352, "step": 8075 }, { "epoch": 0.5489876341894279, "grad_norm": 0.6494504809379578, "learning_rate": 0.0009313765457263216, "loss": 3.7128, "step": 8080 }, { "epoch": 0.5493273542600897, "grad_norm": 0.7182232737541199, "learning_rate": 0.0009313340807174888, "loss": 3.6579, "step": 8085 }, { "epoch": 0.5496670743307515, "grad_norm": 0.8048247694969177, "learning_rate": 0.000931291615708656, "loss": 3.6483, "step": 8090 }, { "epoch": 0.5500067944014132, "grad_norm": 1.2954751253128052, "learning_rate": 0.0009312491506998234, "loss": 3.6654, "step": 8095 }, { "epoch": 0.550346514472075, "grad_norm": 0.9840929508209229, "learning_rate": 0.0009312066856909906, "loss": 3.606, "step": 8100 }, { "epoch": 0.5506862345427368, "grad_norm": 2.9611222743988037, "learning_rate": 0.0009311642206821579, "loss": 3.5107, "step": 8105 }, { "epoch": 0.5510259546133985, "grad_norm": 0.7638512253761292, "learning_rate": 0.0009311217556733253, "loss": 3.5989, "step": 8110 }, { "epoch": 0.5513656746840603, "grad_norm": 1.5318514108657837, "learning_rate": 0.0009310792906644925, "loss": 3.6419, "step": 8115 }, { "epoch": 0.5517053947547221, "grad_norm": 0.9865680932998657, "learning_rate": 0.0009310368256556597, "loss": 3.6727, "step": 8120 }, { "epoch": 0.5520451148253839, "grad_norm": 0.762217104434967, "learning_rate": 0.0009309943606468271, "loss": 3.8597, "step": 8125 }, { "epoch": 0.5523848348960456, "grad_norm": 1.1266885995864868, "learning_rate": 0.0009309518956379943, "loss": 3.8076, "step": 8130 }, { "epoch": 0.5527245549667075, "grad_norm": 0.9770743250846863, "learning_rate": 0.0009309094306291615, "loss": 3.6184, "step": 8135 }, { "epoch": 0.5530642750373692, "grad_norm": 0.7908533215522766, "learning_rate": 0.000930866965620329, "loss": 3.6969, "step": 8140 }, { "epoch": 0.553403995108031, "grad_norm": 0.6796207427978516, "learning_rate": 0.0009308245006114962, "loss": 3.7962, "step": 8145 }, { "epoch": 0.5537437151786928, "grad_norm": 1.280287265777588, "learning_rate": 0.0009307820356026634, "loss": 3.4654, "step": 8150 }, { "epoch": 0.5540834352493546, "grad_norm": 0.866417646408081, "learning_rate": 0.0009307395705938307, "loss": 3.678, "step": 8155 }, { "epoch": 0.5544231553200163, "grad_norm": 0.6913934946060181, "learning_rate": 0.000930697105584998, "loss": 3.5478, "step": 8160 }, { "epoch": 0.554762875390678, "grad_norm": 0.7851265668869019, "learning_rate": 0.0009306546405761652, "loss": 3.9245, "step": 8165 }, { "epoch": 0.5551025954613399, "grad_norm": 0.6437773704528809, "learning_rate": 0.0009306121755673325, "loss": 4.0281, "step": 8170 }, { "epoch": 0.5554423155320016, "grad_norm": 0.7360479831695557, "learning_rate": 0.0009305697105584999, "loss": 3.816, "step": 8175 }, { "epoch": 0.5557820356026634, "grad_norm": 0.6019051671028137, "learning_rate": 0.0009305272455496671, "loss": 3.6643, "step": 8180 }, { "epoch": 0.5561217556733252, "grad_norm": 1.143753170967102, "learning_rate": 0.0009304847805408344, "loss": 3.5277, "step": 8185 }, { "epoch": 0.556461475743987, "grad_norm": 0.7752367258071899, "learning_rate": 0.0009304423155320016, "loss": 3.7001, "step": 8190 }, { "epoch": 0.5568011958146487, "grad_norm": 0.874198317527771, "learning_rate": 0.0009303998505231689, "loss": 3.7288, "step": 8195 }, { "epoch": 0.5571409158853105, "grad_norm": 0.9660792946815491, "learning_rate": 0.0009303573855143362, "loss": 3.7146, "step": 8200 }, { "epoch": 0.5574806359559723, "grad_norm": 0.8832139372825623, "learning_rate": 0.0009303149205055034, "loss": 3.6722, "step": 8205 }, { "epoch": 0.557820356026634, "grad_norm": 0.7793344259262085, "learning_rate": 0.0009302724554966708, "loss": 3.7029, "step": 8210 }, { "epoch": 0.5581600760972958, "grad_norm": 1.028097152709961, "learning_rate": 0.0009302299904878381, "loss": 3.6757, "step": 8215 }, { "epoch": 0.5584997961679576, "grad_norm": 0.6657744646072388, "learning_rate": 0.0009301875254790053, "loss": 3.8865, "step": 8220 }, { "epoch": 0.5588395162386194, "grad_norm": 0.9394684433937073, "learning_rate": 0.0009301450604701725, "loss": 3.5101, "step": 8225 }, { "epoch": 0.5591792363092811, "grad_norm": 0.629466712474823, "learning_rate": 0.0009301025954613399, "loss": 3.8255, "step": 8230 }, { "epoch": 0.559518956379943, "grad_norm": 0.8225659728050232, "learning_rate": 0.0009300601304525071, "loss": 3.6394, "step": 8235 }, { "epoch": 0.5598586764506047, "grad_norm": 0.6260023713111877, "learning_rate": 0.0009300176654436743, "loss": 3.9481, "step": 8240 }, { "epoch": 0.5601983965212665, "grad_norm": 0.7845155000686646, "learning_rate": 0.0009299752004348418, "loss": 3.4939, "step": 8245 }, { "epoch": 0.5605381165919282, "grad_norm": 0.8477449417114258, "learning_rate": 0.000929932735426009, "loss": 3.6506, "step": 8250 }, { "epoch": 0.5608778366625901, "grad_norm": 1.160852313041687, "learning_rate": 0.0009298902704171762, "loss": 3.5319, "step": 8255 }, { "epoch": 0.5612175567332518, "grad_norm": 0.6866785287857056, "learning_rate": 0.0009298478054083436, "loss": 3.6514, "step": 8260 }, { "epoch": 0.5615572768039135, "grad_norm": 0.5965500473976135, "learning_rate": 0.0009298053403995108, "loss": 3.7202, "step": 8265 }, { "epoch": 0.5618969968745754, "grad_norm": 0.7244465947151184, "learning_rate": 0.000929762875390678, "loss": 3.6218, "step": 8270 }, { "epoch": 0.5622367169452371, "grad_norm": 0.8582072854042053, "learning_rate": 0.0009297204103818455, "loss": 3.4413, "step": 8275 }, { "epoch": 0.5625764370158989, "grad_norm": 0.9259071350097656, "learning_rate": 0.0009296779453730127, "loss": 3.7276, "step": 8280 }, { "epoch": 0.5629161570865606, "grad_norm": 0.779722273349762, "learning_rate": 0.0009296354803641799, "loss": 3.6876, "step": 8285 }, { "epoch": 0.5632558771572225, "grad_norm": 0.7717862129211426, "learning_rate": 0.0009295930153553472, "loss": 3.8275, "step": 8290 }, { "epoch": 0.5635955972278842, "grad_norm": 0.7949544787406921, "learning_rate": 0.0009295505503465145, "loss": 3.6666, "step": 8295 }, { "epoch": 0.563935317298546, "grad_norm": 0.6197950839996338, "learning_rate": 0.0009295080853376817, "loss": 3.8307, "step": 8300 }, { "epoch": 0.5642750373692078, "grad_norm": 0.6807712912559509, "learning_rate": 0.000929465620328849, "loss": 3.7553, "step": 8305 }, { "epoch": 0.5646147574398696, "grad_norm": 0.6942443251609802, "learning_rate": 0.0009294231553200164, "loss": 3.7168, "step": 8310 }, { "epoch": 0.5649544775105313, "grad_norm": 0.8188214302062988, "learning_rate": 0.0009293806903111836, "loss": 3.4529, "step": 8315 }, { "epoch": 0.5652941975811931, "grad_norm": 0.8197862505912781, "learning_rate": 0.0009293382253023509, "loss": 3.4901, "step": 8320 }, { "epoch": 0.5656339176518549, "grad_norm": 0.9546991586685181, "learning_rate": 0.0009292957602935182, "loss": 3.9682, "step": 8325 }, { "epoch": 0.5659736377225166, "grad_norm": 0.887115478515625, "learning_rate": 0.0009292532952846854, "loss": 3.7381, "step": 8330 }, { "epoch": 0.5663133577931784, "grad_norm": 0.7302711606025696, "learning_rate": 0.0009292108302758527, "loss": 3.8742, "step": 8335 }, { "epoch": 0.5666530778638402, "grad_norm": 1.0479846000671387, "learning_rate": 0.0009291683652670199, "loss": 3.7514, "step": 8340 }, { "epoch": 0.566992797934502, "grad_norm": 1.013793706893921, "learning_rate": 0.0009291259002581873, "loss": 3.6086, "step": 8345 }, { "epoch": 0.5673325180051637, "grad_norm": 0.926574170589447, "learning_rate": 0.0009290834352493546, "loss": 3.9455, "step": 8350 }, { "epoch": 0.5676722380758256, "grad_norm": 0.6763489842414856, "learning_rate": 0.0009290409702405218, "loss": 3.8298, "step": 8355 }, { "epoch": 0.5680119581464873, "grad_norm": 0.6750110983848572, "learning_rate": 0.0009289985052316892, "loss": 3.5561, "step": 8360 }, { "epoch": 0.568351678217149, "grad_norm": 0.6831505298614502, "learning_rate": 0.0009289560402228564, "loss": 3.6613, "step": 8365 }, { "epoch": 0.5686913982878108, "grad_norm": 0.7711371779441833, "learning_rate": 0.0009289135752140236, "loss": 3.6781, "step": 8370 }, { "epoch": 0.5690311183584726, "grad_norm": 0.8220307230949402, "learning_rate": 0.000928871110205191, "loss": 3.8715, "step": 8375 }, { "epoch": 0.5693708384291344, "grad_norm": 0.6971573233604431, "learning_rate": 0.0009288286451963583, "loss": 3.8585, "step": 8380 }, { "epoch": 0.5697105584997961, "grad_norm": 0.7647146582603455, "learning_rate": 0.0009287861801875255, "loss": 3.7368, "step": 8385 }, { "epoch": 0.570050278570458, "grad_norm": 0.7522078156471252, "learning_rate": 0.0009287437151786928, "loss": 3.6513, "step": 8390 }, { "epoch": 0.5703899986411197, "grad_norm": 0.673718273639679, "learning_rate": 0.0009287012501698601, "loss": 3.9671, "step": 8395 }, { "epoch": 0.5707297187117815, "grad_norm": 0.8873665928840637, "learning_rate": 0.0009286587851610273, "loss": 3.5811, "step": 8400 }, { "epoch": 0.5710694387824433, "grad_norm": 0.717910885810852, "learning_rate": 0.0009286163201521946, "loss": 3.7473, "step": 8405 }, { "epoch": 0.5714091588531051, "grad_norm": 0.9158114790916443, "learning_rate": 0.0009285738551433619, "loss": 3.759, "step": 8410 }, { "epoch": 0.5717488789237668, "grad_norm": 0.7402276396751404, "learning_rate": 0.0009285313901345292, "loss": 3.3486, "step": 8415 }, { "epoch": 0.5720885989944285, "grad_norm": 0.7738250494003296, "learning_rate": 0.0009284889251256965, "loss": 3.7806, "step": 8420 }, { "epoch": 0.5724283190650904, "grad_norm": 0.5874295234680176, "learning_rate": 0.0009284464601168638, "loss": 3.735, "step": 8425 }, { "epoch": 0.5727680391357521, "grad_norm": 0.6737178564071655, "learning_rate": 0.000928403995108031, "loss": 3.6863, "step": 8430 }, { "epoch": 0.5731077592064139, "grad_norm": 0.9294766187667847, "learning_rate": 0.0009283615300991983, "loss": 3.3676, "step": 8435 }, { "epoch": 0.5734474792770757, "grad_norm": 0.8388890624046326, "learning_rate": 0.0009283190650903655, "loss": 3.6748, "step": 8440 }, { "epoch": 0.5737871993477375, "grad_norm": 0.9405798316001892, "learning_rate": 0.0009282766000815328, "loss": 3.5638, "step": 8445 }, { "epoch": 0.5741269194183992, "grad_norm": 0.8348854184150696, "learning_rate": 0.0009282341350727002, "loss": 3.7603, "step": 8450 }, { "epoch": 0.574466639489061, "grad_norm": 0.7804384231567383, "learning_rate": 0.0009281916700638674, "loss": 3.4435, "step": 8455 }, { "epoch": 0.5748063595597228, "grad_norm": 0.7420020699501038, "learning_rate": 0.0009281492050550347, "loss": 3.9071, "step": 8460 }, { "epoch": 0.5751460796303846, "grad_norm": 0.6835420727729797, "learning_rate": 0.000928106740046202, "loss": 3.7762, "step": 8465 }, { "epoch": 0.5754857997010463, "grad_norm": 0.658608615398407, "learning_rate": 0.0009280642750373692, "loss": 3.5829, "step": 8470 }, { "epoch": 0.5758255197717081, "grad_norm": 0.8319673538208008, "learning_rate": 0.0009280218100285364, "loss": 3.5933, "step": 8475 }, { "epoch": 0.5761652398423699, "grad_norm": 0.9539414048194885, "learning_rate": 0.0009279793450197038, "loss": 3.5525, "step": 8480 }, { "epoch": 0.5765049599130316, "grad_norm": 0.70899498462677, "learning_rate": 0.0009279368800108711, "loss": 3.7797, "step": 8485 }, { "epoch": 0.5768446799836935, "grad_norm": 0.76890629529953, "learning_rate": 0.0009278944150020383, "loss": 3.465, "step": 8490 }, { "epoch": 0.5771844000543552, "grad_norm": 0.8877576589584351, "learning_rate": 0.0009278519499932057, "loss": 3.8866, "step": 8495 }, { "epoch": 0.577524120125017, "grad_norm": 0.660688579082489, "learning_rate": 0.0009278094849843729, "loss": 3.7658, "step": 8500 }, { "epoch": 0.5778638401956787, "grad_norm": 0.896030068397522, "learning_rate": 0.0009277670199755401, "loss": 3.6203, "step": 8505 }, { "epoch": 0.5782035602663406, "grad_norm": 0.7487848401069641, "learning_rate": 0.0009277245549667075, "loss": 3.6431, "step": 8510 }, { "epoch": 0.5785432803370023, "grad_norm": 0.7852987051010132, "learning_rate": 0.0009276820899578747, "loss": 3.651, "step": 8515 }, { "epoch": 0.578883000407664, "grad_norm": 0.759682297706604, "learning_rate": 0.000927639624949042, "loss": 3.5244, "step": 8520 }, { "epoch": 0.5792227204783259, "grad_norm": 0.5944443345069885, "learning_rate": 0.0009275971599402094, "loss": 3.6422, "step": 8525 }, { "epoch": 0.5795624405489876, "grad_norm": 0.545721173286438, "learning_rate": 0.0009275546949313766, "loss": 3.6353, "step": 8530 }, { "epoch": 0.5799021606196494, "grad_norm": 0.8526937961578369, "learning_rate": 0.0009275122299225438, "loss": 3.6498, "step": 8535 }, { "epoch": 0.5802418806903111, "grad_norm": 1.258561372756958, "learning_rate": 0.0009274697649137111, "loss": 3.5986, "step": 8540 }, { "epoch": 0.580581600760973, "grad_norm": 0.875015914440155, "learning_rate": 0.0009274272999048784, "loss": 3.8405, "step": 8545 }, { "epoch": 0.5809213208316347, "grad_norm": 0.6685923337936401, "learning_rate": 0.0009273848348960456, "loss": 3.4763, "step": 8550 }, { "epoch": 0.5812610409022965, "grad_norm": 0.6964318752288818, "learning_rate": 0.000927342369887213, "loss": 3.791, "step": 8555 }, { "epoch": 0.5816007609729583, "grad_norm": 0.826518177986145, "learning_rate": 0.0009272999048783803, "loss": 3.7179, "step": 8560 }, { "epoch": 0.5819404810436201, "grad_norm": 0.7018935680389404, "learning_rate": 0.0009272574398695475, "loss": 3.626, "step": 8565 }, { "epoch": 0.5822802011142818, "grad_norm": 0.8180730938911438, "learning_rate": 0.0009272149748607148, "loss": 3.7448, "step": 8570 }, { "epoch": 0.5826199211849437, "grad_norm": 0.8528150320053101, "learning_rate": 0.000927172509851882, "loss": 3.7044, "step": 8575 }, { "epoch": 0.5829596412556054, "grad_norm": 0.7416120767593384, "learning_rate": 0.0009271300448430493, "loss": 3.5249, "step": 8580 }, { "epoch": 0.5832993613262671, "grad_norm": 0.6952391862869263, "learning_rate": 0.0009270875798342166, "loss": 3.7401, "step": 8585 }, { "epoch": 0.5836390813969289, "grad_norm": 0.8442528247833252, "learning_rate": 0.0009270451148253839, "loss": 3.6977, "step": 8590 }, { "epoch": 0.5839788014675907, "grad_norm": 0.6074960827827454, "learning_rate": 0.0009270026498165512, "loss": 3.9663, "step": 8595 }, { "epoch": 0.5843185215382525, "grad_norm": 0.7610037326812744, "learning_rate": 0.0009269601848077185, "loss": 3.6829, "step": 8600 }, { "epoch": 0.5846582416089142, "grad_norm": 0.7824552059173584, "learning_rate": 0.0009269177197988857, "loss": 3.9931, "step": 8605 }, { "epoch": 0.5849979616795761, "grad_norm": 0.5774102210998535, "learning_rate": 0.000926875254790053, "loss": 3.6076, "step": 8610 }, { "epoch": 0.5853376817502378, "grad_norm": 0.6610915064811707, "learning_rate": 0.0009268327897812203, "loss": 3.7273, "step": 8615 }, { "epoch": 0.5856774018208996, "grad_norm": 0.9578468799591064, "learning_rate": 0.0009267903247723875, "loss": 3.6174, "step": 8620 }, { "epoch": 0.5860171218915613, "grad_norm": 0.7981096506118774, "learning_rate": 0.0009267478597635548, "loss": 3.6837, "step": 8625 }, { "epoch": 0.5863568419622232, "grad_norm": 0.8085305094718933, "learning_rate": 0.0009267053947547222, "loss": 3.8751, "step": 8630 }, { "epoch": 0.5866965620328849, "grad_norm": 0.6167998909950256, "learning_rate": 0.0009266629297458894, "loss": 3.8212, "step": 8635 }, { "epoch": 0.5870362821035466, "grad_norm": 0.8082771301269531, "learning_rate": 0.0009266204647370566, "loss": 3.3319, "step": 8640 }, { "epoch": 0.5873760021742085, "grad_norm": 0.6618968844413757, "learning_rate": 0.000926577999728224, "loss": 4.0224, "step": 8645 }, { "epoch": 0.5877157222448702, "grad_norm": 0.6083776950836182, "learning_rate": 0.0009265355347193912, "loss": 4.0241, "step": 8650 }, { "epoch": 0.588055442315532, "grad_norm": 0.6757774949073792, "learning_rate": 0.0009264930697105584, "loss": 3.5121, "step": 8655 }, { "epoch": 0.5883951623861938, "grad_norm": 1.010972023010254, "learning_rate": 0.0009264506047017259, "loss": 3.5277, "step": 8660 }, { "epoch": 0.5887348824568556, "grad_norm": 0.6512971520423889, "learning_rate": 0.0009264081396928931, "loss": 3.6784, "step": 8665 }, { "epoch": 0.5890746025275173, "grad_norm": 0.5896017551422119, "learning_rate": 0.0009263656746840603, "loss": 3.8132, "step": 8670 }, { "epoch": 0.589414322598179, "grad_norm": 0.6976354718208313, "learning_rate": 0.0009263232096752276, "loss": 3.7363, "step": 8675 }, { "epoch": 0.5897540426688409, "grad_norm": 0.5934624671936035, "learning_rate": 0.0009262807446663949, "loss": 3.7175, "step": 8680 }, { "epoch": 0.5900937627395026, "grad_norm": 0.7559554576873779, "learning_rate": 0.0009262382796575621, "loss": 3.5124, "step": 8685 }, { "epoch": 0.5904334828101644, "grad_norm": 0.7427394986152649, "learning_rate": 0.0009261958146487294, "loss": 3.3798, "step": 8690 }, { "epoch": 0.5907732028808262, "grad_norm": 0.6653010249137878, "learning_rate": 0.0009261533496398968, "loss": 3.654, "step": 8695 }, { "epoch": 0.591112922951488, "grad_norm": 0.7774918675422668, "learning_rate": 0.0009261108846310641, "loss": 3.7007, "step": 8700 }, { "epoch": 0.5914526430221497, "grad_norm": 0.7363237738609314, "learning_rate": 0.0009260684196222313, "loss": 3.8055, "step": 8705 }, { "epoch": 0.5917923630928115, "grad_norm": 0.7885435819625854, "learning_rate": 0.0009260259546133986, "loss": 3.6456, "step": 8710 }, { "epoch": 0.5921320831634733, "grad_norm": 0.8036079406738281, "learning_rate": 0.0009259834896045659, "loss": 3.53, "step": 8715 }, { "epoch": 0.5924718032341351, "grad_norm": 0.9873092770576477, "learning_rate": 0.0009259410245957331, "loss": 3.719, "step": 8720 }, { "epoch": 0.5928115233047968, "grad_norm": 0.7451286911964417, "learning_rate": 0.0009258985595869003, "loss": 3.7688, "step": 8725 }, { "epoch": 0.5931512433754587, "grad_norm": 0.8148131370544434, "learning_rate": 0.0009258560945780678, "loss": 3.8162, "step": 8730 }, { "epoch": 0.5934909634461204, "grad_norm": 0.9994235038757324, "learning_rate": 0.000925813629569235, "loss": 3.7565, "step": 8735 }, { "epoch": 0.5938306835167821, "grad_norm": 0.7480568885803223, "learning_rate": 0.0009257711645604022, "loss": 3.6426, "step": 8740 }, { "epoch": 0.594170403587444, "grad_norm": 0.7387261986732483, "learning_rate": 0.0009257286995515696, "loss": 3.7159, "step": 8745 }, { "epoch": 0.5945101236581057, "grad_norm": 0.7426326274871826, "learning_rate": 0.0009256862345427368, "loss": 3.7695, "step": 8750 }, { "epoch": 0.5948498437287675, "grad_norm": 0.6524364948272705, "learning_rate": 0.000925643769533904, "loss": 3.5758, "step": 8755 }, { "epoch": 0.5951895637994292, "grad_norm": 1.2406293153762817, "learning_rate": 0.0009256013045250714, "loss": 3.7243, "step": 8760 }, { "epoch": 0.5955292838700911, "grad_norm": 0.9576144218444824, "learning_rate": 0.0009255588395162387, "loss": 3.6345, "step": 8765 }, { "epoch": 0.5958690039407528, "grad_norm": 0.6558308601379395, "learning_rate": 0.0009255163745074059, "loss": 3.7384, "step": 8770 }, { "epoch": 0.5962087240114146, "grad_norm": 0.9128443598747253, "learning_rate": 0.0009254739094985732, "loss": 3.6394, "step": 8775 }, { "epoch": 0.5965484440820764, "grad_norm": 0.6811715364456177, "learning_rate": 0.0009254314444897405, "loss": 3.6862, "step": 8780 }, { "epoch": 0.5968881641527382, "grad_norm": 0.6951560974121094, "learning_rate": 0.0009253889794809077, "loss": 3.6192, "step": 8785 }, { "epoch": 0.5972278842233999, "grad_norm": 0.726325273513794, "learning_rate": 0.000925346514472075, "loss": 3.769, "step": 8790 }, { "epoch": 0.5975676042940616, "grad_norm": 0.6010936498641968, "learning_rate": 0.0009253040494632423, "loss": 3.7858, "step": 8795 }, { "epoch": 0.5979073243647235, "grad_norm": 0.6359979510307312, "learning_rate": 0.0009252615844544096, "loss": 3.6635, "step": 8800 }, { "epoch": 0.5982470444353852, "grad_norm": 0.7707550525665283, "learning_rate": 0.0009252191194455769, "loss": 3.6817, "step": 8805 }, { "epoch": 0.598586764506047, "grad_norm": 0.7625561952590942, "learning_rate": 0.0009251766544367442, "loss": 3.7249, "step": 8810 }, { "epoch": 0.5989264845767088, "grad_norm": 0.9263338446617126, "learning_rate": 0.0009251341894279114, "loss": 3.8066, "step": 8815 }, { "epoch": 0.5992662046473706, "grad_norm": 0.8002027273178101, "learning_rate": 0.0009250917244190787, "loss": 3.4509, "step": 8820 }, { "epoch": 0.5996059247180323, "grad_norm": 0.967241108417511, "learning_rate": 0.0009250492594102459, "loss": 3.63, "step": 8825 }, { "epoch": 0.5999456447886942, "grad_norm": 0.7122082114219666, "learning_rate": 0.0009250067944014132, "loss": 3.5557, "step": 8830 }, { "epoch": 0.6002853648593559, "grad_norm": 0.6507645845413208, "learning_rate": 0.0009249643293925806, "loss": 3.6562, "step": 8835 }, { "epoch": 0.6006250849300176, "grad_norm": 2.508281707763672, "learning_rate": 0.0009249218643837478, "loss": 3.9142, "step": 8840 }, { "epoch": 0.6009648050006794, "grad_norm": 0.606696605682373, "learning_rate": 0.0009248793993749151, "loss": 3.6453, "step": 8845 }, { "epoch": 0.6013045250713412, "grad_norm": 0.7018161416053772, "learning_rate": 0.0009248369343660824, "loss": 3.8992, "step": 8850 }, { "epoch": 0.601644245142003, "grad_norm": 0.7630243301391602, "learning_rate": 0.0009247944693572496, "loss": 3.7281, "step": 8855 }, { "epoch": 0.6019839652126647, "grad_norm": 0.6921541690826416, "learning_rate": 0.0009247520043484168, "loss": 3.6395, "step": 8860 }, { "epoch": 0.6023236852833266, "grad_norm": 0.6790416836738586, "learning_rate": 0.0009247095393395843, "loss": 3.601, "step": 8865 }, { "epoch": 0.6026634053539883, "grad_norm": 0.7225310206413269, "learning_rate": 0.0009246670743307515, "loss": 3.6465, "step": 8870 }, { "epoch": 0.6030031254246501, "grad_norm": 0.8191479444503784, "learning_rate": 0.0009246246093219187, "loss": 3.6612, "step": 8875 }, { "epoch": 0.6033428454953118, "grad_norm": 0.8516916632652283, "learning_rate": 0.0009245821443130861, "loss": 3.4985, "step": 8880 }, { "epoch": 0.6036825655659737, "grad_norm": 0.763337254524231, "learning_rate": 0.0009245396793042533, "loss": 3.8809, "step": 8885 }, { "epoch": 0.6040222856366354, "grad_norm": 0.6327252388000488, "learning_rate": 0.0009244972142954205, "loss": 3.6463, "step": 8890 }, { "epoch": 0.6043620057072971, "grad_norm": 0.7968856692314148, "learning_rate": 0.0009244547492865879, "loss": 3.6589, "step": 8895 }, { "epoch": 0.604701725777959, "grad_norm": 0.7092478275299072, "learning_rate": 0.0009244122842777552, "loss": 3.8094, "step": 8900 }, { "epoch": 0.6050414458486207, "grad_norm": 0.9345669150352478, "learning_rate": 0.0009243698192689224, "loss": 3.5164, "step": 8905 }, { "epoch": 0.6053811659192825, "grad_norm": 0.6484115719795227, "learning_rate": 0.0009243273542600898, "loss": 3.5994, "step": 8910 }, { "epoch": 0.6057208859899443, "grad_norm": 1.0086615085601807, "learning_rate": 0.000924284889251257, "loss": 3.5933, "step": 8915 }, { "epoch": 0.6060606060606061, "grad_norm": 0.5958741903305054, "learning_rate": 0.0009242424242424242, "loss": 3.7198, "step": 8920 }, { "epoch": 0.6064003261312678, "grad_norm": 0.696983814239502, "learning_rate": 0.0009241999592335915, "loss": 3.5835, "step": 8925 }, { "epoch": 0.6067400462019296, "grad_norm": 0.8986988663673401, "learning_rate": 0.0009241574942247588, "loss": 3.821, "step": 8930 }, { "epoch": 0.6070797662725914, "grad_norm": 1.4587080478668213, "learning_rate": 0.0009241150292159261, "loss": 3.6971, "step": 8935 }, { "epoch": 0.6074194863432532, "grad_norm": 0.6396378874778748, "learning_rate": 0.0009240725642070934, "loss": 3.6322, "step": 8940 }, { "epoch": 0.6077592064139149, "grad_norm": 0.9018657803535461, "learning_rate": 0.0009240300991982607, "loss": 3.7441, "step": 8945 }, { "epoch": 0.6080989264845768, "grad_norm": 0.871687114238739, "learning_rate": 0.0009239876341894279, "loss": 3.8863, "step": 8950 }, { "epoch": 0.6084386465552385, "grad_norm": 1.0030956268310547, "learning_rate": 0.0009239451691805952, "loss": 3.6327, "step": 8955 }, { "epoch": 0.6087783666259002, "grad_norm": 0.700885534286499, "learning_rate": 0.0009239027041717624, "loss": 3.7696, "step": 8960 }, { "epoch": 0.609118086696562, "grad_norm": 0.677375853061676, "learning_rate": 0.0009238602391629297, "loss": 3.4678, "step": 8965 }, { "epoch": 0.6094578067672238, "grad_norm": 0.6456116437911987, "learning_rate": 0.0009238177741540971, "loss": 3.7062, "step": 8970 }, { "epoch": 0.6097975268378856, "grad_norm": 0.7838855385780334, "learning_rate": 0.0009237753091452643, "loss": 3.609, "step": 8975 }, { "epoch": 0.6101372469085473, "grad_norm": 1.1101454496383667, "learning_rate": 0.0009237328441364316, "loss": 3.5471, "step": 8980 }, { "epoch": 0.6104769669792092, "grad_norm": 0.7677356004714966, "learning_rate": 0.0009236903791275989, "loss": 3.4993, "step": 8985 }, { "epoch": 0.6108166870498709, "grad_norm": 0.6281837224960327, "learning_rate": 0.0009236479141187661, "loss": 3.6054, "step": 8990 }, { "epoch": 0.6111564071205327, "grad_norm": 1.1245094537734985, "learning_rate": 0.0009236054491099334, "loss": 3.9832, "step": 8995 }, { "epoch": 0.6114961271911945, "grad_norm": 0.674760103225708, "learning_rate": 0.0009235629841011007, "loss": 3.6782, "step": 9000 }, { "epoch": 0.6118358472618562, "grad_norm": 1.299085259437561, "learning_rate": 0.000923520519092268, "loss": 3.8441, "step": 9005 }, { "epoch": 0.612175567332518, "grad_norm": 0.7997239232063293, "learning_rate": 0.0009234780540834353, "loss": 3.6219, "step": 9010 }, { "epoch": 0.6125152874031797, "grad_norm": 0.7059243321418762, "learning_rate": 0.0009234355890746026, "loss": 3.6765, "step": 9015 }, { "epoch": 0.6128550074738416, "grad_norm": 0.7399749159812927, "learning_rate": 0.0009233931240657698, "loss": 3.7717, "step": 9020 }, { "epoch": 0.6131947275445033, "grad_norm": 0.9513211250305176, "learning_rate": 0.000923350659056937, "loss": 3.5431, "step": 9025 }, { "epoch": 0.6135344476151651, "grad_norm": 0.8005008101463318, "learning_rate": 0.0009233081940481044, "loss": 3.611, "step": 9030 }, { "epoch": 0.6138741676858269, "grad_norm": 0.8038389086723328, "learning_rate": 0.0009232657290392716, "loss": 3.5122, "step": 9035 }, { "epoch": 0.6142138877564887, "grad_norm": 1.1867358684539795, "learning_rate": 0.000923223264030439, "loss": 3.8666, "step": 9040 }, { "epoch": 0.6145536078271504, "grad_norm": 0.8052146434783936, "learning_rate": 0.0009231807990216063, "loss": 3.829, "step": 9045 }, { "epoch": 0.6148933278978121, "grad_norm": 0.7490555644035339, "learning_rate": 0.0009231383340127735, "loss": 3.7249, "step": 9050 }, { "epoch": 0.615233047968474, "grad_norm": 0.8196632266044617, "learning_rate": 0.0009230958690039408, "loss": 3.8551, "step": 9055 }, { "epoch": 0.6155727680391357, "grad_norm": 0.7509273290634155, "learning_rate": 0.000923053403995108, "loss": 3.7717, "step": 9060 }, { "epoch": 0.6159124881097975, "grad_norm": 0.6380864381790161, "learning_rate": 0.0009230109389862753, "loss": 3.8173, "step": 9065 }, { "epoch": 0.6162522081804593, "grad_norm": 0.6834045648574829, "learning_rate": 0.0009229684739774426, "loss": 3.5823, "step": 9070 }, { "epoch": 0.6165919282511211, "grad_norm": 0.8201780915260315, "learning_rate": 0.0009229260089686099, "loss": 3.4853, "step": 9075 }, { "epoch": 0.6169316483217828, "grad_norm": 0.6504944562911987, "learning_rate": 0.0009228835439597772, "loss": 3.7464, "step": 9080 }, { "epoch": 0.6172713683924447, "grad_norm": 0.6998506784439087, "learning_rate": 0.0009228410789509445, "loss": 3.7638, "step": 9085 }, { "epoch": 0.6176110884631064, "grad_norm": 0.6809467673301697, "learning_rate": 0.0009227986139421117, "loss": 3.4786, "step": 9090 }, { "epoch": 0.6179508085337682, "grad_norm": 0.5954441428184509, "learning_rate": 0.000922756148933279, "loss": 3.637, "step": 9095 }, { "epoch": 0.6182905286044299, "grad_norm": 0.8184671401977539, "learning_rate": 0.0009227136839244463, "loss": 3.7801, "step": 9100 }, { "epoch": 0.6186302486750918, "grad_norm": 0.7983341217041016, "learning_rate": 0.0009226712189156135, "loss": 4.0072, "step": 9105 }, { "epoch": 0.6189699687457535, "grad_norm": 0.8784645795822144, "learning_rate": 0.0009226287539067809, "loss": 3.256, "step": 9110 }, { "epoch": 0.6193096888164152, "grad_norm": 0.7210829854011536, "learning_rate": 0.0009225862888979482, "loss": 3.8032, "step": 9115 }, { "epoch": 0.6196494088870771, "grad_norm": 0.6914405226707458, "learning_rate": 0.0009225438238891154, "loss": 3.7015, "step": 9120 }, { "epoch": 0.6199891289577388, "grad_norm": 0.7073154449462891, "learning_rate": 0.0009225013588802826, "loss": 3.7968, "step": 9125 }, { "epoch": 0.6203288490284006, "grad_norm": 0.8103522658348083, "learning_rate": 0.00092245889387145, "loss": 3.8083, "step": 9130 }, { "epoch": 0.6206685690990623, "grad_norm": 0.6959781646728516, "learning_rate": 0.0009224164288626172, "loss": 3.8596, "step": 9135 }, { "epoch": 0.6210082891697242, "grad_norm": 0.6005171537399292, "learning_rate": 0.0009223739638537844, "loss": 3.5595, "step": 9140 }, { "epoch": 0.6213480092403859, "grad_norm": 0.7910379767417908, "learning_rate": 0.0009223314988449519, "loss": 3.8725, "step": 9145 }, { "epoch": 0.6216877293110477, "grad_norm": 0.6003121733665466, "learning_rate": 0.0009222890338361191, "loss": 3.5523, "step": 9150 }, { "epoch": 0.6220274493817095, "grad_norm": 0.6023795008659363, "learning_rate": 0.0009222465688272863, "loss": 3.6321, "step": 9155 }, { "epoch": 0.6223671694523712, "grad_norm": 0.6355746984481812, "learning_rate": 0.0009222041038184537, "loss": 3.7205, "step": 9160 }, { "epoch": 0.622706889523033, "grad_norm": 0.7739025354385376, "learning_rate": 0.0009221616388096209, "loss": 3.61, "step": 9165 }, { "epoch": 0.6230466095936948, "grad_norm": 0.8389310836791992, "learning_rate": 0.0009221191738007881, "loss": 3.6685, "step": 9170 }, { "epoch": 0.6233863296643566, "grad_norm": 0.8171815872192383, "learning_rate": 0.0009220767087919554, "loss": 3.6865, "step": 9175 }, { "epoch": 0.6237260497350183, "grad_norm": 1.1299707889556885, "learning_rate": 0.0009220342437831228, "loss": 3.373, "step": 9180 }, { "epoch": 0.6240657698056801, "grad_norm": 0.6047030687332153, "learning_rate": 0.00092199177877429, "loss": 3.9159, "step": 9185 }, { "epoch": 0.6244054898763419, "grad_norm": 0.719158411026001, "learning_rate": 0.0009219493137654573, "loss": 3.8218, "step": 9190 }, { "epoch": 0.6247452099470037, "grad_norm": 0.7280524969100952, "learning_rate": 0.0009219068487566246, "loss": 3.8482, "step": 9195 }, { "epoch": 0.6250849300176654, "grad_norm": 0.8552801609039307, "learning_rate": 0.0009218643837477918, "loss": 3.538, "step": 9200 }, { "epoch": 0.6254246500883273, "grad_norm": 0.7316009998321533, "learning_rate": 0.0009218219187389591, "loss": 3.7324, "step": 9205 }, { "epoch": 0.625764370158989, "grad_norm": 1.36768639087677, "learning_rate": 0.0009217794537301263, "loss": 3.9895, "step": 9210 }, { "epoch": 0.6261040902296507, "grad_norm": 0.848290741443634, "learning_rate": 0.0009217369887212937, "loss": 3.7114, "step": 9215 }, { "epoch": 0.6264438103003126, "grad_norm": 0.7369062900543213, "learning_rate": 0.000921694523712461, "loss": 3.8254, "step": 9220 }, { "epoch": 0.6267835303709743, "grad_norm": 0.7685941457748413, "learning_rate": 0.0009216520587036282, "loss": 4.1522, "step": 9225 }, { "epoch": 0.6271232504416361, "grad_norm": 0.9110896587371826, "learning_rate": 0.0009216095936947955, "loss": 3.6232, "step": 9230 }, { "epoch": 0.6274629705122978, "grad_norm": 0.6928056478500366, "learning_rate": 0.0009215671286859628, "loss": 3.6709, "step": 9235 }, { "epoch": 0.6278026905829597, "grad_norm": 0.7655983567237854, "learning_rate": 0.00092152466367713, "loss": 3.7193, "step": 9240 }, { "epoch": 0.6281424106536214, "grad_norm": 0.7509453892707825, "learning_rate": 0.0009214821986682973, "loss": 3.7139, "step": 9245 }, { "epoch": 0.6284821307242832, "grad_norm": 0.9849753379821777, "learning_rate": 0.0009214397336594647, "loss": 3.5676, "step": 9250 }, { "epoch": 0.628821850794945, "grad_norm": 0.9442876577377319, "learning_rate": 0.0009213972686506319, "loss": 3.4036, "step": 9255 }, { "epoch": 0.6291615708656068, "grad_norm": 0.6690783500671387, "learning_rate": 0.0009213548036417991, "loss": 3.683, "step": 9260 }, { "epoch": 0.6295012909362685, "grad_norm": 0.6942455768585205, "learning_rate": 0.0009213123386329665, "loss": 3.5282, "step": 9265 }, { "epoch": 0.6298410110069302, "grad_norm": 0.9975630044937134, "learning_rate": 0.0009212698736241337, "loss": 3.5586, "step": 9270 }, { "epoch": 0.6301807310775921, "grad_norm": 0.811707615852356, "learning_rate": 0.0009212274086153009, "loss": 3.6936, "step": 9275 }, { "epoch": 0.6305204511482538, "grad_norm": 0.7118427157402039, "learning_rate": 0.0009211849436064683, "loss": 3.832, "step": 9280 }, { "epoch": 0.6308601712189156, "grad_norm": 0.7813193202018738, "learning_rate": 0.0009211424785976356, "loss": 3.8221, "step": 9285 }, { "epoch": 0.6311998912895774, "grad_norm": 0.8137817978858948, "learning_rate": 0.0009211000135888028, "loss": 3.308, "step": 9290 }, { "epoch": 0.6315396113602392, "grad_norm": 0.741693377494812, "learning_rate": 0.0009210575485799702, "loss": 3.6189, "step": 9295 }, { "epoch": 0.6318793314309009, "grad_norm": 0.9716556668281555, "learning_rate": 0.0009210150835711374, "loss": 3.8412, "step": 9300 }, { "epoch": 0.6322190515015628, "grad_norm": 0.7751227617263794, "learning_rate": 0.0009209726185623046, "loss": 3.6609, "step": 9305 }, { "epoch": 0.6325587715722245, "grad_norm": 0.6161702871322632, "learning_rate": 0.000920930153553472, "loss": 3.5341, "step": 9310 }, { "epoch": 0.6328984916428863, "grad_norm": 1.1263669729232788, "learning_rate": 0.0009208876885446392, "loss": 3.5529, "step": 9315 }, { "epoch": 0.633238211713548, "grad_norm": 0.7302712798118591, "learning_rate": 0.0009208452235358065, "loss": 3.5646, "step": 9320 }, { "epoch": 0.6335779317842098, "grad_norm": 1.0156751871109009, "learning_rate": 0.0009208027585269738, "loss": 3.7378, "step": 9325 }, { "epoch": 0.6339176518548716, "grad_norm": 0.7550612688064575, "learning_rate": 0.0009207602935181411, "loss": 3.8317, "step": 9330 }, { "epoch": 0.6342573719255333, "grad_norm": 0.9245972037315369, "learning_rate": 0.0009207178285093083, "loss": 3.9464, "step": 9335 }, { "epoch": 0.6345970919961952, "grad_norm": 0.7483823895454407, "learning_rate": 0.0009206753635004756, "loss": 3.8021, "step": 9340 }, { "epoch": 0.6349368120668569, "grad_norm": 0.969968855381012, "learning_rate": 0.0009206328984916429, "loss": 3.6749, "step": 9345 }, { "epoch": 0.6352765321375187, "grad_norm": 0.6859450340270996, "learning_rate": 0.0009205904334828101, "loss": 3.7359, "step": 9350 }, { "epoch": 0.6356162522081804, "grad_norm": 0.5592966675758362, "learning_rate": 0.0009205479684739775, "loss": 3.6339, "step": 9355 }, { "epoch": 0.6359559722788423, "grad_norm": 0.736538290977478, "learning_rate": 0.0009205055034651447, "loss": 3.6134, "step": 9360 }, { "epoch": 0.636295692349504, "grad_norm": 1.0042273998260498, "learning_rate": 0.000920463038456312, "loss": 3.8557, "step": 9365 }, { "epoch": 0.6366354124201657, "grad_norm": 0.577450156211853, "learning_rate": 0.0009204205734474793, "loss": 3.9904, "step": 9370 }, { "epoch": 0.6369751324908276, "grad_norm": 0.8989750742912292, "learning_rate": 0.0009203781084386465, "loss": 3.6782, "step": 9375 }, { "epoch": 0.6373148525614893, "grad_norm": 3.73222017288208, "learning_rate": 0.0009203356434298139, "loss": 3.6033, "step": 9380 }, { "epoch": 0.6376545726321511, "grad_norm": 0.6811506748199463, "learning_rate": 0.0009202931784209811, "loss": 3.7267, "step": 9385 }, { "epoch": 0.6379942927028129, "grad_norm": 0.7121829986572266, "learning_rate": 0.0009202507134121484, "loss": 3.6227, "step": 9390 }, { "epoch": 0.6383340127734747, "grad_norm": 0.9353055357933044, "learning_rate": 0.0009202082484033158, "loss": 3.7228, "step": 9395 }, { "epoch": 0.6386737328441364, "grad_norm": 0.759248673915863, "learning_rate": 0.000920165783394483, "loss": 3.6836, "step": 9400 }, { "epoch": 0.6390134529147982, "grad_norm": 0.9816084504127502, "learning_rate": 0.0009201233183856502, "loss": 3.6267, "step": 9405 }, { "epoch": 0.63935317298546, "grad_norm": 0.7951840162277222, "learning_rate": 0.0009200808533768175, "loss": 3.6687, "step": 9410 }, { "epoch": 0.6396928930561218, "grad_norm": 0.8741581439971924, "learning_rate": 0.0009200383883679848, "loss": 3.5428, "step": 9415 }, { "epoch": 0.6400326131267835, "grad_norm": 0.8632642030715942, "learning_rate": 0.000919995923359152, "loss": 3.6862, "step": 9420 }, { "epoch": 0.6403723331974454, "grad_norm": 0.7152655124664307, "learning_rate": 0.0009199534583503194, "loss": 3.731, "step": 9425 }, { "epoch": 0.6407120532681071, "grad_norm": 0.8840948343276978, "learning_rate": 0.0009199109933414867, "loss": 3.6414, "step": 9430 }, { "epoch": 0.6410517733387688, "grad_norm": 1.1780478954315186, "learning_rate": 0.0009198685283326539, "loss": 3.5598, "step": 9435 }, { "epoch": 0.6413914934094306, "grad_norm": 0.7209941744804382, "learning_rate": 0.0009198260633238212, "loss": 3.6987, "step": 9440 }, { "epoch": 0.6417312134800924, "grad_norm": 0.813639223575592, "learning_rate": 0.0009197835983149885, "loss": 3.6755, "step": 9445 }, { "epoch": 0.6420709335507542, "grad_norm": 0.7683884501457214, "learning_rate": 0.0009197411333061557, "loss": 3.6572, "step": 9450 }, { "epoch": 0.6424106536214159, "grad_norm": 0.8849777579307556, "learning_rate": 0.0009196986682973231, "loss": 3.5845, "step": 9455 }, { "epoch": 0.6427503736920778, "grad_norm": 0.8575959205627441, "learning_rate": 0.0009196562032884903, "loss": 3.7525, "step": 9460 }, { "epoch": 0.6430900937627395, "grad_norm": 0.7876947522163391, "learning_rate": 0.0009196137382796576, "loss": 3.5424, "step": 9465 }, { "epoch": 0.6434298138334013, "grad_norm": 0.8727036714553833, "learning_rate": 0.0009195712732708249, "loss": 3.6765, "step": 9470 }, { "epoch": 0.6437695339040631, "grad_norm": 1.061108112335205, "learning_rate": 0.0009195288082619921, "loss": 3.8255, "step": 9475 }, { "epoch": 0.6441092539747248, "grad_norm": 0.6705709099769592, "learning_rate": 0.0009194863432531594, "loss": 3.5106, "step": 9480 }, { "epoch": 0.6444489740453866, "grad_norm": 0.7401744723320007, "learning_rate": 0.0009194438782443267, "loss": 4.1239, "step": 9485 }, { "epoch": 0.6447886941160483, "grad_norm": 0.8474775552749634, "learning_rate": 0.000919401413235494, "loss": 3.6468, "step": 9490 }, { "epoch": 0.6451284141867102, "grad_norm": 0.7192711234092712, "learning_rate": 0.0009193589482266613, "loss": 3.6743, "step": 9495 }, { "epoch": 0.6454681342573719, "grad_norm": 0.6971704363822937, "learning_rate": 0.0009193164832178286, "loss": 3.5711, "step": 9500 }, { "epoch": 0.6458078543280337, "grad_norm": 0.5742700099945068, "learning_rate": 0.0009192740182089958, "loss": 3.4929, "step": 9505 }, { "epoch": 0.6461475743986955, "grad_norm": 0.8801677823066711, "learning_rate": 0.000919231553200163, "loss": 3.5568, "step": 9510 }, { "epoch": 0.6464872944693573, "grad_norm": 0.7414723038673401, "learning_rate": 0.0009191890881913304, "loss": 3.788, "step": 9515 }, { "epoch": 0.646827014540019, "grad_norm": 0.9319229125976562, "learning_rate": 0.0009191466231824976, "loss": 3.7875, "step": 9520 }, { "epoch": 0.6471667346106807, "grad_norm": 0.8426511287689209, "learning_rate": 0.0009191041581736649, "loss": 3.7638, "step": 9525 }, { "epoch": 0.6475064546813426, "grad_norm": 0.6494197249412537, "learning_rate": 0.0009190616931648323, "loss": 3.8266, "step": 9530 }, { "epoch": 0.6478461747520043, "grad_norm": 0.7874410152435303, "learning_rate": 0.0009190192281559995, "loss": 3.6111, "step": 9535 }, { "epoch": 0.6481858948226661, "grad_norm": 0.9318093061447144, "learning_rate": 0.0009189767631471667, "loss": 3.8188, "step": 9540 }, { "epoch": 0.6485256148933279, "grad_norm": 1.0846035480499268, "learning_rate": 0.0009189342981383341, "loss": 3.7991, "step": 9545 }, { "epoch": 0.6488653349639897, "grad_norm": 0.718104362487793, "learning_rate": 0.0009188918331295013, "loss": 3.8044, "step": 9550 }, { "epoch": 0.6492050550346514, "grad_norm": 1.0362039804458618, "learning_rate": 0.0009188493681206685, "loss": 3.6264, "step": 9555 }, { "epoch": 0.6495447751053133, "grad_norm": 0.7583540081977844, "learning_rate": 0.000918806903111836, "loss": 3.7273, "step": 9560 }, { "epoch": 0.649884495175975, "grad_norm": 0.8064551949501038, "learning_rate": 0.0009187644381030032, "loss": 3.7833, "step": 9565 }, { "epoch": 0.6502242152466368, "grad_norm": 0.7205809354782104, "learning_rate": 0.0009187219730941704, "loss": 3.6088, "step": 9570 }, { "epoch": 0.6505639353172985, "grad_norm": 0.9827800393104553, "learning_rate": 0.0009186795080853377, "loss": 3.4921, "step": 9575 }, { "epoch": 0.6509036553879604, "grad_norm": 0.726604163646698, "learning_rate": 0.000918637043076505, "loss": 3.6566, "step": 9580 }, { "epoch": 0.6512433754586221, "grad_norm": 0.9542114734649658, "learning_rate": 0.0009185945780676722, "loss": 3.552, "step": 9585 }, { "epoch": 0.6515830955292838, "grad_norm": 0.5727741122245789, "learning_rate": 0.0009185521130588395, "loss": 3.7092, "step": 9590 }, { "epoch": 0.6519228155999457, "grad_norm": 0.868331789970398, "learning_rate": 0.0009185096480500069, "loss": 3.7067, "step": 9595 }, { "epoch": 0.6522625356706074, "grad_norm": 0.9321011900901794, "learning_rate": 0.0009184671830411741, "loss": 3.5555, "step": 9600 }, { "epoch": 0.6526022557412692, "grad_norm": 0.6808778047561646, "learning_rate": 0.0009184247180323414, "loss": 3.7372, "step": 9605 }, { "epoch": 0.6529419758119309, "grad_norm": 1.1563987731933594, "learning_rate": 0.0009183822530235086, "loss": 3.7393, "step": 9610 }, { "epoch": 0.6532816958825928, "grad_norm": 0.7172516584396362, "learning_rate": 0.0009183397880146759, "loss": 3.6702, "step": 9615 }, { "epoch": 0.6536214159532545, "grad_norm": 0.8999543190002441, "learning_rate": 0.0009182973230058432, "loss": 3.6403, "step": 9620 }, { "epoch": 0.6539611360239163, "grad_norm": 0.8441185355186462, "learning_rate": 0.0009182548579970104, "loss": 3.8175, "step": 9625 }, { "epoch": 0.6543008560945781, "grad_norm": 3.018766164779663, "learning_rate": 0.0009182123929881778, "loss": 3.7066, "step": 9630 }, { "epoch": 0.6546405761652399, "grad_norm": 0.7492625117301941, "learning_rate": 0.0009181699279793451, "loss": 3.6426, "step": 9635 }, { "epoch": 0.6549802962359016, "grad_norm": 1.1565454006195068, "learning_rate": 0.0009181274629705123, "loss": 3.6956, "step": 9640 }, { "epoch": 0.6553200163065634, "grad_norm": 0.693215548992157, "learning_rate": 0.0009180849979616795, "loss": 3.563, "step": 9645 }, { "epoch": 0.6556597363772252, "grad_norm": 0.891947329044342, "learning_rate": 0.0009180425329528469, "loss": 3.2896, "step": 9650 }, { "epoch": 0.6559994564478869, "grad_norm": 0.8147547841072083, "learning_rate": 0.0009180000679440141, "loss": 3.6974, "step": 9655 }, { "epoch": 0.6563391765185487, "grad_norm": 0.7547804713249207, "learning_rate": 0.0009179576029351813, "loss": 4.0684, "step": 9660 }, { "epoch": 0.6566788965892105, "grad_norm": 1.2950103282928467, "learning_rate": 0.0009179151379263488, "loss": 3.5927, "step": 9665 }, { "epoch": 0.6570186166598723, "grad_norm": 0.9161685705184937, "learning_rate": 0.000917872672917516, "loss": 3.2841, "step": 9670 }, { "epoch": 0.657358336730534, "grad_norm": 1.213436245918274, "learning_rate": 0.0009178302079086832, "loss": 3.4569, "step": 9675 }, { "epoch": 0.6576980568011959, "grad_norm": 0.6643913388252258, "learning_rate": 0.0009177877428998506, "loss": 3.9584, "step": 9680 }, { "epoch": 0.6580377768718576, "grad_norm": 1.6521121263504028, "learning_rate": 0.0009177452778910178, "loss": 3.4733, "step": 9685 }, { "epoch": 0.6583774969425193, "grad_norm": 1.1565953493118286, "learning_rate": 0.000917702812882185, "loss": 3.4032, "step": 9690 }, { "epoch": 0.6587172170131811, "grad_norm": 0.8639934659004211, "learning_rate": 0.0009176603478733524, "loss": 3.7065, "step": 9695 }, { "epoch": 0.6590569370838429, "grad_norm": 1.243613839149475, "learning_rate": 0.0009176178828645197, "loss": 3.6787, "step": 9700 }, { "epoch": 0.6593966571545047, "grad_norm": 2.7069647312164307, "learning_rate": 0.0009175754178556869, "loss": 3.658, "step": 9705 }, { "epoch": 0.6597363772251664, "grad_norm": 0.6198696494102478, "learning_rate": 0.0009175329528468542, "loss": 3.6963, "step": 9710 }, { "epoch": 0.6600760972958283, "grad_norm": 1.543363332748413, "learning_rate": 0.0009174904878380215, "loss": 3.6742, "step": 9715 }, { "epoch": 0.66041581736649, "grad_norm": 0.6742938756942749, "learning_rate": 0.0009174480228291888, "loss": 4.1696, "step": 9720 }, { "epoch": 0.6607555374371518, "grad_norm": 0.805629312992096, "learning_rate": 0.000917405557820356, "loss": 3.8163, "step": 9725 }, { "epoch": 0.6610952575078136, "grad_norm": 0.829976499080658, "learning_rate": 0.0009173630928115233, "loss": 3.4341, "step": 9730 }, { "epoch": 0.6614349775784754, "grad_norm": 0.6362728476524353, "learning_rate": 0.0009173206278026907, "loss": 3.5633, "step": 9735 }, { "epoch": 0.6617746976491371, "grad_norm": 0.964539110660553, "learning_rate": 0.0009172781627938579, "loss": 3.6039, "step": 9740 }, { "epoch": 0.6621144177197988, "grad_norm": 0.7047832012176514, "learning_rate": 0.0009172356977850252, "loss": 3.6552, "step": 9745 }, { "epoch": 0.6624541377904607, "grad_norm": 0.9688942432403564, "learning_rate": 0.0009171932327761925, "loss": 3.7936, "step": 9750 }, { "epoch": 0.6627938578611224, "grad_norm": 0.9333823323249817, "learning_rate": 0.0009171507677673597, "loss": 3.8512, "step": 9755 }, { "epoch": 0.6631335779317842, "grad_norm": 6.650848865509033, "learning_rate": 0.0009171083027585269, "loss": 3.5858, "step": 9760 }, { "epoch": 0.663473298002446, "grad_norm": 1.0969469547271729, "learning_rate": 0.0009170658377496943, "loss": 3.3736, "step": 9765 }, { "epoch": 0.6638130180731078, "grad_norm": 0.7564310431480408, "learning_rate": 0.0009170233727408616, "loss": 3.8052, "step": 9770 }, { "epoch": 0.6641527381437695, "grad_norm": 4.969074249267578, "learning_rate": 0.0009169809077320288, "loss": 3.4517, "step": 9775 }, { "epoch": 0.6644924582144313, "grad_norm": 0.6969420909881592, "learning_rate": 0.0009169384427231962, "loss": 3.9266, "step": 9780 }, { "epoch": 0.6648321782850931, "grad_norm": 0.6856746673583984, "learning_rate": 0.0009168959777143634, "loss": 3.882, "step": 9785 }, { "epoch": 0.6651718983557549, "grad_norm": 1.7262550592422485, "learning_rate": 0.0009168535127055306, "loss": 3.6014, "step": 9790 }, { "epoch": 0.6655116184264166, "grad_norm": 0.9507957100868225, "learning_rate": 0.000916811047696698, "loss": 3.4152, "step": 9795 }, { "epoch": 0.6658513384970784, "grad_norm": 0.7823410630226135, "learning_rate": 0.0009167685826878652, "loss": 3.5076, "step": 9800 }, { "epoch": 0.6661910585677402, "grad_norm": 0.7469034790992737, "learning_rate": 0.0009167261176790325, "loss": 3.6165, "step": 9805 }, { "epoch": 0.6665307786384019, "grad_norm": 0.634499192237854, "learning_rate": 0.0009166836526701998, "loss": 3.7085, "step": 9810 }, { "epoch": 0.6668704987090638, "grad_norm": 0.7854308485984802, "learning_rate": 0.0009166411876613671, "loss": 3.8342, "step": 9815 }, { "epoch": 0.6672102187797255, "grad_norm": 3.4686992168426514, "learning_rate": 0.0009165987226525343, "loss": 3.9239, "step": 9820 }, { "epoch": 0.6675499388503873, "grad_norm": 0.7814885377883911, "learning_rate": 0.0009165562576437016, "loss": 3.8134, "step": 9825 }, { "epoch": 0.667889658921049, "grad_norm": 0.7047469019889832, "learning_rate": 0.0009165137926348689, "loss": 3.6561, "step": 9830 }, { "epoch": 0.6682293789917109, "grad_norm": 0.8466688394546509, "learning_rate": 0.0009164713276260361, "loss": 3.6466, "step": 9835 }, { "epoch": 0.6685690990623726, "grad_norm": 0.7483973503112793, "learning_rate": 0.0009164288626172035, "loss": 3.7207, "step": 9840 }, { "epoch": 0.6689088191330343, "grad_norm": 8.970659255981445, "learning_rate": 0.0009163863976083708, "loss": 3.7573, "step": 9845 }, { "epoch": 0.6692485392036962, "grad_norm": 0.7494022846221924, "learning_rate": 0.000916343932599538, "loss": 3.5658, "step": 9850 }, { "epoch": 0.6695882592743579, "grad_norm": 0.968916654586792, "learning_rate": 0.0009163014675907053, "loss": 3.9541, "step": 9855 }, { "epoch": 0.6699279793450197, "grad_norm": 0.6635299921035767, "learning_rate": 0.0009162590025818725, "loss": 3.801, "step": 9860 }, { "epoch": 0.6702676994156814, "grad_norm": 1.0608104467391968, "learning_rate": 0.0009162165375730398, "loss": 3.8306, "step": 9865 }, { "epoch": 0.6706074194863433, "grad_norm": 0.799565851688385, "learning_rate": 0.0009161740725642071, "loss": 3.5243, "step": 9870 }, { "epoch": 0.670947139557005, "grad_norm": 1.1245368719100952, "learning_rate": 0.0009161316075553744, "loss": 4.0492, "step": 9875 }, { "epoch": 0.6712868596276668, "grad_norm": 0.9857480525970459, "learning_rate": 0.0009160891425465417, "loss": 3.784, "step": 9880 }, { "epoch": 0.6716265796983286, "grad_norm": 0.7099499106407166, "learning_rate": 0.000916046677537709, "loss": 3.8156, "step": 9885 }, { "epoch": 0.6719662997689904, "grad_norm": 3.200536012649536, "learning_rate": 0.0009160042125288762, "loss": 3.6041, "step": 9890 }, { "epoch": 0.6723060198396521, "grad_norm": 1.211044430732727, "learning_rate": 0.0009159617475200434, "loss": 3.5227, "step": 9895 }, { "epoch": 0.672645739910314, "grad_norm": 0.7891578078269958, "learning_rate": 0.0009159192825112108, "loss": 3.7963, "step": 9900 }, { "epoch": 0.6729854599809757, "grad_norm": 1.4241113662719727, "learning_rate": 0.000915876817502378, "loss": 3.4813, "step": 9905 }, { "epoch": 0.6733251800516374, "grad_norm": 0.7094100713729858, "learning_rate": 0.0009158343524935453, "loss": 3.7674, "step": 9910 }, { "epoch": 0.6736649001222992, "grad_norm": 0.7429136633872986, "learning_rate": 0.0009157918874847127, "loss": 3.7815, "step": 9915 }, { "epoch": 0.674004620192961, "grad_norm": 0.8643985390663147, "learning_rate": 0.0009157494224758799, "loss": 3.71, "step": 9920 }, { "epoch": 0.6743443402636228, "grad_norm": 0.8561012148857117, "learning_rate": 0.0009157069574670471, "loss": 3.5881, "step": 9925 }, { "epoch": 0.6746840603342845, "grad_norm": 0.5853846669197083, "learning_rate": 0.0009156644924582145, "loss": 3.6732, "step": 9930 }, { "epoch": 0.6750237804049464, "grad_norm": 0.9918604493141174, "learning_rate": 0.0009156220274493817, "loss": 3.7006, "step": 9935 }, { "epoch": 0.6753635004756081, "grad_norm": 0.9428429007530212, "learning_rate": 0.0009155795624405489, "loss": 3.7501, "step": 9940 }, { "epoch": 0.6757032205462699, "grad_norm": 0.7074365615844727, "learning_rate": 0.0009155370974317164, "loss": 3.6843, "step": 9945 }, { "epoch": 0.6760429406169316, "grad_norm": 0.6907420754432678, "learning_rate": 0.0009154946324228836, "loss": 3.8587, "step": 9950 }, { "epoch": 0.6763826606875935, "grad_norm": 0.8623411059379578, "learning_rate": 0.0009154521674140508, "loss": 3.5507, "step": 9955 }, { "epoch": 0.6767223807582552, "grad_norm": 0.7827767729759216, "learning_rate": 0.0009154097024052181, "loss": 3.6811, "step": 9960 }, { "epoch": 0.6770621008289169, "grad_norm": 0.7575980424880981, "learning_rate": 0.0009153672373963854, "loss": 3.7261, "step": 9965 }, { "epoch": 0.6774018208995788, "grad_norm": 1.54831862449646, "learning_rate": 0.0009153247723875526, "loss": 3.586, "step": 9970 }, { "epoch": 0.6777415409702405, "grad_norm": 1.6377944946289062, "learning_rate": 0.00091528230737872, "loss": 3.6452, "step": 9975 }, { "epoch": 0.6780812610409023, "grad_norm": 0.7046222686767578, "learning_rate": 0.0009152398423698873, "loss": 3.6725, "step": 9980 }, { "epoch": 0.6784209811115641, "grad_norm": 1.8921164274215698, "learning_rate": 0.0009151973773610545, "loss": 3.6119, "step": 9985 }, { "epoch": 0.6787607011822259, "grad_norm": 0.6316943168640137, "learning_rate": 0.0009151549123522218, "loss": 3.6952, "step": 9990 }, { "epoch": 0.6791004212528876, "grad_norm": 1.143775224685669, "learning_rate": 0.000915112447343389, "loss": 3.4317, "step": 9995 }, { "epoch": 0.6794401413235494, "grad_norm": 0.7595008015632629, "learning_rate": 0.0009150699823345563, "loss": 3.5677, "step": 10000 }, { "epoch": 0.6797798613942112, "grad_norm": 0.9786950349807739, "learning_rate": 0.0009150275173257236, "loss": 3.886, "step": 10005 }, { "epoch": 0.680119581464873, "grad_norm": 0.9359726309776306, "learning_rate": 0.0009149850523168909, "loss": 3.7313, "step": 10010 }, { "epoch": 0.6804593015355347, "grad_norm": 0.8002122044563293, "learning_rate": 0.0009149425873080582, "loss": 3.6052, "step": 10015 }, { "epoch": 0.6807990216061965, "grad_norm": 1.1193161010742188, "learning_rate": 0.0009149001222992255, "loss": 3.5973, "step": 10020 }, { "epoch": 0.6811387416768583, "grad_norm": 0.7547570466995239, "learning_rate": 0.0009148576572903927, "loss": 3.7883, "step": 10025 }, { "epoch": 0.68147846174752, "grad_norm": 0.8184522986412048, "learning_rate": 0.00091481519228156, "loss": 3.5993, "step": 10030 }, { "epoch": 0.6818181818181818, "grad_norm": 0.7300489544868469, "learning_rate": 0.0009147727272727273, "loss": 3.9503, "step": 10035 }, { "epoch": 0.6821579018888436, "grad_norm": 0.6881535053253174, "learning_rate": 0.0009147302622638945, "loss": 3.6524, "step": 10040 }, { "epoch": 0.6824976219595054, "grad_norm": 0.850186288356781, "learning_rate": 0.0009146877972550618, "loss": 3.5654, "step": 10045 }, { "epoch": 0.6828373420301671, "grad_norm": 1.0091590881347656, "learning_rate": 0.0009146453322462292, "loss": 3.4787, "step": 10050 }, { "epoch": 0.683177062100829, "grad_norm": 0.7153178453445435, "learning_rate": 0.0009146028672373964, "loss": 3.7557, "step": 10055 }, { "epoch": 0.6835167821714907, "grad_norm": 0.7237783670425415, "learning_rate": 0.0009145604022285637, "loss": 3.863, "step": 10060 }, { "epoch": 0.6838565022421524, "grad_norm": 0.7217298746109009, "learning_rate": 0.000914517937219731, "loss": 3.7342, "step": 10065 }, { "epoch": 0.6841962223128143, "grad_norm": 0.6769093871116638, "learning_rate": 0.0009144754722108982, "loss": 3.9304, "step": 10070 }, { "epoch": 0.684535942383476, "grad_norm": 1.0798230171203613, "learning_rate": 0.0009144330072020655, "loss": 3.3839, "step": 10075 }, { "epoch": 0.6848756624541378, "grad_norm": 0.9155729413032532, "learning_rate": 0.0009143905421932329, "loss": 3.6162, "step": 10080 }, { "epoch": 0.6852153825247995, "grad_norm": 0.8103668093681335, "learning_rate": 0.0009143480771844001, "loss": 3.7364, "step": 10085 }, { "epoch": 0.6855551025954614, "grad_norm": 0.8245783448219299, "learning_rate": 0.0009143056121755674, "loss": 3.6082, "step": 10090 }, { "epoch": 0.6858948226661231, "grad_norm": 0.9438191056251526, "learning_rate": 0.0009142631471667346, "loss": 3.6008, "step": 10095 }, { "epoch": 0.6862345427367849, "grad_norm": 1.074062705039978, "learning_rate": 0.0009142206821579019, "loss": 3.7567, "step": 10100 }, { "epoch": 0.6865742628074467, "grad_norm": 0.8823865056037903, "learning_rate": 0.0009141782171490692, "loss": 3.6116, "step": 10105 }, { "epoch": 0.6869139828781085, "grad_norm": 0.7746458649635315, "learning_rate": 0.0009141357521402364, "loss": 3.6769, "step": 10110 }, { "epoch": 0.6872537029487702, "grad_norm": 0.7455337643623352, "learning_rate": 0.0009140932871314038, "loss": 3.821, "step": 10115 }, { "epoch": 0.6875934230194319, "grad_norm": 6.852608680725098, "learning_rate": 0.0009140508221225711, "loss": 3.4532, "step": 10120 }, { "epoch": 0.6879331430900938, "grad_norm": 0.8582409620285034, "learning_rate": 0.0009140083571137383, "loss": 3.8375, "step": 10125 }, { "epoch": 0.6882728631607555, "grad_norm": 0.8052330613136292, "learning_rate": 0.0009139658921049056, "loss": 3.5496, "step": 10130 }, { "epoch": 0.6886125832314173, "grad_norm": 0.5799731612205505, "learning_rate": 0.0009139234270960729, "loss": 3.7185, "step": 10135 }, { "epoch": 0.6889523033020791, "grad_norm": 0.8182925581932068, "learning_rate": 0.0009138809620872401, "loss": 3.5338, "step": 10140 }, { "epoch": 0.6892920233727409, "grad_norm": 0.7465730905532837, "learning_rate": 0.0009138384970784073, "loss": 3.8362, "step": 10145 }, { "epoch": 0.6896317434434026, "grad_norm": 0.8224843144416809, "learning_rate": 0.0009137960320695748, "loss": 3.7746, "step": 10150 }, { "epoch": 0.6899714635140645, "grad_norm": 1.3773318529129028, "learning_rate": 0.000913753567060742, "loss": 3.6719, "step": 10155 }, { "epoch": 0.6903111835847262, "grad_norm": 0.9082929491996765, "learning_rate": 0.0009137111020519092, "loss": 3.6289, "step": 10160 }, { "epoch": 0.690650903655388, "grad_norm": 0.9704210758209229, "learning_rate": 0.0009136686370430766, "loss": 3.5949, "step": 10165 }, { "epoch": 0.6909906237260497, "grad_norm": 0.8680222034454346, "learning_rate": 0.0009136261720342438, "loss": 3.6271, "step": 10170 }, { "epoch": 0.6913303437967115, "grad_norm": 0.7385654449462891, "learning_rate": 0.000913583707025411, "loss": 3.5062, "step": 10175 }, { "epoch": 0.6916700638673733, "grad_norm": 0.9444783926010132, "learning_rate": 0.0009135412420165784, "loss": 3.9319, "step": 10180 }, { "epoch": 0.692009783938035, "grad_norm": 0.7116833329200745, "learning_rate": 0.0009134987770077457, "loss": 3.3171, "step": 10185 }, { "epoch": 0.6923495040086969, "grad_norm": 0.748099148273468, "learning_rate": 0.0009134563119989129, "loss": 3.5612, "step": 10190 }, { "epoch": 0.6926892240793586, "grad_norm": 1.1757534742355347, "learning_rate": 0.0009134138469900802, "loss": 3.7673, "step": 10195 }, { "epoch": 0.6930289441500204, "grad_norm": 0.8531996607780457, "learning_rate": 0.0009133713819812475, "loss": 3.691, "step": 10200 }, { "epoch": 0.6933686642206821, "grad_norm": 1.682693600654602, "learning_rate": 0.0009133289169724147, "loss": 3.4147, "step": 10205 }, { "epoch": 0.693708384291344, "grad_norm": 1.2797712087631226, "learning_rate": 0.000913286451963582, "loss": 3.6056, "step": 10210 }, { "epoch": 0.6940481043620057, "grad_norm": 0.814225971698761, "learning_rate": 0.0009132439869547493, "loss": 3.7924, "step": 10215 }, { "epoch": 0.6943878244326674, "grad_norm": 4.302161693572998, "learning_rate": 0.0009132015219459166, "loss": 3.7744, "step": 10220 }, { "epoch": 0.6947275445033293, "grad_norm": 1.2015944719314575, "learning_rate": 0.0009131590569370839, "loss": 3.7404, "step": 10225 }, { "epoch": 0.695067264573991, "grad_norm": 0.7665482759475708, "learning_rate": 0.0009131165919282512, "loss": 3.7591, "step": 10230 }, { "epoch": 0.6954069846446528, "grad_norm": 0.8855556845664978, "learning_rate": 0.0009130741269194184, "loss": 3.712, "step": 10235 }, { "epoch": 0.6957467047153146, "grad_norm": 1.6043473482131958, "learning_rate": 0.0009130316619105857, "loss": 3.6952, "step": 10240 }, { "epoch": 0.6960864247859764, "grad_norm": 0.7334523797035217, "learning_rate": 0.0009129891969017529, "loss": 3.8594, "step": 10245 }, { "epoch": 0.6964261448566381, "grad_norm": 0.8549174666404724, "learning_rate": 0.0009129467318929202, "loss": 3.6672, "step": 10250 }, { "epoch": 0.6967658649272999, "grad_norm": 1.082798719406128, "learning_rate": 0.0009129042668840876, "loss": 3.5842, "step": 10255 }, { "epoch": 0.6971055849979617, "grad_norm": 1.079723596572876, "learning_rate": 0.0009128618018752548, "loss": 3.7358, "step": 10260 }, { "epoch": 0.6974453050686235, "grad_norm": 0.9810878038406372, "learning_rate": 0.0009128193368664221, "loss": 3.6857, "step": 10265 }, { "epoch": 0.6977850251392852, "grad_norm": 0.8088982105255127, "learning_rate": 0.0009127768718575894, "loss": 3.8377, "step": 10270 }, { "epoch": 0.698124745209947, "grad_norm": 0.7503846883773804, "learning_rate": 0.0009127344068487566, "loss": 3.5365, "step": 10275 }, { "epoch": 0.6984644652806088, "grad_norm": 0.9040347933769226, "learning_rate": 0.0009126919418399238, "loss": 3.6803, "step": 10280 }, { "epoch": 0.6988041853512705, "grad_norm": 0.7197749018669128, "learning_rate": 0.0009126494768310912, "loss": 3.6487, "step": 10285 }, { "epoch": 0.6991439054219323, "grad_norm": 0.8298256397247314, "learning_rate": 0.0009126070118222585, "loss": 3.5874, "step": 10290 }, { "epoch": 0.6994836254925941, "grad_norm": 1.4128953218460083, "learning_rate": 0.0009125645468134257, "loss": 3.3925, "step": 10295 }, { "epoch": 0.6998233455632559, "grad_norm": 0.8453262448310852, "learning_rate": 0.0009125220818045931, "loss": 3.6807, "step": 10300 }, { "epoch": 0.7001630656339176, "grad_norm": 1.091887354850769, "learning_rate": 0.0009124796167957603, "loss": 3.6035, "step": 10305 }, { "epoch": 0.7005027857045795, "grad_norm": 1.373945713043213, "learning_rate": 0.0009124371517869275, "loss": 3.5907, "step": 10310 }, { "epoch": 0.7008425057752412, "grad_norm": 1.0452311038970947, "learning_rate": 0.0009123946867780949, "loss": 3.7165, "step": 10315 }, { "epoch": 0.701182225845903, "grad_norm": 0.75932776927948, "learning_rate": 0.0009123522217692621, "loss": 3.5479, "step": 10320 }, { "epoch": 0.7015219459165648, "grad_norm": 0.6936649680137634, "learning_rate": 0.0009123097567604294, "loss": 3.5975, "step": 10325 }, { "epoch": 0.7018616659872265, "grad_norm": 0.9894376993179321, "learning_rate": 0.0009122672917515968, "loss": 3.6726, "step": 10330 }, { "epoch": 0.7022013860578883, "grad_norm": 0.7210821509361267, "learning_rate": 0.000912224826742764, "loss": 3.6174, "step": 10335 }, { "epoch": 0.70254110612855, "grad_norm": 0.6754486560821533, "learning_rate": 0.0009121823617339312, "loss": 3.5336, "step": 10340 }, { "epoch": 0.7028808261992119, "grad_norm": 0.7861688137054443, "learning_rate": 0.0009121398967250985, "loss": 3.7957, "step": 10345 }, { "epoch": 0.7032205462698736, "grad_norm": 0.6608583927154541, "learning_rate": 0.0009120974317162658, "loss": 3.653, "step": 10350 }, { "epoch": 0.7035602663405354, "grad_norm": 0.7717050909996033, "learning_rate": 0.000912054966707433, "loss": 3.7493, "step": 10355 }, { "epoch": 0.7038999864111972, "grad_norm": 1.1313576698303223, "learning_rate": 0.0009120125016986004, "loss": 3.9364, "step": 10360 }, { "epoch": 0.704239706481859, "grad_norm": 0.8983592987060547, "learning_rate": 0.0009119700366897677, "loss": 3.4091, "step": 10365 }, { "epoch": 0.7045794265525207, "grad_norm": 0.9312572479248047, "learning_rate": 0.0009119275716809349, "loss": 3.7145, "step": 10370 }, { "epoch": 0.7049191466231824, "grad_norm": 3.520451307296753, "learning_rate": 0.0009118851066721022, "loss": 3.6161, "step": 10375 }, { "epoch": 0.7052588666938443, "grad_norm": 0.654140055179596, "learning_rate": 0.0009118426416632694, "loss": 3.8116, "step": 10380 }, { "epoch": 0.705598586764506, "grad_norm": 0.8536202907562256, "learning_rate": 0.0009118001766544367, "loss": 3.7052, "step": 10385 }, { "epoch": 0.7059383068351678, "grad_norm": 0.7853626608848572, "learning_rate": 0.000911757711645604, "loss": 3.7022, "step": 10390 }, { "epoch": 0.7062780269058296, "grad_norm": 0.7837145924568176, "learning_rate": 0.0009117152466367713, "loss": 3.7579, "step": 10395 }, { "epoch": 0.7066177469764914, "grad_norm": 0.6652942299842834, "learning_rate": 0.0009116727816279387, "loss": 3.6918, "step": 10400 }, { "epoch": 0.7069574670471531, "grad_norm": 0.782294750213623, "learning_rate": 0.0009116303166191059, "loss": 3.5485, "step": 10405 }, { "epoch": 0.707297187117815, "grad_norm": 0.6917309761047363, "learning_rate": 0.0009115878516102731, "loss": 3.7118, "step": 10410 }, { "epoch": 0.7076369071884767, "grad_norm": 0.7393103837966919, "learning_rate": 0.0009115453866014405, "loss": 3.7631, "step": 10415 }, { "epoch": 0.7079766272591385, "grad_norm": 1.7117981910705566, "learning_rate": 0.0009115029215926077, "loss": 3.8354, "step": 10420 }, { "epoch": 0.7083163473298002, "grad_norm": 1.0539406538009644, "learning_rate": 0.0009114604565837749, "loss": 3.8607, "step": 10425 }, { "epoch": 0.708656067400462, "grad_norm": 0.8765161633491516, "learning_rate": 0.0009114179915749424, "loss": 3.913, "step": 10430 }, { "epoch": 0.7089957874711238, "grad_norm": 0.679240345954895, "learning_rate": 0.0009113755265661096, "loss": 3.8422, "step": 10435 }, { "epoch": 0.7093355075417855, "grad_norm": 0.7008655667304993, "learning_rate": 0.0009113330615572768, "loss": 3.8569, "step": 10440 }, { "epoch": 0.7096752276124474, "grad_norm": 0.6589189767837524, "learning_rate": 0.0009112905965484441, "loss": 3.8421, "step": 10445 }, { "epoch": 0.7100149476831091, "grad_norm": 0.7388678193092346, "learning_rate": 0.0009112481315396114, "loss": 3.7708, "step": 10450 }, { "epoch": 0.7103546677537709, "grad_norm": 0.7643890380859375, "learning_rate": 0.0009112056665307786, "loss": 3.8267, "step": 10455 }, { "epoch": 0.7106943878244326, "grad_norm": 3.0079329013824463, "learning_rate": 0.0009111632015219459, "loss": 3.6751, "step": 10460 }, { "epoch": 0.7110341078950945, "grad_norm": 1.0114420652389526, "learning_rate": 0.0009111207365131133, "loss": 3.6703, "step": 10465 }, { "epoch": 0.7113738279657562, "grad_norm": 0.6157741546630859, "learning_rate": 0.0009110782715042805, "loss": 3.8127, "step": 10470 }, { "epoch": 0.711713548036418, "grad_norm": 1.0686148405075073, "learning_rate": 0.0009110358064954478, "loss": 3.6019, "step": 10475 }, { "epoch": 0.7120532681070798, "grad_norm": 0.7928467988967896, "learning_rate": 0.000910993341486615, "loss": 3.68, "step": 10480 }, { "epoch": 0.7123929881777415, "grad_norm": 0.9108426570892334, "learning_rate": 0.0009109508764777823, "loss": 3.4585, "step": 10485 }, { "epoch": 0.7127327082484033, "grad_norm": 0.7484140992164612, "learning_rate": 0.0009109084114689496, "loss": 3.7196, "step": 10490 }, { "epoch": 0.7130724283190651, "grad_norm": 1.058139443397522, "learning_rate": 0.0009108659464601168, "loss": 3.6579, "step": 10495 }, { "epoch": 0.7134121483897269, "grad_norm": 0.7690451741218567, "learning_rate": 0.0009108234814512842, "loss": 3.7067, "step": 10500 }, { "epoch": 0.7137518684603886, "grad_norm": 0.8497024178504944, "learning_rate": 0.0009107810164424515, "loss": 3.7172, "step": 10505 }, { "epoch": 0.7140915885310504, "grad_norm": 0.6333871483802795, "learning_rate": 0.0009107385514336187, "loss": 3.9435, "step": 10510 }, { "epoch": 0.7144313086017122, "grad_norm": 0.7330854535102844, "learning_rate": 0.000910696086424786, "loss": 3.5719, "step": 10515 }, { "epoch": 0.714771028672374, "grad_norm": 1.0492603778839111, "learning_rate": 0.0009106536214159533, "loss": 3.6217, "step": 10520 }, { "epoch": 0.7151107487430357, "grad_norm": 1.119070291519165, "learning_rate": 0.0009106111564071205, "loss": 3.6961, "step": 10525 }, { "epoch": 0.7154504688136976, "grad_norm": 0.6877725124359131, "learning_rate": 0.0009105686913982877, "loss": 3.5506, "step": 10530 }, { "epoch": 0.7157901888843593, "grad_norm": 0.6053353548049927, "learning_rate": 0.0009105262263894552, "loss": 3.7111, "step": 10535 }, { "epoch": 0.716129908955021, "grad_norm": 0.8503847122192383, "learning_rate": 0.0009104837613806224, "loss": 3.7108, "step": 10540 }, { "epoch": 0.7164696290256828, "grad_norm": 0.8892068862915039, "learning_rate": 0.0009104412963717896, "loss": 3.5369, "step": 10545 }, { "epoch": 0.7168093490963446, "grad_norm": 0.8155838251113892, "learning_rate": 0.000910398831362957, "loss": 3.7966, "step": 10550 }, { "epoch": 0.7171490691670064, "grad_norm": 1.7614003419876099, "learning_rate": 0.0009103563663541242, "loss": 3.7736, "step": 10555 }, { "epoch": 0.7174887892376681, "grad_norm": 0.640840470790863, "learning_rate": 0.0009103139013452914, "loss": 3.7568, "step": 10560 }, { "epoch": 0.71782850930833, "grad_norm": 0.6072386503219604, "learning_rate": 0.0009102714363364589, "loss": 3.8138, "step": 10565 }, { "epoch": 0.7181682293789917, "grad_norm": 0.8307803273200989, "learning_rate": 0.0009102289713276261, "loss": 3.901, "step": 10570 }, { "epoch": 0.7185079494496535, "grad_norm": 1.1475845575332642, "learning_rate": 0.0009101865063187933, "loss": 3.6658, "step": 10575 }, { "epoch": 0.7188476695203153, "grad_norm": 0.9262077808380127, "learning_rate": 0.0009101440413099607, "loss": 3.7299, "step": 10580 }, { "epoch": 0.719187389590977, "grad_norm": 1.0430647134780884, "learning_rate": 0.0009101015763011279, "loss": 3.4612, "step": 10585 }, { "epoch": 0.7195271096616388, "grad_norm": 0.7741380929946899, "learning_rate": 0.0009100591112922951, "loss": 3.8286, "step": 10590 }, { "epoch": 0.7198668297323005, "grad_norm": 0.9774120450019836, "learning_rate": 0.0009100166462834624, "loss": 3.5167, "step": 10595 }, { "epoch": 0.7202065498029624, "grad_norm": 0.7584857940673828, "learning_rate": 0.0009099741812746298, "loss": 3.5701, "step": 10600 }, { "epoch": 0.7205462698736241, "grad_norm": 0.8413480520248413, "learning_rate": 0.000909931716265797, "loss": 3.8187, "step": 10605 }, { "epoch": 0.7208859899442859, "grad_norm": 0.665310800075531, "learning_rate": 0.0009098892512569643, "loss": 3.7608, "step": 10610 }, { "epoch": 0.7212257100149477, "grad_norm": 0.6017893552780151, "learning_rate": 0.0009098467862481316, "loss": 3.6939, "step": 10615 }, { "epoch": 0.7215654300856095, "grad_norm": 0.7797799110412598, "learning_rate": 0.0009098043212392988, "loss": 3.6637, "step": 10620 }, { "epoch": 0.7219051501562712, "grad_norm": 1.0015398263931274, "learning_rate": 0.0009097618562304661, "loss": 3.878, "step": 10625 }, { "epoch": 0.722244870226933, "grad_norm": 0.9935557246208191, "learning_rate": 0.0009097193912216333, "loss": 3.7569, "step": 10630 }, { "epoch": 0.7225845902975948, "grad_norm": 0.6865654587745667, "learning_rate": 0.0009096769262128007, "loss": 3.6062, "step": 10635 }, { "epoch": 0.7229243103682566, "grad_norm": 3.791905641555786, "learning_rate": 0.000909634461203968, "loss": 3.6453, "step": 10640 }, { "epoch": 0.7232640304389183, "grad_norm": 1.3526341915130615, "learning_rate": 0.0009095919961951352, "loss": 3.7675, "step": 10645 }, { "epoch": 0.7236037505095801, "grad_norm": 1.0163742303848267, "learning_rate": 0.0009095495311863025, "loss": 3.5734, "step": 10650 }, { "epoch": 0.7239434705802419, "grad_norm": 0.8221908807754517, "learning_rate": 0.0009095070661774698, "loss": 3.6041, "step": 10655 }, { "epoch": 0.7242831906509036, "grad_norm": 0.7980278730392456, "learning_rate": 0.000909464601168637, "loss": 3.2872, "step": 10660 }, { "epoch": 0.7246229107215655, "grad_norm": 0.786431610584259, "learning_rate": 0.0009094221361598043, "loss": 3.7339, "step": 10665 }, { "epoch": 0.7249626307922272, "grad_norm": 0.7921804785728455, "learning_rate": 0.0009093796711509717, "loss": 3.6977, "step": 10670 }, { "epoch": 0.725302350862889, "grad_norm": 0.8534802198410034, "learning_rate": 0.0009093372061421389, "loss": 3.778, "step": 10675 }, { "epoch": 0.7256420709335507, "grad_norm": 0.73997563123703, "learning_rate": 0.0009092947411333061, "loss": 3.751, "step": 10680 }, { "epoch": 0.7259817910042126, "grad_norm": 0.9463241100311279, "learning_rate": 0.0009092522761244735, "loss": 3.704, "step": 10685 }, { "epoch": 0.7263215110748743, "grad_norm": 0.7088745832443237, "learning_rate": 0.0009092098111156407, "loss": 3.7575, "step": 10690 }, { "epoch": 0.726661231145536, "grad_norm": 1.01829993724823, "learning_rate": 0.0009091673461068079, "loss": 3.7426, "step": 10695 }, { "epoch": 0.7270009512161979, "grad_norm": 0.8043103218078613, "learning_rate": 0.0009091248810979753, "loss": 3.8787, "step": 10700 }, { "epoch": 0.7273406712868596, "grad_norm": 0.8780677914619446, "learning_rate": 0.0009090824160891426, "loss": 3.7631, "step": 10705 }, { "epoch": 0.7276803913575214, "grad_norm": 0.8562443256378174, "learning_rate": 0.0009090399510803098, "loss": 3.9021, "step": 10710 }, { "epoch": 0.7280201114281831, "grad_norm": 0.6382040977478027, "learning_rate": 0.0009089974860714772, "loss": 3.8719, "step": 10715 }, { "epoch": 0.728359831498845, "grad_norm": 1.00100839138031, "learning_rate": 0.0009089550210626444, "loss": 3.4644, "step": 10720 }, { "epoch": 0.7286995515695067, "grad_norm": 1.4583836793899536, "learning_rate": 0.0009089125560538116, "loss": 3.7087, "step": 10725 }, { "epoch": 0.7290392716401685, "grad_norm": 0.7512675523757935, "learning_rate": 0.000908870091044979, "loss": 3.5266, "step": 10730 }, { "epoch": 0.7293789917108303, "grad_norm": 1.338925838470459, "learning_rate": 0.0009088276260361462, "loss": 3.8108, "step": 10735 }, { "epoch": 0.7297187117814921, "grad_norm": 1.8903957605361938, "learning_rate": 0.0009087851610273136, "loss": 3.8596, "step": 10740 }, { "epoch": 0.7300584318521538, "grad_norm": 0.746347188949585, "learning_rate": 0.0009087426960184808, "loss": 3.6057, "step": 10745 }, { "epoch": 0.7303981519228157, "grad_norm": 0.9693640470504761, "learning_rate": 0.0009087002310096481, "loss": 3.695, "step": 10750 }, { "epoch": 0.7307378719934774, "grad_norm": 0.7466190457344055, "learning_rate": 0.0009086577660008154, "loss": 3.7194, "step": 10755 }, { "epoch": 0.7310775920641391, "grad_norm": 0.7932271957397461, "learning_rate": 0.0009086153009919826, "loss": 3.8626, "step": 10760 }, { "epoch": 0.7314173121348009, "grad_norm": 1.12835693359375, "learning_rate": 0.0009085728359831499, "loss": 3.7529, "step": 10765 }, { "epoch": 0.7317570322054627, "grad_norm": 0.8230791091918945, "learning_rate": 0.0009085303709743172, "loss": 3.5801, "step": 10770 }, { "epoch": 0.7320967522761245, "grad_norm": 1.0379669666290283, "learning_rate": 0.0009084879059654845, "loss": 3.8839, "step": 10775 }, { "epoch": 0.7324364723467862, "grad_norm": 0.6634736061096191, "learning_rate": 0.0009084454409566517, "loss": 3.6795, "step": 10780 }, { "epoch": 0.7327761924174481, "grad_norm": 0.8374850153923035, "learning_rate": 0.0009084029759478191, "loss": 3.7551, "step": 10785 }, { "epoch": 0.7331159124881098, "grad_norm": 0.8252646327018738, "learning_rate": 0.0009083605109389863, "loss": 3.5824, "step": 10790 }, { "epoch": 0.7334556325587716, "grad_norm": 0.8450417518615723, "learning_rate": 0.0009083180459301535, "loss": 3.6611, "step": 10795 }, { "epoch": 0.7337953526294333, "grad_norm": 0.8970220685005188, "learning_rate": 0.0009082755809213209, "loss": 3.6258, "step": 10800 }, { "epoch": 0.7341350727000951, "grad_norm": 0.8585121631622314, "learning_rate": 0.0009082331159124881, "loss": 3.8088, "step": 10805 }, { "epoch": 0.7344747927707569, "grad_norm": 0.7040704488754272, "learning_rate": 0.0009081906509036554, "loss": 3.8184, "step": 10810 }, { "epoch": 0.7348145128414186, "grad_norm": 0.8545712828636169, "learning_rate": 0.0009081481858948228, "loss": 3.7435, "step": 10815 }, { "epoch": 0.7351542329120805, "grad_norm": 0.6817483901977539, "learning_rate": 0.00090810572088599, "loss": 3.7205, "step": 10820 }, { "epoch": 0.7354939529827422, "grad_norm": 0.802337646484375, "learning_rate": 0.0009080632558771572, "loss": 3.7991, "step": 10825 }, { "epoch": 0.735833673053404, "grad_norm": 0.724261999130249, "learning_rate": 0.0009080207908683245, "loss": 3.5809, "step": 10830 }, { "epoch": 0.7361733931240658, "grad_norm": 1.217659592628479, "learning_rate": 0.0009079783258594918, "loss": 3.7884, "step": 10835 }, { "epoch": 0.7365131131947276, "grad_norm": 0.9047442078590393, "learning_rate": 0.000907935860850659, "loss": 3.5781, "step": 10840 }, { "epoch": 0.7368528332653893, "grad_norm": 0.8488001823425293, "learning_rate": 0.0009078933958418264, "loss": 3.5537, "step": 10845 }, { "epoch": 0.737192553336051, "grad_norm": 0.8094634413719177, "learning_rate": 0.0009078509308329937, "loss": 3.7588, "step": 10850 }, { "epoch": 0.7375322734067129, "grad_norm": 1.5975488424301147, "learning_rate": 0.0009078084658241609, "loss": 3.6672, "step": 10855 }, { "epoch": 0.7378719934773746, "grad_norm": 0.7606924772262573, "learning_rate": 0.0009077660008153282, "loss": 3.7268, "step": 10860 }, { "epoch": 0.7382117135480364, "grad_norm": 0.639672040939331, "learning_rate": 0.0009077235358064955, "loss": 3.716, "step": 10865 }, { "epoch": 0.7385514336186982, "grad_norm": 0.7177412509918213, "learning_rate": 0.0009076810707976627, "loss": 3.6089, "step": 10870 }, { "epoch": 0.73889115368936, "grad_norm": 0.8626235127449036, "learning_rate": 0.00090763860578883, "loss": 3.7268, "step": 10875 }, { "epoch": 0.7392308737600217, "grad_norm": 0.84871506690979, "learning_rate": 0.0009075961407799973, "loss": 3.725, "step": 10880 }, { "epoch": 0.7395705938306835, "grad_norm": 0.6467830538749695, "learning_rate": 0.0009075536757711646, "loss": 3.6621, "step": 10885 }, { "epoch": 0.7399103139013453, "grad_norm": 0.8289784789085388, "learning_rate": 0.0009075112107623319, "loss": 3.6479, "step": 10890 }, { "epoch": 0.7402500339720071, "grad_norm": 0.7648937106132507, "learning_rate": 0.0009074687457534991, "loss": 3.6385, "step": 10895 }, { "epoch": 0.7405897540426688, "grad_norm": 0.7217904925346375, "learning_rate": 0.0009074262807446664, "loss": 3.5578, "step": 10900 }, { "epoch": 0.7409294741133307, "grad_norm": 0.7535264492034912, "learning_rate": 0.0009073838157358337, "loss": 3.7525, "step": 10905 }, { "epoch": 0.7412691941839924, "grad_norm": 0.7339619398117065, "learning_rate": 0.0009073413507270009, "loss": 3.6714, "step": 10910 }, { "epoch": 0.7416089142546541, "grad_norm": 0.7947880625724792, "learning_rate": 0.0009072988857181683, "loss": 3.965, "step": 10915 }, { "epoch": 0.741948634325316, "grad_norm": 1.636953592300415, "learning_rate": 0.0009072564207093356, "loss": 3.617, "step": 10920 }, { "epoch": 0.7422883543959777, "grad_norm": 1.1998404264450073, "learning_rate": 0.0009072139557005028, "loss": 3.7413, "step": 10925 }, { "epoch": 0.7426280744666395, "grad_norm": 0.9806882739067078, "learning_rate": 0.00090717149069167, "loss": 3.6735, "step": 10930 }, { "epoch": 0.7429677945373012, "grad_norm": 2.759023666381836, "learning_rate": 0.0009071290256828374, "loss": 3.5842, "step": 10935 }, { "epoch": 0.7433075146079631, "grad_norm": 1.1711844205856323, "learning_rate": 0.0009070865606740046, "loss": 3.6398, "step": 10940 }, { "epoch": 0.7436472346786248, "grad_norm": 0.7088755965232849, "learning_rate": 0.0009070440956651718, "loss": 3.6239, "step": 10945 }, { "epoch": 0.7439869547492866, "grad_norm": 0.663390040397644, "learning_rate": 0.0009070016306563393, "loss": 4.053, "step": 10950 }, { "epoch": 0.7443266748199484, "grad_norm": 0.7811562418937683, "learning_rate": 0.0009069591656475065, "loss": 3.4957, "step": 10955 }, { "epoch": 0.7446663948906102, "grad_norm": 0.6137964129447937, "learning_rate": 0.0009069167006386737, "loss": 3.588, "step": 10960 }, { "epoch": 0.7450061149612719, "grad_norm": 0.7414093613624573, "learning_rate": 0.0009068742356298411, "loss": 3.8597, "step": 10965 }, { "epoch": 0.7453458350319336, "grad_norm": 0.860612690448761, "learning_rate": 0.0009068317706210083, "loss": 3.5892, "step": 10970 }, { "epoch": 0.7456855551025955, "grad_norm": 1.3363772630691528, "learning_rate": 0.0009067893056121755, "loss": 3.7388, "step": 10975 }, { "epoch": 0.7460252751732572, "grad_norm": 1.1836018562316895, "learning_rate": 0.0009067468406033428, "loss": 3.6186, "step": 10980 }, { "epoch": 0.746364995243919, "grad_norm": 0.7892703413963318, "learning_rate": 0.0009067043755945102, "loss": 3.6968, "step": 10985 }, { "epoch": 0.7467047153145808, "grad_norm": 0.7137872576713562, "learning_rate": 0.0009066619105856774, "loss": 3.5484, "step": 10990 }, { "epoch": 0.7470444353852426, "grad_norm": 0.7693912982940674, "learning_rate": 0.0009066194455768447, "loss": 3.4623, "step": 10995 }, { "epoch": 0.7473841554559043, "grad_norm": 0.6835779547691345, "learning_rate": 0.000906576980568012, "loss": 3.472, "step": 11000 }, { "epoch": 0.7477238755265662, "grad_norm": 1.3486907482147217, "learning_rate": 0.0009065345155591792, "loss": 3.5823, "step": 11005 }, { "epoch": 0.7480635955972279, "grad_norm": 1.3197927474975586, "learning_rate": 0.0009064920505503465, "loss": 3.6972, "step": 11010 }, { "epoch": 0.7484033156678896, "grad_norm": 0.7653129696846008, "learning_rate": 0.0009064495855415137, "loss": 3.3573, "step": 11015 }, { "epoch": 0.7487430357385514, "grad_norm": 0.832490086555481, "learning_rate": 0.0009064071205326811, "loss": 3.7542, "step": 11020 }, { "epoch": 0.7490827558092132, "grad_norm": 0.7332544326782227, "learning_rate": 0.0009063646555238484, "loss": 3.6438, "step": 11025 }, { "epoch": 0.749422475879875, "grad_norm": 1.0927499532699585, "learning_rate": 0.0009063221905150156, "loss": 3.6641, "step": 11030 }, { "epoch": 0.7497621959505367, "grad_norm": 0.9052128791809082, "learning_rate": 0.0009062797255061829, "loss": 3.7397, "step": 11035 }, { "epoch": 0.7501019160211986, "grad_norm": 0.7294290065765381, "learning_rate": 0.0009062372604973502, "loss": 3.8031, "step": 11040 }, { "epoch": 0.7504416360918603, "grad_norm": 1.0512290000915527, "learning_rate": 0.0009061947954885174, "loss": 3.6971, "step": 11045 }, { "epoch": 0.7507813561625221, "grad_norm": 3.3992373943328857, "learning_rate": 0.0009061523304796847, "loss": 3.7365, "step": 11050 }, { "epoch": 0.7511210762331838, "grad_norm": 0.9170488119125366, "learning_rate": 0.0009061098654708521, "loss": 3.4703, "step": 11055 }, { "epoch": 0.7514607963038457, "grad_norm": 0.775380551815033, "learning_rate": 0.0009060674004620193, "loss": 3.7283, "step": 11060 }, { "epoch": 0.7518005163745074, "grad_norm": 1.198280930519104, "learning_rate": 0.0009060249354531865, "loss": 3.8769, "step": 11065 }, { "epoch": 0.7521402364451691, "grad_norm": 2.0525169372558594, "learning_rate": 0.0009059824704443539, "loss": 3.5898, "step": 11070 }, { "epoch": 0.752479956515831, "grad_norm": 0.7144297361373901, "learning_rate": 0.0009059400054355211, "loss": 3.7656, "step": 11075 }, { "epoch": 0.7528196765864927, "grad_norm": 0.6905569434165955, "learning_rate": 0.0009058975404266884, "loss": 3.6303, "step": 11080 }, { "epoch": 0.7531593966571545, "grad_norm": 0.9297271966934204, "learning_rate": 0.0009058550754178557, "loss": 3.5039, "step": 11085 }, { "epoch": 0.7534991167278163, "grad_norm": 1.179353952407837, "learning_rate": 0.000905812610409023, "loss": 3.656, "step": 11090 }, { "epoch": 0.7538388367984781, "grad_norm": 0.7236602306365967, "learning_rate": 0.0009057701454001903, "loss": 3.824, "step": 11095 }, { "epoch": 0.7541785568691398, "grad_norm": 0.7393776774406433, "learning_rate": 0.0009057276803913576, "loss": 3.5526, "step": 11100 }, { "epoch": 0.7545182769398016, "grad_norm": 0.6739901304244995, "learning_rate": 0.0009056852153825248, "loss": 3.7601, "step": 11105 }, { "epoch": 0.7548579970104634, "grad_norm": 1.1672066450119019, "learning_rate": 0.0009056427503736921, "loss": 3.5944, "step": 11110 }, { "epoch": 0.7551977170811252, "grad_norm": 0.7919332385063171, "learning_rate": 0.0009056002853648593, "loss": 3.6986, "step": 11115 }, { "epoch": 0.7555374371517869, "grad_norm": 0.8015375137329102, "learning_rate": 0.0009055578203560266, "loss": 3.6726, "step": 11120 }, { "epoch": 0.7558771572224487, "grad_norm": 0.6916100978851318, "learning_rate": 0.000905515355347194, "loss": 3.8511, "step": 11125 }, { "epoch": 0.7562168772931105, "grad_norm": 0.7141310572624207, "learning_rate": 0.0009054728903383612, "loss": 3.7437, "step": 11130 }, { "epoch": 0.7565565973637722, "grad_norm": 0.7443316578865051, "learning_rate": 0.0009054304253295285, "loss": 3.8995, "step": 11135 }, { "epoch": 0.756896317434434, "grad_norm": 0.6934722661972046, "learning_rate": 0.0009053879603206958, "loss": 3.8128, "step": 11140 }, { "epoch": 0.7572360375050958, "grad_norm": 0.9614686369895935, "learning_rate": 0.000905345495311863, "loss": 3.5188, "step": 11145 }, { "epoch": 0.7575757575757576, "grad_norm": 1.4568780660629272, "learning_rate": 0.0009053030303030303, "loss": 3.9663, "step": 11150 }, { "epoch": 0.7579154776464193, "grad_norm": 1.0191596746444702, "learning_rate": 0.0009052605652941977, "loss": 3.7326, "step": 11155 }, { "epoch": 0.7582551977170812, "grad_norm": 0.8418881297111511, "learning_rate": 0.0009052181002853649, "loss": 3.6783, "step": 11160 }, { "epoch": 0.7585949177877429, "grad_norm": 0.9203734993934631, "learning_rate": 0.0009051756352765322, "loss": 3.4804, "step": 11165 }, { "epoch": 0.7589346378584046, "grad_norm": 0.8070054650306702, "learning_rate": 0.0009051331702676995, "loss": 3.5441, "step": 11170 }, { "epoch": 0.7592743579290665, "grad_norm": 0.8074763417243958, "learning_rate": 0.0009050907052588667, "loss": 3.6845, "step": 11175 }, { "epoch": 0.7596140779997282, "grad_norm": 0.9647040367126465, "learning_rate": 0.0009050482402500339, "loss": 3.5161, "step": 11180 }, { "epoch": 0.75995379807039, "grad_norm": 0.7623186111450195, "learning_rate": 0.0009050057752412013, "loss": 3.5158, "step": 11185 }, { "epoch": 0.7602935181410517, "grad_norm": 0.6776531338691711, "learning_rate": 0.0009049633102323686, "loss": 3.7268, "step": 11190 }, { "epoch": 0.7606332382117136, "grad_norm": 0.6575266122817993, "learning_rate": 0.0009049208452235358, "loss": 3.7898, "step": 11195 }, { "epoch": 0.7609729582823753, "grad_norm": 2.6512820720672607, "learning_rate": 0.0009048783802147032, "loss": 3.5087, "step": 11200 }, { "epoch": 0.7613126783530371, "grad_norm": 0.8543487787246704, "learning_rate": 0.0009048359152058704, "loss": 3.6902, "step": 11205 }, { "epoch": 0.7616523984236989, "grad_norm": 3.5970544815063477, "learning_rate": 0.0009047934501970376, "loss": 3.661, "step": 11210 }, { "epoch": 0.7619921184943607, "grad_norm": 0.8790910243988037, "learning_rate": 0.000904750985188205, "loss": 3.5509, "step": 11215 }, { "epoch": 0.7623318385650224, "grad_norm": 0.6596616506576538, "learning_rate": 0.0009047085201793722, "loss": 3.7149, "step": 11220 }, { "epoch": 0.7626715586356841, "grad_norm": 0.8629494905471802, "learning_rate": 0.0009046660551705395, "loss": 3.5913, "step": 11225 }, { "epoch": 0.763011278706346, "grad_norm": 0.679855465888977, "learning_rate": 0.0009046235901617068, "loss": 3.8356, "step": 11230 }, { "epoch": 0.7633509987770077, "grad_norm": 0.6809895038604736, "learning_rate": 0.0009045811251528741, "loss": 3.7419, "step": 11235 }, { "epoch": 0.7636907188476695, "grad_norm": 0.7851095795631409, "learning_rate": 0.0009045386601440413, "loss": 3.7303, "step": 11240 }, { "epoch": 0.7640304389183313, "grad_norm": 0.8468301892280579, "learning_rate": 0.0009044961951352086, "loss": 3.4092, "step": 11245 }, { "epoch": 0.7643701589889931, "grad_norm": 0.7759127616882324, "learning_rate": 0.0009044537301263759, "loss": 3.5728, "step": 11250 }, { "epoch": 0.7647098790596548, "grad_norm": 0.7105703353881836, "learning_rate": 0.0009044112651175431, "loss": 3.8708, "step": 11255 }, { "epoch": 0.7650495991303167, "grad_norm": 0.67023104429245, "learning_rate": 0.0009043688001087105, "loss": 3.775, "step": 11260 }, { "epoch": 0.7653893192009784, "grad_norm": 0.7250822186470032, "learning_rate": 0.0009043263350998778, "loss": 3.6947, "step": 11265 }, { "epoch": 0.7657290392716402, "grad_norm": 0.8358578085899353, "learning_rate": 0.000904283870091045, "loss": 3.7507, "step": 11270 }, { "epoch": 0.7660687593423019, "grad_norm": 0.7541988492012024, "learning_rate": 0.0009042414050822123, "loss": 3.7711, "step": 11275 }, { "epoch": 0.7664084794129638, "grad_norm": 6.97370719909668, "learning_rate": 0.0009041989400733795, "loss": 3.6055, "step": 11280 }, { "epoch": 0.7667481994836255, "grad_norm": 1.0621757507324219, "learning_rate": 0.0009041564750645468, "loss": 3.5172, "step": 11285 }, { "epoch": 0.7670879195542872, "grad_norm": 0.7520888447761536, "learning_rate": 0.0009041140100557141, "loss": 3.6724, "step": 11290 }, { "epoch": 0.7674276396249491, "grad_norm": 0.7989642024040222, "learning_rate": 0.0009040715450468814, "loss": 3.5582, "step": 11295 }, { "epoch": 0.7677673596956108, "grad_norm": 0.5823965668678284, "learning_rate": 0.0009040290800380487, "loss": 3.6233, "step": 11300 }, { "epoch": 0.7681070797662726, "grad_norm": 0.8049631714820862, "learning_rate": 0.000903986615029216, "loss": 3.5856, "step": 11305 }, { "epoch": 0.7684467998369343, "grad_norm": 0.8780303597450256, "learning_rate": 0.0009039441500203832, "loss": 3.6896, "step": 11310 }, { "epoch": 0.7687865199075962, "grad_norm": 0.994926393032074, "learning_rate": 0.0009039016850115504, "loss": 3.5965, "step": 11315 }, { "epoch": 0.7691262399782579, "grad_norm": 0.9362306594848633, "learning_rate": 0.0009038592200027178, "loss": 3.8483, "step": 11320 }, { "epoch": 0.7694659600489197, "grad_norm": 0.8986344933509827, "learning_rate": 0.000903816754993885, "loss": 3.6154, "step": 11325 }, { "epoch": 0.7698056801195815, "grad_norm": 0.9780957698822021, "learning_rate": 0.0009037742899850523, "loss": 3.5373, "step": 11330 }, { "epoch": 0.7701454001902432, "grad_norm": 0.9010120630264282, "learning_rate": 0.0009037318249762197, "loss": 3.5014, "step": 11335 }, { "epoch": 0.770485120260905, "grad_norm": 0.8093063235282898, "learning_rate": 0.0009036893599673869, "loss": 3.4193, "step": 11340 }, { "epoch": 0.7708248403315668, "grad_norm": 0.8574478626251221, "learning_rate": 0.0009036468949585541, "loss": 3.8386, "step": 11345 }, { "epoch": 0.7711645604022286, "grad_norm": 0.7611193060874939, "learning_rate": 0.0009036044299497215, "loss": 3.6519, "step": 11350 }, { "epoch": 0.7715042804728903, "grad_norm": 0.8525367379188538, "learning_rate": 0.0009035619649408887, "loss": 3.5253, "step": 11355 }, { "epoch": 0.7718440005435521, "grad_norm": 0.8548959493637085, "learning_rate": 0.0009035194999320559, "loss": 3.9712, "step": 11360 }, { "epoch": 0.7721837206142139, "grad_norm": 0.6429238319396973, "learning_rate": 0.0009034770349232234, "loss": 3.7011, "step": 11365 }, { "epoch": 0.7725234406848757, "grad_norm": 0.8945921063423157, "learning_rate": 0.0009034345699143906, "loss": 3.8966, "step": 11370 }, { "epoch": 0.7728631607555374, "grad_norm": 1.0628300905227661, "learning_rate": 0.0009033921049055578, "loss": 3.6197, "step": 11375 }, { "epoch": 0.7732028808261993, "grad_norm": 1.274933934211731, "learning_rate": 0.0009033496398967251, "loss": 3.4399, "step": 11380 }, { "epoch": 0.773542600896861, "grad_norm": 1.182814598083496, "learning_rate": 0.0009033071748878924, "loss": 3.7439, "step": 11385 }, { "epoch": 0.7738823209675227, "grad_norm": 1.0913844108581543, "learning_rate": 0.0009032647098790596, "loss": 3.8426, "step": 11390 }, { "epoch": 0.7742220410381845, "grad_norm": 1.3744111061096191, "learning_rate": 0.0009032222448702269, "loss": 3.7056, "step": 11395 }, { "epoch": 0.7745617611088463, "grad_norm": 0.7880125045776367, "learning_rate": 0.0009031797798613943, "loss": 3.6926, "step": 11400 }, { "epoch": 0.7749014811795081, "grad_norm": 0.7857582569122314, "learning_rate": 0.0009031373148525615, "loss": 3.855, "step": 11405 }, { "epoch": 0.7752412012501698, "grad_norm": 0.7638903260231018, "learning_rate": 0.0009030948498437288, "loss": 3.9538, "step": 11410 }, { "epoch": 0.7755809213208317, "grad_norm": 1.0896897315979004, "learning_rate": 0.000903052384834896, "loss": 3.7957, "step": 11415 }, { "epoch": 0.7759206413914934, "grad_norm": 1.5842639207839966, "learning_rate": 0.0009030099198260634, "loss": 3.8637, "step": 11420 }, { "epoch": 0.7762603614621552, "grad_norm": 1.6812632083892822, "learning_rate": 0.0009029674548172306, "loss": 3.3718, "step": 11425 }, { "epoch": 0.776600081532817, "grad_norm": 2.0605804920196533, "learning_rate": 0.0009029249898083978, "loss": 3.4116, "step": 11430 }, { "epoch": 0.7769398016034788, "grad_norm": 0.7474672794342041, "learning_rate": 0.0009028825247995653, "loss": 3.5948, "step": 11435 }, { "epoch": 0.7772795216741405, "grad_norm": 0.9415253400802612, "learning_rate": 0.0009028400597907325, "loss": 3.738, "step": 11440 }, { "epoch": 0.7776192417448022, "grad_norm": 0.946291983127594, "learning_rate": 0.0009027975947818997, "loss": 3.5003, "step": 11445 }, { "epoch": 0.7779589618154641, "grad_norm": 1.1923933029174805, "learning_rate": 0.0009027551297730671, "loss": 3.7299, "step": 11450 }, { "epoch": 0.7782986818861258, "grad_norm": 1.4449928998947144, "learning_rate": 0.0009027126647642343, "loss": 3.8693, "step": 11455 }, { "epoch": 0.7786384019567876, "grad_norm": 0.747748076915741, "learning_rate": 0.0009026701997554015, "loss": 3.4474, "step": 11460 }, { "epoch": 0.7789781220274494, "grad_norm": 0.8678235411643982, "learning_rate": 0.0009026277347465688, "loss": 3.9055, "step": 11465 }, { "epoch": 0.7793178420981112, "grad_norm": 0.8195172548294067, "learning_rate": 0.0009025852697377362, "loss": 3.6576, "step": 11470 }, { "epoch": 0.7796575621687729, "grad_norm": 0.8102189302444458, "learning_rate": 0.0009025428047289034, "loss": 3.7615, "step": 11475 }, { "epoch": 0.7799972822394347, "grad_norm": 0.786630392074585, "learning_rate": 0.0009025003397200707, "loss": 3.7133, "step": 11480 }, { "epoch": 0.7803370023100965, "grad_norm": 0.7049959301948547, "learning_rate": 0.000902457874711238, "loss": 3.594, "step": 11485 }, { "epoch": 0.7806767223807582, "grad_norm": 1.7219754457473755, "learning_rate": 0.0009024154097024052, "loss": 3.6476, "step": 11490 }, { "epoch": 0.78101644245142, "grad_norm": 16.35957908630371, "learning_rate": 0.0009023729446935725, "loss": 3.7201, "step": 11495 }, { "epoch": 0.7813561625220818, "grad_norm": 0.9783884882926941, "learning_rate": 0.0009023304796847398, "loss": 3.2817, "step": 11500 }, { "epoch": 0.7816958825927436, "grad_norm": 0.9637783169746399, "learning_rate": 0.0009022880146759071, "loss": 3.6162, "step": 11505 }, { "epoch": 0.7820356026634053, "grad_norm": 0.6852649450302124, "learning_rate": 0.0009022455496670744, "loss": 3.9328, "step": 11510 }, { "epoch": 0.7823753227340672, "grad_norm": 6.640917778015137, "learning_rate": 0.0009022030846582416, "loss": 3.7044, "step": 11515 }, { "epoch": 0.7827150428047289, "grad_norm": 0.7610793113708496, "learning_rate": 0.0009021606196494089, "loss": 3.7768, "step": 11520 }, { "epoch": 0.7830547628753907, "grad_norm": 0.6956524848937988, "learning_rate": 0.0009021181546405762, "loss": 3.8821, "step": 11525 }, { "epoch": 0.7833944829460524, "grad_norm": 0.7520929574966431, "learning_rate": 0.0009020756896317434, "loss": 3.6772, "step": 11530 }, { "epoch": 0.7837342030167143, "grad_norm": 0.959305465221405, "learning_rate": 0.0009020332246229107, "loss": 3.5052, "step": 11535 }, { "epoch": 0.784073923087376, "grad_norm": 0.6951141953468323, "learning_rate": 0.0009019907596140781, "loss": 3.8615, "step": 11540 }, { "epoch": 0.7844136431580377, "grad_norm": 0.8288753628730774, "learning_rate": 0.0009019482946052453, "loss": 3.6493, "step": 11545 }, { "epoch": 0.7847533632286996, "grad_norm": 0.6916031241416931, "learning_rate": 0.0009019058295964126, "loss": 3.756, "step": 11550 }, { "epoch": 0.7850930832993613, "grad_norm": 5.888177394866943, "learning_rate": 0.0009018633645875799, "loss": 3.6843, "step": 11555 }, { "epoch": 0.7854328033700231, "grad_norm": 0.7222215533256531, "learning_rate": 0.0009018208995787471, "loss": 3.8328, "step": 11560 }, { "epoch": 0.7857725234406848, "grad_norm": 0.7628923654556274, "learning_rate": 0.0009017784345699143, "loss": 3.6886, "step": 11565 }, { "epoch": 0.7861122435113467, "grad_norm": 1.0630370378494263, "learning_rate": 0.0009017359695610817, "loss": 3.447, "step": 11570 }, { "epoch": 0.7864519635820084, "grad_norm": 0.8000380396842957, "learning_rate": 0.000901693504552249, "loss": 3.65, "step": 11575 }, { "epoch": 0.7867916836526702, "grad_norm": 0.8064364194869995, "learning_rate": 0.0009016510395434162, "loss": 4.0026, "step": 11580 }, { "epoch": 0.787131403723332, "grad_norm": 87.47179412841797, "learning_rate": 0.0009016085745345836, "loss": 3.5893, "step": 11585 }, { "epoch": 0.7874711237939938, "grad_norm": 1.0113338232040405, "learning_rate": 0.0009015661095257508, "loss": 3.6089, "step": 11590 }, { "epoch": 0.7878108438646555, "grad_norm": 0.7106664180755615, "learning_rate": 0.000901523644516918, "loss": 3.6157, "step": 11595 }, { "epoch": 0.7881505639353173, "grad_norm": 0.6645505428314209, "learning_rate": 0.0009014811795080854, "loss": 3.5552, "step": 11600 }, { "epoch": 0.7884902840059791, "grad_norm": 3.2448296546936035, "learning_rate": 0.0009014387144992526, "loss": 3.8646, "step": 11605 }, { "epoch": 0.7888300040766408, "grad_norm": 0.7806961536407471, "learning_rate": 0.0009013962494904199, "loss": 3.8959, "step": 11610 }, { "epoch": 0.7891697241473026, "grad_norm": 0.8047600984573364, "learning_rate": 0.0009013537844815872, "loss": 3.4695, "step": 11615 }, { "epoch": 0.7895094442179644, "grad_norm": 1.112596869468689, "learning_rate": 0.0009013113194727545, "loss": 3.6292, "step": 11620 }, { "epoch": 0.7898491642886262, "grad_norm": 0.7855541110038757, "learning_rate": 0.0009012688544639217, "loss": 3.543, "step": 11625 }, { "epoch": 0.7901888843592879, "grad_norm": 0.7885885238647461, "learning_rate": 0.000901226389455089, "loss": 3.6944, "step": 11630 }, { "epoch": 0.7905286044299498, "grad_norm": 0.8197888731956482, "learning_rate": 0.0009011839244462563, "loss": 3.677, "step": 11635 }, { "epoch": 0.7908683245006115, "grad_norm": 0.8078897595405579, "learning_rate": 0.0009011414594374235, "loss": 3.7927, "step": 11640 }, { "epoch": 0.7912080445712733, "grad_norm": 1.9330220222473145, "learning_rate": 0.0009010989944285909, "loss": 3.4802, "step": 11645 }, { "epoch": 0.791547764641935, "grad_norm": 0.8217200636863708, "learning_rate": 0.0009010565294197582, "loss": 3.7468, "step": 11650 }, { "epoch": 0.7918874847125968, "grad_norm": 1.0662391185760498, "learning_rate": 0.0009010140644109254, "loss": 3.7532, "step": 11655 }, { "epoch": 0.7922272047832586, "grad_norm": 1.107478141784668, "learning_rate": 0.0009009715994020927, "loss": 3.7487, "step": 11660 }, { "epoch": 0.7925669248539203, "grad_norm": 0.8694434762001038, "learning_rate": 0.0009009291343932599, "loss": 3.6482, "step": 11665 }, { "epoch": 0.7929066449245822, "grad_norm": 1.516220211982727, "learning_rate": 0.0009008866693844272, "loss": 3.6547, "step": 11670 }, { "epoch": 0.7932463649952439, "grad_norm": 0.8359872102737427, "learning_rate": 0.0009008442043755946, "loss": 3.6229, "step": 11675 }, { "epoch": 0.7935860850659057, "grad_norm": 0.7333314418792725, "learning_rate": 0.0009008017393667618, "loss": 3.7603, "step": 11680 }, { "epoch": 0.7939258051365675, "grad_norm": 0.7693328857421875, "learning_rate": 0.0009007592743579291, "loss": 3.9751, "step": 11685 }, { "epoch": 0.7942655252072293, "grad_norm": 0.7747409343719482, "learning_rate": 0.0009007168093490964, "loss": 3.4737, "step": 11690 }, { "epoch": 0.794605245277891, "grad_norm": 1.2370717525482178, "learning_rate": 0.0009006743443402636, "loss": 3.7312, "step": 11695 }, { "epoch": 0.7949449653485527, "grad_norm": 1.145755410194397, "learning_rate": 0.0009006318793314308, "loss": 3.7021, "step": 11700 }, { "epoch": 0.7952846854192146, "grad_norm": 1.216779351234436, "learning_rate": 0.0009005894143225982, "loss": 3.4789, "step": 11705 }, { "epoch": 0.7956244054898763, "grad_norm": 0.7128127217292786, "learning_rate": 0.0009005469493137655, "loss": 3.4535, "step": 11710 }, { "epoch": 0.7959641255605381, "grad_norm": 0.6719933152198792, "learning_rate": 0.0009005044843049327, "loss": 3.7562, "step": 11715 }, { "epoch": 0.7963038456311999, "grad_norm": 1.7956007719039917, "learning_rate": 0.0009004620192961001, "loss": 3.7448, "step": 11720 }, { "epoch": 0.7966435657018617, "grad_norm": 0.9318267107009888, "learning_rate": 0.0009004195542872673, "loss": 3.6851, "step": 11725 }, { "epoch": 0.7969832857725234, "grad_norm": 1.8655145168304443, "learning_rate": 0.0009003770892784345, "loss": 3.7769, "step": 11730 }, { "epoch": 0.7973230058431852, "grad_norm": 0.8474512696266174, "learning_rate": 0.0009003346242696019, "loss": 3.6509, "step": 11735 }, { "epoch": 0.797662725913847, "grad_norm": 1.0019559860229492, "learning_rate": 0.0009002921592607691, "loss": 3.5633, "step": 11740 }, { "epoch": 0.7980024459845088, "grad_norm": 0.877302348613739, "learning_rate": 0.0009002496942519364, "loss": 3.6611, "step": 11745 }, { "epoch": 0.7983421660551705, "grad_norm": 1.024214267730713, "learning_rate": 0.0009002072292431038, "loss": 3.8209, "step": 11750 }, { "epoch": 0.7986818861258324, "grad_norm": 2.550053119659424, "learning_rate": 0.000900164764234271, "loss": 3.3611, "step": 11755 }, { "epoch": 0.7990216061964941, "grad_norm": 1.1882482767105103, "learning_rate": 0.0009001222992254383, "loss": 3.6797, "step": 11760 }, { "epoch": 0.7993613262671558, "grad_norm": 0.9128539562225342, "learning_rate": 0.0009000798342166055, "loss": 3.7904, "step": 11765 }, { "epoch": 0.7997010463378177, "grad_norm": 0.6798457503318787, "learning_rate": 0.0009000373692077728, "loss": 3.7553, "step": 11770 }, { "epoch": 0.8000407664084794, "grad_norm": 0.9786900281906128, "learning_rate": 0.0008999949041989401, "loss": 3.5874, "step": 11775 }, { "epoch": 0.8003804864791412, "grad_norm": 0.6784381866455078, "learning_rate": 0.0008999524391901074, "loss": 3.8471, "step": 11780 }, { "epoch": 0.8007202065498029, "grad_norm": 3.1511030197143555, "learning_rate": 0.0008999099741812747, "loss": 3.872, "step": 11785 }, { "epoch": 0.8010599266204648, "grad_norm": 0.6527305245399475, "learning_rate": 0.000899867509172442, "loss": 3.3656, "step": 11790 }, { "epoch": 0.8013996466911265, "grad_norm": 0.8578232526779175, "learning_rate": 0.0008998250441636092, "loss": 3.7483, "step": 11795 }, { "epoch": 0.8017393667617883, "grad_norm": 0.8288311958312988, "learning_rate": 0.0008997825791547764, "loss": 3.8449, "step": 11800 }, { "epoch": 0.8020790868324501, "grad_norm": 1.7888271808624268, "learning_rate": 0.0008997401141459438, "loss": 3.9227, "step": 11805 }, { "epoch": 0.8024188069031118, "grad_norm": 1.4842759370803833, "learning_rate": 0.000899697649137111, "loss": 3.4431, "step": 11810 }, { "epoch": 0.8027585269737736, "grad_norm": 1.0334523916244507, "learning_rate": 0.0008996551841282783, "loss": 3.5816, "step": 11815 }, { "epoch": 0.8030982470444353, "grad_norm": 0.8577939867973328, "learning_rate": 0.0008996127191194457, "loss": 3.7209, "step": 11820 }, { "epoch": 0.8034379671150972, "grad_norm": 0.868172824382782, "learning_rate": 0.0008995702541106129, "loss": 3.7975, "step": 11825 }, { "epoch": 0.8037776871857589, "grad_norm": 0.9573361873626709, "learning_rate": 0.0008995277891017801, "loss": 3.7591, "step": 11830 }, { "epoch": 0.8041174072564207, "grad_norm": 0.7482950091362, "learning_rate": 0.0008994853240929475, "loss": 3.7724, "step": 11835 }, { "epoch": 0.8044571273270825, "grad_norm": 5.594187259674072, "learning_rate": 0.0008994428590841147, "loss": 3.7639, "step": 11840 }, { "epoch": 0.8047968473977443, "grad_norm": 1.4929182529449463, "learning_rate": 0.0008994003940752819, "loss": 3.5914, "step": 11845 }, { "epoch": 0.805136567468406, "grad_norm": 1.2356071472167969, "learning_rate": 0.0008993579290664494, "loss": 3.7893, "step": 11850 }, { "epoch": 0.8054762875390679, "grad_norm": 1.3104801177978516, "learning_rate": 0.0008993154640576166, "loss": 3.6528, "step": 11855 }, { "epoch": 0.8058160076097296, "grad_norm": 1.1337311267852783, "learning_rate": 0.0008992729990487838, "loss": 3.5129, "step": 11860 }, { "epoch": 0.8061557276803913, "grad_norm": 0.8900738954544067, "learning_rate": 0.0008992305340399511, "loss": 3.6081, "step": 11865 }, { "epoch": 0.8064954477510531, "grad_norm": 0.7715288996696472, "learning_rate": 0.0008991880690311184, "loss": 3.7922, "step": 11870 }, { "epoch": 0.8068351678217149, "grad_norm": 0.7087821960449219, "learning_rate": 0.0008991456040222856, "loss": 3.6447, "step": 11875 }, { "epoch": 0.8071748878923767, "grad_norm": 0.856303870677948, "learning_rate": 0.0008991031390134529, "loss": 3.2649, "step": 11880 }, { "epoch": 0.8075146079630384, "grad_norm": 0.7621307969093323, "learning_rate": 0.0008990606740046203, "loss": 3.7453, "step": 11885 }, { "epoch": 0.8078543280337003, "grad_norm": 0.7358371615409851, "learning_rate": 0.0008990182089957875, "loss": 3.3933, "step": 11890 }, { "epoch": 0.808194048104362, "grad_norm": 1.152427077293396, "learning_rate": 0.0008989757439869548, "loss": 3.6491, "step": 11895 }, { "epoch": 0.8085337681750238, "grad_norm": 1.6236579418182373, "learning_rate": 0.000898933278978122, "loss": 3.505, "step": 11900 }, { "epoch": 0.8088734882456855, "grad_norm": 0.7944048047065735, "learning_rate": 0.0008988908139692893, "loss": 3.5157, "step": 11905 }, { "epoch": 0.8092132083163474, "grad_norm": 2.7386722564697266, "learning_rate": 0.0008988483489604566, "loss": 3.849, "step": 11910 }, { "epoch": 0.8095529283870091, "grad_norm": 0.6543929576873779, "learning_rate": 0.0008988058839516238, "loss": 3.7691, "step": 11915 }, { "epoch": 0.8098926484576708, "grad_norm": 0.7426961064338684, "learning_rate": 0.0008987634189427912, "loss": 3.4518, "step": 11920 }, { "epoch": 0.8102323685283327, "grad_norm": 0.680490255355835, "learning_rate": 0.0008987209539339585, "loss": 3.7327, "step": 11925 }, { "epoch": 0.8105720885989944, "grad_norm": 0.9860913753509521, "learning_rate": 0.0008986784889251257, "loss": 3.7608, "step": 11930 }, { "epoch": 0.8109118086696562, "grad_norm": 0.8084064722061157, "learning_rate": 0.000898636023916293, "loss": 3.7959, "step": 11935 }, { "epoch": 0.811251528740318, "grad_norm": 1.1407679319381714, "learning_rate": 0.0008985935589074603, "loss": 3.5039, "step": 11940 }, { "epoch": 0.8115912488109798, "grad_norm": 0.7349080443382263, "learning_rate": 0.0008985510938986275, "loss": 3.4194, "step": 11945 }, { "epoch": 0.8119309688816415, "grad_norm": 0.8958530426025391, "learning_rate": 0.0008985086288897947, "loss": 3.6837, "step": 11950 }, { "epoch": 0.8122706889523033, "grad_norm": 2.5459187030792236, "learning_rate": 0.0008984661638809622, "loss": 3.609, "step": 11955 }, { "epoch": 0.8126104090229651, "grad_norm": 0.810468852519989, "learning_rate": 0.0008984236988721294, "loss": 3.6765, "step": 11960 }, { "epoch": 0.8129501290936268, "grad_norm": 0.7730026245117188, "learning_rate": 0.0008983812338632966, "loss": 3.577, "step": 11965 }, { "epoch": 0.8132898491642886, "grad_norm": 1.1334856748580933, "learning_rate": 0.000898338768854464, "loss": 3.6766, "step": 11970 }, { "epoch": 0.8136295692349504, "grad_norm": 1.0055420398712158, "learning_rate": 0.0008982963038456312, "loss": 3.8258, "step": 11975 }, { "epoch": 0.8139692893056122, "grad_norm": 1.1260411739349365, "learning_rate": 0.0008982538388367984, "loss": 3.5624, "step": 11980 }, { "epoch": 0.8143090093762739, "grad_norm": 0.8112538456916809, "learning_rate": 0.0008982113738279658, "loss": 3.577, "step": 11985 }, { "epoch": 0.8146487294469357, "grad_norm": 0.8501037359237671, "learning_rate": 0.0008981689088191331, "loss": 3.8233, "step": 11990 }, { "epoch": 0.8149884495175975, "grad_norm": 1.2345054149627686, "learning_rate": 0.0008981264438103003, "loss": 3.4861, "step": 11995 }, { "epoch": 0.8153281695882593, "grad_norm": 0.8775907158851624, "learning_rate": 0.0008980839788014677, "loss": 3.5838, "step": 12000 }, { "epoch": 0.815667889658921, "grad_norm": 1.1107614040374756, "learning_rate": 0.0008980415137926349, "loss": 3.7822, "step": 12005 }, { "epoch": 0.8160076097295829, "grad_norm": 1.473344087600708, "learning_rate": 0.0008979990487838021, "loss": 3.6956, "step": 12010 }, { "epoch": 0.8163473298002446, "grad_norm": 0.7700946927070618, "learning_rate": 0.0008979565837749694, "loss": 3.8535, "step": 12015 }, { "epoch": 0.8166870498709063, "grad_norm": 0.8405547142028809, "learning_rate": 0.0008979141187661367, "loss": 3.7368, "step": 12020 }, { "epoch": 0.8170267699415682, "grad_norm": 0.7274438738822937, "learning_rate": 0.000897871653757304, "loss": 3.2918, "step": 12025 }, { "epoch": 0.8173664900122299, "grad_norm": 0.9372562170028687, "learning_rate": 0.0008978291887484713, "loss": 3.6491, "step": 12030 }, { "epoch": 0.8177062100828917, "grad_norm": 1.0108988285064697, "learning_rate": 0.0008977867237396386, "loss": 3.5966, "step": 12035 }, { "epoch": 0.8180459301535534, "grad_norm": 0.8893322348594666, "learning_rate": 0.0008977442587308058, "loss": 3.5553, "step": 12040 }, { "epoch": 0.8183856502242153, "grad_norm": 1.1882805824279785, "learning_rate": 0.0008977017937219731, "loss": 3.9158, "step": 12045 }, { "epoch": 0.818725370294877, "grad_norm": 0.6214559674263, "learning_rate": 0.0008976593287131403, "loss": 3.7214, "step": 12050 }, { "epoch": 0.8190650903655388, "grad_norm": 0.9441993236541748, "learning_rate": 0.0008976168637043076, "loss": 3.6625, "step": 12055 }, { "epoch": 0.8194048104362006, "grad_norm": 0.7592012882232666, "learning_rate": 0.000897574398695475, "loss": 3.5658, "step": 12060 }, { "epoch": 0.8197445305068624, "grad_norm": 0.8846549391746521, "learning_rate": 0.0008975319336866422, "loss": 3.8133, "step": 12065 }, { "epoch": 0.8200842505775241, "grad_norm": 1.0380690097808838, "learning_rate": 0.0008974894686778095, "loss": 3.6635, "step": 12070 }, { "epoch": 0.8204239706481858, "grad_norm": 0.7360149621963501, "learning_rate": 0.0008974470036689768, "loss": 3.7635, "step": 12075 }, { "epoch": 0.8207636907188477, "grad_norm": 1.2694586515426636, "learning_rate": 0.000897404538660144, "loss": 3.4643, "step": 12080 }, { "epoch": 0.8211034107895094, "grad_norm": 0.7986928224563599, "learning_rate": 0.0008973620736513113, "loss": 3.6993, "step": 12085 }, { "epoch": 0.8214431308601712, "grad_norm": 0.942507266998291, "learning_rate": 0.0008973196086424786, "loss": 3.857, "step": 12090 }, { "epoch": 0.821782850930833, "grad_norm": 0.8908464312553406, "learning_rate": 0.0008972771436336459, "loss": 3.8681, "step": 12095 }, { "epoch": 0.8221225710014948, "grad_norm": 0.8867223262786865, "learning_rate": 0.0008972346786248133, "loss": 3.7046, "step": 12100 }, { "epoch": 0.8224622910721565, "grad_norm": 0.8820211291313171, "learning_rate": 0.0008971922136159805, "loss": 3.5966, "step": 12105 }, { "epoch": 0.8228020111428184, "grad_norm": 1.1007713079452515, "learning_rate": 0.0008971497486071477, "loss": 3.6767, "step": 12110 }, { "epoch": 0.8231417312134801, "grad_norm": 0.8534795045852661, "learning_rate": 0.000897107283598315, "loss": 3.7248, "step": 12115 }, { "epoch": 0.8234814512841419, "grad_norm": 0.9195171594619751, "learning_rate": 0.0008970648185894823, "loss": 3.6925, "step": 12120 }, { "epoch": 0.8238211713548036, "grad_norm": 0.982778787612915, "learning_rate": 0.0008970223535806495, "loss": 3.6736, "step": 12125 }, { "epoch": 0.8241608914254654, "grad_norm": 0.7273781299591064, "learning_rate": 0.0008969798885718169, "loss": 3.9274, "step": 12130 }, { "epoch": 0.8245006114961272, "grad_norm": 0.9642855525016785, "learning_rate": 0.0008969374235629842, "loss": 3.5285, "step": 12135 }, { "epoch": 0.8248403315667889, "grad_norm": 1.291280746459961, "learning_rate": 0.0008968949585541514, "loss": 3.7009, "step": 12140 }, { "epoch": 0.8251800516374508, "grad_norm": 0.7058663964271545, "learning_rate": 0.0008968524935453187, "loss": 3.7475, "step": 12145 }, { "epoch": 0.8255197717081125, "grad_norm": 1.9695004224777222, "learning_rate": 0.000896810028536486, "loss": 3.6172, "step": 12150 }, { "epoch": 0.8258594917787743, "grad_norm": 0.8333840370178223, "learning_rate": 0.0008967675635276532, "loss": 3.7805, "step": 12155 }, { "epoch": 0.826199211849436, "grad_norm": 0.8567728996276855, "learning_rate": 0.0008967250985188205, "loss": 3.6374, "step": 12160 }, { "epoch": 0.8265389319200979, "grad_norm": 1.1291769742965698, "learning_rate": 0.0008966826335099878, "loss": 3.8096, "step": 12165 }, { "epoch": 0.8268786519907596, "grad_norm": 0.9778679609298706, "learning_rate": 0.0008966401685011551, "loss": 3.6984, "step": 12170 }, { "epoch": 0.8272183720614213, "grad_norm": 1.1576662063598633, "learning_rate": 0.0008965977034923224, "loss": 3.5487, "step": 12175 }, { "epoch": 0.8275580921320832, "grad_norm": 0.6185218095779419, "learning_rate": 0.0008965552384834896, "loss": 3.718, "step": 12180 }, { "epoch": 0.8278978122027449, "grad_norm": 0.6700850129127502, "learning_rate": 0.0008965127734746569, "loss": 3.6459, "step": 12185 }, { "epoch": 0.8282375322734067, "grad_norm": 0.8876090049743652, "learning_rate": 0.0008964703084658242, "loss": 3.8491, "step": 12190 }, { "epoch": 0.8285772523440685, "grad_norm": 0.6527369618415833, "learning_rate": 0.0008964278434569914, "loss": 3.8357, "step": 12195 }, { "epoch": 0.8289169724147303, "grad_norm": 0.7413597106933594, "learning_rate": 0.0008963853784481587, "loss": 3.9019, "step": 12200 }, { "epoch": 0.829256692485392, "grad_norm": 0.8062065243721008, "learning_rate": 0.0008963429134393261, "loss": 3.924, "step": 12205 }, { "epoch": 0.8295964125560538, "grad_norm": 0.9075581431388855, "learning_rate": 0.0008963004484304933, "loss": 3.6124, "step": 12210 }, { "epoch": 0.8299361326267156, "grad_norm": 0.9779383540153503, "learning_rate": 0.0008962579834216605, "loss": 3.6829, "step": 12215 }, { "epoch": 0.8302758526973774, "grad_norm": 0.755110502243042, "learning_rate": 0.0008962155184128279, "loss": 3.5705, "step": 12220 }, { "epoch": 0.8306155727680391, "grad_norm": 0.8037834167480469, "learning_rate": 0.0008961730534039951, "loss": 3.785, "step": 12225 }, { "epoch": 0.830955292838701, "grad_norm": 0.9082496762275696, "learning_rate": 0.0008961305883951623, "loss": 3.5452, "step": 12230 }, { "epoch": 0.8312950129093627, "grad_norm": 1.0774654150009155, "learning_rate": 0.0008960881233863298, "loss": 3.1426, "step": 12235 }, { "epoch": 0.8316347329800244, "grad_norm": 0.8939933180809021, "learning_rate": 0.000896045658377497, "loss": 3.5016, "step": 12240 }, { "epoch": 0.8319744530506862, "grad_norm": 0.7898679375648499, "learning_rate": 0.0008960031933686642, "loss": 3.3459, "step": 12245 }, { "epoch": 0.832314173121348, "grad_norm": 0.6527073383331299, "learning_rate": 0.0008959607283598315, "loss": 3.6323, "step": 12250 }, { "epoch": 0.8326538931920098, "grad_norm": 0.8865939378738403, "learning_rate": 0.0008959182633509988, "loss": 3.478, "step": 12255 }, { "epoch": 0.8329936132626715, "grad_norm": 1.1525355577468872, "learning_rate": 0.000895875798342166, "loss": 3.5154, "step": 12260 }, { "epoch": 0.8333333333333334, "grad_norm": 1.543592095375061, "learning_rate": 0.0008958333333333334, "loss": 3.7784, "step": 12265 }, { "epoch": 0.8336730534039951, "grad_norm": 1.6606383323669434, "learning_rate": 0.0008957908683245007, "loss": 3.2701, "step": 12270 }, { "epoch": 0.8340127734746569, "grad_norm": 1.9703108072280884, "learning_rate": 0.0008957484033156679, "loss": 3.8146, "step": 12275 }, { "epoch": 0.8343524935453187, "grad_norm": 0.8228875994682312, "learning_rate": 0.0008957059383068352, "loss": 3.6945, "step": 12280 }, { "epoch": 0.8346922136159804, "grad_norm": 0.792127251625061, "learning_rate": 0.0008956634732980025, "loss": 3.5868, "step": 12285 }, { "epoch": 0.8350319336866422, "grad_norm": 0.9458941221237183, "learning_rate": 0.0008956210082891697, "loss": 3.6609, "step": 12290 }, { "epoch": 0.8353716537573039, "grad_norm": 2.0094172954559326, "learning_rate": 0.000895578543280337, "loss": 3.5978, "step": 12295 }, { "epoch": 0.8357113738279658, "grad_norm": 0.9320061802864075, "learning_rate": 0.0008955360782715043, "loss": 3.7183, "step": 12300 }, { "epoch": 0.8360510938986275, "grad_norm": 2.128854990005493, "learning_rate": 0.0008954936132626716, "loss": 3.4003, "step": 12305 }, { "epoch": 0.8363908139692893, "grad_norm": 0.9589837789535522, "learning_rate": 0.0008954511482538389, "loss": 3.8215, "step": 12310 }, { "epoch": 0.8367305340399511, "grad_norm": 1.0171260833740234, "learning_rate": 0.0008954086832450061, "loss": 3.5688, "step": 12315 }, { "epoch": 0.8370702541106129, "grad_norm": 0.746832013130188, "learning_rate": 0.0008953662182361734, "loss": 3.7359, "step": 12320 }, { "epoch": 0.8374099741812746, "grad_norm": 0.8556201457977295, "learning_rate": 0.0008953237532273407, "loss": 3.8403, "step": 12325 }, { "epoch": 0.8377496942519363, "grad_norm": 0.7217704653739929, "learning_rate": 0.0008952812882185079, "loss": 3.2962, "step": 12330 }, { "epoch": 0.8380894143225982, "grad_norm": 1.1214584112167358, "learning_rate": 0.0008952388232096753, "loss": 3.8938, "step": 12335 }, { "epoch": 0.83842913439326, "grad_norm": 0.7692179679870605, "learning_rate": 0.0008951963582008426, "loss": 3.7128, "step": 12340 }, { "epoch": 0.8387688544639217, "grad_norm": 0.995206356048584, "learning_rate": 0.0008951538931920098, "loss": 3.679, "step": 12345 }, { "epoch": 0.8391085745345835, "grad_norm": 1.7244571447372437, "learning_rate": 0.000895111428183177, "loss": 3.6521, "step": 12350 }, { "epoch": 0.8394482946052453, "grad_norm": 0.7407826781272888, "learning_rate": 0.0008950689631743444, "loss": 3.6666, "step": 12355 }, { "epoch": 0.839788014675907, "grad_norm": 0.8281946778297424, "learning_rate": 0.0008950264981655116, "loss": 3.6433, "step": 12360 }, { "epoch": 0.8401277347465689, "grad_norm": 0.610300600528717, "learning_rate": 0.0008949840331566788, "loss": 3.9378, "step": 12365 }, { "epoch": 0.8404674548172306, "grad_norm": 1.0494595766067505, "learning_rate": 0.0008949415681478463, "loss": 3.7256, "step": 12370 }, { "epoch": 0.8408071748878924, "grad_norm": 1.2667574882507324, "learning_rate": 0.0008948991031390135, "loss": 3.6659, "step": 12375 }, { "epoch": 0.8411468949585541, "grad_norm": 1.021226167678833, "learning_rate": 0.0008948566381301807, "loss": 3.652, "step": 12380 }, { "epoch": 0.841486615029216, "grad_norm": 1.876592993736267, "learning_rate": 0.0008948141731213481, "loss": 3.822, "step": 12385 }, { "epoch": 0.8418263350998777, "grad_norm": 0.748383104801178, "learning_rate": 0.0008947717081125153, "loss": 3.5108, "step": 12390 }, { "epoch": 0.8421660551705394, "grad_norm": 0.8421717286109924, "learning_rate": 0.0008947292431036825, "loss": 3.5724, "step": 12395 }, { "epoch": 0.8425057752412013, "grad_norm": 0.9351264834403992, "learning_rate": 0.0008946867780948498, "loss": 3.7664, "step": 12400 }, { "epoch": 0.842845495311863, "grad_norm": 0.7131645083427429, "learning_rate": 0.0008946443130860172, "loss": 3.666, "step": 12405 }, { "epoch": 0.8431852153825248, "grad_norm": 0.8910898566246033, "learning_rate": 0.0008946018480771844, "loss": 3.8812, "step": 12410 }, { "epoch": 0.8435249354531865, "grad_norm": 0.7854522466659546, "learning_rate": 0.0008945593830683517, "loss": 3.7911, "step": 12415 }, { "epoch": 0.8438646555238484, "grad_norm": 0.7281268239021301, "learning_rate": 0.000894516918059519, "loss": 3.7536, "step": 12420 }, { "epoch": 0.8442043755945101, "grad_norm": 0.871080756187439, "learning_rate": 0.0008944744530506862, "loss": 3.8731, "step": 12425 }, { "epoch": 0.8445440956651719, "grad_norm": 0.6535822153091431, "learning_rate": 0.0008944319880418535, "loss": 3.6304, "step": 12430 }, { "epoch": 0.8448838157358337, "grad_norm": 0.8274858593940735, "learning_rate": 0.0008943895230330207, "loss": 3.8228, "step": 12435 }, { "epoch": 0.8452235358064955, "grad_norm": 1.337404489517212, "learning_rate": 0.0008943470580241882, "loss": 3.6757, "step": 12440 }, { "epoch": 0.8455632558771572, "grad_norm": 0.7477543354034424, "learning_rate": 0.0008943045930153554, "loss": 3.5688, "step": 12445 }, { "epoch": 0.845902975947819, "grad_norm": 2.138296365737915, "learning_rate": 0.0008942621280065226, "loss": 3.6284, "step": 12450 }, { "epoch": 0.8462426960184808, "grad_norm": 1.3792190551757812, "learning_rate": 0.00089421966299769, "loss": 3.9597, "step": 12455 }, { "epoch": 0.8465824160891425, "grad_norm": 0.9175506830215454, "learning_rate": 0.0008941771979888572, "loss": 3.6483, "step": 12460 }, { "epoch": 0.8469221361598043, "grad_norm": 0.6348444819450378, "learning_rate": 0.0008941347329800244, "loss": 3.7185, "step": 12465 }, { "epoch": 0.8472618562304661, "grad_norm": 0.8670823574066162, "learning_rate": 0.0008940922679711918, "loss": 3.4806, "step": 12470 }, { "epoch": 0.8476015763011279, "grad_norm": 1.2782633304595947, "learning_rate": 0.0008940498029623591, "loss": 3.2384, "step": 12475 }, { "epoch": 0.8479412963717896, "grad_norm": 1.0383013486862183, "learning_rate": 0.0008940073379535263, "loss": 3.5404, "step": 12480 }, { "epoch": 0.8482810164424515, "grad_norm": 0.9223533868789673, "learning_rate": 0.0008939648729446937, "loss": 3.6777, "step": 12485 }, { "epoch": 0.8486207365131132, "grad_norm": 0.7466956973075867, "learning_rate": 0.0008939224079358609, "loss": 3.7102, "step": 12490 }, { "epoch": 0.848960456583775, "grad_norm": 0.7598857283592224, "learning_rate": 0.0008938799429270281, "loss": 3.7285, "step": 12495 }, { "epoch": 0.8493001766544367, "grad_norm": 1.495668649673462, "learning_rate": 0.0008938374779181954, "loss": 3.7081, "step": 12500 }, { "epoch": 0.8496398967250985, "grad_norm": 0.8449199795722961, "learning_rate": 0.0008937950129093627, "loss": 3.6521, "step": 12505 }, { "epoch": 0.8499796167957603, "grad_norm": 0.8988255858421326, "learning_rate": 0.00089375254790053, "loss": 3.4967, "step": 12510 }, { "epoch": 0.850319336866422, "grad_norm": 0.6855485439300537, "learning_rate": 0.0008937100828916973, "loss": 3.5231, "step": 12515 }, { "epoch": 0.8506590569370839, "grad_norm": 0.7748841047286987, "learning_rate": 0.0008936676178828646, "loss": 3.7082, "step": 12520 }, { "epoch": 0.8509987770077456, "grad_norm": 0.9974066615104675, "learning_rate": 0.0008936251528740318, "loss": 3.7248, "step": 12525 }, { "epoch": 0.8513384970784074, "grad_norm": 0.833838164806366, "learning_rate": 0.0008935826878651991, "loss": 3.8281, "step": 12530 }, { "epoch": 0.8516782171490692, "grad_norm": 1.0375392436981201, "learning_rate": 0.0008935402228563663, "loss": 3.951, "step": 12535 }, { "epoch": 0.852017937219731, "grad_norm": 0.8951463103294373, "learning_rate": 0.0008934977578475336, "loss": 3.7844, "step": 12540 }, { "epoch": 0.8523576572903927, "grad_norm": 0.7702258825302124, "learning_rate": 0.000893455292838701, "loss": 3.7887, "step": 12545 }, { "epoch": 0.8526973773610544, "grad_norm": 1.7558034658432007, "learning_rate": 0.0008934128278298682, "loss": 3.7398, "step": 12550 }, { "epoch": 0.8530370974317163, "grad_norm": 0.9577825665473938, "learning_rate": 0.0008933703628210355, "loss": 3.739, "step": 12555 }, { "epoch": 0.853376817502378, "grad_norm": 1.3343416452407837, "learning_rate": 0.0008933278978122028, "loss": 3.4998, "step": 12560 }, { "epoch": 0.8537165375730398, "grad_norm": 0.7011745572090149, "learning_rate": 0.00089328543280337, "loss": 3.8674, "step": 12565 }, { "epoch": 0.8540562576437016, "grad_norm": 0.9189016222953796, "learning_rate": 0.0008932429677945373, "loss": 3.5648, "step": 12570 }, { "epoch": 0.8543959777143634, "grad_norm": 0.8981935381889343, "learning_rate": 0.0008932005027857046, "loss": 3.7673, "step": 12575 }, { "epoch": 0.8547356977850251, "grad_norm": 1.0040943622589111, "learning_rate": 0.0008931580377768719, "loss": 3.616, "step": 12580 }, { "epoch": 0.8550754178556869, "grad_norm": 0.7973901629447937, "learning_rate": 0.0008931155727680392, "loss": 3.4455, "step": 12585 }, { "epoch": 0.8554151379263487, "grad_norm": 0.8325662612915039, "learning_rate": 0.0008930731077592065, "loss": 3.7143, "step": 12590 }, { "epoch": 0.8557548579970105, "grad_norm": 0.7751691341400146, "learning_rate": 0.0008930306427503737, "loss": 3.7809, "step": 12595 }, { "epoch": 0.8560945780676722, "grad_norm": 7.4706196784973145, "learning_rate": 0.0008929881777415409, "loss": 3.7586, "step": 12600 }, { "epoch": 0.856434298138334, "grad_norm": 0.9639095664024353, "learning_rate": 0.0008929457127327083, "loss": 3.4086, "step": 12605 }, { "epoch": 0.8567740182089958, "grad_norm": 0.6753028035163879, "learning_rate": 0.0008929032477238755, "loss": 3.6616, "step": 12610 }, { "epoch": 0.8571137382796575, "grad_norm": 0.7249093055725098, "learning_rate": 0.0008928607827150428, "loss": 3.5685, "step": 12615 }, { "epoch": 0.8574534583503194, "grad_norm": 1.7473300695419312, "learning_rate": 0.0008928183177062102, "loss": 3.9909, "step": 12620 }, { "epoch": 0.8577931784209811, "grad_norm": 0.9212319850921631, "learning_rate": 0.0008927758526973774, "loss": 3.9006, "step": 12625 }, { "epoch": 0.8581328984916429, "grad_norm": 1.0572686195373535, "learning_rate": 0.0008927333876885446, "loss": 3.8801, "step": 12630 }, { "epoch": 0.8584726185623046, "grad_norm": 1.8485252857208252, "learning_rate": 0.000892690922679712, "loss": 3.4499, "step": 12635 }, { "epoch": 0.8588123386329665, "grad_norm": 0.7671639323234558, "learning_rate": 0.0008926484576708792, "loss": 3.8462, "step": 12640 }, { "epoch": 0.8591520587036282, "grad_norm": 1.237829327583313, "learning_rate": 0.0008926059926620464, "loss": 3.767, "step": 12645 }, { "epoch": 0.85949177877429, "grad_norm": 0.8066554069519043, "learning_rate": 0.0008925635276532138, "loss": 3.71, "step": 12650 }, { "epoch": 0.8598314988449518, "grad_norm": 0.7689037919044495, "learning_rate": 0.0008925210626443811, "loss": 3.5923, "step": 12655 }, { "epoch": 0.8601712189156135, "grad_norm": 0.874286949634552, "learning_rate": 0.0008924785976355483, "loss": 3.5279, "step": 12660 }, { "epoch": 0.8605109389862753, "grad_norm": 0.8563452363014221, "learning_rate": 0.0008924361326267156, "loss": 3.6292, "step": 12665 }, { "epoch": 0.860850659056937, "grad_norm": 1.5231919288635254, "learning_rate": 0.0008923936676178829, "loss": 3.6664, "step": 12670 }, { "epoch": 0.8611903791275989, "grad_norm": 0.6971249580383301, "learning_rate": 0.0008923512026090501, "loss": 3.7749, "step": 12675 }, { "epoch": 0.8615300991982606, "grad_norm": 1.3179380893707275, "learning_rate": 0.0008923087376002174, "loss": 3.6636, "step": 12680 }, { "epoch": 0.8618698192689224, "grad_norm": 0.7259864211082458, "learning_rate": 0.0008922662725913848, "loss": 3.7117, "step": 12685 }, { "epoch": 0.8622095393395842, "grad_norm": 1.1027296781539917, "learning_rate": 0.000892223807582552, "loss": 3.4609, "step": 12690 }, { "epoch": 0.862549259410246, "grad_norm": 0.7830087542533875, "learning_rate": 0.0008921813425737193, "loss": 3.7428, "step": 12695 }, { "epoch": 0.8628889794809077, "grad_norm": 0.9170404076576233, "learning_rate": 0.0008921388775648865, "loss": 3.7073, "step": 12700 }, { "epoch": 0.8632286995515696, "grad_norm": 0.7713662385940552, "learning_rate": 0.0008920964125560538, "loss": 3.3853, "step": 12705 }, { "epoch": 0.8635684196222313, "grad_norm": 0.8968492150306702, "learning_rate": 0.0008920539475472211, "loss": 3.4537, "step": 12710 }, { "epoch": 0.863908139692893, "grad_norm": 1.0002405643463135, "learning_rate": 0.0008920114825383883, "loss": 3.7209, "step": 12715 }, { "epoch": 0.8642478597635548, "grad_norm": 1.170670747756958, "learning_rate": 0.0008919690175295557, "loss": 3.7614, "step": 12720 }, { "epoch": 0.8645875798342166, "grad_norm": 0.9491438865661621, "learning_rate": 0.000891926552520723, "loss": 3.8023, "step": 12725 }, { "epoch": 0.8649272999048784, "grad_norm": 0.7927387952804565, "learning_rate": 0.0008918840875118902, "loss": 3.6425, "step": 12730 }, { "epoch": 0.8652670199755401, "grad_norm": 0.9267032742500305, "learning_rate": 0.0008918416225030574, "loss": 3.9812, "step": 12735 }, { "epoch": 0.865606740046202, "grad_norm": 0.9640617966651917, "learning_rate": 0.0008917991574942248, "loss": 3.742, "step": 12740 }, { "epoch": 0.8659464601168637, "grad_norm": 0.8019275069236755, "learning_rate": 0.000891756692485392, "loss": 3.7103, "step": 12745 }, { "epoch": 0.8662861801875255, "grad_norm": 0.6883156895637512, "learning_rate": 0.0008917142274765592, "loss": 3.8125, "step": 12750 }, { "epoch": 0.8666259002581872, "grad_norm": 0.7106084227561951, "learning_rate": 0.0008916717624677267, "loss": 3.5724, "step": 12755 }, { "epoch": 0.866965620328849, "grad_norm": 0.7619748115539551, "learning_rate": 0.0008916292974588939, "loss": 3.6146, "step": 12760 }, { "epoch": 0.8673053403995108, "grad_norm": 0.7439718246459961, "learning_rate": 0.0008915868324500611, "loss": 3.7144, "step": 12765 }, { "epoch": 0.8676450604701725, "grad_norm": 0.7859118580818176, "learning_rate": 0.0008915443674412285, "loss": 3.7746, "step": 12770 }, { "epoch": 0.8679847805408344, "grad_norm": 1.412725567817688, "learning_rate": 0.0008915019024323957, "loss": 3.8148, "step": 12775 }, { "epoch": 0.8683245006114961, "grad_norm": 0.7228496670722961, "learning_rate": 0.000891459437423563, "loss": 3.6268, "step": 12780 }, { "epoch": 0.8686642206821579, "grad_norm": 0.8363052606582642, "learning_rate": 0.0008914169724147302, "loss": 3.8245, "step": 12785 }, { "epoch": 0.8690039407528197, "grad_norm": 1.4017343521118164, "learning_rate": 0.0008913745074058976, "loss": 3.592, "step": 12790 }, { "epoch": 0.8693436608234815, "grad_norm": 0.6179370880126953, "learning_rate": 0.0008913320423970649, "loss": 3.724, "step": 12795 }, { "epoch": 0.8696833808941432, "grad_norm": 0.8103336691856384, "learning_rate": 0.0008912895773882321, "loss": 3.8599, "step": 12800 }, { "epoch": 0.870023100964805, "grad_norm": 0.7341117858886719, "learning_rate": 0.0008912471123793994, "loss": 3.8265, "step": 12805 }, { "epoch": 0.8703628210354668, "grad_norm": 0.8728798627853394, "learning_rate": 0.0008912046473705667, "loss": 3.9017, "step": 12810 }, { "epoch": 0.8707025411061285, "grad_norm": 1.0656594038009644, "learning_rate": 0.0008911621823617339, "loss": 3.6751, "step": 12815 }, { "epoch": 0.8710422611767903, "grad_norm": 0.7849941253662109, "learning_rate": 0.0008911197173529012, "loss": 3.7614, "step": 12820 }, { "epoch": 0.8713819812474521, "grad_norm": 1.0461782217025757, "learning_rate": 0.0008910772523440686, "loss": 3.8796, "step": 12825 }, { "epoch": 0.8717217013181139, "grad_norm": 0.7844672799110413, "learning_rate": 0.0008910347873352358, "loss": 3.6107, "step": 12830 }, { "epoch": 0.8720614213887756, "grad_norm": 0.733753502368927, "learning_rate": 0.000890992322326403, "loss": 3.4929, "step": 12835 }, { "epoch": 0.8724011414594374, "grad_norm": 1.1124409437179565, "learning_rate": 0.0008909498573175704, "loss": 3.6283, "step": 12840 }, { "epoch": 0.8727408615300992, "grad_norm": 0.8028335571289062, "learning_rate": 0.0008909073923087376, "loss": 3.5783, "step": 12845 }, { "epoch": 0.873080581600761, "grad_norm": 0.8884498476982117, "learning_rate": 0.0008908649272999048, "loss": 3.9649, "step": 12850 }, { "epoch": 0.8734203016714227, "grad_norm": 0.9446478486061096, "learning_rate": 0.0008908224622910723, "loss": 3.7274, "step": 12855 }, { "epoch": 0.8737600217420846, "grad_norm": 0.7945959568023682, "learning_rate": 0.0008907799972822395, "loss": 3.6012, "step": 12860 }, { "epoch": 0.8740997418127463, "grad_norm": 0.7741202712059021, "learning_rate": 0.0008907375322734067, "loss": 3.5847, "step": 12865 }, { "epoch": 0.874439461883408, "grad_norm": 1.3905640840530396, "learning_rate": 0.0008906950672645741, "loss": 3.4079, "step": 12870 }, { "epoch": 0.8747791819540699, "grad_norm": 0.8681594133377075, "learning_rate": 0.0008906526022557413, "loss": 3.7226, "step": 12875 }, { "epoch": 0.8751189020247316, "grad_norm": 0.8703209757804871, "learning_rate": 0.0008906101372469085, "loss": 3.4136, "step": 12880 }, { "epoch": 0.8754586220953934, "grad_norm": 2.2236297130584717, "learning_rate": 0.0008905676722380758, "loss": 3.6976, "step": 12885 }, { "epoch": 0.8757983421660551, "grad_norm": 0.7414122819900513, "learning_rate": 0.0008905252072292432, "loss": 3.5893, "step": 12890 }, { "epoch": 0.876138062236717, "grad_norm": 0.8235514163970947, "learning_rate": 0.0008904827422204104, "loss": 3.7593, "step": 12895 }, { "epoch": 0.8764777823073787, "grad_norm": 1.1876682043075562, "learning_rate": 0.0008904402772115777, "loss": 3.8826, "step": 12900 }, { "epoch": 0.8768175023780405, "grad_norm": 0.9803397059440613, "learning_rate": 0.000890397812202745, "loss": 3.6454, "step": 12905 }, { "epoch": 0.8771572224487023, "grad_norm": 1.0319056510925293, "learning_rate": 0.0008903553471939122, "loss": 3.658, "step": 12910 }, { "epoch": 0.877496942519364, "grad_norm": 1.392046570777893, "learning_rate": 0.0008903128821850795, "loss": 3.3554, "step": 12915 }, { "epoch": 0.8778366625900258, "grad_norm": 1.0085598230361938, "learning_rate": 0.0008902704171762468, "loss": 3.6224, "step": 12920 }, { "epoch": 0.8781763826606876, "grad_norm": 1.091357946395874, "learning_rate": 0.0008902279521674141, "loss": 3.4549, "step": 12925 }, { "epoch": 0.8785161027313494, "grad_norm": 0.9164418578147888, "learning_rate": 0.0008901854871585814, "loss": 3.6929, "step": 12930 }, { "epoch": 0.8788558228020111, "grad_norm": 0.8552413582801819, "learning_rate": 0.0008901430221497486, "loss": 3.662, "step": 12935 }, { "epoch": 0.8791955428726729, "grad_norm": 1.0654689073562622, "learning_rate": 0.0008901005571409159, "loss": 3.6454, "step": 12940 }, { "epoch": 0.8795352629433347, "grad_norm": 0.6858550906181335, "learning_rate": 0.0008900580921320832, "loss": 3.6554, "step": 12945 }, { "epoch": 0.8798749830139965, "grad_norm": 0.8437381386756897, "learning_rate": 0.0008900156271232504, "loss": 3.4375, "step": 12950 }, { "epoch": 0.8802147030846582, "grad_norm": 0.8676014542579651, "learning_rate": 0.0008899731621144177, "loss": 3.734, "step": 12955 }, { "epoch": 0.8805544231553201, "grad_norm": 0.9134470224380493, "learning_rate": 0.0008899306971055851, "loss": 3.7288, "step": 12960 }, { "epoch": 0.8808941432259818, "grad_norm": 0.8111168146133423, "learning_rate": 0.0008898882320967523, "loss": 3.7357, "step": 12965 }, { "epoch": 0.8812338632966435, "grad_norm": 3.776362657546997, "learning_rate": 0.0008898457670879196, "loss": 3.7278, "step": 12970 }, { "epoch": 0.8815735833673053, "grad_norm": 6.689911365509033, "learning_rate": 0.0008898033020790869, "loss": 3.6838, "step": 12975 }, { "epoch": 0.8819133034379671, "grad_norm": 0.7403917908668518, "learning_rate": 0.0008897608370702541, "loss": 3.7995, "step": 12980 }, { "epoch": 0.8822530235086289, "grad_norm": 0.8998690247535706, "learning_rate": 0.0008897183720614213, "loss": 3.5744, "step": 12985 }, { "epoch": 0.8825927435792906, "grad_norm": 1.1085846424102783, "learning_rate": 0.0008896759070525887, "loss": 3.9121, "step": 12990 }, { "epoch": 0.8829324636499525, "grad_norm": 0.7872055172920227, "learning_rate": 0.000889633442043756, "loss": 3.689, "step": 12995 }, { "epoch": 0.8832721837206142, "grad_norm": 0.7448084354400635, "learning_rate": 0.0008895909770349232, "loss": 3.7417, "step": 13000 }, { "epoch": 0.883611903791276, "grad_norm": 1.121294379234314, "learning_rate": 0.0008895485120260906, "loss": 3.7674, "step": 13005 }, { "epoch": 0.8839516238619378, "grad_norm": 0.6590057611465454, "learning_rate": 0.0008895060470172578, "loss": 3.4937, "step": 13010 }, { "epoch": 0.8842913439325996, "grad_norm": 0.7215134501457214, "learning_rate": 0.000889463582008425, "loss": 3.6161, "step": 13015 }, { "epoch": 0.8846310640032613, "grad_norm": 0.6728622317314148, "learning_rate": 0.0008894211169995924, "loss": 3.5708, "step": 13020 }, { "epoch": 0.884970784073923, "grad_norm": 0.889244556427002, "learning_rate": 0.0008893786519907596, "loss": 3.5425, "step": 13025 }, { "epoch": 0.8853105041445849, "grad_norm": 0.9045710563659668, "learning_rate": 0.0008893361869819269, "loss": 3.6133, "step": 13030 }, { "epoch": 0.8856502242152466, "grad_norm": 0.86453777551651, "learning_rate": 0.0008892937219730942, "loss": 3.7012, "step": 13035 }, { "epoch": 0.8859899442859084, "grad_norm": 0.9362204670906067, "learning_rate": 0.0008892512569642615, "loss": 3.5917, "step": 13040 }, { "epoch": 0.8863296643565702, "grad_norm": 0.7073429226875305, "learning_rate": 0.0008892087919554287, "loss": 3.6386, "step": 13045 }, { "epoch": 0.886669384427232, "grad_norm": 0.7611358165740967, "learning_rate": 0.000889166326946596, "loss": 3.769, "step": 13050 }, { "epoch": 0.8870091044978937, "grad_norm": 0.8660561442375183, "learning_rate": 0.0008891238619377633, "loss": 3.6306, "step": 13055 }, { "epoch": 0.8873488245685555, "grad_norm": 1.0548138618469238, "learning_rate": 0.0008890813969289305, "loss": 3.5828, "step": 13060 }, { "epoch": 0.8876885446392173, "grad_norm": 0.7869116067886353, "learning_rate": 0.0008890389319200979, "loss": 3.7106, "step": 13065 }, { "epoch": 0.8880282647098791, "grad_norm": 0.929925799369812, "learning_rate": 0.0008889964669112652, "loss": 3.5634, "step": 13070 }, { "epoch": 0.8883679847805408, "grad_norm": 0.8506751656532288, "learning_rate": 0.0008889540019024324, "loss": 3.7833, "step": 13075 }, { "epoch": 0.8887077048512027, "grad_norm": 0.6812782287597656, "learning_rate": 0.0008889115368935997, "loss": 3.6973, "step": 13080 }, { "epoch": 0.8890474249218644, "grad_norm": 0.8563469052314758, "learning_rate": 0.0008888690718847669, "loss": 3.6322, "step": 13085 }, { "epoch": 0.8893871449925261, "grad_norm": 5.279268741607666, "learning_rate": 0.0008888266068759342, "loss": 3.7121, "step": 13090 }, { "epoch": 0.889726865063188, "grad_norm": 0.8281797170639038, "learning_rate": 0.0008887841418671015, "loss": 3.6366, "step": 13095 }, { "epoch": 0.8900665851338497, "grad_norm": 0.7403798699378967, "learning_rate": 0.0008887416768582688, "loss": 3.6392, "step": 13100 }, { "epoch": 0.8904063052045115, "grad_norm": 0.790982723236084, "learning_rate": 0.0008886992118494361, "loss": 3.629, "step": 13105 }, { "epoch": 0.8907460252751732, "grad_norm": 1.0384727716445923, "learning_rate": 0.0008886567468406034, "loss": 3.8037, "step": 13110 }, { "epoch": 0.8910857453458351, "grad_norm": 0.7752336263656616, "learning_rate": 0.0008886142818317706, "loss": 3.6234, "step": 13115 }, { "epoch": 0.8914254654164968, "grad_norm": 0.7246715426445007, "learning_rate": 0.000888571816822938, "loss": 3.8231, "step": 13120 }, { "epoch": 0.8917651854871586, "grad_norm": 1.1059414148330688, "learning_rate": 0.0008885293518141052, "loss": 3.4965, "step": 13125 }, { "epoch": 0.8921049055578204, "grad_norm": 0.9054039716720581, "learning_rate": 0.0008884868868052724, "loss": 3.454, "step": 13130 }, { "epoch": 0.8924446256284821, "grad_norm": 1.0211822986602783, "learning_rate": 0.0008884444217964399, "loss": 3.5745, "step": 13135 }, { "epoch": 0.8927843456991439, "grad_norm": 0.6604037284851074, "learning_rate": 0.0008884019567876071, "loss": 3.7203, "step": 13140 }, { "epoch": 0.8931240657698056, "grad_norm": 1.4752808809280396, "learning_rate": 0.0008883594917787743, "loss": 3.823, "step": 13145 }, { "epoch": 0.8934637858404675, "grad_norm": 0.8650283813476562, "learning_rate": 0.0008883170267699416, "loss": 3.643, "step": 13150 }, { "epoch": 0.8938035059111292, "grad_norm": 0.7194262146949768, "learning_rate": 0.0008882745617611089, "loss": 3.8213, "step": 13155 }, { "epoch": 0.894143225981791, "grad_norm": 0.849518358707428, "learning_rate": 0.0008882320967522761, "loss": 3.4095, "step": 13160 }, { "epoch": 0.8944829460524528, "grad_norm": 1.9330400228500366, "learning_rate": 0.0008881896317434434, "loss": 3.4546, "step": 13165 }, { "epoch": 0.8948226661231146, "grad_norm": 1.4759125709533691, "learning_rate": 0.0008881471667346108, "loss": 3.4915, "step": 13170 }, { "epoch": 0.8951623861937763, "grad_norm": 0.5968790650367737, "learning_rate": 0.000888104701725778, "loss": 3.7624, "step": 13175 }, { "epoch": 0.8955021062644382, "grad_norm": 0.9586802124977112, "learning_rate": 0.0008880622367169453, "loss": 3.6566, "step": 13180 }, { "epoch": 0.8958418263350999, "grad_norm": 0.8077282309532166, "learning_rate": 0.0008880197717081125, "loss": 3.8535, "step": 13185 }, { "epoch": 0.8961815464057616, "grad_norm": 2.1277451515197754, "learning_rate": 0.0008879773066992798, "loss": 3.6182, "step": 13190 }, { "epoch": 0.8965212664764234, "grad_norm": 0.901930570602417, "learning_rate": 0.0008879348416904471, "loss": 3.758, "step": 13195 }, { "epoch": 0.8968609865470852, "grad_norm": 0.7633906602859497, "learning_rate": 0.0008878923766816143, "loss": 3.6279, "step": 13200 }, { "epoch": 0.897200706617747, "grad_norm": 0.8146492838859558, "learning_rate": 0.0008878499116727817, "loss": 3.5136, "step": 13205 }, { "epoch": 0.8975404266884087, "grad_norm": 0.702854573726654, "learning_rate": 0.000887807446663949, "loss": 3.5898, "step": 13210 }, { "epoch": 0.8978801467590706, "grad_norm": 0.9555582404136658, "learning_rate": 0.0008877649816551162, "loss": 3.6541, "step": 13215 }, { "epoch": 0.8982198668297323, "grad_norm": 0.8376206159591675, "learning_rate": 0.0008877225166462834, "loss": 3.7081, "step": 13220 }, { "epoch": 0.8985595869003941, "grad_norm": 0.8409804105758667, "learning_rate": 0.0008876800516374508, "loss": 3.8191, "step": 13225 }, { "epoch": 0.8988993069710558, "grad_norm": 0.9086755514144897, "learning_rate": 0.000887637586628618, "loss": 3.8514, "step": 13230 }, { "epoch": 0.8992390270417177, "grad_norm": 1.650781512260437, "learning_rate": 0.0008875951216197852, "loss": 3.5137, "step": 13235 }, { "epoch": 0.8995787471123794, "grad_norm": 0.742462158203125, "learning_rate": 0.0008875526566109527, "loss": 3.8492, "step": 13240 }, { "epoch": 0.8999184671830411, "grad_norm": 0.767926037311554, "learning_rate": 0.0008875101916021199, "loss": 3.4037, "step": 13245 }, { "epoch": 0.900258187253703, "grad_norm": 0.8409079313278198, "learning_rate": 0.0008874677265932871, "loss": 3.6168, "step": 13250 }, { "epoch": 0.9005979073243647, "grad_norm": 0.8704046010971069, "learning_rate": 0.0008874252615844545, "loss": 3.6999, "step": 13255 }, { "epoch": 0.9009376273950265, "grad_norm": 0.7593732476234436, "learning_rate": 0.0008873827965756217, "loss": 3.7242, "step": 13260 }, { "epoch": 0.9012773474656883, "grad_norm": 0.7422963380813599, "learning_rate": 0.0008873403315667889, "loss": 3.6065, "step": 13265 }, { "epoch": 0.9016170675363501, "grad_norm": 1.0189372301101685, "learning_rate": 0.0008872978665579562, "loss": 3.6786, "step": 13270 }, { "epoch": 0.9019567876070118, "grad_norm": 0.9683796167373657, "learning_rate": 0.0008872554015491236, "loss": 3.8129, "step": 13275 }, { "epoch": 0.9022965076776736, "grad_norm": 1.0309350490570068, "learning_rate": 0.0008872129365402908, "loss": 3.884, "step": 13280 }, { "epoch": 0.9026362277483354, "grad_norm": 0.7175368070602417, "learning_rate": 0.0008871704715314581, "loss": 3.6692, "step": 13285 }, { "epoch": 0.9029759478189971, "grad_norm": 1.0159885883331299, "learning_rate": 0.0008871280065226254, "loss": 3.7348, "step": 13290 }, { "epoch": 0.9033156678896589, "grad_norm": 1.0885182619094849, "learning_rate": 0.0008870855415137926, "loss": 3.8768, "step": 13295 }, { "epoch": 0.9036553879603207, "grad_norm": 0.7318781614303589, "learning_rate": 0.0008870430765049599, "loss": 3.772, "step": 13300 }, { "epoch": 0.9039951080309825, "grad_norm": 3.8705921173095703, "learning_rate": 0.0008870006114961272, "loss": 3.4644, "step": 13305 }, { "epoch": 0.9043348281016442, "grad_norm": 0.6893762946128845, "learning_rate": 0.0008869581464872945, "loss": 3.8524, "step": 13310 }, { "epoch": 0.904674548172306, "grad_norm": 0.6884678602218628, "learning_rate": 0.0008869156814784618, "loss": 3.9995, "step": 13315 }, { "epoch": 0.9050142682429678, "grad_norm": 1.6105153560638428, "learning_rate": 0.000886873216469629, "loss": 3.4808, "step": 13320 }, { "epoch": 0.9053539883136296, "grad_norm": 0.7443768978118896, "learning_rate": 0.0008868307514607963, "loss": 3.5492, "step": 13325 }, { "epoch": 0.9056937083842913, "grad_norm": 0.8776019811630249, "learning_rate": 0.0008867882864519636, "loss": 3.5964, "step": 13330 }, { "epoch": 0.9060334284549532, "grad_norm": 0.9091726541519165, "learning_rate": 0.0008867458214431308, "loss": 3.79, "step": 13335 }, { "epoch": 0.9063731485256149, "grad_norm": 1.0782090425491333, "learning_rate": 0.0008867033564342981, "loss": 3.6298, "step": 13340 }, { "epoch": 0.9067128685962766, "grad_norm": 0.7235108017921448, "learning_rate": 0.0008866608914254655, "loss": 3.763, "step": 13345 }, { "epoch": 0.9070525886669385, "grad_norm": 1.042415738105774, "learning_rate": 0.0008866184264166327, "loss": 3.6742, "step": 13350 }, { "epoch": 0.9073923087376002, "grad_norm": 0.8211411237716675, "learning_rate": 0.0008865759614078, "loss": 3.5995, "step": 13355 }, { "epoch": 0.907732028808262, "grad_norm": 0.9127249717712402, "learning_rate": 0.0008865334963989673, "loss": 3.5836, "step": 13360 }, { "epoch": 0.9080717488789237, "grad_norm": 1.2099254131317139, "learning_rate": 0.0008864910313901345, "loss": 3.6074, "step": 13365 }, { "epoch": 0.9084114689495856, "grad_norm": 1.1270442008972168, "learning_rate": 0.0008864485663813017, "loss": 3.6886, "step": 13370 }, { "epoch": 0.9087511890202473, "grad_norm": 0.7445120811462402, "learning_rate": 0.0008864061013724691, "loss": 3.2686, "step": 13375 }, { "epoch": 0.9090909090909091, "grad_norm": 0.7615050673484802, "learning_rate": 0.0008863636363636364, "loss": 3.8553, "step": 13380 }, { "epoch": 0.9094306291615709, "grad_norm": 0.871473491191864, "learning_rate": 0.0008863211713548036, "loss": 3.9049, "step": 13385 }, { "epoch": 0.9097703492322327, "grad_norm": 0.9612770080566406, "learning_rate": 0.000886278706345971, "loss": 3.7613, "step": 13390 }, { "epoch": 0.9101100693028944, "grad_norm": 6.434609889984131, "learning_rate": 0.0008862362413371382, "loss": 3.5481, "step": 13395 }, { "epoch": 0.9104497893735561, "grad_norm": 1.103271245956421, "learning_rate": 0.0008861937763283054, "loss": 3.418, "step": 13400 }, { "epoch": 0.910789509444218, "grad_norm": 1.239216685295105, "learning_rate": 0.0008861513113194728, "loss": 4.1605, "step": 13405 }, { "epoch": 0.9111292295148797, "grad_norm": 0.8537166118621826, "learning_rate": 0.00088610884631064, "loss": 3.4811, "step": 13410 }, { "epoch": 0.9114689495855415, "grad_norm": 0.6005226969718933, "learning_rate": 0.0008860663813018073, "loss": 3.6195, "step": 13415 }, { "epoch": 0.9118086696562033, "grad_norm": 0.7821482419967651, "learning_rate": 0.0008860239162929747, "loss": 3.8903, "step": 13420 }, { "epoch": 0.9121483897268651, "grad_norm": 0.9275714755058289, "learning_rate": 0.0008859814512841419, "loss": 3.6626, "step": 13425 }, { "epoch": 0.9124881097975268, "grad_norm": 1.5872000455856323, "learning_rate": 0.0008859389862753091, "loss": 3.4835, "step": 13430 }, { "epoch": 0.9128278298681887, "grad_norm": 0.9722049832344055, "learning_rate": 0.0008858965212664764, "loss": 3.6821, "step": 13435 }, { "epoch": 0.9131675499388504, "grad_norm": 0.9756625890731812, "learning_rate": 0.0008858540562576437, "loss": 3.4526, "step": 13440 }, { "epoch": 0.9135072700095122, "grad_norm": 0.8384751081466675, "learning_rate": 0.0008858115912488109, "loss": 3.5765, "step": 13445 }, { "epoch": 0.9138469900801739, "grad_norm": 0.7821000814437866, "learning_rate": 0.0008857691262399783, "loss": 3.5271, "step": 13450 }, { "epoch": 0.9141867101508357, "grad_norm": 0.7574872970581055, "learning_rate": 0.0008857266612311456, "loss": 3.5454, "step": 13455 }, { "epoch": 0.9145264302214975, "grad_norm": 0.8772850036621094, "learning_rate": 0.0008856841962223129, "loss": 3.9148, "step": 13460 }, { "epoch": 0.9148661502921592, "grad_norm": 0.8406215310096741, "learning_rate": 0.0008856417312134801, "loss": 3.737, "step": 13465 }, { "epoch": 0.9152058703628211, "grad_norm": 0.855576753616333, "learning_rate": 0.0008855992662046473, "loss": 3.8133, "step": 13470 }, { "epoch": 0.9155455904334828, "grad_norm": 6.184667587280273, "learning_rate": 0.0008855568011958147, "loss": 3.6333, "step": 13475 }, { "epoch": 0.9158853105041446, "grad_norm": 0.835614800453186, "learning_rate": 0.000885514336186982, "loss": 3.502, "step": 13480 }, { "epoch": 0.9162250305748063, "grad_norm": 1.0329148769378662, "learning_rate": 0.0008854718711781492, "loss": 3.6392, "step": 13485 }, { "epoch": 0.9165647506454682, "grad_norm": 1.8290917873382568, "learning_rate": 0.0008854294061693166, "loss": 3.7724, "step": 13490 }, { "epoch": 0.9169044707161299, "grad_norm": 0.9795731902122498, "learning_rate": 0.0008853869411604838, "loss": 3.7244, "step": 13495 }, { "epoch": 0.9172441907867916, "grad_norm": 0.7607027888298035, "learning_rate": 0.000885344476151651, "loss": 3.5909, "step": 13500 }, { "epoch": 0.9175839108574535, "grad_norm": 1.6879788637161255, "learning_rate": 0.0008853020111428184, "loss": 3.7135, "step": 13505 }, { "epoch": 0.9179236309281152, "grad_norm": 1.1392910480499268, "learning_rate": 0.0008852595461339856, "loss": 3.9939, "step": 13510 }, { "epoch": 0.918263350998777, "grad_norm": 0.8106732964515686, "learning_rate": 0.0008852170811251529, "loss": 3.2409, "step": 13515 }, { "epoch": 0.9186030710694388, "grad_norm": 0.9688677787780762, "learning_rate": 0.0008851746161163203, "loss": 3.7299, "step": 13520 }, { "epoch": 0.9189427911401006, "grad_norm": 1.7550389766693115, "learning_rate": 0.0008851321511074875, "loss": 3.6679, "step": 13525 }, { "epoch": 0.9192825112107623, "grad_norm": 0.894064724445343, "learning_rate": 0.0008850896860986547, "loss": 3.386, "step": 13530 }, { "epoch": 0.9196222312814241, "grad_norm": 0.8156725764274597, "learning_rate": 0.000885047221089822, "loss": 3.8207, "step": 13535 }, { "epoch": 0.9199619513520859, "grad_norm": 0.7993063926696777, "learning_rate": 0.0008850047560809893, "loss": 3.8328, "step": 13540 }, { "epoch": 0.9203016714227477, "grad_norm": 0.9036015868186951, "learning_rate": 0.0008849622910721565, "loss": 3.551, "step": 13545 }, { "epoch": 0.9206413914934094, "grad_norm": 0.7851384282112122, "learning_rate": 0.0008849198260633239, "loss": 3.8335, "step": 13550 }, { "epoch": 0.9209811115640713, "grad_norm": 0.7969225645065308, "learning_rate": 0.0008848773610544912, "loss": 3.6863, "step": 13555 }, { "epoch": 0.921320831634733, "grad_norm": 0.9233984351158142, "learning_rate": 0.0008848348960456584, "loss": 3.633, "step": 13560 }, { "epoch": 0.9216605517053947, "grad_norm": 1.0682228803634644, "learning_rate": 0.0008847924310368257, "loss": 3.7084, "step": 13565 }, { "epoch": 0.9220002717760565, "grad_norm": 0.7216653227806091, "learning_rate": 0.000884749966027993, "loss": 3.6827, "step": 13570 }, { "epoch": 0.9223399918467183, "grad_norm": 0.728904664516449, "learning_rate": 0.0008847075010191602, "loss": 3.6725, "step": 13575 }, { "epoch": 0.9226797119173801, "grad_norm": 1.3324331045150757, "learning_rate": 0.0008846650360103275, "loss": 3.5884, "step": 13580 }, { "epoch": 0.9230194319880418, "grad_norm": 0.8203989267349243, "learning_rate": 0.0008846225710014948, "loss": 3.4003, "step": 13585 }, { "epoch": 0.9233591520587037, "grad_norm": 0.8179434537887573, "learning_rate": 0.0008845801059926621, "loss": 3.7058, "step": 13590 }, { "epoch": 0.9236988721293654, "grad_norm": 1.7413372993469238, "learning_rate": 0.0008845376409838294, "loss": 3.6786, "step": 13595 }, { "epoch": 0.9240385922000272, "grad_norm": 0.7709000110626221, "learning_rate": 0.0008844951759749966, "loss": 3.5964, "step": 13600 }, { "epoch": 0.924378312270689, "grad_norm": 0.8884493708610535, "learning_rate": 0.0008844527109661639, "loss": 3.8318, "step": 13605 }, { "epoch": 0.9247180323413507, "grad_norm": 0.7240601778030396, "learning_rate": 0.0008844102459573312, "loss": 3.8144, "step": 13610 }, { "epoch": 0.9250577524120125, "grad_norm": 0.8557507991790771, "learning_rate": 0.0008843677809484984, "loss": 3.5952, "step": 13615 }, { "epoch": 0.9253974724826742, "grad_norm": 0.8917243480682373, "learning_rate": 0.0008843253159396657, "loss": 3.6763, "step": 13620 }, { "epoch": 0.9257371925533361, "grad_norm": 0.7802959680557251, "learning_rate": 0.0008842828509308331, "loss": 3.7661, "step": 13625 }, { "epoch": 0.9260769126239978, "grad_norm": 0.6309027075767517, "learning_rate": 0.0008842403859220003, "loss": 3.5654, "step": 13630 }, { "epoch": 0.9264166326946596, "grad_norm": 1.106672763824463, "learning_rate": 0.0008841979209131675, "loss": 3.7878, "step": 13635 }, { "epoch": 0.9267563527653214, "grad_norm": 0.8464555144309998, "learning_rate": 0.0008841554559043349, "loss": 3.6785, "step": 13640 }, { "epoch": 0.9270960728359832, "grad_norm": 1.3956855535507202, "learning_rate": 0.0008841129908955021, "loss": 3.4541, "step": 13645 }, { "epoch": 0.9274357929066449, "grad_norm": 0.9163585305213928, "learning_rate": 0.0008840705258866693, "loss": 3.8821, "step": 13650 }, { "epoch": 0.9277755129773066, "grad_norm": 0.9337664842605591, "learning_rate": 0.0008840280608778368, "loss": 3.7544, "step": 13655 }, { "epoch": 0.9281152330479685, "grad_norm": 0.6309140920639038, "learning_rate": 0.000883985595869004, "loss": 3.8096, "step": 13660 }, { "epoch": 0.9284549531186302, "grad_norm": 0.8382517695426941, "learning_rate": 0.0008839431308601712, "loss": 3.6736, "step": 13665 }, { "epoch": 0.928794673189292, "grad_norm": 0.7094393968582153, "learning_rate": 0.0008839006658513385, "loss": 3.8144, "step": 13670 }, { "epoch": 0.9291343932599538, "grad_norm": 1.1005032062530518, "learning_rate": 0.0008838582008425058, "loss": 3.7603, "step": 13675 }, { "epoch": 0.9294741133306156, "grad_norm": 1.0306353569030762, "learning_rate": 0.000883815735833673, "loss": 3.7025, "step": 13680 }, { "epoch": 0.9298138334012773, "grad_norm": 0.8249132633209229, "learning_rate": 0.0008837732708248403, "loss": 3.4602, "step": 13685 }, { "epoch": 0.9301535534719392, "grad_norm": 0.6795862913131714, "learning_rate": 0.0008837308058160077, "loss": 3.5854, "step": 13690 }, { "epoch": 0.9304932735426009, "grad_norm": 0.8747157454490662, "learning_rate": 0.0008836883408071749, "loss": 3.5667, "step": 13695 }, { "epoch": 0.9308329936132627, "grad_norm": 0.7587159872055054, "learning_rate": 0.0008836458757983422, "loss": 3.5191, "step": 13700 }, { "epoch": 0.9311727136839244, "grad_norm": 0.8577826023101807, "learning_rate": 0.0008836034107895095, "loss": 3.6191, "step": 13705 }, { "epoch": 0.9315124337545863, "grad_norm": 0.9343301057815552, "learning_rate": 0.0008835609457806767, "loss": 3.6625, "step": 13710 }, { "epoch": 0.931852153825248, "grad_norm": 0.8171669840812683, "learning_rate": 0.000883518480771844, "loss": 3.766, "step": 13715 }, { "epoch": 0.9321918738959097, "grad_norm": 0.8000214695930481, "learning_rate": 0.0008834760157630112, "loss": 3.6236, "step": 13720 }, { "epoch": 0.9325315939665716, "grad_norm": 0.9358586668968201, "learning_rate": 0.0008834335507541786, "loss": 3.4313, "step": 13725 }, { "epoch": 0.9328713140372333, "grad_norm": 0.7570367455482483, "learning_rate": 0.0008833910857453459, "loss": 3.6079, "step": 13730 }, { "epoch": 0.9332110341078951, "grad_norm": 1.0521817207336426, "learning_rate": 0.0008833486207365131, "loss": 3.8812, "step": 13735 }, { "epoch": 0.9335507541785568, "grad_norm": 0.6636382341384888, "learning_rate": 0.0008833061557276804, "loss": 3.6594, "step": 13740 }, { "epoch": 0.9338904742492187, "grad_norm": 0.8521907925605774, "learning_rate": 0.0008832636907188477, "loss": 3.7076, "step": 13745 }, { "epoch": 0.9342301943198804, "grad_norm": 0.919730544090271, "learning_rate": 0.0008832212257100149, "loss": 3.237, "step": 13750 }, { "epoch": 0.9345699143905422, "grad_norm": 0.7903971076011658, "learning_rate": 0.0008831787607011821, "loss": 3.8059, "step": 13755 }, { "epoch": 0.934909634461204, "grad_norm": 0.8670375347137451, "learning_rate": 0.0008831362956923496, "loss": 3.591, "step": 13760 }, { "epoch": 0.9352493545318658, "grad_norm": 1.3178188800811768, "learning_rate": 0.0008830938306835168, "loss": 3.6953, "step": 13765 }, { "epoch": 0.9355890746025275, "grad_norm": 0.8890280723571777, "learning_rate": 0.000883051365674684, "loss": 3.7027, "step": 13770 }, { "epoch": 0.9359287946731893, "grad_norm": 0.8231936097145081, "learning_rate": 0.0008830089006658514, "loss": 3.4748, "step": 13775 }, { "epoch": 0.9362685147438511, "grad_norm": 0.9336920976638794, "learning_rate": 0.0008829664356570186, "loss": 3.5724, "step": 13780 }, { "epoch": 0.9366082348145128, "grad_norm": 0.6202356219291687, "learning_rate": 0.0008829239706481858, "loss": 3.7062, "step": 13785 }, { "epoch": 0.9369479548851746, "grad_norm": 0.7148352265357971, "learning_rate": 0.0008828815056393532, "loss": 3.6556, "step": 13790 }, { "epoch": 0.9372876749558364, "grad_norm": 0.9888379573822021, "learning_rate": 0.0008828390406305205, "loss": 3.7264, "step": 13795 }, { "epoch": 0.9376273950264982, "grad_norm": 0.8397827744483948, "learning_rate": 0.0008827965756216878, "loss": 3.4699, "step": 13800 }, { "epoch": 0.9379671150971599, "grad_norm": 0.6670814752578735, "learning_rate": 0.0008827541106128551, "loss": 3.6653, "step": 13805 }, { "epoch": 0.9383068351678218, "grad_norm": 0.8079091906547546, "learning_rate": 0.0008827116456040223, "loss": 3.3031, "step": 13810 }, { "epoch": 0.9386465552384835, "grad_norm": 0.8372642397880554, "learning_rate": 0.0008826691805951896, "loss": 3.6295, "step": 13815 }, { "epoch": 0.9389862753091452, "grad_norm": 0.6966977119445801, "learning_rate": 0.0008826267155863568, "loss": 3.7738, "step": 13820 }, { "epoch": 0.939325995379807, "grad_norm": 0.9013363122940063, "learning_rate": 0.0008825842505775241, "loss": 3.2969, "step": 13825 }, { "epoch": 0.9396657154504688, "grad_norm": 1.0251212120056152, "learning_rate": 0.0008825417855686915, "loss": 3.6845, "step": 13830 }, { "epoch": 0.9400054355211306, "grad_norm": 0.7265050411224365, "learning_rate": 0.0008824993205598587, "loss": 3.5742, "step": 13835 }, { "epoch": 0.9403451555917923, "grad_norm": 0.6548272371292114, "learning_rate": 0.000882456855551026, "loss": 3.6591, "step": 13840 }, { "epoch": 0.9406848756624542, "grad_norm": 0.99984210729599, "learning_rate": 0.0008824143905421933, "loss": 3.5607, "step": 13845 }, { "epoch": 0.9410245957331159, "grad_norm": 0.6612033247947693, "learning_rate": 0.0008823719255333605, "loss": 3.812, "step": 13850 }, { "epoch": 0.9413643158037777, "grad_norm": 0.662527322769165, "learning_rate": 0.0008823294605245277, "loss": 3.8006, "step": 13855 }, { "epoch": 0.9417040358744395, "grad_norm": 0.7916511297225952, "learning_rate": 0.0008822869955156951, "loss": 3.6858, "step": 13860 }, { "epoch": 0.9420437559451013, "grad_norm": 1.1828975677490234, "learning_rate": 0.0008822445305068624, "loss": 3.4239, "step": 13865 }, { "epoch": 0.942383476015763, "grad_norm": 2.4112625122070312, "learning_rate": 0.0008822020654980296, "loss": 3.6426, "step": 13870 }, { "epoch": 0.9427231960864247, "grad_norm": 0.8874445557594299, "learning_rate": 0.000882159600489197, "loss": 3.6877, "step": 13875 }, { "epoch": 0.9430629161570866, "grad_norm": 0.91973876953125, "learning_rate": 0.0008821171354803642, "loss": 3.6937, "step": 13880 }, { "epoch": 0.9434026362277483, "grad_norm": 0.8752864599227905, "learning_rate": 0.0008820746704715314, "loss": 3.4457, "step": 13885 }, { "epoch": 0.9437423562984101, "grad_norm": 0.7345873117446899, "learning_rate": 0.0008820322054626988, "loss": 3.8366, "step": 13890 }, { "epoch": 0.9440820763690719, "grad_norm": 1.2251546382904053, "learning_rate": 0.000881989740453866, "loss": 3.8125, "step": 13895 }, { "epoch": 0.9444217964397337, "grad_norm": 1.0018945932388306, "learning_rate": 0.0008819472754450333, "loss": 3.59, "step": 13900 }, { "epoch": 0.9447615165103954, "grad_norm": 0.8775460720062256, "learning_rate": 0.0008819048104362007, "loss": 3.649, "step": 13905 }, { "epoch": 0.9451012365810572, "grad_norm": 0.8337073922157288, "learning_rate": 0.0008818623454273679, "loss": 3.8179, "step": 13910 }, { "epoch": 0.945440956651719, "grad_norm": 0.8015432953834534, "learning_rate": 0.0008818198804185351, "loss": 3.6612, "step": 13915 }, { "epoch": 0.9457806767223808, "grad_norm": 0.8323782682418823, "learning_rate": 0.0008817774154097024, "loss": 3.5186, "step": 13920 }, { "epoch": 0.9461203967930425, "grad_norm": 0.6812918186187744, "learning_rate": 0.0008817349504008697, "loss": 3.8222, "step": 13925 }, { "epoch": 0.9464601168637043, "grad_norm": 0.8665046095848083, "learning_rate": 0.0008816924853920369, "loss": 3.5126, "step": 13930 }, { "epoch": 0.9467998369343661, "grad_norm": 0.8575319051742554, "learning_rate": 0.0008816500203832043, "loss": 3.5755, "step": 13935 }, { "epoch": 0.9471395570050278, "grad_norm": 1.6602516174316406, "learning_rate": 0.0008816075553743716, "loss": 3.8821, "step": 13940 }, { "epoch": 0.9474792770756897, "grad_norm": 1.1572099924087524, "learning_rate": 0.0008815650903655388, "loss": 3.6644, "step": 13945 }, { "epoch": 0.9478189971463514, "grad_norm": 0.8157739043235779, "learning_rate": 0.0008815226253567061, "loss": 3.726, "step": 13950 }, { "epoch": 0.9481587172170132, "grad_norm": 0.7523817420005798, "learning_rate": 0.0008814801603478733, "loss": 3.3739, "step": 13955 }, { "epoch": 0.9484984372876749, "grad_norm": 0.7100839018821716, "learning_rate": 0.0008814376953390406, "loss": 3.5778, "step": 13960 }, { "epoch": 0.9488381573583368, "grad_norm": 0.7609274983406067, "learning_rate": 0.000881395230330208, "loss": 3.4127, "step": 13965 }, { "epoch": 0.9491778774289985, "grad_norm": 1.1577061414718628, "learning_rate": 0.0008813527653213752, "loss": 3.8804, "step": 13970 }, { "epoch": 0.9495175974996602, "grad_norm": 0.8294179439544678, "learning_rate": 0.0008813103003125425, "loss": 3.6693, "step": 13975 }, { "epoch": 0.9498573175703221, "grad_norm": 0.8605481386184692, "learning_rate": 0.0008812678353037098, "loss": 3.7811, "step": 13980 }, { "epoch": 0.9501970376409838, "grad_norm": 0.9040095210075378, "learning_rate": 0.000881225370294877, "loss": 3.5954, "step": 13985 }, { "epoch": 0.9505367577116456, "grad_norm": 0.6817090511322021, "learning_rate": 0.0008811829052860443, "loss": 3.6321, "step": 13990 }, { "epoch": 0.9508764777823073, "grad_norm": 0.8138971328735352, "learning_rate": 0.0008811404402772116, "loss": 3.7651, "step": 13995 }, { "epoch": 0.9512161978529692, "grad_norm": 0.7704663276672363, "learning_rate": 0.0008810979752683789, "loss": 3.5433, "step": 14000 }, { "epoch": 0.9515559179236309, "grad_norm": 0.6035419702529907, "learning_rate": 0.0008810555102595462, "loss": 3.3425, "step": 14005 }, { "epoch": 0.9518956379942927, "grad_norm": 0.7641417980194092, "learning_rate": 0.0008810130452507135, "loss": 3.4852, "step": 14010 }, { "epoch": 0.9522353580649545, "grad_norm": 0.8048719763755798, "learning_rate": 0.0008809705802418807, "loss": 3.7314, "step": 14015 }, { "epoch": 0.9525750781356163, "grad_norm": 0.7399217486381531, "learning_rate": 0.0008809281152330479, "loss": 3.6636, "step": 14020 }, { "epoch": 0.952914798206278, "grad_norm": 0.816810667514801, "learning_rate": 0.0008808856502242153, "loss": 3.7608, "step": 14025 }, { "epoch": 0.9532545182769399, "grad_norm": 0.6726149320602417, "learning_rate": 0.0008808431852153825, "loss": 3.6351, "step": 14030 }, { "epoch": 0.9535942383476016, "grad_norm": 0.7571033239364624, "learning_rate": 0.0008808007202065498, "loss": 3.6505, "step": 14035 }, { "epoch": 0.9539339584182633, "grad_norm": 0.6782366633415222, "learning_rate": 0.0008807582551977172, "loss": 3.8236, "step": 14040 }, { "epoch": 0.9542736784889251, "grad_norm": 1.115885853767395, "learning_rate": 0.0008807157901888844, "loss": 3.7044, "step": 14045 }, { "epoch": 0.9546133985595869, "grad_norm": 0.8412324786186218, "learning_rate": 0.0008806733251800516, "loss": 3.9145, "step": 14050 }, { "epoch": 0.9549531186302487, "grad_norm": 0.77687007188797, "learning_rate": 0.000880630860171219, "loss": 3.6443, "step": 14055 }, { "epoch": 0.9552928387009104, "grad_norm": 0.7641855478286743, "learning_rate": 0.0008805883951623862, "loss": 3.53, "step": 14060 }, { "epoch": 0.9556325587715723, "grad_norm": 0.7573676109313965, "learning_rate": 0.0008805459301535534, "loss": 3.6638, "step": 14065 }, { "epoch": 0.955972278842234, "grad_norm": 0.781079888343811, "learning_rate": 0.0008805034651447208, "loss": 3.5952, "step": 14070 }, { "epoch": 0.9563119989128958, "grad_norm": 0.7804728746414185, "learning_rate": 0.0008804610001358881, "loss": 3.7907, "step": 14075 }, { "epoch": 0.9566517189835575, "grad_norm": 1.077364444732666, "learning_rate": 0.0008804185351270553, "loss": 3.6663, "step": 14080 }, { "epoch": 0.9569914390542194, "grad_norm": 0.7271178364753723, "learning_rate": 0.0008803760701182226, "loss": 3.5515, "step": 14085 }, { "epoch": 0.9573311591248811, "grad_norm": 0.8874168992042542, "learning_rate": 0.0008803336051093899, "loss": 3.647, "step": 14090 }, { "epoch": 0.9576708791955428, "grad_norm": 0.7073732614517212, "learning_rate": 0.0008802911401005571, "loss": 3.7275, "step": 14095 }, { "epoch": 0.9580105992662047, "grad_norm": 1.1617673635482788, "learning_rate": 0.0008802486750917244, "loss": 3.5342, "step": 14100 }, { "epoch": 0.9583503193368664, "grad_norm": 0.9121617078781128, "learning_rate": 0.0008802062100828918, "loss": 3.4911, "step": 14105 }, { "epoch": 0.9586900394075282, "grad_norm": 0.7611116170883179, "learning_rate": 0.000880163745074059, "loss": 3.4952, "step": 14110 }, { "epoch": 0.95902975947819, "grad_norm": 0.8260694146156311, "learning_rate": 0.0008801212800652263, "loss": 3.6234, "step": 14115 }, { "epoch": 0.9593694795488518, "grad_norm": 0.8095779418945312, "learning_rate": 0.0008800788150563935, "loss": 4.0281, "step": 14120 }, { "epoch": 0.9597091996195135, "grad_norm": 0.894602358341217, "learning_rate": 0.0008800363500475608, "loss": 3.8609, "step": 14125 }, { "epoch": 0.9600489196901753, "grad_norm": 1.0150920152664185, "learning_rate": 0.0008799938850387281, "loss": 3.6769, "step": 14130 }, { "epoch": 0.9603886397608371, "grad_norm": 2.2357912063598633, "learning_rate": 0.0008799514200298953, "loss": 3.681, "step": 14135 }, { "epoch": 0.9607283598314988, "grad_norm": 0.8003286123275757, "learning_rate": 0.0008799089550210628, "loss": 3.8388, "step": 14140 }, { "epoch": 0.9610680799021606, "grad_norm": 0.6359890699386597, "learning_rate": 0.00087986649001223, "loss": 3.5662, "step": 14145 }, { "epoch": 0.9614077999728224, "grad_norm": 0.6818934679031372, "learning_rate": 0.0008798240250033972, "loss": 3.8263, "step": 14150 }, { "epoch": 0.9617475200434842, "grad_norm": 0.9057003855705261, "learning_rate": 0.0008797815599945646, "loss": 3.66, "step": 14155 }, { "epoch": 0.9620872401141459, "grad_norm": 0.7551100254058838, "learning_rate": 0.0008797390949857318, "loss": 3.7164, "step": 14160 }, { "epoch": 0.9624269601848077, "grad_norm": 0.7440225481987, "learning_rate": 0.000879696629976899, "loss": 3.7412, "step": 14165 }, { "epoch": 0.9627666802554695, "grad_norm": 0.7717633843421936, "learning_rate": 0.0008796541649680663, "loss": 3.6196, "step": 14170 }, { "epoch": 0.9631064003261313, "grad_norm": 0.7985604405403137, "learning_rate": 0.0008796116999592337, "loss": 3.6836, "step": 14175 }, { "epoch": 0.963446120396793, "grad_norm": 0.7992721199989319, "learning_rate": 0.0008795692349504009, "loss": 3.6446, "step": 14180 }, { "epoch": 0.9637858404674549, "grad_norm": 0.8389267921447754, "learning_rate": 0.0008795267699415682, "loss": 3.8309, "step": 14185 }, { "epoch": 0.9641255605381166, "grad_norm": 0.8056051135063171, "learning_rate": 0.0008794843049327355, "loss": 3.5781, "step": 14190 }, { "epoch": 0.9644652806087783, "grad_norm": 0.7733875513076782, "learning_rate": 0.0008794418399239027, "loss": 3.6502, "step": 14195 }, { "epoch": 0.9648050006794402, "grad_norm": 2.093970775604248, "learning_rate": 0.00087939937491507, "loss": 3.5789, "step": 14200 }, { "epoch": 0.9651447207501019, "grad_norm": 0.9151191711425781, "learning_rate": 0.0008793569099062372, "loss": 3.5379, "step": 14205 }, { "epoch": 0.9654844408207637, "grad_norm": 0.765770673751831, "learning_rate": 0.0008793144448974046, "loss": 3.6453, "step": 14210 }, { "epoch": 0.9658241608914254, "grad_norm": 1.0559406280517578, "learning_rate": 0.0008792719798885719, "loss": 3.6769, "step": 14215 }, { "epoch": 0.9661638809620873, "grad_norm": 0.9609495997428894, "learning_rate": 0.0008792295148797391, "loss": 3.8035, "step": 14220 }, { "epoch": 0.966503601032749, "grad_norm": 0.8505371809005737, "learning_rate": 0.0008791870498709064, "loss": 3.6685, "step": 14225 }, { "epoch": 0.9668433211034108, "grad_norm": 0.8088500499725342, "learning_rate": 0.0008791445848620737, "loss": 3.6269, "step": 14230 }, { "epoch": 0.9671830411740726, "grad_norm": 0.6623719334602356, "learning_rate": 0.0008791021198532409, "loss": 3.7227, "step": 14235 }, { "epoch": 0.9675227612447344, "grad_norm": 0.9269905686378479, "learning_rate": 0.0008790596548444082, "loss": 3.4804, "step": 14240 }, { "epoch": 0.9678624813153961, "grad_norm": 0.7461432218551636, "learning_rate": 0.0008790171898355756, "loss": 3.5073, "step": 14245 }, { "epoch": 0.9682022013860578, "grad_norm": 0.686406672000885, "learning_rate": 0.0008789747248267428, "loss": 3.7627, "step": 14250 }, { "epoch": 0.9685419214567197, "grad_norm": 0.7554128170013428, "learning_rate": 0.00087893225981791, "loss": 3.7856, "step": 14255 }, { "epoch": 0.9688816415273814, "grad_norm": 0.7116771340370178, "learning_rate": 0.0008788897948090774, "loss": 3.6954, "step": 14260 }, { "epoch": 0.9692213615980432, "grad_norm": 0.6576034426689148, "learning_rate": 0.0008788473298002446, "loss": 3.5698, "step": 14265 }, { "epoch": 0.969561081668705, "grad_norm": 0.9415531754493713, "learning_rate": 0.0008788048647914118, "loss": 3.5838, "step": 14270 }, { "epoch": 0.9699008017393668, "grad_norm": 0.8466349840164185, "learning_rate": 0.0008787623997825792, "loss": 3.768, "step": 14275 }, { "epoch": 0.9702405218100285, "grad_norm": 0.9635273218154907, "learning_rate": 0.0008787199347737465, "loss": 3.5905, "step": 14280 }, { "epoch": 0.9705802418806904, "grad_norm": 0.8305516242980957, "learning_rate": 0.0008786774697649137, "loss": 3.643, "step": 14285 }, { "epoch": 0.9709199619513521, "grad_norm": 0.640354573726654, "learning_rate": 0.0008786350047560811, "loss": 3.6625, "step": 14290 }, { "epoch": 0.9712596820220138, "grad_norm": 0.7893102169036865, "learning_rate": 0.0008785925397472483, "loss": 3.6282, "step": 14295 }, { "epoch": 0.9715994020926756, "grad_norm": 0.8902161121368408, "learning_rate": 0.0008785500747384155, "loss": 3.6412, "step": 14300 }, { "epoch": 0.9719391221633374, "grad_norm": 0.6888489723205566, "learning_rate": 0.0008785076097295828, "loss": 3.823, "step": 14305 }, { "epoch": 0.9722788422339992, "grad_norm": 0.8645451068878174, "learning_rate": 0.0008784651447207501, "loss": 3.666, "step": 14310 }, { "epoch": 0.9726185623046609, "grad_norm": 0.8414638042449951, "learning_rate": 0.0008784226797119174, "loss": 3.4548, "step": 14315 }, { "epoch": 0.9729582823753228, "grad_norm": 0.7070507407188416, "learning_rate": 0.0008783802147030847, "loss": 3.6038, "step": 14320 }, { "epoch": 0.9732980024459845, "grad_norm": 0.9197102785110474, "learning_rate": 0.000878337749694252, "loss": 3.1827, "step": 14325 }, { "epoch": 0.9736377225166463, "grad_norm": 0.9254668951034546, "learning_rate": 0.0008782952846854192, "loss": 3.7677, "step": 14330 }, { "epoch": 0.973977442587308, "grad_norm": 0.6447213292121887, "learning_rate": 0.0008782528196765865, "loss": 3.7592, "step": 14335 }, { "epoch": 0.9743171626579699, "grad_norm": 0.5707345008850098, "learning_rate": 0.0008782103546677538, "loss": 3.6348, "step": 14340 }, { "epoch": 0.9746568827286316, "grad_norm": 0.8413156867027283, "learning_rate": 0.000878167889658921, "loss": 3.8071, "step": 14345 }, { "epoch": 0.9749966027992933, "grad_norm": 0.8275386095046997, "learning_rate": 0.0008781254246500884, "loss": 3.6867, "step": 14350 }, { "epoch": 0.9753363228699552, "grad_norm": 0.7195150256156921, "learning_rate": 0.0008780829596412556, "loss": 3.3275, "step": 14355 }, { "epoch": 0.9756760429406169, "grad_norm": 0.6811602115631104, "learning_rate": 0.0008780404946324229, "loss": 3.7898, "step": 14360 }, { "epoch": 0.9760157630112787, "grad_norm": 0.6916821599006653, "learning_rate": 0.0008779980296235902, "loss": 3.6729, "step": 14365 }, { "epoch": 0.9763554830819405, "grad_norm": 0.7175919413566589, "learning_rate": 0.0008779555646147574, "loss": 3.7737, "step": 14370 }, { "epoch": 0.9766952031526023, "grad_norm": 0.8398088812828064, "learning_rate": 0.0008779130996059247, "loss": 3.4601, "step": 14375 }, { "epoch": 0.977034923223264, "grad_norm": 0.7387117743492126, "learning_rate": 0.000877870634597092, "loss": 3.7371, "step": 14380 }, { "epoch": 0.9773746432939258, "grad_norm": 0.6802313923835754, "learning_rate": 0.0008778281695882593, "loss": 3.368, "step": 14385 }, { "epoch": 0.9777143633645876, "grad_norm": 0.9588213562965393, "learning_rate": 0.0008777857045794266, "loss": 3.4924, "step": 14390 }, { "epoch": 0.9780540834352494, "grad_norm": 1.0174281597137451, "learning_rate": 0.0008777432395705939, "loss": 3.7799, "step": 14395 }, { "epoch": 0.9783938035059111, "grad_norm": 0.8835201859474182, "learning_rate": 0.0008777007745617611, "loss": 3.6366, "step": 14400 }, { "epoch": 0.978733523576573, "grad_norm": 0.7212127447128296, "learning_rate": 0.0008776583095529283, "loss": 3.4659, "step": 14405 }, { "epoch": 0.9790732436472347, "grad_norm": 0.7751473188400269, "learning_rate": 0.0008776158445440957, "loss": 3.5266, "step": 14410 }, { "epoch": 0.9794129637178964, "grad_norm": 0.7671706676483154, "learning_rate": 0.0008775733795352629, "loss": 3.7281, "step": 14415 }, { "epoch": 0.9797526837885582, "grad_norm": 1.0055835247039795, "learning_rate": 0.0008775309145264302, "loss": 3.3887, "step": 14420 }, { "epoch": 0.98009240385922, "grad_norm": 0.7222244143486023, "learning_rate": 0.0008774884495175976, "loss": 3.4246, "step": 14425 }, { "epoch": 0.9804321239298818, "grad_norm": 0.8830586075782776, "learning_rate": 0.0008774459845087648, "loss": 3.73, "step": 14430 }, { "epoch": 0.9807718440005435, "grad_norm": 1.598350167274475, "learning_rate": 0.000877403519499932, "loss": 3.2986, "step": 14435 }, { "epoch": 0.9811115640712054, "grad_norm": 0.7917866110801697, "learning_rate": 0.0008773610544910994, "loss": 3.6402, "step": 14440 }, { "epoch": 0.9814512841418671, "grad_norm": 0.7206060886383057, "learning_rate": 0.0008773185894822666, "loss": 3.8055, "step": 14445 }, { "epoch": 0.9817910042125289, "grad_norm": 1.0332262516021729, "learning_rate": 0.0008772761244734338, "loss": 3.366, "step": 14450 }, { "epoch": 0.9821307242831907, "grad_norm": 0.5614504218101501, "learning_rate": 0.0008772336594646012, "loss": 3.7558, "step": 14455 }, { "epoch": 0.9824704443538524, "grad_norm": 0.8473602533340454, "learning_rate": 0.0008771911944557685, "loss": 3.5177, "step": 14460 }, { "epoch": 0.9828101644245142, "grad_norm": 0.64482581615448, "learning_rate": 0.0008771487294469357, "loss": 3.6836, "step": 14465 }, { "epoch": 0.9831498844951759, "grad_norm": 1.353542685508728, "learning_rate": 0.000877106264438103, "loss": 3.6789, "step": 14470 }, { "epoch": 0.9834896045658378, "grad_norm": 1.7296388149261475, "learning_rate": 0.0008770637994292703, "loss": 3.427, "step": 14475 }, { "epoch": 0.9838293246364995, "grad_norm": 0.8996850848197937, "learning_rate": 0.0008770213344204376, "loss": 3.5071, "step": 14480 }, { "epoch": 0.9841690447071613, "grad_norm": 0.6983279585838318, "learning_rate": 0.0008769788694116048, "loss": 3.7391, "step": 14485 }, { "epoch": 0.9845087647778231, "grad_norm": 0.7334893941879272, "learning_rate": 0.0008769364044027722, "loss": 3.5943, "step": 14490 }, { "epoch": 0.9848484848484849, "grad_norm": 1.382614016532898, "learning_rate": 0.0008768939393939395, "loss": 3.789, "step": 14495 }, { "epoch": 0.9851882049191466, "grad_norm": 0.8171646595001221, "learning_rate": 0.0008768514743851067, "loss": 3.6549, "step": 14500 }, { "epoch": 0.9855279249898083, "grad_norm": 0.8168349266052246, "learning_rate": 0.0008768090093762739, "loss": 3.5184, "step": 14505 }, { "epoch": 0.9858676450604702, "grad_norm": 0.9689443707466125, "learning_rate": 0.0008767665443674413, "loss": 3.5, "step": 14510 }, { "epoch": 0.9862073651311319, "grad_norm": 0.7656502723693848, "learning_rate": 0.0008767240793586085, "loss": 3.7818, "step": 14515 }, { "epoch": 0.9865470852017937, "grad_norm": 0.9008796811103821, "learning_rate": 0.0008766816143497757, "loss": 3.526, "step": 14520 }, { "epoch": 0.9868868052724555, "grad_norm": 0.7302773594856262, "learning_rate": 0.0008766391493409432, "loss": 3.4609, "step": 14525 }, { "epoch": 0.9872265253431173, "grad_norm": 0.8998444676399231, "learning_rate": 0.0008765966843321104, "loss": 3.7165, "step": 14530 }, { "epoch": 0.987566245413779, "grad_norm": 1.2108217477798462, "learning_rate": 0.0008765542193232776, "loss": 3.5773, "step": 14535 }, { "epoch": 0.9879059654844409, "grad_norm": 1.1694490909576416, "learning_rate": 0.000876511754314445, "loss": 3.7066, "step": 14540 }, { "epoch": 0.9882456855551026, "grad_norm": 0.688947319984436, "learning_rate": 0.0008764692893056122, "loss": 3.5175, "step": 14545 }, { "epoch": 0.9885854056257644, "grad_norm": 1.004987120628357, "learning_rate": 0.0008764268242967794, "loss": 3.5854, "step": 14550 }, { "epoch": 0.9889251256964261, "grad_norm": 1.1951470375061035, "learning_rate": 0.0008763843592879469, "loss": 3.6282, "step": 14555 }, { "epoch": 0.989264845767088, "grad_norm": 0.7731999754905701, "learning_rate": 0.0008763418942791141, "loss": 3.6051, "step": 14560 }, { "epoch": 0.9896045658377497, "grad_norm": 0.6846461296081543, "learning_rate": 0.0008762994292702813, "loss": 3.754, "step": 14565 }, { "epoch": 0.9899442859084114, "grad_norm": 0.5805031061172485, "learning_rate": 0.0008762569642614486, "loss": 3.4636, "step": 14570 }, { "epoch": 0.9902840059790733, "grad_norm": 0.8409395813941956, "learning_rate": 0.0008762144992526159, "loss": 3.7109, "step": 14575 }, { "epoch": 0.990623726049735, "grad_norm": 0.8794849514961243, "learning_rate": 0.0008761720342437831, "loss": 3.572, "step": 14580 }, { "epoch": 0.9909634461203968, "grad_norm": 0.8855902552604675, "learning_rate": 0.0008761295692349504, "loss": 3.6108, "step": 14585 }, { "epoch": 0.9913031661910585, "grad_norm": 0.6604815721511841, "learning_rate": 0.0008760871042261178, "loss": 3.7763, "step": 14590 }, { "epoch": 0.9916428862617204, "grad_norm": 0.7075049877166748, "learning_rate": 0.000876044639217285, "loss": 3.5551, "step": 14595 }, { "epoch": 0.9919826063323821, "grad_norm": 1.19882333278656, "learning_rate": 0.0008760021742084523, "loss": 3.7255, "step": 14600 }, { "epoch": 0.9923223264030439, "grad_norm": 0.920500636100769, "learning_rate": 0.0008759597091996195, "loss": 3.8716, "step": 14605 }, { "epoch": 0.9926620464737057, "grad_norm": 0.9802355170249939, "learning_rate": 0.0008759172441907868, "loss": 3.5886, "step": 14610 }, { "epoch": 0.9930017665443674, "grad_norm": 0.7546324133872986, "learning_rate": 0.0008758747791819541, "loss": 3.7325, "step": 14615 }, { "epoch": 0.9933414866150292, "grad_norm": 0.7007372379302979, "learning_rate": 0.0008758323141731213, "loss": 3.541, "step": 14620 }, { "epoch": 0.993681206685691, "grad_norm": 0.7954425811767578, "learning_rate": 0.0008757898491642887, "loss": 3.6988, "step": 14625 }, { "epoch": 0.9940209267563528, "grad_norm": 0.7672184109687805, "learning_rate": 0.000875747384155456, "loss": 3.8338, "step": 14630 }, { "epoch": 0.9943606468270145, "grad_norm": 0.9527212381362915, "learning_rate": 0.0008757049191466232, "loss": 3.5615, "step": 14635 }, { "epoch": 0.9947003668976763, "grad_norm": 0.8215718269348145, "learning_rate": 0.0008756624541377904, "loss": 3.8083, "step": 14640 }, { "epoch": 0.9950400869683381, "grad_norm": 1.2067676782608032, "learning_rate": 0.0008756199891289578, "loss": 3.6209, "step": 14645 }, { "epoch": 0.9953798070389999, "grad_norm": 0.7917919158935547, "learning_rate": 0.000875577524120125, "loss": 3.5736, "step": 14650 }, { "epoch": 0.9957195271096616, "grad_norm": 0.9128100872039795, "learning_rate": 0.0008755350591112922, "loss": 3.6215, "step": 14655 }, { "epoch": 0.9960592471803235, "grad_norm": 0.7717034220695496, "learning_rate": 0.0008754925941024597, "loss": 3.6486, "step": 14660 }, { "epoch": 0.9963989672509852, "grad_norm": 0.7634798288345337, "learning_rate": 0.0008754501290936269, "loss": 3.8336, "step": 14665 }, { "epoch": 0.9967386873216469, "grad_norm": 0.7274680733680725, "learning_rate": 0.0008754076640847941, "loss": 3.7638, "step": 14670 }, { "epoch": 0.9970784073923087, "grad_norm": 0.7382634282112122, "learning_rate": 0.0008753651990759615, "loss": 3.7377, "step": 14675 }, { "epoch": 0.9974181274629705, "grad_norm": 0.743107259273529, "learning_rate": 0.0008753227340671287, "loss": 3.7184, "step": 14680 }, { "epoch": 0.9977578475336323, "grad_norm": 0.8931724429130554, "learning_rate": 0.0008752802690582959, "loss": 3.6193, "step": 14685 }, { "epoch": 0.998097567604294, "grad_norm": 0.5748050212860107, "learning_rate": 0.0008752378040494632, "loss": 3.7684, "step": 14690 }, { "epoch": 0.9984372876749559, "grad_norm": 0.8331864476203918, "learning_rate": 0.0008751953390406306, "loss": 3.9459, "step": 14695 }, { "epoch": 0.9987770077456176, "grad_norm": 0.7781587839126587, "learning_rate": 0.0008751528740317978, "loss": 3.8221, "step": 14700 }, { "epoch": 0.9991167278162794, "grad_norm": 1.0691982507705688, "learning_rate": 0.0008751104090229651, "loss": 3.6112, "step": 14705 }, { "epoch": 0.9994564478869412, "grad_norm": 0.9747766852378845, "learning_rate": 0.0008750679440141324, "loss": 3.7022, "step": 14710 }, { "epoch": 0.999796167957603, "grad_norm": 0.820070207118988, "learning_rate": 0.0008750254790052996, "loss": 3.8419, "step": 14715 }, { "epoch": 1.0, "eval_bertscore": { "f1": 0.8381534741476809, "precision": 0.8350563258703786, "recall": 0.8423021950917208 }, "eval_bleu_4": 0.0204193666824347, "eval_exact_match": 9.690861517588914e-05, "eval_loss": 3.501424789428711, "eval_meteor": 0.08368346610482402, "eval_rouge": { "rouge1": 0.11350856223882408, "rouge2": 0.01748823344153523, "rougeL": 0.0994305933081177, "rougeLsum": 0.09939504392279817 }, "eval_runtime": 3799.1101, "eval_samples_per_second": 2.716, "eval_steps_per_second": 0.34, "step": 14718 }, { "epoch": 1.0001358880282647, "grad_norm": 0.8635953664779663, "learning_rate": 0.0008749830139964669, "loss": 3.625, "step": 14720 }, { "epoch": 1.0004756080989265, "grad_norm": 0.7356550693511963, "learning_rate": 0.0008749405489876342, "loss": 3.6107, "step": 14725 }, { "epoch": 1.0008153281695882, "grad_norm": 1.0746026039123535, "learning_rate": 0.0008748980839788015, "loss": 3.5392, "step": 14730 }, { "epoch": 1.00115504824025, "grad_norm": 0.6924282908439636, "learning_rate": 0.0008748556189699688, "loss": 3.6395, "step": 14735 }, { "epoch": 1.0014947683109119, "grad_norm": 0.9120036959648132, "learning_rate": 0.000874813153961136, "loss": 3.625, "step": 14740 }, { "epoch": 1.0018344883815735, "grad_norm": 1.085540771484375, "learning_rate": 0.0008747706889523033, "loss": 3.6414, "step": 14745 }, { "epoch": 1.0021742084522354, "grad_norm": 0.7921756505966187, "learning_rate": 0.0008747282239434706, "loss": 3.3817, "step": 14750 }, { "epoch": 1.0025139285228972, "grad_norm": 0.9197180867195129, "learning_rate": 0.0008746857589346378, "loss": 3.4746, "step": 14755 }, { "epoch": 1.0028536485935589, "grad_norm": 1.0967633724212646, "learning_rate": 0.0008746432939258051, "loss": 3.7374, "step": 14760 }, { "epoch": 1.0031933686642207, "grad_norm": 0.7022275328636169, "learning_rate": 0.0008746008289169725, "loss": 3.6564, "step": 14765 }, { "epoch": 1.0035330887348826, "grad_norm": 0.7609009146690369, "learning_rate": 0.0008745583639081397, "loss": 3.4658, "step": 14770 }, { "epoch": 1.0038728088055442, "grad_norm": 0.8393577933311462, "learning_rate": 0.000874515898899307, "loss": 3.718, "step": 14775 }, { "epoch": 1.004212528876206, "grad_norm": 1.5050324201583862, "learning_rate": 0.0008744734338904743, "loss": 3.6466, "step": 14780 }, { "epoch": 1.0045522489468677, "grad_norm": 0.9276782274246216, "learning_rate": 0.0008744309688816415, "loss": 3.6929, "step": 14785 }, { "epoch": 1.0048919690175295, "grad_norm": 0.9778801798820496, "learning_rate": 0.0008743885038728087, "loss": 3.7286, "step": 14790 }, { "epoch": 1.0052316890881914, "grad_norm": 0.8289713263511658, "learning_rate": 0.0008743460388639761, "loss": 3.4104, "step": 14795 }, { "epoch": 1.005571409158853, "grad_norm": 0.7857244610786438, "learning_rate": 0.0008743035738551434, "loss": 3.5886, "step": 14800 }, { "epoch": 1.0059111292295149, "grad_norm": 2.2191922664642334, "learning_rate": 0.0008742611088463106, "loss": 3.5865, "step": 14805 }, { "epoch": 1.0062508493001767, "grad_norm": 1.045069694519043, "learning_rate": 0.000874218643837478, "loss": 3.4953, "step": 14810 }, { "epoch": 1.0065905693708384, "grad_norm": 0.7998431921005249, "learning_rate": 0.0008741761788286452, "loss": 3.4237, "step": 14815 }, { "epoch": 1.0069302894415002, "grad_norm": 1.0983692407608032, "learning_rate": 0.0008741337138198124, "loss": 3.9104, "step": 14820 }, { "epoch": 1.007270009512162, "grad_norm": 1.054249882698059, "learning_rate": 0.0008740912488109798, "loss": 3.6365, "step": 14825 }, { "epoch": 1.0076097295828237, "grad_norm": 0.8268421292304993, "learning_rate": 0.000874048783802147, "loss": 3.6579, "step": 14830 }, { "epoch": 1.0079494496534855, "grad_norm": 0.9890739917755127, "learning_rate": 0.0008740063187933144, "loss": 3.6686, "step": 14835 }, { "epoch": 1.0082891697241474, "grad_norm": 0.7856653332710266, "learning_rate": 0.0008739638537844817, "loss": 3.5731, "step": 14840 }, { "epoch": 1.008628889794809, "grad_norm": 0.8003954291343689, "learning_rate": 0.0008739213887756489, "loss": 3.9485, "step": 14845 }, { "epoch": 1.0089686098654709, "grad_norm": 1.0611882209777832, "learning_rate": 0.0008738789237668162, "loss": 3.7569, "step": 14850 }, { "epoch": 1.0093083299361327, "grad_norm": 0.7606302499771118, "learning_rate": 0.0008738364587579834, "loss": 3.8321, "step": 14855 }, { "epoch": 1.0096480500067944, "grad_norm": 0.9010590314865112, "learning_rate": 0.0008737939937491507, "loss": 3.6095, "step": 14860 }, { "epoch": 1.0099877700774562, "grad_norm": 0.9839968681335449, "learning_rate": 0.000873751528740318, "loss": 3.5869, "step": 14865 }, { "epoch": 1.0103274901481178, "grad_norm": 0.884035050868988, "learning_rate": 0.0008737090637314853, "loss": 3.5727, "step": 14870 }, { "epoch": 1.0106672102187797, "grad_norm": 0.9170675873756409, "learning_rate": 0.0008736665987226526, "loss": 3.643, "step": 14875 }, { "epoch": 1.0110069302894416, "grad_norm": 0.9580128192901611, "learning_rate": 0.0008736241337138199, "loss": 3.5574, "step": 14880 }, { "epoch": 1.0113466503601032, "grad_norm": 0.7552632093429565, "learning_rate": 0.0008735816687049871, "loss": 3.7948, "step": 14885 }, { "epoch": 1.011686370430765, "grad_norm": 0.8886842131614685, "learning_rate": 0.0008735392036961543, "loss": 3.8444, "step": 14890 }, { "epoch": 1.012026090501427, "grad_norm": 0.7021509408950806, "learning_rate": 0.0008734967386873217, "loss": 3.5689, "step": 14895 }, { "epoch": 1.0123658105720885, "grad_norm": 0.7798046469688416, "learning_rate": 0.0008734542736784889, "loss": 3.336, "step": 14900 }, { "epoch": 1.0127055306427504, "grad_norm": 0.7485750317573547, "learning_rate": 0.0008734118086696562, "loss": 3.8953, "step": 14905 }, { "epoch": 1.0130452507134122, "grad_norm": 0.7091067433357239, "learning_rate": 0.0008733693436608236, "loss": 3.9294, "step": 14910 }, { "epoch": 1.0133849707840739, "grad_norm": 0.7800996899604797, "learning_rate": 0.0008733268786519908, "loss": 3.5407, "step": 14915 }, { "epoch": 1.0137246908547357, "grad_norm": 0.9742063283920288, "learning_rate": 0.000873284413643158, "loss": 3.3899, "step": 14920 }, { "epoch": 1.0140644109253976, "grad_norm": 0.7232404947280884, "learning_rate": 0.0008732419486343254, "loss": 3.6032, "step": 14925 }, { "epoch": 1.0144041309960592, "grad_norm": 0.9668446183204651, "learning_rate": 0.0008731994836254926, "loss": 3.4827, "step": 14930 }, { "epoch": 1.014743851066721, "grad_norm": 0.9133487939834595, "learning_rate": 0.0008731570186166598, "loss": 3.4154, "step": 14935 }, { "epoch": 1.015083571137383, "grad_norm": 0.7239922881126404, "learning_rate": 0.0008731145536078273, "loss": 3.3123, "step": 14940 }, { "epoch": 1.0154232912080445, "grad_norm": 0.7745734453201294, "learning_rate": 0.0008730720885989945, "loss": 3.868, "step": 14945 }, { "epoch": 1.0157630112787064, "grad_norm": 0.7706164121627808, "learning_rate": 0.0008730296235901617, "loss": 3.7817, "step": 14950 }, { "epoch": 1.016102731349368, "grad_norm": 0.734239399433136, "learning_rate": 0.000872987158581329, "loss": 3.5991, "step": 14955 }, { "epoch": 1.0164424514200299, "grad_norm": 0.7745044827461243, "learning_rate": 0.0008729446935724963, "loss": 3.6409, "step": 14960 }, { "epoch": 1.0167821714906917, "grad_norm": 0.9256260991096497, "learning_rate": 0.0008729022285636635, "loss": 3.5508, "step": 14965 }, { "epoch": 1.0171218915613534, "grad_norm": 0.8038735389709473, "learning_rate": 0.0008728597635548308, "loss": 3.4701, "step": 14970 }, { "epoch": 1.0174616116320152, "grad_norm": 0.9272863268852234, "learning_rate": 0.0008728172985459982, "loss": 3.5885, "step": 14975 }, { "epoch": 1.017801331702677, "grad_norm": 0.7427102327346802, "learning_rate": 0.0008727748335371654, "loss": 3.6044, "step": 14980 }, { "epoch": 1.0181410517733387, "grad_norm": 0.8176618218421936, "learning_rate": 0.0008727323685283327, "loss": 3.8535, "step": 14985 }, { "epoch": 1.0184807718440005, "grad_norm": 0.8393145799636841, "learning_rate": 0.0008726899035195, "loss": 3.6163, "step": 14990 }, { "epoch": 1.0188204919146624, "grad_norm": 1.0643292665481567, "learning_rate": 0.0008726474385106672, "loss": 3.6557, "step": 14995 }, { "epoch": 1.019160211985324, "grad_norm": 0.7359565496444702, "learning_rate": 0.0008726049735018345, "loss": 3.6195, "step": 15000 }, { "epoch": 1.0194999320559859, "grad_norm": 0.7306636571884155, "learning_rate": 0.0008725625084930017, "loss": 3.4327, "step": 15005 }, { "epoch": 1.0198396521266477, "grad_norm": 0.7349743247032166, "learning_rate": 0.0008725200434841691, "loss": 3.6665, "step": 15010 }, { "epoch": 1.0201793721973094, "grad_norm": 0.6671044230461121, "learning_rate": 0.0008724775784753364, "loss": 3.647, "step": 15015 }, { "epoch": 1.0205190922679712, "grad_norm": 1.2748668193817139, "learning_rate": 0.0008724351134665036, "loss": 3.3336, "step": 15020 }, { "epoch": 1.020858812338633, "grad_norm": 0.9750664234161377, "learning_rate": 0.0008723926484576709, "loss": 3.6654, "step": 15025 }, { "epoch": 1.0211985324092947, "grad_norm": 0.9526344537734985, "learning_rate": 0.0008723501834488382, "loss": 3.3167, "step": 15030 }, { "epoch": 1.0215382524799566, "grad_norm": 0.7460962533950806, "learning_rate": 0.0008723077184400054, "loss": 3.9022, "step": 15035 }, { "epoch": 1.0218779725506182, "grad_norm": 0.7792105674743652, "learning_rate": 0.0008722652534311726, "loss": 3.5521, "step": 15040 }, { "epoch": 1.02221769262128, "grad_norm": 0.7971070408821106, "learning_rate": 0.0008722227884223401, "loss": 3.6695, "step": 15045 }, { "epoch": 1.022557412691942, "grad_norm": 0.9763092398643494, "learning_rate": 0.0008721803234135073, "loss": 3.622, "step": 15050 }, { "epoch": 1.0228971327626035, "grad_norm": 0.7915216684341431, "learning_rate": 0.0008721378584046745, "loss": 3.5439, "step": 15055 }, { "epoch": 1.0232368528332654, "grad_norm": 0.8394269943237305, "learning_rate": 0.0008720953933958419, "loss": 3.582, "step": 15060 }, { "epoch": 1.0235765729039272, "grad_norm": 0.9936326742172241, "learning_rate": 0.0008720529283870091, "loss": 3.5888, "step": 15065 }, { "epoch": 1.0239162929745889, "grad_norm": 0.976253092288971, "learning_rate": 0.0008720104633781763, "loss": 3.5825, "step": 15070 }, { "epoch": 1.0242560130452507, "grad_norm": 1.0398626327514648, "learning_rate": 0.0008719679983693437, "loss": 3.5866, "step": 15075 }, { "epoch": 1.0245957331159126, "grad_norm": 0.7951353788375854, "learning_rate": 0.000871925533360511, "loss": 3.8302, "step": 15080 }, { "epoch": 1.0249354531865742, "grad_norm": 0.7903410196304321, "learning_rate": 0.0008718830683516782, "loss": 3.4905, "step": 15085 }, { "epoch": 1.025275173257236, "grad_norm": 0.8046399354934692, "learning_rate": 0.0008718406033428455, "loss": 3.6093, "step": 15090 }, { "epoch": 1.025614893327898, "grad_norm": 1.0281310081481934, "learning_rate": 0.0008717981383340128, "loss": 3.8742, "step": 15095 }, { "epoch": 1.0259546133985595, "grad_norm": 0.6996908783912659, "learning_rate": 0.00087175567332518, "loss": 3.6023, "step": 15100 }, { "epoch": 1.0262943334692214, "grad_norm": 0.785322904586792, "learning_rate": 0.0008717132083163473, "loss": 3.5748, "step": 15105 }, { "epoch": 1.0266340535398832, "grad_norm": 0.8924939632415771, "learning_rate": 0.0008716707433075146, "loss": 3.6664, "step": 15110 }, { "epoch": 1.0269737736105449, "grad_norm": 1.2890547513961792, "learning_rate": 0.0008716282782986819, "loss": 3.5811, "step": 15115 }, { "epoch": 1.0273134936812067, "grad_norm": 0.6925711035728455, "learning_rate": 0.0008715858132898492, "loss": 3.6863, "step": 15120 }, { "epoch": 1.0276532137518684, "grad_norm": 0.8621943593025208, "learning_rate": 0.0008715433482810165, "loss": 3.5405, "step": 15125 }, { "epoch": 1.0279929338225302, "grad_norm": 0.8417950868606567, "learning_rate": 0.0008715008832721837, "loss": 3.7407, "step": 15130 }, { "epoch": 1.028332653893192, "grad_norm": 0.8515392541885376, "learning_rate": 0.000871458418263351, "loss": 3.6746, "step": 15135 }, { "epoch": 1.0286723739638537, "grad_norm": 0.653509795665741, "learning_rate": 0.0008714159532545182, "loss": 3.699, "step": 15140 }, { "epoch": 1.0290120940345155, "grad_norm": 0.724891722202301, "learning_rate": 0.0008713734882456855, "loss": 3.5059, "step": 15145 }, { "epoch": 1.0293518141051774, "grad_norm": 0.8254064917564392, "learning_rate": 0.0008713310232368529, "loss": 3.757, "step": 15150 }, { "epoch": 1.029691534175839, "grad_norm": 0.7997815608978271, "learning_rate": 0.0008712885582280201, "loss": 3.6241, "step": 15155 }, { "epoch": 1.0300312542465009, "grad_norm": 0.6828070878982544, "learning_rate": 0.0008712460932191874, "loss": 3.7626, "step": 15160 }, { "epoch": 1.0303709743171627, "grad_norm": 0.7282760143280029, "learning_rate": 0.0008712036282103547, "loss": 3.5814, "step": 15165 }, { "epoch": 1.0307106943878244, "grad_norm": 0.627254843711853, "learning_rate": 0.0008711611632015219, "loss": 3.7564, "step": 15170 }, { "epoch": 1.0310504144584862, "grad_norm": 0.9231497049331665, "learning_rate": 0.0008711186981926893, "loss": 3.4034, "step": 15175 }, { "epoch": 1.031390134529148, "grad_norm": 0.9329749345779419, "learning_rate": 0.0008710762331838566, "loss": 3.6913, "step": 15180 }, { "epoch": 1.0317298545998097, "grad_norm": 0.6896570324897766, "learning_rate": 0.0008710337681750238, "loss": 3.6709, "step": 15185 }, { "epoch": 1.0320695746704716, "grad_norm": 0.8407685160636902, "learning_rate": 0.0008709913031661911, "loss": 3.7709, "step": 15190 }, { "epoch": 1.0324092947411334, "grad_norm": 0.798702597618103, "learning_rate": 0.0008709488381573584, "loss": 3.6941, "step": 15195 }, { "epoch": 1.032749014811795, "grad_norm": 0.8163991570472717, "learning_rate": 0.0008709063731485256, "loss": 3.5403, "step": 15200 }, { "epoch": 1.033088734882457, "grad_norm": 0.790916383266449, "learning_rate": 0.0008708639081396929, "loss": 3.7373, "step": 15205 }, { "epoch": 1.0334284549531185, "grad_norm": 0.9498236179351807, "learning_rate": 0.0008708214431308602, "loss": 3.6599, "step": 15210 }, { "epoch": 1.0337681750237804, "grad_norm": 0.6762052178382874, "learning_rate": 0.0008707789781220275, "loss": 3.6779, "step": 15215 }, { "epoch": 1.0341078950944422, "grad_norm": 0.7648082971572876, "learning_rate": 0.0008707365131131948, "loss": 3.8165, "step": 15220 }, { "epoch": 1.0344476151651039, "grad_norm": 0.8258038759231567, "learning_rate": 0.0008706940481043621, "loss": 3.2511, "step": 15225 }, { "epoch": 1.0347873352357657, "grad_norm": 0.8653243780136108, "learning_rate": 0.0008706515830955293, "loss": 3.636, "step": 15230 }, { "epoch": 1.0351270553064276, "grad_norm": 0.7162191271781921, "learning_rate": 0.0008706091180866966, "loss": 3.854, "step": 15235 }, { "epoch": 1.0354667753770892, "grad_norm": 0.857960045337677, "learning_rate": 0.0008705666530778638, "loss": 3.5439, "step": 15240 }, { "epoch": 1.035806495447751, "grad_norm": 1.0894333124160767, "learning_rate": 0.0008705241880690311, "loss": 3.8655, "step": 15245 }, { "epoch": 1.036146215518413, "grad_norm": 0.682677149772644, "learning_rate": 0.0008704817230601985, "loss": 3.7218, "step": 15250 }, { "epoch": 1.0364859355890745, "grad_norm": 0.7342495322227478, "learning_rate": 0.0008704392580513657, "loss": 3.6575, "step": 15255 }, { "epoch": 1.0368256556597364, "grad_norm": 0.8051048517227173, "learning_rate": 0.000870396793042533, "loss": 3.7448, "step": 15260 }, { "epoch": 1.0371653757303982, "grad_norm": 0.6888447999954224, "learning_rate": 0.0008703543280337003, "loss": 3.6063, "step": 15265 }, { "epoch": 1.0375050958010599, "grad_norm": 1.0684417486190796, "learning_rate": 0.0008703118630248675, "loss": 3.6057, "step": 15270 }, { "epoch": 1.0378448158717217, "grad_norm": 0.9023168087005615, "learning_rate": 0.0008702693980160347, "loss": 3.3609, "step": 15275 }, { "epoch": 1.0381845359423836, "grad_norm": 1.139523983001709, "learning_rate": 0.0008702269330072021, "loss": 3.6831, "step": 15280 }, { "epoch": 1.0385242560130452, "grad_norm": 0.860252857208252, "learning_rate": 0.0008701844679983694, "loss": 3.7156, "step": 15285 }, { "epoch": 1.038863976083707, "grad_norm": 0.6485652923583984, "learning_rate": 0.0008701420029895366, "loss": 3.9258, "step": 15290 }, { "epoch": 1.0392036961543687, "grad_norm": 0.9340188503265381, "learning_rate": 0.000870099537980704, "loss": 3.6028, "step": 15295 }, { "epoch": 1.0395434162250305, "grad_norm": 0.7440124750137329, "learning_rate": 0.0008700570729718712, "loss": 3.623, "step": 15300 }, { "epoch": 1.0398831362956924, "grad_norm": 2.039768934249878, "learning_rate": 0.0008700146079630384, "loss": 3.6893, "step": 15305 }, { "epoch": 1.040222856366354, "grad_norm": 1.0455355644226074, "learning_rate": 0.0008699721429542058, "loss": 3.3212, "step": 15310 }, { "epoch": 1.0405625764370159, "grad_norm": 0.9251106381416321, "learning_rate": 0.000869929677945373, "loss": 3.7865, "step": 15315 }, { "epoch": 1.0409022965076777, "grad_norm": 1.0914902687072754, "learning_rate": 0.0008698872129365403, "loss": 3.5292, "step": 15320 }, { "epoch": 1.0412420165783394, "grad_norm": 0.9012385606765747, "learning_rate": 0.0008698447479277077, "loss": 3.5812, "step": 15325 }, { "epoch": 1.0415817366490012, "grad_norm": 0.8007596135139465, "learning_rate": 0.0008698022829188749, "loss": 3.6419, "step": 15330 }, { "epoch": 1.041921456719663, "grad_norm": 0.9741922616958618, "learning_rate": 0.0008697598179100421, "loss": 3.7753, "step": 15335 }, { "epoch": 1.0422611767903247, "grad_norm": 0.7994995713233948, "learning_rate": 0.0008697173529012094, "loss": 3.9091, "step": 15340 }, { "epoch": 1.0426008968609866, "grad_norm": 0.7818124294281006, "learning_rate": 0.0008696748878923767, "loss": 3.6, "step": 15345 }, { "epoch": 1.0429406169316484, "grad_norm": 1.2978562116622925, "learning_rate": 0.0008696324228835439, "loss": 3.4725, "step": 15350 }, { "epoch": 1.04328033700231, "grad_norm": 0.8045937418937683, "learning_rate": 0.0008695899578747113, "loss": 3.3523, "step": 15355 }, { "epoch": 1.043620057072972, "grad_norm": 0.6882285475730896, "learning_rate": 0.0008695474928658786, "loss": 3.6876, "step": 15360 }, { "epoch": 1.0439597771436337, "grad_norm": 5.161287784576416, "learning_rate": 0.0008695050278570458, "loss": 3.3709, "step": 15365 }, { "epoch": 1.0442994972142954, "grad_norm": 0.952535092830658, "learning_rate": 0.0008694625628482131, "loss": 3.7402, "step": 15370 }, { "epoch": 1.0446392172849572, "grad_norm": 0.8316429853439331, "learning_rate": 0.0008694200978393803, "loss": 3.6016, "step": 15375 }, { "epoch": 1.0449789373556189, "grad_norm": 0.7014027237892151, "learning_rate": 0.0008693776328305476, "loss": 3.4609, "step": 15380 }, { "epoch": 1.0453186574262807, "grad_norm": 0.9950093030929565, "learning_rate": 0.0008693351678217149, "loss": 3.4544, "step": 15385 }, { "epoch": 1.0456583774969426, "grad_norm": 0.8154751658439636, "learning_rate": 0.0008692927028128822, "loss": 3.4839, "step": 15390 }, { "epoch": 1.0459980975676042, "grad_norm": 0.7050445079803467, "learning_rate": 0.0008692502378040495, "loss": 3.4533, "step": 15395 }, { "epoch": 1.046337817638266, "grad_norm": 1.300398588180542, "learning_rate": 0.0008692077727952168, "loss": 3.4721, "step": 15400 }, { "epoch": 1.046677537708928, "grad_norm": 0.9842777848243713, "learning_rate": 0.000869165307786384, "loss": 3.5049, "step": 15405 }, { "epoch": 1.0470172577795895, "grad_norm": 1.1697839498519897, "learning_rate": 0.0008691228427775513, "loss": 3.6207, "step": 15410 }, { "epoch": 1.0473569778502514, "grad_norm": 0.7548295855522156, "learning_rate": 0.0008690803777687186, "loss": 3.9181, "step": 15415 }, { "epoch": 1.0476966979209132, "grad_norm": 0.9170308709144592, "learning_rate": 0.0008690379127598858, "loss": 3.434, "step": 15420 }, { "epoch": 1.0480364179915749, "grad_norm": 0.6952283978462219, "learning_rate": 0.0008689954477510532, "loss": 3.5821, "step": 15425 }, { "epoch": 1.0483761380622367, "grad_norm": 0.9291009306907654, "learning_rate": 0.0008689529827422205, "loss": 3.417, "step": 15430 }, { "epoch": 1.0487158581328986, "grad_norm": 0.777187705039978, "learning_rate": 0.0008689105177333877, "loss": 3.5713, "step": 15435 }, { "epoch": 1.0490555782035602, "grad_norm": 0.6541638374328613, "learning_rate": 0.0008688680527245549, "loss": 3.5258, "step": 15440 }, { "epoch": 1.049395298274222, "grad_norm": 0.8184323310852051, "learning_rate": 0.0008688255877157223, "loss": 3.4299, "step": 15445 }, { "epoch": 1.049735018344884, "grad_norm": 0.8719306588172913, "learning_rate": 0.0008687831227068895, "loss": 3.761, "step": 15450 }, { "epoch": 1.0500747384155455, "grad_norm": 0.8403478264808655, "learning_rate": 0.0008687406576980567, "loss": 3.6937, "step": 15455 }, { "epoch": 1.0504144584862074, "grad_norm": 1.1352113485336304, "learning_rate": 0.0008686981926892242, "loss": 3.3911, "step": 15460 }, { "epoch": 1.050754178556869, "grad_norm": 0.8563434481620789, "learning_rate": 0.0008686557276803914, "loss": 3.8448, "step": 15465 }, { "epoch": 1.0510938986275309, "grad_norm": 0.8117370009422302, "learning_rate": 0.0008686132626715586, "loss": 3.8048, "step": 15470 }, { "epoch": 1.0514336186981927, "grad_norm": 0.8282987475395203, "learning_rate": 0.000868570797662726, "loss": 3.4767, "step": 15475 }, { "epoch": 1.0517733387688544, "grad_norm": 0.7735786437988281, "learning_rate": 0.0008685283326538932, "loss": 3.7295, "step": 15480 }, { "epoch": 1.0521130588395162, "grad_norm": 0.7515607476234436, "learning_rate": 0.0008684858676450604, "loss": 3.7812, "step": 15485 }, { "epoch": 1.052452778910178, "grad_norm": 0.6671996116638184, "learning_rate": 0.0008684434026362277, "loss": 3.7486, "step": 15490 }, { "epoch": 1.0527924989808397, "grad_norm": 0.9180407524108887, "learning_rate": 0.0008684009376273951, "loss": 3.5754, "step": 15495 }, { "epoch": 1.0531322190515016, "grad_norm": 0.7398190498352051, "learning_rate": 0.0008683584726185623, "loss": 3.4129, "step": 15500 }, { "epoch": 1.0534719391221634, "grad_norm": 0.8888563513755798, "learning_rate": 0.0008683160076097296, "loss": 3.7205, "step": 15505 }, { "epoch": 1.053811659192825, "grad_norm": 0.6766151785850525, "learning_rate": 0.0008682735426008969, "loss": 3.895, "step": 15510 }, { "epoch": 1.054151379263487, "grad_norm": 0.8470721244812012, "learning_rate": 0.0008682310775920642, "loss": 3.5278, "step": 15515 }, { "epoch": 1.0544910993341488, "grad_norm": 0.8622351884841919, "learning_rate": 0.0008681886125832314, "loss": 3.7996, "step": 15520 }, { "epoch": 1.0548308194048104, "grad_norm": 0.9552620053291321, "learning_rate": 0.0008681461475743986, "loss": 3.6605, "step": 15525 }, { "epoch": 1.0551705394754722, "grad_norm": 0.6540444493293762, "learning_rate": 0.0008681036825655661, "loss": 3.5894, "step": 15530 }, { "epoch": 1.055510259546134, "grad_norm": 0.9241417050361633, "learning_rate": 0.0008680612175567333, "loss": 3.8359, "step": 15535 }, { "epoch": 1.0558499796167957, "grad_norm": 0.8745183944702148, "learning_rate": 0.0008680187525479005, "loss": 3.7336, "step": 15540 }, { "epoch": 1.0561896996874576, "grad_norm": 0.773281455039978, "learning_rate": 0.0008679762875390679, "loss": 3.6772, "step": 15545 }, { "epoch": 1.0565294197581192, "grad_norm": 0.8701076507568359, "learning_rate": 0.0008679338225302351, "loss": 3.7153, "step": 15550 }, { "epoch": 1.056869139828781, "grad_norm": 0.7325215339660645, "learning_rate": 0.0008678913575214023, "loss": 3.4492, "step": 15555 }, { "epoch": 1.057208859899443, "grad_norm": 0.8060164451599121, "learning_rate": 0.0008678488925125697, "loss": 3.3348, "step": 15560 }, { "epoch": 1.0575485799701045, "grad_norm": 0.8601083755493164, "learning_rate": 0.000867806427503737, "loss": 3.8161, "step": 15565 }, { "epoch": 1.0578883000407664, "grad_norm": 1.020885944366455, "learning_rate": 0.0008677639624949042, "loss": 3.6374, "step": 15570 }, { "epoch": 1.0582280201114282, "grad_norm": 1.8093150854110718, "learning_rate": 0.0008677214974860716, "loss": 3.4302, "step": 15575 }, { "epoch": 1.0585677401820899, "grad_norm": 0.7937633395195007, "learning_rate": 0.0008676790324772388, "loss": 3.638, "step": 15580 }, { "epoch": 1.0589074602527517, "grad_norm": 0.7711955904960632, "learning_rate": 0.000867636567468406, "loss": 3.7284, "step": 15585 }, { "epoch": 1.0592471803234136, "grad_norm": 0.7265394926071167, "learning_rate": 0.0008675941024595733, "loss": 3.7851, "step": 15590 }, { "epoch": 1.0595869003940752, "grad_norm": 0.8184247612953186, "learning_rate": 0.0008675516374507406, "loss": 3.7141, "step": 15595 }, { "epoch": 1.059926620464737, "grad_norm": 0.9044502973556519, "learning_rate": 0.0008675091724419079, "loss": 3.5797, "step": 15600 }, { "epoch": 1.060266340535399, "grad_norm": 0.6830253005027771, "learning_rate": 0.0008674667074330752, "loss": 3.5252, "step": 15605 }, { "epoch": 1.0606060606060606, "grad_norm": 0.8944404125213623, "learning_rate": 0.0008674242424242425, "loss": 3.6994, "step": 15610 }, { "epoch": 1.0609457806767224, "grad_norm": 0.7353104948997498, "learning_rate": 0.0008673817774154097, "loss": 3.7032, "step": 15615 }, { "epoch": 1.0612855007473843, "grad_norm": 0.7380582094192505, "learning_rate": 0.000867339312406577, "loss": 3.9983, "step": 15620 }, { "epoch": 1.061625220818046, "grad_norm": 0.8265982866287231, "learning_rate": 0.0008672968473977442, "loss": 3.597, "step": 15625 }, { "epoch": 1.0619649408887077, "grad_norm": 0.7482130527496338, "learning_rate": 0.0008672543823889115, "loss": 3.7222, "step": 15630 }, { "epoch": 1.0623046609593694, "grad_norm": 1.041133999824524, "learning_rate": 0.0008672119173800789, "loss": 3.7337, "step": 15635 }, { "epoch": 1.0626443810300312, "grad_norm": 1.0080825090408325, "learning_rate": 0.0008671694523712461, "loss": 3.4739, "step": 15640 }, { "epoch": 1.062984101100693, "grad_norm": 0.8892780542373657, "learning_rate": 0.0008671269873624134, "loss": 3.5258, "step": 15645 }, { "epoch": 1.0633238211713547, "grad_norm": 0.9092037677764893, "learning_rate": 0.0008670845223535807, "loss": 3.6483, "step": 15650 }, { "epoch": 1.0636635412420166, "grad_norm": 0.9016504287719727, "learning_rate": 0.0008670420573447479, "loss": 3.635, "step": 15655 }, { "epoch": 1.0640032613126784, "grad_norm": 1.3748823404312134, "learning_rate": 0.0008669995923359152, "loss": 3.5499, "step": 15660 }, { "epoch": 1.06434298138334, "grad_norm": 0.6727843880653381, "learning_rate": 0.0008669571273270826, "loss": 3.5387, "step": 15665 }, { "epoch": 1.064682701454002, "grad_norm": 0.6666795015335083, "learning_rate": 0.0008669146623182498, "loss": 3.4707, "step": 15670 }, { "epoch": 1.0650224215246638, "grad_norm": 1.2119091749191284, "learning_rate": 0.000866872197309417, "loss": 3.6234, "step": 15675 }, { "epoch": 1.0653621415953254, "grad_norm": 0.8031150698661804, "learning_rate": 0.0008668297323005844, "loss": 3.8277, "step": 15680 }, { "epoch": 1.0657018616659872, "grad_norm": 0.8668376207351685, "learning_rate": 0.0008667872672917516, "loss": 3.3959, "step": 15685 }, { "epoch": 1.066041581736649, "grad_norm": 0.6989830732345581, "learning_rate": 0.0008667448022829188, "loss": 3.8552, "step": 15690 }, { "epoch": 1.0663813018073107, "grad_norm": 1.4462339878082275, "learning_rate": 0.0008667023372740862, "loss": 3.6881, "step": 15695 }, { "epoch": 1.0667210218779726, "grad_norm": 0.9396673440933228, "learning_rate": 0.0008666598722652535, "loss": 3.6808, "step": 15700 }, { "epoch": 1.0670607419486344, "grad_norm": 1.7965008020401, "learning_rate": 0.0008666174072564207, "loss": 3.2489, "step": 15705 }, { "epoch": 1.067400462019296, "grad_norm": 0.797441303730011, "learning_rate": 0.0008665749422475881, "loss": 3.6139, "step": 15710 }, { "epoch": 1.067740182089958, "grad_norm": 0.7148320078849792, "learning_rate": 0.0008665324772387553, "loss": 3.5044, "step": 15715 }, { "epoch": 1.0680799021606195, "grad_norm": 0.7780255675315857, "learning_rate": 0.0008664900122299225, "loss": 3.576, "step": 15720 }, { "epoch": 1.0684196222312814, "grad_norm": 0.8445881009101868, "learning_rate": 0.0008664475472210898, "loss": 3.6259, "step": 15725 }, { "epoch": 1.0687593423019432, "grad_norm": 0.855167031288147, "learning_rate": 0.0008664050822122571, "loss": 3.6185, "step": 15730 }, { "epoch": 1.0690990623726049, "grad_norm": 0.6781607866287231, "learning_rate": 0.0008663626172034244, "loss": 3.8138, "step": 15735 }, { "epoch": 1.0694387824432667, "grad_norm": 0.7494103312492371, "learning_rate": 0.0008663201521945917, "loss": 3.7026, "step": 15740 }, { "epoch": 1.0697785025139286, "grad_norm": 0.9175722002983093, "learning_rate": 0.000866277687185759, "loss": 3.4927, "step": 15745 }, { "epoch": 1.0701182225845902, "grad_norm": 0.6961216330528259, "learning_rate": 0.0008662352221769262, "loss": 3.3283, "step": 15750 }, { "epoch": 1.070457942655252, "grad_norm": 0.8230640888214111, "learning_rate": 0.0008661927571680935, "loss": 3.8827, "step": 15755 }, { "epoch": 1.070797662725914, "grad_norm": 0.9357647895812988, "learning_rate": 0.0008661502921592608, "loss": 3.5325, "step": 15760 }, { "epoch": 1.0711373827965756, "grad_norm": 0.840833306312561, "learning_rate": 0.000866107827150428, "loss": 3.6716, "step": 15765 }, { "epoch": 1.0714771028672374, "grad_norm": 0.8124556541442871, "learning_rate": 0.0008660653621415954, "loss": 3.4026, "step": 15770 }, { "epoch": 1.0718168229378993, "grad_norm": 0.7972529530525208, "learning_rate": 0.0008660228971327626, "loss": 3.6875, "step": 15775 }, { "epoch": 1.072156543008561, "grad_norm": 1.0712969303131104, "learning_rate": 0.0008659804321239299, "loss": 3.602, "step": 15780 }, { "epoch": 1.0724962630792227, "grad_norm": 0.8613400459289551, "learning_rate": 0.0008659379671150972, "loss": 3.4832, "step": 15785 }, { "epoch": 1.0728359831498846, "grad_norm": 0.9217832088470459, "learning_rate": 0.0008658955021062644, "loss": 3.6408, "step": 15790 }, { "epoch": 1.0731757032205462, "grad_norm": 0.8431523442268372, "learning_rate": 0.0008658530370974317, "loss": 3.8588, "step": 15795 }, { "epoch": 1.073515423291208, "grad_norm": 0.9289519786834717, "learning_rate": 0.000865810572088599, "loss": 3.6217, "step": 15800 }, { "epoch": 1.07385514336187, "grad_norm": 0.7121431231498718, "learning_rate": 0.0008657681070797663, "loss": 3.655, "step": 15805 }, { "epoch": 1.0741948634325316, "grad_norm": 0.7740139365196228, "learning_rate": 0.0008657256420709336, "loss": 3.524, "step": 15810 }, { "epoch": 1.0745345835031934, "grad_norm": 0.9154728651046753, "learning_rate": 0.0008656831770621009, "loss": 3.6548, "step": 15815 }, { "epoch": 1.074874303573855, "grad_norm": 0.6473360657691956, "learning_rate": 0.0008656407120532681, "loss": 3.6962, "step": 15820 }, { "epoch": 1.075214023644517, "grad_norm": 0.8365759253501892, "learning_rate": 0.0008655982470444353, "loss": 3.6452, "step": 15825 }, { "epoch": 1.0755537437151788, "grad_norm": 0.7894644141197205, "learning_rate": 0.0008655557820356027, "loss": 3.656, "step": 15830 }, { "epoch": 1.0758934637858404, "grad_norm": 0.7554925084114075, "learning_rate": 0.0008655133170267699, "loss": 3.4169, "step": 15835 }, { "epoch": 1.0762331838565022, "grad_norm": 0.682033121585846, "learning_rate": 0.0008654708520179372, "loss": 3.6037, "step": 15840 }, { "epoch": 1.076572903927164, "grad_norm": 0.9009461998939514, "learning_rate": 0.0008654283870091046, "loss": 3.1913, "step": 15845 }, { "epoch": 1.0769126239978257, "grad_norm": 0.791874349117279, "learning_rate": 0.0008653859220002718, "loss": 3.3567, "step": 15850 }, { "epoch": 1.0772523440684876, "grad_norm": 0.724998950958252, "learning_rate": 0.0008653434569914391, "loss": 3.6087, "step": 15855 }, { "epoch": 1.0775920641391494, "grad_norm": 0.8859862685203552, "learning_rate": 0.0008653009919826064, "loss": 3.8405, "step": 15860 }, { "epoch": 1.077931784209811, "grad_norm": 0.8791608810424805, "learning_rate": 0.0008652585269737736, "loss": 3.361, "step": 15865 }, { "epoch": 1.078271504280473, "grad_norm": 0.826759934425354, "learning_rate": 0.0008652160619649409, "loss": 3.712, "step": 15870 }, { "epoch": 1.0786112243511348, "grad_norm": 0.8054351210594177, "learning_rate": 0.0008651735969561082, "loss": 3.5739, "step": 15875 }, { "epoch": 1.0789509444217964, "grad_norm": 0.9845974445343018, "learning_rate": 0.0008651311319472755, "loss": 3.4905, "step": 15880 }, { "epoch": 1.0792906644924583, "grad_norm": 0.775550901889801, "learning_rate": 0.0008650886669384428, "loss": 3.7116, "step": 15885 }, { "epoch": 1.0796303845631199, "grad_norm": 0.6726576685905457, "learning_rate": 0.00086504620192961, "loss": 3.5282, "step": 15890 }, { "epoch": 1.0799701046337817, "grad_norm": 0.8075435757637024, "learning_rate": 0.0008650037369207773, "loss": 3.7082, "step": 15895 }, { "epoch": 1.0803098247044436, "grad_norm": 0.9395415186882019, "learning_rate": 0.0008649612719119446, "loss": 3.2468, "step": 15900 }, { "epoch": 1.0806495447751052, "grad_norm": 0.7269135117530823, "learning_rate": 0.0008649188069031118, "loss": 3.8944, "step": 15905 }, { "epoch": 1.080989264845767, "grad_norm": 0.8821495175361633, "learning_rate": 0.0008648763418942792, "loss": 3.8836, "step": 15910 }, { "epoch": 1.081328984916429, "grad_norm": 0.7838165760040283, "learning_rate": 0.0008648338768854465, "loss": 3.6889, "step": 15915 }, { "epoch": 1.0816687049870906, "grad_norm": 0.8660455346107483, "learning_rate": 0.0008647914118766137, "loss": 3.5453, "step": 15920 }, { "epoch": 1.0820084250577524, "grad_norm": 0.7564034461975098, "learning_rate": 0.0008647489468677809, "loss": 3.6311, "step": 15925 }, { "epoch": 1.0823481451284143, "grad_norm": 0.7242792844772339, "learning_rate": 0.0008647064818589483, "loss": 3.532, "step": 15930 }, { "epoch": 1.082687865199076, "grad_norm": 0.7514887452125549, "learning_rate": 0.0008646640168501155, "loss": 3.4765, "step": 15935 }, { "epoch": 1.0830275852697377, "grad_norm": 1.032804250717163, "learning_rate": 0.0008646215518412827, "loss": 3.6304, "step": 15940 }, { "epoch": 1.0833673053403996, "grad_norm": 0.8599602580070496, "learning_rate": 0.0008645790868324502, "loss": 3.6612, "step": 15945 }, { "epoch": 1.0837070254110612, "grad_norm": 0.7160202264785767, "learning_rate": 0.0008645366218236174, "loss": 3.5995, "step": 15950 }, { "epoch": 1.084046745481723, "grad_norm": 0.7802814245223999, "learning_rate": 0.0008644941568147846, "loss": 3.669, "step": 15955 }, { "epoch": 1.084386465552385, "grad_norm": 0.7167781591415405, "learning_rate": 0.000864451691805952, "loss": 3.5472, "step": 15960 }, { "epoch": 1.0847261856230466, "grad_norm": 0.7563731670379639, "learning_rate": 0.0008644092267971192, "loss": 3.6312, "step": 15965 }, { "epoch": 1.0850659056937084, "grad_norm": 0.7367556691169739, "learning_rate": 0.0008643667617882864, "loss": 3.5568, "step": 15970 }, { "epoch": 1.0854056257643703, "grad_norm": 0.7233540415763855, "learning_rate": 0.0008643242967794537, "loss": 3.7646, "step": 15975 }, { "epoch": 1.085745345835032, "grad_norm": 0.704897940158844, "learning_rate": 0.0008642818317706211, "loss": 3.7845, "step": 15980 }, { "epoch": 1.0860850659056938, "grad_norm": 0.8350036144256592, "learning_rate": 0.0008642393667617883, "loss": 3.6337, "step": 15985 }, { "epoch": 1.0864247859763554, "grad_norm": 0.6900033950805664, "learning_rate": 0.0008641969017529556, "loss": 3.5762, "step": 15990 }, { "epoch": 1.0867645060470172, "grad_norm": 0.7977431416511536, "learning_rate": 0.0008641544367441229, "loss": 3.6783, "step": 15995 }, { "epoch": 1.087104226117679, "grad_norm": 0.8137974143028259, "learning_rate": 0.0008641119717352901, "loss": 3.7409, "step": 16000 }, { "epoch": 1.0874439461883407, "grad_norm": 0.8998544812202454, "learning_rate": 0.0008640695067264574, "loss": 3.6384, "step": 16005 }, { "epoch": 1.0877836662590026, "grad_norm": 0.7980680465698242, "learning_rate": 0.0008640270417176246, "loss": 3.8776, "step": 16010 }, { "epoch": 1.0881233863296644, "grad_norm": 0.6561006903648376, "learning_rate": 0.000863984576708792, "loss": 3.4956, "step": 16015 }, { "epoch": 1.088463106400326, "grad_norm": 0.5969799160957336, "learning_rate": 0.0008639421116999593, "loss": 3.5969, "step": 16020 }, { "epoch": 1.088802826470988, "grad_norm": 0.824756383895874, "learning_rate": 0.0008638996466911265, "loss": 3.6241, "step": 16025 }, { "epoch": 1.0891425465416498, "grad_norm": 0.8661601543426514, "learning_rate": 0.0008638571816822938, "loss": 3.6212, "step": 16030 }, { "epoch": 1.0894822666123114, "grad_norm": 0.8394104242324829, "learning_rate": 0.0008638147166734611, "loss": 3.5966, "step": 16035 }, { "epoch": 1.0898219866829733, "grad_norm": 0.8741182684898376, "learning_rate": 0.0008637722516646283, "loss": 3.4223, "step": 16040 }, { "epoch": 1.090161706753635, "grad_norm": 0.7803946733474731, "learning_rate": 0.0008637297866557956, "loss": 3.4778, "step": 16045 }, { "epoch": 1.0905014268242967, "grad_norm": 0.6031648516654968, "learning_rate": 0.000863687321646963, "loss": 3.862, "step": 16050 }, { "epoch": 1.0908411468949586, "grad_norm": 0.6892852783203125, "learning_rate": 0.0008636448566381302, "loss": 3.5967, "step": 16055 }, { "epoch": 1.0911808669656202, "grad_norm": 0.8592513799667358, "learning_rate": 0.0008636023916292974, "loss": 3.3707, "step": 16060 }, { "epoch": 1.091520587036282, "grad_norm": 0.8277498483657837, "learning_rate": 0.0008635599266204648, "loss": 3.6386, "step": 16065 }, { "epoch": 1.091860307106944, "grad_norm": 2.0371274948120117, "learning_rate": 0.000863517461611632, "loss": 3.5039, "step": 16070 }, { "epoch": 1.0922000271776056, "grad_norm": 0.7140267491340637, "learning_rate": 0.0008634749966027992, "loss": 3.6246, "step": 16075 }, { "epoch": 1.0925397472482674, "grad_norm": 0.6711429357528687, "learning_rate": 0.0008634325315939666, "loss": 3.5917, "step": 16080 }, { "epoch": 1.0928794673189293, "grad_norm": 0.8528093099594116, "learning_rate": 0.0008633900665851339, "loss": 3.5662, "step": 16085 }, { "epoch": 1.093219187389591, "grad_norm": 1.503667950630188, "learning_rate": 0.0008633476015763011, "loss": 3.3591, "step": 16090 }, { "epoch": 1.0935589074602527, "grad_norm": 0.8576028943061829, "learning_rate": 0.0008633051365674685, "loss": 3.7892, "step": 16095 }, { "epoch": 1.0938986275309146, "grad_norm": 1.0386401414871216, "learning_rate": 0.0008632626715586357, "loss": 3.7382, "step": 16100 }, { "epoch": 1.0942383476015762, "grad_norm": 0.777122437953949, "learning_rate": 0.0008632202065498029, "loss": 3.5138, "step": 16105 }, { "epoch": 1.094578067672238, "grad_norm": 0.8399432301521301, "learning_rate": 0.0008631777415409702, "loss": 3.5598, "step": 16110 }, { "epoch": 1.0949177877429, "grad_norm": 0.9027761816978455, "learning_rate": 0.0008631352765321375, "loss": 3.6889, "step": 16115 }, { "epoch": 1.0952575078135616, "grad_norm": 0.73099684715271, "learning_rate": 0.0008630928115233048, "loss": 3.6822, "step": 16120 }, { "epoch": 1.0955972278842234, "grad_norm": 0.887734055519104, "learning_rate": 0.0008630503465144721, "loss": 3.4904, "step": 16125 }, { "epoch": 1.0959369479548853, "grad_norm": 0.7868804335594177, "learning_rate": 0.0008630078815056394, "loss": 3.8223, "step": 16130 }, { "epoch": 1.096276668025547, "grad_norm": 1.0577092170715332, "learning_rate": 0.0008629654164968066, "loss": 3.4639, "step": 16135 }, { "epoch": 1.0966163880962088, "grad_norm": 0.6253250241279602, "learning_rate": 0.0008629229514879739, "loss": 3.75, "step": 16140 }, { "epoch": 1.0969561081668706, "grad_norm": 0.8179384469985962, "learning_rate": 0.0008628804864791412, "loss": 3.5636, "step": 16145 }, { "epoch": 1.0972958282375322, "grad_norm": 0.7240017056465149, "learning_rate": 0.0008628380214703084, "loss": 3.6615, "step": 16150 }, { "epoch": 1.097635548308194, "grad_norm": 0.784687340259552, "learning_rate": 0.0008627955564614758, "loss": 3.7314, "step": 16155 }, { "epoch": 1.0979752683788557, "grad_norm": 0.6818301677703857, "learning_rate": 0.000862753091452643, "loss": 3.5592, "step": 16160 }, { "epoch": 1.0983149884495176, "grad_norm": 0.9793298244476318, "learning_rate": 0.0008627106264438103, "loss": 3.5025, "step": 16165 }, { "epoch": 1.0986547085201794, "grad_norm": 1.1576931476593018, "learning_rate": 0.0008626681614349776, "loss": 3.6291, "step": 16170 }, { "epoch": 1.098994428590841, "grad_norm": 1.016464114189148, "learning_rate": 0.0008626256964261448, "loss": 3.724, "step": 16175 }, { "epoch": 1.099334148661503, "grad_norm": 0.8881516456604004, "learning_rate": 0.0008625832314173121, "loss": 3.6412, "step": 16180 }, { "epoch": 1.0996738687321648, "grad_norm": 0.6917318105697632, "learning_rate": 0.0008625407664084794, "loss": 3.7253, "step": 16185 }, { "epoch": 1.1000135888028264, "grad_norm": 0.8780159950256348, "learning_rate": 0.0008624983013996467, "loss": 3.4274, "step": 16190 }, { "epoch": 1.1003533088734883, "grad_norm": 0.7432769536972046, "learning_rate": 0.0008624558363908141, "loss": 3.6838, "step": 16195 }, { "epoch": 1.10069302894415, "grad_norm": 0.802573025226593, "learning_rate": 0.0008624133713819813, "loss": 3.6626, "step": 16200 }, { "epoch": 1.1010327490148117, "grad_norm": 0.7606637477874756, "learning_rate": 0.0008623709063731485, "loss": 3.6076, "step": 16205 }, { "epoch": 1.1013724690854736, "grad_norm": 0.7026821374893188, "learning_rate": 0.0008623284413643159, "loss": 3.4194, "step": 16210 }, { "epoch": 1.1017121891561354, "grad_norm": 0.6719335913658142, "learning_rate": 0.0008622859763554831, "loss": 3.4139, "step": 16215 }, { "epoch": 1.102051909226797, "grad_norm": 0.8041277527809143, "learning_rate": 0.0008622435113466503, "loss": 3.7013, "step": 16220 }, { "epoch": 1.102391629297459, "grad_norm": 0.8182024359703064, "learning_rate": 0.0008622010463378177, "loss": 3.8294, "step": 16225 }, { "epoch": 1.1027313493681206, "grad_norm": 0.7067434191703796, "learning_rate": 0.000862158581328985, "loss": 3.6723, "step": 16230 }, { "epoch": 1.1030710694387824, "grad_norm": 0.9231346845626831, "learning_rate": 0.0008621161163201522, "loss": 3.6599, "step": 16235 }, { "epoch": 1.1034107895094443, "grad_norm": 0.6359360218048096, "learning_rate": 0.0008620736513113195, "loss": 3.7619, "step": 16240 }, { "epoch": 1.103750509580106, "grad_norm": 0.7554166316986084, "learning_rate": 0.0008620311863024868, "loss": 3.7817, "step": 16245 }, { "epoch": 1.1040902296507678, "grad_norm": 0.7820613980293274, "learning_rate": 0.000861988721293654, "loss": 3.7275, "step": 16250 }, { "epoch": 1.1044299497214296, "grad_norm": 0.8319656252861023, "learning_rate": 0.0008619462562848214, "loss": 3.65, "step": 16255 }, { "epoch": 1.1047696697920912, "grad_norm": 21.17509651184082, "learning_rate": 0.0008619037912759887, "loss": 3.5016, "step": 16260 }, { "epoch": 1.105109389862753, "grad_norm": 0.8816961646080017, "learning_rate": 0.0008618613262671559, "loss": 3.4664, "step": 16265 }, { "epoch": 1.105449109933415, "grad_norm": 0.9653134942054749, "learning_rate": 0.0008618188612583232, "loss": 3.4988, "step": 16270 }, { "epoch": 1.1057888300040766, "grad_norm": 0.7862028479576111, "learning_rate": 0.0008617763962494904, "loss": 3.6665, "step": 16275 }, { "epoch": 1.1061285500747384, "grad_norm": 0.9713080525398254, "learning_rate": 0.0008617339312406577, "loss": 3.5714, "step": 16280 }, { "epoch": 1.1064682701454003, "grad_norm": 0.8657552599906921, "learning_rate": 0.000861691466231825, "loss": 3.5994, "step": 16285 }, { "epoch": 1.106807990216062, "grad_norm": 0.8849959969520569, "learning_rate": 0.0008616490012229923, "loss": 3.5815, "step": 16290 }, { "epoch": 1.1071477102867238, "grad_norm": 0.8014518618583679, "learning_rate": 0.0008616065362141596, "loss": 3.2865, "step": 16295 }, { "epoch": 1.1074874303573856, "grad_norm": 0.9684229493141174, "learning_rate": 0.0008615640712053269, "loss": 3.374, "step": 16300 }, { "epoch": 1.1078271504280472, "grad_norm": 1.1310782432556152, "learning_rate": 0.0008615216061964941, "loss": 3.5702, "step": 16305 }, { "epoch": 1.108166870498709, "grad_norm": 0.7408681511878967, "learning_rate": 0.0008614791411876613, "loss": 3.8609, "step": 16310 }, { "epoch": 1.108506590569371, "grad_norm": 0.799790620803833, "learning_rate": 0.0008614366761788287, "loss": 3.4911, "step": 16315 }, { "epoch": 1.1088463106400326, "grad_norm": 0.7795165181159973, "learning_rate": 0.0008613942111699959, "loss": 3.7212, "step": 16320 }, { "epoch": 1.1091860307106944, "grad_norm": 0.6737672090530396, "learning_rate": 0.0008613517461611632, "loss": 3.7207, "step": 16325 }, { "epoch": 1.109525750781356, "grad_norm": 0.9147469997406006, "learning_rate": 0.0008613092811523306, "loss": 3.572, "step": 16330 }, { "epoch": 1.109865470852018, "grad_norm": 0.5638120770454407, "learning_rate": 0.0008612668161434978, "loss": 3.7112, "step": 16335 }, { "epoch": 1.1102051909226798, "grad_norm": 0.8296770453453064, "learning_rate": 0.000861224351134665, "loss": 3.46, "step": 16340 }, { "epoch": 1.1105449109933414, "grad_norm": 0.8826504349708557, "learning_rate": 0.0008611818861258324, "loss": 3.7987, "step": 16345 }, { "epoch": 1.1108846310640033, "grad_norm": 0.7032650113105774, "learning_rate": 0.0008611394211169996, "loss": 3.6871, "step": 16350 }, { "epoch": 1.1112243511346651, "grad_norm": 1.0232462882995605, "learning_rate": 0.0008610969561081668, "loss": 3.6665, "step": 16355 }, { "epoch": 1.1115640712053267, "grad_norm": 0.7778091430664062, "learning_rate": 0.0008610544910993343, "loss": 3.4323, "step": 16360 }, { "epoch": 1.1119037912759886, "grad_norm": 0.7016257643699646, "learning_rate": 0.0008610120260905015, "loss": 3.6359, "step": 16365 }, { "epoch": 1.1122435113466504, "grad_norm": 0.8150584697723389, "learning_rate": 0.0008609695610816687, "loss": 3.5146, "step": 16370 }, { "epoch": 1.112583231417312, "grad_norm": 0.7057853937149048, "learning_rate": 0.000860927096072836, "loss": 3.3827, "step": 16375 }, { "epoch": 1.112922951487974, "grad_norm": 0.9030365943908691, "learning_rate": 0.0008608846310640033, "loss": 3.7423, "step": 16380 }, { "epoch": 1.1132626715586358, "grad_norm": 0.8879201412200928, "learning_rate": 0.0008608421660551705, "loss": 3.6915, "step": 16385 }, { "epoch": 1.1136023916292974, "grad_norm": 0.768269956111908, "learning_rate": 0.0008607997010463378, "loss": 3.8513, "step": 16390 }, { "epoch": 1.1139421116999593, "grad_norm": 0.7520266771316528, "learning_rate": 0.0008607572360375052, "loss": 3.8602, "step": 16395 }, { "epoch": 1.114281831770621, "grad_norm": 0.7475628852844238, "learning_rate": 0.0008607147710286724, "loss": 3.4871, "step": 16400 }, { "epoch": 1.1146215518412828, "grad_norm": 1.7916303873062134, "learning_rate": 0.0008606723060198397, "loss": 3.5601, "step": 16405 }, { "epoch": 1.1149612719119446, "grad_norm": 0.7615216374397278, "learning_rate": 0.000860629841011007, "loss": 3.7309, "step": 16410 }, { "epoch": 1.1153009919826062, "grad_norm": 0.8642655611038208, "learning_rate": 0.0008605873760021742, "loss": 3.6074, "step": 16415 }, { "epoch": 1.115640712053268, "grad_norm": 0.833690345287323, "learning_rate": 0.0008605449109933415, "loss": 3.7721, "step": 16420 }, { "epoch": 1.11598043212393, "grad_norm": 0.6924107670783997, "learning_rate": 0.0008605024459845087, "loss": 3.4532, "step": 16425 }, { "epoch": 1.1163201521945916, "grad_norm": 0.9325452446937561, "learning_rate": 0.0008604599809756761, "loss": 3.7371, "step": 16430 }, { "epoch": 1.1166598722652534, "grad_norm": 0.9256713390350342, "learning_rate": 0.0008604175159668434, "loss": 3.524, "step": 16435 }, { "epoch": 1.1169995923359153, "grad_norm": 0.8062615990638733, "learning_rate": 0.0008603750509580106, "loss": 3.6315, "step": 16440 }, { "epoch": 1.117339312406577, "grad_norm": 0.9539016485214233, "learning_rate": 0.0008603325859491779, "loss": 3.5954, "step": 16445 }, { "epoch": 1.1176790324772388, "grad_norm": 0.9835814237594604, "learning_rate": 0.0008602901209403452, "loss": 3.9943, "step": 16450 }, { "epoch": 1.1180187525479006, "grad_norm": 0.7676118016242981, "learning_rate": 0.0008602476559315124, "loss": 3.6656, "step": 16455 }, { "epoch": 1.1183584726185622, "grad_norm": 1.1830953359603882, "learning_rate": 0.0008602051909226796, "loss": 3.6766, "step": 16460 }, { "epoch": 1.118698192689224, "grad_norm": 0.7889232039451599, "learning_rate": 0.0008601627259138471, "loss": 3.8535, "step": 16465 }, { "epoch": 1.119037912759886, "grad_norm": 0.9148766398429871, "learning_rate": 0.0008601202609050143, "loss": 3.3512, "step": 16470 }, { "epoch": 1.1193776328305476, "grad_norm": 1.1237456798553467, "learning_rate": 0.0008600777958961815, "loss": 3.6986, "step": 16475 }, { "epoch": 1.1197173529012094, "grad_norm": 0.7358617186546326, "learning_rate": 0.0008600353308873489, "loss": 3.6707, "step": 16480 }, { "epoch": 1.1200570729718713, "grad_norm": 1.004721760749817, "learning_rate": 0.0008599928658785161, "loss": 3.5836, "step": 16485 }, { "epoch": 1.120396793042533, "grad_norm": 0.6926807761192322, "learning_rate": 0.0008599504008696833, "loss": 3.5393, "step": 16490 }, { "epoch": 1.1207365131131948, "grad_norm": 0.710195779800415, "learning_rate": 0.0008599079358608507, "loss": 3.6593, "step": 16495 }, { "epoch": 1.1210762331838564, "grad_norm": 0.798686146736145, "learning_rate": 0.000859865470852018, "loss": 3.6687, "step": 16500 }, { "epoch": 1.1214159532545183, "grad_norm": 0.7609354257583618, "learning_rate": 0.0008598230058431852, "loss": 3.636, "step": 16505 }, { "epoch": 1.1217556733251801, "grad_norm": 0.7079862356185913, "learning_rate": 0.0008597805408343525, "loss": 3.5538, "step": 16510 }, { "epoch": 1.1220953933958417, "grad_norm": 0.7339089512825012, "learning_rate": 0.0008597380758255198, "loss": 3.6521, "step": 16515 }, { "epoch": 1.1224351134665036, "grad_norm": 0.7709986567497253, "learning_rate": 0.000859695610816687, "loss": 3.7699, "step": 16520 }, { "epoch": 1.1227748335371655, "grad_norm": 0.8063376545906067, "learning_rate": 0.0008596531458078543, "loss": 3.5019, "step": 16525 }, { "epoch": 1.123114553607827, "grad_norm": 1.043861746788025, "learning_rate": 0.0008596106807990216, "loss": 3.5688, "step": 16530 }, { "epoch": 1.123454273678489, "grad_norm": 0.8596996068954468, "learning_rate": 0.000859568215790189, "loss": 3.5837, "step": 16535 }, { "epoch": 1.1237939937491508, "grad_norm": 0.78280109167099, "learning_rate": 0.0008595257507813562, "loss": 3.8415, "step": 16540 }, { "epoch": 1.1241337138198124, "grad_norm": 0.8170922994613647, "learning_rate": 0.0008594832857725235, "loss": 3.6423, "step": 16545 }, { "epoch": 1.1244734338904743, "grad_norm": 0.8567583560943604, "learning_rate": 0.0008594408207636908, "loss": 3.5889, "step": 16550 }, { "epoch": 1.1248131539611361, "grad_norm": 0.9866414070129395, "learning_rate": 0.000859398355754858, "loss": 3.6437, "step": 16555 }, { "epoch": 1.1251528740317978, "grad_norm": 0.8666515350341797, "learning_rate": 0.0008593558907460252, "loss": 3.6796, "step": 16560 }, { "epoch": 1.1254925941024596, "grad_norm": 0.9461397528648376, "learning_rate": 0.0008593134257371926, "loss": 3.7611, "step": 16565 }, { "epoch": 1.1258323141731212, "grad_norm": 0.8283349871635437, "learning_rate": 0.0008592709607283599, "loss": 3.56, "step": 16570 }, { "epoch": 1.126172034243783, "grad_norm": 0.8518447875976562, "learning_rate": 0.0008592284957195271, "loss": 3.7552, "step": 16575 }, { "epoch": 1.126511754314445, "grad_norm": 0.8528580665588379, "learning_rate": 0.0008591860307106945, "loss": 3.7868, "step": 16580 }, { "epoch": 1.1268514743851066, "grad_norm": 0.6408374905586243, "learning_rate": 0.0008591435657018617, "loss": 3.7224, "step": 16585 }, { "epoch": 1.1271911944557684, "grad_norm": 0.853872537612915, "learning_rate": 0.0008591011006930289, "loss": 3.5575, "step": 16590 }, { "epoch": 1.1275309145264303, "grad_norm": 0.8306583166122437, "learning_rate": 0.0008590586356841963, "loss": 3.9019, "step": 16595 }, { "epoch": 1.127870634597092, "grad_norm": 1.2525982856750488, "learning_rate": 0.0008590161706753635, "loss": 3.5223, "step": 16600 }, { "epoch": 1.1282103546677538, "grad_norm": 0.8633636236190796, "learning_rate": 0.0008589737056665308, "loss": 3.4292, "step": 16605 }, { "epoch": 1.1285500747384156, "grad_norm": 0.8228412866592407, "learning_rate": 0.0008589312406576981, "loss": 3.6021, "step": 16610 }, { "epoch": 1.1288897948090773, "grad_norm": 0.8219278454780579, "learning_rate": 0.0008588887756488654, "loss": 3.6794, "step": 16615 }, { "epoch": 1.129229514879739, "grad_norm": 1.0117695331573486, "learning_rate": 0.0008588463106400326, "loss": 3.4624, "step": 16620 }, { "epoch": 1.129569234950401, "grad_norm": 1.118656039237976, "learning_rate": 0.0008588038456311999, "loss": 3.6851, "step": 16625 }, { "epoch": 1.1299089550210626, "grad_norm": 0.8691116571426392, "learning_rate": 0.0008587613806223672, "loss": 3.5823, "step": 16630 }, { "epoch": 1.1302486750917244, "grad_norm": 0.8138573169708252, "learning_rate": 0.0008587189156135344, "loss": 3.7302, "step": 16635 }, { "epoch": 1.1305883951623863, "grad_norm": 0.8142614364624023, "learning_rate": 0.0008586764506047018, "loss": 3.6892, "step": 16640 }, { "epoch": 1.130928115233048, "grad_norm": 0.7584553956985474, "learning_rate": 0.0008586339855958691, "loss": 3.531, "step": 16645 }, { "epoch": 1.1312678353037098, "grad_norm": 0.9203415513038635, "learning_rate": 0.0008585915205870363, "loss": 3.8447, "step": 16650 }, { "epoch": 1.1316075553743716, "grad_norm": 0.770386815071106, "learning_rate": 0.0008585490555782036, "loss": 3.3881, "step": 16655 }, { "epoch": 1.1319472754450333, "grad_norm": 0.7741190195083618, "learning_rate": 0.0008585065905693708, "loss": 3.6307, "step": 16660 }, { "epoch": 1.1322869955156951, "grad_norm": 0.6954805254936218, "learning_rate": 0.0008584641255605381, "loss": 3.593, "step": 16665 }, { "epoch": 1.1326267155863567, "grad_norm": 0.8596692681312561, "learning_rate": 0.0008584216605517054, "loss": 3.6929, "step": 16670 }, { "epoch": 1.1329664356570186, "grad_norm": 0.819408118724823, "learning_rate": 0.0008583791955428727, "loss": 3.64, "step": 16675 }, { "epoch": 1.1333061557276805, "grad_norm": 1.0247679948806763, "learning_rate": 0.00085833673053404, "loss": 3.6088, "step": 16680 }, { "epoch": 1.133645875798342, "grad_norm": 0.7671202421188354, "learning_rate": 0.0008582942655252073, "loss": 3.6303, "step": 16685 }, { "epoch": 1.133985595869004, "grad_norm": 0.8971432447433472, "learning_rate": 0.0008582518005163745, "loss": 3.7277, "step": 16690 }, { "epoch": 1.1343253159396658, "grad_norm": 0.7380162477493286, "learning_rate": 0.0008582093355075417, "loss": 3.5407, "step": 16695 }, { "epoch": 1.1346650360103274, "grad_norm": 1.3466129302978516, "learning_rate": 0.0008581668704987091, "loss": 3.3349, "step": 16700 }, { "epoch": 1.1350047560809893, "grad_norm": 0.7564104199409485, "learning_rate": 0.0008581244054898763, "loss": 3.6664, "step": 16705 }, { "epoch": 1.1353444761516511, "grad_norm": 0.9620634913444519, "learning_rate": 0.0008580819404810436, "loss": 3.7406, "step": 16710 }, { "epoch": 1.1356841962223128, "grad_norm": 0.721673846244812, "learning_rate": 0.000858039475472211, "loss": 3.5365, "step": 16715 }, { "epoch": 1.1360239162929746, "grad_norm": 0.7465148568153381, "learning_rate": 0.0008579970104633782, "loss": 3.8301, "step": 16720 }, { "epoch": 1.1363636363636362, "grad_norm": 0.8791694641113281, "learning_rate": 0.0008579545454545454, "loss": 3.6285, "step": 16725 }, { "epoch": 1.136703356434298, "grad_norm": 0.7738268971443176, "learning_rate": 0.0008579120804457128, "loss": 3.7493, "step": 16730 }, { "epoch": 1.13704307650496, "grad_norm": 0.9750152826309204, "learning_rate": 0.00085786961543688, "loss": 3.7962, "step": 16735 }, { "epoch": 1.1373827965756216, "grad_norm": 2.011751890182495, "learning_rate": 0.0008578271504280472, "loss": 3.5232, "step": 16740 }, { "epoch": 1.1377225166462834, "grad_norm": 0.88902747631073, "learning_rate": 0.0008577846854192147, "loss": 3.5958, "step": 16745 }, { "epoch": 1.1380622367169453, "grad_norm": 0.9654760956764221, "learning_rate": 0.0008577422204103819, "loss": 3.6575, "step": 16750 }, { "epoch": 1.138401956787607, "grad_norm": 1.0040602684020996, "learning_rate": 0.0008576997554015491, "loss": 3.8901, "step": 16755 }, { "epoch": 1.1387416768582688, "grad_norm": 0.9915438890457153, "learning_rate": 0.0008576572903927164, "loss": 3.5477, "step": 16760 }, { "epoch": 1.1390813969289306, "grad_norm": 0.7121073007583618, "learning_rate": 0.0008576148253838837, "loss": 3.9085, "step": 16765 }, { "epoch": 1.1394211169995923, "grad_norm": 1.034774661064148, "learning_rate": 0.0008575723603750509, "loss": 3.6523, "step": 16770 }, { "epoch": 1.139760837070254, "grad_norm": 0.7872732877731323, "learning_rate": 0.0008575298953662182, "loss": 3.5303, "step": 16775 }, { "epoch": 1.140100557140916, "grad_norm": 0.766667366027832, "learning_rate": 0.0008574874303573856, "loss": 3.5771, "step": 16780 }, { "epoch": 1.1404402772115776, "grad_norm": 0.8747329115867615, "learning_rate": 0.0008574449653485528, "loss": 3.6362, "step": 16785 }, { "epoch": 1.1407799972822394, "grad_norm": 0.7882775068283081, "learning_rate": 0.0008574025003397201, "loss": 3.7866, "step": 16790 }, { "epoch": 1.1411197173529013, "grad_norm": 1.009997844696045, "learning_rate": 0.0008573600353308873, "loss": 3.4335, "step": 16795 }, { "epoch": 1.141459437423563, "grad_norm": 0.8503082990646362, "learning_rate": 0.0008573175703220546, "loss": 3.7175, "step": 16800 }, { "epoch": 1.1417991574942248, "grad_norm": 0.9235851764678955, "learning_rate": 0.0008572751053132219, "loss": 3.6923, "step": 16805 }, { "epoch": 1.1421388775648866, "grad_norm": 0.8511438369750977, "learning_rate": 0.0008572326403043891, "loss": 3.713, "step": 16810 }, { "epoch": 1.1424785976355483, "grad_norm": 1.3942556381225586, "learning_rate": 0.0008571901752955565, "loss": 3.6219, "step": 16815 }, { "epoch": 1.1428183177062101, "grad_norm": 0.7534943222999573, "learning_rate": 0.0008571477102867238, "loss": 3.4717, "step": 16820 }, { "epoch": 1.143158037776872, "grad_norm": 0.8964465260505676, "learning_rate": 0.000857105245277891, "loss": 3.5183, "step": 16825 }, { "epoch": 1.1434977578475336, "grad_norm": 0.7877064943313599, "learning_rate": 0.0008570627802690583, "loss": 3.5627, "step": 16830 }, { "epoch": 1.1438374779181955, "grad_norm": 1.1212519407272339, "learning_rate": 0.0008570203152602256, "loss": 3.7749, "step": 16835 }, { "epoch": 1.144177197988857, "grad_norm": 0.7497186064720154, "learning_rate": 0.0008569778502513928, "loss": 3.7101, "step": 16840 }, { "epoch": 1.144516918059519, "grad_norm": 2.6848554611206055, "learning_rate": 0.00085693538524256, "loss": 3.6506, "step": 16845 }, { "epoch": 1.1448566381301808, "grad_norm": 0.81125408411026, "learning_rate": 0.0008568929202337275, "loss": 3.7755, "step": 16850 }, { "epoch": 1.1451963582008424, "grad_norm": 0.9101123213768005, "learning_rate": 0.0008568504552248947, "loss": 3.5634, "step": 16855 }, { "epoch": 1.1455360782715043, "grad_norm": 0.6549205780029297, "learning_rate": 0.0008568079902160619, "loss": 3.7316, "step": 16860 }, { "epoch": 1.1458757983421661, "grad_norm": 0.83475261926651, "learning_rate": 0.0008567655252072293, "loss": 3.4136, "step": 16865 }, { "epoch": 1.1462155184128278, "grad_norm": 0.8266447186470032, "learning_rate": 0.0008567230601983965, "loss": 3.5383, "step": 16870 }, { "epoch": 1.1465552384834896, "grad_norm": 1.1312578916549683, "learning_rate": 0.0008566805951895638, "loss": 3.8844, "step": 16875 }, { "epoch": 1.1468949585541515, "grad_norm": 0.8602306842803955, "learning_rate": 0.0008566381301807312, "loss": 3.6204, "step": 16880 }, { "epoch": 1.147234678624813, "grad_norm": 0.8610104918479919, "learning_rate": 0.0008565956651718984, "loss": 3.657, "step": 16885 }, { "epoch": 1.147574398695475, "grad_norm": 0.8559669256210327, "learning_rate": 0.0008565532001630657, "loss": 3.6957, "step": 16890 }, { "epoch": 1.1479141187661366, "grad_norm": 0.8816953301429749, "learning_rate": 0.000856510735154233, "loss": 3.6052, "step": 16895 }, { "epoch": 1.1482538388367984, "grad_norm": 0.7338087558746338, "learning_rate": 0.0008564682701454002, "loss": 3.5228, "step": 16900 }, { "epoch": 1.1485935589074603, "grad_norm": 0.646308422088623, "learning_rate": 0.0008564258051365675, "loss": 3.6746, "step": 16905 }, { "epoch": 1.148933278978122, "grad_norm": 0.8172942996025085, "learning_rate": 0.0008563833401277347, "loss": 3.5247, "step": 16910 }, { "epoch": 1.1492729990487838, "grad_norm": 1.034718632698059, "learning_rate": 0.0008563408751189021, "loss": 3.5556, "step": 16915 }, { "epoch": 1.1496127191194456, "grad_norm": 0.8653772473335266, "learning_rate": 0.0008562984101100694, "loss": 3.5308, "step": 16920 }, { "epoch": 1.1499524391901073, "grad_norm": 1.1585173606872559, "learning_rate": 0.0008562559451012366, "loss": 3.5779, "step": 16925 }, { "epoch": 1.150292159260769, "grad_norm": 1.0344363451004028, "learning_rate": 0.0008562134800924039, "loss": 3.6363, "step": 16930 }, { "epoch": 1.150631879331431, "grad_norm": 1.2529666423797607, "learning_rate": 0.0008561710150835712, "loss": 3.54, "step": 16935 }, { "epoch": 1.1509715994020926, "grad_norm": 0.7306733727455139, "learning_rate": 0.0008561285500747384, "loss": 3.7216, "step": 16940 }, { "epoch": 1.1513113194727544, "grad_norm": 0.7992914319038391, "learning_rate": 0.0008560860850659056, "loss": 3.4651, "step": 16945 }, { "epoch": 1.1516510395434163, "grad_norm": 0.7225033640861511, "learning_rate": 0.0008560436200570731, "loss": 3.6823, "step": 16950 }, { "epoch": 1.151990759614078, "grad_norm": 0.7301670908927917, "learning_rate": 0.0008560011550482403, "loss": 3.3418, "step": 16955 }, { "epoch": 1.1523304796847398, "grad_norm": 0.9155261516571045, "learning_rate": 0.0008559586900394075, "loss": 3.4561, "step": 16960 }, { "epoch": 1.1526701997554016, "grad_norm": 0.962055504322052, "learning_rate": 0.0008559162250305749, "loss": 3.5986, "step": 16965 }, { "epoch": 1.1530099198260633, "grad_norm": 0.8913617730140686, "learning_rate": 0.0008558737600217421, "loss": 3.5035, "step": 16970 }, { "epoch": 1.1533496398967251, "grad_norm": 0.944158136844635, "learning_rate": 0.0008558312950129093, "loss": 3.5333, "step": 16975 }, { "epoch": 1.153689359967387, "grad_norm": 1.0185366868972778, "learning_rate": 0.0008557888300040767, "loss": 3.6026, "step": 16980 }, { "epoch": 1.1540290800380486, "grad_norm": 1.439587950706482, "learning_rate": 0.000855746364995244, "loss": 3.8101, "step": 16985 }, { "epoch": 1.1543688001087105, "grad_norm": 1.1215914487838745, "learning_rate": 0.0008557038999864112, "loss": 3.4887, "step": 16990 }, { "epoch": 1.1547085201793723, "grad_norm": 1.2692121267318726, "learning_rate": 0.0008556614349775786, "loss": 3.5968, "step": 16995 }, { "epoch": 1.155048240250034, "grad_norm": 0.9150411486625671, "learning_rate": 0.0008556189699687458, "loss": 3.5848, "step": 17000 }, { "epoch": 1.1553879603206958, "grad_norm": 1.4083114862442017, "learning_rate": 0.000855576504959913, "loss": 3.653, "step": 17005 }, { "epoch": 1.1557276803913574, "grad_norm": 0.9078852534294128, "learning_rate": 0.0008555340399510803, "loss": 3.7396, "step": 17010 }, { "epoch": 1.1560674004620193, "grad_norm": 0.8285259008407593, "learning_rate": 0.0008554915749422476, "loss": 3.7799, "step": 17015 }, { "epoch": 1.1564071205326811, "grad_norm": 0.6048609018325806, "learning_rate": 0.0008554491099334149, "loss": 3.6569, "step": 17020 }, { "epoch": 1.1567468406033428, "grad_norm": 1.0835025310516357, "learning_rate": 0.0008554066449245822, "loss": 3.8416, "step": 17025 }, { "epoch": 1.1570865606740046, "grad_norm": 1.3159259557724, "learning_rate": 0.0008553641799157495, "loss": 3.5665, "step": 17030 }, { "epoch": 1.1574262807446665, "grad_norm": 1.063502550125122, "learning_rate": 0.0008553217149069167, "loss": 3.5922, "step": 17035 }, { "epoch": 1.157766000815328, "grad_norm": 1.0917658805847168, "learning_rate": 0.000855279249898084, "loss": 3.3665, "step": 17040 }, { "epoch": 1.15810572088599, "grad_norm": 1.282745599746704, "learning_rate": 0.0008552367848892512, "loss": 3.8931, "step": 17045 }, { "epoch": 1.1584454409566518, "grad_norm": 0.9100906848907471, "learning_rate": 0.0008551943198804185, "loss": 3.3497, "step": 17050 }, { "epoch": 1.1587851610273134, "grad_norm": 1.7236732244491577, "learning_rate": 0.0008551518548715859, "loss": 3.6626, "step": 17055 }, { "epoch": 1.1591248810979753, "grad_norm": 0.8754659295082092, "learning_rate": 0.0008551093898627531, "loss": 3.6369, "step": 17060 }, { "epoch": 1.159464601168637, "grad_norm": 0.8582232594490051, "learning_rate": 0.0008550669248539204, "loss": 3.7551, "step": 17065 }, { "epoch": 1.1598043212392988, "grad_norm": 0.7621845602989197, "learning_rate": 0.0008550244598450877, "loss": 3.5718, "step": 17070 }, { "epoch": 1.1601440413099606, "grad_norm": 0.7973881959915161, "learning_rate": 0.0008549819948362549, "loss": 3.748, "step": 17075 }, { "epoch": 1.1604837613806223, "grad_norm": 2.1430253982543945, "learning_rate": 0.0008549395298274222, "loss": 3.3996, "step": 17080 }, { "epoch": 1.1608234814512841, "grad_norm": 1.0586857795715332, "learning_rate": 0.0008548970648185895, "loss": 3.5756, "step": 17085 }, { "epoch": 1.161163201521946, "grad_norm": 0.7825150489807129, "learning_rate": 0.0008548545998097568, "loss": 3.5194, "step": 17090 }, { "epoch": 1.1615029215926076, "grad_norm": 0.7929461002349854, "learning_rate": 0.000854812134800924, "loss": 3.5636, "step": 17095 }, { "epoch": 1.1618426416632694, "grad_norm": 0.8021267652511597, "learning_rate": 0.0008547696697920914, "loss": 3.5552, "step": 17100 }, { "epoch": 1.1621823617339313, "grad_norm": 0.7752456068992615, "learning_rate": 0.0008547272047832586, "loss": 3.5965, "step": 17105 }, { "epoch": 1.162522081804593, "grad_norm": 0.7759296894073486, "learning_rate": 0.0008546847397744258, "loss": 3.4726, "step": 17110 }, { "epoch": 1.1628618018752548, "grad_norm": 2.4652488231658936, "learning_rate": 0.0008546422747655932, "loss": 3.476, "step": 17115 }, { "epoch": 1.1632015219459166, "grad_norm": 0.7785670757293701, "learning_rate": 0.0008545998097567604, "loss": 3.589, "step": 17120 }, { "epoch": 1.1635412420165783, "grad_norm": 0.7526088356971741, "learning_rate": 0.0008545573447479277, "loss": 3.5264, "step": 17125 }, { "epoch": 1.1638809620872401, "grad_norm": 0.6999656558036804, "learning_rate": 0.0008545148797390951, "loss": 3.8938, "step": 17130 }, { "epoch": 1.164220682157902, "grad_norm": 0.8884168267250061, "learning_rate": 0.0008544724147302623, "loss": 3.3884, "step": 17135 }, { "epoch": 1.1645604022285636, "grad_norm": 0.8296924829483032, "learning_rate": 0.0008544299497214295, "loss": 3.8405, "step": 17140 }, { "epoch": 1.1649001222992255, "grad_norm": 0.8450603485107422, "learning_rate": 0.0008543874847125968, "loss": 3.6565, "step": 17145 }, { "epoch": 1.1652398423698873, "grad_norm": 0.9728952646255493, "learning_rate": 0.0008543450197037641, "loss": 3.6436, "step": 17150 }, { "epoch": 1.165579562440549, "grad_norm": 0.8507139682769775, "learning_rate": 0.0008543025546949313, "loss": 3.6495, "step": 17155 }, { "epoch": 1.1659192825112108, "grad_norm": 0.728164792060852, "learning_rate": 0.0008542600896860987, "loss": 3.8071, "step": 17160 }, { "epoch": 1.1662590025818727, "grad_norm": 0.971473753452301, "learning_rate": 0.000854217624677266, "loss": 3.6427, "step": 17165 }, { "epoch": 1.1665987226525343, "grad_norm": 0.6229178309440613, "learning_rate": 0.0008541751596684332, "loss": 3.489, "step": 17170 }, { "epoch": 1.1669384427231961, "grad_norm": 0.9602677226066589, "learning_rate": 0.0008541326946596005, "loss": 3.8143, "step": 17175 }, { "epoch": 1.1672781627938578, "grad_norm": 2.633232593536377, "learning_rate": 0.0008540902296507678, "loss": 3.8185, "step": 17180 }, { "epoch": 1.1676178828645196, "grad_norm": 0.764832615852356, "learning_rate": 0.000854047764641935, "loss": 3.6915, "step": 17185 }, { "epoch": 1.1679576029351815, "grad_norm": 0.9543217420578003, "learning_rate": 0.0008540052996331023, "loss": 3.4057, "step": 17190 }, { "epoch": 1.168297323005843, "grad_norm": 1.017743468284607, "learning_rate": 0.0008539628346242696, "loss": 3.527, "step": 17195 }, { "epoch": 1.168637043076505, "grad_norm": 1.7346428632736206, "learning_rate": 0.0008539203696154369, "loss": 3.8696, "step": 17200 }, { "epoch": 1.1689767631471668, "grad_norm": 0.6835223436355591, "learning_rate": 0.0008538779046066042, "loss": 3.5876, "step": 17205 }, { "epoch": 1.1693164832178284, "grad_norm": 0.7966700196266174, "learning_rate": 0.0008538354395977714, "loss": 3.6236, "step": 17210 }, { "epoch": 1.1696562032884903, "grad_norm": 0.7694163918495178, "learning_rate": 0.0008537929745889388, "loss": 3.5424, "step": 17215 }, { "epoch": 1.1699959233591521, "grad_norm": 7.493810176849365, "learning_rate": 0.000853750509580106, "loss": 3.703, "step": 17220 }, { "epoch": 1.1703356434298138, "grad_norm": 1.1680781841278076, "learning_rate": 0.0008537080445712732, "loss": 3.5469, "step": 17225 }, { "epoch": 1.1706753635004756, "grad_norm": 1.880760669708252, "learning_rate": 0.0008536655795624407, "loss": 3.5878, "step": 17230 }, { "epoch": 1.1710150835711373, "grad_norm": 5.972364902496338, "learning_rate": 0.0008536231145536079, "loss": 3.4208, "step": 17235 }, { "epoch": 1.1713548036417991, "grad_norm": 1.596903681755066, "learning_rate": 0.0008535806495447751, "loss": 3.3627, "step": 17240 }, { "epoch": 1.171694523712461, "grad_norm": 2.3864011764526367, "learning_rate": 0.0008535381845359424, "loss": 3.5494, "step": 17245 }, { "epoch": 1.1720342437831226, "grad_norm": 0.8851460218429565, "learning_rate": 0.0008534957195271097, "loss": 3.6108, "step": 17250 }, { "epoch": 1.1723739638537845, "grad_norm": 1.5603492259979248, "learning_rate": 0.0008534532545182769, "loss": 3.8096, "step": 17255 }, { "epoch": 1.1727136839244463, "grad_norm": 0.7604426741600037, "learning_rate": 0.0008534107895094442, "loss": 3.6608, "step": 17260 }, { "epoch": 1.173053403995108, "grad_norm": 0.8749967217445374, "learning_rate": 0.0008533683245006116, "loss": 3.6138, "step": 17265 }, { "epoch": 1.1733931240657698, "grad_norm": 0.9765446186065674, "learning_rate": 0.0008533258594917788, "loss": 3.5996, "step": 17270 }, { "epoch": 1.1737328441364316, "grad_norm": 0.9182650446891785, "learning_rate": 0.0008532833944829461, "loss": 3.3802, "step": 17275 }, { "epoch": 1.1740725642070933, "grad_norm": 1.864100456237793, "learning_rate": 0.0008532409294741134, "loss": 3.8625, "step": 17280 }, { "epoch": 1.1744122842777551, "grad_norm": 0.9317402243614197, "learning_rate": 0.0008531984644652806, "loss": 3.5741, "step": 17285 }, { "epoch": 1.174752004348417, "grad_norm": 0.7591428756713867, "learning_rate": 0.0008531559994564479, "loss": 3.6428, "step": 17290 }, { "epoch": 1.1750917244190786, "grad_norm": 0.8667760491371155, "learning_rate": 0.0008531135344476151, "loss": 3.7521, "step": 17295 }, { "epoch": 1.1754314444897405, "grad_norm": 0.8764576315879822, "learning_rate": 0.0008530710694387825, "loss": 3.7382, "step": 17300 }, { "epoch": 1.1757711645604023, "grad_norm": 0.7918635010719299, "learning_rate": 0.0008530286044299498, "loss": 3.7379, "step": 17305 }, { "epoch": 1.176110884631064, "grad_norm": 0.9020955562591553, "learning_rate": 0.000852986139421117, "loss": 3.7546, "step": 17310 }, { "epoch": 1.1764506047017258, "grad_norm": 0.9699614644050598, "learning_rate": 0.0008529436744122843, "loss": 3.6168, "step": 17315 }, { "epoch": 1.1767903247723877, "grad_norm": 0.6874472498893738, "learning_rate": 0.0008529012094034516, "loss": 3.8207, "step": 17320 }, { "epoch": 1.1771300448430493, "grad_norm": 0.8439446687698364, "learning_rate": 0.0008528587443946188, "loss": 3.6292, "step": 17325 }, { "epoch": 1.1774697649137111, "grad_norm": 0.7672669291496277, "learning_rate": 0.000852816279385786, "loss": 3.8878, "step": 17330 }, { "epoch": 1.177809484984373, "grad_norm": 0.7623843550682068, "learning_rate": 0.0008527738143769535, "loss": 3.7638, "step": 17335 }, { "epoch": 1.1781492050550346, "grad_norm": 3.8027656078338623, "learning_rate": 0.0008527313493681207, "loss": 3.6529, "step": 17340 }, { "epoch": 1.1784889251256965, "grad_norm": 1.0129262208938599, "learning_rate": 0.0008526888843592879, "loss": 3.5983, "step": 17345 }, { "epoch": 1.178828645196358, "grad_norm": 0.684468150138855, "learning_rate": 0.0008526464193504553, "loss": 3.5052, "step": 17350 }, { "epoch": 1.17916836526702, "grad_norm": 0.7793314456939697, "learning_rate": 0.0008526039543416225, "loss": 3.4746, "step": 17355 }, { "epoch": 1.1795080853376818, "grad_norm": 0.8944817185401917, "learning_rate": 0.0008525614893327897, "loss": 3.7936, "step": 17360 }, { "epoch": 1.1798478054083434, "grad_norm": 1.2124223709106445, "learning_rate": 0.0008525190243239571, "loss": 3.313, "step": 17365 }, { "epoch": 1.1801875254790053, "grad_norm": 0.9932569861412048, "learning_rate": 0.0008524765593151244, "loss": 3.5552, "step": 17370 }, { "epoch": 1.1805272455496671, "grad_norm": 0.84503173828125, "learning_rate": 0.0008524340943062916, "loss": 3.5939, "step": 17375 }, { "epoch": 1.1808669656203288, "grad_norm": 0.7551472783088684, "learning_rate": 0.000852391629297459, "loss": 3.6712, "step": 17380 }, { "epoch": 1.1812066856909906, "grad_norm": 0.9820950031280518, "learning_rate": 0.0008523491642886262, "loss": 3.3883, "step": 17385 }, { "epoch": 1.1815464057616525, "grad_norm": 2.210876941680908, "learning_rate": 0.0008523066992797934, "loss": 3.5562, "step": 17390 }, { "epoch": 1.1818861258323141, "grad_norm": 0.9046183824539185, "learning_rate": 0.0008522642342709607, "loss": 3.4567, "step": 17395 }, { "epoch": 1.182225845902976, "grad_norm": 0.7804042100906372, "learning_rate": 0.000852221769262128, "loss": 3.4418, "step": 17400 }, { "epoch": 1.1825655659736376, "grad_norm": 1.4414958953857422, "learning_rate": 0.0008521793042532953, "loss": 3.4449, "step": 17405 }, { "epoch": 1.1829052860442995, "grad_norm": 0.8063602447509766, "learning_rate": 0.0008521368392444626, "loss": 3.7806, "step": 17410 }, { "epoch": 1.1832450061149613, "grad_norm": 0.7786731719970703, "learning_rate": 0.0008520943742356299, "loss": 3.5172, "step": 17415 }, { "epoch": 1.183584726185623, "grad_norm": 1.3335820436477661, "learning_rate": 0.0008520519092267971, "loss": 3.5369, "step": 17420 }, { "epoch": 1.1839244462562848, "grad_norm": 0.7360917329788208, "learning_rate": 0.0008520094442179644, "loss": 3.5619, "step": 17425 }, { "epoch": 1.1842641663269466, "grad_norm": 0.7670918107032776, "learning_rate": 0.0008519669792091316, "loss": 3.8127, "step": 17430 }, { "epoch": 1.1846038863976083, "grad_norm": 0.6371151208877563, "learning_rate": 0.0008519245142002989, "loss": 3.9432, "step": 17435 }, { "epoch": 1.1849436064682701, "grad_norm": 0.916063129901886, "learning_rate": 0.0008518820491914663, "loss": 3.5995, "step": 17440 }, { "epoch": 1.185283326538932, "grad_norm": 0.7930320501327515, "learning_rate": 0.0008518395841826335, "loss": 3.3264, "step": 17445 }, { "epoch": 1.1856230466095936, "grad_norm": 0.86455237865448, "learning_rate": 0.0008517971191738008, "loss": 3.5734, "step": 17450 }, { "epoch": 1.1859627666802555, "grad_norm": 0.8365366458892822, "learning_rate": 0.0008517546541649681, "loss": 3.743, "step": 17455 }, { "epoch": 1.1863024867509173, "grad_norm": 0.6427135467529297, "learning_rate": 0.0008517121891561353, "loss": 3.6662, "step": 17460 }, { "epoch": 1.186642206821579, "grad_norm": 0.952447772026062, "learning_rate": 0.0008516697241473026, "loss": 3.6493, "step": 17465 }, { "epoch": 1.1869819268922408, "grad_norm": 0.765146791934967, "learning_rate": 0.00085162725913847, "loss": 3.8014, "step": 17470 }, { "epoch": 1.1873216469629027, "grad_norm": 0.9991302490234375, "learning_rate": 0.0008515847941296372, "loss": 3.2053, "step": 17475 }, { "epoch": 1.1876613670335643, "grad_norm": 0.9211218953132629, "learning_rate": 0.0008515423291208044, "loss": 3.6564, "step": 17480 }, { "epoch": 1.1880010871042261, "grad_norm": 3.253815174102783, "learning_rate": 0.0008514998641119718, "loss": 3.7077, "step": 17485 }, { "epoch": 1.188340807174888, "grad_norm": 0.8603718876838684, "learning_rate": 0.000851457399103139, "loss": 3.3403, "step": 17490 }, { "epoch": 1.1886805272455496, "grad_norm": 0.8207667469978333, "learning_rate": 0.0008514149340943062, "loss": 3.4288, "step": 17495 }, { "epoch": 1.1890202473162115, "grad_norm": 0.7828772068023682, "learning_rate": 0.0008513724690854736, "loss": 3.4702, "step": 17500 }, { "epoch": 1.1893599673868733, "grad_norm": 0.790790319442749, "learning_rate": 0.0008513300040766409, "loss": 3.4339, "step": 17505 }, { "epoch": 1.189699687457535, "grad_norm": 0.742437481880188, "learning_rate": 0.0008512875390678081, "loss": 3.5319, "step": 17510 }, { "epoch": 1.1900394075281968, "grad_norm": 0.7208537459373474, "learning_rate": 0.0008512450740589755, "loss": 3.8602, "step": 17515 }, { "epoch": 1.1903791275988584, "grad_norm": 0.8345505595207214, "learning_rate": 0.0008512026090501427, "loss": 3.5534, "step": 17520 }, { "epoch": 1.1907188476695203, "grad_norm": 1.2964720726013184, "learning_rate": 0.0008511601440413099, "loss": 3.633, "step": 17525 }, { "epoch": 1.1910585677401822, "grad_norm": 0.7335799932479858, "learning_rate": 0.0008511176790324772, "loss": 3.652, "step": 17530 }, { "epoch": 1.1913982878108438, "grad_norm": 0.8151165843009949, "learning_rate": 0.0008510752140236445, "loss": 3.7206, "step": 17535 }, { "epoch": 1.1917380078815056, "grad_norm": 0.8891379833221436, "learning_rate": 0.0008510327490148118, "loss": 3.5112, "step": 17540 }, { "epoch": 1.1920777279521675, "grad_norm": 0.9424435496330261, "learning_rate": 0.0008509902840059791, "loss": 3.6306, "step": 17545 }, { "epoch": 1.1924174480228291, "grad_norm": 0.8631328344345093, "learning_rate": 0.0008509478189971464, "loss": 3.7135, "step": 17550 }, { "epoch": 1.192757168093491, "grad_norm": 1.1773486137390137, "learning_rate": 0.0008509053539883137, "loss": 3.6368, "step": 17555 }, { "epoch": 1.1930968881641528, "grad_norm": 0.8781136870384216, "learning_rate": 0.0008508628889794809, "loss": 3.6872, "step": 17560 }, { "epoch": 1.1934366082348145, "grad_norm": 0.9004260897636414, "learning_rate": 0.0008508204239706482, "loss": 3.3981, "step": 17565 }, { "epoch": 1.1937763283054763, "grad_norm": 0.8195340633392334, "learning_rate": 0.0008507779589618155, "loss": 3.6932, "step": 17570 }, { "epoch": 1.194116048376138, "grad_norm": 1.0165120363235474, "learning_rate": 0.0008507354939529828, "loss": 3.3475, "step": 17575 }, { "epoch": 1.1944557684467998, "grad_norm": 0.7121320366859436, "learning_rate": 0.00085069302894415, "loss": 3.6105, "step": 17580 }, { "epoch": 1.1947954885174616, "grad_norm": 1.007370114326477, "learning_rate": 0.0008506505639353174, "loss": 3.7749, "step": 17585 }, { "epoch": 1.1951352085881233, "grad_norm": 0.9096401333808899, "learning_rate": 0.0008506080989264846, "loss": 3.7979, "step": 17590 }, { "epoch": 1.1954749286587851, "grad_norm": 0.8496130704879761, "learning_rate": 0.0008505656339176518, "loss": 3.7891, "step": 17595 }, { "epoch": 1.195814648729447, "grad_norm": 0.6904847621917725, "learning_rate": 0.0008505231689088192, "loss": 3.697, "step": 17600 }, { "epoch": 1.1961543688001086, "grad_norm": 0.9885188341140747, "learning_rate": 0.0008504807038999864, "loss": 3.4673, "step": 17605 }, { "epoch": 1.1964940888707705, "grad_norm": 0.7779517769813538, "learning_rate": 0.0008504382388911537, "loss": 3.1831, "step": 17610 }, { "epoch": 1.1968338089414323, "grad_norm": 1.2768638134002686, "learning_rate": 0.0008503957738823211, "loss": 3.6647, "step": 17615 }, { "epoch": 1.197173529012094, "grad_norm": 0.8547077775001526, "learning_rate": 0.0008503533088734883, "loss": 3.5678, "step": 17620 }, { "epoch": 1.1975132490827558, "grad_norm": 0.9448862075805664, "learning_rate": 0.0008503108438646555, "loss": 3.3435, "step": 17625 }, { "epoch": 1.1978529691534177, "grad_norm": 0.7758262157440186, "learning_rate": 0.0008502683788558229, "loss": 3.5426, "step": 17630 }, { "epoch": 1.1981926892240793, "grad_norm": 0.6783838868141174, "learning_rate": 0.0008502259138469901, "loss": 3.4621, "step": 17635 }, { "epoch": 1.1985324092947411, "grad_norm": 0.8388721942901611, "learning_rate": 0.0008501834488381573, "loss": 3.7712, "step": 17640 }, { "epoch": 1.198872129365403, "grad_norm": 1.356605887413025, "learning_rate": 0.0008501409838293247, "loss": 3.4928, "step": 17645 }, { "epoch": 1.1992118494360646, "grad_norm": 0.8604318499565125, "learning_rate": 0.000850098518820492, "loss": 3.5861, "step": 17650 }, { "epoch": 1.1995515695067265, "grad_norm": 0.758072555065155, "learning_rate": 0.0008500560538116592, "loss": 3.6657, "step": 17655 }, { "epoch": 1.1998912895773883, "grad_norm": 0.816387414932251, "learning_rate": 0.0008500135888028265, "loss": 3.6745, "step": 17660 }, { "epoch": 1.20023100964805, "grad_norm": 0.8752282857894897, "learning_rate": 0.0008499711237939938, "loss": 3.7374, "step": 17665 }, { "epoch": 1.2005707297187118, "grad_norm": 0.6966831684112549, "learning_rate": 0.000849928658785161, "loss": 3.5319, "step": 17670 }, { "epoch": 1.2009104497893737, "grad_norm": 0.8720716834068298, "learning_rate": 0.0008498861937763283, "loss": 3.5579, "step": 17675 }, { "epoch": 1.2012501698600353, "grad_norm": 0.8371026515960693, "learning_rate": 0.0008498437287674957, "loss": 3.379, "step": 17680 }, { "epoch": 1.2015898899306972, "grad_norm": 1.0002015829086304, "learning_rate": 0.0008498012637586629, "loss": 3.757, "step": 17685 }, { "epoch": 1.2019296100013588, "grad_norm": 0.7890312075614929, "learning_rate": 0.0008497587987498302, "loss": 3.8396, "step": 17690 }, { "epoch": 1.2022693300720206, "grad_norm": 0.7668339014053345, "learning_rate": 0.0008497163337409974, "loss": 3.4249, "step": 17695 }, { "epoch": 1.2026090501426825, "grad_norm": 0.6268309950828552, "learning_rate": 0.0008496738687321647, "loss": 3.3876, "step": 17700 }, { "epoch": 1.2029487702133441, "grad_norm": 0.8368004560470581, "learning_rate": 0.000849631403723332, "loss": 3.3393, "step": 17705 }, { "epoch": 1.203288490284006, "grad_norm": 0.8494678139686584, "learning_rate": 0.0008495889387144992, "loss": 3.9974, "step": 17710 }, { "epoch": 1.2036282103546678, "grad_norm": 2.8528928756713867, "learning_rate": 0.0008495464737056666, "loss": 3.3394, "step": 17715 }, { "epoch": 1.2039679304253295, "grad_norm": 0.7073647379875183, "learning_rate": 0.0008495040086968339, "loss": 3.7781, "step": 17720 }, { "epoch": 1.2043076504959913, "grad_norm": 0.8016384243965149, "learning_rate": 0.0008494615436880011, "loss": 3.7749, "step": 17725 }, { "epoch": 1.2046473705666532, "grad_norm": 0.8881009817123413, "learning_rate": 0.0008494190786791683, "loss": 3.5789, "step": 17730 }, { "epoch": 1.2049870906373148, "grad_norm": 1.0380406379699707, "learning_rate": 0.0008493766136703357, "loss": 3.5601, "step": 17735 }, { "epoch": 1.2053268107079766, "grad_norm": 0.8030280470848083, "learning_rate": 0.0008493341486615029, "loss": 3.5167, "step": 17740 }, { "epoch": 1.2056665307786383, "grad_norm": 0.6908677816390991, "learning_rate": 0.0008492916836526701, "loss": 3.636, "step": 17745 }, { "epoch": 1.2060062508493001, "grad_norm": 0.7932618856430054, "learning_rate": 0.0008492492186438376, "loss": 3.7781, "step": 17750 }, { "epoch": 1.206345970919962, "grad_norm": 1.4347883462905884, "learning_rate": 0.0008492067536350048, "loss": 3.4018, "step": 17755 }, { "epoch": 1.2066856909906236, "grad_norm": 2.564614772796631, "learning_rate": 0.000849164288626172, "loss": 3.68, "step": 17760 }, { "epoch": 1.2070254110612855, "grad_norm": 0.7498965263366699, "learning_rate": 0.0008491218236173394, "loss": 3.7144, "step": 17765 }, { "epoch": 1.2073651311319473, "grad_norm": 1.0869518518447876, "learning_rate": 0.0008490793586085066, "loss": 3.4769, "step": 17770 }, { "epoch": 1.207704851202609, "grad_norm": 3.0085835456848145, "learning_rate": 0.0008490368935996738, "loss": 3.8415, "step": 17775 }, { "epoch": 1.2080445712732708, "grad_norm": 0.8655113577842712, "learning_rate": 0.0008489944285908411, "loss": 3.6228, "step": 17780 }, { "epoch": 1.2083842913439327, "grad_norm": 10.374820709228516, "learning_rate": 0.0008489519635820085, "loss": 3.5565, "step": 17785 }, { "epoch": 1.2087240114145943, "grad_norm": 1.2534915208816528, "learning_rate": 0.0008489094985731757, "loss": 3.9803, "step": 17790 }, { "epoch": 1.2090637314852561, "grad_norm": 0.7677974700927734, "learning_rate": 0.000848867033564343, "loss": 3.6673, "step": 17795 }, { "epoch": 1.209403451555918, "grad_norm": 0.8613688945770264, "learning_rate": 0.0008488245685555103, "loss": 3.8141, "step": 17800 }, { "epoch": 1.2097431716265796, "grad_norm": 0.9198121428489685, "learning_rate": 0.0008487821035466775, "loss": 3.3198, "step": 17805 }, { "epoch": 1.2100828916972415, "grad_norm": 2.8198814392089844, "learning_rate": 0.0008487396385378448, "loss": 3.5049, "step": 17810 }, { "epoch": 1.2104226117679033, "grad_norm": 1.3424034118652344, "learning_rate": 0.000848697173529012, "loss": 3.7246, "step": 17815 }, { "epoch": 1.210762331838565, "grad_norm": 0.7301501035690308, "learning_rate": 0.0008486547085201794, "loss": 3.6335, "step": 17820 }, { "epoch": 1.2111020519092268, "grad_norm": 5.553789138793945, "learning_rate": 0.0008486122435113467, "loss": 3.6726, "step": 17825 }, { "epoch": 1.2114417719798887, "grad_norm": 1.5404613018035889, "learning_rate": 0.0008485697785025139, "loss": 3.7583, "step": 17830 }, { "epoch": 1.2117814920505503, "grad_norm": 0.8810997009277344, "learning_rate": 0.0008485273134936812, "loss": 3.6484, "step": 17835 }, { "epoch": 1.2121212121212122, "grad_norm": 0.8727395534515381, "learning_rate": 0.0008484848484848485, "loss": 3.8148, "step": 17840 }, { "epoch": 1.212460932191874, "grad_norm": 0.8452165126800537, "learning_rate": 0.0008484423834760157, "loss": 3.7303, "step": 17845 }, { "epoch": 1.2128006522625356, "grad_norm": 0.8771845102310181, "learning_rate": 0.000848399918467183, "loss": 3.6504, "step": 17850 }, { "epoch": 1.2131403723331975, "grad_norm": 0.9076299071311951, "learning_rate": 0.0008483574534583504, "loss": 3.8004, "step": 17855 }, { "epoch": 1.2134800924038591, "grad_norm": 0.7213129997253418, "learning_rate": 0.0008483149884495176, "loss": 3.565, "step": 17860 }, { "epoch": 1.213819812474521, "grad_norm": 1.2307398319244385, "learning_rate": 0.0008482725234406849, "loss": 3.8107, "step": 17865 }, { "epoch": 1.2141595325451828, "grad_norm": 0.8960558772087097, "learning_rate": 0.0008482300584318522, "loss": 3.4757, "step": 17870 }, { "epoch": 1.2144992526158445, "grad_norm": 0.7587851285934448, "learning_rate": 0.0008481875934230194, "loss": 3.854, "step": 17875 }, { "epoch": 1.2148389726865063, "grad_norm": 0.7378130555152893, "learning_rate": 0.0008481451284141866, "loss": 3.7177, "step": 17880 }, { "epoch": 1.2151786927571682, "grad_norm": 0.9377250671386719, "learning_rate": 0.000848102663405354, "loss": 3.4566, "step": 17885 }, { "epoch": 1.2155184128278298, "grad_norm": 0.6432008147239685, "learning_rate": 0.0008480601983965213, "loss": 3.6867, "step": 17890 }, { "epoch": 1.2158581328984917, "grad_norm": 0.7586209177970886, "learning_rate": 0.0008480177333876886, "loss": 3.8213, "step": 17895 }, { "epoch": 1.2161978529691535, "grad_norm": 0.7203810214996338, "learning_rate": 0.0008479752683788559, "loss": 3.7752, "step": 17900 }, { "epoch": 1.2165375730398151, "grad_norm": 1.1642966270446777, "learning_rate": 0.0008479328033700231, "loss": 3.6636, "step": 17905 }, { "epoch": 1.216877293110477, "grad_norm": 0.7937231659889221, "learning_rate": 0.0008478903383611904, "loss": 3.5277, "step": 17910 }, { "epoch": 1.2172170131811386, "grad_norm": 1.0121312141418457, "learning_rate": 0.0008478478733523577, "loss": 3.585, "step": 17915 }, { "epoch": 1.2175567332518005, "grad_norm": 0.7786787152290344, "learning_rate": 0.0008478054083435249, "loss": 3.7303, "step": 17920 }, { "epoch": 1.2178964533224623, "grad_norm": 1.0357321500778198, "learning_rate": 0.0008477629433346923, "loss": 3.7174, "step": 17925 }, { "epoch": 1.218236173393124, "grad_norm": 0.8810575604438782, "learning_rate": 0.0008477204783258595, "loss": 3.4362, "step": 17930 }, { "epoch": 1.2185758934637858, "grad_norm": 0.7524788975715637, "learning_rate": 0.0008476780133170268, "loss": 3.7864, "step": 17935 }, { "epoch": 1.2189156135344477, "grad_norm": 0.8959475159645081, "learning_rate": 0.0008476355483081941, "loss": 3.4437, "step": 17940 }, { "epoch": 1.2192553336051093, "grad_norm": 0.6720718741416931, "learning_rate": 0.0008475930832993613, "loss": 3.9562, "step": 17945 }, { "epoch": 1.2195950536757711, "grad_norm": 0.816149890422821, "learning_rate": 0.0008475506182905286, "loss": 3.5219, "step": 17950 }, { "epoch": 1.219934773746433, "grad_norm": 2.802460193634033, "learning_rate": 0.000847508153281696, "loss": 3.5521, "step": 17955 }, { "epoch": 1.2202744938170946, "grad_norm": 0.7368342280387878, "learning_rate": 0.0008474656882728632, "loss": 3.6967, "step": 17960 }, { "epoch": 1.2206142138877565, "grad_norm": 0.935755729675293, "learning_rate": 0.0008474232232640305, "loss": 3.5699, "step": 17965 }, { "epoch": 1.2209539339584183, "grad_norm": 0.679125189781189, "learning_rate": 0.0008473807582551978, "loss": 3.6766, "step": 17970 }, { "epoch": 1.22129365402908, "grad_norm": 0.9291485548019409, "learning_rate": 0.000847338293246365, "loss": 3.695, "step": 17975 }, { "epoch": 1.2216333740997418, "grad_norm": 0.652441680431366, "learning_rate": 0.0008472958282375322, "loss": 3.6368, "step": 17980 }, { "epoch": 1.2219730941704037, "grad_norm": 1.0129534006118774, "learning_rate": 0.0008472533632286996, "loss": 3.4031, "step": 17985 }, { "epoch": 1.2223128142410653, "grad_norm": 0.8840212225914001, "learning_rate": 0.0008472108982198669, "loss": 3.7429, "step": 17990 }, { "epoch": 1.2226525343117272, "grad_norm": 0.9972313642501831, "learning_rate": 0.0008471684332110341, "loss": 3.7065, "step": 17995 }, { "epoch": 1.222992254382389, "grad_norm": 1.400220274925232, "learning_rate": 0.0008471259682022015, "loss": 3.6157, "step": 18000 }, { "epoch": 1.2233319744530506, "grad_norm": 0.8838474154472351, "learning_rate": 0.0008470835031933687, "loss": 3.5444, "step": 18005 }, { "epoch": 1.2236716945237125, "grad_norm": 0.8083096146583557, "learning_rate": 0.0008470410381845359, "loss": 3.6164, "step": 18010 }, { "epoch": 1.2240114145943743, "grad_norm": 0.6595573425292969, "learning_rate": 0.0008469985731757033, "loss": 3.4771, "step": 18015 }, { "epoch": 1.224351134665036, "grad_norm": 0.6899487972259521, "learning_rate": 0.0008469561081668705, "loss": 3.7901, "step": 18020 }, { "epoch": 1.2246908547356978, "grad_norm": 0.8112339377403259, "learning_rate": 0.0008469136431580378, "loss": 3.5362, "step": 18025 }, { "epoch": 1.2250305748063597, "grad_norm": 0.9344635605812073, "learning_rate": 0.0008468711781492051, "loss": 3.4623, "step": 18030 }, { "epoch": 1.2253702948770213, "grad_norm": 0.7305163741111755, "learning_rate": 0.0008468287131403724, "loss": 3.9629, "step": 18035 }, { "epoch": 1.2257100149476832, "grad_norm": 0.9904261827468872, "learning_rate": 0.0008467862481315396, "loss": 3.7765, "step": 18040 }, { "epoch": 1.2260497350183448, "grad_norm": 0.7735642790794373, "learning_rate": 0.0008467437831227069, "loss": 3.7925, "step": 18045 }, { "epoch": 1.2263894550890067, "grad_norm": 0.7642565965652466, "learning_rate": 0.0008467013181138742, "loss": 3.5578, "step": 18050 }, { "epoch": 1.2267291751596685, "grad_norm": 0.8942404389381409, "learning_rate": 0.0008466588531050414, "loss": 3.6719, "step": 18055 }, { "epoch": 1.2270688952303301, "grad_norm": 1.0126745700836182, "learning_rate": 0.0008466163880962088, "loss": 3.3308, "step": 18060 }, { "epoch": 1.227408615300992, "grad_norm": 0.765275239944458, "learning_rate": 0.0008465739230873761, "loss": 3.5693, "step": 18065 }, { "epoch": 1.2277483353716538, "grad_norm": 1.321358323097229, "learning_rate": 0.0008465314580785433, "loss": 3.758, "step": 18070 }, { "epoch": 1.2280880554423155, "grad_norm": 0.8480993509292603, "learning_rate": 0.0008464889930697106, "loss": 3.7312, "step": 18075 }, { "epoch": 1.2284277755129773, "grad_norm": 0.8472058773040771, "learning_rate": 0.0008464465280608778, "loss": 3.4252, "step": 18080 }, { "epoch": 1.228767495583639, "grad_norm": 0.8045173287391663, "learning_rate": 0.0008464040630520451, "loss": 3.5508, "step": 18085 }, { "epoch": 1.2291072156543008, "grad_norm": 0.9411778450012207, "learning_rate": 0.0008463615980432124, "loss": 3.6773, "step": 18090 }, { "epoch": 1.2294469357249627, "grad_norm": 0.8735380172729492, "learning_rate": 0.0008463191330343797, "loss": 3.6757, "step": 18095 }, { "epoch": 1.2297866557956243, "grad_norm": 0.7130782604217529, "learning_rate": 0.000846276668025547, "loss": 3.8401, "step": 18100 }, { "epoch": 1.2301263758662861, "grad_norm": 0.9153977632522583, "learning_rate": 0.0008462342030167143, "loss": 3.7362, "step": 18105 }, { "epoch": 1.230466095936948, "grad_norm": 0.7222729921340942, "learning_rate": 0.0008461917380078815, "loss": 3.606, "step": 18110 }, { "epoch": 1.2308058160076096, "grad_norm": 0.7029339075088501, "learning_rate": 0.0008461492729990487, "loss": 3.7076, "step": 18115 }, { "epoch": 1.2311455360782715, "grad_norm": 0.8207064270973206, "learning_rate": 0.0008461068079902161, "loss": 3.8793, "step": 18120 }, { "epoch": 1.2314852561489333, "grad_norm": 0.8649730682373047, "learning_rate": 0.0008460643429813833, "loss": 3.4853, "step": 18125 }, { "epoch": 1.231824976219595, "grad_norm": 0.9953786730766296, "learning_rate": 0.0008460218779725506, "loss": 3.5368, "step": 18130 }, { "epoch": 1.2321646962902568, "grad_norm": 0.745816171169281, "learning_rate": 0.000845979412963718, "loss": 3.6434, "step": 18135 }, { "epoch": 1.2325044163609187, "grad_norm": 0.8291403651237488, "learning_rate": 0.0008459369479548852, "loss": 3.4113, "step": 18140 }, { "epoch": 1.2328441364315803, "grad_norm": 1.1412771940231323, "learning_rate": 0.0008458944829460524, "loss": 3.5915, "step": 18145 }, { "epoch": 1.2331838565022422, "grad_norm": 0.8733716011047363, "learning_rate": 0.0008458520179372198, "loss": 3.6662, "step": 18150 }, { "epoch": 1.233523576572904, "grad_norm": 0.7471228837966919, "learning_rate": 0.000845809552928387, "loss": 3.483, "step": 18155 }, { "epoch": 1.2338632966435656, "grad_norm": 1.6504098176956177, "learning_rate": 0.0008457670879195542, "loss": 3.6134, "step": 18160 }, { "epoch": 1.2342030167142275, "grad_norm": 1.4210253953933716, "learning_rate": 0.0008457246229107217, "loss": 3.3989, "step": 18165 }, { "epoch": 1.2345427367848893, "grad_norm": 0.9557244181632996, "learning_rate": 0.0008456821579018889, "loss": 3.5461, "step": 18170 }, { "epoch": 1.234882456855551, "grad_norm": 0.881510317325592, "learning_rate": 0.0008456396928930561, "loss": 3.6149, "step": 18175 }, { "epoch": 1.2352221769262128, "grad_norm": 0.9930477142333984, "learning_rate": 0.0008455972278842234, "loss": 3.6329, "step": 18180 }, { "epoch": 1.2355618969968747, "grad_norm": 0.9029057621955872, "learning_rate": 0.0008455547628753907, "loss": 3.6404, "step": 18185 }, { "epoch": 1.2359016170675363, "grad_norm": 0.7720837593078613, "learning_rate": 0.0008455122978665579, "loss": 3.511, "step": 18190 }, { "epoch": 1.2362413371381982, "grad_norm": 0.9271489381790161, "learning_rate": 0.0008454698328577252, "loss": 3.6233, "step": 18195 }, { "epoch": 1.23658105720886, "grad_norm": 0.7605803608894348, "learning_rate": 0.0008454273678488926, "loss": 3.6811, "step": 18200 }, { "epoch": 1.2369207772795217, "grad_norm": 0.8124759197235107, "learning_rate": 0.0008453849028400598, "loss": 3.4208, "step": 18205 }, { "epoch": 1.2372604973501835, "grad_norm": 0.9307723045349121, "learning_rate": 0.0008453424378312271, "loss": 3.59, "step": 18210 }, { "epoch": 1.2376002174208451, "grad_norm": 0.7233723402023315, "learning_rate": 0.0008452999728223943, "loss": 3.5635, "step": 18215 }, { "epoch": 1.237939937491507, "grad_norm": 0.7466638088226318, "learning_rate": 0.0008452575078135616, "loss": 3.4461, "step": 18220 }, { "epoch": 1.2382796575621688, "grad_norm": 0.8962716460227966, "learning_rate": 0.0008452150428047289, "loss": 3.8297, "step": 18225 }, { "epoch": 1.2386193776328305, "grad_norm": 1.0340651273727417, "learning_rate": 0.0008451725777958961, "loss": 3.6565, "step": 18230 }, { "epoch": 1.2389590977034923, "grad_norm": 0.7753156423568726, "learning_rate": 0.0008451301127870636, "loss": 3.7198, "step": 18235 }, { "epoch": 1.2392988177741542, "grad_norm": 0.6185409426689148, "learning_rate": 0.0008450876477782308, "loss": 3.5466, "step": 18240 }, { "epoch": 1.2396385378448158, "grad_norm": 0.9427835941314697, "learning_rate": 0.000845045182769398, "loss": 3.5347, "step": 18245 }, { "epoch": 1.2399782579154777, "grad_norm": 0.8883402347564697, "learning_rate": 0.0008450027177605654, "loss": 3.6476, "step": 18250 }, { "epoch": 1.2403179779861393, "grad_norm": 0.9068079590797424, "learning_rate": 0.0008449602527517326, "loss": 3.634, "step": 18255 }, { "epoch": 1.2406576980568012, "grad_norm": 0.8822120428085327, "learning_rate": 0.0008449177877428998, "loss": 3.4382, "step": 18260 }, { "epoch": 1.240997418127463, "grad_norm": 0.8714054226875305, "learning_rate": 0.0008448753227340671, "loss": 3.8631, "step": 18265 }, { "epoch": 1.2413371381981246, "grad_norm": 0.8798202276229858, "learning_rate": 0.0008448328577252345, "loss": 3.3534, "step": 18270 }, { "epoch": 1.2416768582687865, "grad_norm": 0.9595583081245422, "learning_rate": 0.0008447903927164017, "loss": 3.506, "step": 18275 }, { "epoch": 1.2420165783394483, "grad_norm": 0.7908827662467957, "learning_rate": 0.000844747927707569, "loss": 3.7485, "step": 18280 }, { "epoch": 1.24235629841011, "grad_norm": 0.8436384201049805, "learning_rate": 0.0008447054626987363, "loss": 3.9602, "step": 18285 }, { "epoch": 1.2426960184807718, "grad_norm": 0.9083687663078308, "learning_rate": 0.0008446629976899035, "loss": 3.6702, "step": 18290 }, { "epoch": 1.2430357385514337, "grad_norm": 0.7771982550621033, "learning_rate": 0.0008446205326810708, "loss": 3.5023, "step": 18295 }, { "epoch": 1.2433754586220953, "grad_norm": 0.8990803360939026, "learning_rate": 0.0008445780676722381, "loss": 3.7242, "step": 18300 }, { "epoch": 1.2437151786927572, "grad_norm": 0.7780101299285889, "learning_rate": 0.0008445356026634054, "loss": 3.7996, "step": 18305 }, { "epoch": 1.244054898763419, "grad_norm": 0.9294916987419128, "learning_rate": 0.0008444931376545727, "loss": 3.7713, "step": 18310 }, { "epoch": 1.2443946188340806, "grad_norm": 0.7924439311027527, "learning_rate": 0.00084445067264574, "loss": 3.8392, "step": 18315 }, { "epoch": 1.2447343389047425, "grad_norm": 1.612853765487671, "learning_rate": 0.0008444082076369072, "loss": 3.6889, "step": 18320 }, { "epoch": 1.2450740589754044, "grad_norm": 0.7830581068992615, "learning_rate": 0.0008443657426280745, "loss": 3.4884, "step": 18325 }, { "epoch": 1.245413779046066, "grad_norm": 0.7962588667869568, "learning_rate": 0.0008443232776192417, "loss": 3.4492, "step": 18330 }, { "epoch": 1.2457534991167278, "grad_norm": 1.3123995065689087, "learning_rate": 0.000844280812610409, "loss": 3.5741, "step": 18335 }, { "epoch": 1.2460932191873897, "grad_norm": 0.8696267008781433, "learning_rate": 0.0008442383476015764, "loss": 3.5459, "step": 18340 }, { "epoch": 1.2464329392580513, "grad_norm": 0.7638306617736816, "learning_rate": 0.0008441958825927436, "loss": 3.594, "step": 18345 }, { "epoch": 1.2467726593287132, "grad_norm": 0.8447389006614685, "learning_rate": 0.0008441534175839109, "loss": 3.4698, "step": 18350 }, { "epoch": 1.247112379399375, "grad_norm": 0.716288149356842, "learning_rate": 0.0008441109525750782, "loss": 3.8621, "step": 18355 }, { "epoch": 1.2474520994700367, "grad_norm": 0.7221596837043762, "learning_rate": 0.0008440684875662454, "loss": 3.7811, "step": 18360 }, { "epoch": 1.2477918195406985, "grad_norm": 0.6570661664009094, "learning_rate": 0.0008440260225574126, "loss": 3.6572, "step": 18365 }, { "epoch": 1.2481315396113604, "grad_norm": 0.8833088874816895, "learning_rate": 0.00084398355754858, "loss": 3.4318, "step": 18370 }, { "epoch": 1.248471259682022, "grad_norm": 2.111253023147583, "learning_rate": 0.0008439410925397473, "loss": 3.7862, "step": 18375 }, { "epoch": 1.2488109797526838, "grad_norm": 1.6731688976287842, "learning_rate": 0.0008438986275309145, "loss": 3.3248, "step": 18380 }, { "epoch": 1.2491506998233455, "grad_norm": 0.6982871890068054, "learning_rate": 0.0008438561625220819, "loss": 3.8462, "step": 18385 }, { "epoch": 1.2494904198940073, "grad_norm": 0.8089686632156372, "learning_rate": 0.0008438136975132491, "loss": 3.4609, "step": 18390 }, { "epoch": 1.2498301399646692, "grad_norm": 0.7364463210105896, "learning_rate": 0.0008437712325044163, "loss": 3.5474, "step": 18395 }, { "epoch": 1.2501698600353308, "grad_norm": 0.6618789434432983, "learning_rate": 0.0008437287674955837, "loss": 3.6525, "step": 18400 }, { "epoch": 1.2505095801059927, "grad_norm": 1.1544857025146484, "learning_rate": 0.0008436863024867509, "loss": 3.7402, "step": 18405 }, { "epoch": 1.2508493001766543, "grad_norm": 0.6474128365516663, "learning_rate": 0.0008436438374779182, "loss": 3.724, "step": 18410 }, { "epoch": 1.2511890202473162, "grad_norm": 0.7408311367034912, "learning_rate": 0.0008436013724690856, "loss": 3.3269, "step": 18415 }, { "epoch": 1.251528740317978, "grad_norm": 0.9004170894622803, "learning_rate": 0.0008435589074602528, "loss": 3.4703, "step": 18420 }, { "epoch": 1.2518684603886396, "grad_norm": 1.001169204711914, "learning_rate": 0.00084351644245142, "loss": 3.6058, "step": 18425 }, { "epoch": 1.2522081804593015, "grad_norm": 1.0033454895019531, "learning_rate": 0.0008434739774425873, "loss": 3.3701, "step": 18430 }, { "epoch": 1.2525479005299633, "grad_norm": 1.0031142234802246, "learning_rate": 0.0008434315124337546, "loss": 3.7816, "step": 18435 }, { "epoch": 1.252887620600625, "grad_norm": 0.8103834986686707, "learning_rate": 0.0008433890474249218, "loss": 3.5997, "step": 18440 }, { "epoch": 1.2532273406712868, "grad_norm": 0.870478093624115, "learning_rate": 0.0008433465824160892, "loss": 3.515, "step": 18445 }, { "epoch": 1.2535670607419487, "grad_norm": 0.6764253973960876, "learning_rate": 0.0008433041174072565, "loss": 3.79, "step": 18450 }, { "epoch": 1.2539067808126103, "grad_norm": 0.7315080761909485, "learning_rate": 0.0008432616523984237, "loss": 3.4807, "step": 18455 }, { "epoch": 1.2542465008832722, "grad_norm": 1.4298170804977417, "learning_rate": 0.000843219187389591, "loss": 3.8018, "step": 18460 }, { "epoch": 1.254586220953934, "grad_norm": 1.0117830038070679, "learning_rate": 0.0008431767223807582, "loss": 3.6612, "step": 18465 }, { "epoch": 1.2549259410245956, "grad_norm": 1.1819978952407837, "learning_rate": 0.0008431342573719255, "loss": 3.3462, "step": 18470 }, { "epoch": 1.2552656610952575, "grad_norm": 0.9841753840446472, "learning_rate": 0.0008430917923630928, "loss": 3.6861, "step": 18475 }, { "epoch": 1.2556053811659194, "grad_norm": 1.1169837713241577, "learning_rate": 0.0008430493273542601, "loss": 3.5949, "step": 18480 }, { "epoch": 1.255945101236581, "grad_norm": 0.8819544911384583, "learning_rate": 0.0008430068623454274, "loss": 3.4074, "step": 18485 }, { "epoch": 1.2562848213072428, "grad_norm": 1.1495901346206665, "learning_rate": 0.0008429643973365947, "loss": 3.6429, "step": 18490 }, { "epoch": 1.2566245413779047, "grad_norm": 1.0049405097961426, "learning_rate": 0.0008429219323277619, "loss": 3.6618, "step": 18495 }, { "epoch": 1.2569642614485663, "grad_norm": 0.7330548763275146, "learning_rate": 0.0008428794673189292, "loss": 3.4593, "step": 18500 }, { "epoch": 1.2573039815192282, "grad_norm": 1.1443344354629517, "learning_rate": 0.0008428370023100965, "loss": 3.6317, "step": 18505 }, { "epoch": 1.25764370158989, "grad_norm": 0.867304801940918, "learning_rate": 0.0008427945373012637, "loss": 3.6558, "step": 18510 }, { "epoch": 1.2579834216605517, "grad_norm": 0.876792848110199, "learning_rate": 0.000842752072292431, "loss": 3.8291, "step": 18515 }, { "epoch": 1.2583231417312135, "grad_norm": 1.71896493434906, "learning_rate": 0.0008427096072835984, "loss": 3.6977, "step": 18520 }, { "epoch": 1.2586628618018754, "grad_norm": 0.7942525148391724, "learning_rate": 0.0008426671422747656, "loss": 3.8734, "step": 18525 }, { "epoch": 1.259002581872537, "grad_norm": 0.6905406713485718, "learning_rate": 0.0008426246772659328, "loss": 3.6691, "step": 18530 }, { "epoch": 1.2593423019431988, "grad_norm": 0.7500548958778381, "learning_rate": 0.0008425822122571002, "loss": 3.9068, "step": 18535 }, { "epoch": 1.2596820220138607, "grad_norm": 0.822364866733551, "learning_rate": 0.0008425397472482674, "loss": 3.611, "step": 18540 }, { "epoch": 1.2600217420845223, "grad_norm": 0.8194557428359985, "learning_rate": 0.0008424972822394346, "loss": 3.7225, "step": 18545 }, { "epoch": 1.2603614621551842, "grad_norm": 0.9240332245826721, "learning_rate": 0.0008424548172306021, "loss": 3.8984, "step": 18550 }, { "epoch": 1.260701182225846, "grad_norm": 0.9217618703842163, "learning_rate": 0.0008424123522217693, "loss": 3.4859, "step": 18555 }, { "epoch": 1.2610409022965077, "grad_norm": 0.6778313517570496, "learning_rate": 0.0008423698872129365, "loss": 3.9416, "step": 18560 }, { "epoch": 1.2613806223671695, "grad_norm": 0.8785179257392883, "learning_rate": 0.0008423274222041038, "loss": 3.6826, "step": 18565 }, { "epoch": 1.2617203424378312, "grad_norm": 0.8147216439247131, "learning_rate": 0.0008422849571952711, "loss": 3.7592, "step": 18570 }, { "epoch": 1.262060062508493, "grad_norm": 0.8724662065505981, "learning_rate": 0.0008422424921864384, "loss": 3.4872, "step": 18575 }, { "epoch": 1.2623997825791546, "grad_norm": 0.8504079580307007, "learning_rate": 0.0008422000271776057, "loss": 3.4962, "step": 18580 }, { "epoch": 1.2627395026498165, "grad_norm": 1.1918799877166748, "learning_rate": 0.000842157562168773, "loss": 3.7573, "step": 18585 }, { "epoch": 1.2630792227204783, "grad_norm": 0.9112948775291443, "learning_rate": 0.0008421150971599403, "loss": 3.7774, "step": 18590 }, { "epoch": 1.26341894279114, "grad_norm": 0.83143150806427, "learning_rate": 0.0008420726321511075, "loss": 3.5513, "step": 18595 }, { "epoch": 1.2637586628618018, "grad_norm": 0.8087087273597717, "learning_rate": 0.0008420301671422748, "loss": 3.745, "step": 18600 }, { "epoch": 1.2640983829324637, "grad_norm": 0.8781185150146484, "learning_rate": 0.0008419877021334421, "loss": 3.507, "step": 18605 }, { "epoch": 1.2644381030031253, "grad_norm": 0.6325780749320984, "learning_rate": 0.0008419452371246093, "loss": 3.3865, "step": 18610 }, { "epoch": 1.2647778230737872, "grad_norm": 0.8836072683334351, "learning_rate": 0.0008419027721157766, "loss": 3.7734, "step": 18615 }, { "epoch": 1.265117543144449, "grad_norm": 0.8629582524299622, "learning_rate": 0.000841860307106944, "loss": 3.7886, "step": 18620 }, { "epoch": 1.2654572632151107, "grad_norm": 0.8805978298187256, "learning_rate": 0.0008418178420981112, "loss": 3.4427, "step": 18625 }, { "epoch": 1.2657969832857725, "grad_norm": 0.9512669444084167, "learning_rate": 0.0008417753770892784, "loss": 3.3798, "step": 18630 }, { "epoch": 1.2661367033564344, "grad_norm": 0.8685581684112549, "learning_rate": 0.0008417329120804458, "loss": 3.8713, "step": 18635 }, { "epoch": 1.266476423427096, "grad_norm": 0.7789969444274902, "learning_rate": 0.000841690447071613, "loss": 3.3721, "step": 18640 }, { "epoch": 1.2668161434977578, "grad_norm": 0.7959485054016113, "learning_rate": 0.0008416479820627802, "loss": 3.3812, "step": 18645 }, { "epoch": 1.2671558635684197, "grad_norm": 0.9449043869972229, "learning_rate": 0.0008416055170539477, "loss": 3.6233, "step": 18650 }, { "epoch": 1.2674955836390813, "grad_norm": 0.668481171131134, "learning_rate": 0.0008415630520451149, "loss": 3.7671, "step": 18655 }, { "epoch": 1.2678353037097432, "grad_norm": 0.839979887008667, "learning_rate": 0.0008415205870362821, "loss": 3.7694, "step": 18660 }, { "epoch": 1.268175023780405, "grad_norm": 0.9389800429344177, "learning_rate": 0.0008414781220274494, "loss": 3.6999, "step": 18665 }, { "epoch": 1.2685147438510667, "grad_norm": 1.1306737661361694, "learning_rate": 0.0008414356570186167, "loss": 3.5589, "step": 18670 }, { "epoch": 1.2688544639217285, "grad_norm": 0.771314799785614, "learning_rate": 0.0008413931920097839, "loss": 3.4752, "step": 18675 }, { "epoch": 1.2691941839923904, "grad_norm": 0.9299555420875549, "learning_rate": 0.0008413507270009512, "loss": 3.5053, "step": 18680 }, { "epoch": 1.269533904063052, "grad_norm": 0.8187107443809509, "learning_rate": 0.0008413082619921186, "loss": 3.8558, "step": 18685 }, { "epoch": 1.2698736241337139, "grad_norm": 1.8932468891143799, "learning_rate": 0.0008412657969832858, "loss": 3.8907, "step": 18690 }, { "epoch": 1.2702133442043757, "grad_norm": 0.9495120048522949, "learning_rate": 0.0008412233319744531, "loss": 3.6481, "step": 18695 }, { "epoch": 1.2705530642750373, "grad_norm": 0.8422479033470154, "learning_rate": 0.0008411808669656204, "loss": 3.8434, "step": 18700 }, { "epoch": 1.2708927843456992, "grad_norm": 0.6998239755630493, "learning_rate": 0.0008411384019567876, "loss": 3.5922, "step": 18705 }, { "epoch": 1.271232504416361, "grad_norm": 0.7931044697761536, "learning_rate": 0.0008410959369479549, "loss": 3.6347, "step": 18710 }, { "epoch": 1.2715722244870227, "grad_norm": 0.98618483543396, "learning_rate": 0.0008410534719391221, "loss": 3.483, "step": 18715 }, { "epoch": 1.2719119445576845, "grad_norm": 1.4689273834228516, "learning_rate": 0.0008410110069302895, "loss": 3.4692, "step": 18720 }, { "epoch": 1.2722516646283464, "grad_norm": 0.7294166684150696, "learning_rate": 0.0008409685419214568, "loss": 3.5618, "step": 18725 }, { "epoch": 1.272591384699008, "grad_norm": 0.986870527267456, "learning_rate": 0.000840926076912624, "loss": 3.4794, "step": 18730 }, { "epoch": 1.2729311047696699, "grad_norm": 1.0757185220718384, "learning_rate": 0.0008408836119037913, "loss": 3.5533, "step": 18735 }, { "epoch": 1.2732708248403315, "grad_norm": 0.7239949703216553, "learning_rate": 0.0008408411468949586, "loss": 3.3803, "step": 18740 }, { "epoch": 1.2736105449109933, "grad_norm": 0.9407927393913269, "learning_rate": 0.0008407986818861258, "loss": 3.5135, "step": 18745 }, { "epoch": 1.273950264981655, "grad_norm": 0.9007251262664795, "learning_rate": 0.000840756216877293, "loss": 3.6895, "step": 18750 }, { "epoch": 1.2742899850523168, "grad_norm": 1.447066068649292, "learning_rate": 0.0008407137518684605, "loss": 3.6948, "step": 18755 }, { "epoch": 1.2746297051229787, "grad_norm": 0.7781888246536255, "learning_rate": 0.0008406712868596277, "loss": 3.6503, "step": 18760 }, { "epoch": 1.2749694251936403, "grad_norm": 1.0265910625457764, "learning_rate": 0.0008406288218507949, "loss": 3.4891, "step": 18765 }, { "epoch": 1.2753091452643022, "grad_norm": 0.7800175547599792, "learning_rate": 0.0008405863568419623, "loss": 3.4718, "step": 18770 }, { "epoch": 1.275648865334964, "grad_norm": 0.943182647228241, "learning_rate": 0.0008405438918331295, "loss": 3.6423, "step": 18775 }, { "epoch": 1.2759885854056257, "grad_norm": 0.8891888856887817, "learning_rate": 0.0008405014268242967, "loss": 3.8814, "step": 18780 }, { "epoch": 1.2763283054762875, "grad_norm": 0.9731910228729248, "learning_rate": 0.0008404589618154641, "loss": 3.7375, "step": 18785 }, { "epoch": 1.2766680255469494, "grad_norm": 0.8911810517311096, "learning_rate": 0.0008404164968066314, "loss": 3.9072, "step": 18790 }, { "epoch": 1.277007745617611, "grad_norm": 0.6762071847915649, "learning_rate": 0.0008403740317977986, "loss": 3.671, "step": 18795 }, { "epoch": 1.2773474656882728, "grad_norm": 0.9559621810913086, "learning_rate": 0.000840331566788966, "loss": 3.5413, "step": 18800 }, { "epoch": 1.2776871857589347, "grad_norm": 0.7364009022712708, "learning_rate": 0.0008402891017801332, "loss": 3.6753, "step": 18805 }, { "epoch": 1.2780269058295963, "grad_norm": 0.7066251039505005, "learning_rate": 0.0008402466367713004, "loss": 3.5796, "step": 18810 }, { "epoch": 1.2783666259002582, "grad_norm": 0.8029266595840454, "learning_rate": 0.0008402041717624677, "loss": 3.6567, "step": 18815 }, { "epoch": 1.27870634597092, "grad_norm": 0.8868125677108765, "learning_rate": 0.000840161706753635, "loss": 3.7226, "step": 18820 }, { "epoch": 1.2790460660415817, "grad_norm": 0.7348118424415588, "learning_rate": 0.0008401192417448023, "loss": 3.6647, "step": 18825 }, { "epoch": 1.2793857861122435, "grad_norm": 1.0088237524032593, "learning_rate": 0.0008400767767359696, "loss": 3.5265, "step": 18830 }, { "epoch": 1.2797255061829054, "grad_norm": 1.6056684255599976, "learning_rate": 0.0008400343117271369, "loss": 3.8512, "step": 18835 }, { "epoch": 1.280065226253567, "grad_norm": 0.8223760724067688, "learning_rate": 0.0008399918467183041, "loss": 3.7661, "step": 18840 }, { "epoch": 1.2804049463242289, "grad_norm": 0.7191720008850098, "learning_rate": 0.0008399493817094714, "loss": 3.5785, "step": 18845 }, { "epoch": 1.2807446663948907, "grad_norm": 1.026749610900879, "learning_rate": 0.0008399069167006386, "loss": 3.5328, "step": 18850 }, { "epoch": 1.2810843864655523, "grad_norm": 0.6405095458030701, "learning_rate": 0.0008398644516918059, "loss": 3.6255, "step": 18855 }, { "epoch": 1.2814241065362142, "grad_norm": 0.9298734068870544, "learning_rate": 0.0008398219866829733, "loss": 3.6699, "step": 18860 }, { "epoch": 1.281763826606876, "grad_norm": 0.7743015885353088, "learning_rate": 0.0008397795216741405, "loss": 3.5715, "step": 18865 }, { "epoch": 1.2821035466775377, "grad_norm": 0.7801750898361206, "learning_rate": 0.0008397370566653078, "loss": 3.7601, "step": 18870 }, { "epoch": 1.2824432667481995, "grad_norm": 0.7174571752548218, "learning_rate": 0.0008396945916564751, "loss": 3.6115, "step": 18875 }, { "epoch": 1.2827829868188614, "grad_norm": 0.8859849572181702, "learning_rate": 0.0008396521266476423, "loss": 3.4274, "step": 18880 }, { "epoch": 1.283122706889523, "grad_norm": 0.823289692401886, "learning_rate": 0.0008396096616388096, "loss": 3.7089, "step": 18885 }, { "epoch": 1.2834624269601849, "grad_norm": 0.9599798917770386, "learning_rate": 0.0008395671966299769, "loss": 3.3319, "step": 18890 }, { "epoch": 1.2838021470308467, "grad_norm": 0.7811353802680969, "learning_rate": 0.0008395247316211442, "loss": 3.6249, "step": 18895 }, { "epoch": 1.2841418671015083, "grad_norm": 0.8638587594032288, "learning_rate": 0.0008394822666123114, "loss": 3.575, "step": 18900 }, { "epoch": 1.2844815871721702, "grad_norm": 1.471102237701416, "learning_rate": 0.0008394398016034788, "loss": 3.583, "step": 18905 }, { "epoch": 1.2848213072428318, "grad_norm": 0.6785420179367065, "learning_rate": 0.000839397336594646, "loss": 3.8226, "step": 18910 }, { "epoch": 1.2851610273134937, "grad_norm": 0.8843326568603516, "learning_rate": 0.0008393548715858133, "loss": 3.1594, "step": 18915 }, { "epoch": 1.2855007473841553, "grad_norm": 0.7196475863456726, "learning_rate": 0.0008393124065769806, "loss": 3.5177, "step": 18920 }, { "epoch": 1.2858404674548172, "grad_norm": 0.9442929029464722, "learning_rate": 0.0008392699415681478, "loss": 3.5674, "step": 18925 }, { "epoch": 1.286180187525479, "grad_norm": 1.0013362169265747, "learning_rate": 0.0008392274765593152, "loss": 3.5787, "step": 18930 }, { "epoch": 1.2865199075961407, "grad_norm": 0.780559778213501, "learning_rate": 0.0008391850115504825, "loss": 3.4753, "step": 18935 }, { "epoch": 1.2868596276668025, "grad_norm": 1.0348625183105469, "learning_rate": 0.0008391425465416497, "loss": 3.7592, "step": 18940 }, { "epoch": 1.2871993477374644, "grad_norm": 0.8198745846748352, "learning_rate": 0.000839100081532817, "loss": 3.5791, "step": 18945 }, { "epoch": 1.287539067808126, "grad_norm": 0.8706223368644714, "learning_rate": 0.0008390576165239842, "loss": 3.8354, "step": 18950 }, { "epoch": 1.2878787878787878, "grad_norm": 0.6900718212127686, "learning_rate": 0.0008390151515151515, "loss": 3.7682, "step": 18955 }, { "epoch": 1.2882185079494497, "grad_norm": 0.99193274974823, "learning_rate": 0.0008389726865063188, "loss": 3.4471, "step": 18960 }, { "epoch": 1.2885582280201113, "grad_norm": 0.8024405241012573, "learning_rate": 0.0008389302214974861, "loss": 3.4529, "step": 18965 }, { "epoch": 1.2888979480907732, "grad_norm": 0.8801078796386719, "learning_rate": 0.0008388877564886534, "loss": 3.7201, "step": 18970 }, { "epoch": 1.289237668161435, "grad_norm": 0.746083676815033, "learning_rate": 0.0008388452914798207, "loss": 3.6776, "step": 18975 }, { "epoch": 1.2895773882320967, "grad_norm": 1.5363494157791138, "learning_rate": 0.0008388028264709879, "loss": 3.6466, "step": 18980 }, { "epoch": 1.2899171083027585, "grad_norm": 0.6721611022949219, "learning_rate": 0.0008387603614621552, "loss": 3.669, "step": 18985 }, { "epoch": 1.2902568283734204, "grad_norm": 0.720432460308075, "learning_rate": 0.0008387178964533225, "loss": 3.4773, "step": 18990 }, { "epoch": 1.290596548444082, "grad_norm": 1.061953067779541, "learning_rate": 0.0008386754314444897, "loss": 3.997, "step": 18995 }, { "epoch": 1.2909362685147439, "grad_norm": 0.761386513710022, "learning_rate": 0.000838632966435657, "loss": 3.496, "step": 19000 }, { "epoch": 1.2912759885854057, "grad_norm": 0.8305133581161499, "learning_rate": 0.0008385905014268244, "loss": 3.5356, "step": 19005 }, { "epoch": 1.2916157086560673, "grad_norm": 0.7632470726966858, "learning_rate": 0.0008385480364179916, "loss": 3.6356, "step": 19010 }, { "epoch": 1.2919554287267292, "grad_norm": 0.9063440561294556, "learning_rate": 0.0008385055714091588, "loss": 3.6967, "step": 19015 }, { "epoch": 1.292295148797391, "grad_norm": 0.9601851105690002, "learning_rate": 0.0008384631064003262, "loss": 3.7234, "step": 19020 }, { "epoch": 1.2926348688680527, "grad_norm": 1.3528258800506592, "learning_rate": 0.0008384206413914934, "loss": 3.6734, "step": 19025 }, { "epoch": 1.2929745889387145, "grad_norm": 0.9065192341804504, "learning_rate": 0.0008383781763826606, "loss": 3.7763, "step": 19030 }, { "epoch": 1.2933143090093764, "grad_norm": 0.9659169912338257, "learning_rate": 0.0008383357113738281, "loss": 3.9401, "step": 19035 }, { "epoch": 1.293654029080038, "grad_norm": 0.8508115410804749, "learning_rate": 0.0008382932463649953, "loss": 3.6517, "step": 19040 }, { "epoch": 1.2939937491506999, "grad_norm": 0.7941755056381226, "learning_rate": 0.0008382507813561625, "loss": 3.803, "step": 19045 }, { "epoch": 1.2943334692213617, "grad_norm": 0.7235051393508911, "learning_rate": 0.0008382083163473299, "loss": 3.3518, "step": 19050 }, { "epoch": 1.2946731892920234, "grad_norm": 3.7901744842529297, "learning_rate": 0.0008381658513384971, "loss": 3.6621, "step": 19055 }, { "epoch": 1.2950129093626852, "grad_norm": 0.7474851012229919, "learning_rate": 0.0008381233863296643, "loss": 3.7553, "step": 19060 }, { "epoch": 1.295352629433347, "grad_norm": 0.7516352534294128, "learning_rate": 0.0008380809213208316, "loss": 3.4176, "step": 19065 }, { "epoch": 1.2956923495040087, "grad_norm": 0.8967887163162231, "learning_rate": 0.000838038456311999, "loss": 3.499, "step": 19070 }, { "epoch": 1.2960320695746705, "grad_norm": 1.0398764610290527, "learning_rate": 0.0008379959913031662, "loss": 3.5029, "step": 19075 }, { "epoch": 1.2963717896453322, "grad_norm": 0.7808350920677185, "learning_rate": 0.0008379535262943335, "loss": 3.8588, "step": 19080 }, { "epoch": 1.296711509715994, "grad_norm": 0.8476454019546509, "learning_rate": 0.0008379110612855008, "loss": 3.8217, "step": 19085 }, { "epoch": 1.2970512297866557, "grad_norm": 3.1767690181732178, "learning_rate": 0.000837868596276668, "loss": 3.6801, "step": 19090 }, { "epoch": 1.2973909498573175, "grad_norm": 0.7458839416503906, "learning_rate": 0.0008378261312678353, "loss": 3.4079, "step": 19095 }, { "epoch": 1.2977306699279794, "grad_norm": 1.2557902336120605, "learning_rate": 0.0008377836662590025, "loss": 3.528, "step": 19100 }, { "epoch": 1.298070389998641, "grad_norm": 0.8059072494506836, "learning_rate": 0.0008377412012501699, "loss": 3.5666, "step": 19105 }, { "epoch": 1.2984101100693028, "grad_norm": 0.9414175152778625, "learning_rate": 0.0008376987362413372, "loss": 3.535, "step": 19110 }, { "epoch": 1.2987498301399647, "grad_norm": 0.9976986646652222, "learning_rate": 0.0008376562712325044, "loss": 3.5132, "step": 19115 }, { "epoch": 1.2990895502106263, "grad_norm": 0.7399609684944153, "learning_rate": 0.0008376138062236717, "loss": 3.702, "step": 19120 }, { "epoch": 1.2994292702812882, "grad_norm": 0.746816098690033, "learning_rate": 0.000837571341214839, "loss": 3.6675, "step": 19125 }, { "epoch": 1.29976899035195, "grad_norm": 0.7854049205780029, "learning_rate": 0.0008375288762060062, "loss": 3.6947, "step": 19130 }, { "epoch": 1.3001087104226117, "grad_norm": 0.9822672009468079, "learning_rate": 0.0008374864111971734, "loss": 3.5687, "step": 19135 }, { "epoch": 1.3004484304932735, "grad_norm": 0.8535969257354736, "learning_rate": 0.0008374439461883409, "loss": 3.4407, "step": 19140 }, { "epoch": 1.3007881505639354, "grad_norm": 0.9285767674446106, "learning_rate": 0.0008374014811795081, "loss": 3.707, "step": 19145 }, { "epoch": 1.301127870634597, "grad_norm": 0.7824862003326416, "learning_rate": 0.0008373590161706753, "loss": 3.7798, "step": 19150 }, { "epoch": 1.3014675907052589, "grad_norm": 0.7672741413116455, "learning_rate": 0.0008373165511618427, "loss": 3.5057, "step": 19155 }, { "epoch": 1.3018073107759207, "grad_norm": 0.8770267367362976, "learning_rate": 0.0008372740861530099, "loss": 3.6849, "step": 19160 }, { "epoch": 1.3021470308465823, "grad_norm": 1.0106281042099, "learning_rate": 0.0008372316211441771, "loss": 3.5227, "step": 19165 }, { "epoch": 1.3024867509172442, "grad_norm": 0.8849656581878662, "learning_rate": 0.0008371891561353446, "loss": 3.9317, "step": 19170 }, { "epoch": 1.302826470987906, "grad_norm": 0.8273836970329285, "learning_rate": 0.0008371466911265118, "loss": 3.5142, "step": 19175 }, { "epoch": 1.3031661910585677, "grad_norm": 1.0824638605117798, "learning_rate": 0.000837104226117679, "loss": 3.5279, "step": 19180 }, { "epoch": 1.3035059111292295, "grad_norm": 0.7543104887008667, "learning_rate": 0.0008370617611088464, "loss": 3.5396, "step": 19185 }, { "epoch": 1.3038456311998914, "grad_norm": 0.7501362562179565, "learning_rate": 0.0008370192961000136, "loss": 3.9378, "step": 19190 }, { "epoch": 1.304185351270553, "grad_norm": 0.759842038154602, "learning_rate": 0.0008369768310911808, "loss": 3.5307, "step": 19195 }, { "epoch": 1.3045250713412149, "grad_norm": 0.8994619846343994, "learning_rate": 0.0008369343660823481, "loss": 3.9167, "step": 19200 }, { "epoch": 1.3048647914118767, "grad_norm": 1.0880717039108276, "learning_rate": 0.0008368919010735155, "loss": 3.6136, "step": 19205 }, { "epoch": 1.3052045114825384, "grad_norm": 0.7082797884941101, "learning_rate": 0.0008368494360646827, "loss": 3.6299, "step": 19210 }, { "epoch": 1.3055442315532002, "grad_norm": 0.7700816988945007, "learning_rate": 0.00083680697105585, "loss": 3.8674, "step": 19215 }, { "epoch": 1.305883951623862, "grad_norm": 0.7759992480278015, "learning_rate": 0.0008367645060470173, "loss": 3.4494, "step": 19220 }, { "epoch": 1.3062236716945237, "grad_norm": 0.8918359875679016, "learning_rate": 0.0008367220410381845, "loss": 3.7901, "step": 19225 }, { "epoch": 1.3065633917651855, "grad_norm": 2.300729513168335, "learning_rate": 0.0008366795760293518, "loss": 3.6712, "step": 19230 }, { "epoch": 1.3069031118358474, "grad_norm": 1.0091993808746338, "learning_rate": 0.000836637111020519, "loss": 3.3453, "step": 19235 }, { "epoch": 1.307242831906509, "grad_norm": 0.7243125438690186, "learning_rate": 0.0008365946460116864, "loss": 3.5832, "step": 19240 }, { "epoch": 1.3075825519771709, "grad_norm": 0.7430331707000732, "learning_rate": 0.0008365521810028537, "loss": 3.7651, "step": 19245 }, { "epoch": 1.3079222720478325, "grad_norm": 1.9062978029251099, "learning_rate": 0.0008365097159940209, "loss": 3.8237, "step": 19250 }, { "epoch": 1.3082619921184944, "grad_norm": 0.7068991661071777, "learning_rate": 0.0008364672509851883, "loss": 3.8731, "step": 19255 }, { "epoch": 1.308601712189156, "grad_norm": 1.2697590589523315, "learning_rate": 0.0008364247859763555, "loss": 3.8133, "step": 19260 }, { "epoch": 1.3089414322598178, "grad_norm": 0.8661418557167053, "learning_rate": 0.0008363823209675227, "loss": 3.1856, "step": 19265 }, { "epoch": 1.3092811523304797, "grad_norm": 1.2155336141586304, "learning_rate": 0.0008363398559586901, "loss": 3.5142, "step": 19270 }, { "epoch": 1.3096208724011413, "grad_norm": 0.5832532048225403, "learning_rate": 0.0008362973909498574, "loss": 3.5184, "step": 19275 }, { "epoch": 1.3099605924718032, "grad_norm": 0.8530396819114685, "learning_rate": 0.0008362549259410246, "loss": 3.2229, "step": 19280 }, { "epoch": 1.310300312542465, "grad_norm": 0.8273479342460632, "learning_rate": 0.000836212460932192, "loss": 3.656, "step": 19285 }, { "epoch": 1.3106400326131267, "grad_norm": 1.3767008781433105, "learning_rate": 0.0008361699959233592, "loss": 3.6, "step": 19290 }, { "epoch": 1.3109797526837885, "grad_norm": 0.7706455588340759, "learning_rate": 0.0008361275309145264, "loss": 3.6458, "step": 19295 }, { "epoch": 1.3113194727544504, "grad_norm": 0.7214418649673462, "learning_rate": 0.0008360850659056937, "loss": 3.6441, "step": 19300 }, { "epoch": 1.311659192825112, "grad_norm": 0.6593573689460754, "learning_rate": 0.000836042600896861, "loss": 3.7781, "step": 19305 }, { "epoch": 1.3119989128957739, "grad_norm": 0.6926696300506592, "learning_rate": 0.0008360001358880283, "loss": 3.4172, "step": 19310 }, { "epoch": 1.3123386329664357, "grad_norm": 0.9231415390968323, "learning_rate": 0.0008359576708791956, "loss": 3.4628, "step": 19315 }, { "epoch": 1.3126783530370973, "grad_norm": 0.664509654045105, "learning_rate": 0.0008359152058703629, "loss": 3.527, "step": 19320 }, { "epoch": 1.3130180731077592, "grad_norm": 0.7721145749092102, "learning_rate": 0.0008358727408615301, "loss": 3.6316, "step": 19325 }, { "epoch": 1.313357793178421, "grad_norm": 1.0311793088912964, "learning_rate": 0.0008358302758526974, "loss": 3.6091, "step": 19330 }, { "epoch": 1.3136975132490827, "grad_norm": 0.93877774477005, "learning_rate": 0.0008357878108438647, "loss": 3.4597, "step": 19335 }, { "epoch": 1.3140372333197445, "grad_norm": 0.9329226613044739, "learning_rate": 0.0008357453458350319, "loss": 3.6775, "step": 19340 }, { "epoch": 1.3143769533904064, "grad_norm": 0.9558454751968384, "learning_rate": 0.0008357028808261993, "loss": 3.7005, "step": 19345 }, { "epoch": 1.314716673461068, "grad_norm": 1.8134329319000244, "learning_rate": 0.0008356604158173665, "loss": 3.5576, "step": 19350 }, { "epoch": 1.3150563935317299, "grad_norm": 1.1518900394439697, "learning_rate": 0.0008356179508085338, "loss": 3.5975, "step": 19355 }, { "epoch": 1.3153961136023917, "grad_norm": 1.0108559131622314, "learning_rate": 0.0008355754857997011, "loss": 3.5175, "step": 19360 }, { "epoch": 1.3157358336730534, "grad_norm": 0.9091687798500061, "learning_rate": 0.0008355330207908683, "loss": 3.7377, "step": 19365 }, { "epoch": 1.3160755537437152, "grad_norm": 0.7748539447784424, "learning_rate": 0.0008354905557820356, "loss": 3.7376, "step": 19370 }, { "epoch": 1.316415273814377, "grad_norm": 0.7644347548484802, "learning_rate": 0.0008354480907732029, "loss": 3.9485, "step": 19375 }, { "epoch": 1.3167549938850387, "grad_norm": 0.8928879499435425, "learning_rate": 0.0008354056257643702, "loss": 3.6535, "step": 19380 }, { "epoch": 1.3170947139557005, "grad_norm": 0.7633859515190125, "learning_rate": 0.0008353631607555375, "loss": 3.7145, "step": 19385 }, { "epoch": 1.3174344340263624, "grad_norm": 1.004683256149292, "learning_rate": 0.0008353206957467048, "loss": 3.677, "step": 19390 }, { "epoch": 1.317774154097024, "grad_norm": 1.224363923072815, "learning_rate": 0.000835278230737872, "loss": 3.2911, "step": 19395 }, { "epoch": 1.3181138741676859, "grad_norm": 0.7706454992294312, "learning_rate": 0.0008352357657290392, "loss": 3.8322, "step": 19400 }, { "epoch": 1.3184535942383477, "grad_norm": 1.0382957458496094, "learning_rate": 0.0008351933007202066, "loss": 3.6499, "step": 19405 }, { "epoch": 1.3187933143090094, "grad_norm": 0.9529634118080139, "learning_rate": 0.0008351508357113738, "loss": 3.6193, "step": 19410 }, { "epoch": 1.3191330343796712, "grad_norm": 0.9824109673500061, "learning_rate": 0.0008351083707025411, "loss": 3.5582, "step": 19415 }, { "epoch": 1.3194727544503329, "grad_norm": 0.7511589527130127, "learning_rate": 0.0008350659056937085, "loss": 3.7914, "step": 19420 }, { "epoch": 1.3198124745209947, "grad_norm": 1.3449985980987549, "learning_rate": 0.0008350234406848757, "loss": 3.5377, "step": 19425 }, { "epoch": 1.3201521945916566, "grad_norm": 0.7778506278991699, "learning_rate": 0.0008349809756760429, "loss": 3.7603, "step": 19430 }, { "epoch": 1.3204919146623182, "grad_norm": 0.7010202407836914, "learning_rate": 0.0008349385106672103, "loss": 3.6515, "step": 19435 }, { "epoch": 1.32083163473298, "grad_norm": 0.9390833973884583, "learning_rate": 0.0008348960456583775, "loss": 3.5299, "step": 19440 }, { "epoch": 1.3211713548036417, "grad_norm": 0.8722630739212036, "learning_rate": 0.0008348535806495447, "loss": 3.6038, "step": 19445 }, { "epoch": 1.3215110748743035, "grad_norm": 0.8852182030677795, "learning_rate": 0.0008348111156407121, "loss": 3.6416, "step": 19450 }, { "epoch": 1.3218507949449654, "grad_norm": 0.8159305453300476, "learning_rate": 0.0008347686506318794, "loss": 3.8608, "step": 19455 }, { "epoch": 1.322190515015627, "grad_norm": 0.8102395534515381, "learning_rate": 0.0008347261856230466, "loss": 3.8264, "step": 19460 }, { "epoch": 1.3225302350862889, "grad_norm": 0.7249893546104431, "learning_rate": 0.0008346837206142139, "loss": 3.5293, "step": 19465 }, { "epoch": 1.3228699551569507, "grad_norm": 1.4663506746292114, "learning_rate": 0.0008346412556053812, "loss": 3.8016, "step": 19470 }, { "epoch": 1.3232096752276123, "grad_norm": 0.9181190133094788, "learning_rate": 0.0008345987905965484, "loss": 3.5921, "step": 19475 }, { "epoch": 1.3235493952982742, "grad_norm": 3.3967654705047607, "learning_rate": 0.0008345563255877157, "loss": 3.5958, "step": 19480 }, { "epoch": 1.323889115368936, "grad_norm": 0.9114540815353394, "learning_rate": 0.0008345138605788831, "loss": 3.6338, "step": 19485 }, { "epoch": 1.3242288354395977, "grad_norm": 0.7916017770767212, "learning_rate": 0.0008344713955700503, "loss": 3.4627, "step": 19490 }, { "epoch": 1.3245685555102595, "grad_norm": 0.7891761660575867, "learning_rate": 0.0008344289305612176, "loss": 3.6942, "step": 19495 }, { "epoch": 1.3249082755809214, "grad_norm": 0.7993078231811523, "learning_rate": 0.0008343864655523848, "loss": 3.7123, "step": 19500 }, { "epoch": 1.325247995651583, "grad_norm": 1.2489020824432373, "learning_rate": 0.0008343440005435521, "loss": 3.4263, "step": 19505 }, { "epoch": 1.3255877157222449, "grad_norm": 1.1415542364120483, "learning_rate": 0.0008343015355347194, "loss": 3.5019, "step": 19510 }, { "epoch": 1.3259274357929067, "grad_norm": 0.6854630708694458, "learning_rate": 0.0008342590705258866, "loss": 3.6906, "step": 19515 }, { "epoch": 1.3262671558635684, "grad_norm": 0.7478453516960144, "learning_rate": 0.000834216605517054, "loss": 3.5565, "step": 19520 }, { "epoch": 1.3266068759342302, "grad_norm": 0.7282377481460571, "learning_rate": 0.0008341741405082213, "loss": 3.5382, "step": 19525 }, { "epoch": 1.326946596004892, "grad_norm": 0.9372228384017944, "learning_rate": 0.0008341316754993885, "loss": 3.4152, "step": 19530 }, { "epoch": 1.3272863160755537, "grad_norm": 0.8004382252693176, "learning_rate": 0.0008340892104905557, "loss": 3.6709, "step": 19535 }, { "epoch": 1.3276260361462155, "grad_norm": 0.9729716777801514, "learning_rate": 0.0008340467454817231, "loss": 3.6408, "step": 19540 }, { "epoch": 1.3279657562168774, "grad_norm": 0.574894905090332, "learning_rate": 0.0008340042804728903, "loss": 3.5192, "step": 19545 }, { "epoch": 1.328305476287539, "grad_norm": 0.7566072940826416, "learning_rate": 0.0008339618154640575, "loss": 3.4148, "step": 19550 }, { "epoch": 1.3286451963582009, "grad_norm": 1.0383200645446777, "learning_rate": 0.000833919350455225, "loss": 3.7664, "step": 19555 }, { "epoch": 1.3289849164288627, "grad_norm": 1.513988733291626, "learning_rate": 0.0008338768854463922, "loss": 3.7069, "step": 19560 }, { "epoch": 1.3293246364995244, "grad_norm": 1.3090567588806152, "learning_rate": 0.0008338344204375594, "loss": 3.4956, "step": 19565 }, { "epoch": 1.3296643565701862, "grad_norm": 0.7339676022529602, "learning_rate": 0.0008337919554287268, "loss": 3.5868, "step": 19570 }, { "epoch": 1.330004076640848, "grad_norm": 0.8436015844345093, "learning_rate": 0.000833749490419894, "loss": 3.7834, "step": 19575 }, { "epoch": 1.3303437967115097, "grad_norm": 0.7996444702148438, "learning_rate": 0.0008337070254110612, "loss": 3.5127, "step": 19580 }, { "epoch": 1.3306835167821716, "grad_norm": 0.8504420518875122, "learning_rate": 0.0008336645604022285, "loss": 3.6038, "step": 19585 }, { "epoch": 1.3310232368528332, "grad_norm": 0.7843599915504456, "learning_rate": 0.0008336220953933959, "loss": 3.5667, "step": 19590 }, { "epoch": 1.331362956923495, "grad_norm": 0.7322075366973877, "learning_rate": 0.0008335796303845632, "loss": 3.6758, "step": 19595 }, { "epoch": 1.331702676994157, "grad_norm": 0.8108183145523071, "learning_rate": 0.0008335371653757304, "loss": 3.6849, "step": 19600 }, { "epoch": 1.3320423970648185, "grad_norm": 0.9602959752082825, "learning_rate": 0.0008334947003668977, "loss": 3.6558, "step": 19605 }, { "epoch": 1.3323821171354804, "grad_norm": 0.8836127519607544, "learning_rate": 0.000833452235358065, "loss": 3.5397, "step": 19610 }, { "epoch": 1.332721837206142, "grad_norm": 0.8938495516777039, "learning_rate": 0.0008334097703492322, "loss": 3.3683, "step": 19615 }, { "epoch": 1.3330615572768039, "grad_norm": 0.8161758184432983, "learning_rate": 0.0008333673053403995, "loss": 3.8448, "step": 19620 }, { "epoch": 1.3334012773474657, "grad_norm": 0.927852213382721, "learning_rate": 0.0008333248403315669, "loss": 3.778, "step": 19625 }, { "epoch": 1.3337409974181273, "grad_norm": 1.045912742614746, "learning_rate": 0.0008332823753227341, "loss": 4.0483, "step": 19630 }, { "epoch": 1.3340807174887892, "grad_norm": 0.9028273820877075, "learning_rate": 0.0008332399103139013, "loss": 3.6994, "step": 19635 }, { "epoch": 1.334420437559451, "grad_norm": 0.8321359753608704, "learning_rate": 0.0008331974453050687, "loss": 3.5622, "step": 19640 }, { "epoch": 1.3347601576301127, "grad_norm": 0.7420857548713684, "learning_rate": 0.0008331549802962359, "loss": 3.6311, "step": 19645 }, { "epoch": 1.3350998777007745, "grad_norm": 0.9606713056564331, "learning_rate": 0.0008331125152874031, "loss": 3.3942, "step": 19650 }, { "epoch": 1.3354395977714364, "grad_norm": 0.7483904957771301, "learning_rate": 0.0008330700502785706, "loss": 3.4491, "step": 19655 }, { "epoch": 1.335779317842098, "grad_norm": 0.8392571806907654, "learning_rate": 0.0008330275852697378, "loss": 3.7821, "step": 19660 }, { "epoch": 1.3361190379127599, "grad_norm": 1.0173481702804565, "learning_rate": 0.000832985120260905, "loss": 3.726, "step": 19665 }, { "epoch": 1.3364587579834217, "grad_norm": 0.905332088470459, "learning_rate": 0.0008329426552520724, "loss": 3.7561, "step": 19670 }, { "epoch": 1.3367984780540834, "grad_norm": 1.1367340087890625, "learning_rate": 0.0008329001902432396, "loss": 3.7775, "step": 19675 }, { "epoch": 1.3371381981247452, "grad_norm": 1.0209132432937622, "learning_rate": 0.0008328577252344068, "loss": 3.7185, "step": 19680 }, { "epoch": 1.337477918195407, "grad_norm": 0.7754608988761902, "learning_rate": 0.0008328152602255741, "loss": 3.8013, "step": 19685 }, { "epoch": 1.3378176382660687, "grad_norm": 0.7911690473556519, "learning_rate": 0.0008327727952167415, "loss": 3.6321, "step": 19690 }, { "epoch": 1.3381573583367306, "grad_norm": 0.6917429566383362, "learning_rate": 0.0008327303302079087, "loss": 3.7086, "step": 19695 }, { "epoch": 1.3384970784073924, "grad_norm": 1.0019865036010742, "learning_rate": 0.000832687865199076, "loss": 3.4491, "step": 19700 }, { "epoch": 1.338836798478054, "grad_norm": 0.7981930375099182, "learning_rate": 0.0008326454001902433, "loss": 3.703, "step": 19705 }, { "epoch": 1.3391765185487159, "grad_norm": 1.3944613933563232, "learning_rate": 0.0008326029351814105, "loss": 3.5023, "step": 19710 }, { "epoch": 1.3395162386193777, "grad_norm": 0.7388907074928284, "learning_rate": 0.0008325604701725778, "loss": 3.4653, "step": 19715 }, { "epoch": 1.3398559586900394, "grad_norm": 1.0520060062408447, "learning_rate": 0.0008325180051637451, "loss": 3.6993, "step": 19720 }, { "epoch": 1.3401956787607012, "grad_norm": 0.7282788753509521, "learning_rate": 0.0008324755401549124, "loss": 3.7358, "step": 19725 }, { "epoch": 1.340535398831363, "grad_norm": 0.7743442058563232, "learning_rate": 0.0008324330751460797, "loss": 3.5194, "step": 19730 }, { "epoch": 1.3408751189020247, "grad_norm": 0.771087110042572, "learning_rate": 0.000832390610137247, "loss": 3.6726, "step": 19735 }, { "epoch": 1.3412148389726866, "grad_norm": 2.1107139587402344, "learning_rate": 0.0008323481451284142, "loss": 3.3392, "step": 19740 }, { "epoch": 1.3415545590433484, "grad_norm": 0.7359451651573181, "learning_rate": 0.0008323056801195815, "loss": 3.7288, "step": 19745 }, { "epoch": 1.34189427911401, "grad_norm": 1.036449909210205, "learning_rate": 0.0008322632151107487, "loss": 3.4754, "step": 19750 }, { "epoch": 1.342233999184672, "grad_norm": 0.8496060371398926, "learning_rate": 0.000832220750101916, "loss": 3.7369, "step": 19755 }, { "epoch": 1.3425737192553335, "grad_norm": 1.0397948026657104, "learning_rate": 0.0008321782850930834, "loss": 3.5993, "step": 19760 }, { "epoch": 1.3429134393259954, "grad_norm": 1.02755606174469, "learning_rate": 0.0008321358200842506, "loss": 3.3733, "step": 19765 }, { "epoch": 1.3432531593966572, "grad_norm": 0.9234209656715393, "learning_rate": 0.0008320933550754179, "loss": 3.4907, "step": 19770 }, { "epoch": 1.3435928794673189, "grad_norm": 1.6648839712142944, "learning_rate": 0.0008320508900665852, "loss": 3.5315, "step": 19775 }, { "epoch": 1.3439325995379807, "grad_norm": 0.7281484007835388, "learning_rate": 0.0008320084250577524, "loss": 3.6876, "step": 19780 }, { "epoch": 1.3442723196086424, "grad_norm": 0.8084084391593933, "learning_rate": 0.0008319659600489196, "loss": 3.6864, "step": 19785 }, { "epoch": 1.3446120396793042, "grad_norm": 0.6933193206787109, "learning_rate": 0.000831923495040087, "loss": 3.7692, "step": 19790 }, { "epoch": 1.344951759749966, "grad_norm": 0.7642212510108948, "learning_rate": 0.0008318810300312543, "loss": 3.7358, "step": 19795 }, { "epoch": 1.3452914798206277, "grad_norm": 1.1583991050720215, "learning_rate": 0.0008318385650224215, "loss": 3.5249, "step": 19800 }, { "epoch": 1.3456311998912895, "grad_norm": 0.7731074690818787, "learning_rate": 0.0008317961000135889, "loss": 3.6161, "step": 19805 }, { "epoch": 1.3459709199619514, "grad_norm": 0.7493581175804138, "learning_rate": 0.0008317536350047561, "loss": 3.4031, "step": 19810 }, { "epoch": 1.346310640032613, "grad_norm": 0.9421755075454712, "learning_rate": 0.0008317111699959233, "loss": 3.6214, "step": 19815 }, { "epoch": 1.3466503601032749, "grad_norm": 0.8453624844551086, "learning_rate": 0.0008316687049870907, "loss": 3.5466, "step": 19820 }, { "epoch": 1.3469900801739367, "grad_norm": 0.7539472579956055, "learning_rate": 0.0008316262399782579, "loss": 3.5655, "step": 19825 }, { "epoch": 1.3473298002445984, "grad_norm": 0.785384476184845, "learning_rate": 0.0008315837749694252, "loss": 3.5493, "step": 19830 }, { "epoch": 1.3476695203152602, "grad_norm": 0.6150581240653992, "learning_rate": 0.0008315413099605926, "loss": 3.7445, "step": 19835 }, { "epoch": 1.348009240385922, "grad_norm": 0.7939785122871399, "learning_rate": 0.0008314988449517598, "loss": 3.558, "step": 19840 }, { "epoch": 1.3483489604565837, "grad_norm": 0.896045446395874, "learning_rate": 0.000831456379942927, "loss": 3.5743, "step": 19845 }, { "epoch": 1.3486886805272456, "grad_norm": 0.9773976802825928, "learning_rate": 0.0008314139149340943, "loss": 3.7642, "step": 19850 }, { "epoch": 1.3490284005979074, "grad_norm": 0.725162148475647, "learning_rate": 0.0008313714499252616, "loss": 3.693, "step": 19855 }, { "epoch": 1.349368120668569, "grad_norm": 0.8170601725578308, "learning_rate": 0.0008313289849164288, "loss": 3.6087, "step": 19860 }, { "epoch": 1.349707840739231, "grad_norm": 0.965078592300415, "learning_rate": 0.0008312865199075962, "loss": 3.7533, "step": 19865 }, { "epoch": 1.3500475608098927, "grad_norm": 0.8299670219421387, "learning_rate": 0.0008312440548987635, "loss": 3.713, "step": 19870 }, { "epoch": 1.3503872808805544, "grad_norm": 0.8961119055747986, "learning_rate": 0.0008312015898899307, "loss": 3.8778, "step": 19875 }, { "epoch": 1.3507270009512162, "grad_norm": 0.8540622591972351, "learning_rate": 0.000831159124881098, "loss": 3.928, "step": 19880 }, { "epoch": 1.351066721021878, "grad_norm": 0.9417552351951599, "learning_rate": 0.0008311166598722652, "loss": 3.7619, "step": 19885 }, { "epoch": 1.3514064410925397, "grad_norm": 0.8376766443252563, "learning_rate": 0.0008310741948634325, "loss": 3.4543, "step": 19890 }, { "epoch": 1.3517461611632016, "grad_norm": 0.7920647263526917, "learning_rate": 0.0008310317298545998, "loss": 3.7001, "step": 19895 }, { "epoch": 1.3520858812338634, "grad_norm": 0.8727974891662598, "learning_rate": 0.0008309892648457671, "loss": 3.5563, "step": 19900 }, { "epoch": 1.352425601304525, "grad_norm": 1.4218459129333496, "learning_rate": 0.0008309467998369344, "loss": 3.6041, "step": 19905 }, { "epoch": 1.352765321375187, "grad_norm": 0.9833852052688599, "learning_rate": 0.0008309043348281017, "loss": 3.6877, "step": 19910 }, { "epoch": 1.3531050414458488, "grad_norm": 0.7281168103218079, "learning_rate": 0.0008308618698192689, "loss": 3.6883, "step": 19915 }, { "epoch": 1.3534447615165104, "grad_norm": 1.0514922142028809, "learning_rate": 0.0008308194048104362, "loss": 3.7067, "step": 19920 }, { "epoch": 1.3537844815871722, "grad_norm": 0.9565787315368652, "learning_rate": 0.0008307769398016035, "loss": 3.5998, "step": 19925 }, { "epoch": 1.3541242016578339, "grad_norm": 0.7510514855384827, "learning_rate": 0.0008307344747927707, "loss": 3.8489, "step": 19930 }, { "epoch": 1.3544639217284957, "grad_norm": 0.7497783899307251, "learning_rate": 0.0008306920097839382, "loss": 3.7901, "step": 19935 }, { "epoch": 1.3548036417991576, "grad_norm": 0.8029617667198181, "learning_rate": 0.0008306495447751054, "loss": 3.5973, "step": 19940 }, { "epoch": 1.3551433618698192, "grad_norm": 0.7661157250404358, "learning_rate": 0.0008306070797662726, "loss": 3.7219, "step": 19945 }, { "epoch": 1.355483081940481, "grad_norm": 0.7473312616348267, "learning_rate": 0.0008305646147574399, "loss": 3.6941, "step": 19950 }, { "epoch": 1.3558228020111427, "grad_norm": 0.7058807611465454, "learning_rate": 0.0008305221497486072, "loss": 3.6592, "step": 19955 }, { "epoch": 1.3561625220818045, "grad_norm": 0.7740005850791931, "learning_rate": 0.0008304796847397744, "loss": 3.7558, "step": 19960 }, { "epoch": 1.3565022421524664, "grad_norm": 0.8686883449554443, "learning_rate": 0.0008304372197309417, "loss": 3.8264, "step": 19965 }, { "epoch": 1.356841962223128, "grad_norm": 0.9508535861968994, "learning_rate": 0.0008303947547221091, "loss": 3.6278, "step": 19970 }, { "epoch": 1.3571816822937899, "grad_norm": 0.8972076773643494, "learning_rate": 0.0008303522897132763, "loss": 3.5942, "step": 19975 }, { "epoch": 1.3575214023644517, "grad_norm": 0.7137060165405273, "learning_rate": 0.0008303098247044436, "loss": 3.7111, "step": 19980 }, { "epoch": 1.3578611224351134, "grad_norm": 0.7540163993835449, "learning_rate": 0.0008302673596956108, "loss": 3.5098, "step": 19985 }, { "epoch": 1.3582008425057752, "grad_norm": 0.6191962957382202, "learning_rate": 0.0008302248946867781, "loss": 3.567, "step": 19990 }, { "epoch": 1.358540562576437, "grad_norm": 1.0202337503433228, "learning_rate": 0.0008301824296779454, "loss": 3.4531, "step": 19995 }, { "epoch": 1.3588802826470987, "grad_norm": 0.8122200965881348, "learning_rate": 0.0008301399646691126, "loss": 3.7727, "step": 20000 }, { "epoch": 1.3592200027177606, "grad_norm": 0.7568419575691223, "learning_rate": 0.00083009749966028, "loss": 3.8636, "step": 20005 }, { "epoch": 1.3595597227884224, "grad_norm": 0.9622646570205688, "learning_rate": 0.0008300550346514473, "loss": 3.7696, "step": 20010 }, { "epoch": 1.359899442859084, "grad_norm": 0.7535944581031799, "learning_rate": 0.0008300125696426145, "loss": 3.8556, "step": 20015 }, { "epoch": 1.360239162929746, "grad_norm": 0.7176857590675354, "learning_rate": 0.0008299701046337818, "loss": 3.7539, "step": 20020 }, { "epoch": 1.3605788830004077, "grad_norm": 1.5292377471923828, "learning_rate": 0.0008299276396249491, "loss": 3.5624, "step": 20025 }, { "epoch": 1.3609186030710694, "grad_norm": 0.6986082792282104, "learning_rate": 0.0008298851746161163, "loss": 3.6364, "step": 20030 }, { "epoch": 1.3612583231417312, "grad_norm": 0.7370955348014832, "learning_rate": 0.0008298427096072835, "loss": 3.772, "step": 20035 }, { "epoch": 1.361598043212393, "grad_norm": 0.8615638017654419, "learning_rate": 0.000829800244598451, "loss": 3.7706, "step": 20040 }, { "epoch": 1.3619377632830547, "grad_norm": 0.8507272601127625, "learning_rate": 0.0008297577795896182, "loss": 3.5582, "step": 20045 }, { "epoch": 1.3622774833537166, "grad_norm": 0.9351620078086853, "learning_rate": 0.0008297153145807854, "loss": 3.7316, "step": 20050 }, { "epoch": 1.3626172034243784, "grad_norm": 0.8998017907142639, "learning_rate": 0.0008296728495719528, "loss": 3.6514, "step": 20055 }, { "epoch": 1.36295692349504, "grad_norm": 0.6098034977912903, "learning_rate": 0.00082963038456312, "loss": 3.6465, "step": 20060 }, { "epoch": 1.363296643565702, "grad_norm": 0.8833012580871582, "learning_rate": 0.0008295879195542872, "loss": 3.7565, "step": 20065 }, { "epoch": 1.3636363636363638, "grad_norm": 1.0289818048477173, "learning_rate": 0.0008295454545454546, "loss": 3.6553, "step": 20070 }, { "epoch": 1.3639760837070254, "grad_norm": 0.9692005515098572, "learning_rate": 0.0008295029895366219, "loss": 3.6894, "step": 20075 }, { "epoch": 1.3643158037776872, "grad_norm": 0.7152788639068604, "learning_rate": 0.0008294605245277891, "loss": 3.5481, "step": 20080 }, { "epoch": 1.364655523848349, "grad_norm": 0.8993133902549744, "learning_rate": 0.0008294180595189564, "loss": 3.4693, "step": 20085 }, { "epoch": 1.3649952439190107, "grad_norm": 0.8196595907211304, "learning_rate": 0.0008293755945101237, "loss": 3.5787, "step": 20090 }, { "epoch": 1.3653349639896726, "grad_norm": 0.7238662242889404, "learning_rate": 0.0008293331295012909, "loss": 3.5447, "step": 20095 }, { "epoch": 1.3656746840603342, "grad_norm": 1.0158884525299072, "learning_rate": 0.0008292906644924582, "loss": 3.6347, "step": 20100 }, { "epoch": 1.366014404130996, "grad_norm": 0.9779278635978699, "learning_rate": 0.0008292481994836255, "loss": 3.7077, "step": 20105 }, { "epoch": 1.366354124201658, "grad_norm": 1.117322564125061, "learning_rate": 0.0008292057344747928, "loss": 3.4578, "step": 20110 }, { "epoch": 1.3666938442723195, "grad_norm": 1.8293728828430176, "learning_rate": 0.0008291632694659601, "loss": 3.6426, "step": 20115 }, { "epoch": 1.3670335643429814, "grad_norm": 0.8472502827644348, "learning_rate": 0.0008291208044571274, "loss": 3.1927, "step": 20120 }, { "epoch": 1.367373284413643, "grad_norm": 0.7554154396057129, "learning_rate": 0.0008290783394482946, "loss": 3.4672, "step": 20125 }, { "epoch": 1.3677130044843049, "grad_norm": 0.790399968624115, "learning_rate": 0.0008290358744394619, "loss": 3.5768, "step": 20130 }, { "epoch": 1.3680527245549667, "grad_norm": 0.8999508023262024, "learning_rate": 0.0008289934094306291, "loss": 3.6848, "step": 20135 }, { "epoch": 1.3683924446256284, "grad_norm": 0.9056254625320435, "learning_rate": 0.0008289509444217964, "loss": 3.3525, "step": 20140 }, { "epoch": 1.3687321646962902, "grad_norm": 3.9266421794891357, "learning_rate": 0.0008289084794129638, "loss": 3.524, "step": 20145 }, { "epoch": 1.369071884766952, "grad_norm": 0.9506530165672302, "learning_rate": 0.000828866014404131, "loss": 3.4021, "step": 20150 }, { "epoch": 1.3694116048376137, "grad_norm": 0.9955437183380127, "learning_rate": 0.0008288235493952983, "loss": 3.8126, "step": 20155 }, { "epoch": 1.3697513249082756, "grad_norm": 0.8767886161804199, "learning_rate": 0.0008287810843864656, "loss": 3.6191, "step": 20160 }, { "epoch": 1.3700910449789374, "grad_norm": 0.7537773847579956, "learning_rate": 0.0008287386193776328, "loss": 3.6686, "step": 20165 }, { "epoch": 1.370430765049599, "grad_norm": 0.8429709672927856, "learning_rate": 0.0008286961543688, "loss": 3.4358, "step": 20170 }, { "epoch": 1.370770485120261, "grad_norm": 0.6619389057159424, "learning_rate": 0.0008286536893599674, "loss": 3.4888, "step": 20175 }, { "epoch": 1.3711102051909227, "grad_norm": 1.3760391473770142, "learning_rate": 0.0008286112243511347, "loss": 3.6753, "step": 20180 }, { "epoch": 1.3714499252615844, "grad_norm": 0.7743170261383057, "learning_rate": 0.0008285687593423019, "loss": 3.3126, "step": 20185 }, { "epoch": 1.3717896453322462, "grad_norm": 0.9974854588508606, "learning_rate": 0.0008285262943334693, "loss": 3.7557, "step": 20190 }, { "epoch": 1.372129365402908, "grad_norm": 0.983306348323822, "learning_rate": 0.0008284838293246365, "loss": 3.4007, "step": 20195 }, { "epoch": 1.3724690854735697, "grad_norm": 1.081614375114441, "learning_rate": 0.0008284413643158037, "loss": 3.3639, "step": 20200 }, { "epoch": 1.3728088055442316, "grad_norm": 0.7151765823364258, "learning_rate": 0.0008283988993069711, "loss": 3.4608, "step": 20205 }, { "epoch": 1.3731485256148934, "grad_norm": 0.7609778642654419, "learning_rate": 0.0008283564342981383, "loss": 3.5031, "step": 20210 }, { "epoch": 1.373488245685555, "grad_norm": 0.8248502612113953, "learning_rate": 0.0008283139692893056, "loss": 3.5884, "step": 20215 }, { "epoch": 1.373827965756217, "grad_norm": 1.5455756187438965, "learning_rate": 0.000828271504280473, "loss": 3.392, "step": 20220 }, { "epoch": 1.3741676858268788, "grad_norm": 0.7607990503311157, "learning_rate": 0.0008282290392716402, "loss": 3.684, "step": 20225 }, { "epoch": 1.3745074058975404, "grad_norm": 3.544597864151001, "learning_rate": 0.0008281865742628074, "loss": 3.3773, "step": 20230 }, { "epoch": 1.3748471259682022, "grad_norm": 1.0479261875152588, "learning_rate": 0.0008281441092539747, "loss": 3.574, "step": 20235 }, { "epoch": 1.375186846038864, "grad_norm": 0.7941829562187195, "learning_rate": 0.000828101644245142, "loss": 3.5016, "step": 20240 }, { "epoch": 1.3755265661095257, "grad_norm": 1.0169727802276611, "learning_rate": 0.0008280591792363092, "loss": 3.5307, "step": 20245 }, { "epoch": 1.3758662861801876, "grad_norm": 0.9216648936271667, "learning_rate": 0.0008280167142274766, "loss": 3.5364, "step": 20250 }, { "epoch": 1.3762060062508494, "grad_norm": 0.8656046986579895, "learning_rate": 0.0008279742492186439, "loss": 3.519, "step": 20255 }, { "epoch": 1.376545726321511, "grad_norm": 0.9340004324913025, "learning_rate": 0.0008279317842098111, "loss": 3.4805, "step": 20260 }, { "epoch": 1.376885446392173, "grad_norm": 1.1377168893814087, "learning_rate": 0.0008278893192009784, "loss": 3.6431, "step": 20265 }, { "epoch": 1.3772251664628345, "grad_norm": 0.9111413359642029, "learning_rate": 0.0008278468541921456, "loss": 3.8543, "step": 20270 }, { "epoch": 1.3775648865334964, "grad_norm": 1.0429924726486206, "learning_rate": 0.000827804389183313, "loss": 3.6353, "step": 20275 }, { "epoch": 1.3779046066041583, "grad_norm": 0.7700743079185486, "learning_rate": 0.0008277619241744803, "loss": 3.6987, "step": 20280 }, { "epoch": 1.3782443266748199, "grad_norm": 1.1103097200393677, "learning_rate": 0.0008277194591656475, "loss": 3.5317, "step": 20285 }, { "epoch": 1.3785840467454817, "grad_norm": 1.1317721605300903, "learning_rate": 0.0008276769941568149, "loss": 3.8176, "step": 20290 }, { "epoch": 1.3789237668161434, "grad_norm": 0.9521926045417786, "learning_rate": 0.0008276345291479821, "loss": 3.7743, "step": 20295 }, { "epoch": 1.3792634868868052, "grad_norm": 1.0168817043304443, "learning_rate": 0.0008275920641391493, "loss": 3.8019, "step": 20300 }, { "epoch": 1.379603206957467, "grad_norm": 0.8945667743682861, "learning_rate": 0.0008275495991303167, "loss": 3.4125, "step": 20305 }, { "epoch": 1.3799429270281287, "grad_norm": 0.7459174394607544, "learning_rate": 0.0008275071341214839, "loss": 3.5869, "step": 20310 }, { "epoch": 1.3802826470987906, "grad_norm": 0.7691378593444824, "learning_rate": 0.0008274646691126512, "loss": 3.7097, "step": 20315 }, { "epoch": 1.3806223671694524, "grad_norm": 1.0497788190841675, "learning_rate": 0.0008274222041038186, "loss": 3.6395, "step": 20320 }, { "epoch": 1.380962087240114, "grad_norm": 1.243142008781433, "learning_rate": 0.0008273797390949858, "loss": 3.5531, "step": 20325 }, { "epoch": 1.381301807310776, "grad_norm": 0.7451356053352356, "learning_rate": 0.000827337274086153, "loss": 3.5585, "step": 20330 }, { "epoch": 1.3816415273814378, "grad_norm": 1.9425336122512817, "learning_rate": 0.0008272948090773203, "loss": 3.5113, "step": 20335 }, { "epoch": 1.3819812474520994, "grad_norm": 0.7421507239341736, "learning_rate": 0.0008272523440684876, "loss": 3.7413, "step": 20340 }, { "epoch": 1.3823209675227612, "grad_norm": 0.7696300745010376, "learning_rate": 0.0008272098790596548, "loss": 3.5647, "step": 20345 }, { "epoch": 1.382660687593423, "grad_norm": 0.7083591222763062, "learning_rate": 0.0008271674140508222, "loss": 3.5757, "step": 20350 }, { "epoch": 1.3830004076640847, "grad_norm": 0.8059934377670288, "learning_rate": 0.0008271249490419895, "loss": 3.4186, "step": 20355 }, { "epoch": 1.3833401277347466, "grad_norm": 0.8010045289993286, "learning_rate": 0.0008270824840331567, "loss": 3.4904, "step": 20360 }, { "epoch": 1.3836798478054084, "grad_norm": 0.7706525921821594, "learning_rate": 0.000827040019024324, "loss": 3.7753, "step": 20365 }, { "epoch": 1.38401956787607, "grad_norm": 1.0286221504211426, "learning_rate": 0.0008269975540154912, "loss": 3.679, "step": 20370 }, { "epoch": 1.384359287946732, "grad_norm": 0.9037805199623108, "learning_rate": 0.0008269550890066585, "loss": 3.6565, "step": 20375 }, { "epoch": 1.3846990080173938, "grad_norm": 0.7697586417198181, "learning_rate": 0.0008269126239978258, "loss": 3.8985, "step": 20380 }, { "epoch": 1.3850387280880554, "grad_norm": 0.7971416115760803, "learning_rate": 0.0008268701589889931, "loss": 3.7845, "step": 20385 }, { "epoch": 1.3853784481587172, "grad_norm": 0.6265209317207336, "learning_rate": 0.0008268276939801604, "loss": 3.501, "step": 20390 }, { "epoch": 1.385718168229379, "grad_norm": 0.8635155558586121, "learning_rate": 0.0008267852289713277, "loss": 3.6134, "step": 20395 }, { "epoch": 1.3860578883000407, "grad_norm": 0.8051210045814514, "learning_rate": 0.0008267427639624949, "loss": 3.5318, "step": 20400 }, { "epoch": 1.3863976083707026, "grad_norm": 1.0002379417419434, "learning_rate": 0.0008267002989536622, "loss": 3.5288, "step": 20405 }, { "epoch": 1.3867373284413644, "grad_norm": 0.8467056155204773, "learning_rate": 0.0008266578339448295, "loss": 3.5673, "step": 20410 }, { "epoch": 1.387077048512026, "grad_norm": 0.9337602853775024, "learning_rate": 0.0008266153689359967, "loss": 3.7144, "step": 20415 }, { "epoch": 1.387416768582688, "grad_norm": 0.7187165021896362, "learning_rate": 0.000826572903927164, "loss": 3.7148, "step": 20420 }, { "epoch": 1.3877564886533498, "grad_norm": 0.8400835394859314, "learning_rate": 0.0008265304389183314, "loss": 3.5658, "step": 20425 }, { "epoch": 1.3880962087240114, "grad_norm": 0.7464248538017273, "learning_rate": 0.0008264879739094986, "loss": 3.5879, "step": 20430 }, { "epoch": 1.3884359287946733, "grad_norm": 0.819556474685669, "learning_rate": 0.0008264455089006658, "loss": 3.774, "step": 20435 }, { "epoch": 1.3887756488653349, "grad_norm": 0.8895112872123718, "learning_rate": 0.0008264030438918332, "loss": 3.5174, "step": 20440 }, { "epoch": 1.3891153689359967, "grad_norm": 0.7157255411148071, "learning_rate": 0.0008263605788830004, "loss": 3.76, "step": 20445 }, { "epoch": 1.3894550890066586, "grad_norm": 0.9935908913612366, "learning_rate": 0.0008263181138741676, "loss": 3.6405, "step": 20450 }, { "epoch": 1.3897948090773202, "grad_norm": 0.798677384853363, "learning_rate": 0.0008262756488653351, "loss": 3.4065, "step": 20455 }, { "epoch": 1.390134529147982, "grad_norm": 0.9333176016807556, "learning_rate": 0.0008262331838565023, "loss": 3.8207, "step": 20460 }, { "epoch": 1.3904742492186437, "grad_norm": 0.6949829459190369, "learning_rate": 0.0008261907188476695, "loss": 3.5882, "step": 20465 }, { "epoch": 1.3908139692893056, "grad_norm": 0.8420959711074829, "learning_rate": 0.0008261482538388369, "loss": 3.4167, "step": 20470 }, { "epoch": 1.3911536893599674, "grad_norm": 0.7815521955490112, "learning_rate": 0.0008261057888300041, "loss": 3.6029, "step": 20475 }, { "epoch": 1.391493409430629, "grad_norm": 0.9085712432861328, "learning_rate": 0.0008260633238211713, "loss": 3.6257, "step": 20480 }, { "epoch": 1.391833129501291, "grad_norm": 0.9145505428314209, "learning_rate": 0.0008260208588123386, "loss": 3.4722, "step": 20485 }, { "epoch": 1.3921728495719528, "grad_norm": 0.8195421695709229, "learning_rate": 0.000825978393803506, "loss": 3.6158, "step": 20490 }, { "epoch": 1.3925125696426144, "grad_norm": 0.872031033039093, "learning_rate": 0.0008259359287946732, "loss": 3.469, "step": 20495 }, { "epoch": 1.3928522897132762, "grad_norm": 1.4268487691879272, "learning_rate": 0.0008258934637858405, "loss": 3.5495, "step": 20500 }, { "epoch": 1.393192009783938, "grad_norm": 0.8297573924064636, "learning_rate": 0.0008258509987770078, "loss": 3.5599, "step": 20505 }, { "epoch": 1.3935317298545997, "grad_norm": 0.824938952922821, "learning_rate": 0.000825808533768175, "loss": 3.5409, "step": 20510 }, { "epoch": 1.3938714499252616, "grad_norm": 0.8266805410385132, "learning_rate": 0.0008257660687593423, "loss": 3.5616, "step": 20515 }, { "epoch": 1.3942111699959234, "grad_norm": 0.9786876440048218, "learning_rate": 0.0008257236037505095, "loss": 3.7859, "step": 20520 }, { "epoch": 1.394550890066585, "grad_norm": 1.1329405307769775, "learning_rate": 0.0008256811387416769, "loss": 3.6537, "step": 20525 }, { "epoch": 1.394890610137247, "grad_norm": 1.0957200527191162, "learning_rate": 0.0008256386737328442, "loss": 3.3437, "step": 20530 }, { "epoch": 1.3952303302079088, "grad_norm": 0.8033496737480164, "learning_rate": 0.0008255962087240114, "loss": 3.6547, "step": 20535 }, { "epoch": 1.3955700502785704, "grad_norm": 0.7064065933227539, "learning_rate": 0.0008255537437151787, "loss": 3.7429, "step": 20540 }, { "epoch": 1.3959097703492322, "grad_norm": 0.7505779266357422, "learning_rate": 0.000825511278706346, "loss": 3.6715, "step": 20545 }, { "epoch": 1.396249490419894, "grad_norm": 0.8774320483207703, "learning_rate": 0.0008254688136975132, "loss": 3.333, "step": 20550 }, { "epoch": 1.3965892104905557, "grad_norm": 0.6276168823242188, "learning_rate": 0.0008254263486886804, "loss": 3.4375, "step": 20555 }, { "epoch": 1.3969289305612176, "grad_norm": 0.6749109625816345, "learning_rate": 0.0008253838836798479, "loss": 3.5985, "step": 20560 }, { "epoch": 1.3972686506318794, "grad_norm": 0.7648650407791138, "learning_rate": 0.0008253414186710151, "loss": 3.6375, "step": 20565 }, { "epoch": 1.397608370702541, "grad_norm": 0.8186941146850586, "learning_rate": 0.0008252989536621823, "loss": 3.5453, "step": 20570 }, { "epoch": 1.397948090773203, "grad_norm": 0.7318486571311951, "learning_rate": 0.0008252564886533497, "loss": 3.2976, "step": 20575 }, { "epoch": 1.3982878108438648, "grad_norm": 0.9368587732315063, "learning_rate": 0.0008252140236445169, "loss": 3.6553, "step": 20580 }, { "epoch": 1.3986275309145264, "grad_norm": 0.9344570636749268, "learning_rate": 0.0008251715586356841, "loss": 3.6233, "step": 20585 }, { "epoch": 1.3989672509851883, "grad_norm": 0.8090149164199829, "learning_rate": 0.0008251290936268515, "loss": 3.6855, "step": 20590 }, { "epoch": 1.3993069710558501, "grad_norm": 1.348703384399414, "learning_rate": 0.0008250866286180188, "loss": 3.4964, "step": 20595 }, { "epoch": 1.3996466911265117, "grad_norm": 1.0034469366073608, "learning_rate": 0.000825044163609186, "loss": 3.5434, "step": 20600 }, { "epoch": 1.3999864111971736, "grad_norm": 0.9152267575263977, "learning_rate": 0.0008250016986003534, "loss": 3.7884, "step": 20605 }, { "epoch": 1.4003261312678352, "grad_norm": 0.8734991550445557, "learning_rate": 0.0008249592335915206, "loss": 3.6744, "step": 20610 }, { "epoch": 1.400665851338497, "grad_norm": 0.6777758598327637, "learning_rate": 0.0008249167685826879, "loss": 3.7508, "step": 20615 }, { "epoch": 1.401005571409159, "grad_norm": 0.7659586071968079, "learning_rate": 0.0008248743035738551, "loss": 3.5722, "step": 20620 }, { "epoch": 1.4013452914798206, "grad_norm": 0.959725558757782, "learning_rate": 0.0008248318385650224, "loss": 3.5768, "step": 20625 }, { "epoch": 1.4016850115504824, "grad_norm": 0.5695216059684753, "learning_rate": 0.0008247893735561898, "loss": 3.6181, "step": 20630 }, { "epoch": 1.402024731621144, "grad_norm": 0.8693369030952454, "learning_rate": 0.000824746908547357, "loss": 3.4066, "step": 20635 }, { "epoch": 1.402364451691806, "grad_norm": 0.7927300333976746, "learning_rate": 0.0008247044435385243, "loss": 4.0636, "step": 20640 }, { "epoch": 1.4027041717624678, "grad_norm": 0.9798035025596619, "learning_rate": 0.0008246619785296916, "loss": 3.9302, "step": 20645 }, { "epoch": 1.4030438918331294, "grad_norm": 0.9714420437812805, "learning_rate": 0.0008246195135208588, "loss": 3.4737, "step": 20650 }, { "epoch": 1.4033836119037912, "grad_norm": 0.7763824462890625, "learning_rate": 0.000824577048512026, "loss": 3.4242, "step": 20655 }, { "epoch": 1.403723331974453, "grad_norm": 0.8162153363227844, "learning_rate": 0.0008245345835031934, "loss": 3.6222, "step": 20660 }, { "epoch": 1.4040630520451147, "grad_norm": 0.8270282745361328, "learning_rate": 0.0008244921184943607, "loss": 3.757, "step": 20665 }, { "epoch": 1.4044027721157766, "grad_norm": 0.8101107478141785, "learning_rate": 0.0008244496534855279, "loss": 3.5851, "step": 20670 }, { "epoch": 1.4047424921864384, "grad_norm": 0.8065143823623657, "learning_rate": 0.0008244071884766953, "loss": 3.5528, "step": 20675 }, { "epoch": 1.4050822122571, "grad_norm": 0.7318259477615356, "learning_rate": 0.0008243647234678625, "loss": 3.739, "step": 20680 }, { "epoch": 1.405421932327762, "grad_norm": 1.1027981042861938, "learning_rate": 0.0008243222584590297, "loss": 3.586, "step": 20685 }, { "epoch": 1.4057616523984238, "grad_norm": 1.04094398021698, "learning_rate": 0.0008242797934501971, "loss": 3.4476, "step": 20690 }, { "epoch": 1.4061013724690854, "grad_norm": 0.8588951230049133, "learning_rate": 0.0008242373284413643, "loss": 3.6069, "step": 20695 }, { "epoch": 1.4064410925397473, "grad_norm": 0.9245058298110962, "learning_rate": 0.0008241948634325316, "loss": 3.6423, "step": 20700 }, { "epoch": 1.406780812610409, "grad_norm": 1.0811768770217896, "learning_rate": 0.000824152398423699, "loss": 3.4884, "step": 20705 }, { "epoch": 1.4071205326810707, "grad_norm": 0.8094359636306763, "learning_rate": 0.0008241099334148662, "loss": 3.8573, "step": 20710 }, { "epoch": 1.4074602527517326, "grad_norm": 0.8182267546653748, "learning_rate": 0.0008240674684060334, "loss": 3.3837, "step": 20715 }, { "epoch": 1.4077999728223944, "grad_norm": 0.6878779530525208, "learning_rate": 0.0008240250033972007, "loss": 3.3033, "step": 20720 }, { "epoch": 1.408139692893056, "grad_norm": 0.7361664175987244, "learning_rate": 0.000823982538388368, "loss": 3.5176, "step": 20725 }, { "epoch": 1.408479412963718, "grad_norm": 1.0483208894729614, "learning_rate": 0.0008239400733795352, "loss": 3.4674, "step": 20730 }, { "epoch": 1.4088191330343798, "grad_norm": 0.8424781560897827, "learning_rate": 0.0008238976083707026, "loss": 3.6651, "step": 20735 }, { "epoch": 1.4091588531050414, "grad_norm": 0.9902977347373962, "learning_rate": 0.0008238551433618699, "loss": 3.4855, "step": 20740 }, { "epoch": 1.4094985731757033, "grad_norm": 0.9176404476165771, "learning_rate": 0.0008238126783530371, "loss": 3.5121, "step": 20745 }, { "epoch": 1.4098382932463651, "grad_norm": 1.2062361240386963, "learning_rate": 0.0008237702133442044, "loss": 3.6757, "step": 20750 }, { "epoch": 1.4101780133170267, "grad_norm": 0.8620239496231079, "learning_rate": 0.0008237277483353717, "loss": 3.4297, "step": 20755 }, { "epoch": 1.4105177333876886, "grad_norm": 0.7602016925811768, "learning_rate": 0.0008236852833265389, "loss": 3.5532, "step": 20760 }, { "epoch": 1.4108574534583505, "grad_norm": 0.8381115198135376, "learning_rate": 0.0008236428183177062, "loss": 3.7047, "step": 20765 }, { "epoch": 1.411197173529012, "grad_norm": 0.8198944926261902, "learning_rate": 0.0008236003533088735, "loss": 3.6948, "step": 20770 }, { "epoch": 1.411536893599674, "grad_norm": 0.9184916019439697, "learning_rate": 0.0008235578883000408, "loss": 3.5166, "step": 20775 }, { "epoch": 1.4118766136703356, "grad_norm": 0.7563937306404114, "learning_rate": 0.0008235154232912081, "loss": 3.8841, "step": 20780 }, { "epoch": 1.4122163337409974, "grad_norm": 1.1413289308547974, "learning_rate": 0.0008234729582823753, "loss": 3.5892, "step": 20785 }, { "epoch": 1.4125560538116593, "grad_norm": 1.0585217475891113, "learning_rate": 0.0008234304932735426, "loss": 3.531, "step": 20790 }, { "epoch": 1.412895773882321, "grad_norm": 0.7346665263175964, "learning_rate": 0.0008233880282647099, "loss": 3.7766, "step": 20795 }, { "epoch": 1.4132354939529828, "grad_norm": 0.8998989462852478, "learning_rate": 0.0008233455632558771, "loss": 3.5497, "step": 20800 }, { "epoch": 1.4135752140236444, "grad_norm": 0.7291684150695801, "learning_rate": 0.0008233030982470445, "loss": 3.5563, "step": 20805 }, { "epoch": 1.4139149340943062, "grad_norm": 0.8629082441329956, "learning_rate": 0.0008232606332382118, "loss": 3.4234, "step": 20810 }, { "epoch": 1.414254654164968, "grad_norm": 1.0397497415542603, "learning_rate": 0.000823218168229379, "loss": 3.7712, "step": 20815 }, { "epoch": 1.4145943742356297, "grad_norm": 1.0735012292861938, "learning_rate": 0.0008231757032205462, "loss": 3.6501, "step": 20820 }, { "epoch": 1.4149340943062916, "grad_norm": 0.8563271164894104, "learning_rate": 0.0008231332382117136, "loss": 3.7, "step": 20825 }, { "epoch": 1.4152738143769534, "grad_norm": 0.9112222790718079, "learning_rate": 0.0008230907732028808, "loss": 3.6677, "step": 20830 }, { "epoch": 1.415613534447615, "grad_norm": 0.7145978808403015, "learning_rate": 0.000823048308194048, "loss": 3.6044, "step": 20835 }, { "epoch": 1.415953254518277, "grad_norm": 0.739173412322998, "learning_rate": 0.0008230058431852155, "loss": 3.79, "step": 20840 }, { "epoch": 1.4162929745889388, "grad_norm": 1.0261688232421875, "learning_rate": 0.0008229633781763827, "loss": 3.83, "step": 20845 }, { "epoch": 1.4166326946596004, "grad_norm": 1.3438572883605957, "learning_rate": 0.0008229209131675499, "loss": 3.4498, "step": 20850 }, { "epoch": 1.4169724147302623, "grad_norm": 0.7410697937011719, "learning_rate": 0.0008228784481587173, "loss": 3.3386, "step": 20855 }, { "epoch": 1.417312134800924, "grad_norm": 0.8338222503662109, "learning_rate": 0.0008228359831498845, "loss": 3.5874, "step": 20860 }, { "epoch": 1.4176518548715857, "grad_norm": 0.9488686919212341, "learning_rate": 0.0008227935181410517, "loss": 3.6481, "step": 20865 }, { "epoch": 1.4179915749422476, "grad_norm": 0.750819206237793, "learning_rate": 0.0008227510531322191, "loss": 3.6444, "step": 20870 }, { "epoch": 1.4183312950129094, "grad_norm": 0.8117307424545288, "learning_rate": 0.0008227085881233864, "loss": 3.6442, "step": 20875 }, { "epoch": 1.418671015083571, "grad_norm": 0.8307729363441467, "learning_rate": 0.0008226661231145536, "loss": 3.6969, "step": 20880 }, { "epoch": 1.419010735154233, "grad_norm": 0.7594723105430603, "learning_rate": 0.0008226236581057209, "loss": 3.6485, "step": 20885 }, { "epoch": 1.4193504552248948, "grad_norm": 0.7602181434631348, "learning_rate": 0.0008225811930968882, "loss": 3.6384, "step": 20890 }, { "epoch": 1.4196901752955564, "grad_norm": 0.9653111100196838, "learning_rate": 0.0008225387280880554, "loss": 3.8369, "step": 20895 }, { "epoch": 1.4200298953662183, "grad_norm": 0.9489719867706299, "learning_rate": 0.0008224962630792227, "loss": 3.7231, "step": 20900 }, { "epoch": 1.4203696154368801, "grad_norm": 0.9118810296058655, "learning_rate": 0.0008224537980703901, "loss": 3.4382, "step": 20905 }, { "epoch": 1.4207093355075417, "grad_norm": 0.893730878829956, "learning_rate": 0.0008224113330615573, "loss": 3.788, "step": 20910 }, { "epoch": 1.4210490555782036, "grad_norm": 0.7363401055335999, "learning_rate": 0.0008223688680527246, "loss": 3.5311, "step": 20915 }, { "epoch": 1.4213887756488655, "grad_norm": 0.8287197351455688, "learning_rate": 0.0008223264030438918, "loss": 3.7329, "step": 20920 }, { "epoch": 1.421728495719527, "grad_norm": 1.3203455209732056, "learning_rate": 0.0008222839380350591, "loss": 3.6321, "step": 20925 }, { "epoch": 1.422068215790189, "grad_norm": 0.870648980140686, "learning_rate": 0.0008222414730262264, "loss": 3.5652, "step": 20930 }, { "epoch": 1.4224079358608508, "grad_norm": 0.9471779465675354, "learning_rate": 0.0008221990080173936, "loss": 3.5954, "step": 20935 }, { "epoch": 1.4227476559315124, "grad_norm": 0.7863608598709106, "learning_rate": 0.000822156543008561, "loss": 3.4203, "step": 20940 }, { "epoch": 1.4230873760021743, "grad_norm": 0.8741341233253479, "learning_rate": 0.0008221140779997283, "loss": 3.4349, "step": 20945 }, { "epoch": 1.423427096072836, "grad_norm": 1.0044779777526855, "learning_rate": 0.0008220716129908955, "loss": 3.4346, "step": 20950 }, { "epoch": 1.4237668161434978, "grad_norm": 0.7439308166503906, "learning_rate": 0.0008220291479820629, "loss": 3.8433, "step": 20955 }, { "epoch": 1.4241065362141596, "grad_norm": 0.8374903202056885, "learning_rate": 0.0008219866829732301, "loss": 3.7173, "step": 20960 }, { "epoch": 1.4244462562848212, "grad_norm": 1.6416101455688477, "learning_rate": 0.0008219442179643973, "loss": 3.5514, "step": 20965 }, { "epoch": 1.424785976355483, "grad_norm": 1.4563789367675781, "learning_rate": 0.0008219017529555646, "loss": 3.7034, "step": 20970 }, { "epoch": 1.4251256964261447, "grad_norm": 0.7204855680465698, "learning_rate": 0.000821859287946732, "loss": 3.4418, "step": 20975 }, { "epoch": 1.4254654164968066, "grad_norm": 0.8681305646896362, "learning_rate": 0.0008218168229378992, "loss": 3.8385, "step": 20980 }, { "epoch": 1.4258051365674684, "grad_norm": 2.1982781887054443, "learning_rate": 0.0008217743579290665, "loss": 3.5276, "step": 20985 }, { "epoch": 1.42614485663813, "grad_norm": 1.1381447315216064, "learning_rate": 0.0008217318929202338, "loss": 3.4344, "step": 20990 }, { "epoch": 1.426484576708792, "grad_norm": 0.7454198002815247, "learning_rate": 0.000821689427911401, "loss": 3.352, "step": 20995 }, { "epoch": 1.4268242967794538, "grad_norm": 0.7620583772659302, "learning_rate": 0.0008216469629025683, "loss": 3.5581, "step": 21000 }, { "epoch": 1.4271640168501154, "grad_norm": 0.7967021465301514, "learning_rate": 0.0008216044978937355, "loss": 3.7029, "step": 21005 }, { "epoch": 1.4275037369207773, "grad_norm": 0.8864526152610779, "learning_rate": 0.0008215620328849029, "loss": 3.7122, "step": 21010 }, { "epoch": 1.427843456991439, "grad_norm": 0.7317259907722473, "learning_rate": 0.0008215195678760702, "loss": 3.8906, "step": 21015 }, { "epoch": 1.4281831770621007, "grad_norm": 0.8627270460128784, "learning_rate": 0.0008214771028672374, "loss": 3.4338, "step": 21020 }, { "epoch": 1.4285228971327626, "grad_norm": 1.137544870376587, "learning_rate": 0.0008214346378584047, "loss": 3.54, "step": 21025 }, { "epoch": 1.4288626172034244, "grad_norm": 0.8415155410766602, "learning_rate": 0.000821392172849572, "loss": 3.6983, "step": 21030 }, { "epoch": 1.429202337274086, "grad_norm": 0.784503161907196, "learning_rate": 0.0008213497078407392, "loss": 3.6645, "step": 21035 }, { "epoch": 1.429542057344748, "grad_norm": 0.7717975378036499, "learning_rate": 0.0008213072428319065, "loss": 3.8199, "step": 21040 }, { "epoch": 1.4298817774154098, "grad_norm": 0.6637728214263916, "learning_rate": 0.0008212647778230739, "loss": 3.6402, "step": 21045 }, { "epoch": 1.4302214974860714, "grad_norm": 0.7215205430984497, "learning_rate": 0.0008212223128142411, "loss": 3.8438, "step": 21050 }, { "epoch": 1.4305612175567333, "grad_norm": 0.848948061466217, "learning_rate": 0.0008211798478054083, "loss": 3.74, "step": 21055 }, { "epoch": 1.4309009376273951, "grad_norm": 0.9796594381332397, "learning_rate": 0.0008211373827965757, "loss": 3.6688, "step": 21060 }, { "epoch": 1.4312406576980568, "grad_norm": 0.658288836479187, "learning_rate": 0.0008210949177877429, "loss": 3.6781, "step": 21065 }, { "epoch": 1.4315803777687186, "grad_norm": 0.8367807865142822, "learning_rate": 0.0008210524527789101, "loss": 3.478, "step": 21070 }, { "epoch": 1.4319200978393805, "grad_norm": 0.9000985026359558, "learning_rate": 0.0008210099877700775, "loss": 3.493, "step": 21075 }, { "epoch": 1.432259817910042, "grad_norm": 1.0970194339752197, "learning_rate": 0.0008209675227612448, "loss": 3.5552, "step": 21080 }, { "epoch": 1.432599537980704, "grad_norm": 1.3618448972702026, "learning_rate": 0.000820925057752412, "loss": 3.6232, "step": 21085 }, { "epoch": 1.4329392580513658, "grad_norm": 0.9046831130981445, "learning_rate": 0.0008208825927435794, "loss": 3.5048, "step": 21090 }, { "epoch": 1.4332789781220274, "grad_norm": 0.7496330142021179, "learning_rate": 0.0008208401277347466, "loss": 3.5051, "step": 21095 }, { "epoch": 1.4336186981926893, "grad_norm": 1.044731616973877, "learning_rate": 0.0008207976627259138, "loss": 3.3247, "step": 21100 }, { "epoch": 1.4339584182633511, "grad_norm": 0.8243861198425293, "learning_rate": 0.0008207551977170811, "loss": 3.4564, "step": 21105 }, { "epoch": 1.4342981383340128, "grad_norm": 0.6760514378547668, "learning_rate": 0.0008207127327082484, "loss": 3.6602, "step": 21110 }, { "epoch": 1.4346378584046746, "grad_norm": 0.792583167552948, "learning_rate": 0.0008206702676994157, "loss": 3.5704, "step": 21115 }, { "epoch": 1.4349775784753362, "grad_norm": 0.9021437168121338, "learning_rate": 0.000820627802690583, "loss": 3.7041, "step": 21120 }, { "epoch": 1.435317298545998, "grad_norm": 0.7091696262359619, "learning_rate": 0.0008205853376817503, "loss": 3.5509, "step": 21125 }, { "epoch": 1.43565701861666, "grad_norm": 1.3741788864135742, "learning_rate": 0.0008205428726729175, "loss": 3.7019, "step": 21130 }, { "epoch": 1.4359967386873216, "grad_norm": 0.8589696884155273, "learning_rate": 0.0008205004076640848, "loss": 3.6868, "step": 21135 }, { "epoch": 1.4363364587579834, "grad_norm": 0.8531078100204468, "learning_rate": 0.0008204579426552521, "loss": 3.6759, "step": 21140 }, { "epoch": 1.436676178828645, "grad_norm": 0.9978375434875488, "learning_rate": 0.0008204154776464193, "loss": 3.6129, "step": 21145 }, { "epoch": 1.437015898899307, "grad_norm": 1.1907693147659302, "learning_rate": 0.0008203730126375867, "loss": 3.5617, "step": 21150 }, { "epoch": 1.4373556189699688, "grad_norm": 0.7551723122596741, "learning_rate": 0.000820330547628754, "loss": 3.5167, "step": 21155 }, { "epoch": 1.4376953390406304, "grad_norm": 0.9808022975921631, "learning_rate": 0.0008202880826199212, "loss": 3.513, "step": 21160 }, { "epoch": 1.4380350591112923, "grad_norm": 1.1801602840423584, "learning_rate": 0.0008202456176110885, "loss": 3.6696, "step": 21165 }, { "epoch": 1.438374779181954, "grad_norm": 4.158160209655762, "learning_rate": 0.0008202031526022557, "loss": 4.0072, "step": 21170 }, { "epoch": 1.4387144992526157, "grad_norm": 1.271159052848816, "learning_rate": 0.000820160687593423, "loss": 3.6832, "step": 21175 }, { "epoch": 1.4390542193232776, "grad_norm": 0.6326258778572083, "learning_rate": 0.0008201182225845903, "loss": 3.598, "step": 21180 }, { "epoch": 1.4393939393939394, "grad_norm": 0.7248108983039856, "learning_rate": 0.0008200757575757576, "loss": 3.7626, "step": 21185 }, { "epoch": 1.439733659464601, "grad_norm": 1.2428157329559326, "learning_rate": 0.0008200332925669249, "loss": 3.4822, "step": 21190 }, { "epoch": 1.440073379535263, "grad_norm": 0.877465546131134, "learning_rate": 0.0008199908275580922, "loss": 3.6096, "step": 21195 }, { "epoch": 1.4404130996059248, "grad_norm": 0.8538775444030762, "learning_rate": 0.0008199483625492594, "loss": 3.5902, "step": 21200 }, { "epoch": 1.4407528196765864, "grad_norm": 1.1947253942489624, "learning_rate": 0.0008199058975404266, "loss": 3.9568, "step": 21205 }, { "epoch": 1.4410925397472483, "grad_norm": 0.7671557068824768, "learning_rate": 0.000819863432531594, "loss": 3.5745, "step": 21210 }, { "epoch": 1.4414322598179101, "grad_norm": 0.7160440683364868, "learning_rate": 0.0008198209675227612, "loss": 3.7933, "step": 21215 }, { "epoch": 1.4417719798885718, "grad_norm": 0.8745556473731995, "learning_rate": 0.0008197785025139285, "loss": 3.7365, "step": 21220 }, { "epoch": 1.4421116999592336, "grad_norm": 0.8915352821350098, "learning_rate": 0.0008197360375050959, "loss": 3.7093, "step": 21225 }, { "epoch": 1.4424514200298955, "grad_norm": 1.2859364748001099, "learning_rate": 0.0008196935724962631, "loss": 3.6747, "step": 21230 }, { "epoch": 1.442791140100557, "grad_norm": 1.118139386177063, "learning_rate": 0.0008196511074874303, "loss": 3.6023, "step": 21235 }, { "epoch": 1.443130860171219, "grad_norm": 0.8791807889938354, "learning_rate": 0.0008196086424785977, "loss": 3.5695, "step": 21240 }, { "epoch": 1.4434705802418808, "grad_norm": 0.8088740110397339, "learning_rate": 0.0008195661774697649, "loss": 3.7669, "step": 21245 }, { "epoch": 1.4438103003125424, "grad_norm": 0.8321335911750793, "learning_rate": 0.0008195237124609321, "loss": 3.4593, "step": 21250 }, { "epoch": 1.4441500203832043, "grad_norm": 0.9366874694824219, "learning_rate": 0.0008194812474520996, "loss": 3.7584, "step": 21255 }, { "epoch": 1.4444897404538661, "grad_norm": 0.8399820923805237, "learning_rate": 0.0008194387824432668, "loss": 3.4822, "step": 21260 }, { "epoch": 1.4448294605245278, "grad_norm": 0.8835268616676331, "learning_rate": 0.000819396317434434, "loss": 3.4674, "step": 21265 }, { "epoch": 1.4451691805951896, "grad_norm": 0.7493513822555542, "learning_rate": 0.0008193538524256013, "loss": 3.6053, "step": 21270 }, { "epoch": 1.4455089006658515, "grad_norm": 1.2194836139678955, "learning_rate": 0.0008193113874167686, "loss": 3.5036, "step": 21275 }, { "epoch": 1.445848620736513, "grad_norm": 0.7187374234199524, "learning_rate": 0.0008192689224079358, "loss": 3.5007, "step": 21280 }, { "epoch": 1.446188340807175, "grad_norm": 0.9049649238586426, "learning_rate": 0.0008192264573991031, "loss": 3.7869, "step": 21285 }, { "epoch": 1.4465280608778366, "grad_norm": 0.7639972567558289, "learning_rate": 0.0008191839923902705, "loss": 3.9195, "step": 21290 }, { "epoch": 1.4468677809484984, "grad_norm": 0.8590627908706665, "learning_rate": 0.0008191415273814378, "loss": 3.5509, "step": 21295 }, { "epoch": 1.4472075010191603, "grad_norm": 0.7979729771614075, "learning_rate": 0.000819099062372605, "loss": 3.677, "step": 21300 }, { "epoch": 1.447547221089822, "grad_norm": 0.9269406199455261, "learning_rate": 0.0008190565973637722, "loss": 3.5252, "step": 21305 }, { "epoch": 1.4478869411604838, "grad_norm": 0.9275681972503662, "learning_rate": 0.0008190141323549396, "loss": 3.5098, "step": 21310 }, { "epoch": 1.4482266612311454, "grad_norm": 0.8810122609138489, "learning_rate": 0.0008189716673461068, "loss": 3.5391, "step": 21315 }, { "epoch": 1.4485663813018073, "grad_norm": 0.8511087894439697, "learning_rate": 0.000818929202337274, "loss": 3.5226, "step": 21320 }, { "epoch": 1.4489061013724691, "grad_norm": 0.9180045127868652, "learning_rate": 0.0008188867373284415, "loss": 3.7932, "step": 21325 }, { "epoch": 1.4492458214431307, "grad_norm": 0.8433115482330322, "learning_rate": 0.0008188442723196087, "loss": 3.5941, "step": 21330 }, { "epoch": 1.4495855415137926, "grad_norm": 0.939642608165741, "learning_rate": 0.0008188018073107759, "loss": 3.8022, "step": 21335 }, { "epoch": 1.4499252615844545, "grad_norm": 0.9234473705291748, "learning_rate": 0.0008187593423019433, "loss": 3.4986, "step": 21340 }, { "epoch": 1.450264981655116, "grad_norm": 0.7766485810279846, "learning_rate": 0.0008187168772931105, "loss": 3.7319, "step": 21345 }, { "epoch": 1.450604701725778, "grad_norm": 2.1646480560302734, "learning_rate": 0.0008186744122842777, "loss": 3.565, "step": 21350 }, { "epoch": 1.4509444217964398, "grad_norm": 1.0277379751205444, "learning_rate": 0.0008186319472754452, "loss": 3.6447, "step": 21355 }, { "epoch": 1.4512841418671014, "grad_norm": 0.9476875066757202, "learning_rate": 0.0008185894822666124, "loss": 3.6489, "step": 21360 }, { "epoch": 1.4516238619377633, "grad_norm": 0.7743902802467346, "learning_rate": 0.0008185470172577796, "loss": 3.5181, "step": 21365 }, { "epoch": 1.4519635820084251, "grad_norm": 0.7930931448936462, "learning_rate": 0.0008185045522489469, "loss": 3.9411, "step": 21370 }, { "epoch": 1.4523033020790868, "grad_norm": 0.8143540620803833, "learning_rate": 0.0008184620872401142, "loss": 3.5559, "step": 21375 }, { "epoch": 1.4526430221497486, "grad_norm": 1.3362376689910889, "learning_rate": 0.0008184196222312814, "loss": 3.3382, "step": 21380 }, { "epoch": 1.4529827422204105, "grad_norm": 0.9951709508895874, "learning_rate": 0.0008183771572224487, "loss": 3.7575, "step": 21385 }, { "epoch": 1.453322462291072, "grad_norm": 0.8831445574760437, "learning_rate": 0.0008183346922136161, "loss": 3.7641, "step": 21390 }, { "epoch": 1.453662182361734, "grad_norm": 0.9894601106643677, "learning_rate": 0.0008182922272047833, "loss": 3.3111, "step": 21395 }, { "epoch": 1.4540019024323958, "grad_norm": 0.8318936824798584, "learning_rate": 0.0008182497621959506, "loss": 3.4487, "step": 21400 }, { "epoch": 1.4543416225030574, "grad_norm": 0.8804715871810913, "learning_rate": 0.0008182072971871178, "loss": 3.6597, "step": 21405 }, { "epoch": 1.4546813425737193, "grad_norm": 0.8738488554954529, "learning_rate": 0.0008181648321782851, "loss": 3.6583, "step": 21410 }, { "epoch": 1.4550210626443811, "grad_norm": 0.9786257147789001, "learning_rate": 0.0008181223671694524, "loss": 3.5933, "step": 21415 }, { "epoch": 1.4553607827150428, "grad_norm": 0.9737648367881775, "learning_rate": 0.0008180799021606196, "loss": 3.4951, "step": 21420 }, { "epoch": 1.4557005027857046, "grad_norm": 0.8127761483192444, "learning_rate": 0.000818037437151787, "loss": 3.4703, "step": 21425 }, { "epoch": 1.4560402228563665, "grad_norm": 1.0311543941497803, "learning_rate": 0.0008179949721429543, "loss": 3.6113, "step": 21430 }, { "epoch": 1.456379942927028, "grad_norm": 1.067214012145996, "learning_rate": 0.0008179525071341215, "loss": 3.4785, "step": 21435 }, { "epoch": 1.45671966299769, "grad_norm": 0.9844973087310791, "learning_rate": 0.0008179100421252888, "loss": 3.7457, "step": 21440 }, { "epoch": 1.4570593830683518, "grad_norm": 0.8678098917007446, "learning_rate": 0.0008178675771164561, "loss": 3.5818, "step": 21445 }, { "epoch": 1.4573991031390134, "grad_norm": 0.7208032011985779, "learning_rate": 0.0008178251121076233, "loss": 3.8525, "step": 21450 }, { "epoch": 1.4577388232096753, "grad_norm": 2.097007989883423, "learning_rate": 0.0008177826470987905, "loss": 3.7151, "step": 21455 }, { "epoch": 1.458078543280337, "grad_norm": 0.911551833152771, "learning_rate": 0.000817740182089958, "loss": 3.4568, "step": 21460 }, { "epoch": 1.4584182633509988, "grad_norm": 0.884174108505249, "learning_rate": 0.0008176977170811252, "loss": 3.7931, "step": 21465 }, { "epoch": 1.4587579834216606, "grad_norm": 2.025204658508301, "learning_rate": 0.0008176552520722924, "loss": 3.4686, "step": 21470 }, { "epoch": 1.4590977034923223, "grad_norm": 0.9569721817970276, "learning_rate": 0.0008176127870634598, "loss": 3.4445, "step": 21475 }, { "epoch": 1.4594374235629841, "grad_norm": 0.9006094336509705, "learning_rate": 0.000817570322054627, "loss": 3.7951, "step": 21480 }, { "epoch": 1.4597771436336457, "grad_norm": 1.010255217552185, "learning_rate": 0.0008175278570457942, "loss": 3.676, "step": 21485 }, { "epoch": 1.4601168637043076, "grad_norm": 1.0475374460220337, "learning_rate": 0.0008174853920369616, "loss": 3.9279, "step": 21490 }, { "epoch": 1.4604565837749695, "grad_norm": 1.5161429643630981, "learning_rate": 0.0008174429270281289, "loss": 3.8212, "step": 21495 }, { "epoch": 1.460796303845631, "grad_norm": 0.7942125797271729, "learning_rate": 0.0008174004620192961, "loss": 3.4719, "step": 21500 }, { "epoch": 1.461136023916293, "grad_norm": 1.2833255529403687, "learning_rate": 0.0008173579970104634, "loss": 3.5714, "step": 21505 }, { "epoch": 1.4614757439869548, "grad_norm": 0.845539927482605, "learning_rate": 0.0008173155320016307, "loss": 3.8547, "step": 21510 }, { "epoch": 1.4618154640576164, "grad_norm": 0.8955182433128357, "learning_rate": 0.0008172730669927979, "loss": 3.7075, "step": 21515 }, { "epoch": 1.4621551841282783, "grad_norm": 1.1749134063720703, "learning_rate": 0.0008172306019839652, "loss": 3.4106, "step": 21520 }, { "epoch": 1.4624949041989401, "grad_norm": 0.8004704117774963, "learning_rate": 0.0008171881369751325, "loss": 3.8432, "step": 21525 }, { "epoch": 1.4628346242696018, "grad_norm": 1.074240803718567, "learning_rate": 0.0008171456719662998, "loss": 3.5803, "step": 21530 }, { "epoch": 1.4631743443402636, "grad_norm": 2.3231027126312256, "learning_rate": 0.0008171032069574671, "loss": 3.7486, "step": 21535 }, { "epoch": 1.4635140644109255, "grad_norm": 2.1603333950042725, "learning_rate": 0.0008170607419486344, "loss": 3.6802, "step": 21540 }, { "epoch": 1.463853784481587, "grad_norm": 0.8936594128608704, "learning_rate": 0.0008170182769398016, "loss": 3.4125, "step": 21545 }, { "epoch": 1.464193504552249, "grad_norm": 0.8998619914054871, "learning_rate": 0.0008169758119309689, "loss": 3.842, "step": 21550 }, { "epoch": 1.4645332246229108, "grad_norm": 0.9896082878112793, "learning_rate": 0.0008169333469221361, "loss": 3.5288, "step": 21555 }, { "epoch": 1.4648729446935724, "grad_norm": 0.7342512011528015, "learning_rate": 0.0008168908819133034, "loss": 3.701, "step": 21560 }, { "epoch": 1.4652126647642343, "grad_norm": 0.7940000891685486, "learning_rate": 0.0008168484169044708, "loss": 3.8444, "step": 21565 }, { "epoch": 1.4655523848348961, "grad_norm": 1.2127254009246826, "learning_rate": 0.000816805951895638, "loss": 3.7677, "step": 21570 }, { "epoch": 1.4658921049055578, "grad_norm": 0.7867873311042786, "learning_rate": 0.0008167634868868053, "loss": 3.7963, "step": 21575 }, { "epoch": 1.4662318249762196, "grad_norm": 0.9748193621635437, "learning_rate": 0.0008167210218779726, "loss": 3.9508, "step": 21580 }, { "epoch": 1.4665715450468815, "grad_norm": 0.743140459060669, "learning_rate": 0.0008166785568691398, "loss": 3.6243, "step": 21585 }, { "epoch": 1.466911265117543, "grad_norm": 0.7631969451904297, "learning_rate": 0.000816636091860307, "loss": 3.5035, "step": 21590 }, { "epoch": 1.467250985188205, "grad_norm": 0.8958408832550049, "learning_rate": 0.0008165936268514744, "loss": 3.6533, "step": 21595 }, { "epoch": 1.4675907052588668, "grad_norm": 0.8481122851371765, "learning_rate": 0.0008165511618426417, "loss": 3.7868, "step": 21600 }, { "epoch": 1.4679304253295284, "grad_norm": 1.8762288093566895, "learning_rate": 0.0008165086968338089, "loss": 3.6979, "step": 21605 }, { "epoch": 1.4682701454001903, "grad_norm": 1.0441898107528687, "learning_rate": 0.0008164662318249763, "loss": 3.7121, "step": 21610 }, { "epoch": 1.4686098654708521, "grad_norm": 0.8871060609817505, "learning_rate": 0.0008164237668161435, "loss": 3.4828, "step": 21615 }, { "epoch": 1.4689495855415138, "grad_norm": 0.9585723280906677, "learning_rate": 0.0008163813018073107, "loss": 3.5887, "step": 21620 }, { "epoch": 1.4692893056121756, "grad_norm": 0.8343510627746582, "learning_rate": 0.0008163388367984781, "loss": 3.3523, "step": 21625 }, { "epoch": 1.4696290256828373, "grad_norm": 0.7199278473854065, "learning_rate": 0.0008162963717896453, "loss": 3.3846, "step": 21630 }, { "epoch": 1.4699687457534991, "grad_norm": 0.9149731993675232, "learning_rate": 0.0008162539067808127, "loss": 3.7782, "step": 21635 }, { "epoch": 1.470308465824161, "grad_norm": 0.9607492089271545, "learning_rate": 0.00081621144177198, "loss": 3.3856, "step": 21640 }, { "epoch": 1.4706481858948226, "grad_norm": 0.7553168535232544, "learning_rate": 0.0008161689767631472, "loss": 3.4264, "step": 21645 }, { "epoch": 1.4709879059654845, "grad_norm": 0.8708540797233582, "learning_rate": 0.0008161265117543145, "loss": 3.6059, "step": 21650 }, { "epoch": 1.471327626036146, "grad_norm": 0.8537190556526184, "learning_rate": 0.0008160840467454817, "loss": 3.5533, "step": 21655 }, { "epoch": 1.471667346106808, "grad_norm": 0.8778212666511536, "learning_rate": 0.000816041581736649, "loss": 3.4298, "step": 21660 }, { "epoch": 1.4720070661774698, "grad_norm": 0.9492563009262085, "learning_rate": 0.0008159991167278163, "loss": 3.7448, "step": 21665 }, { "epoch": 1.4723467862481314, "grad_norm": 0.9947162866592407, "learning_rate": 0.0008159566517189836, "loss": 3.8784, "step": 21670 }, { "epoch": 1.4726865063187933, "grad_norm": 0.7637332677841187, "learning_rate": 0.0008159141867101509, "loss": 3.5681, "step": 21675 }, { "epoch": 1.4730262263894551, "grad_norm": 0.7588942646980286, "learning_rate": 0.0008158717217013182, "loss": 3.5862, "step": 21680 }, { "epoch": 1.4733659464601168, "grad_norm": 0.7561425566673279, "learning_rate": 0.0008158292566924854, "loss": 3.6088, "step": 21685 }, { "epoch": 1.4737056665307786, "grad_norm": 0.8163308501243591, "learning_rate": 0.0008157867916836526, "loss": 3.5612, "step": 21690 }, { "epoch": 1.4740453866014405, "grad_norm": 0.904755175113678, "learning_rate": 0.00081574432667482, "loss": 3.7589, "step": 21695 }, { "epoch": 1.474385106672102, "grad_norm": 0.7634891867637634, "learning_rate": 0.0008157018616659872, "loss": 3.5614, "step": 21700 }, { "epoch": 1.474724826742764, "grad_norm": 0.8672685623168945, "learning_rate": 0.0008156593966571545, "loss": 3.5227, "step": 21705 }, { "epoch": 1.4750645468134258, "grad_norm": 0.7154973745346069, "learning_rate": 0.0008156169316483219, "loss": 3.595, "step": 21710 }, { "epoch": 1.4754042668840874, "grad_norm": 0.6683629751205444, "learning_rate": 0.0008155744666394891, "loss": 3.7091, "step": 21715 }, { "epoch": 1.4757439869547493, "grad_norm": 1.1849454641342163, "learning_rate": 0.0008155320016306563, "loss": 3.5867, "step": 21720 }, { "epoch": 1.4760837070254111, "grad_norm": 1.078500747680664, "learning_rate": 0.0008154895366218237, "loss": 3.6121, "step": 21725 }, { "epoch": 1.4764234270960728, "grad_norm": 0.6529405117034912, "learning_rate": 0.0008154470716129909, "loss": 3.5905, "step": 21730 }, { "epoch": 1.4767631471667346, "grad_norm": 0.7779830098152161, "learning_rate": 0.0008154046066041581, "loss": 3.5447, "step": 21735 }, { "epoch": 1.4771028672373965, "grad_norm": 0.8573459982872009, "learning_rate": 0.0008153621415953256, "loss": 3.4908, "step": 21740 }, { "epoch": 1.477442587308058, "grad_norm": 0.7864373326301575, "learning_rate": 0.0008153196765864928, "loss": 3.5203, "step": 21745 }, { "epoch": 1.47778230737872, "grad_norm": 0.9067627787590027, "learning_rate": 0.00081527721157766, "loss": 3.6091, "step": 21750 }, { "epoch": 1.4781220274493818, "grad_norm": 0.8581112027168274, "learning_rate": 0.0008152347465688273, "loss": 3.6542, "step": 21755 }, { "epoch": 1.4784617475200434, "grad_norm": 0.7228453755378723, "learning_rate": 0.0008151922815599946, "loss": 3.7159, "step": 21760 }, { "epoch": 1.4788014675907053, "grad_norm": 0.8718706965446472, "learning_rate": 0.0008151498165511618, "loss": 3.9812, "step": 21765 }, { "epoch": 1.4791411876613672, "grad_norm": 0.7447729110717773, "learning_rate": 0.0008151073515423291, "loss": 3.5932, "step": 21770 }, { "epoch": 1.4794809077320288, "grad_norm": 0.8128122091293335, "learning_rate": 0.0008150648865334965, "loss": 3.55, "step": 21775 }, { "epoch": 1.4798206278026906, "grad_norm": 0.7510889172554016, "learning_rate": 0.0008150224215246637, "loss": 3.4492, "step": 21780 }, { "epoch": 1.4801603478733525, "grad_norm": 0.6616800427436829, "learning_rate": 0.000814979956515831, "loss": 3.8175, "step": 21785 }, { "epoch": 1.4805000679440141, "grad_norm": 4.121511936187744, "learning_rate": 0.0008149374915069982, "loss": 3.6536, "step": 21790 }, { "epoch": 1.480839788014676, "grad_norm": 0.7207654118537903, "learning_rate": 0.0008148950264981655, "loss": 3.7986, "step": 21795 }, { "epoch": 1.4811795080853376, "grad_norm": 0.9350873231887817, "learning_rate": 0.0008148525614893328, "loss": 3.6416, "step": 21800 }, { "epoch": 1.4815192281559995, "grad_norm": 0.6798318028450012, "learning_rate": 0.0008148100964805, "loss": 3.8267, "step": 21805 }, { "epoch": 1.4818589482266613, "grad_norm": 0.6271336674690247, "learning_rate": 0.0008147676314716674, "loss": 3.6608, "step": 21810 }, { "epoch": 1.482198668297323, "grad_norm": 1.019269585609436, "learning_rate": 0.0008147251664628347, "loss": 3.835, "step": 21815 }, { "epoch": 1.4825383883679848, "grad_norm": 0.7201334238052368, "learning_rate": 0.0008146827014540019, "loss": 3.4257, "step": 21820 }, { "epoch": 1.4828781084386464, "grad_norm": 0.74535071849823, "learning_rate": 0.0008146402364451692, "loss": 3.8975, "step": 21825 }, { "epoch": 1.4832178285093083, "grad_norm": 1.012795090675354, "learning_rate": 0.0008145977714363365, "loss": 3.67, "step": 21830 }, { "epoch": 1.4835575485799701, "grad_norm": 1.1203855276107788, "learning_rate": 0.0008145553064275037, "loss": 3.7473, "step": 21835 }, { "epoch": 1.4838972686506318, "grad_norm": 1.1410645246505737, "learning_rate": 0.0008145128414186709, "loss": 3.6254, "step": 21840 }, { "epoch": 1.4842369887212936, "grad_norm": 0.8051386475563049, "learning_rate": 0.0008144703764098384, "loss": 3.6534, "step": 21845 }, { "epoch": 1.4845767087919555, "grad_norm": 0.9999968409538269, "learning_rate": 0.0008144279114010056, "loss": 3.5057, "step": 21850 }, { "epoch": 1.484916428862617, "grad_norm": 1.6259433031082153, "learning_rate": 0.0008143854463921728, "loss": 3.8802, "step": 21855 }, { "epoch": 1.485256148933279, "grad_norm": 0.9761654138565063, "learning_rate": 0.0008143429813833402, "loss": 3.6888, "step": 21860 }, { "epoch": 1.4855958690039408, "grad_norm": 0.9813851714134216, "learning_rate": 0.0008143005163745074, "loss": 3.8576, "step": 21865 }, { "epoch": 1.4859355890746024, "grad_norm": 1.0830596685409546, "learning_rate": 0.0008142580513656746, "loss": 3.4483, "step": 21870 }, { "epoch": 1.4862753091452643, "grad_norm": 0.8879253268241882, "learning_rate": 0.000814215586356842, "loss": 3.513, "step": 21875 }, { "epoch": 1.4866150292159261, "grad_norm": 0.9507722854614258, "learning_rate": 0.0008141731213480093, "loss": 3.7061, "step": 21880 }, { "epoch": 1.4869547492865878, "grad_norm": 0.7310115694999695, "learning_rate": 0.0008141306563391765, "loss": 3.3683, "step": 21885 }, { "epoch": 1.4872944693572496, "grad_norm": 0.8615754842758179, "learning_rate": 0.0008140881913303439, "loss": 3.3743, "step": 21890 }, { "epoch": 1.4876341894279115, "grad_norm": 1.008418321609497, "learning_rate": 0.0008140457263215111, "loss": 3.667, "step": 21895 }, { "epoch": 1.487973909498573, "grad_norm": 1.0290607213974, "learning_rate": 0.0008140032613126783, "loss": 3.4433, "step": 21900 }, { "epoch": 1.488313629569235, "grad_norm": 0.9199928045272827, "learning_rate": 0.0008139607963038456, "loss": 3.6276, "step": 21905 }, { "epoch": 1.4886533496398968, "grad_norm": 0.7884456515312195, "learning_rate": 0.0008139183312950129, "loss": 3.2784, "step": 21910 }, { "epoch": 1.4889930697105584, "grad_norm": 0.9601994752883911, "learning_rate": 0.0008138758662861802, "loss": 3.5129, "step": 21915 }, { "epoch": 1.4893327897812203, "grad_norm": 0.9612324237823486, "learning_rate": 0.0008138334012773475, "loss": 3.4873, "step": 21920 }, { "epoch": 1.4896725098518822, "grad_norm": 1.1331292390823364, "learning_rate": 0.0008137909362685148, "loss": 3.3827, "step": 21925 }, { "epoch": 1.4900122299225438, "grad_norm": 0.9957653880119324, "learning_rate": 0.000813748471259682, "loss": 3.7574, "step": 21930 }, { "epoch": 1.4903519499932056, "grad_norm": 0.7885183095932007, "learning_rate": 0.0008137060062508493, "loss": 3.7064, "step": 21935 }, { "epoch": 1.4906916700638675, "grad_norm": 0.8184173107147217, "learning_rate": 0.0008136635412420165, "loss": 3.7318, "step": 21940 }, { "epoch": 1.4910313901345291, "grad_norm": 0.8275867104530334, "learning_rate": 0.0008136210762331838, "loss": 3.5079, "step": 21945 }, { "epoch": 1.491371110205191, "grad_norm": 1.0202633142471313, "learning_rate": 0.0008135786112243512, "loss": 3.6899, "step": 21950 }, { "epoch": 1.4917108302758528, "grad_norm": 0.795844316482544, "learning_rate": 0.0008135361462155184, "loss": 3.6045, "step": 21955 }, { "epoch": 1.4920505503465145, "grad_norm": 0.760252833366394, "learning_rate": 0.0008134936812066857, "loss": 3.629, "step": 21960 }, { "epoch": 1.4923902704171763, "grad_norm": 0.8256102204322815, "learning_rate": 0.000813451216197853, "loss": 3.5065, "step": 21965 }, { "epoch": 1.492729990487838, "grad_norm": 44.190853118896484, "learning_rate": 0.0008134087511890202, "loss": 3.5083, "step": 21970 }, { "epoch": 1.4930697105584998, "grad_norm": 0.9282599091529846, "learning_rate": 0.0008133662861801876, "loss": 3.738, "step": 21975 }, { "epoch": 1.4934094306291616, "grad_norm": 0.8702162504196167, "learning_rate": 0.0008133238211713549, "loss": 3.6939, "step": 21980 }, { "epoch": 1.4937491506998233, "grad_norm": 1.1665629148483276, "learning_rate": 0.0008132813561625221, "loss": 3.3626, "step": 21985 }, { "epoch": 1.4940888707704851, "grad_norm": 0.8936403393745422, "learning_rate": 0.0008132388911536895, "loss": 3.5156, "step": 21990 }, { "epoch": 1.4944285908411468, "grad_norm": 0.8541828393936157, "learning_rate": 0.0008131964261448567, "loss": 3.639, "step": 21995 }, { "epoch": 1.4947683109118086, "grad_norm": 0.8075734972953796, "learning_rate": 0.0008131539611360239, "loss": 3.7343, "step": 22000 }, { "epoch": 1.4951080309824705, "grad_norm": 2.090625762939453, "learning_rate": 0.0008131114961271912, "loss": 3.7884, "step": 22005 }, { "epoch": 1.495447751053132, "grad_norm": 0.9424380660057068, "learning_rate": 0.0008130690311183585, "loss": 3.5661, "step": 22010 }, { "epoch": 1.495787471123794, "grad_norm": 0.8785465955734253, "learning_rate": 0.0008130265661095258, "loss": 3.6538, "step": 22015 }, { "epoch": 1.4961271911944558, "grad_norm": 0.7870867848396301, "learning_rate": 0.0008129841011006931, "loss": 3.6814, "step": 22020 }, { "epoch": 1.4964669112651174, "grad_norm": 0.862997829914093, "learning_rate": 0.0008129416360918604, "loss": 3.5377, "step": 22025 }, { "epoch": 1.4968066313357793, "grad_norm": 0.8783271312713623, "learning_rate": 0.0008128991710830276, "loss": 3.3761, "step": 22030 }, { "epoch": 1.4971463514064411, "grad_norm": 0.9661562442779541, "learning_rate": 0.0008128567060741949, "loss": 3.85, "step": 22035 }, { "epoch": 1.4974860714771028, "grad_norm": 1.013491153717041, "learning_rate": 0.0008128142410653621, "loss": 3.5776, "step": 22040 }, { "epoch": 1.4978257915477646, "grad_norm": 1.3911138772964478, "learning_rate": 0.0008127717760565294, "loss": 3.6554, "step": 22045 }, { "epoch": 1.4981655116184265, "grad_norm": 0.8019959926605225, "learning_rate": 0.0008127293110476968, "loss": 3.6283, "step": 22050 }, { "epoch": 1.4985052316890881, "grad_norm": 0.806312084197998, "learning_rate": 0.000812686846038864, "loss": 3.4803, "step": 22055 }, { "epoch": 1.49884495175975, "grad_norm": 0.9606966376304626, "learning_rate": 0.0008126443810300313, "loss": 3.6998, "step": 22060 }, { "epoch": 1.4991846718304118, "grad_norm": 0.7715378403663635, "learning_rate": 0.0008126019160211986, "loss": 3.9484, "step": 22065 }, { "epoch": 1.4995243919010735, "grad_norm": 2.489417552947998, "learning_rate": 0.0008125594510123658, "loss": 3.4609, "step": 22070 }, { "epoch": 1.4998641119717353, "grad_norm": 4.652007102966309, "learning_rate": 0.000812516986003533, "loss": 3.7212, "step": 22075 }, { "epoch": 1.5002038320423972, "grad_norm": 0.8743653297424316, "learning_rate": 0.0008124745209947004, "loss": 3.7734, "step": 22080 }, { "epoch": 1.5005435521130588, "grad_norm": 0.8106618523597717, "learning_rate": 0.0008124320559858677, "loss": 3.4748, "step": 22085 }, { "epoch": 1.5008832721837206, "grad_norm": 0.9319062829017639, "learning_rate": 0.0008123895909770349, "loss": 3.5262, "step": 22090 }, { "epoch": 1.5012229922543825, "grad_norm": 1.1792935132980347, "learning_rate": 0.0008123471259682023, "loss": 3.6257, "step": 22095 }, { "epoch": 1.5015627123250441, "grad_norm": 0.8822785019874573, "learning_rate": 0.0008123046609593695, "loss": 3.4636, "step": 22100 }, { "epoch": 1.501902432395706, "grad_norm": 1.0079622268676758, "learning_rate": 0.0008122621959505367, "loss": 3.7314, "step": 22105 }, { "epoch": 1.5022421524663678, "grad_norm": 0.7648638486862183, "learning_rate": 0.0008122197309417041, "loss": 3.7662, "step": 22110 }, { "epoch": 1.5025818725370295, "grad_norm": 0.9404330253601074, "learning_rate": 0.0008121772659328713, "loss": 3.6336, "step": 22115 }, { "epoch": 1.5029215926076913, "grad_norm": 0.7802832126617432, "learning_rate": 0.0008121348009240386, "loss": 3.4796, "step": 22120 }, { "epoch": 1.5032613126783532, "grad_norm": 0.8400198817253113, "learning_rate": 0.000812092335915206, "loss": 3.6483, "step": 22125 }, { "epoch": 1.5036010327490148, "grad_norm": 0.7932274341583252, "learning_rate": 0.0008120498709063732, "loss": 3.4743, "step": 22130 }, { "epoch": 1.5039407528196764, "grad_norm": 1.0990383625030518, "learning_rate": 0.0008120074058975404, "loss": 3.6033, "step": 22135 }, { "epoch": 1.5042804728903385, "grad_norm": 0.9605428576469421, "learning_rate": 0.0008119649408887077, "loss": 3.6626, "step": 22140 }, { "epoch": 1.5046201929610001, "grad_norm": 1.0197434425354004, "learning_rate": 0.000811922475879875, "loss": 3.7524, "step": 22145 }, { "epoch": 1.5049599130316618, "grad_norm": 1.150770902633667, "learning_rate": 0.0008118800108710422, "loss": 3.7737, "step": 22150 }, { "epoch": 1.5052996331023238, "grad_norm": 1.3884674310684204, "learning_rate": 0.0008118375458622096, "loss": 3.595, "step": 22155 }, { "epoch": 1.5056393531729855, "grad_norm": 0.9235925078392029, "learning_rate": 0.0008117950808533769, "loss": 3.6805, "step": 22160 }, { "epoch": 1.505979073243647, "grad_norm": 2.927175760269165, "learning_rate": 0.0008117526158445441, "loss": 3.782, "step": 22165 }, { "epoch": 1.506318793314309, "grad_norm": 0.9864637851715088, "learning_rate": 0.0008117101508357114, "loss": 3.4585, "step": 22170 }, { "epoch": 1.5066585133849708, "grad_norm": 1.06930673122406, "learning_rate": 0.0008116676858268787, "loss": 3.5276, "step": 22175 }, { "epoch": 1.5069982334556324, "grad_norm": 0.8951318264007568, "learning_rate": 0.0008116252208180459, "loss": 3.5389, "step": 22180 }, { "epoch": 1.5073379535262943, "grad_norm": 0.8548993468284607, "learning_rate": 0.0008115827558092132, "loss": 3.7338, "step": 22185 }, { "epoch": 1.5076776735969561, "grad_norm": 1.6443783044815063, "learning_rate": 0.0008115402908003805, "loss": 3.3473, "step": 22190 }, { "epoch": 1.5080173936676178, "grad_norm": 0.7876196503639221, "learning_rate": 0.0008114978257915478, "loss": 3.7573, "step": 22195 }, { "epoch": 1.5083571137382796, "grad_norm": 0.731320858001709, "learning_rate": 0.0008114553607827151, "loss": 3.6303, "step": 22200 }, { "epoch": 1.5086968338089415, "grad_norm": 1.1729212999343872, "learning_rate": 0.0008114128957738823, "loss": 3.7865, "step": 22205 }, { "epoch": 1.5090365538796031, "grad_norm": 1.0513426065444946, "learning_rate": 0.0008113704307650496, "loss": 3.5633, "step": 22210 }, { "epoch": 1.509376273950265, "grad_norm": 0.7319819927215576, "learning_rate": 0.0008113279657562169, "loss": 3.7131, "step": 22215 }, { "epoch": 1.5097159940209268, "grad_norm": 0.888192355632782, "learning_rate": 0.0008112855007473841, "loss": 3.5013, "step": 22220 }, { "epoch": 1.5100557140915885, "grad_norm": 0.9850166440010071, "learning_rate": 0.0008112430357385515, "loss": 3.5801, "step": 22225 }, { "epoch": 1.5103954341622503, "grad_norm": 1.0577248334884644, "learning_rate": 0.0008112005707297188, "loss": 3.5202, "step": 22230 }, { "epoch": 1.5107351542329122, "grad_norm": 1.1939570903778076, "learning_rate": 0.000811158105720886, "loss": 3.4592, "step": 22235 }, { "epoch": 1.5110748743035738, "grad_norm": 0.7630566358566284, "learning_rate": 0.0008111156407120532, "loss": 3.6854, "step": 22240 }, { "epoch": 1.5114145943742356, "grad_norm": 0.8147478699684143, "learning_rate": 0.0008110731757032206, "loss": 3.4123, "step": 22245 }, { "epoch": 1.5117543144448975, "grad_norm": 0.8937450647354126, "learning_rate": 0.0008110307106943878, "loss": 3.744, "step": 22250 }, { "epoch": 1.5120940345155591, "grad_norm": 0.9313421845436096, "learning_rate": 0.000810988245685555, "loss": 3.7744, "step": 22255 }, { "epoch": 1.512433754586221, "grad_norm": 0.8605682253837585, "learning_rate": 0.0008109457806767225, "loss": 3.5162, "step": 22260 }, { "epoch": 1.5127734746568828, "grad_norm": 0.7299453020095825, "learning_rate": 0.0008109033156678897, "loss": 3.581, "step": 22265 }, { "epoch": 1.5131131947275445, "grad_norm": 0.8222042918205261, "learning_rate": 0.0008108608506590569, "loss": 3.6304, "step": 22270 }, { "epoch": 1.5134529147982063, "grad_norm": 1.1752972602844238, "learning_rate": 0.0008108183856502243, "loss": 3.4502, "step": 22275 }, { "epoch": 1.5137926348688682, "grad_norm": 1.8046059608459473, "learning_rate": 0.0008107759206413915, "loss": 3.4202, "step": 22280 }, { "epoch": 1.5141323549395298, "grad_norm": 0.8829478025436401, "learning_rate": 0.0008107334556325587, "loss": 3.7306, "step": 22285 }, { "epoch": 1.5144720750101917, "grad_norm": 0.8909691572189331, "learning_rate": 0.000810690990623726, "loss": 3.6082, "step": 22290 }, { "epoch": 1.5148117950808535, "grad_norm": 1.0571426153182983, "learning_rate": 0.0008106485256148934, "loss": 3.4456, "step": 22295 }, { "epoch": 1.5151515151515151, "grad_norm": 1.4158740043640137, "learning_rate": 0.0008106060606060606, "loss": 3.6156, "step": 22300 }, { "epoch": 1.5154912352221768, "grad_norm": 0.9626006484031677, "learning_rate": 0.0008105635955972279, "loss": 3.8215, "step": 22305 }, { "epoch": 1.5158309552928388, "grad_norm": 1.6804776191711426, "learning_rate": 0.0008105211305883952, "loss": 3.5964, "step": 22310 }, { "epoch": 1.5161706753635005, "grad_norm": 0.9848420023918152, "learning_rate": 0.0008104786655795624, "loss": 3.7577, "step": 22315 }, { "epoch": 1.516510395434162, "grad_norm": 0.817219614982605, "learning_rate": 0.0008104362005707297, "loss": 3.6377, "step": 22320 }, { "epoch": 1.5168501155048242, "grad_norm": 0.9835785627365112, "learning_rate": 0.000810393735561897, "loss": 3.6006, "step": 22325 }, { "epoch": 1.5171898355754858, "grad_norm": 1.1600946187973022, "learning_rate": 0.0008103512705530644, "loss": 3.5452, "step": 22330 }, { "epoch": 1.5175295556461474, "grad_norm": 0.9806091785430908, "learning_rate": 0.0008103088055442316, "loss": 3.6534, "step": 22335 }, { "epoch": 1.5178692757168093, "grad_norm": 1.0273557901382446, "learning_rate": 0.0008102663405353988, "loss": 3.6165, "step": 22340 }, { "epoch": 1.5182089957874711, "grad_norm": 0.796470046043396, "learning_rate": 0.0008102238755265662, "loss": 3.5717, "step": 22345 }, { "epoch": 1.5185487158581328, "grad_norm": 0.9569476842880249, "learning_rate": 0.0008101814105177334, "loss": 3.5608, "step": 22350 }, { "epoch": 1.5188884359287946, "grad_norm": 1.0431102514266968, "learning_rate": 0.0008101389455089006, "loss": 3.5303, "step": 22355 }, { "epoch": 1.5192281559994565, "grad_norm": 0.7515780925750732, "learning_rate": 0.000810096480500068, "loss": 3.6888, "step": 22360 }, { "epoch": 1.5195678760701181, "grad_norm": 0.9718908071517944, "learning_rate": 0.0008100540154912353, "loss": 3.4985, "step": 22365 }, { "epoch": 1.51990759614078, "grad_norm": 1.195468544960022, "learning_rate": 0.0008100115504824025, "loss": 3.5277, "step": 22370 }, { "epoch": 1.5202473162114418, "grad_norm": 0.7781392335891724, "learning_rate": 0.0008099690854735699, "loss": 3.7776, "step": 22375 }, { "epoch": 1.5205870362821035, "grad_norm": 0.8142921328544617, "learning_rate": 0.0008099266204647371, "loss": 3.7104, "step": 22380 }, { "epoch": 1.5209267563527653, "grad_norm": 0.7960123419761658, "learning_rate": 0.0008098841554559043, "loss": 3.6966, "step": 22385 }, { "epoch": 1.5212664764234272, "grad_norm": 1.0521873235702515, "learning_rate": 0.0008098416904470716, "loss": 3.6761, "step": 22390 }, { "epoch": 1.5216061964940888, "grad_norm": 1.4839521646499634, "learning_rate": 0.0008097992254382389, "loss": 3.7996, "step": 22395 }, { "epoch": 1.5219459165647506, "grad_norm": 0.9616371393203735, "learning_rate": 0.0008097567604294062, "loss": 3.75, "step": 22400 }, { "epoch": 1.5222856366354125, "grad_norm": 1.0493491888046265, "learning_rate": 0.0008097142954205735, "loss": 3.6799, "step": 22405 }, { "epoch": 1.5226253567060741, "grad_norm": 0.8211082816123962, "learning_rate": 0.0008096718304117408, "loss": 3.8219, "step": 22410 }, { "epoch": 1.522965076776736, "grad_norm": 0.8053097724914551, "learning_rate": 0.000809629365402908, "loss": 3.6452, "step": 22415 }, { "epoch": 1.5233047968473978, "grad_norm": 0.9654778838157654, "learning_rate": 0.0008095869003940753, "loss": 3.6568, "step": 22420 }, { "epoch": 1.5236445169180595, "grad_norm": 0.8423956036567688, "learning_rate": 0.0008095444353852425, "loss": 3.752, "step": 22425 }, { "epoch": 1.5239842369887213, "grad_norm": 13.296154022216797, "learning_rate": 0.0008095019703764098, "loss": 3.3864, "step": 22430 }, { "epoch": 1.5243239570593832, "grad_norm": 1.3225758075714111, "learning_rate": 0.0008094595053675772, "loss": 3.5206, "step": 22435 }, { "epoch": 1.5246636771300448, "grad_norm": 0.8590450882911682, "learning_rate": 0.0008094170403587444, "loss": 3.7009, "step": 22440 }, { "epoch": 1.5250033972007067, "grad_norm": 0.8763025999069214, "learning_rate": 0.0008093745753499117, "loss": 3.6687, "step": 22445 }, { "epoch": 1.5253431172713685, "grad_norm": 1.0289757251739502, "learning_rate": 0.000809332110341079, "loss": 3.4923, "step": 22450 }, { "epoch": 1.5256828373420301, "grad_norm": 0.8347643613815308, "learning_rate": 0.0008092896453322462, "loss": 3.5813, "step": 22455 }, { "epoch": 1.526022557412692, "grad_norm": 0.9816063046455383, "learning_rate": 0.0008092471803234135, "loss": 3.7323, "step": 22460 }, { "epoch": 1.5263622774833538, "grad_norm": 0.7496846914291382, "learning_rate": 0.0008092047153145808, "loss": 3.6675, "step": 22465 }, { "epoch": 1.5267019975540155, "grad_norm": 0.8033835291862488, "learning_rate": 0.0008091622503057481, "loss": 3.5864, "step": 22470 }, { "epoch": 1.527041717624677, "grad_norm": 1.1840670108795166, "learning_rate": 0.0008091197852969153, "loss": 3.887, "step": 22475 }, { "epoch": 1.5273814376953392, "grad_norm": 0.8926982879638672, "learning_rate": 0.0008090773202880827, "loss": 3.6433, "step": 22480 }, { "epoch": 1.5277211577660008, "grad_norm": 0.8521348834037781, "learning_rate": 0.0008090348552792499, "loss": 3.8104, "step": 22485 }, { "epoch": 1.5280608778366624, "grad_norm": 1.0022072792053223, "learning_rate": 0.0008089923902704171, "loss": 3.7542, "step": 22490 }, { "epoch": 1.5284005979073245, "grad_norm": 0.9392140507698059, "learning_rate": 0.0008089499252615845, "loss": 3.6139, "step": 22495 }, { "epoch": 1.5287403179779862, "grad_norm": 1.1814770698547363, "learning_rate": 0.0008089074602527517, "loss": 3.6327, "step": 22500 }, { "epoch": 1.5290800380486478, "grad_norm": 0.7807260155677795, "learning_rate": 0.000808864995243919, "loss": 3.3598, "step": 22505 }, { "epoch": 1.5294197581193096, "grad_norm": 0.8285924792289734, "learning_rate": 0.0008088225302350864, "loss": 3.5522, "step": 22510 }, { "epoch": 1.5297594781899715, "grad_norm": 0.7884901762008667, "learning_rate": 0.0008087800652262536, "loss": 3.4582, "step": 22515 }, { "epoch": 1.5300991982606331, "grad_norm": 1.1802457571029663, "learning_rate": 0.0008087376002174208, "loss": 3.2962, "step": 22520 }, { "epoch": 1.530438918331295, "grad_norm": 0.7172727584838867, "learning_rate": 0.0008086951352085881, "loss": 3.7662, "step": 22525 }, { "epoch": 1.5307786384019568, "grad_norm": 0.6931126713752747, "learning_rate": 0.0008086526701997554, "loss": 3.5946, "step": 22530 }, { "epoch": 1.5311183584726185, "grad_norm": 1.9215635061264038, "learning_rate": 0.0008086102051909226, "loss": 3.783, "step": 22535 }, { "epoch": 1.5314580785432803, "grad_norm": 0.9454769492149353, "learning_rate": 0.00080856774018209, "loss": 3.565, "step": 22540 }, { "epoch": 1.5317977986139422, "grad_norm": 1.076009750366211, "learning_rate": 0.0008085252751732573, "loss": 3.5372, "step": 22545 }, { "epoch": 1.5321375186846038, "grad_norm": 0.9424208998680115, "learning_rate": 0.0008084828101644245, "loss": 3.5803, "step": 22550 }, { "epoch": 1.5324772387552656, "grad_norm": 0.7671887278556824, "learning_rate": 0.0008084403451555918, "loss": 3.7085, "step": 22555 }, { "epoch": 1.5328169588259275, "grad_norm": 0.7315728664398193, "learning_rate": 0.0008083978801467591, "loss": 3.6087, "step": 22560 }, { "epoch": 1.5331566788965891, "grad_norm": 1.044740080833435, "learning_rate": 0.0008083554151379263, "loss": 3.5571, "step": 22565 }, { "epoch": 1.533496398967251, "grad_norm": 0.9926623702049255, "learning_rate": 0.0008083129501290937, "loss": 3.7927, "step": 22570 }, { "epoch": 1.5338361190379128, "grad_norm": 0.8281869888305664, "learning_rate": 0.000808270485120261, "loss": 3.5189, "step": 22575 }, { "epoch": 1.5341758391085745, "grad_norm": 1.3996199369430542, "learning_rate": 0.0008082280201114282, "loss": 3.6841, "step": 22580 }, { "epoch": 1.5345155591792363, "grad_norm": 1.8536888360977173, "learning_rate": 0.0008081855551025955, "loss": 3.7203, "step": 22585 }, { "epoch": 1.5348552792498982, "grad_norm": 1.1468416452407837, "learning_rate": 0.0008081430900937627, "loss": 3.9712, "step": 22590 }, { "epoch": 1.5351949993205598, "grad_norm": 0.9454309344291687, "learning_rate": 0.00080810062508493, "loss": 3.9385, "step": 22595 }, { "epoch": 1.5355347193912217, "grad_norm": 0.758994460105896, "learning_rate": 0.0008080581600760973, "loss": 3.5691, "step": 22600 }, { "epoch": 1.5358744394618835, "grad_norm": 0.7544907331466675, "learning_rate": 0.0008080156950672646, "loss": 3.632, "step": 22605 }, { "epoch": 1.5362141595325451, "grad_norm": 1.500591516494751, "learning_rate": 0.0008079732300584319, "loss": 3.5932, "step": 22610 }, { "epoch": 1.536553879603207, "grad_norm": 0.7420836091041565, "learning_rate": 0.0008079307650495992, "loss": 3.9243, "step": 22615 }, { "epoch": 1.5368935996738688, "grad_norm": 0.8717135190963745, "learning_rate": 0.0008078883000407664, "loss": 3.6164, "step": 22620 }, { "epoch": 1.5372333197445305, "grad_norm": 1.1612741947174072, "learning_rate": 0.0008078458350319336, "loss": 3.5868, "step": 22625 }, { "epoch": 1.5375730398151923, "grad_norm": 0.9587457776069641, "learning_rate": 0.000807803370023101, "loss": 3.6252, "step": 22630 }, { "epoch": 1.5379127598858542, "grad_norm": 0.8992137312889099, "learning_rate": 0.0008077609050142682, "loss": 3.7543, "step": 22635 }, { "epoch": 1.5382524799565158, "grad_norm": 0.785990834236145, "learning_rate": 0.0008077184400054355, "loss": 3.5381, "step": 22640 }, { "epoch": 1.5385922000271774, "grad_norm": 2.1124203205108643, "learning_rate": 0.0008076759749966029, "loss": 3.6217, "step": 22645 }, { "epoch": 1.5389319200978395, "grad_norm": 1.0307828187942505, "learning_rate": 0.0008076335099877701, "loss": 3.665, "step": 22650 }, { "epoch": 1.5392716401685012, "grad_norm": 1.2666990756988525, "learning_rate": 0.0008075910449789373, "loss": 3.5478, "step": 22655 }, { "epoch": 1.5396113602391628, "grad_norm": 0.8385536670684814, "learning_rate": 0.0008075485799701047, "loss": 3.6572, "step": 22660 }, { "epoch": 1.5399510803098249, "grad_norm": 0.8621832728385925, "learning_rate": 0.0008075061149612719, "loss": 3.5894, "step": 22665 }, { "epoch": 1.5402908003804865, "grad_norm": 0.6969078779220581, "learning_rate": 0.0008074636499524392, "loss": 3.4891, "step": 22670 }, { "epoch": 1.5406305204511481, "grad_norm": 1.7248413562774658, "learning_rate": 0.0008074211849436066, "loss": 3.6176, "step": 22675 }, { "epoch": 1.54097024052181, "grad_norm": 0.7885979413986206, "learning_rate": 0.0008073787199347738, "loss": 3.5696, "step": 22680 }, { "epoch": 1.5413099605924718, "grad_norm": 1.5965110063552856, "learning_rate": 0.0008073362549259411, "loss": 3.7555, "step": 22685 }, { "epoch": 1.5416496806631335, "grad_norm": 0.784146785736084, "learning_rate": 0.0008072937899171083, "loss": 3.3155, "step": 22690 }, { "epoch": 1.5419894007337953, "grad_norm": 1.2182210683822632, "learning_rate": 0.0008072513249082756, "loss": 3.8097, "step": 22695 }, { "epoch": 1.5423291208044572, "grad_norm": 0.9736958742141724, "learning_rate": 0.0008072088598994429, "loss": 3.558, "step": 22700 }, { "epoch": 1.5426688408751188, "grad_norm": 0.5929139852523804, "learning_rate": 0.0008071663948906101, "loss": 3.628, "step": 22705 }, { "epoch": 1.5430085609457806, "grad_norm": 1.2482004165649414, "learning_rate": 0.0008071239298817775, "loss": 3.5474, "step": 22710 }, { "epoch": 1.5433482810164425, "grad_norm": 0.8275907635688782, "learning_rate": 0.0008070814648729448, "loss": 3.6432, "step": 22715 }, { "epoch": 1.5436880010871041, "grad_norm": 0.9764878749847412, "learning_rate": 0.000807038999864112, "loss": 3.4316, "step": 22720 }, { "epoch": 1.544027721157766, "grad_norm": 1.013533115386963, "learning_rate": 0.0008069965348552792, "loss": 3.6586, "step": 22725 }, { "epoch": 1.5443674412284278, "grad_norm": 1.2373079061508179, "learning_rate": 0.0008069540698464466, "loss": 3.7482, "step": 22730 }, { "epoch": 1.5447071612990895, "grad_norm": 0.7629674077033997, "learning_rate": 0.0008069116048376138, "loss": 3.5597, "step": 22735 }, { "epoch": 1.5450468813697513, "grad_norm": 0.6417026519775391, "learning_rate": 0.000806869139828781, "loss": 3.5827, "step": 22740 }, { "epoch": 1.5453866014404132, "grad_norm": 1.7202032804489136, "learning_rate": 0.0008068266748199485, "loss": 3.6263, "step": 22745 }, { "epoch": 1.5457263215110748, "grad_norm": 0.6892850995063782, "learning_rate": 0.0008067842098111157, "loss": 3.7504, "step": 22750 }, { "epoch": 1.5460660415817367, "grad_norm": 0.8357324004173279, "learning_rate": 0.0008067417448022829, "loss": 3.5557, "step": 22755 }, { "epoch": 1.5464057616523985, "grad_norm": 0.876130998134613, "learning_rate": 0.0008066992797934503, "loss": 3.5252, "step": 22760 }, { "epoch": 1.5467454817230601, "grad_norm": 0.8455385565757751, "learning_rate": 0.0008066568147846175, "loss": 3.4376, "step": 22765 }, { "epoch": 1.547085201793722, "grad_norm": 0.862780749797821, "learning_rate": 0.0008066143497757847, "loss": 3.5089, "step": 22770 }, { "epoch": 1.5474249218643839, "grad_norm": 0.9155977964401245, "learning_rate": 0.000806571884766952, "loss": 3.7824, "step": 22775 }, { "epoch": 1.5477646419350455, "grad_norm": 1.0503411293029785, "learning_rate": 0.0008065294197581194, "loss": 3.5253, "step": 22780 }, { "epoch": 1.5481043620057073, "grad_norm": 1.2907692193984985, "learning_rate": 0.0008064869547492866, "loss": 3.7616, "step": 22785 }, { "epoch": 1.5484440820763692, "grad_norm": 0.9911143183708191, "learning_rate": 0.0008064444897404539, "loss": 3.6041, "step": 22790 }, { "epoch": 1.5487838021470308, "grad_norm": 0.9603139758110046, "learning_rate": 0.0008064020247316212, "loss": 3.8913, "step": 22795 }, { "epoch": 1.5491235222176927, "grad_norm": 0.9356276988983154, "learning_rate": 0.0008063595597227884, "loss": 3.3038, "step": 22800 }, { "epoch": 1.5494632422883545, "grad_norm": 0.9622668027877808, "learning_rate": 0.0008063170947139557, "loss": 3.3986, "step": 22805 }, { "epoch": 1.5498029623590162, "grad_norm": 0.8473086953163147, "learning_rate": 0.000806274629705123, "loss": 3.4507, "step": 22810 }, { "epoch": 1.5501426824296778, "grad_norm": 0.9318678975105286, "learning_rate": 0.0008062321646962903, "loss": 3.6777, "step": 22815 }, { "epoch": 1.5504824025003399, "grad_norm": 0.7008505463600159, "learning_rate": 0.0008061896996874576, "loss": 3.5634, "step": 22820 }, { "epoch": 1.5508221225710015, "grad_norm": 0.9625828862190247, "learning_rate": 0.0008061472346786248, "loss": 3.5853, "step": 22825 }, { "epoch": 1.5511618426416631, "grad_norm": 1.0728516578674316, "learning_rate": 0.0008061047696697921, "loss": 3.7753, "step": 22830 }, { "epoch": 1.5515015627123252, "grad_norm": 0.9589217901229858, "learning_rate": 0.0008060623046609594, "loss": 3.7163, "step": 22835 }, { "epoch": 1.5518412827829868, "grad_norm": 1.330429196357727, "learning_rate": 0.0008060198396521266, "loss": 3.6416, "step": 22840 }, { "epoch": 1.5521810028536485, "grad_norm": 0.9001039266586304, "learning_rate": 0.0008059773746432939, "loss": 3.7091, "step": 22845 }, { "epoch": 1.5525207229243103, "grad_norm": 0.9324317574501038, "learning_rate": 0.0008059349096344613, "loss": 3.6015, "step": 22850 }, { "epoch": 1.5528604429949722, "grad_norm": 1.0072044134140015, "learning_rate": 0.0008058924446256285, "loss": 3.4328, "step": 22855 }, { "epoch": 1.5532001630656338, "grad_norm": 1.4578256607055664, "learning_rate": 0.0008058499796167958, "loss": 3.6767, "step": 22860 }, { "epoch": 1.5535398831362957, "grad_norm": 0.8969048261642456, "learning_rate": 0.0008058075146079631, "loss": 3.7119, "step": 22865 }, { "epoch": 1.5538796032069575, "grad_norm": 1.6967047452926636, "learning_rate": 0.0008057650495991303, "loss": 3.5337, "step": 22870 }, { "epoch": 1.5542193232776191, "grad_norm": 0.7999581098556519, "learning_rate": 0.0008057225845902975, "loss": 3.684, "step": 22875 }, { "epoch": 1.554559043348281, "grad_norm": 0.7820613384246826, "learning_rate": 0.0008056801195814649, "loss": 3.6337, "step": 22880 }, { "epoch": 1.5548987634189428, "grad_norm": 0.7836406230926514, "learning_rate": 0.0008056376545726322, "loss": 3.5899, "step": 22885 }, { "epoch": 1.5552384834896045, "grad_norm": 1.4860467910766602, "learning_rate": 0.0008055951895637994, "loss": 3.6698, "step": 22890 }, { "epoch": 1.5555782035602663, "grad_norm": 1.0068683624267578, "learning_rate": 0.0008055527245549668, "loss": 3.732, "step": 22895 }, { "epoch": 1.5559179236309282, "grad_norm": 0.9511059522628784, "learning_rate": 0.000805510259546134, "loss": 3.5187, "step": 22900 }, { "epoch": 1.5562576437015898, "grad_norm": 0.6855207681655884, "learning_rate": 0.0008054677945373012, "loss": 3.6602, "step": 22905 }, { "epoch": 1.5565973637722517, "grad_norm": 0.8003137707710266, "learning_rate": 0.0008054253295284686, "loss": 3.6585, "step": 22910 }, { "epoch": 1.5569370838429135, "grad_norm": 0.8115288615226746, "learning_rate": 0.0008053828645196358, "loss": 3.7257, "step": 22915 }, { "epoch": 1.5572768039135751, "grad_norm": 1.423439621925354, "learning_rate": 0.0008053403995108031, "loss": 3.5754, "step": 22920 }, { "epoch": 1.557616523984237, "grad_norm": 1.6858798265457153, "learning_rate": 0.0008052979345019704, "loss": 3.7301, "step": 22925 }, { "epoch": 1.5579562440548989, "grad_norm": 0.7612317204475403, "learning_rate": 0.0008052554694931377, "loss": 3.5535, "step": 22930 }, { "epoch": 1.5582959641255605, "grad_norm": 0.8697726130485535, "learning_rate": 0.0008052130044843049, "loss": 3.5451, "step": 22935 }, { "epoch": 1.5586356841962223, "grad_norm": 0.6498113870620728, "learning_rate": 0.0008051705394754722, "loss": 3.7092, "step": 22940 }, { "epoch": 1.5589754042668842, "grad_norm": 0.8934295177459717, "learning_rate": 0.0008051280744666395, "loss": 3.603, "step": 22945 }, { "epoch": 1.5593151243375458, "grad_norm": 0.6928650736808777, "learning_rate": 0.0008050856094578067, "loss": 3.5475, "step": 22950 }, { "epoch": 1.5596548444082077, "grad_norm": 0.8298101425170898, "learning_rate": 0.0008050431444489741, "loss": 3.6046, "step": 22955 }, { "epoch": 1.5599945644788695, "grad_norm": 0.9894862174987793, "learning_rate": 0.0008050006794401414, "loss": 3.634, "step": 22960 }, { "epoch": 1.5603342845495312, "grad_norm": 1.3444044589996338, "learning_rate": 0.0008049582144313086, "loss": 3.6892, "step": 22965 }, { "epoch": 1.560674004620193, "grad_norm": 1.8900312185287476, "learning_rate": 0.0008049157494224759, "loss": 3.7891, "step": 22970 }, { "epoch": 1.5610137246908549, "grad_norm": 0.7968646287918091, "learning_rate": 0.0008048732844136431, "loss": 3.3982, "step": 22975 }, { "epoch": 1.5613534447615165, "grad_norm": 0.8287606835365295, "learning_rate": 0.0008048308194048104, "loss": 3.4049, "step": 22980 }, { "epoch": 1.5616931648321781, "grad_norm": 1.1667578220367432, "learning_rate": 0.0008047883543959777, "loss": 3.5603, "step": 22985 }, { "epoch": 1.5620328849028402, "grad_norm": 0.7978909015655518, "learning_rate": 0.000804745889387145, "loss": 3.8638, "step": 22990 }, { "epoch": 1.5623726049735018, "grad_norm": 0.910234808921814, "learning_rate": 0.0008047034243783123, "loss": 3.5163, "step": 22995 }, { "epoch": 1.5627123250441635, "grad_norm": 0.955987274646759, "learning_rate": 0.0008046609593694796, "loss": 3.5148, "step": 23000 }, { "epoch": 1.5630520451148255, "grad_norm": 0.8619040846824646, "learning_rate": 0.0008046184943606468, "loss": 3.5625, "step": 23005 }, { "epoch": 1.5633917651854872, "grad_norm": 0.8423206806182861, "learning_rate": 0.0008045760293518142, "loss": 3.4994, "step": 23010 }, { "epoch": 1.5637314852561488, "grad_norm": 0.9177834391593933, "learning_rate": 0.0008045335643429814, "loss": 3.9807, "step": 23015 }, { "epoch": 1.5640712053268107, "grad_norm": 1.0530548095703125, "learning_rate": 0.0008044910993341486, "loss": 3.7496, "step": 23020 }, { "epoch": 1.5644109253974725, "grad_norm": 0.995296061038971, "learning_rate": 0.000804448634325316, "loss": 3.4806, "step": 23025 }, { "epoch": 1.5647506454681341, "grad_norm": 1.233062505722046, "learning_rate": 0.0008044061693164833, "loss": 3.4626, "step": 23030 }, { "epoch": 1.565090365538796, "grad_norm": 0.8018514513969421, "learning_rate": 0.0008043637043076505, "loss": 3.8425, "step": 23035 }, { "epoch": 1.5654300856094578, "grad_norm": 0.806217610836029, "learning_rate": 0.0008043212392988178, "loss": 3.6307, "step": 23040 }, { "epoch": 1.5657698056801195, "grad_norm": 0.8837574124336243, "learning_rate": 0.0008042787742899851, "loss": 3.5121, "step": 23045 }, { "epoch": 1.5661095257507813, "grad_norm": 0.7693446278572083, "learning_rate": 0.0008042363092811523, "loss": 3.6844, "step": 23050 }, { "epoch": 1.5664492458214432, "grad_norm": 1.0842504501342773, "learning_rate": 0.0008041938442723196, "loss": 3.6045, "step": 23055 }, { "epoch": 1.5667889658921048, "grad_norm": 0.666137158870697, "learning_rate": 0.000804151379263487, "loss": 3.7396, "step": 23060 }, { "epoch": 1.5671286859627667, "grad_norm": 0.9801398515701294, "learning_rate": 0.0008041089142546542, "loss": 3.8525, "step": 23065 }, { "epoch": 1.5674684060334285, "grad_norm": 0.9786010384559631, "learning_rate": 0.0008040664492458215, "loss": 3.8985, "step": 23070 }, { "epoch": 1.5678081261040901, "grad_norm": 1.0461175441741943, "learning_rate": 0.0008040239842369887, "loss": 3.5778, "step": 23075 }, { "epoch": 1.568147846174752, "grad_norm": 0.7922382950782776, "learning_rate": 0.000803981519228156, "loss": 3.722, "step": 23080 }, { "epoch": 1.5684875662454139, "grad_norm": 0.8120085000991821, "learning_rate": 0.0008039390542193233, "loss": 3.6013, "step": 23085 }, { "epoch": 1.5688272863160755, "grad_norm": 0.8989419341087341, "learning_rate": 0.0008038965892104905, "loss": 3.6531, "step": 23090 }, { "epoch": 1.5691670063867373, "grad_norm": 0.9200401306152344, "learning_rate": 0.0008038541242016579, "loss": 3.6092, "step": 23095 }, { "epoch": 1.5695067264573992, "grad_norm": 0.7439908981323242, "learning_rate": 0.0008038116591928252, "loss": 3.6983, "step": 23100 }, { "epoch": 1.5698464465280608, "grad_norm": 1.0173051357269287, "learning_rate": 0.0008037691941839924, "loss": 3.7334, "step": 23105 }, { "epoch": 1.5701861665987227, "grad_norm": 0.6860843896865845, "learning_rate": 0.0008037267291751596, "loss": 3.6004, "step": 23110 }, { "epoch": 1.5705258866693845, "grad_norm": 1.1148924827575684, "learning_rate": 0.000803684264166327, "loss": 3.6645, "step": 23115 }, { "epoch": 1.5708656067400462, "grad_norm": 0.7811193466186523, "learning_rate": 0.0008036417991574942, "loss": 3.5053, "step": 23120 }, { "epoch": 1.571205326810708, "grad_norm": 0.7312164306640625, "learning_rate": 0.0008035993341486614, "loss": 3.7217, "step": 23125 }, { "epoch": 1.5715450468813699, "grad_norm": 0.7451415657997131, "learning_rate": 0.0008035568691398289, "loss": 3.7186, "step": 23130 }, { "epoch": 1.5718847669520315, "grad_norm": 0.7153777480125427, "learning_rate": 0.0008035144041309961, "loss": 3.8071, "step": 23135 }, { "epoch": 1.5722244870226934, "grad_norm": 1.0929551124572754, "learning_rate": 0.0008034719391221633, "loss": 3.6357, "step": 23140 }, { "epoch": 1.5725642070933552, "grad_norm": 0.7249621748924255, "learning_rate": 0.0008034294741133307, "loss": 3.6326, "step": 23145 }, { "epoch": 1.5729039271640168, "grad_norm": 2.8112776279449463, "learning_rate": 0.0008033870091044979, "loss": 3.7499, "step": 23150 }, { "epoch": 1.5732436472346785, "grad_norm": 1.1247221231460571, "learning_rate": 0.0008033445440956651, "loss": 3.8276, "step": 23155 }, { "epoch": 1.5735833673053405, "grad_norm": 0.8950327634811401, "learning_rate": 0.0008033020790868326, "loss": 3.5158, "step": 23160 }, { "epoch": 1.5739230873760022, "grad_norm": 0.7450421452522278, "learning_rate": 0.0008032596140779998, "loss": 3.6468, "step": 23165 }, { "epoch": 1.5742628074466638, "grad_norm": 1.545096516609192, "learning_rate": 0.000803217149069167, "loss": 3.7397, "step": 23170 }, { "epoch": 1.5746025275173259, "grad_norm": 1.2321829795837402, "learning_rate": 0.0008031746840603343, "loss": 3.7426, "step": 23175 }, { "epoch": 1.5749422475879875, "grad_norm": 1.1646597385406494, "learning_rate": 0.0008031322190515016, "loss": 3.8136, "step": 23180 }, { "epoch": 1.5752819676586491, "grad_norm": 0.7760124802589417, "learning_rate": 0.0008030897540426688, "loss": 3.6527, "step": 23185 }, { "epoch": 1.575621687729311, "grad_norm": 2.333987236022949, "learning_rate": 0.0008030472890338361, "loss": 3.7422, "step": 23190 }, { "epoch": 1.5759614077999728, "grad_norm": 0.7689435482025146, "learning_rate": 0.0008030048240250035, "loss": 3.9986, "step": 23195 }, { "epoch": 1.5763011278706345, "grad_norm": 0.9107362031936646, "learning_rate": 0.0008029623590161707, "loss": 3.3347, "step": 23200 }, { "epoch": 1.5766408479412963, "grad_norm": 0.9274855256080627, "learning_rate": 0.000802919894007338, "loss": 3.6721, "step": 23205 }, { "epoch": 1.5769805680119582, "grad_norm": 1.2673982381820679, "learning_rate": 0.0008028774289985052, "loss": 3.4871, "step": 23210 }, { "epoch": 1.5773202880826198, "grad_norm": 1.9633291959762573, "learning_rate": 0.0008028349639896725, "loss": 3.4842, "step": 23215 }, { "epoch": 1.5776600081532817, "grad_norm": 1.1052364110946655, "learning_rate": 0.0008027924989808398, "loss": 3.5478, "step": 23220 }, { "epoch": 1.5779997282239435, "grad_norm": 0.7918109893798828, "learning_rate": 0.000802750033972007, "loss": 3.6834, "step": 23225 }, { "epoch": 1.5783394482946052, "grad_norm": 1.271414041519165, "learning_rate": 0.0008027075689631744, "loss": 3.4974, "step": 23230 }, { "epoch": 1.578679168365267, "grad_norm": 1.0142359733581543, "learning_rate": 0.0008026651039543417, "loss": 3.7193, "step": 23235 }, { "epoch": 1.5790188884359289, "grad_norm": 0.9054991006851196, "learning_rate": 0.0008026226389455089, "loss": 3.5532, "step": 23240 }, { "epoch": 1.5793586085065905, "grad_norm": 0.9334622621536255, "learning_rate": 0.0008025801739366762, "loss": 3.4267, "step": 23245 }, { "epoch": 1.5796983285772523, "grad_norm": 0.7576130628585815, "learning_rate": 0.0008025377089278435, "loss": 3.657, "step": 23250 }, { "epoch": 1.5800380486479142, "grad_norm": 0.8967666625976562, "learning_rate": 0.0008024952439190107, "loss": 3.7651, "step": 23255 }, { "epoch": 1.5803777687185758, "grad_norm": 0.886087954044342, "learning_rate": 0.0008024527789101779, "loss": 3.6664, "step": 23260 }, { "epoch": 1.5807174887892377, "grad_norm": 1.066821813583374, "learning_rate": 0.0008024103139013454, "loss": 3.6648, "step": 23265 }, { "epoch": 1.5810572088598995, "grad_norm": 2.746002197265625, "learning_rate": 0.0008023678488925126, "loss": 3.7318, "step": 23270 }, { "epoch": 1.5813969289305612, "grad_norm": 0.7996529340744019, "learning_rate": 0.0008023253838836798, "loss": 3.9165, "step": 23275 }, { "epoch": 1.581736649001223, "grad_norm": 0.9263038039207458, "learning_rate": 0.0008022829188748472, "loss": 3.8473, "step": 23280 }, { "epoch": 1.5820763690718849, "grad_norm": 1.0041640996932983, "learning_rate": 0.0008022404538660144, "loss": 3.6553, "step": 23285 }, { "epoch": 1.5824160891425465, "grad_norm": 1.1628309488296509, "learning_rate": 0.0008021979888571816, "loss": 3.1556, "step": 23290 }, { "epoch": 1.5827558092132084, "grad_norm": 1.1976736783981323, "learning_rate": 0.000802155523848349, "loss": 3.5365, "step": 23295 }, { "epoch": 1.5830955292838702, "grad_norm": 0.9240379333496094, "learning_rate": 0.0008021130588395163, "loss": 3.6256, "step": 23300 }, { "epoch": 1.5834352493545318, "grad_norm": 1.0488568544387817, "learning_rate": 0.0008020705938306835, "loss": 3.6563, "step": 23305 }, { "epoch": 1.5837749694251937, "grad_norm": 0.8769745230674744, "learning_rate": 0.0008020281288218509, "loss": 3.4754, "step": 23310 }, { "epoch": 1.5841146894958555, "grad_norm": 0.9473564028739929, "learning_rate": 0.0008019856638130181, "loss": 3.3395, "step": 23315 }, { "epoch": 1.5844544095665172, "grad_norm": 0.7586822509765625, "learning_rate": 0.0008019431988041853, "loss": 3.3944, "step": 23320 }, { "epoch": 1.5847941296371788, "grad_norm": 0.846683144569397, "learning_rate": 0.0008019007337953526, "loss": 3.5852, "step": 23325 }, { "epoch": 1.5851338497078409, "grad_norm": 0.7387902140617371, "learning_rate": 0.0008018582687865199, "loss": 3.7006, "step": 23330 }, { "epoch": 1.5854735697785025, "grad_norm": 0.8863653540611267, "learning_rate": 0.0008018158037776872, "loss": 3.7039, "step": 23335 }, { "epoch": 1.5858132898491641, "grad_norm": 1.0591938495635986, "learning_rate": 0.0008017733387688545, "loss": 3.4639, "step": 23340 }, { "epoch": 1.5861530099198262, "grad_norm": 0.8726239204406738, "learning_rate": 0.0008017308737600218, "loss": 3.5635, "step": 23345 }, { "epoch": 1.5864927299904878, "grad_norm": 0.8549723625183105, "learning_rate": 0.0008016884087511891, "loss": 3.9902, "step": 23350 }, { "epoch": 1.5868324500611495, "grad_norm": 1.5374599695205688, "learning_rate": 0.0008016459437423563, "loss": 3.6925, "step": 23355 }, { "epoch": 1.5871721701318113, "grad_norm": 1.0196396112442017, "learning_rate": 0.0008016034787335235, "loss": 3.4372, "step": 23360 }, { "epoch": 1.5875118902024732, "grad_norm": 0.7873160243034363, "learning_rate": 0.0008015610137246909, "loss": 3.7331, "step": 23365 }, { "epoch": 1.5878516102731348, "grad_norm": 0.7569318413734436, "learning_rate": 0.0008015185487158582, "loss": 3.7141, "step": 23370 }, { "epoch": 1.5881913303437967, "grad_norm": 0.7585000395774841, "learning_rate": 0.0008014760837070254, "loss": 3.5053, "step": 23375 }, { "epoch": 1.5885310504144585, "grad_norm": 2.3421552181243896, "learning_rate": 0.0008014336186981928, "loss": 3.6515, "step": 23380 }, { "epoch": 1.5888707704851202, "grad_norm": 1.0552809238433838, "learning_rate": 0.00080139115368936, "loss": 3.7835, "step": 23385 }, { "epoch": 1.589210490555782, "grad_norm": 1.1268529891967773, "learning_rate": 0.0008013486886805272, "loss": 3.5088, "step": 23390 }, { "epoch": 1.5895502106264439, "grad_norm": 0.914889931678772, "learning_rate": 0.0008013062236716946, "loss": 3.6977, "step": 23395 }, { "epoch": 1.5898899306971055, "grad_norm": 1.0253980159759521, "learning_rate": 0.0008012637586628618, "loss": 3.5166, "step": 23400 }, { "epoch": 1.5902296507677673, "grad_norm": 0.8665760159492493, "learning_rate": 0.0008012212936540291, "loss": 3.8314, "step": 23405 }, { "epoch": 1.5905693708384292, "grad_norm": 0.936165988445282, "learning_rate": 0.0008011788286451965, "loss": 3.8013, "step": 23410 }, { "epoch": 1.5909090909090908, "grad_norm": 0.7086284756660461, "learning_rate": 0.0008011363636363637, "loss": 3.6688, "step": 23415 }, { "epoch": 1.5912488109797527, "grad_norm": 0.8857320547103882, "learning_rate": 0.0008010938986275309, "loss": 3.4335, "step": 23420 }, { "epoch": 1.5915885310504145, "grad_norm": 1.0179429054260254, "learning_rate": 0.0008010514336186982, "loss": 3.7738, "step": 23425 }, { "epoch": 1.5919282511210762, "grad_norm": 1.4826998710632324, "learning_rate": 0.0008010089686098655, "loss": 3.5936, "step": 23430 }, { "epoch": 1.592267971191738, "grad_norm": 0.9242985248565674, "learning_rate": 0.0008009665036010327, "loss": 3.2941, "step": 23435 }, { "epoch": 1.5926076912623999, "grad_norm": 1.3576202392578125, "learning_rate": 0.0008009240385922001, "loss": 3.2814, "step": 23440 }, { "epoch": 1.5929474113330615, "grad_norm": 0.9796791076660156, "learning_rate": 0.0008008815735833674, "loss": 3.5618, "step": 23445 }, { "epoch": 1.5932871314037234, "grad_norm": 0.8101518750190735, "learning_rate": 0.0008008391085745346, "loss": 3.833, "step": 23450 }, { "epoch": 1.5936268514743852, "grad_norm": 0.9805428385734558, "learning_rate": 0.0008007966435657019, "loss": 3.4768, "step": 23455 }, { "epoch": 1.5939665715450468, "grad_norm": 0.9744020104408264, "learning_rate": 0.0008007541785568691, "loss": 3.7696, "step": 23460 }, { "epoch": 1.5943062916157087, "grad_norm": 0.6675955057144165, "learning_rate": 0.0008007117135480364, "loss": 3.4117, "step": 23465 }, { "epoch": 1.5946460116863705, "grad_norm": 1.0053808689117432, "learning_rate": 0.0008006692485392037, "loss": 3.7145, "step": 23470 }, { "epoch": 1.5949857317570322, "grad_norm": 0.9080557227134705, "learning_rate": 0.000800626783530371, "loss": 3.2919, "step": 23475 }, { "epoch": 1.595325451827694, "grad_norm": 0.8128993511199951, "learning_rate": 0.0008005843185215383, "loss": 3.6347, "step": 23480 }, { "epoch": 1.5956651718983559, "grad_norm": 1.1896580457687378, "learning_rate": 0.0008005418535127056, "loss": 3.5171, "step": 23485 }, { "epoch": 1.5960048919690175, "grad_norm": 0.8397215008735657, "learning_rate": 0.0008004993885038728, "loss": 3.4268, "step": 23490 }, { "epoch": 1.5963446120396791, "grad_norm": 0.8102633357048035, "learning_rate": 0.00080045692349504, "loss": 3.5101, "step": 23495 }, { "epoch": 1.5966843321103412, "grad_norm": 1.5935547351837158, "learning_rate": 0.0008004144584862074, "loss": 3.7055, "step": 23500 }, { "epoch": 1.5970240521810029, "grad_norm": 1.0748968124389648, "learning_rate": 0.0008003719934773746, "loss": 3.3178, "step": 23505 }, { "epoch": 1.5973637722516645, "grad_norm": 51.05748748779297, "learning_rate": 0.0008003295284685419, "loss": 3.6339, "step": 23510 }, { "epoch": 1.5977034923223266, "grad_norm": 1.2285377979278564, "learning_rate": 0.0008002870634597093, "loss": 3.7413, "step": 23515 }, { "epoch": 1.5980432123929882, "grad_norm": 0.9696277379989624, "learning_rate": 0.0008002445984508765, "loss": 3.6362, "step": 23520 }, { "epoch": 1.5983829324636498, "grad_norm": 1.0059013366699219, "learning_rate": 0.0008002021334420437, "loss": 3.7718, "step": 23525 }, { "epoch": 1.5987226525343117, "grad_norm": 1.3645857572555542, "learning_rate": 0.0008001596684332111, "loss": 3.4715, "step": 23530 }, { "epoch": 1.5990623726049735, "grad_norm": 0.8606336116790771, "learning_rate": 0.0008001172034243783, "loss": 3.3936, "step": 23535 }, { "epoch": 1.5994020926756352, "grad_norm": 1.0308597087860107, "learning_rate": 0.0008000747384155455, "loss": 3.8029, "step": 23540 }, { "epoch": 1.599741812746297, "grad_norm": 0.9579052925109863, "learning_rate": 0.000800032273406713, "loss": 3.4656, "step": 23545 }, { "epoch": 1.6000815328169589, "grad_norm": 0.8312612771987915, "learning_rate": 0.0007999898083978802, "loss": 3.8852, "step": 23550 }, { "epoch": 1.6004212528876205, "grad_norm": 0.7538661956787109, "learning_rate": 0.0007999473433890474, "loss": 3.641, "step": 23555 }, { "epoch": 1.6007609729582823, "grad_norm": 1.1336020231246948, "learning_rate": 0.0007999048783802147, "loss": 3.6772, "step": 23560 }, { "epoch": 1.6011006930289442, "grad_norm": 1.305827260017395, "learning_rate": 0.000799862413371382, "loss": 3.5811, "step": 23565 }, { "epoch": 1.6014404130996058, "grad_norm": 0.8289303183555603, "learning_rate": 0.0007998199483625492, "loss": 3.7448, "step": 23570 }, { "epoch": 1.6017801331702677, "grad_norm": 1.020234227180481, "learning_rate": 0.0007997774833537165, "loss": 3.6153, "step": 23575 }, { "epoch": 1.6021198532409295, "grad_norm": 1.488905906677246, "learning_rate": 0.0007997350183448839, "loss": 3.4622, "step": 23580 }, { "epoch": 1.6024595733115912, "grad_norm": 0.9271208643913269, "learning_rate": 0.0007996925533360511, "loss": 3.9867, "step": 23585 }, { "epoch": 1.602799293382253, "grad_norm": 0.940216064453125, "learning_rate": 0.0007996500883272184, "loss": 3.7772, "step": 23590 }, { "epoch": 1.6031390134529149, "grad_norm": 0.8570595383644104, "learning_rate": 0.0007996076233183857, "loss": 3.57, "step": 23595 }, { "epoch": 1.6034787335235765, "grad_norm": 0.9865108132362366, "learning_rate": 0.0007995651583095529, "loss": 3.8086, "step": 23600 }, { "epoch": 1.6038184535942384, "grad_norm": 0.835692822933197, "learning_rate": 0.0007995226933007202, "loss": 3.7469, "step": 23605 }, { "epoch": 1.6041581736649002, "grad_norm": 0.7875521183013916, "learning_rate": 0.0007994802282918874, "loss": 3.6248, "step": 23610 }, { "epoch": 1.6044978937355618, "grad_norm": 1.1715832948684692, "learning_rate": 0.0007994377632830548, "loss": 3.2905, "step": 23615 }, { "epoch": 1.6048376138062237, "grad_norm": 1.0136688947677612, "learning_rate": 0.0007993952982742221, "loss": 3.5795, "step": 23620 }, { "epoch": 1.6051773338768855, "grad_norm": 0.7369644045829773, "learning_rate": 0.0007993528332653893, "loss": 3.7182, "step": 23625 }, { "epoch": 1.6055170539475472, "grad_norm": 0.7304967641830444, "learning_rate": 0.0007993103682565566, "loss": 3.6482, "step": 23630 }, { "epoch": 1.605856774018209, "grad_norm": 0.883631706237793, "learning_rate": 0.0007992679032477239, "loss": 3.4561, "step": 23635 }, { "epoch": 1.6061964940888709, "grad_norm": 0.8991934657096863, "learning_rate": 0.0007992254382388911, "loss": 3.7719, "step": 23640 }, { "epoch": 1.6065362141595325, "grad_norm": 0.6554173231124878, "learning_rate": 0.0007991829732300583, "loss": 3.4212, "step": 23645 }, { "epoch": 1.6068759342301944, "grad_norm": 1.0237566232681274, "learning_rate": 0.0007991405082212258, "loss": 3.6227, "step": 23650 }, { "epoch": 1.6072156543008562, "grad_norm": 0.8674492239952087, "learning_rate": 0.000799098043212393, "loss": 3.6712, "step": 23655 }, { "epoch": 1.6075553743715179, "grad_norm": 0.7446072697639465, "learning_rate": 0.0007990555782035602, "loss": 3.7225, "step": 23660 }, { "epoch": 1.6078950944421795, "grad_norm": 0.732405960559845, "learning_rate": 0.0007990131131947276, "loss": 3.4367, "step": 23665 }, { "epoch": 1.6082348145128416, "grad_norm": 0.8097562789916992, "learning_rate": 0.0007989706481858948, "loss": 3.4835, "step": 23670 }, { "epoch": 1.6085745345835032, "grad_norm": 0.9072396755218506, "learning_rate": 0.000798928183177062, "loss": 3.4175, "step": 23675 }, { "epoch": 1.6089142546541648, "grad_norm": 1.0019091367721558, "learning_rate": 0.0007988857181682295, "loss": 3.5773, "step": 23680 }, { "epoch": 1.609253974724827, "grad_norm": 0.8127030730247498, "learning_rate": 0.0007988432531593967, "loss": 3.6151, "step": 23685 }, { "epoch": 1.6095936947954885, "grad_norm": 0.6904447674751282, "learning_rate": 0.000798800788150564, "loss": 3.6993, "step": 23690 }, { "epoch": 1.6099334148661502, "grad_norm": 0.8293530344963074, "learning_rate": 0.0007987583231417313, "loss": 3.2441, "step": 23695 }, { "epoch": 1.610273134936812, "grad_norm": 0.8480821847915649, "learning_rate": 0.0007987158581328985, "loss": 3.6264, "step": 23700 }, { "epoch": 1.6106128550074739, "grad_norm": 0.5983982682228088, "learning_rate": 0.0007986733931240658, "loss": 3.5845, "step": 23705 }, { "epoch": 1.6109525750781355, "grad_norm": 0.757638156414032, "learning_rate": 0.000798630928115233, "loss": 3.6406, "step": 23710 }, { "epoch": 1.6112922951487973, "grad_norm": 0.7428051829338074, "learning_rate": 0.0007985884631064004, "loss": 3.6964, "step": 23715 }, { "epoch": 1.6116320152194592, "grad_norm": 0.8134360909461975, "learning_rate": 0.0007985459980975677, "loss": 3.8953, "step": 23720 }, { "epoch": 1.6119717352901208, "grad_norm": 0.7559777498245239, "learning_rate": 0.0007985035330887349, "loss": 3.5425, "step": 23725 }, { "epoch": 1.6123114553607827, "grad_norm": 0.9046006202697754, "learning_rate": 0.0007984610680799022, "loss": 3.8438, "step": 23730 }, { "epoch": 1.6126511754314445, "grad_norm": 0.9858770966529846, "learning_rate": 0.0007984186030710695, "loss": 3.6159, "step": 23735 }, { "epoch": 1.6129908955021062, "grad_norm": 0.7351667881011963, "learning_rate": 0.0007983761380622367, "loss": 3.3538, "step": 23740 }, { "epoch": 1.613330615572768, "grad_norm": 0.8889757990837097, "learning_rate": 0.000798333673053404, "loss": 3.6714, "step": 23745 }, { "epoch": 1.6136703356434299, "grad_norm": 0.8211771249771118, "learning_rate": 0.0007982912080445714, "loss": 3.7144, "step": 23750 }, { "epoch": 1.6140100557140915, "grad_norm": 0.7915773987770081, "learning_rate": 0.0007982487430357386, "loss": 3.78, "step": 23755 }, { "epoch": 1.6143497757847534, "grad_norm": 0.7084634304046631, "learning_rate": 0.0007982062780269058, "loss": 3.6234, "step": 23760 }, { "epoch": 1.6146894958554152, "grad_norm": 0.8499560356140137, "learning_rate": 0.0007981638130180732, "loss": 3.5703, "step": 23765 }, { "epoch": 1.6150292159260768, "grad_norm": 0.8613821268081665, "learning_rate": 0.0007981213480092404, "loss": 3.314, "step": 23770 }, { "epoch": 1.6153689359967387, "grad_norm": 0.759160578250885, "learning_rate": 0.0007980788830004076, "loss": 3.6486, "step": 23775 }, { "epoch": 1.6157086560674006, "grad_norm": 0.8055202960968018, "learning_rate": 0.000798036417991575, "loss": 3.6462, "step": 23780 }, { "epoch": 1.6160483761380622, "grad_norm": 0.7821147441864014, "learning_rate": 0.0007979939529827423, "loss": 3.4886, "step": 23785 }, { "epoch": 1.616388096208724, "grad_norm": 0.8772496581077576, "learning_rate": 0.0007979514879739095, "loss": 3.5972, "step": 23790 }, { "epoch": 1.6167278162793859, "grad_norm": 1.0764036178588867, "learning_rate": 0.0007979090229650769, "loss": 3.3691, "step": 23795 }, { "epoch": 1.6170675363500475, "grad_norm": 0.9633224606513977, "learning_rate": 0.0007978665579562441, "loss": 3.6717, "step": 23800 }, { "epoch": 1.6174072564207094, "grad_norm": 0.7704792618751526, "learning_rate": 0.0007978240929474113, "loss": 3.4692, "step": 23805 }, { "epoch": 1.6177469764913712, "grad_norm": 1.0446765422821045, "learning_rate": 0.0007977816279385786, "loss": 3.5989, "step": 23810 }, { "epoch": 1.6180866965620329, "grad_norm": 0.7427679300308228, "learning_rate": 0.0007977391629297459, "loss": 3.8131, "step": 23815 }, { "epoch": 1.6184264166326947, "grad_norm": 0.7944449782371521, "learning_rate": 0.0007976966979209132, "loss": 3.4794, "step": 23820 }, { "epoch": 1.6187661367033566, "grad_norm": 0.8769071102142334, "learning_rate": 0.0007976542329120805, "loss": 3.646, "step": 23825 }, { "epoch": 1.6191058567740182, "grad_norm": 0.9226222038269043, "learning_rate": 0.0007976117679032478, "loss": 3.6245, "step": 23830 }, { "epoch": 1.6194455768446798, "grad_norm": 0.7831975817680359, "learning_rate": 0.000797569302894415, "loss": 3.5635, "step": 23835 }, { "epoch": 1.619785296915342, "grad_norm": 1.2923822402954102, "learning_rate": 0.0007975268378855823, "loss": 3.4353, "step": 23840 }, { "epoch": 1.6201250169860035, "grad_norm": 1.9435033798217773, "learning_rate": 0.0007974843728767495, "loss": 3.3165, "step": 23845 }, { "epoch": 1.6204647370566652, "grad_norm": 0.7486109733581543, "learning_rate": 0.0007974419078679168, "loss": 3.7958, "step": 23850 }, { "epoch": 1.6208044571273272, "grad_norm": 0.905042290687561, "learning_rate": 0.0007973994428590842, "loss": 3.7929, "step": 23855 }, { "epoch": 1.6211441771979889, "grad_norm": 0.8462935090065002, "learning_rate": 0.0007973569778502514, "loss": 3.4852, "step": 23860 }, { "epoch": 1.6214838972686505, "grad_norm": 0.7317774295806885, "learning_rate": 0.0007973145128414187, "loss": 3.4457, "step": 23865 }, { "epoch": 1.6218236173393124, "grad_norm": 0.7766490578651428, "learning_rate": 0.000797272047832586, "loss": 3.4813, "step": 23870 }, { "epoch": 1.6221633374099742, "grad_norm": 0.9738821387290955, "learning_rate": 0.0007972295828237532, "loss": 3.5736, "step": 23875 }, { "epoch": 1.6225030574806358, "grad_norm": 0.8721387386322021, "learning_rate": 0.0007971871178149205, "loss": 3.7161, "step": 23880 }, { "epoch": 1.6228427775512977, "grad_norm": 0.8313774466514587, "learning_rate": 0.0007971446528060878, "loss": 3.6662, "step": 23885 }, { "epoch": 1.6231824976219595, "grad_norm": 1.0236966609954834, "learning_rate": 0.0007971021877972551, "loss": 3.7131, "step": 23890 }, { "epoch": 1.6235222176926212, "grad_norm": 1.1392419338226318, "learning_rate": 0.0007970597227884223, "loss": 3.3905, "step": 23895 }, { "epoch": 1.623861937763283, "grad_norm": 0.7545613646507263, "learning_rate": 0.0007970172577795897, "loss": 3.6117, "step": 23900 }, { "epoch": 1.6242016578339449, "grad_norm": 0.7725341320037842, "learning_rate": 0.0007969747927707569, "loss": 3.6647, "step": 23905 }, { "epoch": 1.6245413779046065, "grad_norm": 0.8368087410926819, "learning_rate": 0.0007969323277619241, "loss": 3.58, "step": 23910 }, { "epoch": 1.6248810979752684, "grad_norm": 1.0467458963394165, "learning_rate": 0.0007968898627530915, "loss": 3.6169, "step": 23915 }, { "epoch": 1.6252208180459302, "grad_norm": 0.7514470219612122, "learning_rate": 0.0007968473977442587, "loss": 3.5628, "step": 23920 }, { "epoch": 1.6255605381165918, "grad_norm": 0.7083759307861328, "learning_rate": 0.000796804932735426, "loss": 3.662, "step": 23925 }, { "epoch": 1.6259002581872537, "grad_norm": 0.7236567735671997, "learning_rate": 0.0007967624677265934, "loss": 3.5875, "step": 23930 }, { "epoch": 1.6262399782579156, "grad_norm": 0.9053975939750671, "learning_rate": 0.0007967200027177606, "loss": 3.4448, "step": 23935 }, { "epoch": 1.6265796983285772, "grad_norm": 0.6816624402999878, "learning_rate": 0.0007966775377089278, "loss": 3.5639, "step": 23940 }, { "epoch": 1.626919418399239, "grad_norm": 1.0644323825836182, "learning_rate": 0.0007966350727000951, "loss": 3.6094, "step": 23945 }, { "epoch": 1.627259138469901, "grad_norm": 0.911760687828064, "learning_rate": 0.0007965926076912624, "loss": 3.3631, "step": 23950 }, { "epoch": 1.6275988585405625, "grad_norm": 1.0427849292755127, "learning_rate": 0.0007965501426824296, "loss": 3.6936, "step": 23955 }, { "epoch": 1.6279385786112244, "grad_norm": 0.842400848865509, "learning_rate": 0.000796507677673597, "loss": 3.913, "step": 23960 }, { "epoch": 1.6282782986818862, "grad_norm": 1.030480146408081, "learning_rate": 0.0007964652126647643, "loss": 3.618, "step": 23965 }, { "epoch": 1.6286180187525479, "grad_norm": 0.7657821774482727, "learning_rate": 0.0007964227476559315, "loss": 3.7221, "step": 23970 }, { "epoch": 1.6289577388232097, "grad_norm": 0.8917818665504456, "learning_rate": 0.0007963802826470988, "loss": 3.586, "step": 23975 }, { "epoch": 1.6292974588938716, "grad_norm": 0.8667722344398499, "learning_rate": 0.0007963378176382661, "loss": 3.5886, "step": 23980 }, { "epoch": 1.6296371789645332, "grad_norm": 0.9347371459007263, "learning_rate": 0.0007962953526294333, "loss": 3.895, "step": 23985 }, { "epoch": 1.629976899035195, "grad_norm": 1.1037589311599731, "learning_rate": 0.0007962528876206006, "loss": 3.7443, "step": 23990 }, { "epoch": 1.630316619105857, "grad_norm": 0.8355764746665955, "learning_rate": 0.000796210422611768, "loss": 3.6586, "step": 23995 }, { "epoch": 1.6306563391765185, "grad_norm": 1.8527227640151978, "learning_rate": 0.0007961679576029352, "loss": 3.6009, "step": 24000 }, { "epoch": 1.6309960592471802, "grad_norm": 1.0354634523391724, "learning_rate": 0.0007961254925941025, "loss": 3.4729, "step": 24005 }, { "epoch": 1.6313357793178422, "grad_norm": 0.9577877521514893, "learning_rate": 0.0007960830275852697, "loss": 3.4487, "step": 24010 }, { "epoch": 1.6316754993885039, "grad_norm": 0.8647545576095581, "learning_rate": 0.000796040562576437, "loss": 3.4622, "step": 24015 }, { "epoch": 1.6320152194591655, "grad_norm": 0.9539531469345093, "learning_rate": 0.0007959980975676043, "loss": 3.7307, "step": 24020 }, { "epoch": 1.6323549395298276, "grad_norm": 0.9936141967773438, "learning_rate": 0.0007959556325587715, "loss": 3.6429, "step": 24025 }, { "epoch": 1.6326946596004892, "grad_norm": 0.7562452554702759, "learning_rate": 0.000795913167549939, "loss": 3.7228, "step": 24030 }, { "epoch": 1.6330343796711508, "grad_norm": 0.6935852766036987, "learning_rate": 0.0007958707025411062, "loss": 3.6721, "step": 24035 }, { "epoch": 1.633374099741813, "grad_norm": 0.8108484745025635, "learning_rate": 0.0007958282375322734, "loss": 3.5455, "step": 24040 }, { "epoch": 1.6337138198124745, "grad_norm": 0.8274190425872803, "learning_rate": 0.0007957857725234408, "loss": 3.5713, "step": 24045 }, { "epoch": 1.6340535398831362, "grad_norm": 1.0683006048202515, "learning_rate": 0.000795743307514608, "loss": 3.4565, "step": 24050 }, { "epoch": 1.634393259953798, "grad_norm": 0.9223059415817261, "learning_rate": 0.0007957008425057752, "loss": 3.2271, "step": 24055 }, { "epoch": 1.6347329800244599, "grad_norm": 1.1195131540298462, "learning_rate": 0.0007956583774969425, "loss": 3.6638, "step": 24060 }, { "epoch": 1.6350727000951215, "grad_norm": 0.7707493901252747, "learning_rate": 0.0007956159124881099, "loss": 3.8254, "step": 24065 }, { "epoch": 1.6354124201657834, "grad_norm": 1.104349970817566, "learning_rate": 0.0007955734474792771, "loss": 3.7439, "step": 24070 }, { "epoch": 1.6357521402364452, "grad_norm": 0.6951128244400024, "learning_rate": 0.0007955309824704444, "loss": 3.6323, "step": 24075 }, { "epoch": 1.6360918603071068, "grad_norm": 0.9689163565635681, "learning_rate": 0.0007954885174616117, "loss": 3.8728, "step": 24080 }, { "epoch": 1.6364315803777687, "grad_norm": 0.917448103427887, "learning_rate": 0.0007954460524527789, "loss": 4.08, "step": 24085 }, { "epoch": 1.6367713004484306, "grad_norm": 2.319807767868042, "learning_rate": 0.0007954035874439462, "loss": 3.6446, "step": 24090 }, { "epoch": 1.6371110205190922, "grad_norm": 0.9266855120658875, "learning_rate": 0.0007953611224351134, "loss": 3.5803, "step": 24095 }, { "epoch": 1.637450740589754, "grad_norm": 0.9528455138206482, "learning_rate": 0.0007953186574262808, "loss": 3.6649, "step": 24100 }, { "epoch": 1.637790460660416, "grad_norm": 0.8594325184822083, "learning_rate": 0.0007952761924174481, "loss": 3.5385, "step": 24105 }, { "epoch": 1.6381301807310775, "grad_norm": 0.935585081577301, "learning_rate": 0.0007952337274086153, "loss": 3.4835, "step": 24110 }, { "epoch": 1.6384699008017394, "grad_norm": 0.9347378015518188, "learning_rate": 0.0007951912623997826, "loss": 3.5169, "step": 24115 }, { "epoch": 1.6388096208724012, "grad_norm": 0.9570009708404541, "learning_rate": 0.0007951487973909499, "loss": 3.5181, "step": 24120 }, { "epoch": 1.6391493409430629, "grad_norm": 0.8882436752319336, "learning_rate": 0.0007951063323821171, "loss": 3.5786, "step": 24125 }, { "epoch": 1.6394890610137247, "grad_norm": 0.9020746946334839, "learning_rate": 0.0007950638673732843, "loss": 3.653, "step": 24130 }, { "epoch": 1.6398287810843866, "grad_norm": 0.6291216015815735, "learning_rate": 0.0007950214023644518, "loss": 3.5078, "step": 24135 }, { "epoch": 1.6401685011550482, "grad_norm": 0.9524011015892029, "learning_rate": 0.000794978937355619, "loss": 3.7782, "step": 24140 }, { "epoch": 1.64050822122571, "grad_norm": 1.1547197103500366, "learning_rate": 0.0007949364723467862, "loss": 3.4975, "step": 24145 }, { "epoch": 1.640847941296372, "grad_norm": 0.9637807607650757, "learning_rate": 0.0007948940073379536, "loss": 3.7987, "step": 24150 }, { "epoch": 1.6411876613670335, "grad_norm": 0.928939700126648, "learning_rate": 0.0007948515423291208, "loss": 3.4597, "step": 24155 }, { "epoch": 1.6415273814376954, "grad_norm": 0.9121633768081665, "learning_rate": 0.000794809077320288, "loss": 3.1828, "step": 24160 }, { "epoch": 1.6418671015083572, "grad_norm": 2.2096896171569824, "learning_rate": 0.0007947666123114554, "loss": 3.6896, "step": 24165 }, { "epoch": 1.6422068215790189, "grad_norm": 0.8442529439926147, "learning_rate": 0.0007947241473026227, "loss": 3.2535, "step": 24170 }, { "epoch": 1.6425465416496805, "grad_norm": 0.8707417845726013, "learning_rate": 0.0007946816822937899, "loss": 3.5612, "step": 24175 }, { "epoch": 1.6428862617203426, "grad_norm": 0.8863127827644348, "learning_rate": 0.0007946392172849573, "loss": 3.5251, "step": 24180 }, { "epoch": 1.6432259817910042, "grad_norm": 1.0329296588897705, "learning_rate": 0.0007945967522761245, "loss": 3.7008, "step": 24185 }, { "epoch": 1.6435657018616658, "grad_norm": 0.9131726622581482, "learning_rate": 0.0007945542872672917, "loss": 3.2332, "step": 24190 }, { "epoch": 1.643905421932328, "grad_norm": 0.8410009741783142, "learning_rate": 0.000794511822258459, "loss": 3.5078, "step": 24195 }, { "epoch": 1.6442451420029895, "grad_norm": 1.0598950386047363, "learning_rate": 0.0007944693572496263, "loss": 3.8795, "step": 24200 }, { "epoch": 1.6445848620736512, "grad_norm": 0.836259126663208, "learning_rate": 0.0007944268922407936, "loss": 3.7727, "step": 24205 }, { "epoch": 1.6449245821443133, "grad_norm": 0.9779436588287354, "learning_rate": 0.0007943844272319609, "loss": 3.4638, "step": 24210 }, { "epoch": 1.6452643022149749, "grad_norm": 0.9968096613883972, "learning_rate": 0.0007943419622231282, "loss": 3.575, "step": 24215 }, { "epoch": 1.6456040222856365, "grad_norm": 0.8281976580619812, "learning_rate": 0.0007942994972142954, "loss": 3.5921, "step": 24220 }, { "epoch": 1.6459437423562984, "grad_norm": 1.3908189535140991, "learning_rate": 0.0007942570322054627, "loss": 3.5911, "step": 24225 }, { "epoch": 1.6462834624269602, "grad_norm": 1.1226708889007568, "learning_rate": 0.00079421456719663, "loss": 3.6616, "step": 24230 }, { "epoch": 1.6466231824976219, "grad_norm": 0.9824207425117493, "learning_rate": 0.0007941721021877972, "loss": 3.501, "step": 24235 }, { "epoch": 1.6469629025682837, "grad_norm": 1.5835187435150146, "learning_rate": 0.0007941296371789646, "loss": 3.7243, "step": 24240 }, { "epoch": 1.6473026226389456, "grad_norm": 0.8599257469177246, "learning_rate": 0.0007940871721701318, "loss": 3.4914, "step": 24245 }, { "epoch": 1.6476423427096072, "grad_norm": 0.685516893863678, "learning_rate": 0.0007940447071612991, "loss": 3.7324, "step": 24250 }, { "epoch": 1.647982062780269, "grad_norm": 1.0108672380447388, "learning_rate": 0.0007940022421524664, "loss": 3.4902, "step": 24255 }, { "epoch": 1.648321782850931, "grad_norm": 0.907278835773468, "learning_rate": 0.0007939597771436336, "loss": 3.6695, "step": 24260 }, { "epoch": 1.6486615029215925, "grad_norm": 0.8084161281585693, "learning_rate": 0.0007939173121348009, "loss": 3.6995, "step": 24265 }, { "epoch": 1.6490012229922544, "grad_norm": 1.344904899597168, "learning_rate": 0.0007938748471259683, "loss": 3.5237, "step": 24270 }, { "epoch": 1.6493409430629162, "grad_norm": 0.7513027191162109, "learning_rate": 0.0007938323821171355, "loss": 3.5218, "step": 24275 }, { "epoch": 1.6496806631335779, "grad_norm": 0.9992022514343262, "learning_rate": 0.0007937899171083028, "loss": 3.8445, "step": 24280 }, { "epoch": 1.6500203832042397, "grad_norm": 0.788295328617096, "learning_rate": 0.0007937474520994701, "loss": 3.4659, "step": 24285 }, { "epoch": 1.6503601032749016, "grad_norm": 1.026893138885498, "learning_rate": 0.0007937049870906373, "loss": 3.482, "step": 24290 }, { "epoch": 1.6506998233455632, "grad_norm": 0.9260799884796143, "learning_rate": 0.0007936625220818045, "loss": 3.8298, "step": 24295 }, { "epoch": 1.651039543416225, "grad_norm": 0.8436681032180786, "learning_rate": 0.0007936200570729719, "loss": 3.4303, "step": 24300 }, { "epoch": 1.651379263486887, "grad_norm": 0.7937162518501282, "learning_rate": 0.0007935775920641392, "loss": 3.5674, "step": 24305 }, { "epoch": 1.6517189835575485, "grad_norm": 1.0211962461471558, "learning_rate": 0.0007935351270553064, "loss": 3.6888, "step": 24310 }, { "epoch": 1.6520587036282104, "grad_norm": 0.7790192365646362, "learning_rate": 0.0007934926620464738, "loss": 3.6848, "step": 24315 }, { "epoch": 1.6523984236988722, "grad_norm": 1.0438449382781982, "learning_rate": 0.000793450197037641, "loss": 3.4933, "step": 24320 }, { "epoch": 1.6527381437695339, "grad_norm": 1.036513328552246, "learning_rate": 0.0007934077320288082, "loss": 3.5504, "step": 24325 }, { "epoch": 1.6530778638401957, "grad_norm": 0.8681591749191284, "learning_rate": 0.0007933652670199756, "loss": 3.4734, "step": 24330 }, { "epoch": 1.6534175839108576, "grad_norm": 1.102919101715088, "learning_rate": 0.0007933228020111428, "loss": 3.5633, "step": 24335 }, { "epoch": 1.6537573039815192, "grad_norm": 0.9221736788749695, "learning_rate": 0.0007932803370023101, "loss": 3.4217, "step": 24340 }, { "epoch": 1.6540970240521808, "grad_norm": 0.8063276410102844, "learning_rate": 0.0007932378719934774, "loss": 3.5708, "step": 24345 }, { "epoch": 1.654436744122843, "grad_norm": 1.0694186687469482, "learning_rate": 0.0007931954069846447, "loss": 3.6861, "step": 24350 }, { "epoch": 1.6547764641935045, "grad_norm": 1.0067116022109985, "learning_rate": 0.0007931529419758119, "loss": 3.5822, "step": 24355 }, { "epoch": 1.6551161842641662, "grad_norm": 0.9750124216079712, "learning_rate": 0.0007931104769669792, "loss": 3.7232, "step": 24360 }, { "epoch": 1.6554559043348283, "grad_norm": 0.8720276355743408, "learning_rate": 0.0007930680119581465, "loss": 3.5734, "step": 24365 }, { "epoch": 1.6557956244054899, "grad_norm": 0.8657500147819519, "learning_rate": 0.0007930255469493138, "loss": 3.6362, "step": 24370 }, { "epoch": 1.6561353444761515, "grad_norm": 0.8843256831169128, "learning_rate": 0.0007929830819404811, "loss": 3.5553, "step": 24375 }, { "epoch": 1.6564750645468136, "grad_norm": 0.7166230082511902, "learning_rate": 0.0007929406169316484, "loss": 3.4389, "step": 24380 }, { "epoch": 1.6568147846174752, "grad_norm": 0.9719016551971436, "learning_rate": 0.0007928981519228157, "loss": 3.836, "step": 24385 }, { "epoch": 1.6571545046881369, "grad_norm": 0.9254566431045532, "learning_rate": 0.0007928556869139829, "loss": 3.5425, "step": 24390 }, { "epoch": 1.6574942247587987, "grad_norm": 0.8964767456054688, "learning_rate": 0.0007928132219051501, "loss": 3.7351, "step": 24395 }, { "epoch": 1.6578339448294606, "grad_norm": 0.7842639684677124, "learning_rate": 0.0007927707568963175, "loss": 3.7234, "step": 24400 }, { "epoch": 1.6581736649001222, "grad_norm": 1.0432732105255127, "learning_rate": 0.0007927282918874847, "loss": 3.5309, "step": 24405 }, { "epoch": 1.658513384970784, "grad_norm": 0.8915448188781738, "learning_rate": 0.000792685826878652, "loss": 3.6623, "step": 24410 }, { "epoch": 1.658853105041446, "grad_norm": 1.486798882484436, "learning_rate": 0.0007926433618698194, "loss": 3.582, "step": 24415 }, { "epoch": 1.6591928251121075, "grad_norm": 0.8449773788452148, "learning_rate": 0.0007926008968609866, "loss": 3.6036, "step": 24420 }, { "epoch": 1.6595325451827694, "grad_norm": 0.9151912331581116, "learning_rate": 0.0007925584318521538, "loss": 3.6627, "step": 24425 }, { "epoch": 1.6598722652534312, "grad_norm": 0.8255609273910522, "learning_rate": 0.0007925159668433212, "loss": 3.7531, "step": 24430 }, { "epoch": 1.6602119853240929, "grad_norm": 0.8707639575004578, "learning_rate": 0.0007924735018344884, "loss": 3.6422, "step": 24435 }, { "epoch": 1.6605517053947547, "grad_norm": 1.099408745765686, "learning_rate": 0.0007924310368256556, "loss": 3.5605, "step": 24440 }, { "epoch": 1.6608914254654166, "grad_norm": 1.8394436836242676, "learning_rate": 0.000792388571816823, "loss": 3.5707, "step": 24445 }, { "epoch": 1.6612311455360782, "grad_norm": 0.7939387559890747, "learning_rate": 0.0007923461068079903, "loss": 3.7819, "step": 24450 }, { "epoch": 1.66157086560674, "grad_norm": 1.0110630989074707, "learning_rate": 0.0007923036417991575, "loss": 3.5795, "step": 24455 }, { "epoch": 1.661910585677402, "grad_norm": 0.8170263767242432, "learning_rate": 0.0007922611767903248, "loss": 3.77, "step": 24460 }, { "epoch": 1.6622503057480635, "grad_norm": 1.02459716796875, "learning_rate": 0.0007922187117814921, "loss": 3.609, "step": 24465 }, { "epoch": 1.6625900258187254, "grad_norm": 1.1937389373779297, "learning_rate": 0.0007921762467726593, "loss": 3.8032, "step": 24470 }, { "epoch": 1.6629297458893872, "grad_norm": 0.9913656115531921, "learning_rate": 0.0007921337817638266, "loss": 3.7429, "step": 24475 }, { "epoch": 1.6632694659600489, "grad_norm": 0.7464697957038879, "learning_rate": 0.000792091316754994, "loss": 3.8337, "step": 24480 }, { "epoch": 1.6636091860307107, "grad_norm": 1.0407745838165283, "learning_rate": 0.0007920488517461612, "loss": 3.5077, "step": 24485 }, { "epoch": 1.6639489061013726, "grad_norm": 0.9377779364585876, "learning_rate": 0.0007920063867373285, "loss": 3.5667, "step": 24490 }, { "epoch": 1.6642886261720342, "grad_norm": 1.397944450378418, "learning_rate": 0.0007919639217284957, "loss": 3.5412, "step": 24495 }, { "epoch": 1.664628346242696, "grad_norm": 0.805711567401886, "learning_rate": 0.000791921456719663, "loss": 3.84, "step": 24500 }, { "epoch": 1.664968066313358, "grad_norm": 0.9098036885261536, "learning_rate": 0.0007918789917108303, "loss": 3.5562, "step": 24505 }, { "epoch": 1.6653077863840196, "grad_norm": 0.705431342124939, "learning_rate": 0.0007918365267019975, "loss": 3.5241, "step": 24510 }, { "epoch": 1.6656475064546812, "grad_norm": 0.7368111610412598, "learning_rate": 0.0007917940616931649, "loss": 3.447, "step": 24515 }, { "epoch": 1.6659872265253433, "grad_norm": 0.948047935962677, "learning_rate": 0.0007917515966843322, "loss": 3.4755, "step": 24520 }, { "epoch": 1.6663269465960049, "grad_norm": 0.659034252166748, "learning_rate": 0.0007917091316754994, "loss": 3.764, "step": 24525 }, { "epoch": 1.6666666666666665, "grad_norm": 0.8233663439750671, "learning_rate": 0.0007916666666666666, "loss": 3.7284, "step": 24530 }, { "epoch": 1.6670063867373286, "grad_norm": 0.8553111553192139, "learning_rate": 0.000791624201657834, "loss": 3.7634, "step": 24535 }, { "epoch": 1.6673461068079902, "grad_norm": 0.8617802858352661, "learning_rate": 0.0007915817366490012, "loss": 3.6164, "step": 24540 }, { "epoch": 1.6676858268786519, "grad_norm": 1.92367684841156, "learning_rate": 0.0007915392716401684, "loss": 3.65, "step": 24545 }, { "epoch": 1.668025546949314, "grad_norm": 1.0957508087158203, "learning_rate": 0.0007914968066313359, "loss": 3.5995, "step": 24550 }, { "epoch": 1.6683652670199756, "grad_norm": 0.8375377058982849, "learning_rate": 0.0007914543416225031, "loss": 3.84, "step": 24555 }, { "epoch": 1.6687049870906372, "grad_norm": 0.7838262915611267, "learning_rate": 0.0007914118766136703, "loss": 3.5936, "step": 24560 }, { "epoch": 1.669044707161299, "grad_norm": 0.6568529009819031, "learning_rate": 0.0007913694116048377, "loss": 3.7518, "step": 24565 }, { "epoch": 1.669384427231961, "grad_norm": 0.9234344363212585, "learning_rate": 0.0007913269465960049, "loss": 3.8272, "step": 24570 }, { "epoch": 1.6697241473026225, "grad_norm": 0.8516789674758911, "learning_rate": 0.0007912844815871721, "loss": 3.5669, "step": 24575 }, { "epoch": 1.6700638673732844, "grad_norm": 1.0142862796783447, "learning_rate": 0.0007912420165783394, "loss": 3.6851, "step": 24580 }, { "epoch": 1.6704035874439462, "grad_norm": 0.9487866759300232, "learning_rate": 0.0007911995515695068, "loss": 3.6546, "step": 24585 }, { "epoch": 1.6707433075146079, "grad_norm": 0.7897478938102722, "learning_rate": 0.000791157086560674, "loss": 3.4394, "step": 24590 }, { "epoch": 1.6710830275852697, "grad_norm": 1.2754398584365845, "learning_rate": 0.0007911146215518413, "loss": 3.6394, "step": 24595 }, { "epoch": 1.6714227476559316, "grad_norm": 0.8909595608711243, "learning_rate": 0.0007910721565430086, "loss": 3.4293, "step": 24600 }, { "epoch": 1.6717624677265932, "grad_norm": 1.1150822639465332, "learning_rate": 0.0007910296915341758, "loss": 3.608, "step": 24605 }, { "epoch": 1.672102187797255, "grad_norm": 0.9409980773925781, "learning_rate": 0.0007909872265253431, "loss": 3.7637, "step": 24610 }, { "epoch": 1.672441907867917, "grad_norm": 1.0394517183303833, "learning_rate": 0.0007909447615165104, "loss": 3.7946, "step": 24615 }, { "epoch": 1.6727816279385785, "grad_norm": 0.9402737617492676, "learning_rate": 0.0007909022965076777, "loss": 3.4, "step": 24620 }, { "epoch": 1.6731213480092404, "grad_norm": 0.8615217208862305, "learning_rate": 0.000790859831498845, "loss": 3.6784, "step": 24625 }, { "epoch": 1.6734610680799022, "grad_norm": 0.9433040022850037, "learning_rate": 0.0007908173664900122, "loss": 3.7704, "step": 24630 }, { "epoch": 1.6738007881505639, "grad_norm": 0.7118695974349976, "learning_rate": 0.0007907749014811795, "loss": 3.6867, "step": 24635 }, { "epoch": 1.6741405082212257, "grad_norm": 0.7933318614959717, "learning_rate": 0.0007907324364723468, "loss": 3.5965, "step": 24640 }, { "epoch": 1.6744802282918876, "grad_norm": 0.7382358908653259, "learning_rate": 0.000790689971463514, "loss": 3.6258, "step": 24645 }, { "epoch": 1.6748199483625492, "grad_norm": 0.7156182527542114, "learning_rate": 0.0007906475064546813, "loss": 3.4533, "step": 24650 }, { "epoch": 1.675159668433211, "grad_norm": 2.3593029975891113, "learning_rate": 0.0007906050414458487, "loss": 3.7235, "step": 24655 }, { "epoch": 1.675499388503873, "grad_norm": 0.8176868557929993, "learning_rate": 0.0007905625764370159, "loss": 3.5293, "step": 24660 }, { "epoch": 1.6758391085745346, "grad_norm": 0.8658609390258789, "learning_rate": 0.0007905201114281832, "loss": 3.6861, "step": 24665 }, { "epoch": 1.6761788286451964, "grad_norm": 1.0936497449874878, "learning_rate": 0.0007904776464193505, "loss": 3.4858, "step": 24670 }, { "epoch": 1.6765185487158583, "grad_norm": 0.7908972501754761, "learning_rate": 0.0007904351814105177, "loss": 3.7076, "step": 24675 }, { "epoch": 1.67685826878652, "grad_norm": 1.1707217693328857, "learning_rate": 0.0007903927164016849, "loss": 3.4585, "step": 24680 }, { "epoch": 1.6771979888571815, "grad_norm": 0.922609806060791, "learning_rate": 0.0007903502513928523, "loss": 3.8582, "step": 24685 }, { "epoch": 1.6775377089278436, "grad_norm": 0.8792892098426819, "learning_rate": 0.0007903077863840196, "loss": 3.6767, "step": 24690 }, { "epoch": 1.6778774289985052, "grad_norm": 0.862585723400116, "learning_rate": 0.0007902653213751868, "loss": 3.709, "step": 24695 }, { "epoch": 1.6782171490691669, "grad_norm": 0.9991616606712341, "learning_rate": 0.0007902228563663542, "loss": 3.6167, "step": 24700 }, { "epoch": 1.678556869139829, "grad_norm": 0.8119308352470398, "learning_rate": 0.0007901803913575214, "loss": 3.6309, "step": 24705 }, { "epoch": 1.6788965892104906, "grad_norm": 0.7031501531600952, "learning_rate": 0.0007901379263486887, "loss": 3.6361, "step": 24710 }, { "epoch": 1.6792363092811522, "grad_norm": 0.8699216246604919, "learning_rate": 0.000790095461339856, "loss": 3.8782, "step": 24715 }, { "epoch": 1.6795760293518143, "grad_norm": 0.8439099788665771, "learning_rate": 0.0007900529963310232, "loss": 3.6235, "step": 24720 }, { "epoch": 1.679915749422476, "grad_norm": 1.0415631532669067, "learning_rate": 0.0007900105313221906, "loss": 3.42, "step": 24725 }, { "epoch": 1.6802554694931375, "grad_norm": 1.0736912488937378, "learning_rate": 0.0007899680663133579, "loss": 3.6379, "step": 24730 }, { "epoch": 1.6805951895637994, "grad_norm": 1.1264219284057617, "learning_rate": 0.0007899256013045251, "loss": 3.5814, "step": 24735 }, { "epoch": 1.6809349096344612, "grad_norm": 0.7785015106201172, "learning_rate": 0.0007898831362956924, "loss": 3.5889, "step": 24740 }, { "epoch": 1.6812746297051229, "grad_norm": 0.828604519367218, "learning_rate": 0.0007898406712868596, "loss": 3.8711, "step": 24745 }, { "epoch": 1.6816143497757847, "grad_norm": 0.9275208115577698, "learning_rate": 0.0007897982062780269, "loss": 3.6378, "step": 24750 }, { "epoch": 1.6819540698464466, "grad_norm": 0.9512251019477844, "learning_rate": 0.0007897557412691942, "loss": 3.5914, "step": 24755 }, { "epoch": 1.6822937899171082, "grad_norm": 0.8005484938621521, "learning_rate": 0.0007897132762603615, "loss": 3.4447, "step": 24760 }, { "epoch": 1.68263350998777, "grad_norm": 0.7597029209136963, "learning_rate": 0.0007896708112515288, "loss": 3.1614, "step": 24765 }, { "epoch": 1.682973230058432, "grad_norm": 0.8426936268806458, "learning_rate": 0.0007896283462426961, "loss": 3.5529, "step": 24770 }, { "epoch": 1.6833129501290935, "grad_norm": 0.813234269618988, "learning_rate": 0.0007895858812338633, "loss": 3.6662, "step": 24775 }, { "epoch": 1.6836526701997554, "grad_norm": 0.9698591232299805, "learning_rate": 0.0007895434162250305, "loss": 3.5314, "step": 24780 }, { "epoch": 1.6839923902704172, "grad_norm": 1.0913695096969604, "learning_rate": 0.0007895009512161979, "loss": 3.5848, "step": 24785 }, { "epoch": 1.6843321103410789, "grad_norm": 0.6830967664718628, "learning_rate": 0.0007894584862073651, "loss": 3.4369, "step": 24790 }, { "epoch": 1.6846718304117407, "grad_norm": 0.8661881685256958, "learning_rate": 0.0007894160211985324, "loss": 3.608, "step": 24795 }, { "epoch": 1.6850115504824026, "grad_norm": 0.7323229312896729, "learning_rate": 0.0007893735561896998, "loss": 3.4296, "step": 24800 }, { "epoch": 1.6853512705530642, "grad_norm": 1.3891593217849731, "learning_rate": 0.000789331091180867, "loss": 3.3683, "step": 24805 }, { "epoch": 1.685690990623726, "grad_norm": 1.674864411354065, "learning_rate": 0.0007892886261720342, "loss": 3.199, "step": 24810 }, { "epoch": 1.686030710694388, "grad_norm": 0.7719573378562927, "learning_rate": 0.0007892461611632016, "loss": 3.6464, "step": 24815 }, { "epoch": 1.6863704307650496, "grad_norm": 1.207993507385254, "learning_rate": 0.0007892036961543688, "loss": 3.5438, "step": 24820 }, { "epoch": 1.6867101508357114, "grad_norm": 1.1320436000823975, "learning_rate": 0.000789161231145536, "loss": 3.6096, "step": 24825 }, { "epoch": 1.6870498709063733, "grad_norm": 0.8345465064048767, "learning_rate": 0.0007891187661367035, "loss": 3.8119, "step": 24830 }, { "epoch": 1.687389590977035, "grad_norm": 0.7946677803993225, "learning_rate": 0.0007890763011278707, "loss": 3.5182, "step": 24835 }, { "epoch": 1.6877293110476967, "grad_norm": 0.7344543933868408, "learning_rate": 0.0007890338361190379, "loss": 3.6934, "step": 24840 }, { "epoch": 1.6880690311183586, "grad_norm": 1.0828746557235718, "learning_rate": 0.0007889913711102052, "loss": 3.6823, "step": 24845 }, { "epoch": 1.6884087511890202, "grad_norm": 0.9360087513923645, "learning_rate": 0.0007889489061013725, "loss": 3.4799, "step": 24850 }, { "epoch": 1.6887484712596819, "grad_norm": 0.7124038338661194, "learning_rate": 0.0007889064410925397, "loss": 3.4875, "step": 24855 }, { "epoch": 1.689088191330344, "grad_norm": 0.7972016334533691, "learning_rate": 0.0007888639760837071, "loss": 3.7074, "step": 24860 }, { "epoch": 1.6894279114010056, "grad_norm": 0.8951337933540344, "learning_rate": 0.0007888215110748744, "loss": 3.4117, "step": 24865 }, { "epoch": 1.6897676314716672, "grad_norm": 0.8420272469520569, "learning_rate": 0.0007887790460660416, "loss": 3.4128, "step": 24870 }, { "epoch": 1.6901073515423293, "grad_norm": 0.6531159281730652, "learning_rate": 0.0007887365810572089, "loss": 3.5992, "step": 24875 }, { "epoch": 1.690447071612991, "grad_norm": 0.7660381197929382, "learning_rate": 0.0007886941160483761, "loss": 3.4279, "step": 24880 }, { "epoch": 1.6907867916836525, "grad_norm": 0.7737146615982056, "learning_rate": 0.0007886516510395434, "loss": 3.3479, "step": 24885 }, { "epoch": 1.6911265117543146, "grad_norm": 0.8405500054359436, "learning_rate": 0.0007886091860307107, "loss": 3.5609, "step": 24890 }, { "epoch": 1.6914662318249762, "grad_norm": 0.9663074016571045, "learning_rate": 0.000788566721021878, "loss": 3.6688, "step": 24895 }, { "epoch": 1.6918059518956379, "grad_norm": 0.8301217555999756, "learning_rate": 0.0007885242560130453, "loss": 3.431, "step": 24900 }, { "epoch": 1.6921456719662997, "grad_norm": 0.8733827471733093, "learning_rate": 0.0007884817910042126, "loss": 3.4668, "step": 24905 }, { "epoch": 1.6924853920369616, "grad_norm": 0.7199254035949707, "learning_rate": 0.0007884393259953798, "loss": 3.7519, "step": 24910 }, { "epoch": 1.6928251121076232, "grad_norm": 0.8966487646102905, "learning_rate": 0.000788396860986547, "loss": 3.5421, "step": 24915 }, { "epoch": 1.693164832178285, "grad_norm": 0.7796667218208313, "learning_rate": 0.0007883543959777144, "loss": 3.6116, "step": 24920 }, { "epoch": 1.693504552248947, "grad_norm": 0.8656803965568542, "learning_rate": 0.0007883119309688816, "loss": 3.9229, "step": 24925 }, { "epoch": 1.6938442723196085, "grad_norm": 0.9291998147964478, "learning_rate": 0.0007882694659600489, "loss": 3.2287, "step": 24930 }, { "epoch": 1.6941839923902704, "grad_norm": 0.7663198113441467, "learning_rate": 0.0007882270009512163, "loss": 3.8697, "step": 24935 }, { "epoch": 1.6945237124609323, "grad_norm": 0.9700276255607605, "learning_rate": 0.0007881845359423835, "loss": 3.5059, "step": 24940 }, { "epoch": 1.6948634325315939, "grad_norm": 0.8098610639572144, "learning_rate": 0.0007881420709335507, "loss": 3.6999, "step": 24945 }, { "epoch": 1.6952031526022557, "grad_norm": 1.127705693244934, "learning_rate": 0.0007880996059247181, "loss": 3.7003, "step": 24950 }, { "epoch": 1.6955428726729176, "grad_norm": 0.8115562796592712, "learning_rate": 0.0007880571409158853, "loss": 3.6591, "step": 24955 }, { "epoch": 1.6958825927435792, "grad_norm": 0.8117164969444275, "learning_rate": 0.0007880146759070525, "loss": 3.2934, "step": 24960 }, { "epoch": 1.696222312814241, "grad_norm": 0.7299073338508606, "learning_rate": 0.00078797221089822, "loss": 3.4468, "step": 24965 }, { "epoch": 1.696562032884903, "grad_norm": 0.7534223198890686, "learning_rate": 0.0007879297458893872, "loss": 3.7497, "step": 24970 }, { "epoch": 1.6969017529555646, "grad_norm": 0.7504227757453918, "learning_rate": 0.0007878872808805544, "loss": 3.6254, "step": 24975 }, { "epoch": 1.6972414730262264, "grad_norm": 0.9052001237869263, "learning_rate": 0.0007878448158717217, "loss": 3.4899, "step": 24980 }, { "epoch": 1.6975811930968883, "grad_norm": 0.9598838090896606, "learning_rate": 0.000787802350862889, "loss": 3.3261, "step": 24985 }, { "epoch": 1.69792091316755, "grad_norm": 0.8577004671096802, "learning_rate": 0.0007877598858540562, "loss": 3.5583, "step": 24990 }, { "epoch": 1.6982606332382117, "grad_norm": 1.034424066543579, "learning_rate": 0.0007877174208452235, "loss": 3.7597, "step": 24995 }, { "epoch": 1.6986003533088736, "grad_norm": 0.7616882920265198, "learning_rate": 0.0007876749558363909, "loss": 3.7198, "step": 25000 }, { "epoch": 1.6989400733795352, "grad_norm": 0.7755090594291687, "learning_rate": 0.0007876324908275581, "loss": 3.5673, "step": 25005 }, { "epoch": 1.699279793450197, "grad_norm": 0.9648683071136475, "learning_rate": 0.0007875900258187254, "loss": 3.7855, "step": 25010 }, { "epoch": 1.699619513520859, "grad_norm": 0.9244015216827393, "learning_rate": 0.0007875475608098927, "loss": 3.436, "step": 25015 }, { "epoch": 1.6999592335915206, "grad_norm": 0.868840217590332, "learning_rate": 0.0007875050958010599, "loss": 3.7161, "step": 25020 }, { "epoch": 1.7002989536621822, "grad_norm": 0.9129986763000488, "learning_rate": 0.0007874626307922272, "loss": 3.6825, "step": 25025 }, { "epoch": 1.7006386737328443, "grad_norm": 1.2084019184112549, "learning_rate": 0.0007874201657833944, "loss": 3.628, "step": 25030 }, { "epoch": 1.700978393803506, "grad_norm": 0.8145015239715576, "learning_rate": 0.0007873777007745618, "loss": 3.256, "step": 25035 }, { "epoch": 1.7013181138741675, "grad_norm": 0.8580953478813171, "learning_rate": 0.0007873352357657291, "loss": 3.724, "step": 25040 }, { "epoch": 1.7016578339448296, "grad_norm": 0.9365824460983276, "learning_rate": 0.0007872927707568963, "loss": 3.6328, "step": 25045 }, { "epoch": 1.7019975540154912, "grad_norm": 1.0616331100463867, "learning_rate": 0.0007872503057480637, "loss": 3.5926, "step": 25050 }, { "epoch": 1.7023372740861529, "grad_norm": 1.2011743783950806, "learning_rate": 0.0007872078407392309, "loss": 3.5311, "step": 25055 }, { "epoch": 1.702676994156815, "grad_norm": 0.9170088171958923, "learning_rate": 0.0007871653757303981, "loss": 3.5167, "step": 25060 }, { "epoch": 1.7030167142274766, "grad_norm": 0.7168837785720825, "learning_rate": 0.0007871229107215655, "loss": 3.3605, "step": 25065 }, { "epoch": 1.7033564342981382, "grad_norm": 0.7276991009712219, "learning_rate": 0.0007870804457127328, "loss": 3.6938, "step": 25070 }, { "epoch": 1.7036961543688, "grad_norm": 0.8171664476394653, "learning_rate": 0.0007870379807039, "loss": 3.3423, "step": 25075 }, { "epoch": 1.704035874439462, "grad_norm": 0.7256236672401428, "learning_rate": 0.0007869955156950673, "loss": 3.498, "step": 25080 }, { "epoch": 1.7043755945101235, "grad_norm": 0.8292992115020752, "learning_rate": 0.0007869530506862346, "loss": 3.7187, "step": 25085 }, { "epoch": 1.7047153145807854, "grad_norm": 0.7140609622001648, "learning_rate": 0.0007869105856774018, "loss": 3.6975, "step": 25090 }, { "epoch": 1.7050550346514473, "grad_norm": 0.8792794942855835, "learning_rate": 0.0007868681206685691, "loss": 3.7577, "step": 25095 }, { "epoch": 1.7053947547221089, "grad_norm": 0.7570245862007141, "learning_rate": 0.0007868256556597364, "loss": 3.6283, "step": 25100 }, { "epoch": 1.7057344747927707, "grad_norm": 0.9570472836494446, "learning_rate": 0.0007867831906509037, "loss": 3.6202, "step": 25105 }, { "epoch": 1.7060741948634326, "grad_norm": 0.6486772894859314, "learning_rate": 0.000786740725642071, "loss": 3.6275, "step": 25110 }, { "epoch": 1.7064139149340942, "grad_norm": 0.8136416077613831, "learning_rate": 0.0007866982606332383, "loss": 3.4998, "step": 25115 }, { "epoch": 1.706753635004756, "grad_norm": 0.9321552515029907, "learning_rate": 0.0007866557956244055, "loss": 3.4543, "step": 25120 }, { "epoch": 1.707093355075418, "grad_norm": 1.0802842378616333, "learning_rate": 0.0007866133306155728, "loss": 3.5594, "step": 25125 }, { "epoch": 1.7074330751460796, "grad_norm": 0.7569277882575989, "learning_rate": 0.00078657086560674, "loss": 3.8712, "step": 25130 }, { "epoch": 1.7077727952167414, "grad_norm": 0.8030095100402832, "learning_rate": 0.0007865284005979073, "loss": 3.4874, "step": 25135 }, { "epoch": 1.7081125152874033, "grad_norm": 0.8289168477058411, "learning_rate": 0.0007864859355890747, "loss": 3.7909, "step": 25140 }, { "epoch": 1.708452235358065, "grad_norm": 1.2423521280288696, "learning_rate": 0.0007864434705802419, "loss": 3.5996, "step": 25145 }, { "epoch": 1.7087919554287267, "grad_norm": 0.8918646574020386, "learning_rate": 0.0007864010055714092, "loss": 3.9593, "step": 25150 }, { "epoch": 1.7091316754993886, "grad_norm": 1.0012123584747314, "learning_rate": 0.0007863585405625765, "loss": 3.7475, "step": 25155 }, { "epoch": 1.7094713955700502, "grad_norm": 1.2975878715515137, "learning_rate": 0.0007863160755537437, "loss": 3.749, "step": 25160 }, { "epoch": 1.709811115640712, "grad_norm": 0.8570579290390015, "learning_rate": 0.0007862736105449109, "loss": 3.568, "step": 25165 }, { "epoch": 1.710150835711374, "grad_norm": 0.8899945616722107, "learning_rate": 0.0007862311455360783, "loss": 3.718, "step": 25170 }, { "epoch": 1.7104905557820356, "grad_norm": 0.9734316468238831, "learning_rate": 0.0007861886805272456, "loss": 3.7305, "step": 25175 }, { "epoch": 1.7108302758526974, "grad_norm": 0.8933466672897339, "learning_rate": 0.0007861462155184128, "loss": 3.7253, "step": 25180 }, { "epoch": 1.7111699959233593, "grad_norm": 0.8621127009391785, "learning_rate": 0.0007861037505095802, "loss": 3.8209, "step": 25185 }, { "epoch": 1.711509715994021, "grad_norm": 1.1854227781295776, "learning_rate": 0.0007860612855007474, "loss": 3.6119, "step": 25190 }, { "epoch": 1.7118494360646825, "grad_norm": 1.0156502723693848, "learning_rate": 0.0007860188204919146, "loss": 3.7828, "step": 25195 }, { "epoch": 1.7121891561353446, "grad_norm": 0.9271770715713501, "learning_rate": 0.000785976355483082, "loss": 3.5079, "step": 25200 }, { "epoch": 1.7125288762060062, "grad_norm": 0.9001640677452087, "learning_rate": 0.0007859338904742492, "loss": 3.4672, "step": 25205 }, { "epoch": 1.7128685962766679, "grad_norm": 0.882731020450592, "learning_rate": 0.0007858914254654165, "loss": 3.6034, "step": 25210 }, { "epoch": 1.71320831634733, "grad_norm": 0.8454843163490295, "learning_rate": 0.0007858489604565839, "loss": 3.5299, "step": 25215 }, { "epoch": 1.7135480364179916, "grad_norm": 0.7633732557296753, "learning_rate": 0.0007858064954477511, "loss": 3.6641, "step": 25220 }, { "epoch": 1.7138877564886532, "grad_norm": 0.9066680073738098, "learning_rate": 0.0007857640304389183, "loss": 3.6847, "step": 25225 }, { "epoch": 1.7142274765593153, "grad_norm": 0.8993523120880127, "learning_rate": 0.0007857215654300856, "loss": 3.3965, "step": 25230 }, { "epoch": 1.714567196629977, "grad_norm": 0.8086879253387451, "learning_rate": 0.0007856791004212529, "loss": 3.5346, "step": 25235 }, { "epoch": 1.7149069167006386, "grad_norm": 1.2318345308303833, "learning_rate": 0.0007856366354124201, "loss": 4.0851, "step": 25240 }, { "epoch": 1.7152466367713004, "grad_norm": 0.6680339574813843, "learning_rate": 0.0007855941704035875, "loss": 3.5625, "step": 25245 }, { "epoch": 1.7155863568419623, "grad_norm": 0.7271459698677063, "learning_rate": 0.0007855517053947548, "loss": 3.6285, "step": 25250 }, { "epoch": 1.7159260769126239, "grad_norm": 0.9027031064033508, "learning_rate": 0.000785509240385922, "loss": 3.5296, "step": 25255 }, { "epoch": 1.7162657969832857, "grad_norm": 0.8412736058235168, "learning_rate": 0.0007854667753770893, "loss": 3.44, "step": 25260 }, { "epoch": 1.7166055170539476, "grad_norm": 0.8910761475563049, "learning_rate": 0.0007854243103682565, "loss": 3.7127, "step": 25265 }, { "epoch": 1.7169452371246092, "grad_norm": 0.716374397277832, "learning_rate": 0.0007853818453594238, "loss": 3.6474, "step": 25270 }, { "epoch": 1.717284957195271, "grad_norm": 0.7394686937332153, "learning_rate": 0.0007853393803505911, "loss": 3.5211, "step": 25275 }, { "epoch": 1.717624677265933, "grad_norm": 1.0245121717453003, "learning_rate": 0.0007852969153417584, "loss": 3.6877, "step": 25280 }, { "epoch": 1.7179643973365946, "grad_norm": 0.711330235004425, "learning_rate": 0.0007852544503329257, "loss": 3.7462, "step": 25285 }, { "epoch": 1.7183041174072564, "grad_norm": 0.8598629236221313, "learning_rate": 0.000785211985324093, "loss": 3.761, "step": 25290 }, { "epoch": 1.7186438374779183, "grad_norm": 0.9318746328353882, "learning_rate": 0.0007851695203152602, "loss": 3.3737, "step": 25295 }, { "epoch": 1.71898355754858, "grad_norm": 0.6782599687576294, "learning_rate": 0.0007851270553064275, "loss": 3.6125, "step": 25300 }, { "epoch": 1.7193232776192418, "grad_norm": 1.48186194896698, "learning_rate": 0.0007850845902975948, "loss": 3.4413, "step": 25305 }, { "epoch": 1.7196629976899036, "grad_norm": 0.9846222400665283, "learning_rate": 0.000785042125288762, "loss": 3.7362, "step": 25310 }, { "epoch": 1.7200027177605652, "grad_norm": 0.9732329249382019, "learning_rate": 0.0007849996602799293, "loss": 3.6995, "step": 25315 }, { "epoch": 1.720342437831227, "grad_norm": 0.7859788537025452, "learning_rate": 0.0007849571952710967, "loss": 3.6746, "step": 25320 }, { "epoch": 1.720682157901889, "grad_norm": 0.8149735331535339, "learning_rate": 0.0007849147302622639, "loss": 3.4913, "step": 25325 }, { "epoch": 1.7210218779725506, "grad_norm": 0.7564394474029541, "learning_rate": 0.0007848722652534311, "loss": 3.8136, "step": 25330 }, { "epoch": 1.7213615980432124, "grad_norm": 1.0305190086364746, "learning_rate": 0.0007848298002445985, "loss": 3.4983, "step": 25335 }, { "epoch": 1.7217013181138743, "grad_norm": 0.7466962933540344, "learning_rate": 0.0007847873352357657, "loss": 3.6435, "step": 25340 }, { "epoch": 1.722041038184536, "grad_norm": 0.837145209312439, "learning_rate": 0.0007847448702269329, "loss": 3.4404, "step": 25345 }, { "epoch": 1.7223807582551978, "grad_norm": 0.772117018699646, "learning_rate": 0.0007847024052181004, "loss": 3.3389, "step": 25350 }, { "epoch": 1.7227204783258596, "grad_norm": 0.9325253367424011, "learning_rate": 0.0007846599402092676, "loss": 3.4587, "step": 25355 }, { "epoch": 1.7230601983965212, "grad_norm": 0.8070598840713501, "learning_rate": 0.0007846174752004348, "loss": 3.4012, "step": 25360 }, { "epoch": 1.7233999184671829, "grad_norm": 0.8725023865699768, "learning_rate": 0.0007845750101916021, "loss": 3.5858, "step": 25365 }, { "epoch": 1.723739638537845, "grad_norm": 2.5675835609436035, "learning_rate": 0.0007845325451827694, "loss": 3.8135, "step": 25370 }, { "epoch": 1.7240793586085066, "grad_norm": 0.956696093082428, "learning_rate": 0.0007844900801739366, "loss": 3.5513, "step": 25375 }, { "epoch": 1.7244190786791682, "grad_norm": 0.9640530347824097, "learning_rate": 0.000784447615165104, "loss": 3.5469, "step": 25380 }, { "epoch": 1.7247587987498303, "grad_norm": 1.0938912630081177, "learning_rate": 0.0007844051501562713, "loss": 3.7052, "step": 25385 }, { "epoch": 1.725098518820492, "grad_norm": 0.7502402067184448, "learning_rate": 0.0007843626851474386, "loss": 3.7198, "step": 25390 }, { "epoch": 1.7254382388911536, "grad_norm": 1.082424283027649, "learning_rate": 0.0007843202201386058, "loss": 3.6527, "step": 25395 }, { "epoch": 1.7257779589618156, "grad_norm": 0.7903933525085449, "learning_rate": 0.0007842777551297731, "loss": 3.5476, "step": 25400 }, { "epoch": 1.7261176790324773, "grad_norm": 0.9569440484046936, "learning_rate": 0.0007842352901209404, "loss": 3.5726, "step": 25405 }, { "epoch": 1.726457399103139, "grad_norm": 0.7713733315467834, "learning_rate": 0.0007841928251121076, "loss": 3.6635, "step": 25410 }, { "epoch": 1.7267971191738007, "grad_norm": 0.8318759799003601, "learning_rate": 0.000784150360103275, "loss": 3.6996, "step": 25415 }, { "epoch": 1.7271368392444626, "grad_norm": 0.9367022514343262, "learning_rate": 0.0007841078950944423, "loss": 3.8604, "step": 25420 }, { "epoch": 1.7274765593151242, "grad_norm": 0.8763725161552429, "learning_rate": 0.0007840654300856095, "loss": 3.6309, "step": 25425 }, { "epoch": 1.727816279385786, "grad_norm": 0.9216275811195374, "learning_rate": 0.0007840229650767767, "loss": 3.499, "step": 25430 }, { "epoch": 1.728155999456448, "grad_norm": 1.0109037160873413, "learning_rate": 0.0007839805000679441, "loss": 3.6353, "step": 25435 }, { "epoch": 1.7284957195271096, "grad_norm": 0.69557124376297, "learning_rate": 0.0007839380350591113, "loss": 3.7098, "step": 25440 }, { "epoch": 1.7288354395977714, "grad_norm": 0.8115939497947693, "learning_rate": 0.0007838955700502785, "loss": 3.5327, "step": 25445 }, { "epoch": 1.7291751596684333, "grad_norm": 1.075798749923706, "learning_rate": 0.000783853105041446, "loss": 3.6241, "step": 25450 }, { "epoch": 1.729514879739095, "grad_norm": 0.8638855814933777, "learning_rate": 0.0007838106400326132, "loss": 3.716, "step": 25455 }, { "epoch": 1.7298545998097568, "grad_norm": 0.7389013171195984, "learning_rate": 0.0007837681750237804, "loss": 3.9045, "step": 25460 }, { "epoch": 1.7301943198804186, "grad_norm": 0.9173335433006287, "learning_rate": 0.0007837257100149478, "loss": 3.6819, "step": 25465 }, { "epoch": 1.7305340399510802, "grad_norm": 1.1047354936599731, "learning_rate": 0.000783683245006115, "loss": 3.4661, "step": 25470 }, { "epoch": 1.730873760021742, "grad_norm": 0.8324397802352905, "learning_rate": 0.0007836407799972822, "loss": 3.5738, "step": 25475 }, { "epoch": 1.731213480092404, "grad_norm": 1.0180611610412598, "learning_rate": 0.0007835983149884495, "loss": 3.4663, "step": 25480 }, { "epoch": 1.7315532001630656, "grad_norm": 0.9008068442344666, "learning_rate": 0.0007835558499796169, "loss": 3.5779, "step": 25485 }, { "epoch": 1.7318929202337274, "grad_norm": 1.0287599563598633, "learning_rate": 0.0007835133849707841, "loss": 3.6283, "step": 25490 }, { "epoch": 1.7322326403043893, "grad_norm": 0.9672912955284119, "learning_rate": 0.0007834709199619514, "loss": 3.412, "step": 25495 }, { "epoch": 1.732572360375051, "grad_norm": 1.0840785503387451, "learning_rate": 0.0007834284549531187, "loss": 3.6638, "step": 25500 }, { "epoch": 1.7329120804457128, "grad_norm": 1.008786916732788, "learning_rate": 0.0007833859899442859, "loss": 3.7185, "step": 25505 }, { "epoch": 1.7332518005163746, "grad_norm": 0.811100423336029, "learning_rate": 0.0007833435249354532, "loss": 3.8903, "step": 25510 }, { "epoch": 1.7335915205870362, "grad_norm": 0.905464231967926, "learning_rate": 0.0007833010599266204, "loss": 3.4721, "step": 25515 }, { "epoch": 1.733931240657698, "grad_norm": 0.7067092061042786, "learning_rate": 0.0007832585949177878, "loss": 3.639, "step": 25520 }, { "epoch": 1.73427096072836, "grad_norm": 0.9283978939056396, "learning_rate": 0.0007832161299089551, "loss": 3.2637, "step": 25525 }, { "epoch": 1.7346106807990216, "grad_norm": 0.8091094493865967, "learning_rate": 0.0007831736649001223, "loss": 3.3166, "step": 25530 }, { "epoch": 1.7349504008696832, "grad_norm": 0.9639005661010742, "learning_rate": 0.0007831311998912896, "loss": 3.382, "step": 25535 }, { "epoch": 1.7352901209403453, "grad_norm": 0.9067379236221313, "learning_rate": 0.0007830887348824569, "loss": 3.521, "step": 25540 }, { "epoch": 1.735629841011007, "grad_norm": 1.1528699398040771, "learning_rate": 0.0007830462698736241, "loss": 3.5495, "step": 25545 }, { "epoch": 1.7359695610816686, "grad_norm": 1.3540722131729126, "learning_rate": 0.0007830038048647913, "loss": 3.6872, "step": 25550 }, { "epoch": 1.7363092811523306, "grad_norm": 0.7633983492851257, "learning_rate": 0.0007829613398559588, "loss": 3.6953, "step": 25555 }, { "epoch": 1.7366490012229923, "grad_norm": 0.7218027114868164, "learning_rate": 0.000782918874847126, "loss": 3.6425, "step": 25560 }, { "epoch": 1.736988721293654, "grad_norm": 0.8587518930435181, "learning_rate": 0.0007828764098382932, "loss": 3.5772, "step": 25565 }, { "epoch": 1.737328441364316, "grad_norm": 1.1589277982711792, "learning_rate": 0.0007828339448294606, "loss": 3.3, "step": 25570 }, { "epoch": 1.7376681614349776, "grad_norm": 0.8263728022575378, "learning_rate": 0.0007827914798206278, "loss": 3.4872, "step": 25575 }, { "epoch": 1.7380078815056392, "grad_norm": 0.9674803018569946, "learning_rate": 0.000782749014811795, "loss": 3.6417, "step": 25580 }, { "epoch": 1.738347601576301, "grad_norm": 1.4880969524383545, "learning_rate": 0.0007827065498029624, "loss": 3.5134, "step": 25585 }, { "epoch": 1.738687321646963, "grad_norm": 1.052046537399292, "learning_rate": 0.0007826640847941297, "loss": 3.7721, "step": 25590 }, { "epoch": 1.7390270417176246, "grad_norm": 0.8075477480888367, "learning_rate": 0.0007826216197852969, "loss": 3.7144, "step": 25595 }, { "epoch": 1.7393667617882864, "grad_norm": 1.185079574584961, "learning_rate": 0.0007825791547764643, "loss": 3.9093, "step": 25600 }, { "epoch": 1.7397064818589483, "grad_norm": 0.9164443612098694, "learning_rate": 0.0007825366897676315, "loss": 3.623, "step": 25605 }, { "epoch": 1.74004620192961, "grad_norm": 0.9363973140716553, "learning_rate": 0.0007824942247587987, "loss": 3.597, "step": 25610 }, { "epoch": 1.7403859220002718, "grad_norm": 0.8684784770011902, "learning_rate": 0.000782451759749966, "loss": 3.8104, "step": 25615 }, { "epoch": 1.7407256420709336, "grad_norm": 0.8852021098136902, "learning_rate": 0.0007824092947411333, "loss": 3.4868, "step": 25620 }, { "epoch": 1.7410653621415952, "grad_norm": 1.8354719877243042, "learning_rate": 0.0007823668297323006, "loss": 3.6706, "step": 25625 }, { "epoch": 1.741405082212257, "grad_norm": 0.8952626585960388, "learning_rate": 0.0007823243647234679, "loss": 3.7335, "step": 25630 }, { "epoch": 1.741744802282919, "grad_norm": 1.5697295665740967, "learning_rate": 0.0007822818997146352, "loss": 3.5452, "step": 25635 }, { "epoch": 1.7420845223535806, "grad_norm": 0.7047613859176636, "learning_rate": 0.0007822394347058024, "loss": 3.4494, "step": 25640 }, { "epoch": 1.7424242424242424, "grad_norm": 0.9039536714553833, "learning_rate": 0.0007821969696969697, "loss": 3.4186, "step": 25645 }, { "epoch": 1.7427639624949043, "grad_norm": 0.9211353659629822, "learning_rate": 0.000782154504688137, "loss": 3.3789, "step": 25650 }, { "epoch": 1.743103682565566, "grad_norm": 0.7146081924438477, "learning_rate": 0.0007821120396793042, "loss": 3.643, "step": 25655 }, { "epoch": 1.7434434026362278, "grad_norm": 0.8163903951644897, "learning_rate": 0.0007820695746704716, "loss": 3.4956, "step": 25660 }, { "epoch": 1.7437831227068896, "grad_norm": 1.0551904439926147, "learning_rate": 0.0007820271096616388, "loss": 3.5657, "step": 25665 }, { "epoch": 1.7441228427775513, "grad_norm": 0.8210049271583557, "learning_rate": 0.0007819846446528061, "loss": 3.9947, "step": 25670 }, { "epoch": 1.744462562848213, "grad_norm": 0.815860390663147, "learning_rate": 0.0007819421796439734, "loss": 3.4684, "step": 25675 }, { "epoch": 1.744802282918875, "grad_norm": 0.8983619809150696, "learning_rate": 0.0007818997146351406, "loss": 3.7298, "step": 25680 }, { "epoch": 1.7451420029895366, "grad_norm": 0.8563128113746643, "learning_rate": 0.0007818572496263079, "loss": 3.3687, "step": 25685 }, { "epoch": 1.7454817230601984, "grad_norm": 0.9592525959014893, "learning_rate": 0.0007818147846174752, "loss": 3.7007, "step": 25690 }, { "epoch": 1.7458214431308603, "grad_norm": 0.7552351951599121, "learning_rate": 0.0007817723196086425, "loss": 3.6563, "step": 25695 }, { "epoch": 1.746161163201522, "grad_norm": 0.8797276616096497, "learning_rate": 0.0007817298545998098, "loss": 3.5357, "step": 25700 }, { "epoch": 1.7465008832721836, "grad_norm": 0.8985642194747925, "learning_rate": 0.0007816873895909771, "loss": 3.7454, "step": 25705 }, { "epoch": 1.7468406033428456, "grad_norm": 0.9358516931533813, "learning_rate": 0.0007816449245821443, "loss": 3.4075, "step": 25710 }, { "epoch": 1.7471803234135073, "grad_norm": 0.9598605632781982, "learning_rate": 0.0007816024595733115, "loss": 3.5979, "step": 25715 }, { "epoch": 1.747520043484169, "grad_norm": 3.204887866973877, "learning_rate": 0.0007815599945644789, "loss": 3.5259, "step": 25720 }, { "epoch": 1.747859763554831, "grad_norm": 0.8831291198730469, "learning_rate": 0.0007815175295556461, "loss": 3.5218, "step": 25725 }, { "epoch": 1.7481994836254926, "grad_norm": 0.7682862281799316, "learning_rate": 0.0007814750645468135, "loss": 3.6461, "step": 25730 }, { "epoch": 1.7485392036961542, "grad_norm": 0.6871035695075989, "learning_rate": 0.0007814325995379808, "loss": 3.2896, "step": 25735 }, { "epoch": 1.7488789237668163, "grad_norm": 0.7829475402832031, "learning_rate": 0.000781390134529148, "loss": 3.7015, "step": 25740 }, { "epoch": 1.749218643837478, "grad_norm": 0.6390740275382996, "learning_rate": 0.0007813476695203153, "loss": 3.285, "step": 25745 }, { "epoch": 1.7495583639081396, "grad_norm": 0.9304513931274414, "learning_rate": 0.0007813052045114826, "loss": 3.7594, "step": 25750 }, { "epoch": 1.7498980839788014, "grad_norm": 1.132741093635559, "learning_rate": 0.0007812627395026498, "loss": 3.5237, "step": 25755 }, { "epoch": 1.7502378040494633, "grad_norm": 1.568610668182373, "learning_rate": 0.0007812202744938171, "loss": 3.6244, "step": 25760 }, { "epoch": 1.750577524120125, "grad_norm": 0.9372501373291016, "learning_rate": 0.0007811778094849844, "loss": 3.5168, "step": 25765 }, { "epoch": 1.7509172441907868, "grad_norm": 0.7375500798225403, "learning_rate": 0.0007811353444761517, "loss": 3.3374, "step": 25770 }, { "epoch": 1.7512569642614486, "grad_norm": 0.9484035968780518, "learning_rate": 0.000781092879467319, "loss": 3.7108, "step": 25775 }, { "epoch": 1.7515966843321102, "grad_norm": 0.8634046316146851, "learning_rate": 0.0007810504144584862, "loss": 3.6578, "step": 25780 }, { "epoch": 1.751936404402772, "grad_norm": 0.8717957735061646, "learning_rate": 0.0007810079494496535, "loss": 3.4543, "step": 25785 }, { "epoch": 1.752276124473434, "grad_norm": 0.8090501427650452, "learning_rate": 0.0007809654844408208, "loss": 3.781, "step": 25790 }, { "epoch": 1.7526158445440956, "grad_norm": 0.8110304474830627, "learning_rate": 0.000780923019431988, "loss": 3.6415, "step": 25795 }, { "epoch": 1.7529555646147574, "grad_norm": 0.7835606932640076, "learning_rate": 0.0007808805544231554, "loss": 3.6532, "step": 25800 }, { "epoch": 1.7532952846854193, "grad_norm": 1.2483454942703247, "learning_rate": 0.0007808380894143227, "loss": 3.6573, "step": 25805 }, { "epoch": 1.753635004756081, "grad_norm": 0.7581899166107178, "learning_rate": 0.0007807956244054899, "loss": 3.7744, "step": 25810 }, { "epoch": 1.7539747248267428, "grad_norm": 1.0076439380645752, "learning_rate": 0.0007807531593966571, "loss": 3.8244, "step": 25815 }, { "epoch": 1.7543144448974046, "grad_norm": 0.9983475804328918, "learning_rate": 0.0007807106943878245, "loss": 3.8544, "step": 25820 }, { "epoch": 1.7546541649680663, "grad_norm": 1.0102927684783936, "learning_rate": 0.0007806682293789917, "loss": 3.5101, "step": 25825 }, { "epoch": 1.754993885038728, "grad_norm": 1.167184591293335, "learning_rate": 0.0007806257643701589, "loss": 3.6922, "step": 25830 }, { "epoch": 1.75533360510939, "grad_norm": 0.8384759426116943, "learning_rate": 0.0007805832993613264, "loss": 3.6572, "step": 25835 }, { "epoch": 1.7556733251800516, "grad_norm": 0.7975273728370667, "learning_rate": 0.0007805408343524936, "loss": 3.6317, "step": 25840 }, { "epoch": 1.7560130452507134, "grad_norm": 1.1237077713012695, "learning_rate": 0.0007804983693436608, "loss": 3.6961, "step": 25845 }, { "epoch": 1.7563527653213753, "grad_norm": 0.851542055606842, "learning_rate": 0.0007804559043348282, "loss": 3.527, "step": 25850 }, { "epoch": 1.756692485392037, "grad_norm": 1.3945471048355103, "learning_rate": 0.0007804134393259954, "loss": 3.6489, "step": 25855 }, { "epoch": 1.7570322054626988, "grad_norm": 1.315050482749939, "learning_rate": 0.0007803709743171626, "loss": 3.6273, "step": 25860 }, { "epoch": 1.7573719255333606, "grad_norm": 1.0731977224349976, "learning_rate": 0.0007803285093083299, "loss": 3.6821, "step": 25865 }, { "epoch": 1.7577116456040223, "grad_norm": 0.7589316368103027, "learning_rate": 0.0007802860442994973, "loss": 3.4292, "step": 25870 }, { "epoch": 1.758051365674684, "grad_norm": 1.2083330154418945, "learning_rate": 0.0007802435792906645, "loss": 3.3793, "step": 25875 }, { "epoch": 1.758391085745346, "grad_norm": 0.8640690445899963, "learning_rate": 0.0007802011142818318, "loss": 3.4375, "step": 25880 }, { "epoch": 1.7587308058160076, "grad_norm": 0.7859452366828918, "learning_rate": 0.0007801586492729991, "loss": 3.6506, "step": 25885 }, { "epoch": 1.7590705258866692, "grad_norm": 0.8886356949806213, "learning_rate": 0.0007801161842641663, "loss": 3.444, "step": 25890 }, { "epoch": 1.7594102459573313, "grad_norm": 1.5000760555267334, "learning_rate": 0.0007800737192553336, "loss": 3.6421, "step": 25895 }, { "epoch": 1.759749966027993, "grad_norm": 3.777399778366089, "learning_rate": 0.0007800312542465008, "loss": 3.2754, "step": 25900 }, { "epoch": 1.7600896860986546, "grad_norm": 1.2245557308197021, "learning_rate": 0.0007799887892376682, "loss": 3.5433, "step": 25905 }, { "epoch": 1.7604294061693166, "grad_norm": 0.9506595134735107, "learning_rate": 0.0007799463242288355, "loss": 3.6563, "step": 25910 }, { "epoch": 1.7607691262399783, "grad_norm": 1.03312349319458, "learning_rate": 0.0007799038592200027, "loss": 3.7397, "step": 25915 }, { "epoch": 1.76110884631064, "grad_norm": 0.807860255241394, "learning_rate": 0.00077986139421117, "loss": 3.485, "step": 25920 }, { "epoch": 1.7614485663813018, "grad_norm": 0.9521588087081909, "learning_rate": 0.0007798189292023373, "loss": 3.5696, "step": 25925 }, { "epoch": 1.7617882864519636, "grad_norm": 0.829430341720581, "learning_rate": 0.0007797764641935045, "loss": 3.659, "step": 25930 }, { "epoch": 1.7621280065226252, "grad_norm": 0.8915740847587585, "learning_rate": 0.0007797339991846718, "loss": 3.6361, "step": 25935 }, { "epoch": 1.762467726593287, "grad_norm": 0.9075105786323547, "learning_rate": 0.0007796915341758392, "loss": 3.4581, "step": 25940 }, { "epoch": 1.762807446663949, "grad_norm": 0.7310864925384521, "learning_rate": 0.0007796490691670064, "loss": 3.9386, "step": 25945 }, { "epoch": 1.7631471667346106, "grad_norm": 1.213394284248352, "learning_rate": 0.0007796066041581736, "loss": 3.5004, "step": 25950 }, { "epoch": 1.7634868868052724, "grad_norm": 0.835250735282898, "learning_rate": 0.000779564139149341, "loss": 3.8188, "step": 25955 }, { "epoch": 1.7638266068759343, "grad_norm": 0.8052058219909668, "learning_rate": 0.0007795216741405082, "loss": 3.7281, "step": 25960 }, { "epoch": 1.764166326946596, "grad_norm": 0.7974103689193726, "learning_rate": 0.0007794792091316754, "loss": 3.566, "step": 25965 }, { "epoch": 1.7645060470172578, "grad_norm": 0.9005211591720581, "learning_rate": 0.0007794367441228429, "loss": 3.8846, "step": 25970 }, { "epoch": 1.7648457670879196, "grad_norm": 0.7942432165145874, "learning_rate": 0.0007793942791140101, "loss": 3.5281, "step": 25975 }, { "epoch": 1.7651854871585813, "grad_norm": 0.7167492508888245, "learning_rate": 0.0007793518141051773, "loss": 3.4482, "step": 25980 }, { "epoch": 1.765525207229243, "grad_norm": 1.034829020500183, "learning_rate": 0.0007793093490963447, "loss": 3.4977, "step": 25985 }, { "epoch": 1.765864927299905, "grad_norm": 0.968509316444397, "learning_rate": 0.0007792668840875119, "loss": 3.524, "step": 25990 }, { "epoch": 1.7662046473705666, "grad_norm": 0.8943504095077515, "learning_rate": 0.0007792244190786791, "loss": 3.9223, "step": 25995 }, { "epoch": 1.7665443674412284, "grad_norm": 0.7722302079200745, "learning_rate": 0.0007791819540698464, "loss": 3.7378, "step": 26000 }, { "epoch": 1.7668840875118903, "grad_norm": 0.7472617030143738, "learning_rate": 0.0007791394890610138, "loss": 3.5882, "step": 26005 }, { "epoch": 1.767223807582552, "grad_norm": 0.9298312664031982, "learning_rate": 0.000779097024052181, "loss": 3.8509, "step": 26010 }, { "epoch": 1.7675635276532138, "grad_norm": 0.7147135734558105, "learning_rate": 0.0007790545590433483, "loss": 3.5533, "step": 26015 }, { "epoch": 1.7679032477238756, "grad_norm": 0.902371346950531, "learning_rate": 0.0007790120940345156, "loss": 3.4996, "step": 26020 }, { "epoch": 1.7682429677945373, "grad_norm": 0.775912880897522, "learning_rate": 0.0007789696290256828, "loss": 3.2739, "step": 26025 }, { "epoch": 1.7685826878651991, "grad_norm": 0.7736325860023499, "learning_rate": 0.0007789271640168501, "loss": 3.4853, "step": 26030 }, { "epoch": 1.768922407935861, "grad_norm": 0.739092230796814, "learning_rate": 0.0007788846990080174, "loss": 3.5761, "step": 26035 }, { "epoch": 1.7692621280065226, "grad_norm": 0.813132643699646, "learning_rate": 0.0007788422339991847, "loss": 3.6797, "step": 26040 }, { "epoch": 1.7696018480771842, "grad_norm": 0.8204823136329651, "learning_rate": 0.000778799768990352, "loss": 3.5608, "step": 26045 }, { "epoch": 1.7699415681478463, "grad_norm": 1.0364375114440918, "learning_rate": 0.0007787573039815192, "loss": 3.6712, "step": 26050 }, { "epoch": 1.770281288218508, "grad_norm": 0.7703670263290405, "learning_rate": 0.0007787148389726865, "loss": 3.3849, "step": 26055 }, { "epoch": 1.7706210082891696, "grad_norm": 0.8427909016609192, "learning_rate": 0.0007786723739638538, "loss": 3.6009, "step": 26060 }, { "epoch": 1.7709607283598316, "grad_norm": 0.8401391506195068, "learning_rate": 0.000778629908955021, "loss": 3.5513, "step": 26065 }, { "epoch": 1.7713004484304933, "grad_norm": 0.8893247842788696, "learning_rate": 0.0007785874439461884, "loss": 3.3767, "step": 26070 }, { "epoch": 1.771640168501155, "grad_norm": 1.0901212692260742, "learning_rate": 0.0007785449789373557, "loss": 3.4769, "step": 26075 }, { "epoch": 1.771979888571817, "grad_norm": 0.9976149797439575, "learning_rate": 0.0007785025139285229, "loss": 3.7512, "step": 26080 }, { "epoch": 1.7723196086424786, "grad_norm": 0.7497579455375671, "learning_rate": 0.0007784600489196903, "loss": 3.4341, "step": 26085 }, { "epoch": 1.7726593287131402, "grad_norm": 0.7696861624717712, "learning_rate": 0.0007784175839108575, "loss": 3.6361, "step": 26090 }, { "epoch": 1.772999048783802, "grad_norm": 1.311822533607483, "learning_rate": 0.0007783751189020247, "loss": 3.3312, "step": 26095 }, { "epoch": 1.773338768854464, "grad_norm": 0.8057903051376343, "learning_rate": 0.000778332653893192, "loss": 3.5161, "step": 26100 }, { "epoch": 1.7736784889251256, "grad_norm": 0.5596839189529419, "learning_rate": 0.0007782901888843593, "loss": 3.643, "step": 26105 }, { "epoch": 1.7740182089957874, "grad_norm": 1.091204047203064, "learning_rate": 0.0007782477238755266, "loss": 3.203, "step": 26110 }, { "epoch": 1.7743579290664493, "grad_norm": 3.4007673263549805, "learning_rate": 0.0007782052588666939, "loss": 3.5878, "step": 26115 }, { "epoch": 1.774697649137111, "grad_norm": 2.3639938831329346, "learning_rate": 0.0007781627938578612, "loss": 3.7566, "step": 26120 }, { "epoch": 1.7750373692077728, "grad_norm": 0.8306202292442322, "learning_rate": 0.0007781203288490284, "loss": 3.6895, "step": 26125 }, { "epoch": 1.7753770892784346, "grad_norm": 1.246132731437683, "learning_rate": 0.0007780778638401957, "loss": 3.5499, "step": 26130 }, { "epoch": 1.7757168093490963, "grad_norm": 0.8069747090339661, "learning_rate": 0.000778035398831363, "loss": 3.5961, "step": 26135 }, { "epoch": 1.7760565294197581, "grad_norm": 0.761080801486969, "learning_rate": 0.0007779929338225302, "loss": 3.6282, "step": 26140 }, { "epoch": 1.77639624949042, "grad_norm": 0.9705154299736023, "learning_rate": 0.0007779504688136976, "loss": 3.5841, "step": 26145 }, { "epoch": 1.7767359695610816, "grad_norm": 0.9187883138656616, "learning_rate": 0.0007779080038048648, "loss": 3.4097, "step": 26150 }, { "epoch": 1.7770756896317434, "grad_norm": 0.8528329730033875, "learning_rate": 0.0007778655387960321, "loss": 3.3493, "step": 26155 }, { "epoch": 1.7774154097024053, "grad_norm": 0.740311861038208, "learning_rate": 0.0007778230737871994, "loss": 3.5759, "step": 26160 }, { "epoch": 1.777755129773067, "grad_norm": 0.9098858833312988, "learning_rate": 0.0007777806087783666, "loss": 3.6686, "step": 26165 }, { "epoch": 1.7780948498437288, "grad_norm": 0.989059329032898, "learning_rate": 0.0007777381437695339, "loss": 3.6815, "step": 26170 }, { "epoch": 1.7784345699143906, "grad_norm": 0.9316731691360474, "learning_rate": 0.0007776956787607012, "loss": 3.3554, "step": 26175 }, { "epoch": 1.7787742899850523, "grad_norm": 0.7356194257736206, "learning_rate": 0.0007776532137518685, "loss": 3.883, "step": 26180 }, { "epoch": 1.7791140100557141, "grad_norm": 0.7641822695732117, "learning_rate": 0.0007776107487430358, "loss": 3.7066, "step": 26185 }, { "epoch": 1.779453730126376, "grad_norm": 0.9342180490493774, "learning_rate": 0.0007775682837342031, "loss": 3.4949, "step": 26190 }, { "epoch": 1.7797934501970376, "grad_norm": 0.8679904341697693, "learning_rate": 0.0007775258187253703, "loss": 3.3175, "step": 26195 }, { "epoch": 1.7801331702676995, "grad_norm": 1.0978342294692993, "learning_rate": 0.0007774833537165375, "loss": 3.4248, "step": 26200 }, { "epoch": 1.7804728903383613, "grad_norm": 0.791932225227356, "learning_rate": 0.0007774408887077049, "loss": 3.483, "step": 26205 }, { "epoch": 1.780812610409023, "grad_norm": 1.1206365823745728, "learning_rate": 0.0007773984236988721, "loss": 3.6317, "step": 26210 }, { "epoch": 1.7811523304796846, "grad_norm": 0.9562692642211914, "learning_rate": 0.0007773559586900394, "loss": 3.4922, "step": 26215 }, { "epoch": 1.7814920505503467, "grad_norm": 1.0970159769058228, "learning_rate": 0.0007773134936812068, "loss": 3.5252, "step": 26220 }, { "epoch": 1.7818317706210083, "grad_norm": 0.6391739249229431, "learning_rate": 0.000777271028672374, "loss": 3.6473, "step": 26225 }, { "epoch": 1.78217149069167, "grad_norm": 0.7666692137718201, "learning_rate": 0.0007772285636635412, "loss": 3.7515, "step": 26230 }, { "epoch": 1.782511210762332, "grad_norm": 0.6616684198379517, "learning_rate": 0.0007771860986547086, "loss": 3.6492, "step": 26235 }, { "epoch": 1.7828509308329936, "grad_norm": 1.3172047138214111, "learning_rate": 0.0007771436336458758, "loss": 3.5426, "step": 26240 }, { "epoch": 1.7831906509036552, "grad_norm": 0.8805871605873108, "learning_rate": 0.000777101168637043, "loss": 3.7075, "step": 26245 }, { "epoch": 1.7835303709743173, "grad_norm": 0.8232247233390808, "learning_rate": 0.0007770587036282105, "loss": 3.7654, "step": 26250 }, { "epoch": 1.783870091044979, "grad_norm": 0.6485000252723694, "learning_rate": 0.0007770162386193777, "loss": 3.4879, "step": 26255 }, { "epoch": 1.7842098111156406, "grad_norm": 0.9163903594017029, "learning_rate": 0.0007769737736105449, "loss": 3.5246, "step": 26260 }, { "epoch": 1.7845495311863024, "grad_norm": 0.7492279410362244, "learning_rate": 0.0007769313086017122, "loss": 3.5252, "step": 26265 }, { "epoch": 1.7848892512569643, "grad_norm": 0.7342423796653748, "learning_rate": 0.0007768888435928795, "loss": 3.7, "step": 26270 }, { "epoch": 1.785228971327626, "grad_norm": 1.0257717370986938, "learning_rate": 0.0007768463785840467, "loss": 3.6694, "step": 26275 }, { "epoch": 1.7855686913982878, "grad_norm": 0.6728167533874512, "learning_rate": 0.000776803913575214, "loss": 3.7653, "step": 26280 }, { "epoch": 1.7859084114689496, "grad_norm": 1.01872718334198, "learning_rate": 0.0007767614485663814, "loss": 3.6343, "step": 26285 }, { "epoch": 1.7862481315396113, "grad_norm": 0.7362704277038574, "learning_rate": 0.0007767189835575486, "loss": 3.6759, "step": 26290 }, { "epoch": 1.7865878516102731, "grad_norm": 1.366279125213623, "learning_rate": 0.0007766765185487159, "loss": 3.644, "step": 26295 }, { "epoch": 1.786927571680935, "grad_norm": 0.820401668548584, "learning_rate": 0.0007766340535398831, "loss": 3.404, "step": 26300 }, { "epoch": 1.7872672917515966, "grad_norm": 1.2093595266342163, "learning_rate": 0.0007765915885310504, "loss": 3.5584, "step": 26305 }, { "epoch": 1.7876070118222585, "grad_norm": 0.9943270683288574, "learning_rate": 0.0007765491235222177, "loss": 3.6304, "step": 26310 }, { "epoch": 1.7879467318929203, "grad_norm": 0.8318180441856384, "learning_rate": 0.0007765066585133849, "loss": 3.6815, "step": 26315 }, { "epoch": 1.788286451963582, "grad_norm": 0.8164183497428894, "learning_rate": 0.0007764641935045523, "loss": 3.4947, "step": 26320 }, { "epoch": 1.7886261720342438, "grad_norm": 0.8470618724822998, "learning_rate": 0.0007764217284957196, "loss": 3.4427, "step": 26325 }, { "epoch": 1.7889658921049056, "grad_norm": 0.968076229095459, "learning_rate": 0.0007763792634868868, "loss": 3.6362, "step": 26330 }, { "epoch": 1.7893056121755673, "grad_norm": 0.8567678928375244, "learning_rate": 0.000776336798478054, "loss": 3.5284, "step": 26335 }, { "epoch": 1.7896453322462291, "grad_norm": 0.9097279906272888, "learning_rate": 0.0007762943334692214, "loss": 3.6648, "step": 26340 }, { "epoch": 1.789985052316891, "grad_norm": 0.740727424621582, "learning_rate": 0.0007762518684603886, "loss": 3.632, "step": 26345 }, { "epoch": 1.7903247723875526, "grad_norm": 0.830374002456665, "learning_rate": 0.0007762094034515558, "loss": 3.9525, "step": 26350 }, { "epoch": 1.7906644924582145, "grad_norm": 0.7706518173217773, "learning_rate": 0.0007761669384427233, "loss": 3.8259, "step": 26355 }, { "epoch": 1.7910042125288763, "grad_norm": 0.7739277482032776, "learning_rate": 0.0007761244734338905, "loss": 3.6415, "step": 26360 }, { "epoch": 1.791343932599538, "grad_norm": 0.8040427565574646, "learning_rate": 0.0007760820084250577, "loss": 3.7262, "step": 26365 }, { "epoch": 1.7916836526701998, "grad_norm": 1.1802363395690918, "learning_rate": 0.0007760395434162251, "loss": 3.7065, "step": 26370 }, { "epoch": 1.7920233727408617, "grad_norm": 0.746063768863678, "learning_rate": 0.0007759970784073923, "loss": 3.6451, "step": 26375 }, { "epoch": 1.7923630928115233, "grad_norm": 1.1472550630569458, "learning_rate": 0.0007759546133985595, "loss": 3.7605, "step": 26380 }, { "epoch": 1.792702812882185, "grad_norm": 0.8310592174530029, "learning_rate": 0.0007759121483897269, "loss": 3.6762, "step": 26385 }, { "epoch": 1.793042532952847, "grad_norm": 1.0211249589920044, "learning_rate": 0.0007758696833808942, "loss": 3.6768, "step": 26390 }, { "epoch": 1.7933822530235086, "grad_norm": 0.6641800999641418, "learning_rate": 0.0007758272183720614, "loss": 3.7767, "step": 26395 }, { "epoch": 1.7937219730941703, "grad_norm": 0.8496041297912598, "learning_rate": 0.0007757847533632287, "loss": 3.5218, "step": 26400 }, { "epoch": 1.7940616931648323, "grad_norm": 0.7252931594848633, "learning_rate": 0.000775742288354396, "loss": 3.5727, "step": 26405 }, { "epoch": 1.794401413235494, "grad_norm": 0.861052930355072, "learning_rate": 0.0007756998233455633, "loss": 3.5623, "step": 26410 }, { "epoch": 1.7947411333061556, "grad_norm": 0.6104414463043213, "learning_rate": 0.0007756573583367305, "loss": 3.5555, "step": 26415 }, { "epoch": 1.7950808533768177, "grad_norm": 0.8131604790687561, "learning_rate": 0.0007756148933278978, "loss": 3.4779, "step": 26420 }, { "epoch": 1.7954205734474793, "grad_norm": 0.9060520529747009, "learning_rate": 0.0007755724283190652, "loss": 3.401, "step": 26425 }, { "epoch": 1.795760293518141, "grad_norm": 0.878742516040802, "learning_rate": 0.0007755299633102324, "loss": 3.5545, "step": 26430 }, { "epoch": 1.7961000135888028, "grad_norm": 1.2023383378982544, "learning_rate": 0.0007754874983013997, "loss": 3.415, "step": 26435 }, { "epoch": 1.7964397336594646, "grad_norm": 1.2565702199935913, "learning_rate": 0.000775445033292567, "loss": 3.7117, "step": 26440 }, { "epoch": 1.7967794537301263, "grad_norm": 0.7067636847496033, "learning_rate": 0.0007754025682837342, "loss": 3.3881, "step": 26445 }, { "epoch": 1.7971191738007881, "grad_norm": 1.422179937362671, "learning_rate": 0.0007753601032749014, "loss": 3.2169, "step": 26450 }, { "epoch": 1.79745889387145, "grad_norm": 1.0218615531921387, "learning_rate": 0.0007753176382660688, "loss": 3.5967, "step": 26455 }, { "epoch": 1.7977986139421116, "grad_norm": 0.8417220711708069, "learning_rate": 0.0007752751732572361, "loss": 3.7578, "step": 26460 }, { "epoch": 1.7981383340127735, "grad_norm": 0.6974185705184937, "learning_rate": 0.0007752327082484033, "loss": 3.5118, "step": 26465 }, { "epoch": 1.7984780540834353, "grad_norm": 0.9856672286987305, "learning_rate": 0.0007751902432395707, "loss": 3.6269, "step": 26470 }, { "epoch": 1.798817774154097, "grad_norm": 0.9373189210891724, "learning_rate": 0.0007751477782307379, "loss": 3.6082, "step": 26475 }, { "epoch": 1.7991574942247588, "grad_norm": 0.8289579153060913, "learning_rate": 0.0007751053132219051, "loss": 3.7031, "step": 26480 }, { "epoch": 1.7994972142954206, "grad_norm": 0.9893999099731445, "learning_rate": 0.0007750628482130725, "loss": 3.5097, "step": 26485 }, { "epoch": 1.7998369343660823, "grad_norm": 0.9149765968322754, "learning_rate": 0.0007750203832042397, "loss": 3.5405, "step": 26490 }, { "epoch": 1.8001766544367441, "grad_norm": 0.706072986125946, "learning_rate": 0.000774977918195407, "loss": 3.6303, "step": 26495 }, { "epoch": 1.800516374507406, "grad_norm": 0.7087363004684448, "learning_rate": 0.0007749354531865743, "loss": 3.5855, "step": 26500 }, { "epoch": 1.8008560945780676, "grad_norm": 0.9747774004936218, "learning_rate": 0.0007748929881777416, "loss": 3.4446, "step": 26505 }, { "epoch": 1.8011958146487295, "grad_norm": 0.7663552165031433, "learning_rate": 0.0007748505231689088, "loss": 3.9202, "step": 26510 }, { "epoch": 1.8015355347193913, "grad_norm": 0.83504718542099, "learning_rate": 0.0007748080581600761, "loss": 3.6159, "step": 26515 }, { "epoch": 1.801875254790053, "grad_norm": 0.6961431503295898, "learning_rate": 0.0007747655931512434, "loss": 3.2866, "step": 26520 }, { "epoch": 1.8022149748607148, "grad_norm": 0.8577984571456909, "learning_rate": 0.0007747231281424106, "loss": 3.7548, "step": 26525 }, { "epoch": 1.8025546949313767, "grad_norm": 0.7688853144645691, "learning_rate": 0.000774680663133578, "loss": 3.4588, "step": 26530 }, { "epoch": 1.8028944150020383, "grad_norm": 0.9180909991264343, "learning_rate": 0.0007746381981247453, "loss": 3.785, "step": 26535 }, { "epoch": 1.8032341350727001, "grad_norm": 0.8309755325317383, "learning_rate": 0.0007745957331159125, "loss": 3.7327, "step": 26540 }, { "epoch": 1.803573855143362, "grad_norm": 0.7985311150550842, "learning_rate": 0.0007745532681070798, "loss": 3.635, "step": 26545 }, { "epoch": 1.8039135752140236, "grad_norm": 1.130068063735962, "learning_rate": 0.000774510803098247, "loss": 3.522, "step": 26550 }, { "epoch": 1.8042532952846853, "grad_norm": 0.9112940430641174, "learning_rate": 0.0007744683380894143, "loss": 3.8311, "step": 26555 }, { "epoch": 1.8045930153553473, "grad_norm": 1.0229567289352417, "learning_rate": 0.0007744258730805817, "loss": 3.7484, "step": 26560 }, { "epoch": 1.804932735426009, "grad_norm": 0.8279697895050049, "learning_rate": 0.0007743834080717489, "loss": 3.5041, "step": 26565 }, { "epoch": 1.8052724554966706, "grad_norm": 0.7603811025619507, "learning_rate": 0.0007743409430629162, "loss": 3.6527, "step": 26570 }, { "epoch": 1.8056121755673327, "grad_norm": 0.9912105202674866, "learning_rate": 0.0007742984780540835, "loss": 3.4704, "step": 26575 }, { "epoch": 1.8059518956379943, "grad_norm": 0.8260603547096252, "learning_rate": 0.0007742560130452507, "loss": 3.5106, "step": 26580 }, { "epoch": 1.806291615708656, "grad_norm": 0.7465791702270508, "learning_rate": 0.0007742135480364179, "loss": 3.5074, "step": 26585 }, { "epoch": 1.806631335779318, "grad_norm": 1.007486343383789, "learning_rate": 0.0007741710830275853, "loss": 3.6652, "step": 26590 }, { "epoch": 1.8069710558499796, "grad_norm": 0.8247768878936768, "learning_rate": 0.0007741286180187526, "loss": 3.6207, "step": 26595 }, { "epoch": 1.8073107759206413, "grad_norm": 0.9325391054153442, "learning_rate": 0.0007740861530099198, "loss": 3.7965, "step": 26600 }, { "epoch": 1.8076504959913031, "grad_norm": 1.2678993940353394, "learning_rate": 0.0007740436880010872, "loss": 3.5615, "step": 26605 }, { "epoch": 1.807990216061965, "grad_norm": 0.7268818020820618, "learning_rate": 0.0007740012229922544, "loss": 3.5981, "step": 26610 }, { "epoch": 1.8083299361326266, "grad_norm": 0.7097742557525635, "learning_rate": 0.0007739587579834216, "loss": 3.4925, "step": 26615 }, { "epoch": 1.8086696562032885, "grad_norm": 0.772057294845581, "learning_rate": 0.000773916292974589, "loss": 3.6636, "step": 26620 }, { "epoch": 1.8090093762739503, "grad_norm": 0.803548276424408, "learning_rate": 0.0007738738279657562, "loss": 3.4405, "step": 26625 }, { "epoch": 1.809349096344612, "grad_norm": 0.6985668540000916, "learning_rate": 0.0007738313629569235, "loss": 3.6018, "step": 26630 }, { "epoch": 1.8096888164152738, "grad_norm": 0.8016760349273682, "learning_rate": 0.0007737888979480909, "loss": 3.9352, "step": 26635 }, { "epoch": 1.8100285364859356, "grad_norm": 0.8703891038894653, "learning_rate": 0.0007737464329392581, "loss": 3.5596, "step": 26640 }, { "epoch": 1.8103682565565973, "grad_norm": 0.834832489490509, "learning_rate": 0.0007737039679304253, "loss": 3.6899, "step": 26645 }, { "epoch": 1.8107079766272591, "grad_norm": 0.7041547894477844, "learning_rate": 0.0007736615029215926, "loss": 3.6926, "step": 26650 }, { "epoch": 1.811047696697921, "grad_norm": 0.7675163745880127, "learning_rate": 0.0007736190379127599, "loss": 3.6088, "step": 26655 }, { "epoch": 1.8113874167685826, "grad_norm": 2.7944114208221436, "learning_rate": 0.0007735765729039271, "loss": 3.5574, "step": 26660 }, { "epoch": 1.8117271368392445, "grad_norm": 0.6680983901023865, "learning_rate": 0.0007735341078950945, "loss": 3.4886, "step": 26665 }, { "epoch": 1.8120668569099063, "grad_norm": 0.8312240839004517, "learning_rate": 0.0007734916428862618, "loss": 3.5767, "step": 26670 }, { "epoch": 1.812406576980568, "grad_norm": 0.9335630536079407, "learning_rate": 0.000773449177877429, "loss": 3.5908, "step": 26675 }, { "epoch": 1.8127462970512298, "grad_norm": 0.8477770090103149, "learning_rate": 0.0007734067128685963, "loss": 3.4267, "step": 26680 }, { "epoch": 1.8130860171218917, "grad_norm": 0.8280338644981384, "learning_rate": 0.0007733642478597635, "loss": 3.4877, "step": 26685 }, { "epoch": 1.8134257371925533, "grad_norm": 0.7250208854675293, "learning_rate": 0.0007733217828509308, "loss": 3.5619, "step": 26690 }, { "epoch": 1.8137654572632151, "grad_norm": 0.8724890947341919, "learning_rate": 0.0007732793178420981, "loss": 3.994, "step": 26695 }, { "epoch": 1.814105177333877, "grad_norm": 0.7015180587768555, "learning_rate": 0.0007732368528332654, "loss": 3.6746, "step": 26700 }, { "epoch": 1.8144448974045386, "grad_norm": 0.7386777400970459, "learning_rate": 0.0007731943878244327, "loss": 3.3649, "step": 26705 }, { "epoch": 1.8147846174752005, "grad_norm": 0.7724874019622803, "learning_rate": 0.0007731519228156, "loss": 3.7754, "step": 26710 }, { "epoch": 1.8151243375458623, "grad_norm": 1.1906942129135132, "learning_rate": 0.0007731094578067672, "loss": 3.5221, "step": 26715 }, { "epoch": 1.815464057616524, "grad_norm": 1.0118272304534912, "learning_rate": 0.0007730669927979345, "loss": 3.5368, "step": 26720 }, { "epoch": 1.8158037776871856, "grad_norm": 0.876660168170929, "learning_rate": 0.0007730245277891018, "loss": 3.6559, "step": 26725 }, { "epoch": 1.8161434977578477, "grad_norm": 0.9719272255897522, "learning_rate": 0.000772982062780269, "loss": 3.7132, "step": 26730 }, { "epoch": 1.8164832178285093, "grad_norm": 0.8586097955703735, "learning_rate": 0.0007729395977714363, "loss": 3.7356, "step": 26735 }, { "epoch": 1.816822937899171, "grad_norm": 0.8096010088920593, "learning_rate": 0.0007728971327626037, "loss": 3.2139, "step": 26740 }, { "epoch": 1.817162657969833, "grad_norm": 0.6867233514785767, "learning_rate": 0.0007728546677537709, "loss": 3.7706, "step": 26745 }, { "epoch": 1.8175023780404946, "grad_norm": 1.5170011520385742, "learning_rate": 0.0007728122027449382, "loss": 3.876, "step": 26750 }, { "epoch": 1.8178420981111563, "grad_norm": 0.6503229141235352, "learning_rate": 0.0007727697377361055, "loss": 3.6268, "step": 26755 }, { "epoch": 1.8181818181818183, "grad_norm": 1.088379144668579, "learning_rate": 0.0007727272727272727, "loss": 3.5683, "step": 26760 }, { "epoch": 1.81852153825248, "grad_norm": 0.9574974775314331, "learning_rate": 0.00077268480771844, "loss": 3.4579, "step": 26765 }, { "epoch": 1.8188612583231416, "grad_norm": 0.9170920252799988, "learning_rate": 0.0007726423427096074, "loss": 3.6672, "step": 26770 }, { "epoch": 1.8192009783938035, "grad_norm": 0.9261990189552307, "learning_rate": 0.0007725998777007746, "loss": 3.7014, "step": 26775 }, { "epoch": 1.8195406984644653, "grad_norm": 1.0048258304595947, "learning_rate": 0.0007725574126919419, "loss": 3.3778, "step": 26780 }, { "epoch": 1.819880418535127, "grad_norm": 0.7642540335655212, "learning_rate": 0.0007725149476831091, "loss": 3.5766, "step": 26785 }, { "epoch": 1.8202201386057888, "grad_norm": 1.089959740638733, "learning_rate": 0.0007724724826742764, "loss": 3.5016, "step": 26790 }, { "epoch": 1.8205598586764506, "grad_norm": 1.5794767141342163, "learning_rate": 0.0007724300176654437, "loss": 3.4705, "step": 26795 }, { "epoch": 1.8208995787471123, "grad_norm": 0.9247173070907593, "learning_rate": 0.0007723875526566109, "loss": 3.6616, "step": 26800 }, { "epoch": 1.8212392988177741, "grad_norm": 0.9356890320777893, "learning_rate": 0.0007723450876477783, "loss": 3.7823, "step": 26805 }, { "epoch": 1.821579018888436, "grad_norm": 1.1007574796676636, "learning_rate": 0.0007723026226389456, "loss": 3.6459, "step": 26810 }, { "epoch": 1.8219187389590976, "grad_norm": 0.8401316404342651, "learning_rate": 0.0007722601576301128, "loss": 3.5881, "step": 26815 }, { "epoch": 1.8222584590297595, "grad_norm": 0.7598063349723816, "learning_rate": 0.0007722176926212801, "loss": 3.4313, "step": 26820 }, { "epoch": 1.8225981791004213, "grad_norm": 0.8231205344200134, "learning_rate": 0.0007721752276124474, "loss": 3.4215, "step": 26825 }, { "epoch": 1.822937899171083, "grad_norm": 0.842429518699646, "learning_rate": 0.0007721327626036146, "loss": 3.5588, "step": 26830 }, { "epoch": 1.8232776192417448, "grad_norm": 0.843315839767456, "learning_rate": 0.0007720902975947818, "loss": 3.5675, "step": 26835 }, { "epoch": 1.8236173393124067, "grad_norm": 0.8193457126617432, "learning_rate": 0.0007720478325859493, "loss": 3.6766, "step": 26840 }, { "epoch": 1.8239570593830683, "grad_norm": 0.8713749051094055, "learning_rate": 0.0007720053675771165, "loss": 3.6384, "step": 26845 }, { "epoch": 1.8242967794537301, "grad_norm": 0.8145918846130371, "learning_rate": 0.0007719629025682837, "loss": 3.7354, "step": 26850 }, { "epoch": 1.824636499524392, "grad_norm": 0.7795513272285461, "learning_rate": 0.0007719204375594511, "loss": 3.6932, "step": 26855 }, { "epoch": 1.8249762195950536, "grad_norm": 0.874422550201416, "learning_rate": 0.0007718779725506183, "loss": 3.4648, "step": 26860 }, { "epoch": 1.8253159396657155, "grad_norm": 0.7691762447357178, "learning_rate": 0.0007718355075417855, "loss": 3.7515, "step": 26865 }, { "epoch": 1.8256556597363773, "grad_norm": 0.9575505256652832, "learning_rate": 0.0007717930425329529, "loss": 3.7314, "step": 26870 }, { "epoch": 1.825995379807039, "grad_norm": 1.2624891996383667, "learning_rate": 0.0007717505775241202, "loss": 3.9587, "step": 26875 }, { "epoch": 1.8263350998777008, "grad_norm": 0.9109483361244202, "learning_rate": 0.0007717081125152874, "loss": 3.638, "step": 26880 }, { "epoch": 1.8266748199483627, "grad_norm": 0.7420251965522766, "learning_rate": 0.0007716656475064548, "loss": 3.559, "step": 26885 }, { "epoch": 1.8270145400190243, "grad_norm": 1.038568139076233, "learning_rate": 0.000771623182497622, "loss": 3.575, "step": 26890 }, { "epoch": 1.827354260089686, "grad_norm": 0.8054929971694946, "learning_rate": 0.0007715807174887892, "loss": 3.5465, "step": 26895 }, { "epoch": 1.827693980160348, "grad_norm": 1.322400450706482, "learning_rate": 0.0007715382524799565, "loss": 3.4497, "step": 26900 }, { "epoch": 1.8280337002310096, "grad_norm": 0.9718524813652039, "learning_rate": 0.0007714957874711238, "loss": 3.5916, "step": 26905 }, { "epoch": 1.8283734203016713, "grad_norm": 0.8023597598075867, "learning_rate": 0.0007714533224622911, "loss": 3.5082, "step": 26910 }, { "epoch": 1.8287131403723333, "grad_norm": 0.9028297662734985, "learning_rate": 0.0007714108574534584, "loss": 3.7235, "step": 26915 }, { "epoch": 1.829052860442995, "grad_norm": 0.8934625387191772, "learning_rate": 0.0007713683924446257, "loss": 3.4892, "step": 26920 }, { "epoch": 1.8293925805136566, "grad_norm": 1.4386545419692993, "learning_rate": 0.0007713259274357929, "loss": 3.4851, "step": 26925 }, { "epoch": 1.8297323005843187, "grad_norm": 0.8062783479690552, "learning_rate": 0.0007712834624269602, "loss": 3.6672, "step": 26930 }, { "epoch": 1.8300720206549803, "grad_norm": 0.6392088532447815, "learning_rate": 0.0007712409974181274, "loss": 3.6979, "step": 26935 }, { "epoch": 1.830411740725642, "grad_norm": 0.7586618661880493, "learning_rate": 0.0007711985324092947, "loss": 3.5235, "step": 26940 }, { "epoch": 1.8307514607963038, "grad_norm": 0.7700725197792053, "learning_rate": 0.0007711560674004621, "loss": 3.5141, "step": 26945 }, { "epoch": 1.8310911808669657, "grad_norm": 0.9625568985939026, "learning_rate": 0.0007711136023916293, "loss": 3.3685, "step": 26950 }, { "epoch": 1.8314309009376273, "grad_norm": 1.4263578653335571, "learning_rate": 0.0007710711373827966, "loss": 3.5274, "step": 26955 }, { "epoch": 1.8317706210082891, "grad_norm": 0.6960394382476807, "learning_rate": 0.0007710286723739639, "loss": 3.6193, "step": 26960 }, { "epoch": 1.832110341078951, "grad_norm": 0.7709155082702637, "learning_rate": 0.0007709862073651311, "loss": 3.4351, "step": 26965 }, { "epoch": 1.8324500611496126, "grad_norm": 0.8902915716171265, "learning_rate": 0.0007709437423562983, "loss": 3.6356, "step": 26970 }, { "epoch": 1.8327897812202745, "grad_norm": 0.9700208306312561, "learning_rate": 0.0007709012773474657, "loss": 3.8918, "step": 26975 }, { "epoch": 1.8331295012909363, "grad_norm": 0.73200523853302, "learning_rate": 0.000770858812338633, "loss": 3.6007, "step": 26980 }, { "epoch": 1.833469221361598, "grad_norm": 1.791385293006897, "learning_rate": 0.0007708163473298002, "loss": 3.5307, "step": 26985 }, { "epoch": 1.8338089414322598, "grad_norm": 0.8827402591705322, "learning_rate": 0.0007707738823209676, "loss": 3.5124, "step": 26990 }, { "epoch": 1.8341486615029217, "grad_norm": 0.7849318385124207, "learning_rate": 0.0007707314173121348, "loss": 3.2828, "step": 26995 }, { "epoch": 1.8344883815735833, "grad_norm": 1.2764041423797607, "learning_rate": 0.000770688952303302, "loss": 3.7065, "step": 27000 }, { "epoch": 1.8348281016442451, "grad_norm": 0.892051100730896, "learning_rate": 0.0007706464872944694, "loss": 3.3017, "step": 27005 }, { "epoch": 1.835167821714907, "grad_norm": 1.0181549787521362, "learning_rate": 0.0007706040222856366, "loss": 3.524, "step": 27010 }, { "epoch": 1.8355075417855686, "grad_norm": 0.7881715297698975, "learning_rate": 0.0007705615572768039, "loss": 3.6031, "step": 27015 }, { "epoch": 1.8358472618562305, "grad_norm": 1.4318002462387085, "learning_rate": 0.0007705190922679713, "loss": 3.678, "step": 27020 }, { "epoch": 1.8361869819268923, "grad_norm": 3.092374086380005, "learning_rate": 0.0007704766272591385, "loss": 3.6537, "step": 27025 }, { "epoch": 1.836526701997554, "grad_norm": 1.129860281944275, "learning_rate": 0.0007704341622503057, "loss": 3.402, "step": 27030 }, { "epoch": 1.8368664220682158, "grad_norm": 0.622101902961731, "learning_rate": 0.000770391697241473, "loss": 3.6286, "step": 27035 }, { "epoch": 1.8372061421388777, "grad_norm": 0.7821914553642273, "learning_rate": 0.0007703492322326403, "loss": 3.2474, "step": 27040 }, { "epoch": 1.8375458622095393, "grad_norm": 0.996071457862854, "learning_rate": 0.0007703067672238075, "loss": 3.5382, "step": 27045 }, { "epoch": 1.8378855822802012, "grad_norm": 0.7692259550094604, "learning_rate": 0.0007702643022149749, "loss": 3.6814, "step": 27050 }, { "epoch": 1.838225302350863, "grad_norm": 0.9140481948852539, "learning_rate": 0.0007702218372061422, "loss": 3.566, "step": 27055 }, { "epoch": 1.8385650224215246, "grad_norm": 3.619688034057617, "learning_rate": 0.0007701793721973094, "loss": 3.4249, "step": 27060 }, { "epoch": 1.8389047424921863, "grad_norm": 0.9598721861839294, "learning_rate": 0.0007701369071884767, "loss": 3.6482, "step": 27065 }, { "epoch": 1.8392444625628483, "grad_norm": 0.8211422562599182, "learning_rate": 0.000770094442179644, "loss": 3.6153, "step": 27070 }, { "epoch": 1.83958418263351, "grad_norm": 1.1440249681472778, "learning_rate": 0.0007700519771708112, "loss": 3.6286, "step": 27075 }, { "epoch": 1.8399239027041716, "grad_norm": 0.8672448396682739, "learning_rate": 0.0007700095121619785, "loss": 3.6143, "step": 27080 }, { "epoch": 1.8402636227748337, "grad_norm": 1.1416536569595337, "learning_rate": 0.0007699670471531458, "loss": 3.5142, "step": 27085 }, { "epoch": 1.8406033428454953, "grad_norm": 1.0361298322677612, "learning_rate": 0.0007699245821443132, "loss": 3.6066, "step": 27090 }, { "epoch": 1.840943062916157, "grad_norm": 0.8177396655082703, "learning_rate": 0.0007698821171354804, "loss": 3.4067, "step": 27095 }, { "epoch": 1.841282782986819, "grad_norm": 1.0287342071533203, "learning_rate": 0.0007698396521266476, "loss": 3.5735, "step": 27100 }, { "epoch": 1.8416225030574807, "grad_norm": 0.839390754699707, "learning_rate": 0.000769797187117815, "loss": 3.5407, "step": 27105 }, { "epoch": 1.8419622231281423, "grad_norm": 0.7465947270393372, "learning_rate": 0.0007697547221089822, "loss": 3.7772, "step": 27110 }, { "epoch": 1.8423019431988041, "grad_norm": 0.911277711391449, "learning_rate": 0.0007697122571001494, "loss": 3.4055, "step": 27115 }, { "epoch": 1.842641663269466, "grad_norm": 0.940079391002655, "learning_rate": 0.0007696697920913169, "loss": 3.7562, "step": 27120 }, { "epoch": 1.8429813833401276, "grad_norm": 1.1584961414337158, "learning_rate": 0.0007696273270824841, "loss": 3.6399, "step": 27125 }, { "epoch": 1.8433211034107895, "grad_norm": 0.8440088629722595, "learning_rate": 0.0007695848620736513, "loss": 3.7136, "step": 27130 }, { "epoch": 1.8436608234814513, "grad_norm": 0.746200442314148, "learning_rate": 0.0007695423970648186, "loss": 3.6443, "step": 27135 }, { "epoch": 1.844000543552113, "grad_norm": 0.8213841915130615, "learning_rate": 0.0007694999320559859, "loss": 3.6862, "step": 27140 }, { "epoch": 1.8443402636227748, "grad_norm": 0.6806025505065918, "learning_rate": 0.0007694574670471531, "loss": 3.3967, "step": 27145 }, { "epoch": 1.8446799836934367, "grad_norm": 1.1042819023132324, "learning_rate": 0.0007694150020383205, "loss": 3.7189, "step": 27150 }, { "epoch": 1.8450197037640983, "grad_norm": 1.0057493448257446, "learning_rate": 0.0007693725370294878, "loss": 3.7434, "step": 27155 }, { "epoch": 1.8453594238347601, "grad_norm": 0.8743189573287964, "learning_rate": 0.000769330072020655, "loss": 3.5987, "step": 27160 }, { "epoch": 1.845699143905422, "grad_norm": 1.1106281280517578, "learning_rate": 0.0007692876070118223, "loss": 3.2995, "step": 27165 }, { "epoch": 1.8460388639760836, "grad_norm": 0.868264377117157, "learning_rate": 0.0007692451420029896, "loss": 3.5978, "step": 27170 }, { "epoch": 1.8463785840467455, "grad_norm": 0.8783383369445801, "learning_rate": 0.0007692026769941568, "loss": 3.8498, "step": 27175 }, { "epoch": 1.8467183041174073, "grad_norm": 1.074454426765442, "learning_rate": 0.0007691602119853241, "loss": 3.8683, "step": 27180 }, { "epoch": 1.847058024188069, "grad_norm": 1.0404958724975586, "learning_rate": 0.0007691177469764914, "loss": 3.6995, "step": 27185 }, { "epoch": 1.8473977442587308, "grad_norm": 0.853009819984436, "learning_rate": 0.0007690752819676587, "loss": 3.8654, "step": 27190 }, { "epoch": 1.8477374643293927, "grad_norm": 0.7700687646865845, "learning_rate": 0.000769032816958826, "loss": 3.8463, "step": 27195 }, { "epoch": 1.8480771844000543, "grad_norm": 0.6830011010169983, "learning_rate": 0.0007689903519499932, "loss": 3.5871, "step": 27200 }, { "epoch": 1.8484169044707162, "grad_norm": 0.6956087350845337, "learning_rate": 0.0007689478869411605, "loss": 3.8807, "step": 27205 }, { "epoch": 1.848756624541378, "grad_norm": 0.7649178504943848, "learning_rate": 0.0007689054219323278, "loss": 3.5997, "step": 27210 }, { "epoch": 1.8490963446120396, "grad_norm": 0.64471834897995, "learning_rate": 0.000768862956923495, "loss": 3.3776, "step": 27215 }, { "epoch": 1.8494360646827015, "grad_norm": 0.9000673890113831, "learning_rate": 0.0007688204919146624, "loss": 3.8378, "step": 27220 }, { "epoch": 1.8497757847533634, "grad_norm": 1.0492181777954102, "learning_rate": 0.0007687780269058297, "loss": 3.4325, "step": 27225 }, { "epoch": 1.850115504824025, "grad_norm": 1.0854588747024536, "learning_rate": 0.0007687355618969969, "loss": 3.4353, "step": 27230 }, { "epoch": 1.8504552248946866, "grad_norm": 0.7448928356170654, "learning_rate": 0.0007686930968881641, "loss": 3.6821, "step": 27235 }, { "epoch": 1.8507949449653487, "grad_norm": 0.9552714824676514, "learning_rate": 0.0007686506318793315, "loss": 3.6605, "step": 27240 }, { "epoch": 1.8511346650360103, "grad_norm": 0.8704216480255127, "learning_rate": 0.0007686081668704987, "loss": 3.6212, "step": 27245 }, { "epoch": 1.851474385106672, "grad_norm": 1.2198938131332397, "learning_rate": 0.0007685657018616659, "loss": 3.4653, "step": 27250 }, { "epoch": 1.851814105177334, "grad_norm": 1.2459924221038818, "learning_rate": 0.0007685232368528334, "loss": 3.4805, "step": 27255 }, { "epoch": 1.8521538252479957, "grad_norm": 0.7156299948692322, "learning_rate": 0.0007684807718440006, "loss": 3.6415, "step": 27260 }, { "epoch": 1.8524935453186573, "grad_norm": 1.0380462408065796, "learning_rate": 0.0007684383068351678, "loss": 3.7782, "step": 27265 }, { "epoch": 1.8528332653893194, "grad_norm": 1.0427687168121338, "learning_rate": 0.0007683958418263352, "loss": 3.677, "step": 27270 }, { "epoch": 1.853172985459981, "grad_norm": 0.9309741854667664, "learning_rate": 0.0007683533768175024, "loss": 3.8922, "step": 27275 }, { "epoch": 1.8535127055306426, "grad_norm": 0.8546794056892395, "learning_rate": 0.0007683109118086696, "loss": 3.6222, "step": 27280 }, { "epoch": 1.8538524256013045, "grad_norm": 1.0085939168930054, "learning_rate": 0.0007682684467998369, "loss": 3.6291, "step": 27285 }, { "epoch": 1.8541921456719663, "grad_norm": 0.6203987002372742, "learning_rate": 0.0007682259817910043, "loss": 3.7207, "step": 27290 }, { "epoch": 1.854531865742628, "grad_norm": 1.034547209739685, "learning_rate": 0.0007681835167821715, "loss": 3.4743, "step": 27295 }, { "epoch": 1.8548715858132898, "grad_norm": 1.2599289417266846, "learning_rate": 0.0007681410517733388, "loss": 3.6557, "step": 27300 }, { "epoch": 1.8552113058839517, "grad_norm": 0.7946342825889587, "learning_rate": 0.0007680985867645061, "loss": 3.5512, "step": 27305 }, { "epoch": 1.8555510259546133, "grad_norm": 1.008371353149414, "learning_rate": 0.0007680561217556733, "loss": 3.5165, "step": 27310 }, { "epoch": 1.8558907460252752, "grad_norm": 1.0323691368103027, "learning_rate": 0.0007680136567468406, "loss": 3.8126, "step": 27315 }, { "epoch": 1.856230466095937, "grad_norm": 0.8112845420837402, "learning_rate": 0.0007679711917380078, "loss": 3.4403, "step": 27320 }, { "epoch": 1.8565701861665986, "grad_norm": 0.8098148703575134, "learning_rate": 0.0007679287267291752, "loss": 3.4563, "step": 27325 }, { "epoch": 1.8569099062372605, "grad_norm": 0.9876975417137146, "learning_rate": 0.0007678862617203425, "loss": 3.4914, "step": 27330 }, { "epoch": 1.8572496263079223, "grad_norm": 1.1242824792861938, "learning_rate": 0.0007678437967115097, "loss": 3.5018, "step": 27335 }, { "epoch": 1.857589346378584, "grad_norm": 0.7763880491256714, "learning_rate": 0.000767801331702677, "loss": 3.481, "step": 27340 }, { "epoch": 1.8579290664492458, "grad_norm": 0.887474775314331, "learning_rate": 0.0007677588666938443, "loss": 3.4525, "step": 27345 }, { "epoch": 1.8582687865199077, "grad_norm": 1.0389174222946167, "learning_rate": 0.0007677164016850115, "loss": 3.4386, "step": 27350 }, { "epoch": 1.8586085065905693, "grad_norm": 0.8689044117927551, "learning_rate": 0.0007676739366761788, "loss": 3.6042, "step": 27355 }, { "epoch": 1.8589482266612312, "grad_norm": 0.7528037428855896, "learning_rate": 0.0007676314716673462, "loss": 3.5482, "step": 27360 }, { "epoch": 1.859287946731893, "grad_norm": 0.8691056370735168, "learning_rate": 0.0007675890066585134, "loss": 3.5505, "step": 27365 }, { "epoch": 1.8596276668025546, "grad_norm": 0.7504808306694031, "learning_rate": 0.0007675465416496806, "loss": 3.4806, "step": 27370 }, { "epoch": 1.8599673868732165, "grad_norm": 1.110742449760437, "learning_rate": 0.000767504076640848, "loss": 3.5491, "step": 27375 }, { "epoch": 1.8603071069438784, "grad_norm": 0.7816097736358643, "learning_rate": 0.0007674616116320152, "loss": 3.4334, "step": 27380 }, { "epoch": 1.86064682701454, "grad_norm": 0.9115440845489502, "learning_rate": 0.0007674191466231824, "loss": 3.5536, "step": 27385 }, { "epoch": 1.8609865470852018, "grad_norm": 0.9597583413124084, "learning_rate": 0.0007673766816143498, "loss": 3.2818, "step": 27390 }, { "epoch": 1.8613262671558637, "grad_norm": 0.7826088666915894, "learning_rate": 0.0007673342166055171, "loss": 3.7394, "step": 27395 }, { "epoch": 1.8616659872265253, "grad_norm": 0.9445761442184448, "learning_rate": 0.0007672917515966843, "loss": 3.8531, "step": 27400 }, { "epoch": 1.862005707297187, "grad_norm": 1.0347726345062256, "learning_rate": 0.0007672492865878517, "loss": 3.3727, "step": 27405 }, { "epoch": 1.862345427367849, "grad_norm": 0.9945400357246399, "learning_rate": 0.0007672068215790189, "loss": 3.8067, "step": 27410 }, { "epoch": 1.8626851474385107, "grad_norm": 0.8428159356117249, "learning_rate": 0.0007671643565701861, "loss": 3.8321, "step": 27415 }, { "epoch": 1.8630248675091723, "grad_norm": 0.8342534303665161, "learning_rate": 0.0007671218915613534, "loss": 3.7449, "step": 27420 }, { "epoch": 1.8633645875798344, "grad_norm": 0.7847055196762085, "learning_rate": 0.0007670794265525207, "loss": 3.6294, "step": 27425 }, { "epoch": 1.863704307650496, "grad_norm": 0.9415319561958313, "learning_rate": 0.0007670369615436881, "loss": 3.6916, "step": 27430 }, { "epoch": 1.8640440277211576, "grad_norm": 0.6695097088813782, "learning_rate": 0.0007669944965348553, "loss": 3.6119, "step": 27435 }, { "epoch": 1.8643837477918197, "grad_norm": 0.9680308699607849, "learning_rate": 0.0007669520315260226, "loss": 3.8666, "step": 27440 }, { "epoch": 1.8647234678624813, "grad_norm": 0.8927963376045227, "learning_rate": 0.0007669095665171899, "loss": 3.5948, "step": 27445 }, { "epoch": 1.865063187933143, "grad_norm": 0.9845446944236755, "learning_rate": 0.0007668671015083571, "loss": 3.3865, "step": 27450 }, { "epoch": 1.8654029080038048, "grad_norm": 0.9280561804771423, "learning_rate": 0.0007668246364995244, "loss": 3.4073, "step": 27455 }, { "epoch": 1.8657426280744667, "grad_norm": 0.6795822978019714, "learning_rate": 0.0007667821714906917, "loss": 3.5863, "step": 27460 }, { "epoch": 1.8660823481451283, "grad_norm": 1.4276690483093262, "learning_rate": 0.000766739706481859, "loss": 3.3912, "step": 27465 }, { "epoch": 1.8664220682157902, "grad_norm": 0.8691829442977905, "learning_rate": 0.0007666972414730262, "loss": 3.5612, "step": 27470 }, { "epoch": 1.866761788286452, "grad_norm": 0.927778422832489, "learning_rate": 0.0007666547764641936, "loss": 3.3653, "step": 27475 }, { "epoch": 1.8671015083571136, "grad_norm": 2.222081422805786, "learning_rate": 0.0007666123114553608, "loss": 3.6839, "step": 27480 }, { "epoch": 1.8674412284277755, "grad_norm": 1.2115776538848877, "learning_rate": 0.000766569846446528, "loss": 3.507, "step": 27485 }, { "epoch": 1.8677809484984373, "grad_norm": 0.9850777387619019, "learning_rate": 0.0007665273814376954, "loss": 3.482, "step": 27490 }, { "epoch": 1.868120668569099, "grad_norm": 3.00408935546875, "learning_rate": 0.0007664849164288626, "loss": 3.8213, "step": 27495 }, { "epoch": 1.8684603886397608, "grad_norm": 1.016135573387146, "learning_rate": 0.0007664424514200299, "loss": 3.4416, "step": 27500 }, { "epoch": 1.8688001087104227, "grad_norm": 0.9369787573814392, "learning_rate": 0.0007663999864111973, "loss": 3.7815, "step": 27505 }, { "epoch": 1.8691398287810843, "grad_norm": 1.1467502117156982, "learning_rate": 0.0007663575214023645, "loss": 3.3465, "step": 27510 }, { "epoch": 1.8694795488517462, "grad_norm": 0.9237188100814819, "learning_rate": 0.0007663150563935317, "loss": 3.6249, "step": 27515 }, { "epoch": 1.869819268922408, "grad_norm": 0.9433103799819946, "learning_rate": 0.000766272591384699, "loss": 3.4141, "step": 27520 }, { "epoch": 1.8701589889930696, "grad_norm": 0.7327950596809387, "learning_rate": 0.0007662301263758663, "loss": 3.503, "step": 27525 }, { "epoch": 1.8704987090637315, "grad_norm": 0.7121861577033997, "learning_rate": 0.0007661876613670335, "loss": 3.7001, "step": 27530 }, { "epoch": 1.8708384291343934, "grad_norm": 8.888404846191406, "learning_rate": 0.0007661451963582009, "loss": 3.2281, "step": 27535 }, { "epoch": 1.871178149205055, "grad_norm": 0.8614112138748169, "learning_rate": 0.0007661027313493682, "loss": 3.8553, "step": 27540 }, { "epoch": 1.8715178692757168, "grad_norm": 0.701536238193512, "learning_rate": 0.0007660602663405354, "loss": 3.5742, "step": 27545 }, { "epoch": 1.8718575893463787, "grad_norm": 0.8840484619140625, "learning_rate": 0.0007660178013317027, "loss": 3.8435, "step": 27550 }, { "epoch": 1.8721973094170403, "grad_norm": 0.8737392425537109, "learning_rate": 0.00076597533632287, "loss": 3.5616, "step": 27555 }, { "epoch": 1.8725370294877022, "grad_norm": 0.9750568866729736, "learning_rate": 0.0007659328713140372, "loss": 3.1655, "step": 27560 }, { "epoch": 1.872876749558364, "grad_norm": 1.8826932907104492, "learning_rate": 0.0007658904063052045, "loss": 3.6648, "step": 27565 }, { "epoch": 1.8732164696290257, "grad_norm": 0.9589025974273682, "learning_rate": 0.0007658479412963718, "loss": 3.4366, "step": 27570 }, { "epoch": 1.8735561896996873, "grad_norm": 0.9324145317077637, "learning_rate": 0.0007658054762875391, "loss": 3.7053, "step": 27575 }, { "epoch": 1.8738959097703494, "grad_norm": 1.2258570194244385, "learning_rate": 0.0007657630112787064, "loss": 3.7311, "step": 27580 }, { "epoch": 1.874235629841011, "grad_norm": 0.8182900547981262, "learning_rate": 0.0007657205462698736, "loss": 3.5832, "step": 27585 }, { "epoch": 1.8745753499116726, "grad_norm": 0.7030863761901855, "learning_rate": 0.0007656780812610409, "loss": 3.431, "step": 27590 }, { "epoch": 1.8749150699823347, "grad_norm": 0.8426514863967896, "learning_rate": 0.0007656356162522082, "loss": 3.4716, "step": 27595 }, { "epoch": 1.8752547900529963, "grad_norm": 2.751703977584839, "learning_rate": 0.0007655931512433754, "loss": 3.4714, "step": 27600 }, { "epoch": 1.875594510123658, "grad_norm": 2.7049918174743652, "learning_rate": 0.0007655506862345428, "loss": 3.753, "step": 27605 }, { "epoch": 1.87593423019432, "grad_norm": 0.8007530570030212, "learning_rate": 0.0007655082212257101, "loss": 3.4382, "step": 27610 }, { "epoch": 1.8762739502649817, "grad_norm": 0.7956873774528503, "learning_rate": 0.0007654657562168773, "loss": 3.7092, "step": 27615 }, { "epoch": 1.8766136703356433, "grad_norm": 0.8245481848716736, "learning_rate": 0.0007654232912080445, "loss": 3.5895, "step": 27620 }, { "epoch": 1.8769533904063052, "grad_norm": 1.086917519569397, "learning_rate": 0.0007653808261992119, "loss": 3.7614, "step": 27625 }, { "epoch": 1.877293110476967, "grad_norm": 0.7628483176231384, "learning_rate": 0.0007653383611903791, "loss": 3.5879, "step": 27630 }, { "epoch": 1.8776328305476286, "grad_norm": 0.8953080177307129, "learning_rate": 0.0007652958961815463, "loss": 3.253, "step": 27635 }, { "epoch": 1.8779725506182905, "grad_norm": 0.6786538362503052, "learning_rate": 0.0007652534311727138, "loss": 3.6148, "step": 27640 }, { "epoch": 1.8783122706889523, "grad_norm": 1.1295231580734253, "learning_rate": 0.000765210966163881, "loss": 3.3177, "step": 27645 }, { "epoch": 1.878651990759614, "grad_norm": 1.1180208921432495, "learning_rate": 0.0007651685011550482, "loss": 3.4648, "step": 27650 }, { "epoch": 1.8789917108302758, "grad_norm": 1.1307436227798462, "learning_rate": 0.0007651260361462156, "loss": 3.6143, "step": 27655 }, { "epoch": 1.8793314309009377, "grad_norm": 1.5474539995193481, "learning_rate": 0.0007650835711373828, "loss": 3.6104, "step": 27660 }, { "epoch": 1.8796711509715993, "grad_norm": 1.0800511837005615, "learning_rate": 0.00076504110612855, "loss": 3.7443, "step": 27665 }, { "epoch": 1.8800108710422612, "grad_norm": 0.7681803107261658, "learning_rate": 0.0007649986411197175, "loss": 3.5591, "step": 27670 }, { "epoch": 1.880350591112923, "grad_norm": 1.0217119455337524, "learning_rate": 0.0007649561761108847, "loss": 3.461, "step": 27675 }, { "epoch": 1.8806903111835847, "grad_norm": 0.8130105137825012, "learning_rate": 0.0007649137111020519, "loss": 3.7187, "step": 27680 }, { "epoch": 1.8810300312542465, "grad_norm": 0.7654531002044678, "learning_rate": 0.0007648712460932192, "loss": 3.58, "step": 27685 }, { "epoch": 1.8813697513249084, "grad_norm": 0.702591061592102, "learning_rate": 0.0007648287810843865, "loss": 3.568, "step": 27690 }, { "epoch": 1.88170947139557, "grad_norm": 1.335799217224121, "learning_rate": 0.0007647863160755537, "loss": 3.8125, "step": 27695 }, { "epoch": 1.8820491914662318, "grad_norm": 0.9578614234924316, "learning_rate": 0.000764743851066721, "loss": 3.4757, "step": 27700 }, { "epoch": 1.8823889115368937, "grad_norm": 0.9437412023544312, "learning_rate": 0.0007647013860578884, "loss": 3.5117, "step": 27705 }, { "epoch": 1.8827286316075553, "grad_norm": 0.9306308031082153, "learning_rate": 0.0007646589210490556, "loss": 3.5703, "step": 27710 }, { "epoch": 1.8830683516782172, "grad_norm": 0.7703170776367188, "learning_rate": 0.0007646164560402229, "loss": 3.7251, "step": 27715 }, { "epoch": 1.883408071748879, "grad_norm": 0.8097624778747559, "learning_rate": 0.0007645739910313901, "loss": 3.5606, "step": 27720 }, { "epoch": 1.8837477918195407, "grad_norm": 0.7362086176872253, "learning_rate": 0.0007645315260225574, "loss": 3.578, "step": 27725 }, { "epoch": 1.8840875118902025, "grad_norm": 1.0768812894821167, "learning_rate": 0.0007644890610137247, "loss": 3.5988, "step": 27730 }, { "epoch": 1.8844272319608644, "grad_norm": 0.8561644554138184, "learning_rate": 0.0007644465960048919, "loss": 3.6768, "step": 27735 }, { "epoch": 1.884766952031526, "grad_norm": 0.9110159277915955, "learning_rate": 0.0007644041309960593, "loss": 3.6808, "step": 27740 }, { "epoch": 1.8851066721021879, "grad_norm": 0.7943991422653198, "learning_rate": 0.0007643616659872266, "loss": 3.4954, "step": 27745 }, { "epoch": 1.8854463921728497, "grad_norm": 0.6693992614746094, "learning_rate": 0.0007643192009783938, "loss": 3.4861, "step": 27750 }, { "epoch": 1.8857861122435113, "grad_norm": 0.7504506707191467, "learning_rate": 0.000764276735969561, "loss": 3.4306, "step": 27755 }, { "epoch": 1.886125832314173, "grad_norm": 0.9541374444961548, "learning_rate": 0.0007642342709607284, "loss": 3.7398, "step": 27760 }, { "epoch": 1.886465552384835, "grad_norm": 1.605481505393982, "learning_rate": 0.0007641918059518956, "loss": 3.6045, "step": 27765 }, { "epoch": 1.8868052724554967, "grad_norm": 1.0158085823059082, "learning_rate": 0.0007641493409430629, "loss": 3.7272, "step": 27770 }, { "epoch": 1.8871449925261583, "grad_norm": 0.8868354558944702, "learning_rate": 0.0007641068759342303, "loss": 3.396, "step": 27775 }, { "epoch": 1.8874847125968204, "grad_norm": 0.6586178541183472, "learning_rate": 0.0007640644109253975, "loss": 3.4357, "step": 27780 }, { "epoch": 1.887824432667482, "grad_norm": 1.1147302389144897, "learning_rate": 0.0007640219459165648, "loss": 3.3869, "step": 27785 }, { "epoch": 1.8881641527381436, "grad_norm": 1.4214062690734863, "learning_rate": 0.0007639794809077321, "loss": 3.7593, "step": 27790 }, { "epoch": 1.8885038728088055, "grad_norm": 0.7250053286552429, "learning_rate": 0.0007639370158988993, "loss": 3.5766, "step": 27795 }, { "epoch": 1.8888435928794673, "grad_norm": 0.8354542851448059, "learning_rate": 0.0007638945508900666, "loss": 3.5954, "step": 27800 }, { "epoch": 1.889183312950129, "grad_norm": 0.7880530953407288, "learning_rate": 0.0007638520858812339, "loss": 3.5222, "step": 27805 }, { "epoch": 1.8895230330207908, "grad_norm": 2.096595287322998, "learning_rate": 0.0007638096208724012, "loss": 3.8444, "step": 27810 }, { "epoch": 1.8898627530914527, "grad_norm": 1.105875849723816, "learning_rate": 0.0007637671558635685, "loss": 3.5127, "step": 27815 }, { "epoch": 1.8902024731621143, "grad_norm": 0.8182263970375061, "learning_rate": 0.0007637246908547357, "loss": 3.5066, "step": 27820 }, { "epoch": 1.8905421932327762, "grad_norm": 1.4389373064041138, "learning_rate": 0.000763682225845903, "loss": 3.5328, "step": 27825 }, { "epoch": 1.890881913303438, "grad_norm": 0.9205893874168396, "learning_rate": 0.0007636397608370703, "loss": 3.9885, "step": 27830 }, { "epoch": 1.8912216333740997, "grad_norm": 0.9334415793418884, "learning_rate": 0.0007635972958282375, "loss": 3.7796, "step": 27835 }, { "epoch": 1.8915613534447615, "grad_norm": 0.7761504650115967, "learning_rate": 0.0007635548308194048, "loss": 3.389, "step": 27840 }, { "epoch": 1.8919010735154234, "grad_norm": 0.9125584363937378, "learning_rate": 0.0007635123658105722, "loss": 3.5199, "step": 27845 }, { "epoch": 1.892240793586085, "grad_norm": 1.073754072189331, "learning_rate": 0.0007634699008017394, "loss": 3.5102, "step": 27850 }, { "epoch": 1.8925805136567468, "grad_norm": 0.9121300578117371, "learning_rate": 0.0007634274357929067, "loss": 3.9014, "step": 27855 }, { "epoch": 1.8929202337274087, "grad_norm": 0.9460108876228333, "learning_rate": 0.000763384970784074, "loss": 3.8095, "step": 27860 }, { "epoch": 1.8932599537980703, "grad_norm": 1.0225892066955566, "learning_rate": 0.0007633425057752412, "loss": 3.4162, "step": 27865 }, { "epoch": 1.8935996738687322, "grad_norm": 0.9070990681648254, "learning_rate": 0.0007633000407664084, "loss": 3.8772, "step": 27870 }, { "epoch": 1.893939393939394, "grad_norm": 1.0600470304489136, "learning_rate": 0.0007632575757575758, "loss": 3.7199, "step": 27875 }, { "epoch": 1.8942791140100557, "grad_norm": 0.764045000076294, "learning_rate": 0.0007632151107487431, "loss": 3.6753, "step": 27880 }, { "epoch": 1.8946188340807175, "grad_norm": 0.9605132341384888, "learning_rate": 0.0007631726457399103, "loss": 3.4173, "step": 27885 }, { "epoch": 1.8949585541513794, "grad_norm": 0.6812487840652466, "learning_rate": 0.0007631301807310777, "loss": 3.5835, "step": 27890 }, { "epoch": 1.895298274222041, "grad_norm": 1.0720773935317993, "learning_rate": 0.0007630877157222449, "loss": 3.7022, "step": 27895 }, { "epoch": 1.8956379942927029, "grad_norm": 1.05709969997406, "learning_rate": 0.0007630452507134121, "loss": 3.7093, "step": 27900 }, { "epoch": 1.8959777143633647, "grad_norm": 1.2263301610946655, "learning_rate": 0.0007630027857045795, "loss": 3.3946, "step": 27905 }, { "epoch": 1.8963174344340263, "grad_norm": 0.8029810190200806, "learning_rate": 0.0007629603206957467, "loss": 3.6446, "step": 27910 }, { "epoch": 1.8966571545046882, "grad_norm": 0.8597691655158997, "learning_rate": 0.000762917855686914, "loss": 3.5462, "step": 27915 }, { "epoch": 1.89699687457535, "grad_norm": 0.8356927037239075, "learning_rate": 0.0007628753906780813, "loss": 3.5887, "step": 27920 }, { "epoch": 1.8973365946460117, "grad_norm": 1.0175747871398926, "learning_rate": 0.0007628329256692486, "loss": 3.6338, "step": 27925 }, { "epoch": 1.8976763147166733, "grad_norm": 0.7892065644264221, "learning_rate": 0.0007627904606604158, "loss": 3.6273, "step": 27930 }, { "epoch": 1.8980160347873354, "grad_norm": 0.8458698391914368, "learning_rate": 0.0007627479956515831, "loss": 3.5968, "step": 27935 }, { "epoch": 1.898355754857997, "grad_norm": 0.8552168607711792, "learning_rate": 0.0007627055306427504, "loss": 3.4676, "step": 27940 }, { "epoch": 1.8986954749286586, "grad_norm": 1.0278396606445312, "learning_rate": 0.0007626630656339176, "loss": 3.4311, "step": 27945 }, { "epoch": 1.8990351949993207, "grad_norm": 0.7547438144683838, "learning_rate": 0.000762620600625085, "loss": 3.5143, "step": 27950 }, { "epoch": 1.8993749150699824, "grad_norm": 0.7855194807052612, "learning_rate": 0.0007625781356162523, "loss": 3.3154, "step": 27955 }, { "epoch": 1.899714635140644, "grad_norm": 0.7551230192184448, "learning_rate": 0.0007625356706074195, "loss": 3.4993, "step": 27960 }, { "epoch": 1.9000543552113058, "grad_norm": 1.033882737159729, "learning_rate": 0.0007624932055985868, "loss": 3.7465, "step": 27965 }, { "epoch": 1.9003940752819677, "grad_norm": 0.7596902847290039, "learning_rate": 0.000762450740589754, "loss": 3.7756, "step": 27970 }, { "epoch": 1.9007337953526293, "grad_norm": 0.8528746366500854, "learning_rate": 0.0007624082755809213, "loss": 3.5839, "step": 27975 }, { "epoch": 1.9010735154232912, "grad_norm": 0.6484443545341492, "learning_rate": 0.0007623658105720886, "loss": 3.6108, "step": 27980 }, { "epoch": 1.901413235493953, "grad_norm": 1.2080998420715332, "learning_rate": 0.0007623233455632559, "loss": 3.906, "step": 27985 }, { "epoch": 1.9017529555646147, "grad_norm": 1.0229791402816772, "learning_rate": 0.0007622808805544232, "loss": 3.6065, "step": 27990 }, { "epoch": 1.9020926756352765, "grad_norm": 1.0935660600662231, "learning_rate": 0.0007622384155455905, "loss": 3.4556, "step": 27995 }, { "epoch": 1.9024323957059384, "grad_norm": 0.7671613097190857, "learning_rate": 0.0007621959505367577, "loss": 3.5871, "step": 28000 }, { "epoch": 1.9027721157766, "grad_norm": 0.967738687992096, "learning_rate": 0.0007621534855279249, "loss": 3.6703, "step": 28005 }, { "epoch": 1.9031118358472618, "grad_norm": 0.8478018641471863, "learning_rate": 0.0007621110205190923, "loss": 3.8198, "step": 28010 }, { "epoch": 1.9034515559179237, "grad_norm": 0.9766073226928711, "learning_rate": 0.0007620685555102595, "loss": 3.4404, "step": 28015 }, { "epoch": 1.9037912759885853, "grad_norm": 0.8470941781997681, "learning_rate": 0.0007620260905014268, "loss": 3.668, "step": 28020 }, { "epoch": 1.9041309960592472, "grad_norm": 0.9228593707084656, "learning_rate": 0.0007619836254925942, "loss": 3.6934, "step": 28025 }, { "epoch": 1.904470716129909, "grad_norm": 0.7718645930290222, "learning_rate": 0.0007619411604837614, "loss": 3.7298, "step": 28030 }, { "epoch": 1.9048104362005707, "grad_norm": 3.500680446624756, "learning_rate": 0.0007618986954749286, "loss": 3.5958, "step": 28035 }, { "epoch": 1.9051501562712325, "grad_norm": 1.4715577363967896, "learning_rate": 0.000761856230466096, "loss": 3.5392, "step": 28040 }, { "epoch": 1.9054898763418944, "grad_norm": 0.7793188095092773, "learning_rate": 0.0007618137654572632, "loss": 3.7854, "step": 28045 }, { "epoch": 1.905829596412556, "grad_norm": 0.9477581977844238, "learning_rate": 0.0007617713004484304, "loss": 3.4512, "step": 28050 }, { "epoch": 1.9061693164832179, "grad_norm": 0.9024282693862915, "learning_rate": 0.0007617288354395979, "loss": 3.5512, "step": 28055 }, { "epoch": 1.9065090365538797, "grad_norm": 0.7619509100914001, "learning_rate": 0.0007616863704307651, "loss": 3.7984, "step": 28060 }, { "epoch": 1.9068487566245413, "grad_norm": 0.7706624865531921, "learning_rate": 0.0007616439054219323, "loss": 3.3561, "step": 28065 }, { "epoch": 1.9071884766952032, "grad_norm": 0.742607831954956, "learning_rate": 0.0007616014404130996, "loss": 3.4511, "step": 28070 }, { "epoch": 1.907528196765865, "grad_norm": 1.0033817291259766, "learning_rate": 0.0007615589754042669, "loss": 3.6443, "step": 28075 }, { "epoch": 1.9078679168365267, "grad_norm": 0.7715677618980408, "learning_rate": 0.0007615165103954341, "loss": 3.5183, "step": 28080 }, { "epoch": 1.9082076369071885, "grad_norm": 3.076587200164795, "learning_rate": 0.0007614740453866014, "loss": 3.5551, "step": 28085 }, { "epoch": 1.9085473569778504, "grad_norm": 0.718163251876831, "learning_rate": 0.0007614315803777688, "loss": 3.8148, "step": 28090 }, { "epoch": 1.908887077048512, "grad_norm": 0.8399987816810608, "learning_rate": 0.000761389115368936, "loss": 3.456, "step": 28095 }, { "epoch": 1.9092267971191736, "grad_norm": 0.9357495307922363, "learning_rate": 0.0007613466503601033, "loss": 3.4895, "step": 28100 }, { "epoch": 1.9095665171898357, "grad_norm": 0.7830958962440491, "learning_rate": 0.0007613041853512705, "loss": 3.504, "step": 28105 }, { "epoch": 1.9099062372604974, "grad_norm": 0.9575322866439819, "learning_rate": 0.0007612617203424379, "loss": 3.6611, "step": 28110 }, { "epoch": 1.910245957331159, "grad_norm": 0.9187508225440979, "learning_rate": 0.0007612192553336051, "loss": 3.6058, "step": 28115 }, { "epoch": 1.910585677401821, "grad_norm": 0.7837285399436951, "learning_rate": 0.0007611767903247723, "loss": 3.7639, "step": 28120 }, { "epoch": 1.9109253974724827, "grad_norm": 0.8744210600852966, "learning_rate": 0.0007611343253159398, "loss": 3.8573, "step": 28125 }, { "epoch": 1.9112651175431443, "grad_norm": 1.0014907121658325, "learning_rate": 0.000761091860307107, "loss": 3.5586, "step": 28130 }, { "epoch": 1.9116048376138062, "grad_norm": 1.105457067489624, "learning_rate": 0.0007610493952982742, "loss": 3.4191, "step": 28135 }, { "epoch": 1.911944557684468, "grad_norm": 1.0035400390625, "learning_rate": 0.0007610069302894416, "loss": 3.9603, "step": 28140 }, { "epoch": 1.9122842777551297, "grad_norm": 0.7402544617652893, "learning_rate": 0.0007609644652806088, "loss": 3.6728, "step": 28145 }, { "epoch": 1.9126239978257915, "grad_norm": 0.7774442434310913, "learning_rate": 0.000760922000271776, "loss": 3.5254, "step": 28150 }, { "epoch": 1.9129637178964534, "grad_norm": 0.9557531476020813, "learning_rate": 0.0007608795352629433, "loss": 3.695, "step": 28155 }, { "epoch": 1.913303437967115, "grad_norm": 1.0287468433380127, "learning_rate": 0.0007608370702541107, "loss": 3.7225, "step": 28160 }, { "epoch": 1.9136431580377768, "grad_norm": 0.9337747097015381, "learning_rate": 0.0007607946052452779, "loss": 3.8231, "step": 28165 }, { "epoch": 1.9139828781084387, "grad_norm": 0.9472321271896362, "learning_rate": 0.0007607521402364452, "loss": 3.543, "step": 28170 }, { "epoch": 1.9143225981791003, "grad_norm": 1.073047161102295, "learning_rate": 0.0007607096752276125, "loss": 3.393, "step": 28175 }, { "epoch": 1.9146623182497622, "grad_norm": 0.8966087102890015, "learning_rate": 0.0007606672102187797, "loss": 3.7088, "step": 28180 }, { "epoch": 1.915002038320424, "grad_norm": 1.0451486110687256, "learning_rate": 0.000760624745209947, "loss": 3.6303, "step": 28185 }, { "epoch": 1.9153417583910857, "grad_norm": 0.9368476271629333, "learning_rate": 0.0007605822802011143, "loss": 3.5946, "step": 28190 }, { "epoch": 1.9156814784617475, "grad_norm": 0.761813759803772, "learning_rate": 0.0007605398151922816, "loss": 3.4495, "step": 28195 }, { "epoch": 1.9160211985324094, "grad_norm": 0.8372112512588501, "learning_rate": 0.0007604973501834489, "loss": 3.9955, "step": 28200 }, { "epoch": 1.916360918603071, "grad_norm": 1.0954171419143677, "learning_rate": 0.0007604548851746161, "loss": 3.6138, "step": 28205 }, { "epoch": 1.9167006386737329, "grad_norm": 0.7364549040794373, "learning_rate": 0.0007604124201657834, "loss": 3.6222, "step": 28210 }, { "epoch": 1.9170403587443947, "grad_norm": 0.8073201775550842, "learning_rate": 0.0007603699551569507, "loss": 3.4635, "step": 28215 }, { "epoch": 1.9173800788150563, "grad_norm": 0.8864088654518127, "learning_rate": 0.0007603274901481179, "loss": 3.5409, "step": 28220 }, { "epoch": 1.9177197988857182, "grad_norm": 0.9455179572105408, "learning_rate": 0.0007602850251392852, "loss": 3.3552, "step": 28225 }, { "epoch": 1.91805951895638, "grad_norm": 0.8603058457374573, "learning_rate": 0.0007602425601304526, "loss": 3.2582, "step": 28230 }, { "epoch": 1.9183992390270417, "grad_norm": 0.8076804280281067, "learning_rate": 0.0007602000951216198, "loss": 3.6138, "step": 28235 }, { "epoch": 1.9187389590977035, "grad_norm": 0.779769778251648, "learning_rate": 0.0007601576301127871, "loss": 3.6792, "step": 28240 }, { "epoch": 1.9190786791683654, "grad_norm": 1.0759003162384033, "learning_rate": 0.0007601151651039544, "loss": 3.5209, "step": 28245 }, { "epoch": 1.919418399239027, "grad_norm": 1.349061369895935, "learning_rate": 0.0007600727000951216, "loss": 3.6442, "step": 28250 }, { "epoch": 1.9197581193096889, "grad_norm": 0.8423357605934143, "learning_rate": 0.0007600302350862888, "loss": 3.746, "step": 28255 }, { "epoch": 1.9200978393803507, "grad_norm": 0.8904449343681335, "learning_rate": 0.0007599877700774563, "loss": 3.6733, "step": 28260 }, { "epoch": 1.9204375594510124, "grad_norm": 1.0457406044006348, "learning_rate": 0.0007599453050686235, "loss": 3.7641, "step": 28265 }, { "epoch": 1.920777279521674, "grad_norm": 1.1688271760940552, "learning_rate": 0.0007599028400597907, "loss": 3.721, "step": 28270 }, { "epoch": 1.921116999592336, "grad_norm": 0.8149852156639099, "learning_rate": 0.0007598603750509581, "loss": 3.6576, "step": 28275 }, { "epoch": 1.9214567196629977, "grad_norm": 0.6973206996917725, "learning_rate": 0.0007598179100421253, "loss": 3.4242, "step": 28280 }, { "epoch": 1.9217964397336593, "grad_norm": 0.947292149066925, "learning_rate": 0.0007597754450332925, "loss": 3.6933, "step": 28285 }, { "epoch": 1.9221361598043214, "grad_norm": 0.7033851146697998, "learning_rate": 0.0007597329800244599, "loss": 3.7296, "step": 28290 }, { "epoch": 1.922475879874983, "grad_norm": 0.8169011473655701, "learning_rate": 0.0007596905150156272, "loss": 3.5704, "step": 28295 }, { "epoch": 1.9228155999456447, "grad_norm": 0.811951756477356, "learning_rate": 0.0007596480500067944, "loss": 3.7214, "step": 28300 }, { "epoch": 1.9231553200163065, "grad_norm": 0.6803563833236694, "learning_rate": 0.0007596055849979618, "loss": 3.6676, "step": 28305 }, { "epoch": 1.9234950400869684, "grad_norm": 0.7913274168968201, "learning_rate": 0.000759563119989129, "loss": 3.7017, "step": 28310 }, { "epoch": 1.92383476015763, "grad_norm": 0.7437870502471924, "learning_rate": 0.0007595206549802962, "loss": 3.846, "step": 28315 }, { "epoch": 1.9241744802282919, "grad_norm": 0.8227376341819763, "learning_rate": 0.0007594781899714635, "loss": 3.6799, "step": 28320 }, { "epoch": 1.9245142002989537, "grad_norm": 0.8750634789466858, "learning_rate": 0.0007594357249626308, "loss": 3.5371, "step": 28325 }, { "epoch": 1.9248539203696153, "grad_norm": 4.627682209014893, "learning_rate": 0.0007593932599537981, "loss": 3.6174, "step": 28330 }, { "epoch": 1.9251936404402772, "grad_norm": 1.0895839929580688, "learning_rate": 0.0007593507949449654, "loss": 3.5953, "step": 28335 }, { "epoch": 1.925533360510939, "grad_norm": 0.8343336582183838, "learning_rate": 0.0007593083299361327, "loss": 3.6144, "step": 28340 }, { "epoch": 1.9258730805816007, "grad_norm": 0.8747314810752869, "learning_rate": 0.0007592658649272999, "loss": 3.5115, "step": 28345 }, { "epoch": 1.9262128006522625, "grad_norm": 0.9775891304016113, "learning_rate": 0.0007592233999184672, "loss": 3.516, "step": 28350 }, { "epoch": 1.9265525207229244, "grad_norm": 0.9417952299118042, "learning_rate": 0.0007591809349096344, "loss": 3.438, "step": 28355 }, { "epoch": 1.926892240793586, "grad_norm": 1.0056073665618896, "learning_rate": 0.0007591384699008017, "loss": 3.4382, "step": 28360 }, { "epoch": 1.9272319608642479, "grad_norm": 0.9947229623794556, "learning_rate": 0.0007590960048919691, "loss": 3.6533, "step": 28365 }, { "epoch": 1.9275716809349097, "grad_norm": 0.7735212445259094, "learning_rate": 0.0007590535398831363, "loss": 3.4921, "step": 28370 }, { "epoch": 1.9279114010055713, "grad_norm": 0.8552477955818176, "learning_rate": 0.0007590110748743036, "loss": 3.5746, "step": 28375 }, { "epoch": 1.9282511210762332, "grad_norm": 0.9084644913673401, "learning_rate": 0.0007589686098654709, "loss": 3.6247, "step": 28380 }, { "epoch": 1.928590841146895, "grad_norm": 0.7161413431167603, "learning_rate": 0.0007589261448566381, "loss": 3.3702, "step": 28385 }, { "epoch": 1.9289305612175567, "grad_norm": 2.041700839996338, "learning_rate": 0.0007588836798478053, "loss": 3.7149, "step": 28390 }, { "epoch": 1.9292702812882185, "grad_norm": 0.7780112624168396, "learning_rate": 0.0007588412148389727, "loss": 3.6575, "step": 28395 }, { "epoch": 1.9296100013588804, "grad_norm": 0.9971717596054077, "learning_rate": 0.00075879874983014, "loss": 3.7108, "step": 28400 }, { "epoch": 1.929949721429542, "grad_norm": 0.8554769158363342, "learning_rate": 0.0007587562848213072, "loss": 3.6328, "step": 28405 }, { "epoch": 1.9302894415002039, "grad_norm": 0.6781002879142761, "learning_rate": 0.0007587138198124746, "loss": 3.5295, "step": 28410 }, { "epoch": 1.9306291615708657, "grad_norm": 1.050248384475708, "learning_rate": 0.0007586713548036418, "loss": 3.2926, "step": 28415 }, { "epoch": 1.9309688816415274, "grad_norm": 0.7920815348625183, "learning_rate": 0.000758628889794809, "loss": 3.8175, "step": 28420 }, { "epoch": 1.9313086017121892, "grad_norm": 0.818256676197052, "learning_rate": 0.0007585864247859764, "loss": 3.5565, "step": 28425 }, { "epoch": 1.931648321782851, "grad_norm": 0.8688068389892578, "learning_rate": 0.0007585439597771436, "loss": 3.5658, "step": 28430 }, { "epoch": 1.9319880418535127, "grad_norm": 1.0677967071533203, "learning_rate": 0.0007585014947683109, "loss": 3.6674, "step": 28435 }, { "epoch": 1.9323277619241743, "grad_norm": 0.902638852596283, "learning_rate": 0.0007584590297594783, "loss": 3.5321, "step": 28440 }, { "epoch": 1.9326674819948364, "grad_norm": 0.7627798914909363, "learning_rate": 0.0007584165647506455, "loss": 3.5257, "step": 28445 }, { "epoch": 1.933007202065498, "grad_norm": 0.9267018437385559, "learning_rate": 0.0007583740997418128, "loss": 3.7782, "step": 28450 }, { "epoch": 1.9333469221361597, "grad_norm": 0.9510639309883118, "learning_rate": 0.00075833163473298, "loss": 3.6926, "step": 28455 }, { "epoch": 1.9336866422068217, "grad_norm": 0.6269004344940186, "learning_rate": 0.0007582891697241473, "loss": 3.7019, "step": 28460 }, { "epoch": 1.9340263622774834, "grad_norm": 0.9256649017333984, "learning_rate": 0.0007582467047153146, "loss": 3.6565, "step": 28465 }, { "epoch": 1.934366082348145, "grad_norm": 0.7546939849853516, "learning_rate": 0.0007582042397064819, "loss": 3.4047, "step": 28470 }, { "epoch": 1.9347058024188069, "grad_norm": 1.9614346027374268, "learning_rate": 0.0007581617746976492, "loss": 3.8338, "step": 28475 }, { "epoch": 1.9350455224894687, "grad_norm": 0.7255362272262573, "learning_rate": 0.0007581193096888165, "loss": 3.6765, "step": 28480 }, { "epoch": 1.9353852425601303, "grad_norm": 0.8525300621986389, "learning_rate": 0.0007580768446799837, "loss": 3.5382, "step": 28485 }, { "epoch": 1.9357249626307922, "grad_norm": 0.7200602889060974, "learning_rate": 0.000758034379671151, "loss": 3.3816, "step": 28490 }, { "epoch": 1.936064682701454, "grad_norm": 0.9942429065704346, "learning_rate": 0.0007579919146623183, "loss": 3.6353, "step": 28495 }, { "epoch": 1.9364044027721157, "grad_norm": 0.8485697507858276, "learning_rate": 0.0007579494496534855, "loss": 3.6562, "step": 28500 }, { "epoch": 1.9367441228427775, "grad_norm": 0.8071969747543335, "learning_rate": 0.0007579069846446528, "loss": 3.6914, "step": 28505 }, { "epoch": 1.9370838429134394, "grad_norm": 0.968704879283905, "learning_rate": 0.0007578645196358202, "loss": 3.4869, "step": 28510 }, { "epoch": 1.937423562984101, "grad_norm": 0.8211969137191772, "learning_rate": 0.0007578220546269874, "loss": 3.6826, "step": 28515 }, { "epoch": 1.9377632830547629, "grad_norm": 0.8874754309654236, "learning_rate": 0.0007577795896181546, "loss": 3.5212, "step": 28520 }, { "epoch": 1.9381030031254247, "grad_norm": 0.7931695580482483, "learning_rate": 0.000757737124609322, "loss": 3.6002, "step": 28525 }, { "epoch": 1.9384427231960863, "grad_norm": 0.7379376292228699, "learning_rate": 0.0007576946596004892, "loss": 3.6429, "step": 28530 }, { "epoch": 1.9387824432667482, "grad_norm": 0.8322670459747314, "learning_rate": 0.0007576521945916564, "loss": 3.6122, "step": 28535 }, { "epoch": 1.93912216333741, "grad_norm": 2.0350444316864014, "learning_rate": 0.0007576097295828239, "loss": 3.7357, "step": 28540 }, { "epoch": 1.9394618834080717, "grad_norm": 0.8767146468162537, "learning_rate": 0.0007575672645739911, "loss": 3.6049, "step": 28545 }, { "epoch": 1.9398016034787335, "grad_norm": 1.0677225589752197, "learning_rate": 0.0007575247995651583, "loss": 3.664, "step": 28550 }, { "epoch": 1.9401413235493954, "grad_norm": 0.7669855952262878, "learning_rate": 0.0007574823345563256, "loss": 3.5136, "step": 28555 }, { "epoch": 1.940481043620057, "grad_norm": 0.803428053855896, "learning_rate": 0.0007574398695474929, "loss": 3.7272, "step": 28560 }, { "epoch": 1.9408207636907189, "grad_norm": 0.7286089658737183, "learning_rate": 0.0007573974045386601, "loss": 3.5448, "step": 28565 }, { "epoch": 1.9411604837613807, "grad_norm": 0.9872887134552002, "learning_rate": 0.0007573549395298274, "loss": 3.6438, "step": 28570 }, { "epoch": 1.9415002038320424, "grad_norm": 0.8057947754859924, "learning_rate": 0.0007573124745209948, "loss": 3.4015, "step": 28575 }, { "epoch": 1.9418399239027042, "grad_norm": 1.7552804946899414, "learning_rate": 0.000757270009512162, "loss": 3.6373, "step": 28580 }, { "epoch": 1.942179643973366, "grad_norm": 0.6760085225105286, "learning_rate": 0.0007572275445033293, "loss": 3.8561, "step": 28585 }, { "epoch": 1.9425193640440277, "grad_norm": 0.9069851636886597, "learning_rate": 0.0007571850794944966, "loss": 3.638, "step": 28590 }, { "epoch": 1.9428590841146895, "grad_norm": 0.7618457674980164, "learning_rate": 0.0007571426144856638, "loss": 3.5034, "step": 28595 }, { "epoch": 1.9431988041853514, "grad_norm": 1.030716896057129, "learning_rate": 0.0007571001494768311, "loss": 3.2856, "step": 28600 }, { "epoch": 1.943538524256013, "grad_norm": 0.914871096611023, "learning_rate": 0.0007570576844679983, "loss": 3.5833, "step": 28605 }, { "epoch": 1.9438782443266747, "grad_norm": 1.038490891456604, "learning_rate": 0.0007570152194591657, "loss": 3.4763, "step": 28610 }, { "epoch": 1.9442179643973367, "grad_norm": 0.8067642450332642, "learning_rate": 0.000756972754450333, "loss": 3.5549, "step": 28615 }, { "epoch": 1.9445576844679984, "grad_norm": 1.0564671754837036, "learning_rate": 0.0007569302894415002, "loss": 3.1271, "step": 28620 }, { "epoch": 1.94489740453866, "grad_norm": 0.8853978514671326, "learning_rate": 0.0007568878244326675, "loss": 3.4998, "step": 28625 }, { "epoch": 1.945237124609322, "grad_norm": 0.7977197170257568, "learning_rate": 0.0007568453594238348, "loss": 3.5712, "step": 28630 }, { "epoch": 1.9455768446799837, "grad_norm": 2.813751697540283, "learning_rate": 0.000756802894415002, "loss": 3.5143, "step": 28635 }, { "epoch": 1.9459165647506453, "grad_norm": 0.7260422110557556, "learning_rate": 0.0007567604294061692, "loss": 3.4412, "step": 28640 }, { "epoch": 1.9462562848213072, "grad_norm": 0.8211414813995361, "learning_rate": 0.0007567179643973367, "loss": 3.5394, "step": 28645 }, { "epoch": 1.946596004891969, "grad_norm": 1.2254635095596313, "learning_rate": 0.0007566754993885039, "loss": 3.2922, "step": 28650 }, { "epoch": 1.9469357249626307, "grad_norm": 0.8842107653617859, "learning_rate": 0.0007566330343796711, "loss": 3.7405, "step": 28655 }, { "epoch": 1.9472754450332925, "grad_norm": 0.9992979168891907, "learning_rate": 0.0007565905693708385, "loss": 3.8473, "step": 28660 }, { "epoch": 1.9476151651039544, "grad_norm": 1.2302290201187134, "learning_rate": 0.0007565481043620057, "loss": 3.4311, "step": 28665 }, { "epoch": 1.947954885174616, "grad_norm": 1.0497682094573975, "learning_rate": 0.0007565056393531729, "loss": 3.5467, "step": 28670 }, { "epoch": 1.9482946052452779, "grad_norm": 1.0691750049591064, "learning_rate": 0.0007564631743443403, "loss": 3.4072, "step": 28675 }, { "epoch": 1.9486343253159397, "grad_norm": 0.7533948421478271, "learning_rate": 0.0007564207093355076, "loss": 3.792, "step": 28680 }, { "epoch": 1.9489740453866014, "grad_norm": 1.0500904321670532, "learning_rate": 0.0007563782443266748, "loss": 3.7924, "step": 28685 }, { "epoch": 1.9493137654572632, "grad_norm": 0.7273003458976746, "learning_rate": 0.0007563357793178422, "loss": 3.6146, "step": 28690 }, { "epoch": 1.949653485527925, "grad_norm": 0.7840332388877869, "learning_rate": 0.0007562933143090094, "loss": 3.6988, "step": 28695 }, { "epoch": 1.9499932055985867, "grad_norm": 0.9248551726341248, "learning_rate": 0.0007562508493001766, "loss": 3.6088, "step": 28700 }, { "epoch": 1.9503329256692485, "grad_norm": 0.8148496747016907, "learning_rate": 0.0007562083842913439, "loss": 3.516, "step": 28705 }, { "epoch": 1.9506726457399104, "grad_norm": 1.110105037689209, "learning_rate": 0.0007561659192825112, "loss": 3.8206, "step": 28710 }, { "epoch": 1.951012365810572, "grad_norm": 0.8824129104614258, "learning_rate": 0.0007561234542736785, "loss": 3.6933, "step": 28715 }, { "epoch": 1.9513520858812339, "grad_norm": 0.7973528504371643, "learning_rate": 0.0007560809892648458, "loss": 3.575, "step": 28720 }, { "epoch": 1.9516918059518957, "grad_norm": 2.1247591972351074, "learning_rate": 0.0007560385242560131, "loss": 3.6658, "step": 28725 }, { "epoch": 1.9520315260225574, "grad_norm": 1.136062502861023, "learning_rate": 0.0007559960592471803, "loss": 3.6825, "step": 28730 }, { "epoch": 1.9523712460932192, "grad_norm": 0.7387412786483765, "learning_rate": 0.0007559535942383476, "loss": 3.7793, "step": 28735 }, { "epoch": 1.952710966163881, "grad_norm": 0.7842884063720703, "learning_rate": 0.0007559111292295148, "loss": 3.7047, "step": 28740 }, { "epoch": 1.9530506862345427, "grad_norm": 0.8975210189819336, "learning_rate": 0.0007558686642206821, "loss": 3.8107, "step": 28745 }, { "epoch": 1.9533904063052046, "grad_norm": 0.7974251508712769, "learning_rate": 0.0007558261992118495, "loss": 3.5251, "step": 28750 }, { "epoch": 1.9537301263758664, "grad_norm": 0.9227864742279053, "learning_rate": 0.0007557837342030167, "loss": 3.6562, "step": 28755 }, { "epoch": 1.954069846446528, "grad_norm": 0.773226797580719, "learning_rate": 0.000755741269194184, "loss": 3.6523, "step": 28760 }, { "epoch": 1.95440956651719, "grad_norm": 1.1268770694732666, "learning_rate": 0.0007556988041853513, "loss": 3.7385, "step": 28765 }, { "epoch": 1.9547492865878517, "grad_norm": 1.0929476022720337, "learning_rate": 0.0007556563391765185, "loss": 3.7262, "step": 28770 }, { "epoch": 1.9550890066585134, "grad_norm": 0.9068654775619507, "learning_rate": 0.0007556138741676858, "loss": 3.5628, "step": 28775 }, { "epoch": 1.955428726729175, "grad_norm": 0.9386857748031616, "learning_rate": 0.0007555714091588531, "loss": 3.729, "step": 28780 }, { "epoch": 1.955768446799837, "grad_norm": 1.0286078453063965, "learning_rate": 0.0007555289441500204, "loss": 3.5653, "step": 28785 }, { "epoch": 1.9561081668704987, "grad_norm": 0.6781588792800903, "learning_rate": 0.0007554864791411878, "loss": 3.7909, "step": 28790 }, { "epoch": 1.9564478869411603, "grad_norm": 2.163759469985962, "learning_rate": 0.000755444014132355, "loss": 3.5412, "step": 28795 }, { "epoch": 1.9567876070118224, "grad_norm": 0.683273434638977, "learning_rate": 0.0007554015491235222, "loss": 3.5831, "step": 28800 }, { "epoch": 1.957127327082484, "grad_norm": 0.9136443734169006, "learning_rate": 0.0007553590841146895, "loss": 3.3184, "step": 28805 }, { "epoch": 1.9574670471531457, "grad_norm": 0.9115235209465027, "learning_rate": 0.0007553166191058568, "loss": 3.7052, "step": 28810 }, { "epoch": 1.9578067672238075, "grad_norm": 0.9279776215553284, "learning_rate": 0.000755274154097024, "loss": 3.6018, "step": 28815 }, { "epoch": 1.9581464872944694, "grad_norm": 0.8505581021308899, "learning_rate": 0.0007552316890881914, "loss": 3.6655, "step": 28820 }, { "epoch": 1.958486207365131, "grad_norm": 1.2644245624542236, "learning_rate": 0.0007551892240793587, "loss": 3.5212, "step": 28825 }, { "epoch": 1.9588259274357929, "grad_norm": 1.1435060501098633, "learning_rate": 0.0007551467590705259, "loss": 3.3801, "step": 28830 }, { "epoch": 1.9591656475064547, "grad_norm": 1.210832118988037, "learning_rate": 0.0007551042940616932, "loss": 3.41, "step": 28835 }, { "epoch": 1.9595053675771164, "grad_norm": 0.7638642191886902, "learning_rate": 0.0007550618290528604, "loss": 3.6881, "step": 28840 }, { "epoch": 1.9598450876477782, "grad_norm": 0.8216562271118164, "learning_rate": 0.0007550193640440277, "loss": 3.694, "step": 28845 }, { "epoch": 1.96018480771844, "grad_norm": 1.1245285272598267, "learning_rate": 0.0007549768990351951, "loss": 3.6368, "step": 28850 }, { "epoch": 1.9605245277891017, "grad_norm": 0.706417441368103, "learning_rate": 0.0007549344340263623, "loss": 3.5578, "step": 28855 }, { "epoch": 1.9608642478597635, "grad_norm": 0.7962143421173096, "learning_rate": 0.0007548919690175296, "loss": 3.7875, "step": 28860 }, { "epoch": 1.9612039679304254, "grad_norm": 0.6766414642333984, "learning_rate": 0.0007548495040086969, "loss": 3.6219, "step": 28865 }, { "epoch": 1.961543688001087, "grad_norm": 0.8412807583808899, "learning_rate": 0.0007548070389998641, "loss": 3.5257, "step": 28870 }, { "epoch": 1.9618834080717489, "grad_norm": 0.8144760131835938, "learning_rate": 0.0007547645739910314, "loss": 3.7681, "step": 28875 }, { "epoch": 1.9622231281424107, "grad_norm": 0.6655876636505127, "learning_rate": 0.0007547221089821987, "loss": 3.5578, "step": 28880 }, { "epoch": 1.9625628482130724, "grad_norm": 0.9093195796012878, "learning_rate": 0.000754679643973366, "loss": 3.3821, "step": 28885 }, { "epoch": 1.9629025682837342, "grad_norm": 0.6724187135696411, "learning_rate": 0.0007546371789645332, "loss": 3.5931, "step": 28890 }, { "epoch": 1.963242288354396, "grad_norm": 1.607333779335022, "learning_rate": 0.0007545947139557006, "loss": 3.5183, "step": 28895 }, { "epoch": 1.9635820084250577, "grad_norm": 0.9282097220420837, "learning_rate": 0.0007545522489468678, "loss": 3.5025, "step": 28900 }, { "epoch": 1.9639217284957196, "grad_norm": 1.0963257551193237, "learning_rate": 0.000754509783938035, "loss": 3.7237, "step": 28905 }, { "epoch": 1.9642614485663814, "grad_norm": 0.8336236476898193, "learning_rate": 0.0007544673189292024, "loss": 3.4077, "step": 28910 }, { "epoch": 1.964601168637043, "grad_norm": 0.8237790465354919, "learning_rate": 0.0007544248539203696, "loss": 3.4558, "step": 28915 }, { "epoch": 1.964940888707705, "grad_norm": 0.7438591718673706, "learning_rate": 0.0007543823889115369, "loss": 3.737, "step": 28920 }, { "epoch": 1.9652806087783667, "grad_norm": 0.8561109304428101, "learning_rate": 0.0007543399239027043, "loss": 3.529, "step": 28925 }, { "epoch": 1.9656203288490284, "grad_norm": 0.9789109826087952, "learning_rate": 0.0007542974588938715, "loss": 3.5817, "step": 28930 }, { "epoch": 1.9659600489196902, "grad_norm": 0.8005071878433228, "learning_rate": 0.0007542549938850387, "loss": 3.4739, "step": 28935 }, { "epoch": 1.966299768990352, "grad_norm": 0.8143114447593689, "learning_rate": 0.000754212528876206, "loss": 3.5272, "step": 28940 }, { "epoch": 1.9666394890610137, "grad_norm": 0.7283206582069397, "learning_rate": 0.0007541700638673733, "loss": 3.7608, "step": 28945 }, { "epoch": 1.9669792091316753, "grad_norm": 1.002816915512085, "learning_rate": 0.0007541275988585405, "loss": 3.8601, "step": 28950 }, { "epoch": 1.9673189292023374, "grad_norm": 1.4885766506195068, "learning_rate": 0.0007540851338497079, "loss": 3.4029, "step": 28955 }, { "epoch": 1.967658649272999, "grad_norm": 0.8344660997390747, "learning_rate": 0.0007540426688408752, "loss": 3.3417, "step": 28960 }, { "epoch": 1.9679983693436607, "grad_norm": 1.0546966791152954, "learning_rate": 0.0007540002038320424, "loss": 3.7401, "step": 28965 }, { "epoch": 1.9683380894143228, "grad_norm": 0.9657679200172424, "learning_rate": 0.0007539577388232097, "loss": 3.5961, "step": 28970 }, { "epoch": 1.9686778094849844, "grad_norm": 0.9433134198188782, "learning_rate": 0.000753915273814377, "loss": 3.3656, "step": 28975 }, { "epoch": 1.969017529555646, "grad_norm": 0.7718935608863831, "learning_rate": 0.0007538728088055442, "loss": 3.3715, "step": 28980 }, { "epoch": 1.9693572496263079, "grad_norm": 1.0160068273544312, "learning_rate": 0.0007538303437967115, "loss": 3.5351, "step": 28985 }, { "epoch": 1.9696969696969697, "grad_norm": 0.8678659200668335, "learning_rate": 0.0007537878787878788, "loss": 3.583, "step": 28990 }, { "epoch": 1.9700366897676314, "grad_norm": 0.7335505485534668, "learning_rate": 0.0007537454137790461, "loss": 3.343, "step": 28995 }, { "epoch": 1.9703764098382932, "grad_norm": 0.7890894412994385, "learning_rate": 0.0007537029487702134, "loss": 3.7078, "step": 29000 }, { "epoch": 1.970716129908955, "grad_norm": 0.7053489089012146, "learning_rate": 0.0007536604837613806, "loss": 3.5596, "step": 29005 }, { "epoch": 1.9710558499796167, "grad_norm": 0.7064149379730225, "learning_rate": 0.0007536180187525479, "loss": 3.6475, "step": 29010 }, { "epoch": 1.9713955700502785, "grad_norm": 0.8108725547790527, "learning_rate": 0.0007535755537437152, "loss": 3.6204, "step": 29015 }, { "epoch": 1.9717352901209404, "grad_norm": 0.8527393341064453, "learning_rate": 0.0007535330887348824, "loss": 3.6671, "step": 29020 }, { "epoch": 1.972075010191602, "grad_norm": 0.9500375390052795, "learning_rate": 0.0007534906237260498, "loss": 3.6597, "step": 29025 }, { "epoch": 1.9724147302622639, "grad_norm": 1.1384083032608032, "learning_rate": 0.0007534481587172171, "loss": 3.87, "step": 29030 }, { "epoch": 1.9727544503329257, "grad_norm": 1.4571928977966309, "learning_rate": 0.0007534056937083843, "loss": 3.5968, "step": 29035 }, { "epoch": 1.9730941704035874, "grad_norm": 0.7645004987716675, "learning_rate": 0.0007533632286995515, "loss": 3.5517, "step": 29040 }, { "epoch": 1.9734338904742492, "grad_norm": 0.9181371927261353, "learning_rate": 0.0007533207636907189, "loss": 3.5608, "step": 29045 }, { "epoch": 1.973773610544911, "grad_norm": 0.9836548566818237, "learning_rate": 0.0007532782986818861, "loss": 3.6286, "step": 29050 }, { "epoch": 1.9741133306155727, "grad_norm": 1.0330171585083008, "learning_rate": 0.0007532358336730533, "loss": 3.8315, "step": 29055 }, { "epoch": 1.9744530506862346, "grad_norm": 0.9947535395622253, "learning_rate": 0.0007531933686642208, "loss": 3.5824, "step": 29060 }, { "epoch": 1.9747927707568964, "grad_norm": 0.9524598717689514, "learning_rate": 0.000753150903655388, "loss": 3.938, "step": 29065 }, { "epoch": 1.975132490827558, "grad_norm": 0.8610302805900574, "learning_rate": 0.0007531084386465552, "loss": 3.5288, "step": 29070 }, { "epoch": 1.97547221089822, "grad_norm": 0.837296724319458, "learning_rate": 0.0007530659736377226, "loss": 3.4799, "step": 29075 }, { "epoch": 1.9758119309688817, "grad_norm": 0.9392138123512268, "learning_rate": 0.0007530235086288898, "loss": 3.4918, "step": 29080 }, { "epoch": 1.9761516510395434, "grad_norm": 0.8305526375770569, "learning_rate": 0.000752981043620057, "loss": 3.7236, "step": 29085 }, { "epoch": 1.9764913711102052, "grad_norm": 0.9105304479598999, "learning_rate": 0.0007529385786112243, "loss": 3.6312, "step": 29090 }, { "epoch": 1.976831091180867, "grad_norm": 0.752221405506134, "learning_rate": 0.0007528961136023917, "loss": 3.649, "step": 29095 }, { "epoch": 1.9771708112515287, "grad_norm": 7.136263370513916, "learning_rate": 0.0007528536485935589, "loss": 3.3271, "step": 29100 }, { "epoch": 1.9775105313221906, "grad_norm": 0.7253371477127075, "learning_rate": 0.0007528111835847262, "loss": 3.668, "step": 29105 }, { "epoch": 1.9778502513928524, "grad_norm": 0.9408615827560425, "learning_rate": 0.0007527687185758935, "loss": 3.372, "step": 29110 }, { "epoch": 1.978189971463514, "grad_norm": 0.7048061490058899, "learning_rate": 0.0007527262535670607, "loss": 3.5793, "step": 29115 }, { "epoch": 1.9785296915341757, "grad_norm": 0.8520619869232178, "learning_rate": 0.000752683788558228, "loss": 3.7079, "step": 29120 }, { "epoch": 1.9788694116048378, "grad_norm": 0.8509448766708374, "learning_rate": 0.0007526413235493952, "loss": 3.8453, "step": 29125 }, { "epoch": 1.9792091316754994, "grad_norm": 0.9482970237731934, "learning_rate": 0.0007525988585405627, "loss": 3.7508, "step": 29130 }, { "epoch": 1.979548851746161, "grad_norm": 0.7908791303634644, "learning_rate": 0.0007525563935317299, "loss": 3.6798, "step": 29135 }, { "epoch": 1.979888571816823, "grad_norm": 1.5976383686065674, "learning_rate": 0.0007525139285228971, "loss": 3.4236, "step": 29140 }, { "epoch": 1.9802282918874847, "grad_norm": 0.6679471135139465, "learning_rate": 0.0007524714635140645, "loss": 3.5702, "step": 29145 }, { "epoch": 1.9805680119581464, "grad_norm": 0.8123327493667603, "learning_rate": 0.0007524289985052317, "loss": 3.6634, "step": 29150 }, { "epoch": 1.9809077320288082, "grad_norm": 0.7589690685272217, "learning_rate": 0.0007523865334963989, "loss": 3.4967, "step": 29155 }, { "epoch": 1.98124745209947, "grad_norm": 1.0659916400909424, "learning_rate": 0.0007523440684875663, "loss": 3.4568, "step": 29160 }, { "epoch": 1.9815871721701317, "grad_norm": 0.970257580280304, "learning_rate": 0.0007523016034787336, "loss": 3.4835, "step": 29165 }, { "epoch": 1.9819268922407935, "grad_norm": 0.9269804358482361, "learning_rate": 0.0007522591384699008, "loss": 3.7128, "step": 29170 }, { "epoch": 1.9822666123114554, "grad_norm": 0.742489755153656, "learning_rate": 0.0007522166734610682, "loss": 3.8559, "step": 29175 }, { "epoch": 1.982606332382117, "grad_norm": 0.7392188906669617, "learning_rate": 0.0007521742084522354, "loss": 3.4747, "step": 29180 }, { "epoch": 1.9829460524527789, "grad_norm": 0.6585854887962341, "learning_rate": 0.0007521317434434026, "loss": 3.7298, "step": 29185 }, { "epoch": 1.9832857725234407, "grad_norm": 0.923235297203064, "learning_rate": 0.0007520892784345699, "loss": 3.7449, "step": 29190 }, { "epoch": 1.9836254925941024, "grad_norm": 0.8680022358894348, "learning_rate": 0.0007520468134257372, "loss": 3.7723, "step": 29195 }, { "epoch": 1.9839652126647642, "grad_norm": 0.8059386610984802, "learning_rate": 0.0007520043484169045, "loss": 3.8393, "step": 29200 }, { "epoch": 1.984304932735426, "grad_norm": 0.7884226441383362, "learning_rate": 0.0007519618834080718, "loss": 3.6193, "step": 29205 }, { "epoch": 1.9846446528060877, "grad_norm": 0.7910537123680115, "learning_rate": 0.0007519194183992391, "loss": 3.3136, "step": 29210 }, { "epoch": 1.9849843728767496, "grad_norm": 0.9270262718200684, "learning_rate": 0.0007518769533904063, "loss": 3.7627, "step": 29215 }, { "epoch": 1.9853240929474114, "grad_norm": 1.0346249341964722, "learning_rate": 0.0007518344883815736, "loss": 3.4026, "step": 29220 }, { "epoch": 1.985663813018073, "grad_norm": 0.8513607978820801, "learning_rate": 0.0007517920233727409, "loss": 3.7147, "step": 29225 }, { "epoch": 1.986003533088735, "grad_norm": 0.8393589854240417, "learning_rate": 0.0007517495583639081, "loss": 3.4863, "step": 29230 }, { "epoch": 1.9863432531593967, "grad_norm": 0.8679572343826294, "learning_rate": 0.0007517070933550755, "loss": 3.5135, "step": 29235 }, { "epoch": 1.9866829732300584, "grad_norm": 1.0132571458816528, "learning_rate": 0.0007516646283462427, "loss": 3.3759, "step": 29240 }, { "epoch": 1.9870226933007202, "grad_norm": 0.9760110378265381, "learning_rate": 0.00075162216333741, "loss": 3.7447, "step": 29245 }, { "epoch": 1.987362413371382, "grad_norm": 0.8186237812042236, "learning_rate": 0.0007515796983285773, "loss": 3.7374, "step": 29250 }, { "epoch": 1.9877021334420437, "grad_norm": 1.1092513799667358, "learning_rate": 0.0007515372333197445, "loss": 3.5809, "step": 29255 }, { "epoch": 1.9880418535127056, "grad_norm": 1.261192798614502, "learning_rate": 0.0007514947683109118, "loss": 3.8282, "step": 29260 }, { "epoch": 1.9883815735833674, "grad_norm": 0.9189577102661133, "learning_rate": 0.0007514523033020791, "loss": 3.4139, "step": 29265 }, { "epoch": 1.988721293654029, "grad_norm": 0.8337008357048035, "learning_rate": 0.0007514098382932464, "loss": 3.5481, "step": 29270 }, { "epoch": 1.989061013724691, "grad_norm": 1.409749150276184, "learning_rate": 0.0007513673732844137, "loss": 3.3206, "step": 29275 }, { "epoch": 1.9894007337953528, "grad_norm": 0.6449939608573914, "learning_rate": 0.000751324908275581, "loss": 3.5363, "step": 29280 }, { "epoch": 1.9897404538660144, "grad_norm": 1.1269621849060059, "learning_rate": 0.0007512824432667482, "loss": 3.5596, "step": 29285 }, { "epoch": 1.990080173936676, "grad_norm": 0.7379196286201477, "learning_rate": 0.0007512399782579154, "loss": 3.491, "step": 29290 }, { "epoch": 1.990419894007338, "grad_norm": 0.7300626635551453, "learning_rate": 0.0007511975132490828, "loss": 3.6164, "step": 29295 }, { "epoch": 1.9907596140779997, "grad_norm": 0.7448827624320984, "learning_rate": 0.00075115504824025, "loss": 3.6993, "step": 29300 }, { "epoch": 1.9910993341486614, "grad_norm": 0.8180156946182251, "learning_rate": 0.0007511125832314173, "loss": 3.5765, "step": 29305 }, { "epoch": 1.9914390542193234, "grad_norm": 0.6944757103919983, "learning_rate": 0.0007510701182225847, "loss": 3.6794, "step": 29310 }, { "epoch": 1.991778774289985, "grad_norm": 0.7398365139961243, "learning_rate": 0.0007510276532137519, "loss": 3.5284, "step": 29315 }, { "epoch": 1.9921184943606467, "grad_norm": 2.0357625484466553, "learning_rate": 0.0007509851882049191, "loss": 3.543, "step": 29320 }, { "epoch": 1.9924582144313085, "grad_norm": 0.934351921081543, "learning_rate": 0.0007509427231960865, "loss": 3.7063, "step": 29325 }, { "epoch": 1.9927979345019704, "grad_norm": 0.7739645838737488, "learning_rate": 0.0007509002581872537, "loss": 3.4061, "step": 29330 }, { "epoch": 1.993137654572632, "grad_norm": 0.8704223036766052, "learning_rate": 0.0007508577931784209, "loss": 3.539, "step": 29335 }, { "epoch": 1.9934773746432939, "grad_norm": 0.8656898736953735, "learning_rate": 0.0007508153281695883, "loss": 3.7759, "step": 29340 }, { "epoch": 1.9938170947139557, "grad_norm": 0.9884647727012634, "learning_rate": 0.0007507728631607556, "loss": 3.3741, "step": 29345 }, { "epoch": 1.9941568147846174, "grad_norm": 0.8359668850898743, "learning_rate": 0.0007507303981519228, "loss": 3.8857, "step": 29350 }, { "epoch": 1.9944965348552792, "grad_norm": 0.7502020001411438, "learning_rate": 0.0007506879331430901, "loss": 3.3919, "step": 29355 }, { "epoch": 1.994836254925941, "grad_norm": 1.1888049840927124, "learning_rate": 0.0007506454681342574, "loss": 3.7313, "step": 29360 }, { "epoch": 1.9951759749966027, "grad_norm": 0.8564573526382446, "learning_rate": 0.0007506030031254246, "loss": 3.6416, "step": 29365 }, { "epoch": 1.9955156950672646, "grad_norm": 0.8001554608345032, "learning_rate": 0.000750560538116592, "loss": 3.6559, "step": 29370 }, { "epoch": 1.9958554151379264, "grad_norm": 0.8352419137954712, "learning_rate": 0.0007505180731077593, "loss": 3.7106, "step": 29375 }, { "epoch": 1.996195135208588, "grad_norm": 0.6696701645851135, "learning_rate": 0.0007504756080989265, "loss": 3.8092, "step": 29380 }, { "epoch": 1.99653485527925, "grad_norm": 0.9772933125495911, "learning_rate": 0.0007504331430900938, "loss": 3.6293, "step": 29385 }, { "epoch": 1.9968745753499118, "grad_norm": 0.8547801375389099, "learning_rate": 0.000750390678081261, "loss": 3.4399, "step": 29390 }, { "epoch": 1.9972142954205734, "grad_norm": 0.6507989168167114, "learning_rate": 0.0007503482130724283, "loss": 3.7233, "step": 29395 }, { "epoch": 1.9975540154912352, "grad_norm": 0.8760092854499817, "learning_rate": 0.0007503057480635956, "loss": 3.7968, "step": 29400 }, { "epoch": 1.997893735561897, "grad_norm": 0.8082702159881592, "learning_rate": 0.0007502632830547629, "loss": 3.5677, "step": 29405 }, { "epoch": 1.9982334556325587, "grad_norm": 1.0632838010787964, "learning_rate": 0.0007502208180459302, "loss": 3.6244, "step": 29410 }, { "epoch": 1.9985731757032206, "grad_norm": 1.1742656230926514, "learning_rate": 0.0007501783530370975, "loss": 3.5269, "step": 29415 }, { "epoch": 1.9989128957738824, "grad_norm": 0.952635645866394, "learning_rate": 0.0007501358880282647, "loss": 3.67, "step": 29420 }, { "epoch": 1.999252615844544, "grad_norm": 1.0246108770370483, "learning_rate": 0.0007500934230194319, "loss": 3.6288, "step": 29425 }, { "epoch": 1.999592335915206, "grad_norm": 0.7565904855728149, "learning_rate": 0.0007500509580105993, "loss": 3.5684, "step": 29430 }, { "epoch": 1.9999320559858678, "grad_norm": 0.7612989544868469, "learning_rate": 0.0007500084930017665, "loss": 3.7262, "step": 29435 }, { "epoch": 2.0, "eval_bertscore": { "f1": 0.8401453708687405, "precision": 0.84065271906063, "recall": 0.8405500507271559 }, "eval_bleu_4": 0.018990813977970547, "eval_exact_match": 0.00019381723035177828, "eval_loss": 3.4735400676727295, "eval_meteor": 0.09309370200781929, "eval_rouge": { "rouge1": 0.12514640946339917, "rouge2": 0.01804564127942626, "rougeL": 0.10785387297955415, "rougeLsum": 0.10789256108330744 }, "eval_runtime": 3205.431, "eval_samples_per_second": 3.219, "eval_steps_per_second": 0.402, "step": 29436 }, { "epoch": 2.0002717760565294, "grad_norm": 0.907905638217926, "learning_rate": 0.0007499660279929338, "loss": 3.5941, "step": 29440 }, { "epoch": 2.000611496127191, "grad_norm": 0.6962242722511292, "learning_rate": 0.0007499235629841012, "loss": 3.5878, "step": 29445 }, { "epoch": 2.000951216197853, "grad_norm": 0.737462043762207, "learning_rate": 0.0007498810979752684, "loss": 3.5378, "step": 29450 }, { "epoch": 2.0012909362685147, "grad_norm": 0.798230767250061, "learning_rate": 0.0007498386329664356, "loss": 3.5264, "step": 29455 }, { "epoch": 2.0016306563391764, "grad_norm": 1.5187355279922485, "learning_rate": 0.000749796167957603, "loss": 3.4892, "step": 29460 }, { "epoch": 2.0019703764098384, "grad_norm": 0.9855580925941467, "learning_rate": 0.0007497537029487702, "loss": 3.3585, "step": 29465 }, { "epoch": 2.0023100964805, "grad_norm": 1.1158792972564697, "learning_rate": 0.0007497112379399374, "loss": 3.37, "step": 29470 }, { "epoch": 2.0026498165511617, "grad_norm": 1.0571216344833374, "learning_rate": 0.0007496687729311049, "loss": 3.4063, "step": 29475 }, { "epoch": 2.0029895366218238, "grad_norm": 1.1315616369247437, "learning_rate": 0.0007496263079222721, "loss": 3.4602, "step": 29480 }, { "epoch": 2.0033292566924854, "grad_norm": 0.920049250125885, "learning_rate": 0.0007495838429134394, "loss": 3.3883, "step": 29485 }, { "epoch": 2.003668976763147, "grad_norm": 0.8580490350723267, "learning_rate": 0.0007495413779046066, "loss": 3.6258, "step": 29490 }, { "epoch": 2.004008696833809, "grad_norm": 0.6755834221839905, "learning_rate": 0.0007494989128957739, "loss": 3.7057, "step": 29495 }, { "epoch": 2.0043484169044707, "grad_norm": 0.7505844831466675, "learning_rate": 0.0007494564478869412, "loss": 3.1538, "step": 29500 }, { "epoch": 2.0046881369751324, "grad_norm": 2.153061628341675, "learning_rate": 0.0007494139828781084, "loss": 3.5878, "step": 29505 }, { "epoch": 2.0050278570457944, "grad_norm": 0.9868181347846985, "learning_rate": 0.0007493715178692758, "loss": 3.8243, "step": 29510 }, { "epoch": 2.005367577116456, "grad_norm": 0.8526576161384583, "learning_rate": 0.0007493290528604431, "loss": 3.5244, "step": 29515 }, { "epoch": 2.0057072971871177, "grad_norm": 1.3826223611831665, "learning_rate": 0.0007492865878516103, "loss": 3.3284, "step": 29520 }, { "epoch": 2.00604701725778, "grad_norm": 1.0314799547195435, "learning_rate": 0.0007492441228427775, "loss": 3.6559, "step": 29525 }, { "epoch": 2.0063867373284414, "grad_norm": 0.8046008944511414, "learning_rate": 0.0007492016578339449, "loss": 3.6961, "step": 29530 }, { "epoch": 2.006726457399103, "grad_norm": 1.0257865190505981, "learning_rate": 0.0007491591928251121, "loss": 3.7188, "step": 29535 }, { "epoch": 2.007066177469765, "grad_norm": 0.7950069904327393, "learning_rate": 0.0007491167278162793, "loss": 3.6145, "step": 29540 }, { "epoch": 2.0074058975404268, "grad_norm": 0.964979887008667, "learning_rate": 0.0007490742628074468, "loss": 3.8685, "step": 29545 }, { "epoch": 2.0077456176110884, "grad_norm": 0.8541505932807922, "learning_rate": 0.000749031797798614, "loss": 3.1539, "step": 29550 }, { "epoch": 2.0080853376817505, "grad_norm": 0.9248681664466858, "learning_rate": 0.0007489893327897812, "loss": 3.5821, "step": 29555 }, { "epoch": 2.008425057752412, "grad_norm": 0.8942385911941528, "learning_rate": 0.0007489468677809486, "loss": 3.3341, "step": 29560 }, { "epoch": 2.0087647778230737, "grad_norm": 1.231141448020935, "learning_rate": 0.0007489044027721158, "loss": 3.4626, "step": 29565 }, { "epoch": 2.0091044978937354, "grad_norm": 0.8439732789993286, "learning_rate": 0.000748861937763283, "loss": 3.6587, "step": 29570 }, { "epoch": 2.0094442179643974, "grad_norm": 0.7009230256080627, "learning_rate": 0.0007488194727544503, "loss": 3.551, "step": 29575 }, { "epoch": 2.009783938035059, "grad_norm": 0.7958117127418518, "learning_rate": 0.0007487770077456177, "loss": 3.662, "step": 29580 }, { "epoch": 2.0101236581057207, "grad_norm": 0.6516826152801514, "learning_rate": 0.0007487345427367849, "loss": 3.5244, "step": 29585 }, { "epoch": 2.0104633781763828, "grad_norm": 0.9384609460830688, "learning_rate": 0.0007486920777279522, "loss": 3.5378, "step": 29590 }, { "epoch": 2.0108030982470444, "grad_norm": 1.3192696571350098, "learning_rate": 0.0007486496127191195, "loss": 3.5726, "step": 29595 }, { "epoch": 2.011142818317706, "grad_norm": 0.8813995122909546, "learning_rate": 0.0007486071477102867, "loss": 3.396, "step": 29600 }, { "epoch": 2.011482538388368, "grad_norm": 0.9518397450447083, "learning_rate": 0.000748564682701454, "loss": 3.393, "step": 29605 }, { "epoch": 2.0118222584590297, "grad_norm": 0.8820313811302185, "learning_rate": 0.0007485222176926213, "loss": 3.5008, "step": 29610 }, { "epoch": 2.0121619785296914, "grad_norm": 0.9841651916503906, "learning_rate": 0.0007484797526837886, "loss": 3.6952, "step": 29615 }, { "epoch": 2.0125016986003534, "grad_norm": 1.007918357849121, "learning_rate": 0.0007484372876749559, "loss": 3.6085, "step": 29620 }, { "epoch": 2.012841418671015, "grad_norm": 0.9101667404174805, "learning_rate": 0.0007483948226661231, "loss": 3.4666, "step": 29625 }, { "epoch": 2.0131811387416767, "grad_norm": 0.9427982568740845, "learning_rate": 0.0007483523576572904, "loss": 3.677, "step": 29630 }, { "epoch": 2.0135208588123388, "grad_norm": 0.7218946218490601, "learning_rate": 0.0007483098926484577, "loss": 3.6277, "step": 29635 }, { "epoch": 2.0138605788830004, "grad_norm": 0.8679907917976379, "learning_rate": 0.0007482674276396249, "loss": 3.3277, "step": 29640 }, { "epoch": 2.014200298953662, "grad_norm": 0.8475306034088135, "learning_rate": 0.0007482249626307922, "loss": 3.4709, "step": 29645 }, { "epoch": 2.014540019024324, "grad_norm": 0.7366526126861572, "learning_rate": 0.0007481824976219596, "loss": 3.5437, "step": 29650 }, { "epoch": 2.0148797390949857, "grad_norm": 1.0132291316986084, "learning_rate": 0.0007481400326131268, "loss": 3.8658, "step": 29655 }, { "epoch": 2.0152194591656474, "grad_norm": 0.7501122355461121, "learning_rate": 0.0007480975676042941, "loss": 3.7098, "step": 29660 }, { "epoch": 2.0155591792363095, "grad_norm": 0.8191317319869995, "learning_rate": 0.0007480551025954614, "loss": 3.6435, "step": 29665 }, { "epoch": 2.015898899306971, "grad_norm": 1.051823377609253, "learning_rate": 0.0007480126375866286, "loss": 3.4452, "step": 29670 }, { "epoch": 2.0162386193776327, "grad_norm": 0.9380770921707153, "learning_rate": 0.0007479701725777958, "loss": 3.1687, "step": 29675 }, { "epoch": 2.016578339448295, "grad_norm": 0.9178208708763123, "learning_rate": 0.0007479277075689632, "loss": 3.8156, "step": 29680 }, { "epoch": 2.0169180595189564, "grad_norm": 1.190659999847412, "learning_rate": 0.0007478852425601305, "loss": 3.4968, "step": 29685 }, { "epoch": 2.017257779589618, "grad_norm": 0.7861708998680115, "learning_rate": 0.0007478427775512977, "loss": 3.7399, "step": 29690 }, { "epoch": 2.01759749966028, "grad_norm": 0.6784389615058899, "learning_rate": 0.0007478003125424651, "loss": 3.7482, "step": 29695 }, { "epoch": 2.0179372197309418, "grad_norm": 0.993552029132843, "learning_rate": 0.0007477578475336323, "loss": 3.4869, "step": 29700 }, { "epoch": 2.0182769398016034, "grad_norm": 0.7340433597564697, "learning_rate": 0.0007477153825247995, "loss": 3.4401, "step": 29705 }, { "epoch": 2.0186166598722655, "grad_norm": 0.7984585165977478, "learning_rate": 0.0007476729175159669, "loss": 3.7574, "step": 29710 }, { "epoch": 2.018956379942927, "grad_norm": 0.9595564603805542, "learning_rate": 0.0007476304525071341, "loss": 3.5996, "step": 29715 }, { "epoch": 2.0192961000135887, "grad_norm": 0.8063091039657593, "learning_rate": 0.0007475879874983014, "loss": 3.378, "step": 29720 }, { "epoch": 2.0196358200842504, "grad_norm": 0.8322507739067078, "learning_rate": 0.0007475455224894687, "loss": 3.5582, "step": 29725 }, { "epoch": 2.0199755401549124, "grad_norm": 0.9427157640457153, "learning_rate": 0.000747503057480636, "loss": 3.2115, "step": 29730 }, { "epoch": 2.020315260225574, "grad_norm": 1.0460054874420166, "learning_rate": 0.0007474605924718032, "loss": 3.3804, "step": 29735 }, { "epoch": 2.0206549802962357, "grad_norm": 0.8137238621711731, "learning_rate": 0.0007474181274629705, "loss": 3.4971, "step": 29740 }, { "epoch": 2.0209947003668978, "grad_norm": 1.254673719406128, "learning_rate": 0.0007473756624541378, "loss": 3.3985, "step": 29745 }, { "epoch": 2.0213344204375594, "grad_norm": 0.778195858001709, "learning_rate": 0.000747333197445305, "loss": 3.4578, "step": 29750 }, { "epoch": 2.021674140508221, "grad_norm": 0.7506092190742493, "learning_rate": 0.0007472907324364724, "loss": 3.6048, "step": 29755 }, { "epoch": 2.022013860578883, "grad_norm": 0.7908632755279541, "learning_rate": 0.0007472482674276397, "loss": 3.701, "step": 29760 }, { "epoch": 2.0223535806495447, "grad_norm": 0.9338049292564392, "learning_rate": 0.0007472058024188069, "loss": 3.5614, "step": 29765 }, { "epoch": 2.0226933007202064, "grad_norm": 0.959753155708313, "learning_rate": 0.0007471633374099742, "loss": 3.4331, "step": 29770 }, { "epoch": 2.0230330207908684, "grad_norm": 0.8978981971740723, "learning_rate": 0.0007471208724011414, "loss": 3.4794, "step": 29775 }, { "epoch": 2.02337274086153, "grad_norm": 0.9128916263580322, "learning_rate": 0.0007470784073923087, "loss": 3.5545, "step": 29780 }, { "epoch": 2.0237124609321917, "grad_norm": 0.8868361115455627, "learning_rate": 0.000747035942383476, "loss": 3.5537, "step": 29785 }, { "epoch": 2.024052181002854, "grad_norm": 0.7643030881881714, "learning_rate": 0.0007469934773746433, "loss": 3.5983, "step": 29790 }, { "epoch": 2.0243919010735154, "grad_norm": 0.8862969875335693, "learning_rate": 0.0007469510123658106, "loss": 3.5653, "step": 29795 }, { "epoch": 2.024731621144177, "grad_norm": 0.6573250889778137, "learning_rate": 0.0007469085473569779, "loss": 3.912, "step": 29800 }, { "epoch": 2.025071341214839, "grad_norm": 0.9341059923171997, "learning_rate": 0.0007468660823481451, "loss": 3.6593, "step": 29805 }, { "epoch": 2.0254110612855007, "grad_norm": 3.6508257389068604, "learning_rate": 0.0007468236173393123, "loss": 3.6188, "step": 29810 }, { "epoch": 2.0257507813561624, "grad_norm": 0.9624465107917786, "learning_rate": 0.0007467811523304797, "loss": 3.523, "step": 29815 }, { "epoch": 2.0260905014268245, "grad_norm": 0.7539438605308533, "learning_rate": 0.0007467386873216469, "loss": 3.527, "step": 29820 }, { "epoch": 2.026430221497486, "grad_norm": 0.9194149971008301, "learning_rate": 0.0007466962223128144, "loss": 3.5303, "step": 29825 }, { "epoch": 2.0267699415681477, "grad_norm": 0.8312097191810608, "learning_rate": 0.0007466537573039816, "loss": 3.384, "step": 29830 }, { "epoch": 2.02710966163881, "grad_norm": 0.8856743574142456, "learning_rate": 0.0007466112922951488, "loss": 3.36, "step": 29835 }, { "epoch": 2.0274493817094714, "grad_norm": 1.442902684211731, "learning_rate": 0.0007465688272863161, "loss": 3.7349, "step": 29840 }, { "epoch": 2.027789101780133, "grad_norm": 0.8912723660469055, "learning_rate": 0.0007465263622774834, "loss": 3.3736, "step": 29845 }, { "epoch": 2.028128821850795, "grad_norm": 0.8849855065345764, "learning_rate": 0.0007464838972686506, "loss": 3.5301, "step": 29850 }, { "epoch": 2.0284685419214568, "grad_norm": 0.9413118958473206, "learning_rate": 0.0007464414322598179, "loss": 3.7613, "step": 29855 }, { "epoch": 2.0288082619921184, "grad_norm": 0.6635482311248779, "learning_rate": 0.0007463989672509853, "loss": 3.5251, "step": 29860 }, { "epoch": 2.0291479820627805, "grad_norm": 1.132591962814331, "learning_rate": 0.0007463565022421525, "loss": 3.7151, "step": 29865 }, { "epoch": 2.029487702133442, "grad_norm": 0.9874240756034851, "learning_rate": 0.0007463140372333198, "loss": 3.6424, "step": 29870 }, { "epoch": 2.0298274222041037, "grad_norm": 0.818653404712677, "learning_rate": 0.000746271572224487, "loss": 3.719, "step": 29875 }, { "epoch": 2.030167142274766, "grad_norm": 1.0717358589172363, "learning_rate": 0.0007462291072156543, "loss": 3.648, "step": 29880 }, { "epoch": 2.0305068623454274, "grad_norm": 0.9440126419067383, "learning_rate": 0.0007461866422068216, "loss": 3.5654, "step": 29885 }, { "epoch": 2.030846582416089, "grad_norm": 0.8054519891738892, "learning_rate": 0.0007461441771979888, "loss": 3.5355, "step": 29890 }, { "epoch": 2.031186302486751, "grad_norm": 1.1522808074951172, "learning_rate": 0.0007461017121891562, "loss": 3.2367, "step": 29895 }, { "epoch": 2.0315260225574128, "grad_norm": 1.3224278688430786, "learning_rate": 0.0007460592471803235, "loss": 3.2162, "step": 29900 }, { "epoch": 2.0318657426280744, "grad_norm": 0.816796064376831, "learning_rate": 0.0007460167821714907, "loss": 3.8512, "step": 29905 }, { "epoch": 2.032205462698736, "grad_norm": 0.7117137908935547, "learning_rate": 0.000745974317162658, "loss": 3.7982, "step": 29910 }, { "epoch": 2.032545182769398, "grad_norm": 0.8739092350006104, "learning_rate": 0.0007459318521538253, "loss": 3.4923, "step": 29915 }, { "epoch": 2.0328849028400597, "grad_norm": 0.8217427730560303, "learning_rate": 0.0007458893871449925, "loss": 3.4505, "step": 29920 }, { "epoch": 2.0332246229107214, "grad_norm": 0.9769152998924255, "learning_rate": 0.0007458469221361597, "loss": 3.6454, "step": 29925 }, { "epoch": 2.0335643429813834, "grad_norm": 0.7304091453552246, "learning_rate": 0.0007458044571273272, "loss": 3.6234, "step": 29930 }, { "epoch": 2.033904063052045, "grad_norm": 1.0225117206573486, "learning_rate": 0.0007457619921184944, "loss": 3.4735, "step": 29935 }, { "epoch": 2.0342437831227067, "grad_norm": 0.8672761917114258, "learning_rate": 0.0007457195271096616, "loss": 3.8095, "step": 29940 }, { "epoch": 2.034583503193369, "grad_norm": 0.997830867767334, "learning_rate": 0.000745677062100829, "loss": 3.5252, "step": 29945 }, { "epoch": 2.0349232232640304, "grad_norm": 0.6500917077064514, "learning_rate": 0.0007456345970919962, "loss": 3.4479, "step": 29950 }, { "epoch": 2.035262943334692, "grad_norm": 0.9575283527374268, "learning_rate": 0.0007455921320831634, "loss": 3.5827, "step": 29955 }, { "epoch": 2.035602663405354, "grad_norm": 0.7531545758247375, "learning_rate": 0.0007455496670743309, "loss": 3.5396, "step": 29960 }, { "epoch": 2.0359423834760157, "grad_norm": 2.2459850311279297, "learning_rate": 0.0007455072020654981, "loss": 3.3888, "step": 29965 }, { "epoch": 2.0362821035466774, "grad_norm": 1.1024680137634277, "learning_rate": 0.0007454647370566653, "loss": 3.6355, "step": 29970 }, { "epoch": 2.0366218236173395, "grad_norm": 0.862695038318634, "learning_rate": 0.0007454222720478326, "loss": 3.7001, "step": 29975 }, { "epoch": 2.036961543688001, "grad_norm": 0.9912177920341492, "learning_rate": 0.0007453798070389999, "loss": 3.341, "step": 29980 }, { "epoch": 2.0373012637586627, "grad_norm": 0.8192928433418274, "learning_rate": 0.0007453373420301671, "loss": 3.5358, "step": 29985 }, { "epoch": 2.037640983829325, "grad_norm": 0.8122713565826416, "learning_rate": 0.0007452948770213344, "loss": 3.7265, "step": 29990 }, { "epoch": 2.0379807038999864, "grad_norm": 1.0422576665878296, "learning_rate": 0.0007452524120125018, "loss": 3.7656, "step": 29995 }, { "epoch": 2.038320423970648, "grad_norm": 0.7285352349281311, "learning_rate": 0.000745209947003669, "loss": 3.726, "step": 30000 }, { "epoch": 2.03866014404131, "grad_norm": 0.8428540825843811, "learning_rate": 0.0007451674819948363, "loss": 3.6522, "step": 30005 }, { "epoch": 2.0389998641119718, "grad_norm": 0.8177939653396606, "learning_rate": 0.0007451250169860036, "loss": 3.8713, "step": 30010 }, { "epoch": 2.0393395841826334, "grad_norm": 0.8508073687553406, "learning_rate": 0.0007450825519771708, "loss": 3.8155, "step": 30015 }, { "epoch": 2.0396793042532955, "grad_norm": 0.7089604735374451, "learning_rate": 0.0007450400869683381, "loss": 3.6199, "step": 30020 }, { "epoch": 2.040019024323957, "grad_norm": 0.8525442481040955, "learning_rate": 0.0007449976219595053, "loss": 3.5146, "step": 30025 }, { "epoch": 2.0403587443946187, "grad_norm": 0.8071790933609009, "learning_rate": 0.0007449551569506727, "loss": 3.648, "step": 30030 }, { "epoch": 2.040698464465281, "grad_norm": 0.9341453313827515, "learning_rate": 0.00074491269194184, "loss": 3.5942, "step": 30035 }, { "epoch": 2.0410381845359424, "grad_norm": 1.0676578283309937, "learning_rate": 0.0007448702269330072, "loss": 3.5487, "step": 30040 }, { "epoch": 2.041377904606604, "grad_norm": 0.7942273616790771, "learning_rate": 0.0007448277619241745, "loss": 3.4798, "step": 30045 }, { "epoch": 2.041717624677266, "grad_norm": 1.496077060699463, "learning_rate": 0.0007447852969153418, "loss": 3.561, "step": 30050 }, { "epoch": 2.0420573447479278, "grad_norm": 0.8735402226448059, "learning_rate": 0.000744742831906509, "loss": 3.3278, "step": 30055 }, { "epoch": 2.0423970648185894, "grad_norm": 0.9960219264030457, "learning_rate": 0.0007447003668976762, "loss": 3.5946, "step": 30060 }, { "epoch": 2.042736784889251, "grad_norm": 0.8716889023780823, "learning_rate": 0.0007446579018888437, "loss": 3.5035, "step": 30065 }, { "epoch": 2.043076504959913, "grad_norm": 0.8317398428916931, "learning_rate": 0.0007446154368800109, "loss": 3.8043, "step": 30070 }, { "epoch": 2.0434162250305747, "grad_norm": 0.8044674396514893, "learning_rate": 0.0007445729718711781, "loss": 3.2921, "step": 30075 }, { "epoch": 2.0437559451012364, "grad_norm": 0.783662736415863, "learning_rate": 0.0007445305068623455, "loss": 3.4793, "step": 30080 }, { "epoch": 2.0440956651718984, "grad_norm": 0.7997136116027832, "learning_rate": 0.0007444880418535127, "loss": 3.6485, "step": 30085 }, { "epoch": 2.04443538524256, "grad_norm": 0.6637369990348816, "learning_rate": 0.0007444455768446799, "loss": 3.6048, "step": 30090 }, { "epoch": 2.0447751053132217, "grad_norm": 0.7963462471961975, "learning_rate": 0.0007444031118358473, "loss": 3.4203, "step": 30095 }, { "epoch": 2.045114825383884, "grad_norm": 1.0084829330444336, "learning_rate": 0.0007443606468270146, "loss": 3.4502, "step": 30100 }, { "epoch": 2.0454545454545454, "grad_norm": 0.8999413847923279, "learning_rate": 0.0007443181818181818, "loss": 3.6533, "step": 30105 }, { "epoch": 2.045794265525207, "grad_norm": 0.9683690071105957, "learning_rate": 0.0007442757168093492, "loss": 3.5035, "step": 30110 }, { "epoch": 2.046133985595869, "grad_norm": 0.7637821435928345, "learning_rate": 0.0007442332518005164, "loss": 3.7422, "step": 30115 }, { "epoch": 2.0464737056665308, "grad_norm": 0.7321568727493286, "learning_rate": 0.0007441907867916836, "loss": 3.5244, "step": 30120 }, { "epoch": 2.0468134257371924, "grad_norm": 0.8551080226898193, "learning_rate": 0.0007441483217828509, "loss": 3.5163, "step": 30125 }, { "epoch": 2.0471531458078545, "grad_norm": 0.7921773195266724, "learning_rate": 0.0007441058567740182, "loss": 3.4562, "step": 30130 }, { "epoch": 2.047492865878516, "grad_norm": 0.7873597145080566, "learning_rate": 0.0007440633917651855, "loss": 3.5316, "step": 30135 }, { "epoch": 2.0478325859491777, "grad_norm": 0.7279452681541443, "learning_rate": 0.0007440209267563528, "loss": 3.747, "step": 30140 }, { "epoch": 2.04817230601984, "grad_norm": 1.2161774635314941, "learning_rate": 0.0007439784617475201, "loss": 3.6942, "step": 30145 }, { "epoch": 2.0485120260905014, "grad_norm": 0.9279671907424927, "learning_rate": 0.0007439359967386873, "loss": 3.3763, "step": 30150 }, { "epoch": 2.048851746161163, "grad_norm": 0.81589275598526, "learning_rate": 0.0007438935317298546, "loss": 3.3799, "step": 30155 }, { "epoch": 2.049191466231825, "grad_norm": 0.759606122970581, "learning_rate": 0.0007438510667210218, "loss": 3.6719, "step": 30160 }, { "epoch": 2.0495311863024868, "grad_norm": 0.7813960313796997, "learning_rate": 0.0007438086017121892, "loss": 3.501, "step": 30165 }, { "epoch": 2.0498709063731484, "grad_norm": 0.862910807132721, "learning_rate": 0.0007437661367033565, "loss": 3.5236, "step": 30170 }, { "epoch": 2.0502106264438105, "grad_norm": 0.9117962718009949, "learning_rate": 0.0007437236716945237, "loss": 3.4349, "step": 30175 }, { "epoch": 2.050550346514472, "grad_norm": 0.762190043926239, "learning_rate": 0.0007436812066856911, "loss": 3.7909, "step": 30180 }, { "epoch": 2.0508900665851337, "grad_norm": 1.0550425052642822, "learning_rate": 0.0007436387416768583, "loss": 3.3549, "step": 30185 }, { "epoch": 2.051229786655796, "grad_norm": 0.7871007919311523, "learning_rate": 0.0007435962766680255, "loss": 3.3736, "step": 30190 }, { "epoch": 2.0515695067264574, "grad_norm": 0.9111835360527039, "learning_rate": 0.0007435538116591929, "loss": 3.3158, "step": 30195 }, { "epoch": 2.051909226797119, "grad_norm": 0.8797037601470947, "learning_rate": 0.0007435113466503601, "loss": 3.8605, "step": 30200 }, { "epoch": 2.052248946867781, "grad_norm": 0.648908793926239, "learning_rate": 0.0007434688816415274, "loss": 3.5761, "step": 30205 }, { "epoch": 2.0525886669384428, "grad_norm": 1.0742509365081787, "learning_rate": 0.0007434264166326948, "loss": 3.4852, "step": 30210 }, { "epoch": 2.0529283870091044, "grad_norm": 0.9244308471679688, "learning_rate": 0.000743383951623862, "loss": 3.583, "step": 30215 }, { "epoch": 2.0532681070797665, "grad_norm": 0.780324399471283, "learning_rate": 0.0007433414866150292, "loss": 3.7677, "step": 30220 }, { "epoch": 2.053607827150428, "grad_norm": 1.364317536354065, "learning_rate": 0.0007432990216061965, "loss": 3.4176, "step": 30225 }, { "epoch": 2.0539475472210897, "grad_norm": 1.0801634788513184, "learning_rate": 0.0007432565565973638, "loss": 3.315, "step": 30230 }, { "epoch": 2.054287267291752, "grad_norm": 0.7172245979309082, "learning_rate": 0.000743214091588531, "loss": 3.5615, "step": 30235 }, { "epoch": 2.0546269873624134, "grad_norm": 0.7711317539215088, "learning_rate": 0.0007431716265796984, "loss": 3.5654, "step": 30240 }, { "epoch": 2.054966707433075, "grad_norm": 1.7499786615371704, "learning_rate": 0.0007431291615708657, "loss": 3.5924, "step": 30245 }, { "epoch": 2.0553064275037367, "grad_norm": 0.8764796257019043, "learning_rate": 0.0007430866965620329, "loss": 3.4943, "step": 30250 }, { "epoch": 2.055646147574399, "grad_norm": 0.8986364006996155, "learning_rate": 0.0007430442315532002, "loss": 3.779, "step": 30255 }, { "epoch": 2.0559858676450604, "grad_norm": 0.7314947843551636, "learning_rate": 0.0007430017665443674, "loss": 3.7973, "step": 30260 }, { "epoch": 2.056325587715722, "grad_norm": 0.882265031337738, "learning_rate": 0.0007429593015355347, "loss": 3.6973, "step": 30265 }, { "epoch": 2.056665307786384, "grad_norm": 0.7728033065795898, "learning_rate": 0.000742916836526702, "loss": 3.3723, "step": 30270 }, { "epoch": 2.0570050278570458, "grad_norm": 0.8112080097198486, "learning_rate": 0.0007428743715178693, "loss": 3.6802, "step": 30275 }, { "epoch": 2.0573447479277074, "grad_norm": 0.797694742679596, "learning_rate": 0.0007428319065090366, "loss": 3.2253, "step": 30280 }, { "epoch": 2.0576844679983695, "grad_norm": 0.8730963468551636, "learning_rate": 0.0007427894415002039, "loss": 3.5329, "step": 30285 }, { "epoch": 2.058024188069031, "grad_norm": 0.7174362540245056, "learning_rate": 0.0007427469764913711, "loss": 3.6808, "step": 30290 }, { "epoch": 2.0583639081396927, "grad_norm": 1.0370514392852783, "learning_rate": 0.0007427045114825384, "loss": 3.4656, "step": 30295 }, { "epoch": 2.058703628210355, "grad_norm": 1.2400455474853516, "learning_rate": 0.0007426620464737057, "loss": 3.659, "step": 30300 }, { "epoch": 2.0590433482810164, "grad_norm": 0.905286431312561, "learning_rate": 0.0007426195814648729, "loss": 3.6574, "step": 30305 }, { "epoch": 2.059383068351678, "grad_norm": 0.9873246550559998, "learning_rate": 0.0007425771164560402, "loss": 3.5455, "step": 30310 }, { "epoch": 2.05972278842234, "grad_norm": 0.75831139087677, "learning_rate": 0.0007425346514472076, "loss": 3.5152, "step": 30315 }, { "epoch": 2.0600625084930018, "grad_norm": 0.7477074265480042, "learning_rate": 0.0007424921864383748, "loss": 3.6138, "step": 30320 }, { "epoch": 2.0604022285636634, "grad_norm": 0.7729437947273254, "learning_rate": 0.000742449721429542, "loss": 3.6884, "step": 30325 }, { "epoch": 2.0607419486343255, "grad_norm": 0.866472601890564, "learning_rate": 0.0007424072564207094, "loss": 3.3981, "step": 30330 }, { "epoch": 2.061081668704987, "grad_norm": 0.9379312992095947, "learning_rate": 0.0007423647914118766, "loss": 3.4964, "step": 30335 }, { "epoch": 2.0614213887756487, "grad_norm": 0.7489362359046936, "learning_rate": 0.0007423223264030438, "loss": 3.5504, "step": 30340 }, { "epoch": 2.061761108846311, "grad_norm": 0.8190793395042419, "learning_rate": 0.0007422798613942113, "loss": 3.4219, "step": 30345 }, { "epoch": 2.0621008289169724, "grad_norm": 0.8518457412719727, "learning_rate": 0.0007422373963853785, "loss": 3.6322, "step": 30350 }, { "epoch": 2.062440548987634, "grad_norm": 0.7793540954589844, "learning_rate": 0.0007421949313765457, "loss": 3.5534, "step": 30355 }, { "epoch": 2.062780269058296, "grad_norm": 0.9046018719673157, "learning_rate": 0.000742152466367713, "loss": 3.6531, "step": 30360 }, { "epoch": 2.0631199891289578, "grad_norm": 0.8473179340362549, "learning_rate": 0.0007421100013588803, "loss": 3.3025, "step": 30365 }, { "epoch": 2.0634597091996194, "grad_norm": 1.0683820247650146, "learning_rate": 0.0007420675363500475, "loss": 3.485, "step": 30370 }, { "epoch": 2.0637994292702815, "grad_norm": 0.9478075504302979, "learning_rate": 0.0007420250713412148, "loss": 3.3582, "step": 30375 }, { "epoch": 2.064139149340943, "grad_norm": 0.763405442237854, "learning_rate": 0.0007419826063323822, "loss": 3.4806, "step": 30380 }, { "epoch": 2.0644788694116047, "grad_norm": 0.8223350644111633, "learning_rate": 0.0007419401413235494, "loss": 3.5762, "step": 30385 }, { "epoch": 2.064818589482267, "grad_norm": 0.9755221009254456, "learning_rate": 0.0007418976763147167, "loss": 3.7148, "step": 30390 }, { "epoch": 2.0651583095529285, "grad_norm": 1.0897862911224365, "learning_rate": 0.000741855211305884, "loss": 3.6498, "step": 30395 }, { "epoch": 2.06549802962359, "grad_norm": 0.9741256237030029, "learning_rate": 0.0007418127462970512, "loss": 3.3485, "step": 30400 }, { "epoch": 2.0658377496942517, "grad_norm": 0.8971060514450073, "learning_rate": 0.0007417702812882185, "loss": 3.4101, "step": 30405 }, { "epoch": 2.066177469764914, "grad_norm": 0.7620330452919006, "learning_rate": 0.0007417278162793857, "loss": 3.6017, "step": 30410 }, { "epoch": 2.0665171898355754, "grad_norm": 0.6156169176101685, "learning_rate": 0.0007416853512705531, "loss": 3.4944, "step": 30415 }, { "epoch": 2.066856909906237, "grad_norm": 1.0007234811782837, "learning_rate": 0.0007416428862617204, "loss": 3.3781, "step": 30420 }, { "epoch": 2.067196629976899, "grad_norm": 0.7318184971809387, "learning_rate": 0.0007416004212528876, "loss": 3.5418, "step": 30425 }, { "epoch": 2.0675363500475608, "grad_norm": 0.7923423051834106, "learning_rate": 0.0007415579562440549, "loss": 3.5652, "step": 30430 }, { "epoch": 2.0678760701182224, "grad_norm": 0.612704873085022, "learning_rate": 0.0007415154912352222, "loss": 3.5636, "step": 30435 }, { "epoch": 2.0682157901888845, "grad_norm": 0.6869993805885315, "learning_rate": 0.0007414730262263894, "loss": 3.7997, "step": 30440 }, { "epoch": 2.068555510259546, "grad_norm": 1.023064374923706, "learning_rate": 0.0007414305612175566, "loss": 3.6569, "step": 30445 }, { "epoch": 2.0688952303302077, "grad_norm": 0.6793161630630493, "learning_rate": 0.0007413880962087241, "loss": 3.7399, "step": 30450 }, { "epoch": 2.06923495040087, "grad_norm": 0.8024521470069885, "learning_rate": 0.0007413456311998913, "loss": 3.3655, "step": 30455 }, { "epoch": 2.0695746704715314, "grad_norm": 0.9406973719596863, "learning_rate": 0.0007413031661910585, "loss": 3.6199, "step": 30460 }, { "epoch": 2.069914390542193, "grad_norm": 0.8953339457511902, "learning_rate": 0.0007412607011822259, "loss": 3.616, "step": 30465 }, { "epoch": 2.070254110612855, "grad_norm": 0.810542106628418, "learning_rate": 0.0007412182361733931, "loss": 3.5286, "step": 30470 }, { "epoch": 2.0705938306835168, "grad_norm": 0.8588165044784546, "learning_rate": 0.0007411757711645603, "loss": 3.3497, "step": 30475 }, { "epoch": 2.0709335507541784, "grad_norm": 2.339153289794922, "learning_rate": 0.0007411333061557277, "loss": 3.5623, "step": 30480 }, { "epoch": 2.0712732708248405, "grad_norm": 0.9562128782272339, "learning_rate": 0.000741090841146895, "loss": 3.349, "step": 30485 }, { "epoch": 2.071612990895502, "grad_norm": 0.7850987315177917, "learning_rate": 0.0007410483761380622, "loss": 3.5961, "step": 30490 }, { "epoch": 2.0719527109661637, "grad_norm": 0.8163318037986755, "learning_rate": 0.0007410059111292296, "loss": 3.6388, "step": 30495 }, { "epoch": 2.072292431036826, "grad_norm": 0.990432858467102, "learning_rate": 0.0007409634461203968, "loss": 3.2966, "step": 30500 }, { "epoch": 2.0726321511074874, "grad_norm": 0.8055406212806702, "learning_rate": 0.0007409209811115641, "loss": 3.7438, "step": 30505 }, { "epoch": 2.072971871178149, "grad_norm": 1.0218015909194946, "learning_rate": 0.0007408785161027313, "loss": 3.4548, "step": 30510 }, { "epoch": 2.073311591248811, "grad_norm": 1.0589393377304077, "learning_rate": 0.0007408360510938986, "loss": 3.4523, "step": 30515 }, { "epoch": 2.073651311319473, "grad_norm": 0.7979984283447266, "learning_rate": 0.000740793586085066, "loss": 3.5664, "step": 30520 }, { "epoch": 2.0739910313901344, "grad_norm": 1.0792994499206543, "learning_rate": 0.0007407511210762332, "loss": 3.7811, "step": 30525 }, { "epoch": 2.0743307514607965, "grad_norm": 0.8642266392707825, "learning_rate": 0.0007407086560674005, "loss": 3.5929, "step": 30530 }, { "epoch": 2.074670471531458, "grad_norm": 0.9011669754981995, "learning_rate": 0.0007406661910585678, "loss": 3.3067, "step": 30535 }, { "epoch": 2.0750101916021197, "grad_norm": 0.8261159062385559, "learning_rate": 0.000740623726049735, "loss": 3.3304, "step": 30540 }, { "epoch": 2.075349911672782, "grad_norm": 1.800051212310791, "learning_rate": 0.0007405812610409022, "loss": 3.5425, "step": 30545 }, { "epoch": 2.0756896317434435, "grad_norm": 0.7925270199775696, "learning_rate": 0.0007405387960320697, "loss": 3.5296, "step": 30550 }, { "epoch": 2.076029351814105, "grad_norm": 3.5795559883117676, "learning_rate": 0.0007404963310232369, "loss": 3.5929, "step": 30555 }, { "epoch": 2.076369071884767, "grad_norm": 0.9162275791168213, "learning_rate": 0.0007404538660144041, "loss": 3.5214, "step": 30560 }, { "epoch": 2.076708791955429, "grad_norm": 1.4265096187591553, "learning_rate": 0.0007404114010055715, "loss": 3.5892, "step": 30565 }, { "epoch": 2.0770485120260904, "grad_norm": 0.7997646927833557, "learning_rate": 0.0007403689359967387, "loss": 3.3387, "step": 30570 }, { "epoch": 2.0773882320967525, "grad_norm": 1.1077004671096802, "learning_rate": 0.0007403264709879059, "loss": 3.5577, "step": 30575 }, { "epoch": 2.077727952167414, "grad_norm": 0.9957609176635742, "learning_rate": 0.0007402840059790733, "loss": 3.613, "step": 30580 }, { "epoch": 2.0780676722380758, "grad_norm": 0.846028745174408, "learning_rate": 0.0007402415409702406, "loss": 3.4101, "step": 30585 }, { "epoch": 2.0784073923087374, "grad_norm": 0.8597121834754944, "learning_rate": 0.0007401990759614078, "loss": 3.415, "step": 30590 }, { "epoch": 2.0787471123793995, "grad_norm": 0.9149390459060669, "learning_rate": 0.0007401566109525752, "loss": 3.4515, "step": 30595 }, { "epoch": 2.079086832450061, "grad_norm": 0.9115296006202698, "learning_rate": 0.0007401141459437424, "loss": 3.6031, "step": 30600 }, { "epoch": 2.0794265525207227, "grad_norm": 0.8619689345359802, "learning_rate": 0.0007400716809349096, "loss": 3.7249, "step": 30605 }, { "epoch": 2.079766272591385, "grad_norm": 0.8290433287620544, "learning_rate": 0.0007400292159260769, "loss": 3.7367, "step": 30610 }, { "epoch": 2.0801059926620464, "grad_norm": 1.5585417747497559, "learning_rate": 0.0007399867509172442, "loss": 3.5775, "step": 30615 }, { "epoch": 2.080445712732708, "grad_norm": 0.9745950698852539, "learning_rate": 0.0007399442859084115, "loss": 3.3943, "step": 30620 }, { "epoch": 2.08078543280337, "grad_norm": 0.7606522440910339, "learning_rate": 0.0007399018208995788, "loss": 3.6931, "step": 30625 }, { "epoch": 2.0811251528740318, "grad_norm": 0.8307530283927917, "learning_rate": 0.0007398593558907461, "loss": 3.5827, "step": 30630 }, { "epoch": 2.0814648729446934, "grad_norm": 0.9678487181663513, "learning_rate": 0.0007398168908819133, "loss": 3.5312, "step": 30635 }, { "epoch": 2.0818045930153555, "grad_norm": 0.9765388369560242, "learning_rate": 0.0007397744258730806, "loss": 3.5896, "step": 30640 }, { "epoch": 2.082144313086017, "grad_norm": 0.997481107711792, "learning_rate": 0.0007397319608642479, "loss": 3.9057, "step": 30645 }, { "epoch": 2.0824840331566787, "grad_norm": 0.8829625248908997, "learning_rate": 0.0007396894958554151, "loss": 3.7211, "step": 30650 }, { "epoch": 2.082823753227341, "grad_norm": 0.8595649003982544, "learning_rate": 0.0007396470308465825, "loss": 3.7974, "step": 30655 }, { "epoch": 2.0831634732980024, "grad_norm": 1.0464352369308472, "learning_rate": 0.0007396045658377497, "loss": 3.6111, "step": 30660 }, { "epoch": 2.083503193368664, "grad_norm": 0.8240364193916321, "learning_rate": 0.000739562100828917, "loss": 3.4898, "step": 30665 }, { "epoch": 2.083842913439326, "grad_norm": 1.0037145614624023, "learning_rate": 0.0007395196358200843, "loss": 3.5247, "step": 30670 }, { "epoch": 2.084182633509988, "grad_norm": 0.8251913785934448, "learning_rate": 0.0007394771708112515, "loss": 3.5309, "step": 30675 }, { "epoch": 2.0845223535806494, "grad_norm": 0.8879213929176331, "learning_rate": 0.0007394347058024188, "loss": 3.4845, "step": 30680 }, { "epoch": 2.0848620736513115, "grad_norm": 1.0858031511306763, "learning_rate": 0.0007393922407935861, "loss": 3.6329, "step": 30685 }, { "epoch": 2.085201793721973, "grad_norm": 1.2666829824447632, "learning_rate": 0.0007393497757847534, "loss": 3.3307, "step": 30690 }, { "epoch": 2.0855415137926347, "grad_norm": 0.9408236742019653, "learning_rate": 0.0007393073107759207, "loss": 3.5853, "step": 30695 }, { "epoch": 2.085881233863297, "grad_norm": 0.919680655002594, "learning_rate": 0.000739264845767088, "loss": 3.3659, "step": 30700 }, { "epoch": 2.0862209539339585, "grad_norm": 0.7047791481018066, "learning_rate": 0.0007392223807582552, "loss": 3.5284, "step": 30705 }, { "epoch": 2.08656067400462, "grad_norm": 1.0470675230026245, "learning_rate": 0.0007391799157494224, "loss": 3.5477, "step": 30710 }, { "epoch": 2.086900394075282, "grad_norm": 0.9449440240859985, "learning_rate": 0.0007391374507405898, "loss": 3.5362, "step": 30715 }, { "epoch": 2.087240114145944, "grad_norm": 0.785474956035614, "learning_rate": 0.000739094985731757, "loss": 3.6538, "step": 30720 }, { "epoch": 2.0875798342166054, "grad_norm": 0.7510668635368347, "learning_rate": 0.0007390525207229243, "loss": 3.8347, "step": 30725 }, { "epoch": 2.0879195542872675, "grad_norm": 1.516573190689087, "learning_rate": 0.0007390100557140917, "loss": 3.6469, "step": 30730 }, { "epoch": 2.088259274357929, "grad_norm": 1.1345758438110352, "learning_rate": 0.0007389675907052589, "loss": 3.5847, "step": 30735 }, { "epoch": 2.0885989944285908, "grad_norm": 0.6628506779670715, "learning_rate": 0.0007389251256964261, "loss": 3.5002, "step": 30740 }, { "epoch": 2.0889387144992524, "grad_norm": 0.8574951887130737, "learning_rate": 0.0007388826606875935, "loss": 3.5501, "step": 30745 }, { "epoch": 2.0892784345699145, "grad_norm": 0.8082968592643738, "learning_rate": 0.0007388401956787607, "loss": 3.6739, "step": 30750 }, { "epoch": 2.089618154640576, "grad_norm": 0.7693761587142944, "learning_rate": 0.0007387977306699279, "loss": 3.6925, "step": 30755 }, { "epoch": 2.0899578747112377, "grad_norm": 0.894877016544342, "learning_rate": 0.0007387552656610953, "loss": 3.4466, "step": 30760 }, { "epoch": 2.0902975947819, "grad_norm": 0.6918829679489136, "learning_rate": 0.0007387128006522626, "loss": 4.0783, "step": 30765 }, { "epoch": 2.0906373148525614, "grad_norm": 0.6671911478042603, "learning_rate": 0.0007386703356434298, "loss": 3.6155, "step": 30770 }, { "epoch": 2.090977034923223, "grad_norm": 0.7401109933853149, "learning_rate": 0.0007386278706345971, "loss": 3.455, "step": 30775 }, { "epoch": 2.091316754993885, "grad_norm": 0.6841751933097839, "learning_rate": 0.0007385854056257644, "loss": 3.4967, "step": 30780 }, { "epoch": 2.0916564750645468, "grad_norm": 0.9585409760475159, "learning_rate": 0.0007385429406169316, "loss": 3.6054, "step": 30785 }, { "epoch": 2.0919961951352084, "grad_norm": 0.7005188465118408, "learning_rate": 0.0007385004756080989, "loss": 3.6842, "step": 30790 }, { "epoch": 2.0923359152058705, "grad_norm": 1.0339319705963135, "learning_rate": 0.0007384580105992663, "loss": 3.6938, "step": 30795 }, { "epoch": 2.092675635276532, "grad_norm": 0.67942214012146, "learning_rate": 0.0007384155455904335, "loss": 3.5447, "step": 30800 }, { "epoch": 2.0930153553471937, "grad_norm": 1.498030185699463, "learning_rate": 0.0007383730805816008, "loss": 3.4657, "step": 30805 }, { "epoch": 2.093355075417856, "grad_norm": 0.8941583037376404, "learning_rate": 0.000738330615572768, "loss": 3.5493, "step": 30810 }, { "epoch": 2.0936947954885174, "grad_norm": 0.8852878212928772, "learning_rate": 0.0007382881505639353, "loss": 3.6949, "step": 30815 }, { "epoch": 2.094034515559179, "grad_norm": 1.6694402694702148, "learning_rate": 0.0007382456855551026, "loss": 3.4862, "step": 30820 }, { "epoch": 2.094374235629841, "grad_norm": 0.9616932272911072, "learning_rate": 0.0007382032205462698, "loss": 3.5881, "step": 30825 }, { "epoch": 2.094713955700503, "grad_norm": 0.9270486831665039, "learning_rate": 0.0007381607555374372, "loss": 3.6069, "step": 30830 }, { "epoch": 2.0950536757711644, "grad_norm": 0.8870041966438293, "learning_rate": 0.0007381182905286045, "loss": 3.5657, "step": 30835 }, { "epoch": 2.0953933958418265, "grad_norm": 0.869133710861206, "learning_rate": 0.0007380758255197717, "loss": 3.5809, "step": 30840 }, { "epoch": 2.095733115912488, "grad_norm": 1.1645175218582153, "learning_rate": 0.000738033360510939, "loss": 3.7026, "step": 30845 }, { "epoch": 2.0960728359831498, "grad_norm": 0.6898667216300964, "learning_rate": 0.0007379908955021063, "loss": 3.825, "step": 30850 }, { "epoch": 2.096412556053812, "grad_norm": 0.8482049107551575, "learning_rate": 0.0007379484304932735, "loss": 3.5534, "step": 30855 }, { "epoch": 2.0967522761244735, "grad_norm": 0.8328823447227478, "learning_rate": 0.0007379059654844408, "loss": 3.6489, "step": 30860 }, { "epoch": 2.097091996195135, "grad_norm": 1.1751227378845215, "learning_rate": 0.0007378635004756082, "loss": 3.2842, "step": 30865 }, { "epoch": 2.097431716265797, "grad_norm": 0.8591613173484802, "learning_rate": 0.0007378210354667754, "loss": 3.7298, "step": 30870 }, { "epoch": 2.097771436336459, "grad_norm": 0.8904399275779724, "learning_rate": 0.0007377785704579427, "loss": 3.5831, "step": 30875 }, { "epoch": 2.0981111564071204, "grad_norm": 1.0145186185836792, "learning_rate": 0.00073773610544911, "loss": 3.4177, "step": 30880 }, { "epoch": 2.0984508764777825, "grad_norm": 1.009313702583313, "learning_rate": 0.0007376936404402772, "loss": 3.7288, "step": 30885 }, { "epoch": 2.098790596548444, "grad_norm": 0.8114494681358337, "learning_rate": 0.0007376511754314445, "loss": 3.7062, "step": 30890 }, { "epoch": 2.0991303166191058, "grad_norm": 1.1027445793151855, "learning_rate": 0.0007376087104226117, "loss": 3.4822, "step": 30895 }, { "epoch": 2.099470036689768, "grad_norm": 0.8629288673400879, "learning_rate": 0.0007375662454137791, "loss": 3.5784, "step": 30900 }, { "epoch": 2.0998097567604295, "grad_norm": 2.1976678371429443, "learning_rate": 0.0007375237804049464, "loss": 3.8541, "step": 30905 }, { "epoch": 2.100149476831091, "grad_norm": 1.0396888256072998, "learning_rate": 0.0007374813153961136, "loss": 3.6352, "step": 30910 }, { "epoch": 2.100489196901753, "grad_norm": 0.8514304757118225, "learning_rate": 0.0007374388503872809, "loss": 3.6542, "step": 30915 }, { "epoch": 2.100828916972415, "grad_norm": 0.9642804861068726, "learning_rate": 0.0007373963853784482, "loss": 3.2502, "step": 30920 }, { "epoch": 2.1011686370430764, "grad_norm": 1.1004514694213867, "learning_rate": 0.0007373539203696154, "loss": 3.765, "step": 30925 }, { "epoch": 2.101508357113738, "grad_norm": 0.7562880516052246, "learning_rate": 0.0007373114553607827, "loss": 3.4817, "step": 30930 }, { "epoch": 2.1018480771844, "grad_norm": 0.8613396883010864, "learning_rate": 0.0007372689903519501, "loss": 3.6272, "step": 30935 }, { "epoch": 2.1021877972550618, "grad_norm": 0.6694457530975342, "learning_rate": 0.0007372265253431173, "loss": 3.7453, "step": 30940 }, { "epoch": 2.1025275173257234, "grad_norm": 0.8620917201042175, "learning_rate": 0.0007371840603342845, "loss": 3.2337, "step": 30945 }, { "epoch": 2.1028672373963855, "grad_norm": 0.9174368381500244, "learning_rate": 0.0007371415953254519, "loss": 3.441, "step": 30950 }, { "epoch": 2.103206957467047, "grad_norm": 1.4081940650939941, "learning_rate": 0.0007370991303166191, "loss": 3.6905, "step": 30955 }, { "epoch": 2.1035466775377087, "grad_norm": 4.196097373962402, "learning_rate": 0.0007370566653077863, "loss": 3.913, "step": 30960 }, { "epoch": 2.103886397608371, "grad_norm": 0.9279444217681885, "learning_rate": 0.0007370142002989537, "loss": 3.5284, "step": 30965 }, { "epoch": 2.1042261176790324, "grad_norm": 0.8992620706558228, "learning_rate": 0.000736971735290121, "loss": 3.3375, "step": 30970 }, { "epoch": 2.104565837749694, "grad_norm": 1.0974339246749878, "learning_rate": 0.0007369292702812882, "loss": 3.5736, "step": 30975 }, { "epoch": 2.104905557820356, "grad_norm": 0.8205392360687256, "learning_rate": 0.0007368868052724556, "loss": 3.4748, "step": 30980 }, { "epoch": 2.105245277891018, "grad_norm": 0.823762834072113, "learning_rate": 0.0007368443402636228, "loss": 3.4716, "step": 30985 }, { "epoch": 2.1055849979616794, "grad_norm": 0.9685941934585571, "learning_rate": 0.00073680187525479, "loss": 3.5789, "step": 30990 }, { "epoch": 2.1059247180323415, "grad_norm": 0.9597747325897217, "learning_rate": 0.0007367594102459573, "loss": 3.7585, "step": 30995 }, { "epoch": 2.106264438103003, "grad_norm": 0.9118058085441589, "learning_rate": 0.0007367169452371246, "loss": 3.6869, "step": 31000 }, { "epoch": 2.1066041581736648, "grad_norm": 0.7891155481338501, "learning_rate": 0.0007366744802282919, "loss": 3.4723, "step": 31005 }, { "epoch": 2.106943878244327, "grad_norm": 0.8345896601676941, "learning_rate": 0.0007366320152194592, "loss": 3.5754, "step": 31010 }, { "epoch": 2.1072835983149885, "grad_norm": 0.8935109376907349, "learning_rate": 0.0007365895502106265, "loss": 3.8115, "step": 31015 }, { "epoch": 2.10762331838565, "grad_norm": 0.7765200138092041, "learning_rate": 0.0007365470852017937, "loss": 3.4921, "step": 31020 }, { "epoch": 2.107963038456312, "grad_norm": 0.9236768484115601, "learning_rate": 0.000736504620192961, "loss": 3.5042, "step": 31025 }, { "epoch": 2.108302758526974, "grad_norm": 0.7747576236724854, "learning_rate": 0.0007364621551841283, "loss": 3.6333, "step": 31030 }, { "epoch": 2.1086424785976354, "grad_norm": 1.0654182434082031, "learning_rate": 0.0007364196901752955, "loss": 3.3361, "step": 31035 }, { "epoch": 2.1089821986682975, "grad_norm": 1.389291524887085, "learning_rate": 0.0007363772251664629, "loss": 3.5247, "step": 31040 }, { "epoch": 2.109321918738959, "grad_norm": 0.6754319667816162, "learning_rate": 0.0007363347601576301, "loss": 3.6945, "step": 31045 }, { "epoch": 2.1096616388096208, "grad_norm": 1.1766279935836792, "learning_rate": 0.0007362922951487974, "loss": 3.5849, "step": 31050 }, { "epoch": 2.110001358880283, "grad_norm": 0.8909016847610474, "learning_rate": 0.0007362498301399647, "loss": 3.8529, "step": 31055 }, { "epoch": 2.1103410789509445, "grad_norm": 1.0372370481491089, "learning_rate": 0.0007362073651311319, "loss": 3.5731, "step": 31060 }, { "epoch": 2.110680799021606, "grad_norm": 1.0577566623687744, "learning_rate": 0.0007361649001222992, "loss": 3.6801, "step": 31065 }, { "epoch": 2.111020519092268, "grad_norm": 0.8992224931716919, "learning_rate": 0.0007361224351134666, "loss": 3.6454, "step": 31070 }, { "epoch": 2.11136023916293, "grad_norm": 0.7491416931152344, "learning_rate": 0.0007360799701046338, "loss": 3.8411, "step": 31075 }, { "epoch": 2.1116999592335914, "grad_norm": 0.98088139295578, "learning_rate": 0.0007360375050958011, "loss": 3.4313, "step": 31080 }, { "epoch": 2.112039679304253, "grad_norm": 1.5353848934173584, "learning_rate": 0.0007359950400869684, "loss": 3.4704, "step": 31085 }, { "epoch": 2.112379399374915, "grad_norm": 0.8137950301170349, "learning_rate": 0.0007359525750781356, "loss": 3.3954, "step": 31090 }, { "epoch": 2.1127191194455768, "grad_norm": 0.7912168502807617, "learning_rate": 0.0007359101100693028, "loss": 3.5286, "step": 31095 }, { "epoch": 2.1130588395162384, "grad_norm": 0.7438257932662964, "learning_rate": 0.0007358676450604702, "loss": 3.366, "step": 31100 }, { "epoch": 2.1133985595869005, "grad_norm": 0.9867879152297974, "learning_rate": 0.0007358251800516375, "loss": 3.6732, "step": 31105 }, { "epoch": 2.113738279657562, "grad_norm": 1.0709060430526733, "learning_rate": 0.0007357827150428047, "loss": 3.744, "step": 31110 }, { "epoch": 2.1140779997282237, "grad_norm": 1.883725643157959, "learning_rate": 0.0007357402500339721, "loss": 3.3822, "step": 31115 }, { "epoch": 2.114417719798886, "grad_norm": 0.7549963593482971, "learning_rate": 0.0007356977850251393, "loss": 3.5248, "step": 31120 }, { "epoch": 2.1147574398695475, "grad_norm": 0.9590659737586975, "learning_rate": 0.0007356553200163065, "loss": 3.8197, "step": 31125 }, { "epoch": 2.115097159940209, "grad_norm": 0.9155833721160889, "learning_rate": 0.0007356128550074739, "loss": 3.4443, "step": 31130 }, { "epoch": 2.115436880010871, "grad_norm": 0.922195315361023, "learning_rate": 0.0007355703899986411, "loss": 3.227, "step": 31135 }, { "epoch": 2.115776600081533, "grad_norm": 0.8331239223480225, "learning_rate": 0.0007355279249898084, "loss": 3.3328, "step": 31140 }, { "epoch": 2.1161163201521944, "grad_norm": 0.8495606184005737, "learning_rate": 0.0007354854599809757, "loss": 3.8232, "step": 31145 }, { "epoch": 2.1164560402228565, "grad_norm": 6.0059990882873535, "learning_rate": 0.000735442994972143, "loss": 3.9294, "step": 31150 }, { "epoch": 2.116795760293518, "grad_norm": 0.8534340262413025, "learning_rate": 0.0007354005299633102, "loss": 3.3439, "step": 31155 }, { "epoch": 2.1171354803641798, "grad_norm": 1.2177562713623047, "learning_rate": 0.0007353580649544775, "loss": 3.2805, "step": 31160 }, { "epoch": 2.117475200434842, "grad_norm": 0.9601883292198181, "learning_rate": 0.0007353155999456448, "loss": 3.7246, "step": 31165 }, { "epoch": 2.1178149205055035, "grad_norm": 1.1258084774017334, "learning_rate": 0.000735273134936812, "loss": 3.4845, "step": 31170 }, { "epoch": 2.118154640576165, "grad_norm": 0.8835456967353821, "learning_rate": 0.0007352306699279794, "loss": 3.6241, "step": 31175 }, { "epoch": 2.118494360646827, "grad_norm": 0.7710118889808655, "learning_rate": 0.0007351882049191467, "loss": 3.6438, "step": 31180 }, { "epoch": 2.118834080717489, "grad_norm": 0.8302015066146851, "learning_rate": 0.000735145739910314, "loss": 3.474, "step": 31185 }, { "epoch": 2.1191738007881504, "grad_norm": 0.879035472869873, "learning_rate": 0.0007351032749014812, "loss": 3.4775, "step": 31190 }, { "epoch": 2.1195135208588125, "grad_norm": 1.0090245008468628, "learning_rate": 0.0007350608098926484, "loss": 3.4724, "step": 31195 }, { "epoch": 2.119853240929474, "grad_norm": 0.8304756283760071, "learning_rate": 0.0007350183448838158, "loss": 3.6889, "step": 31200 }, { "epoch": 2.1201929610001358, "grad_norm": 1.096398115158081, "learning_rate": 0.000734975879874983, "loss": 3.755, "step": 31205 }, { "epoch": 2.120532681070798, "grad_norm": 1.7347301244735718, "learning_rate": 0.0007349334148661503, "loss": 3.4463, "step": 31210 }, { "epoch": 2.1208724011414595, "grad_norm": 0.9564123153686523, "learning_rate": 0.0007348909498573177, "loss": 3.6471, "step": 31215 }, { "epoch": 2.121212121212121, "grad_norm": 2.1438848972320557, "learning_rate": 0.0007348484848484849, "loss": 3.5666, "step": 31220 }, { "epoch": 2.121551841282783, "grad_norm": 1.0155041217803955, "learning_rate": 0.0007348060198396521, "loss": 3.4862, "step": 31225 }, { "epoch": 2.121891561353445, "grad_norm": 0.7622844576835632, "learning_rate": 0.0007347635548308195, "loss": 3.6398, "step": 31230 }, { "epoch": 2.1222312814241064, "grad_norm": 0.9833621382713318, "learning_rate": 0.0007347210898219867, "loss": 3.5922, "step": 31235 }, { "epoch": 2.1225710014947685, "grad_norm": 0.8327182531356812, "learning_rate": 0.0007346786248131539, "loss": 3.601, "step": 31240 }, { "epoch": 2.12291072156543, "grad_norm": 0.7352569103240967, "learning_rate": 0.0007346361598043214, "loss": 3.6843, "step": 31245 }, { "epoch": 2.123250441636092, "grad_norm": 2.2563624382019043, "learning_rate": 0.0007345936947954886, "loss": 3.6987, "step": 31250 }, { "epoch": 2.123590161706754, "grad_norm": 1.4925892353057861, "learning_rate": 0.0007345512297866558, "loss": 3.6914, "step": 31255 }, { "epoch": 2.1239298817774155, "grad_norm": 0.6803785562515259, "learning_rate": 0.0007345087647778231, "loss": 3.4913, "step": 31260 }, { "epoch": 2.124269601848077, "grad_norm": 0.9997332096099854, "learning_rate": 0.0007344662997689904, "loss": 3.2839, "step": 31265 }, { "epoch": 2.1246093219187387, "grad_norm": 0.9116175770759583, "learning_rate": 0.0007344238347601576, "loss": 3.491, "step": 31270 }, { "epoch": 2.124949041989401, "grad_norm": 0.877632737159729, "learning_rate": 0.0007343813697513249, "loss": 3.2774, "step": 31275 }, { "epoch": 2.1252887620600625, "grad_norm": 0.8134282231330872, "learning_rate": 0.0007343389047424923, "loss": 3.5925, "step": 31280 }, { "epoch": 2.125628482130724, "grad_norm": 0.8061376810073853, "learning_rate": 0.0007342964397336595, "loss": 3.6866, "step": 31285 }, { "epoch": 2.125968202201386, "grad_norm": 0.9105165004730225, "learning_rate": 0.0007342539747248268, "loss": 3.6668, "step": 31290 }, { "epoch": 2.126307922272048, "grad_norm": 0.7287615537643433, "learning_rate": 0.000734211509715994, "loss": 3.102, "step": 31295 }, { "epoch": 2.1266476423427094, "grad_norm": 0.8636132478713989, "learning_rate": 0.0007341690447071613, "loss": 3.3987, "step": 31300 }, { "epoch": 2.1269873624133715, "grad_norm": 1.0694997310638428, "learning_rate": 0.0007341265796983286, "loss": 3.393, "step": 31305 }, { "epoch": 2.127327082484033, "grad_norm": 0.7727710008621216, "learning_rate": 0.0007340841146894958, "loss": 3.4094, "step": 31310 }, { "epoch": 2.1276668025546948, "grad_norm": 0.7723323702812195, "learning_rate": 0.0007340416496806632, "loss": 3.4469, "step": 31315 }, { "epoch": 2.128006522625357, "grad_norm": 0.9947643280029297, "learning_rate": 0.0007339991846718305, "loss": 3.5012, "step": 31320 }, { "epoch": 2.1283462426960185, "grad_norm": 1.4604175090789795, "learning_rate": 0.0007339567196629977, "loss": 3.5038, "step": 31325 }, { "epoch": 2.12868596276668, "grad_norm": 1.0697214603424072, "learning_rate": 0.000733914254654165, "loss": 3.5198, "step": 31330 }, { "epoch": 2.129025682837342, "grad_norm": 1.5371419191360474, "learning_rate": 0.0007338717896453323, "loss": 3.7354, "step": 31335 }, { "epoch": 2.129365402908004, "grad_norm": 0.8357107043266296, "learning_rate": 0.0007338293246364995, "loss": 3.5535, "step": 31340 }, { "epoch": 2.1297051229786654, "grad_norm": 1.016566514968872, "learning_rate": 0.0007337868596276667, "loss": 3.5474, "step": 31345 }, { "epoch": 2.1300448430493275, "grad_norm": 0.8068065047264099, "learning_rate": 0.0007337443946188342, "loss": 3.6461, "step": 31350 }, { "epoch": 2.130384563119989, "grad_norm": 0.9013103246688843, "learning_rate": 0.0007337019296100014, "loss": 3.5535, "step": 31355 }, { "epoch": 2.1307242831906508, "grad_norm": 1.0109392404556274, "learning_rate": 0.0007336594646011686, "loss": 3.3527, "step": 31360 }, { "epoch": 2.131064003261313, "grad_norm": 0.8996456265449524, "learning_rate": 0.000733616999592336, "loss": 3.5124, "step": 31365 }, { "epoch": 2.1314037233319745, "grad_norm": 0.736445963382721, "learning_rate": 0.0007335745345835032, "loss": 3.5266, "step": 31370 }, { "epoch": 2.131743443402636, "grad_norm": 0.9316646456718445, "learning_rate": 0.0007335320695746704, "loss": 3.5799, "step": 31375 }, { "epoch": 2.132083163473298, "grad_norm": 1.0985591411590576, "learning_rate": 0.0007334896045658378, "loss": 3.4757, "step": 31380 }, { "epoch": 2.13242288354396, "grad_norm": 0.7550480365753174, "learning_rate": 0.0007334471395570051, "loss": 3.6612, "step": 31385 }, { "epoch": 2.1327626036146214, "grad_norm": 1.1711262464523315, "learning_rate": 0.0007334046745481723, "loss": 3.7815, "step": 31390 }, { "epoch": 2.1331023236852835, "grad_norm": 0.9393274784088135, "learning_rate": 0.0007333622095393396, "loss": 3.3389, "step": 31395 }, { "epoch": 2.133442043755945, "grad_norm": 0.9003846049308777, "learning_rate": 0.0007333197445305069, "loss": 3.6046, "step": 31400 }, { "epoch": 2.133781763826607, "grad_norm": 0.8741315603256226, "learning_rate": 0.0007332772795216741, "loss": 3.3711, "step": 31405 }, { "epoch": 2.134121483897269, "grad_norm": 0.8278862833976746, "learning_rate": 0.0007332348145128414, "loss": 3.6874, "step": 31410 }, { "epoch": 2.1344612039679305, "grad_norm": 1.0153931379318237, "learning_rate": 0.0007331923495040087, "loss": 3.5025, "step": 31415 }, { "epoch": 2.134800924038592, "grad_norm": 0.8533931374549866, "learning_rate": 0.000733149884495176, "loss": 3.5955, "step": 31420 }, { "epoch": 2.1351406441092537, "grad_norm": 0.9252077341079712, "learning_rate": 0.0007331074194863433, "loss": 3.7191, "step": 31425 }, { "epoch": 2.135480364179916, "grad_norm": 1.2899731397628784, "learning_rate": 0.0007330649544775106, "loss": 3.6253, "step": 31430 }, { "epoch": 2.1358200842505775, "grad_norm": 0.9998446106910706, "learning_rate": 0.0007330224894686778, "loss": 3.4152, "step": 31435 }, { "epoch": 2.136159804321239, "grad_norm": 0.8295803070068359, "learning_rate": 0.0007329800244598451, "loss": 3.5939, "step": 31440 }, { "epoch": 2.136499524391901, "grad_norm": 0.852884829044342, "learning_rate": 0.0007329375594510123, "loss": 3.3085, "step": 31445 }, { "epoch": 2.136839244462563, "grad_norm": 0.9598560929298401, "learning_rate": 0.0007328950944421796, "loss": 3.5181, "step": 31450 }, { "epoch": 2.1371789645332244, "grad_norm": 1.0232887268066406, "learning_rate": 0.000732852629433347, "loss": 3.5802, "step": 31455 }, { "epoch": 2.1375186846038865, "grad_norm": 0.7499103546142578, "learning_rate": 0.0007328101644245142, "loss": 3.5178, "step": 31460 }, { "epoch": 2.137858404674548, "grad_norm": 1.0047237873077393, "learning_rate": 0.0007327676994156815, "loss": 3.3931, "step": 31465 }, { "epoch": 2.1381981247452098, "grad_norm": 1.0767709016799927, "learning_rate": 0.0007327252344068488, "loss": 3.2317, "step": 31470 }, { "epoch": 2.138537844815872, "grad_norm": 0.9548283815383911, "learning_rate": 0.000732682769398016, "loss": 3.3947, "step": 31475 }, { "epoch": 2.1388775648865335, "grad_norm": 0.9128632545471191, "learning_rate": 0.0007326403043891832, "loss": 3.3272, "step": 31480 }, { "epoch": 2.139217284957195, "grad_norm": 0.9172502160072327, "learning_rate": 0.0007325978393803506, "loss": 3.6509, "step": 31485 }, { "epoch": 2.139557005027857, "grad_norm": 1.179800033569336, "learning_rate": 0.0007325553743715179, "loss": 3.6627, "step": 31490 }, { "epoch": 2.139896725098519, "grad_norm": 1.347105622291565, "learning_rate": 0.0007325129093626851, "loss": 3.4968, "step": 31495 }, { "epoch": 2.1402364451691804, "grad_norm": 0.8214390277862549, "learning_rate": 0.0007324704443538525, "loss": 3.3475, "step": 31500 }, { "epoch": 2.1405761652398425, "grad_norm": 1.0101730823516846, "learning_rate": 0.0007324279793450197, "loss": 3.4742, "step": 31505 }, { "epoch": 2.140915885310504, "grad_norm": 1.0550105571746826, "learning_rate": 0.0007323855143361869, "loss": 3.4472, "step": 31510 }, { "epoch": 2.1412556053811658, "grad_norm": 0.9533572793006897, "learning_rate": 0.0007323430493273543, "loss": 3.4693, "step": 31515 }, { "epoch": 2.141595325451828, "grad_norm": 0.8550202250480652, "learning_rate": 0.0007323005843185215, "loss": 3.5023, "step": 31520 }, { "epoch": 2.1419350455224895, "grad_norm": 0.8884824514389038, "learning_rate": 0.0007322581193096889, "loss": 3.3, "step": 31525 }, { "epoch": 2.142274765593151, "grad_norm": 0.9333264231681824, "learning_rate": 0.0007322156543008562, "loss": 3.6576, "step": 31530 }, { "epoch": 2.142614485663813, "grad_norm": 0.853769838809967, "learning_rate": 0.0007321731892920234, "loss": 3.5697, "step": 31535 }, { "epoch": 2.142954205734475, "grad_norm": 0.715916633605957, "learning_rate": 0.0007321307242831907, "loss": 3.6361, "step": 31540 }, { "epoch": 2.1432939258051364, "grad_norm": 0.8997506499290466, "learning_rate": 0.0007320882592743579, "loss": 3.4186, "step": 31545 }, { "epoch": 2.1436336458757985, "grad_norm": 0.7232168912887573, "learning_rate": 0.0007320457942655252, "loss": 3.8004, "step": 31550 }, { "epoch": 2.14397336594646, "grad_norm": 1.2245670557022095, "learning_rate": 0.0007320033292566925, "loss": 3.6357, "step": 31555 }, { "epoch": 2.144313086017122, "grad_norm": 0.9550303220748901, "learning_rate": 0.0007319608642478598, "loss": 3.8391, "step": 31560 }, { "epoch": 2.144652806087784, "grad_norm": 1.229727864265442, "learning_rate": 0.0007319183992390271, "loss": 3.5964, "step": 31565 }, { "epoch": 2.1449925261584455, "grad_norm": 0.8532562851905823, "learning_rate": 0.0007318759342301944, "loss": 3.4641, "step": 31570 }, { "epoch": 2.145332246229107, "grad_norm": 0.9707767963409424, "learning_rate": 0.0007318334692213616, "loss": 3.6332, "step": 31575 }, { "epoch": 2.145671966299769, "grad_norm": 1.135162591934204, "learning_rate": 0.0007317910042125288, "loss": 3.4967, "step": 31580 }, { "epoch": 2.146011686370431, "grad_norm": 0.9108568429946899, "learning_rate": 0.0007317485392036962, "loss": 3.5378, "step": 31585 }, { "epoch": 2.1463514064410925, "grad_norm": 0.9217504858970642, "learning_rate": 0.0007317060741948634, "loss": 3.5773, "step": 31590 }, { "epoch": 2.1466911265117545, "grad_norm": 1.0232627391815186, "learning_rate": 0.0007316636091860307, "loss": 3.4168, "step": 31595 }, { "epoch": 2.147030846582416, "grad_norm": 0.7857924699783325, "learning_rate": 0.0007316211441771981, "loss": 3.5239, "step": 31600 }, { "epoch": 2.147370566653078, "grad_norm": 1.0647212266921997, "learning_rate": 0.0007315786791683653, "loss": 3.3871, "step": 31605 }, { "epoch": 2.14771028672374, "grad_norm": 0.9916369318962097, "learning_rate": 0.0007315362141595325, "loss": 3.3874, "step": 31610 }, { "epoch": 2.1480500067944015, "grad_norm": 0.7761172652244568, "learning_rate": 0.0007314937491506999, "loss": 3.6364, "step": 31615 }, { "epoch": 2.148389726865063, "grad_norm": 0.8220212459564209, "learning_rate": 0.0007314512841418671, "loss": 3.5365, "step": 31620 }, { "epoch": 2.1487294469357248, "grad_norm": 1.8060420751571655, "learning_rate": 0.0007314088191330343, "loss": 3.6777, "step": 31625 }, { "epoch": 2.149069167006387, "grad_norm": 1.0996476411819458, "learning_rate": 0.0007313663541242018, "loss": 3.5047, "step": 31630 }, { "epoch": 2.1494088870770485, "grad_norm": 1.1521632671356201, "learning_rate": 0.000731323889115369, "loss": 3.4793, "step": 31635 }, { "epoch": 2.14974860714771, "grad_norm": 1.1348285675048828, "learning_rate": 0.0007312814241065362, "loss": 3.6641, "step": 31640 }, { "epoch": 2.150088327218372, "grad_norm": 0.9098120331764221, "learning_rate": 0.0007312389590977035, "loss": 3.4719, "step": 31645 }, { "epoch": 2.150428047289034, "grad_norm": 0.8423128724098206, "learning_rate": 0.0007311964940888708, "loss": 3.7904, "step": 31650 }, { "epoch": 2.1507677673596954, "grad_norm": 0.7840149998664856, "learning_rate": 0.000731154029080038, "loss": 3.7199, "step": 31655 }, { "epoch": 2.1511074874303575, "grad_norm": 0.7220330238342285, "learning_rate": 0.0007311115640712054, "loss": 3.7743, "step": 31660 }, { "epoch": 2.151447207501019, "grad_norm": 0.7513092160224915, "learning_rate": 0.0007310690990623727, "loss": 3.7024, "step": 31665 }, { "epoch": 2.1517869275716808, "grad_norm": 0.7922874689102173, "learning_rate": 0.0007310266340535399, "loss": 3.6589, "step": 31670 }, { "epoch": 2.152126647642343, "grad_norm": 0.9872101545333862, "learning_rate": 0.0007309841690447072, "loss": 3.6992, "step": 31675 }, { "epoch": 2.1524663677130045, "grad_norm": 1.164272665977478, "learning_rate": 0.0007309417040358744, "loss": 3.4242, "step": 31680 }, { "epoch": 2.152806087783666, "grad_norm": 0.9403621554374695, "learning_rate": 0.0007308992390270417, "loss": 3.5247, "step": 31685 }, { "epoch": 2.153145807854328, "grad_norm": 1.1007064580917358, "learning_rate": 0.000730856774018209, "loss": 3.8102, "step": 31690 }, { "epoch": 2.15348552792499, "grad_norm": 0.7965973615646362, "learning_rate": 0.0007308143090093763, "loss": 3.1304, "step": 31695 }, { "epoch": 2.1538252479956514, "grad_norm": 0.8228327035903931, "learning_rate": 0.0007307718440005436, "loss": 3.6233, "step": 31700 }, { "epoch": 2.1541649680663135, "grad_norm": 1.12892484664917, "learning_rate": 0.0007307293789917109, "loss": 3.6274, "step": 31705 }, { "epoch": 2.154504688136975, "grad_norm": 0.9403474926948547, "learning_rate": 0.0007306869139828781, "loss": 3.5683, "step": 31710 }, { "epoch": 2.154844408207637, "grad_norm": 0.7245636582374573, "learning_rate": 0.0007306444489740454, "loss": 3.4514, "step": 31715 }, { "epoch": 2.155184128278299, "grad_norm": 1.0180444717407227, "learning_rate": 0.0007306019839652127, "loss": 3.5083, "step": 31720 }, { "epoch": 2.1555238483489605, "grad_norm": 0.8991326093673706, "learning_rate": 0.0007305595189563799, "loss": 3.4947, "step": 31725 }, { "epoch": 2.155863568419622, "grad_norm": 0.7014590501785278, "learning_rate": 0.0007305170539475472, "loss": 3.7489, "step": 31730 }, { "epoch": 2.156203288490284, "grad_norm": 0.9303785562515259, "learning_rate": 0.0007304745889387146, "loss": 3.2134, "step": 31735 }, { "epoch": 2.156543008560946, "grad_norm": 0.9980180859565735, "learning_rate": 0.0007304321239298818, "loss": 3.7292, "step": 31740 }, { "epoch": 2.1568827286316075, "grad_norm": 0.8129411339759827, "learning_rate": 0.000730389658921049, "loss": 3.4917, "step": 31745 }, { "epoch": 2.1572224487022695, "grad_norm": 0.7390148043632507, "learning_rate": 0.0007303471939122164, "loss": 3.261, "step": 31750 }, { "epoch": 2.157562168772931, "grad_norm": 0.7620751857757568, "learning_rate": 0.0007303047289033836, "loss": 3.5553, "step": 31755 }, { "epoch": 2.157901888843593, "grad_norm": 1.3783831596374512, "learning_rate": 0.0007302622638945508, "loss": 3.5328, "step": 31760 }, { "epoch": 2.1582416089142544, "grad_norm": 0.9731869697570801, "learning_rate": 0.0007302197988857183, "loss": 3.428, "step": 31765 }, { "epoch": 2.1585813289849165, "grad_norm": 0.830479085445404, "learning_rate": 0.0007301773338768855, "loss": 3.6724, "step": 31770 }, { "epoch": 2.158921049055578, "grad_norm": 1.0441572666168213, "learning_rate": 0.0007301348688680527, "loss": 3.2826, "step": 31775 }, { "epoch": 2.1592607691262398, "grad_norm": 0.9143582582473755, "learning_rate": 0.00073009240385922, "loss": 3.4812, "step": 31780 }, { "epoch": 2.159600489196902, "grad_norm": 0.7824612855911255, "learning_rate": 0.0007300499388503873, "loss": 3.5202, "step": 31785 }, { "epoch": 2.1599402092675635, "grad_norm": 0.8735954761505127, "learning_rate": 0.0007300074738415545, "loss": 3.8512, "step": 31790 }, { "epoch": 2.160279929338225, "grad_norm": 0.798884928226471, "learning_rate": 0.0007299650088327218, "loss": 3.6352, "step": 31795 }, { "epoch": 2.160619649408887, "grad_norm": 0.7739851474761963, "learning_rate": 0.0007299225438238892, "loss": 3.3699, "step": 31800 }, { "epoch": 2.160959369479549, "grad_norm": 1.1707955598831177, "learning_rate": 0.0007298800788150564, "loss": 3.6818, "step": 31805 }, { "epoch": 2.1612990895502104, "grad_norm": 0.9298028945922852, "learning_rate": 0.0007298376138062237, "loss": 3.8862, "step": 31810 }, { "epoch": 2.1616388096208725, "grad_norm": 0.7256051301956177, "learning_rate": 0.000729795148797391, "loss": 3.6736, "step": 31815 }, { "epoch": 2.161978529691534, "grad_norm": 0.8461595773696899, "learning_rate": 0.0007297526837885582, "loss": 3.555, "step": 31820 }, { "epoch": 2.1623182497621958, "grad_norm": 0.7672905921936035, "learning_rate": 0.0007297102187797255, "loss": 3.7106, "step": 31825 }, { "epoch": 2.162657969832858, "grad_norm": 1.0254000425338745, "learning_rate": 0.0007296677537708927, "loss": 3.5311, "step": 31830 }, { "epoch": 2.1629976899035195, "grad_norm": 0.8974380493164062, "learning_rate": 0.0007296252887620601, "loss": 3.4838, "step": 31835 }, { "epoch": 2.163337409974181, "grad_norm": 0.8667131662368774, "learning_rate": 0.0007295828237532274, "loss": 3.8285, "step": 31840 }, { "epoch": 2.163677130044843, "grad_norm": 0.8348760008811951, "learning_rate": 0.0007295403587443946, "loss": 3.8101, "step": 31845 }, { "epoch": 2.164016850115505, "grad_norm": 0.9489355683326721, "learning_rate": 0.0007294978937355619, "loss": 3.5214, "step": 31850 }, { "epoch": 2.1643565701861665, "grad_norm": 0.6789339184761047, "learning_rate": 0.0007294554287267292, "loss": 3.6683, "step": 31855 }, { "epoch": 2.1646962902568285, "grad_norm": 0.8377249836921692, "learning_rate": 0.0007294129637178964, "loss": 3.6151, "step": 31860 }, { "epoch": 2.16503601032749, "grad_norm": 0.7281380891799927, "learning_rate": 0.0007293704987090638, "loss": 3.7247, "step": 31865 }, { "epoch": 2.165375730398152, "grad_norm": 0.8438053131103516, "learning_rate": 0.0007293280337002311, "loss": 3.6184, "step": 31870 }, { "epoch": 2.165715450468814, "grad_norm": 0.9111326336860657, "learning_rate": 0.0007292855686913983, "loss": 3.5633, "step": 31875 }, { "epoch": 2.1660551705394755, "grad_norm": 0.9842153191566467, "learning_rate": 0.0007292431036825656, "loss": 3.3378, "step": 31880 }, { "epoch": 2.166394890610137, "grad_norm": 0.6745507121086121, "learning_rate": 0.0007292006386737329, "loss": 3.7712, "step": 31885 }, { "epoch": 2.166734610680799, "grad_norm": 0.6508551836013794, "learning_rate": 0.0007291581736649001, "loss": 3.6096, "step": 31890 }, { "epoch": 2.167074330751461, "grad_norm": 0.931950569152832, "learning_rate": 0.0007291157086560674, "loss": 3.5407, "step": 31895 }, { "epoch": 2.1674140508221225, "grad_norm": 0.9310864210128784, "learning_rate": 0.0007290732436472347, "loss": 3.5262, "step": 31900 }, { "epoch": 2.1677537708927845, "grad_norm": 0.7360971570014954, "learning_rate": 0.000729030778638402, "loss": 3.6069, "step": 31905 }, { "epoch": 2.168093490963446, "grad_norm": 0.9720098376274109, "learning_rate": 0.0007289883136295693, "loss": 3.7101, "step": 31910 }, { "epoch": 2.168433211034108, "grad_norm": 1.2836759090423584, "learning_rate": 0.0007289458486207366, "loss": 3.6137, "step": 31915 }, { "epoch": 2.16877293110477, "grad_norm": 0.9214504957199097, "learning_rate": 0.0007289033836119038, "loss": 3.6009, "step": 31920 }, { "epoch": 2.1691126511754315, "grad_norm": 0.8209858536720276, "learning_rate": 0.0007288609186030711, "loss": 3.5509, "step": 31925 }, { "epoch": 2.169452371246093, "grad_norm": 0.8441799879074097, "learning_rate": 0.0007288184535942383, "loss": 3.3762, "step": 31930 }, { "epoch": 2.169792091316755, "grad_norm": 0.8889607191085815, "learning_rate": 0.0007287759885854056, "loss": 3.6119, "step": 31935 }, { "epoch": 2.170131811387417, "grad_norm": 0.7439685463905334, "learning_rate": 0.000728733523576573, "loss": 3.6788, "step": 31940 }, { "epoch": 2.1704715314580785, "grad_norm": 0.881527841091156, "learning_rate": 0.0007286910585677402, "loss": 3.575, "step": 31945 }, { "epoch": 2.1708112515287405, "grad_norm": 1.2256739139556885, "learning_rate": 0.0007286485935589075, "loss": 3.4081, "step": 31950 }, { "epoch": 2.171150971599402, "grad_norm": 0.9902128577232361, "learning_rate": 0.0007286061285500748, "loss": 3.6815, "step": 31955 }, { "epoch": 2.171490691670064, "grad_norm": 0.7661766409873962, "learning_rate": 0.000728563663541242, "loss": 3.6245, "step": 31960 }, { "epoch": 2.1718304117407254, "grad_norm": 0.9393168687820435, "learning_rate": 0.0007285211985324092, "loss": 3.6549, "step": 31965 }, { "epoch": 2.1721701318113875, "grad_norm": 0.7727407217025757, "learning_rate": 0.0007284787335235766, "loss": 3.605, "step": 31970 }, { "epoch": 2.172509851882049, "grad_norm": 1.4381295442581177, "learning_rate": 0.0007284362685147439, "loss": 3.5801, "step": 31975 }, { "epoch": 2.172849571952711, "grad_norm": 1.126309871673584, "learning_rate": 0.0007283938035059111, "loss": 3.4406, "step": 31980 }, { "epoch": 2.173189292023373, "grad_norm": 1.1382906436920166, "learning_rate": 0.0007283513384970785, "loss": 3.601, "step": 31985 }, { "epoch": 2.1735290120940345, "grad_norm": 0.8750792741775513, "learning_rate": 0.0007283088734882457, "loss": 3.4852, "step": 31990 }, { "epoch": 2.173868732164696, "grad_norm": 0.9577144980430603, "learning_rate": 0.0007282664084794129, "loss": 3.4796, "step": 31995 }, { "epoch": 2.174208452235358, "grad_norm": 0.791410505771637, "learning_rate": 0.0007282239434705803, "loss": 3.4427, "step": 32000 }, { "epoch": 2.17454817230602, "grad_norm": 1.2013370990753174, "learning_rate": 0.0007281814784617475, "loss": 3.9451, "step": 32005 }, { "epoch": 2.1748878923766815, "grad_norm": 1.6336698532104492, "learning_rate": 0.0007281390134529148, "loss": 3.5103, "step": 32010 }, { "epoch": 2.1752276124473435, "grad_norm": 0.744517982006073, "learning_rate": 0.0007280965484440822, "loss": 3.6145, "step": 32015 }, { "epoch": 2.175567332518005, "grad_norm": 1.6193718910217285, "learning_rate": 0.0007280540834352494, "loss": 3.4132, "step": 32020 }, { "epoch": 2.175907052588667, "grad_norm": 0.9837519526481628, "learning_rate": 0.0007280116184264166, "loss": 3.5978, "step": 32025 }, { "epoch": 2.176246772659329, "grad_norm": 0.8598582744598389, "learning_rate": 0.0007279691534175839, "loss": 3.588, "step": 32030 }, { "epoch": 2.1765864927299905, "grad_norm": 1.020618200302124, "learning_rate": 0.0007279266884087512, "loss": 3.6982, "step": 32035 }, { "epoch": 2.176926212800652, "grad_norm": 0.7065830826759338, "learning_rate": 0.0007278842233999184, "loss": 3.3536, "step": 32040 }, { "epoch": 2.177265932871314, "grad_norm": 0.7865496873855591, "learning_rate": 0.0007278417583910858, "loss": 3.6723, "step": 32045 }, { "epoch": 2.177605652941976, "grad_norm": 1.6191611289978027, "learning_rate": 0.0007277992933822531, "loss": 3.4818, "step": 32050 }, { "epoch": 2.1779453730126375, "grad_norm": 1.0523301362991333, "learning_rate": 0.0007277568283734203, "loss": 3.4491, "step": 32055 }, { "epoch": 2.1782850930832995, "grad_norm": 0.7991454005241394, "learning_rate": 0.0007277143633645876, "loss": 3.6163, "step": 32060 }, { "epoch": 2.178624813153961, "grad_norm": 0.8753361701965332, "learning_rate": 0.0007276718983557549, "loss": 3.5957, "step": 32065 }, { "epoch": 2.178964533224623, "grad_norm": 0.7132070064544678, "learning_rate": 0.0007276294333469221, "loss": 3.6721, "step": 32070 }, { "epoch": 2.179304253295285, "grad_norm": 1.0106687545776367, "learning_rate": 0.0007275869683380894, "loss": 3.5634, "step": 32075 }, { "epoch": 2.1796439733659465, "grad_norm": 0.8802247047424316, "learning_rate": 0.0007275445033292567, "loss": 3.6829, "step": 32080 }, { "epoch": 2.179983693436608, "grad_norm": 0.8940814733505249, "learning_rate": 0.000727502038320424, "loss": 3.876, "step": 32085 }, { "epoch": 2.18032341350727, "grad_norm": 0.7642830610275269, "learning_rate": 0.0007274595733115913, "loss": 3.2673, "step": 32090 }, { "epoch": 2.180663133577932, "grad_norm": 0.9021485447883606, "learning_rate": 0.0007274171083027585, "loss": 3.5188, "step": 32095 }, { "epoch": 2.1810028536485935, "grad_norm": 0.6848967671394348, "learning_rate": 0.0007273746432939258, "loss": 3.6715, "step": 32100 }, { "epoch": 2.181342573719255, "grad_norm": 1.0565532445907593, "learning_rate": 0.0007273321782850931, "loss": 3.51, "step": 32105 }, { "epoch": 2.181682293789917, "grad_norm": 0.8490297794342041, "learning_rate": 0.0007272897132762603, "loss": 3.5255, "step": 32110 }, { "epoch": 2.182022013860579, "grad_norm": 1.003279447555542, "learning_rate": 0.0007272472482674277, "loss": 3.4336, "step": 32115 }, { "epoch": 2.1823617339312404, "grad_norm": 1.5934319496154785, "learning_rate": 0.000727204783258595, "loss": 3.5803, "step": 32120 }, { "epoch": 2.1827014540019025, "grad_norm": 0.7985337972640991, "learning_rate": 0.0007271623182497622, "loss": 3.7513, "step": 32125 }, { "epoch": 2.183041174072564, "grad_norm": 0.7382664084434509, "learning_rate": 0.0007271198532409294, "loss": 3.4937, "step": 32130 }, { "epoch": 2.183380894143226, "grad_norm": 1.0742384195327759, "learning_rate": 0.0007270773882320968, "loss": 3.6019, "step": 32135 }, { "epoch": 2.183720614213888, "grad_norm": 0.774810254573822, "learning_rate": 0.000727034923223264, "loss": 3.5984, "step": 32140 }, { "epoch": 2.1840603342845495, "grad_norm": 0.9531111121177673, "learning_rate": 0.0007269924582144312, "loss": 3.4163, "step": 32145 }, { "epoch": 2.184400054355211, "grad_norm": 0.8622708320617676, "learning_rate": 0.0007269499932055987, "loss": 3.4777, "step": 32150 }, { "epoch": 2.184739774425873, "grad_norm": 0.8366093039512634, "learning_rate": 0.0007269075281967659, "loss": 3.2415, "step": 32155 }, { "epoch": 2.185079494496535, "grad_norm": 0.7602417469024658, "learning_rate": 0.0007268650631879331, "loss": 3.5645, "step": 32160 }, { "epoch": 2.1854192145671965, "grad_norm": 0.8636565208435059, "learning_rate": 0.0007268225981791005, "loss": 3.6202, "step": 32165 }, { "epoch": 2.1857589346378585, "grad_norm": 0.8804951906204224, "learning_rate": 0.0007267801331702677, "loss": 3.5869, "step": 32170 }, { "epoch": 2.18609865470852, "grad_norm": 1.190540075302124, "learning_rate": 0.0007267376681614349, "loss": 3.3927, "step": 32175 }, { "epoch": 2.186438374779182, "grad_norm": 0.881720781326294, "learning_rate": 0.0007266952031526022, "loss": 3.5997, "step": 32180 }, { "epoch": 2.186778094849844, "grad_norm": 0.8046108484268188, "learning_rate": 0.0007266527381437696, "loss": 3.7171, "step": 32185 }, { "epoch": 2.1871178149205055, "grad_norm": 1.0526396036148071, "learning_rate": 0.0007266102731349368, "loss": 3.7582, "step": 32190 }, { "epoch": 2.187457534991167, "grad_norm": 0.7152811884880066, "learning_rate": 0.0007265678081261041, "loss": 3.7741, "step": 32195 }, { "epoch": 2.187797255061829, "grad_norm": 1.1968938112258911, "learning_rate": 0.0007265253431172714, "loss": 3.3958, "step": 32200 }, { "epoch": 2.188136975132491, "grad_norm": 0.8464360237121582, "learning_rate": 0.0007264828781084387, "loss": 3.6738, "step": 32205 }, { "epoch": 2.1884766952031525, "grad_norm": 0.8990806341171265, "learning_rate": 0.0007264404130996059, "loss": 3.7505, "step": 32210 }, { "epoch": 2.1888164152738145, "grad_norm": 0.9128430485725403, "learning_rate": 0.0007263979480907731, "loss": 3.5511, "step": 32215 }, { "epoch": 2.189156135344476, "grad_norm": 1.0432490110397339, "learning_rate": 0.0007263554830819406, "loss": 3.5986, "step": 32220 }, { "epoch": 2.189495855415138, "grad_norm": 0.8298383355140686, "learning_rate": 0.0007263130180731078, "loss": 3.3839, "step": 32225 }, { "epoch": 2.1898355754858, "grad_norm": 1.0730490684509277, "learning_rate": 0.000726270553064275, "loss": 3.614, "step": 32230 }, { "epoch": 2.1901752955564615, "grad_norm": 0.937078595161438, "learning_rate": 0.0007262280880554424, "loss": 3.7991, "step": 32235 }, { "epoch": 2.190515015627123, "grad_norm": 0.7295710444450378, "learning_rate": 0.0007261856230466096, "loss": 3.6619, "step": 32240 }, { "epoch": 2.190854735697785, "grad_norm": 0.9337127208709717, "learning_rate": 0.0007261431580377768, "loss": 3.2815, "step": 32245 }, { "epoch": 2.191194455768447, "grad_norm": 0.7765060663223267, "learning_rate": 0.0007261006930289443, "loss": 3.5362, "step": 32250 }, { "epoch": 2.1915341758391085, "grad_norm": 0.8145327568054199, "learning_rate": 0.0007260582280201115, "loss": 3.5171, "step": 32255 }, { "epoch": 2.1918738959097706, "grad_norm": 0.7860673069953918, "learning_rate": 0.0007260157630112787, "loss": 3.3525, "step": 32260 }, { "epoch": 2.192213615980432, "grad_norm": 0.9476709365844727, "learning_rate": 0.000725973298002446, "loss": 3.8501, "step": 32265 }, { "epoch": 2.192553336051094, "grad_norm": 0.9440532326698303, "learning_rate": 0.0007259308329936133, "loss": 3.5489, "step": 32270 }, { "epoch": 2.192893056121756, "grad_norm": 0.7818008065223694, "learning_rate": 0.0007258883679847805, "loss": 3.63, "step": 32275 }, { "epoch": 2.1932327761924175, "grad_norm": 0.9779481291770935, "learning_rate": 0.0007258459029759478, "loss": 3.5989, "step": 32280 }, { "epoch": 2.193572496263079, "grad_norm": 1.2730967998504639, "learning_rate": 0.0007258034379671152, "loss": 3.0607, "step": 32285 }, { "epoch": 2.1939122163337412, "grad_norm": 0.908113420009613, "learning_rate": 0.0007257609729582824, "loss": 3.52, "step": 32290 }, { "epoch": 2.194251936404403, "grad_norm": 0.9928163290023804, "learning_rate": 0.0007257185079494497, "loss": 3.6207, "step": 32295 }, { "epoch": 2.1945916564750645, "grad_norm": 0.9725337624549866, "learning_rate": 0.000725676042940617, "loss": 3.5023, "step": 32300 }, { "epoch": 2.194931376545726, "grad_norm": 1.0681414604187012, "learning_rate": 0.0007256335779317842, "loss": 3.5948, "step": 32305 }, { "epoch": 2.195271096616388, "grad_norm": 1.376857876777649, "learning_rate": 0.0007255911129229515, "loss": 3.5413, "step": 32310 }, { "epoch": 2.19561081668705, "grad_norm": 0.7144401669502258, "learning_rate": 0.0007255486479141187, "loss": 3.3796, "step": 32315 }, { "epoch": 2.1959505367577115, "grad_norm": 1.0554355382919312, "learning_rate": 0.0007255061829052861, "loss": 3.2464, "step": 32320 }, { "epoch": 2.1962902568283735, "grad_norm": 0.8161914944648743, "learning_rate": 0.0007254637178964534, "loss": 3.6515, "step": 32325 }, { "epoch": 2.196629976899035, "grad_norm": 0.7045878171920776, "learning_rate": 0.0007254212528876206, "loss": 3.4592, "step": 32330 }, { "epoch": 2.196969696969697, "grad_norm": 1.0574604272842407, "learning_rate": 0.0007253787878787879, "loss": 3.6202, "step": 32335 }, { "epoch": 2.197309417040359, "grad_norm": 0.7589321732521057, "learning_rate": 0.0007253363228699552, "loss": 3.719, "step": 32340 }, { "epoch": 2.1976491371110205, "grad_norm": 0.9884923100471497, "learning_rate": 0.0007252938578611224, "loss": 3.7307, "step": 32345 }, { "epoch": 2.197988857181682, "grad_norm": 0.8472917079925537, "learning_rate": 0.0007252513928522897, "loss": 3.7195, "step": 32350 }, { "epoch": 2.198328577252344, "grad_norm": 1.1842713356018066, "learning_rate": 0.0007252089278434571, "loss": 3.6106, "step": 32355 }, { "epoch": 2.198668297323006, "grad_norm": 0.848441481590271, "learning_rate": 0.0007251664628346243, "loss": 3.5959, "step": 32360 }, { "epoch": 2.1990080173936675, "grad_norm": 0.7582809925079346, "learning_rate": 0.0007251239978257915, "loss": 3.3248, "step": 32365 }, { "epoch": 2.1993477374643295, "grad_norm": 1.0703539848327637, "learning_rate": 0.0007250815328169589, "loss": 3.2531, "step": 32370 }, { "epoch": 2.199687457534991, "grad_norm": 0.7876978516578674, "learning_rate": 0.0007250390678081261, "loss": 3.3242, "step": 32375 }, { "epoch": 2.200027177605653, "grad_norm": 0.8323952555656433, "learning_rate": 0.0007249966027992933, "loss": 3.5724, "step": 32380 }, { "epoch": 2.200366897676315, "grad_norm": 0.9582597017288208, "learning_rate": 0.0007249541377904607, "loss": 3.5901, "step": 32385 }, { "epoch": 2.2007066177469765, "grad_norm": 0.8199208974838257, "learning_rate": 0.000724911672781628, "loss": 3.8667, "step": 32390 }, { "epoch": 2.201046337817638, "grad_norm": 0.902228057384491, "learning_rate": 0.0007248692077727952, "loss": 3.3433, "step": 32395 }, { "epoch": 2.2013860578883, "grad_norm": 0.699664294719696, "learning_rate": 0.0007248267427639626, "loss": 3.6722, "step": 32400 }, { "epoch": 2.201725777958962, "grad_norm": 0.7893538475036621, "learning_rate": 0.0007247842777551298, "loss": 3.6817, "step": 32405 }, { "epoch": 2.2020654980296235, "grad_norm": 1.2912781238555908, "learning_rate": 0.000724741812746297, "loss": 3.5174, "step": 32410 }, { "epoch": 2.2024052181002856, "grad_norm": 0.8231721520423889, "learning_rate": 0.0007246993477374643, "loss": 3.716, "step": 32415 }, { "epoch": 2.202744938170947, "grad_norm": 1.0842039585113525, "learning_rate": 0.0007246568827286316, "loss": 3.5368, "step": 32420 }, { "epoch": 2.203084658241609, "grad_norm": 1.0638808012008667, "learning_rate": 0.0007246144177197989, "loss": 3.6361, "step": 32425 }, { "epoch": 2.203424378312271, "grad_norm": 0.9698588252067566, "learning_rate": 0.0007245719527109662, "loss": 3.2701, "step": 32430 }, { "epoch": 2.2037640983829325, "grad_norm": 2.4767730236053467, "learning_rate": 0.0007245294877021335, "loss": 3.5859, "step": 32435 }, { "epoch": 2.204103818453594, "grad_norm": 0.7966992855072021, "learning_rate": 0.0007244870226933007, "loss": 3.7905, "step": 32440 }, { "epoch": 2.204443538524256, "grad_norm": 0.7904607057571411, "learning_rate": 0.000724444557684468, "loss": 3.8365, "step": 32445 }, { "epoch": 2.204783258594918, "grad_norm": 0.9263324737548828, "learning_rate": 0.0007244020926756353, "loss": 3.3993, "step": 32450 }, { "epoch": 2.2051229786655795, "grad_norm": 0.8912951946258545, "learning_rate": 0.0007243596276668025, "loss": 3.3937, "step": 32455 }, { "epoch": 2.205462698736241, "grad_norm": 0.8640767931938171, "learning_rate": 0.0007243171626579699, "loss": 3.5114, "step": 32460 }, { "epoch": 2.205802418806903, "grad_norm": 0.7920671105384827, "learning_rate": 0.0007242746976491371, "loss": 3.6954, "step": 32465 }, { "epoch": 2.206142138877565, "grad_norm": 1.273353934288025, "learning_rate": 0.0007242322326403044, "loss": 3.6205, "step": 32470 }, { "epoch": 2.2064818589482265, "grad_norm": 0.8244420886039734, "learning_rate": 0.0007241897676314717, "loss": 3.7842, "step": 32475 }, { "epoch": 2.2068215790188885, "grad_norm": 0.9040079116821289, "learning_rate": 0.0007241473026226389, "loss": 3.1961, "step": 32480 }, { "epoch": 2.20716129908955, "grad_norm": 0.8033730387687683, "learning_rate": 0.0007241048376138062, "loss": 3.834, "step": 32485 }, { "epoch": 2.207501019160212, "grad_norm": 0.9443610310554504, "learning_rate": 0.0007240623726049735, "loss": 3.4635, "step": 32490 }, { "epoch": 2.207840739230874, "grad_norm": 0.8103798031806946, "learning_rate": 0.0007240199075961408, "loss": 3.7182, "step": 32495 }, { "epoch": 2.2081804593015355, "grad_norm": 0.7979253530502319, "learning_rate": 0.0007239774425873081, "loss": 3.4059, "step": 32500 }, { "epoch": 2.208520179372197, "grad_norm": 0.8763925433158875, "learning_rate": 0.0007239349775784754, "loss": 3.5197, "step": 32505 }, { "epoch": 2.208859899442859, "grad_norm": 0.9153876304626465, "learning_rate": 0.0007238925125696426, "loss": 3.6599, "step": 32510 }, { "epoch": 2.209199619513521, "grad_norm": 1.0884946584701538, "learning_rate": 0.0007238500475608098, "loss": 3.7522, "step": 32515 }, { "epoch": 2.2095393395841825, "grad_norm": 1.2856712341308594, "learning_rate": 0.0007238075825519772, "loss": 3.6226, "step": 32520 }, { "epoch": 2.2098790596548445, "grad_norm": 0.9621086716651917, "learning_rate": 0.0007237651175431444, "loss": 3.7566, "step": 32525 }, { "epoch": 2.210218779725506, "grad_norm": 0.8527933955192566, "learning_rate": 0.0007237226525343117, "loss": 3.6458, "step": 32530 }, { "epoch": 2.210558499796168, "grad_norm": 0.8334488868713379, "learning_rate": 0.0007236801875254791, "loss": 3.6017, "step": 32535 }, { "epoch": 2.21089821986683, "grad_norm": 0.75522780418396, "learning_rate": 0.0007236377225166463, "loss": 3.7891, "step": 32540 }, { "epoch": 2.2112379399374915, "grad_norm": 0.922684371471405, "learning_rate": 0.0007235952575078136, "loss": 3.5874, "step": 32545 }, { "epoch": 2.211577660008153, "grad_norm": 0.7306028604507446, "learning_rate": 0.0007235527924989809, "loss": 3.6804, "step": 32550 }, { "epoch": 2.211917380078815, "grad_norm": 1.1049672365188599, "learning_rate": 0.0007235103274901481, "loss": 3.6485, "step": 32555 }, { "epoch": 2.212257100149477, "grad_norm": 0.9597798585891724, "learning_rate": 0.0007234678624813154, "loss": 3.5013, "step": 32560 }, { "epoch": 2.2125968202201385, "grad_norm": 0.7286372184753418, "learning_rate": 0.0007234253974724827, "loss": 3.6512, "step": 32565 }, { "epoch": 2.2129365402908006, "grad_norm": 0.932775616645813, "learning_rate": 0.00072338293246365, "loss": 3.4226, "step": 32570 }, { "epoch": 2.213276260361462, "grad_norm": 1.3865336179733276, "learning_rate": 0.0007233404674548173, "loss": 3.4846, "step": 32575 }, { "epoch": 2.213615980432124, "grad_norm": 0.8341137766838074, "learning_rate": 0.0007232980024459845, "loss": 3.6311, "step": 32580 }, { "epoch": 2.213955700502786, "grad_norm": 0.8115186095237732, "learning_rate": 0.0007232555374371518, "loss": 3.726, "step": 32585 }, { "epoch": 2.2142954205734475, "grad_norm": 0.768846869468689, "learning_rate": 0.0007232130724283191, "loss": 3.5069, "step": 32590 }, { "epoch": 2.214635140644109, "grad_norm": 0.9354526400566101, "learning_rate": 0.0007231706074194863, "loss": 3.3344, "step": 32595 }, { "epoch": 2.2149748607147712, "grad_norm": 0.919762134552002, "learning_rate": 0.0007231281424106537, "loss": 3.6443, "step": 32600 }, { "epoch": 2.215314580785433, "grad_norm": 0.7725939750671387, "learning_rate": 0.000723085677401821, "loss": 3.3866, "step": 32605 }, { "epoch": 2.2156543008560945, "grad_norm": 0.942682147026062, "learning_rate": 0.0007230432123929882, "loss": 3.3307, "step": 32610 }, { "epoch": 2.2159940209267566, "grad_norm": 0.8874782919883728, "learning_rate": 0.0007230007473841554, "loss": 3.7634, "step": 32615 }, { "epoch": 2.216333740997418, "grad_norm": 0.8687102794647217, "learning_rate": 0.0007229582823753228, "loss": 3.6211, "step": 32620 }, { "epoch": 2.21667346106808, "grad_norm": 0.964253306388855, "learning_rate": 0.00072291581736649, "loss": 3.602, "step": 32625 }, { "epoch": 2.217013181138742, "grad_norm": 0.878630518913269, "learning_rate": 0.0007228733523576572, "loss": 3.4792, "step": 32630 }, { "epoch": 2.2173529012094035, "grad_norm": 1.075579047203064, "learning_rate": 0.0007228308873488247, "loss": 3.5107, "step": 32635 }, { "epoch": 2.217692621280065, "grad_norm": 1.0901236534118652, "learning_rate": 0.0007227884223399919, "loss": 3.5373, "step": 32640 }, { "epoch": 2.218032341350727, "grad_norm": 0.9886935353279114, "learning_rate": 0.0007227459573311591, "loss": 3.569, "step": 32645 }, { "epoch": 2.218372061421389, "grad_norm": 0.7487064003944397, "learning_rate": 0.0007227034923223265, "loss": 3.3605, "step": 32650 }, { "epoch": 2.2187117814920505, "grad_norm": 1.1131982803344727, "learning_rate": 0.0007226610273134937, "loss": 3.5597, "step": 32655 }, { "epoch": 2.219051501562712, "grad_norm": 1.1309337615966797, "learning_rate": 0.0007226185623046609, "loss": 3.503, "step": 32660 }, { "epoch": 2.219391221633374, "grad_norm": 0.8000867366790771, "learning_rate": 0.0007225760972958282, "loss": 3.2924, "step": 32665 }, { "epoch": 2.219730941704036, "grad_norm": 0.887381911277771, "learning_rate": 0.0007225336322869956, "loss": 3.6958, "step": 32670 }, { "epoch": 2.2200706617746975, "grad_norm": 0.8937597274780273, "learning_rate": 0.0007224911672781628, "loss": 3.5569, "step": 32675 }, { "epoch": 2.2204103818453595, "grad_norm": 1.062612533569336, "learning_rate": 0.0007224487022693301, "loss": 3.3807, "step": 32680 }, { "epoch": 2.220750101916021, "grad_norm": 1.657966136932373, "learning_rate": 0.0007224062372604974, "loss": 3.541, "step": 32685 }, { "epoch": 2.221089821986683, "grad_norm": 1.0473896265029907, "learning_rate": 0.0007223637722516646, "loss": 3.3144, "step": 32690 }, { "epoch": 2.221429542057345, "grad_norm": 1.1639864444732666, "learning_rate": 0.0007223213072428319, "loss": 3.4216, "step": 32695 }, { "epoch": 2.2217692621280065, "grad_norm": 0.7584365606307983, "learning_rate": 0.0007222788422339991, "loss": 3.5164, "step": 32700 }, { "epoch": 2.222108982198668, "grad_norm": 0.7838539481163025, "learning_rate": 0.0007222363772251665, "loss": 3.5092, "step": 32705 }, { "epoch": 2.2224487022693302, "grad_norm": 0.6943447589874268, "learning_rate": 0.0007221939122163338, "loss": 3.6687, "step": 32710 }, { "epoch": 2.222788422339992, "grad_norm": 0.7911533117294312, "learning_rate": 0.000722151447207501, "loss": 3.6781, "step": 32715 }, { "epoch": 2.2231281424106535, "grad_norm": 0.9920641183853149, "learning_rate": 0.0007221089821986683, "loss": 3.5283, "step": 32720 }, { "epoch": 2.2234678624813156, "grad_norm": 0.8376407623291016, "learning_rate": 0.0007220665171898356, "loss": 3.4906, "step": 32725 }, { "epoch": 2.223807582551977, "grad_norm": 0.9474460482597351, "learning_rate": 0.0007220240521810028, "loss": 3.6072, "step": 32730 }, { "epoch": 2.224147302622639, "grad_norm": 0.8528099656105042, "learning_rate": 0.0007219815871721701, "loss": 3.649, "step": 32735 }, { "epoch": 2.224487022693301, "grad_norm": 0.9030001163482666, "learning_rate": 0.0007219391221633375, "loss": 3.675, "step": 32740 }, { "epoch": 2.2248267427639625, "grad_norm": 0.9056916832923889, "learning_rate": 0.0007218966571545047, "loss": 3.6448, "step": 32745 }, { "epoch": 2.225166462834624, "grad_norm": 0.9632193446159363, "learning_rate": 0.000721854192145672, "loss": 3.2476, "step": 32750 }, { "epoch": 2.2255061829052862, "grad_norm": 1.1059685945510864, "learning_rate": 0.0007218117271368393, "loss": 3.5806, "step": 32755 }, { "epoch": 2.225845902975948, "grad_norm": 0.7420191764831543, "learning_rate": 0.0007217692621280065, "loss": 3.7043, "step": 32760 }, { "epoch": 2.2261856230466095, "grad_norm": 0.8974022269248962, "learning_rate": 0.0007217267971191737, "loss": 3.4824, "step": 32765 }, { "epoch": 2.2265253431172716, "grad_norm": 0.9585962891578674, "learning_rate": 0.0007216843321103411, "loss": 3.6685, "step": 32770 }, { "epoch": 2.226865063187933, "grad_norm": 1.177390694618225, "learning_rate": 0.0007216418671015084, "loss": 3.5998, "step": 32775 }, { "epoch": 2.227204783258595, "grad_norm": 0.8217247128486633, "learning_rate": 0.0007215994020926756, "loss": 3.641, "step": 32780 }, { "epoch": 2.2275445033292565, "grad_norm": 0.9738537073135376, "learning_rate": 0.000721556937083843, "loss": 3.5254, "step": 32785 }, { "epoch": 2.2278842233999185, "grad_norm": 0.8631089925765991, "learning_rate": 0.0007215144720750102, "loss": 3.4482, "step": 32790 }, { "epoch": 2.22822394347058, "grad_norm": 0.867121160030365, "learning_rate": 0.0007214720070661774, "loss": 3.7201, "step": 32795 }, { "epoch": 2.228563663541242, "grad_norm": 0.8319726586341858, "learning_rate": 0.0007214295420573448, "loss": 3.7933, "step": 32800 }, { "epoch": 2.228903383611904, "grad_norm": 0.8822260499000549, "learning_rate": 0.000721387077048512, "loss": 3.5026, "step": 32805 }, { "epoch": 2.2292431036825655, "grad_norm": 0.7983752489089966, "learning_rate": 0.0007213446120396793, "loss": 3.5965, "step": 32810 }, { "epoch": 2.229582823753227, "grad_norm": 0.7111396789550781, "learning_rate": 0.0007213021470308466, "loss": 3.6576, "step": 32815 }, { "epoch": 2.229922543823889, "grad_norm": 1.037390947341919, "learning_rate": 0.0007212596820220139, "loss": 3.6479, "step": 32820 }, { "epoch": 2.230262263894551, "grad_norm": 0.7104963660240173, "learning_rate": 0.0007212172170131811, "loss": 3.6445, "step": 32825 }, { "epoch": 2.2306019839652125, "grad_norm": 1.064030408859253, "learning_rate": 0.0007211747520043484, "loss": 3.8064, "step": 32830 }, { "epoch": 2.2309417040358746, "grad_norm": 1.0181405544281006, "learning_rate": 0.0007211322869955157, "loss": 3.62, "step": 32835 }, { "epoch": 2.231281424106536, "grad_norm": 1.0560693740844727, "learning_rate": 0.0007210898219866829, "loss": 3.5056, "step": 32840 }, { "epoch": 2.231621144177198, "grad_norm": 1.155771255493164, "learning_rate": 0.0007210473569778503, "loss": 3.5113, "step": 32845 }, { "epoch": 2.23196086424786, "grad_norm": 0.903671383857727, "learning_rate": 0.0007210048919690176, "loss": 3.5671, "step": 32850 }, { "epoch": 2.2323005843185215, "grad_norm": 0.8087148070335388, "learning_rate": 0.0007209624269601848, "loss": 3.4991, "step": 32855 }, { "epoch": 2.232640304389183, "grad_norm": 0.9262293577194214, "learning_rate": 0.0007209199619513521, "loss": 3.3508, "step": 32860 }, { "epoch": 2.2329800244598452, "grad_norm": 1.0619874000549316, "learning_rate": 0.0007208774969425193, "loss": 3.288, "step": 32865 }, { "epoch": 2.233319744530507, "grad_norm": 0.7767520546913147, "learning_rate": 0.0007208350319336866, "loss": 3.756, "step": 32870 }, { "epoch": 2.2336594646011685, "grad_norm": 0.9351972937583923, "learning_rate": 0.000720792566924854, "loss": 3.7703, "step": 32875 }, { "epoch": 2.2339991846718306, "grad_norm": 0.8207551836967468, "learning_rate": 0.0007207501019160212, "loss": 3.5417, "step": 32880 }, { "epoch": 2.234338904742492, "grad_norm": 0.8272174596786499, "learning_rate": 0.0007207076369071886, "loss": 3.4901, "step": 32885 }, { "epoch": 2.234678624813154, "grad_norm": 1.8387123346328735, "learning_rate": 0.0007206651718983558, "loss": 3.558, "step": 32890 }, { "epoch": 2.235018344883816, "grad_norm": 1.307273507118225, "learning_rate": 0.000720622706889523, "loss": 3.5348, "step": 32895 }, { "epoch": 2.2353580649544775, "grad_norm": 0.8822222352027893, "learning_rate": 0.0007205802418806904, "loss": 3.5816, "step": 32900 }, { "epoch": 2.235697785025139, "grad_norm": 0.8231761455535889, "learning_rate": 0.0007205377768718576, "loss": 3.523, "step": 32905 }, { "epoch": 2.2360375050958012, "grad_norm": 0.8112620711326599, "learning_rate": 0.0007204953118630249, "loss": 3.8385, "step": 32910 }, { "epoch": 2.236377225166463, "grad_norm": 1.0755014419555664, "learning_rate": 0.0007204528468541922, "loss": 3.7934, "step": 32915 }, { "epoch": 2.2367169452371245, "grad_norm": 0.8414238095283508, "learning_rate": 0.0007204103818453595, "loss": 3.464, "step": 32920 }, { "epoch": 2.2370566653077866, "grad_norm": 0.8555359840393066, "learning_rate": 0.0007203679168365267, "loss": 3.6465, "step": 32925 }, { "epoch": 2.237396385378448, "grad_norm": 0.8031982183456421, "learning_rate": 0.000720325451827694, "loss": 3.8341, "step": 32930 }, { "epoch": 2.23773610544911, "grad_norm": 0.9278504252433777, "learning_rate": 0.0007202829868188613, "loss": 3.3356, "step": 32935 }, { "epoch": 2.238075825519772, "grad_norm": 1.064771294593811, "learning_rate": 0.0007202405218100285, "loss": 3.3553, "step": 32940 }, { "epoch": 2.2384155455904335, "grad_norm": 2.3802034854888916, "learning_rate": 0.0007201980568011959, "loss": 3.6065, "step": 32945 }, { "epoch": 2.238755265661095, "grad_norm": 0.7531465291976929, "learning_rate": 0.0007201555917923632, "loss": 3.7289, "step": 32950 }, { "epoch": 2.2390949857317572, "grad_norm": 0.7438278794288635, "learning_rate": 0.0007201131267835304, "loss": 3.5987, "step": 32955 }, { "epoch": 2.239434705802419, "grad_norm": 0.7301222681999207, "learning_rate": 0.0007200706617746977, "loss": 3.5205, "step": 32960 }, { "epoch": 2.2397744258730805, "grad_norm": 1.117942214012146, "learning_rate": 0.0007200281967658649, "loss": 3.6124, "step": 32965 }, { "epoch": 2.2401141459437426, "grad_norm": 0.9310340285301208, "learning_rate": 0.0007199857317570322, "loss": 3.6187, "step": 32970 }, { "epoch": 2.240453866014404, "grad_norm": 1.2055370807647705, "learning_rate": 0.0007199432667481995, "loss": 3.5309, "step": 32975 }, { "epoch": 2.240793586085066, "grad_norm": 0.7803168892860413, "learning_rate": 0.0007199008017393668, "loss": 3.6, "step": 32980 }, { "epoch": 2.2411333061557275, "grad_norm": 1.1834999322891235, "learning_rate": 0.0007198583367305341, "loss": 3.5526, "step": 32985 }, { "epoch": 2.2414730262263896, "grad_norm": 0.957045316696167, "learning_rate": 0.0007198158717217014, "loss": 3.4927, "step": 32990 }, { "epoch": 2.241812746297051, "grad_norm": 1.13032865524292, "learning_rate": 0.0007197734067128686, "loss": 3.5098, "step": 32995 }, { "epoch": 2.242152466367713, "grad_norm": 0.8925689458847046, "learning_rate": 0.0007197309417040358, "loss": 3.484, "step": 33000 }, { "epoch": 2.242492186438375, "grad_norm": 0.8652591705322266, "learning_rate": 0.0007196884766952032, "loss": 3.581, "step": 33005 }, { "epoch": 2.2428319065090365, "grad_norm": 0.8590790629386902, "learning_rate": 0.0007196460116863704, "loss": 3.255, "step": 33010 }, { "epoch": 2.243171626579698, "grad_norm": 1.02212655544281, "learning_rate": 0.0007196035466775377, "loss": 3.7149, "step": 33015 }, { "epoch": 2.2435113466503602, "grad_norm": 0.8495359420776367, "learning_rate": 0.0007195610816687051, "loss": 3.5893, "step": 33020 }, { "epoch": 2.243851066721022, "grad_norm": 0.7727569341659546, "learning_rate": 0.0007195186166598723, "loss": 3.5309, "step": 33025 }, { "epoch": 2.2441907867916835, "grad_norm": 0.8089480400085449, "learning_rate": 0.0007194761516510395, "loss": 3.581, "step": 33030 }, { "epoch": 2.2445305068623456, "grad_norm": 0.7776786684989929, "learning_rate": 0.0007194336866422069, "loss": 3.4994, "step": 33035 }, { "epoch": 2.244870226933007, "grad_norm": 0.9157167673110962, "learning_rate": 0.0007193912216333741, "loss": 3.5078, "step": 33040 }, { "epoch": 2.245209947003669, "grad_norm": 0.8186530470848083, "learning_rate": 0.0007193487566245413, "loss": 3.6267, "step": 33045 }, { "epoch": 2.245549667074331, "grad_norm": 0.8314428329467773, "learning_rate": 0.0007193062916157088, "loss": 3.9037, "step": 33050 }, { "epoch": 2.2458893871449925, "grad_norm": 0.992611825466156, "learning_rate": 0.000719263826606876, "loss": 3.8596, "step": 33055 }, { "epoch": 2.246229107215654, "grad_norm": 0.7888532876968384, "learning_rate": 0.0007192213615980432, "loss": 3.5228, "step": 33060 }, { "epoch": 2.2465688272863162, "grad_norm": 0.9443208575248718, "learning_rate": 0.0007191788965892105, "loss": 3.6179, "step": 33065 }, { "epoch": 2.246908547356978, "grad_norm": 1.0666089057922363, "learning_rate": 0.0007191364315803778, "loss": 3.58, "step": 33070 }, { "epoch": 2.2472482674276395, "grad_norm": 0.8038224577903748, "learning_rate": 0.000719093966571545, "loss": 3.7215, "step": 33075 }, { "epoch": 2.2475879874983016, "grad_norm": 0.8206089735031128, "learning_rate": 0.0007190515015627123, "loss": 3.5763, "step": 33080 }, { "epoch": 2.247927707568963, "grad_norm": 1.1060070991516113, "learning_rate": 0.0007190090365538797, "loss": 3.5541, "step": 33085 }, { "epoch": 2.248267427639625, "grad_norm": 0.7552579045295715, "learning_rate": 0.0007189665715450469, "loss": 3.4304, "step": 33090 }, { "epoch": 2.248607147710287, "grad_norm": 0.8153659701347351, "learning_rate": 0.0007189241065362142, "loss": 3.277, "step": 33095 }, { "epoch": 2.2489468677809485, "grad_norm": 0.8223702907562256, "learning_rate": 0.0007188816415273814, "loss": 3.4822, "step": 33100 }, { "epoch": 2.24928658785161, "grad_norm": 0.7493183016777039, "learning_rate": 0.0007188391765185487, "loss": 3.3604, "step": 33105 }, { "epoch": 2.2496263079222723, "grad_norm": 0.8348582983016968, "learning_rate": 0.000718796711509716, "loss": 3.4184, "step": 33110 }, { "epoch": 2.249966027992934, "grad_norm": 0.6842407584190369, "learning_rate": 0.0007187542465008832, "loss": 3.6876, "step": 33115 }, { "epoch": 2.2503057480635955, "grad_norm": 0.9893853664398193, "learning_rate": 0.0007187117814920506, "loss": 3.5327, "step": 33120 }, { "epoch": 2.250645468134257, "grad_norm": 0.932339072227478, "learning_rate": 0.0007186693164832179, "loss": 3.7372, "step": 33125 }, { "epoch": 2.250985188204919, "grad_norm": 0.8852616548538208, "learning_rate": 0.0007186268514743851, "loss": 3.7989, "step": 33130 }, { "epoch": 2.251324908275581, "grad_norm": 0.7991600036621094, "learning_rate": 0.0007185843864655524, "loss": 3.683, "step": 33135 }, { "epoch": 2.2516646283462425, "grad_norm": 0.8995228409767151, "learning_rate": 0.0007185419214567197, "loss": 3.6304, "step": 33140 }, { "epoch": 2.2520043484169046, "grad_norm": 0.9056110382080078, "learning_rate": 0.0007184994564478869, "loss": 3.5715, "step": 33145 }, { "epoch": 2.252344068487566, "grad_norm": 1.0736966133117676, "learning_rate": 0.0007184569914390541, "loss": 3.3348, "step": 33150 }, { "epoch": 2.252683788558228, "grad_norm": 0.8796764612197876, "learning_rate": 0.0007184145264302216, "loss": 3.8133, "step": 33155 }, { "epoch": 2.25302350862889, "grad_norm": 0.7989653944969177, "learning_rate": 0.0007183720614213888, "loss": 3.6325, "step": 33160 }, { "epoch": 2.2533632286995515, "grad_norm": 0.7202712297439575, "learning_rate": 0.000718329596412556, "loss": 3.7224, "step": 33165 }, { "epoch": 2.253702948770213, "grad_norm": 1.2048949003219604, "learning_rate": 0.0007182871314037234, "loss": 3.4293, "step": 33170 }, { "epoch": 2.2540426688408752, "grad_norm": 0.9079889059066772, "learning_rate": 0.0007182446663948906, "loss": 3.4945, "step": 33175 }, { "epoch": 2.254382388911537, "grad_norm": 0.8293566703796387, "learning_rate": 0.0007182022013860578, "loss": 3.3822, "step": 33180 }, { "epoch": 2.2547221089821985, "grad_norm": 0.8465235233306885, "learning_rate": 0.0007181597363772252, "loss": 3.6184, "step": 33185 }, { "epoch": 2.2550618290528606, "grad_norm": 0.8670704960823059, "learning_rate": 0.0007181172713683925, "loss": 3.5337, "step": 33190 }, { "epoch": 2.255401549123522, "grad_norm": 1.127526044845581, "learning_rate": 0.0007180748063595597, "loss": 3.7075, "step": 33195 }, { "epoch": 2.255741269194184, "grad_norm": 0.7312422394752502, "learning_rate": 0.000718032341350727, "loss": 3.5978, "step": 33200 }, { "epoch": 2.256080989264846, "grad_norm": 0.7384932637214661, "learning_rate": 0.0007179898763418943, "loss": 3.6336, "step": 33205 }, { "epoch": 2.2564207093355075, "grad_norm": 0.7124952673912048, "learning_rate": 0.0007179474113330615, "loss": 3.6529, "step": 33210 }, { "epoch": 2.256760429406169, "grad_norm": 0.8823535442352295, "learning_rate": 0.0007179049463242288, "loss": 3.4039, "step": 33215 }, { "epoch": 2.2571001494768312, "grad_norm": 0.9871249794960022, "learning_rate": 0.0007178624813153961, "loss": 3.7726, "step": 33220 }, { "epoch": 2.257439869547493, "grad_norm": 1.136189579963684, "learning_rate": 0.0007178200163065635, "loss": 3.3842, "step": 33225 }, { "epoch": 2.2577795896181545, "grad_norm": 1.0106223821640015, "learning_rate": 0.0007177775512977307, "loss": 3.5458, "step": 33230 }, { "epoch": 2.2581193096888166, "grad_norm": 0.8007284998893738, "learning_rate": 0.000717735086288898, "loss": 3.5947, "step": 33235 }, { "epoch": 2.258459029759478, "grad_norm": 0.9360009431838989, "learning_rate": 0.0007176926212800653, "loss": 3.5269, "step": 33240 }, { "epoch": 2.25879874983014, "grad_norm": 1.0695573091506958, "learning_rate": 0.0007176501562712325, "loss": 3.3501, "step": 33245 }, { "epoch": 2.259138469900802, "grad_norm": 0.8601471185684204, "learning_rate": 0.0007176076912623997, "loss": 3.6174, "step": 33250 }, { "epoch": 2.2594781899714635, "grad_norm": 0.7691159248352051, "learning_rate": 0.0007175652262535671, "loss": 3.398, "step": 33255 }, { "epoch": 2.259817910042125, "grad_norm": 0.6757928729057312, "learning_rate": 0.0007175227612447344, "loss": 3.7447, "step": 33260 }, { "epoch": 2.2601576301127873, "grad_norm": 1.0051745176315308, "learning_rate": 0.0007174802962359016, "loss": 3.7604, "step": 33265 }, { "epoch": 2.260497350183449, "grad_norm": 0.8519276976585388, "learning_rate": 0.000717437831227069, "loss": 3.4593, "step": 33270 }, { "epoch": 2.2608370702541105, "grad_norm": 0.9402484893798828, "learning_rate": 0.0007173953662182362, "loss": 3.5249, "step": 33275 }, { "epoch": 2.2611767903247726, "grad_norm": 0.9661968350410461, "learning_rate": 0.0007173529012094034, "loss": 3.5901, "step": 33280 }, { "epoch": 2.261516510395434, "grad_norm": 1.0839349031448364, "learning_rate": 0.0007173104362005708, "loss": 3.6345, "step": 33285 }, { "epoch": 2.261856230466096, "grad_norm": 0.6857048869132996, "learning_rate": 0.000717267971191738, "loss": 3.5202, "step": 33290 }, { "epoch": 2.262195950536758, "grad_norm": 0.9796864986419678, "learning_rate": 0.0007172255061829053, "loss": 3.625, "step": 33295 }, { "epoch": 2.2625356706074196, "grad_norm": 0.7852342128753662, "learning_rate": 0.0007171830411740726, "loss": 3.5383, "step": 33300 }, { "epoch": 2.262875390678081, "grad_norm": 0.8394941091537476, "learning_rate": 0.0007171405761652399, "loss": 3.4653, "step": 33305 }, { "epoch": 2.2632151107487433, "grad_norm": 1.0016043186187744, "learning_rate": 0.0007170981111564071, "loss": 3.5829, "step": 33310 }, { "epoch": 2.263554830819405, "grad_norm": 0.8145662546157837, "learning_rate": 0.0007170556461475744, "loss": 3.7781, "step": 33315 }, { "epoch": 2.2638945508900665, "grad_norm": 0.8144311904907227, "learning_rate": 0.0007170131811387417, "loss": 3.5867, "step": 33320 }, { "epoch": 2.2642342709607286, "grad_norm": 0.8240835666656494, "learning_rate": 0.0007169707161299089, "loss": 3.4374, "step": 33325 }, { "epoch": 2.2645739910313902, "grad_norm": 1.0463414192199707, "learning_rate": 0.0007169282511210763, "loss": 3.5152, "step": 33330 }, { "epoch": 2.264913711102052, "grad_norm": 0.7924541234970093, "learning_rate": 0.0007168857861122436, "loss": 3.5487, "step": 33335 }, { "epoch": 2.2652534311727135, "grad_norm": 0.7810038328170776, "learning_rate": 0.0007168433211034108, "loss": 3.514, "step": 33340 }, { "epoch": 2.2655931512433756, "grad_norm": 0.7996058464050293, "learning_rate": 0.0007168008560945781, "loss": 3.4942, "step": 33345 }, { "epoch": 2.265932871314037, "grad_norm": 0.9138662815093994, "learning_rate": 0.0007167583910857453, "loss": 3.7177, "step": 33350 }, { "epoch": 2.266272591384699, "grad_norm": 0.9390940070152283, "learning_rate": 0.0007167159260769126, "loss": 3.716, "step": 33355 }, { "epoch": 2.266612311455361, "grad_norm": 0.6953398585319519, "learning_rate": 0.00071667346106808, "loss": 3.754, "step": 33360 }, { "epoch": 2.2669520315260225, "grad_norm": 0.8761784434318542, "learning_rate": 0.0007166309960592472, "loss": 3.6092, "step": 33365 }, { "epoch": 2.267291751596684, "grad_norm": 0.7568075656890869, "learning_rate": 0.0007165885310504145, "loss": 3.6632, "step": 33370 }, { "epoch": 2.2676314716673462, "grad_norm": 0.901160717010498, "learning_rate": 0.0007165460660415818, "loss": 3.2737, "step": 33375 }, { "epoch": 2.267971191738008, "grad_norm": 0.7893183827400208, "learning_rate": 0.000716503601032749, "loss": 3.744, "step": 33380 }, { "epoch": 2.2683109118086695, "grad_norm": 0.7973024249076843, "learning_rate": 0.0007164611360239162, "loss": 3.3238, "step": 33385 }, { "epoch": 2.2686506318793316, "grad_norm": 0.9344809651374817, "learning_rate": 0.0007164186710150836, "loss": 3.5552, "step": 33390 }, { "epoch": 2.268990351949993, "grad_norm": 0.8530999422073364, "learning_rate": 0.0007163762060062509, "loss": 3.5628, "step": 33395 }, { "epoch": 2.269330072020655, "grad_norm": 0.9041158556938171, "learning_rate": 0.0007163337409974181, "loss": 3.3898, "step": 33400 }, { "epoch": 2.269669792091317, "grad_norm": 1.436303734779358, "learning_rate": 0.0007162912759885855, "loss": 3.3977, "step": 33405 }, { "epoch": 2.2700095121619785, "grad_norm": 0.8674291968345642, "learning_rate": 0.0007162488109797527, "loss": 3.4566, "step": 33410 }, { "epoch": 2.27034923223264, "grad_norm": 0.8038323521614075, "learning_rate": 0.0007162063459709199, "loss": 3.6008, "step": 33415 }, { "epoch": 2.2706889523033023, "grad_norm": 0.8486367464065552, "learning_rate": 0.0007161638809620873, "loss": 3.6199, "step": 33420 }, { "epoch": 2.271028672373964, "grad_norm": 4.3038740158081055, "learning_rate": 0.0007161214159532545, "loss": 3.3495, "step": 33425 }, { "epoch": 2.2713683924446255, "grad_norm": 0.9377936720848083, "learning_rate": 0.0007160789509444218, "loss": 3.4689, "step": 33430 }, { "epoch": 2.2717081125152876, "grad_norm": 0.6448661684989929, "learning_rate": 0.0007160364859355892, "loss": 3.3967, "step": 33435 }, { "epoch": 2.2720478325859492, "grad_norm": 0.7574926018714905, "learning_rate": 0.0007159940209267564, "loss": 3.434, "step": 33440 }, { "epoch": 2.272387552656611, "grad_norm": 0.7551472783088684, "learning_rate": 0.0007159515559179236, "loss": 3.5716, "step": 33445 }, { "epoch": 2.2727272727272725, "grad_norm": 0.9378146529197693, "learning_rate": 0.0007159090909090909, "loss": 3.3339, "step": 33450 }, { "epoch": 2.2730669927979346, "grad_norm": 1.1030871868133545, "learning_rate": 0.0007158666259002582, "loss": 3.6648, "step": 33455 }, { "epoch": 2.273406712868596, "grad_norm": 1.0810635089874268, "learning_rate": 0.0007158241608914254, "loss": 3.5567, "step": 33460 }, { "epoch": 2.273746432939258, "grad_norm": 0.928423285484314, "learning_rate": 0.0007157816958825928, "loss": 3.4463, "step": 33465 }, { "epoch": 2.27408615300992, "grad_norm": 1.1333566904067993, "learning_rate": 0.0007157392308737601, "loss": 3.4522, "step": 33470 }, { "epoch": 2.2744258730805815, "grad_norm": 0.7303792238235474, "learning_rate": 0.0007156967658649273, "loss": 3.6039, "step": 33475 }, { "epoch": 2.274765593151243, "grad_norm": 0.9278403520584106, "learning_rate": 0.0007156543008560946, "loss": 3.5457, "step": 33480 }, { "epoch": 2.2751053132219052, "grad_norm": 0.8834285736083984, "learning_rate": 0.0007156118358472619, "loss": 3.5495, "step": 33485 }, { "epoch": 2.275445033292567, "grad_norm": 0.7755470275878906, "learning_rate": 0.0007155693708384291, "loss": 3.7187, "step": 33490 }, { "epoch": 2.2757847533632285, "grad_norm": 0.9774354696273804, "learning_rate": 0.0007155269058295964, "loss": 3.6987, "step": 33495 }, { "epoch": 2.2761244734338906, "grad_norm": 0.729327380657196, "learning_rate": 0.0007154844408207637, "loss": 3.7649, "step": 33500 }, { "epoch": 2.276464193504552, "grad_norm": 0.7892672419548035, "learning_rate": 0.000715441975811931, "loss": 3.8116, "step": 33505 }, { "epoch": 2.276803913575214, "grad_norm": 0.8903812766075134, "learning_rate": 0.0007153995108030983, "loss": 3.3989, "step": 33510 }, { "epoch": 2.277143633645876, "grad_norm": 0.9278993606567383, "learning_rate": 0.0007153570457942655, "loss": 3.625, "step": 33515 }, { "epoch": 2.2774833537165375, "grad_norm": 0.9745951890945435, "learning_rate": 0.0007153145807854328, "loss": 3.5104, "step": 33520 }, { "epoch": 2.277823073787199, "grad_norm": 0.8301883339881897, "learning_rate": 0.0007152721157766001, "loss": 3.5483, "step": 33525 }, { "epoch": 2.2781627938578612, "grad_norm": 0.8892806172370911, "learning_rate": 0.0007152296507677673, "loss": 3.5899, "step": 33530 }, { "epoch": 2.278502513928523, "grad_norm": 0.8409011363983154, "learning_rate": 0.0007151871857589347, "loss": 3.4398, "step": 33535 }, { "epoch": 2.2788422339991845, "grad_norm": 4.0718536376953125, "learning_rate": 0.000715144720750102, "loss": 3.8608, "step": 33540 }, { "epoch": 2.2791819540698466, "grad_norm": 0.943730890750885, "learning_rate": 0.0007151022557412692, "loss": 3.4986, "step": 33545 }, { "epoch": 2.279521674140508, "grad_norm": 0.8227397799491882, "learning_rate": 0.0007150597907324364, "loss": 3.5182, "step": 33550 }, { "epoch": 2.27986139421117, "grad_norm": 1.0455341339111328, "learning_rate": 0.0007150173257236038, "loss": 3.4311, "step": 33555 }, { "epoch": 2.280201114281832, "grad_norm": 0.9557324647903442, "learning_rate": 0.000714974860714771, "loss": 3.4792, "step": 33560 }, { "epoch": 2.2805408343524936, "grad_norm": 0.746530294418335, "learning_rate": 0.0007149323957059383, "loss": 3.4188, "step": 33565 }, { "epoch": 2.280880554423155, "grad_norm": 0.6280292868614197, "learning_rate": 0.0007148899306971057, "loss": 3.7707, "step": 33570 }, { "epoch": 2.2812202744938173, "grad_norm": 0.8083510994911194, "learning_rate": 0.0007148474656882729, "loss": 3.4646, "step": 33575 }, { "epoch": 2.281559994564479, "grad_norm": 1.2106924057006836, "learning_rate": 0.0007148050006794402, "loss": 3.3421, "step": 33580 }, { "epoch": 2.2818997146351405, "grad_norm": 0.7883691787719727, "learning_rate": 0.0007147625356706075, "loss": 3.5916, "step": 33585 }, { "epoch": 2.2822394347058026, "grad_norm": 0.948059618473053, "learning_rate": 0.0007147200706617747, "loss": 3.4732, "step": 33590 }, { "epoch": 2.2825791547764642, "grad_norm": 1.2449692487716675, "learning_rate": 0.000714677605652942, "loss": 3.4399, "step": 33595 }, { "epoch": 2.282918874847126, "grad_norm": 0.8142520785331726, "learning_rate": 0.0007146351406441092, "loss": 3.6643, "step": 33600 }, { "epoch": 2.283258594917788, "grad_norm": 1.11040461063385, "learning_rate": 0.0007145926756352766, "loss": 3.4133, "step": 33605 }, { "epoch": 2.2835983149884496, "grad_norm": 1.0723345279693604, "learning_rate": 0.0007145502106264439, "loss": 3.57, "step": 33610 }, { "epoch": 2.283938035059111, "grad_norm": 0.8764246702194214, "learning_rate": 0.0007145077456176111, "loss": 3.4837, "step": 33615 }, { "epoch": 2.2842777551297733, "grad_norm": 0.8795246481895447, "learning_rate": 0.0007144652806087784, "loss": 3.5893, "step": 33620 }, { "epoch": 2.284617475200435, "grad_norm": 0.8362054824829102, "learning_rate": 0.0007144228155999457, "loss": 3.8411, "step": 33625 }, { "epoch": 2.2849571952710965, "grad_norm": 0.9274553060531616, "learning_rate": 0.0007143803505911129, "loss": 3.4455, "step": 33630 }, { "epoch": 2.2852969153417586, "grad_norm": 0.9331597089767456, "learning_rate": 0.0007143378855822801, "loss": 3.5365, "step": 33635 }, { "epoch": 2.2856366354124202, "grad_norm": 0.8610716462135315, "learning_rate": 0.0007142954205734476, "loss": 3.8021, "step": 33640 }, { "epoch": 2.285976355483082, "grad_norm": 1.0017797946929932, "learning_rate": 0.0007142529555646148, "loss": 3.6703, "step": 33645 }, { "epoch": 2.286316075553744, "grad_norm": 0.9519618153572083, "learning_rate": 0.000714210490555782, "loss": 3.5385, "step": 33650 }, { "epoch": 2.2866557956244056, "grad_norm": 0.9531429409980774, "learning_rate": 0.0007141680255469494, "loss": 3.5308, "step": 33655 }, { "epoch": 2.286995515695067, "grad_norm": 0.7119864225387573, "learning_rate": 0.0007141255605381166, "loss": 3.6731, "step": 33660 }, { "epoch": 2.2873352357657293, "grad_norm": 0.7924984097480774, "learning_rate": 0.0007140830955292838, "loss": 3.4853, "step": 33665 }, { "epoch": 2.287674955836391, "grad_norm": 0.8567978143692017, "learning_rate": 0.0007140406305204512, "loss": 3.5347, "step": 33670 }, { "epoch": 2.2880146759070525, "grad_norm": 0.9263978600502014, "learning_rate": 0.0007139981655116185, "loss": 3.3028, "step": 33675 }, { "epoch": 2.288354395977714, "grad_norm": 0.9461192488670349, "learning_rate": 0.0007139557005027857, "loss": 3.5105, "step": 33680 }, { "epoch": 2.2886941160483762, "grad_norm": 0.9066718816757202, "learning_rate": 0.000713913235493953, "loss": 3.5792, "step": 33685 }, { "epoch": 2.289033836119038, "grad_norm": 0.8691078424453735, "learning_rate": 0.0007138707704851203, "loss": 3.6218, "step": 33690 }, { "epoch": 2.2893735561896995, "grad_norm": 0.8015620708465576, "learning_rate": 0.0007138283054762875, "loss": 3.6117, "step": 33695 }, { "epoch": 2.2897132762603616, "grad_norm": 0.8820092082023621, "learning_rate": 0.0007137858404674548, "loss": 3.7952, "step": 33700 }, { "epoch": 2.290052996331023, "grad_norm": 0.7063429355621338, "learning_rate": 0.0007137433754586221, "loss": 3.5758, "step": 33705 }, { "epoch": 2.290392716401685, "grad_norm": 1.047690510749817, "learning_rate": 0.0007137009104497894, "loss": 3.3326, "step": 33710 }, { "epoch": 2.290732436472347, "grad_norm": 0.9483758211135864, "learning_rate": 0.0007136584454409567, "loss": 3.4372, "step": 33715 }, { "epoch": 2.2910721565430086, "grad_norm": 0.7899272441864014, "learning_rate": 0.000713615980432124, "loss": 3.4505, "step": 33720 }, { "epoch": 2.29141187661367, "grad_norm": 0.7726660966873169, "learning_rate": 0.0007135735154232912, "loss": 3.5856, "step": 33725 }, { "epoch": 2.2917515966843323, "grad_norm": 0.9461966156959534, "learning_rate": 0.0007135310504144585, "loss": 3.8457, "step": 33730 }, { "epoch": 2.292091316754994, "grad_norm": 0.8097044229507446, "learning_rate": 0.0007134885854056257, "loss": 3.4337, "step": 33735 }, { "epoch": 2.2924310368256555, "grad_norm": 0.8588869571685791, "learning_rate": 0.000713446120396793, "loss": 3.5262, "step": 33740 }, { "epoch": 2.2927707568963176, "grad_norm": 0.8851317763328552, "learning_rate": 0.0007134036553879604, "loss": 3.4683, "step": 33745 }, { "epoch": 2.2931104769669792, "grad_norm": 0.8512869477272034, "learning_rate": 0.0007133611903791276, "loss": 3.669, "step": 33750 }, { "epoch": 2.293450197037641, "grad_norm": 0.849254846572876, "learning_rate": 0.0007133187253702949, "loss": 3.6922, "step": 33755 }, { "epoch": 2.293789917108303, "grad_norm": 0.6882972717285156, "learning_rate": 0.0007132762603614622, "loss": 3.3624, "step": 33760 }, { "epoch": 2.2941296371789646, "grad_norm": 0.7374040484428406, "learning_rate": 0.0007132337953526294, "loss": 3.2656, "step": 33765 }, { "epoch": 2.294469357249626, "grad_norm": 0.8655424118041992, "learning_rate": 0.0007131913303437967, "loss": 3.691, "step": 33770 }, { "epoch": 2.2948090773202883, "grad_norm": 0.8615302443504333, "learning_rate": 0.000713148865334964, "loss": 3.5827, "step": 33775 }, { "epoch": 2.29514879739095, "grad_norm": 0.8697820901870728, "learning_rate": 0.0007131064003261313, "loss": 3.5313, "step": 33780 }, { "epoch": 2.2954885174616115, "grad_norm": 0.9026974439620972, "learning_rate": 0.0007130639353172985, "loss": 3.6302, "step": 33785 }, { "epoch": 2.295828237532273, "grad_norm": 1.0698466300964355, "learning_rate": 0.0007130214703084659, "loss": 3.5132, "step": 33790 }, { "epoch": 2.2961679576029352, "grad_norm": 1.1608260869979858, "learning_rate": 0.0007129790052996331, "loss": 3.608, "step": 33795 }, { "epoch": 2.296507677673597, "grad_norm": 0.7514434456825256, "learning_rate": 0.0007129365402908003, "loss": 3.6538, "step": 33800 }, { "epoch": 2.2968473977442585, "grad_norm": 1.1244678497314453, "learning_rate": 0.0007128940752819677, "loss": 3.5982, "step": 33805 }, { "epoch": 2.2971871178149206, "grad_norm": 1.0733412504196167, "learning_rate": 0.0007128516102731349, "loss": 3.4568, "step": 33810 }, { "epoch": 2.297526837885582, "grad_norm": 0.7152422666549683, "learning_rate": 0.0007128091452643022, "loss": 3.6435, "step": 33815 }, { "epoch": 2.297866557956244, "grad_norm": 0.7616744637489319, "learning_rate": 0.0007127666802554696, "loss": 3.5654, "step": 33820 }, { "epoch": 2.298206278026906, "grad_norm": 0.8126440644264221, "learning_rate": 0.0007127242152466368, "loss": 3.4232, "step": 33825 }, { "epoch": 2.2985459980975675, "grad_norm": 0.8614236116409302, "learning_rate": 0.000712681750237804, "loss": 3.624, "step": 33830 }, { "epoch": 2.298885718168229, "grad_norm": 0.9122059345245361, "learning_rate": 0.0007126392852289713, "loss": 3.4455, "step": 33835 }, { "epoch": 2.2992254382388913, "grad_norm": 0.8862968683242798, "learning_rate": 0.0007125968202201386, "loss": 3.4778, "step": 33840 }, { "epoch": 2.299565158309553, "grad_norm": 0.8370521664619446, "learning_rate": 0.0007125543552113058, "loss": 3.778, "step": 33845 }, { "epoch": 2.2999048783802145, "grad_norm": 0.6971193552017212, "learning_rate": 0.0007125118902024732, "loss": 3.6383, "step": 33850 }, { "epoch": 2.3002445984508766, "grad_norm": 0.7824249863624573, "learning_rate": 0.0007124694251936405, "loss": 3.5573, "step": 33855 }, { "epoch": 2.300584318521538, "grad_norm": 0.7947288155555725, "learning_rate": 0.0007124269601848077, "loss": 3.2653, "step": 33860 }, { "epoch": 2.3009240385922, "grad_norm": 0.8118384480476379, "learning_rate": 0.000712384495175975, "loss": 3.7857, "step": 33865 }, { "epoch": 2.301263758662862, "grad_norm": 0.8397238850593567, "learning_rate": 0.0007123420301671423, "loss": 3.2688, "step": 33870 }, { "epoch": 2.3016034787335236, "grad_norm": 0.6829814910888672, "learning_rate": 0.0007122995651583095, "loss": 3.34, "step": 33875 }, { "epoch": 2.301943198804185, "grad_norm": 1.0572757720947266, "learning_rate": 0.0007122571001494768, "loss": 3.6329, "step": 33880 }, { "epoch": 2.3022829188748473, "grad_norm": 0.8951638340950012, "learning_rate": 0.0007122146351406441, "loss": 3.4023, "step": 33885 }, { "epoch": 2.302622638945509, "grad_norm": 0.7757227420806885, "learning_rate": 0.0007121721701318114, "loss": 3.5675, "step": 33890 }, { "epoch": 2.3029623590161705, "grad_norm": 0.8249233365058899, "learning_rate": 0.0007121297051229787, "loss": 3.5986, "step": 33895 }, { "epoch": 2.3033020790868326, "grad_norm": 0.8921540975570679, "learning_rate": 0.0007120872401141459, "loss": 3.3791, "step": 33900 }, { "epoch": 2.3036417991574942, "grad_norm": 0.977718710899353, "learning_rate": 0.0007120447751053133, "loss": 3.6686, "step": 33905 }, { "epoch": 2.303981519228156, "grad_norm": 0.8318045735359192, "learning_rate": 0.0007120023100964805, "loss": 3.5758, "step": 33910 }, { "epoch": 2.304321239298818, "grad_norm": 0.9497118592262268, "learning_rate": 0.0007119598450876477, "loss": 3.7846, "step": 33915 }, { "epoch": 2.3046609593694796, "grad_norm": 0.9017803072929382, "learning_rate": 0.0007119173800788152, "loss": 3.7131, "step": 33920 }, { "epoch": 2.305000679440141, "grad_norm": 0.8985109329223633, "learning_rate": 0.0007118749150699824, "loss": 3.6689, "step": 33925 }, { "epoch": 2.3053403995108033, "grad_norm": 0.7551419734954834, "learning_rate": 0.0007118324500611496, "loss": 3.3541, "step": 33930 }, { "epoch": 2.305680119581465, "grad_norm": 1.7869906425476074, "learning_rate": 0.000711789985052317, "loss": 3.6841, "step": 33935 }, { "epoch": 2.3060198396521265, "grad_norm": 0.8264340162277222, "learning_rate": 0.0007117475200434842, "loss": 3.6175, "step": 33940 }, { "epoch": 2.3063595597227886, "grad_norm": 0.8188421130180359, "learning_rate": 0.0007117050550346514, "loss": 3.6617, "step": 33945 }, { "epoch": 2.3066992797934502, "grad_norm": 0.8296565413475037, "learning_rate": 0.0007116625900258188, "loss": 3.5027, "step": 33950 }, { "epoch": 2.307038999864112, "grad_norm": 0.824295163154602, "learning_rate": 0.0007116201250169861, "loss": 3.4179, "step": 33955 }, { "epoch": 2.307378719934774, "grad_norm": 0.8817291855812073, "learning_rate": 0.0007115776600081533, "loss": 3.673, "step": 33960 }, { "epoch": 2.3077184400054356, "grad_norm": 1.6192768812179565, "learning_rate": 0.0007115351949993206, "loss": 3.4492, "step": 33965 }, { "epoch": 2.308058160076097, "grad_norm": 0.8845073580741882, "learning_rate": 0.0007114927299904879, "loss": 3.2858, "step": 33970 }, { "epoch": 2.3083978801467593, "grad_norm": 0.6938125491142273, "learning_rate": 0.0007114502649816551, "loss": 3.7204, "step": 33975 }, { "epoch": 2.308737600217421, "grad_norm": 0.9887435436248779, "learning_rate": 0.0007114077999728224, "loss": 3.3122, "step": 33980 }, { "epoch": 2.3090773202880825, "grad_norm": 1.0122140645980835, "learning_rate": 0.0007113653349639897, "loss": 3.6489, "step": 33985 }, { "epoch": 2.3094170403587446, "grad_norm": 0.9755750894546509, "learning_rate": 0.000711322869955157, "loss": 3.5054, "step": 33990 }, { "epoch": 2.3097567604294063, "grad_norm": 1.0172529220581055, "learning_rate": 0.0007112804049463243, "loss": 3.7896, "step": 33995 }, { "epoch": 2.310096480500068, "grad_norm": 1.064594030380249, "learning_rate": 0.0007112379399374915, "loss": 3.546, "step": 34000 }, { "epoch": 2.31043620057073, "grad_norm": 1.0242984294891357, "learning_rate": 0.0007111954749286588, "loss": 3.6751, "step": 34005 }, { "epoch": 2.3107759206413916, "grad_norm": 1.041520595550537, "learning_rate": 0.0007111530099198261, "loss": 3.7273, "step": 34010 }, { "epoch": 2.311115640712053, "grad_norm": 0.8324095606803894, "learning_rate": 0.0007111105449109933, "loss": 3.6797, "step": 34015 }, { "epoch": 2.311455360782715, "grad_norm": 0.7015892267227173, "learning_rate": 0.0007110680799021607, "loss": 3.6101, "step": 34020 }, { "epoch": 2.311795080853377, "grad_norm": 0.803220808506012, "learning_rate": 0.000711025614893328, "loss": 3.5955, "step": 34025 }, { "epoch": 2.3121348009240386, "grad_norm": 0.8305432200431824, "learning_rate": 0.0007109831498844952, "loss": 3.2995, "step": 34030 }, { "epoch": 2.3124745209947, "grad_norm": 0.8131983876228333, "learning_rate": 0.0007109406848756624, "loss": 3.6584, "step": 34035 }, { "epoch": 2.3128142410653623, "grad_norm": 0.7462334036827087, "learning_rate": 0.0007108982198668298, "loss": 3.7746, "step": 34040 }, { "epoch": 2.313153961136024, "grad_norm": 0.7840363383293152, "learning_rate": 0.000710855754857997, "loss": 3.7043, "step": 34045 }, { "epoch": 2.3134936812066855, "grad_norm": 1.8237195014953613, "learning_rate": 0.0007108132898491642, "loss": 3.4715, "step": 34050 }, { "epoch": 2.3138334012773476, "grad_norm": 0.9244502782821655, "learning_rate": 0.0007107708248403317, "loss": 3.3044, "step": 34055 }, { "epoch": 2.3141731213480092, "grad_norm": 0.7685158848762512, "learning_rate": 0.0007107283598314989, "loss": 3.3397, "step": 34060 }, { "epoch": 2.314512841418671, "grad_norm": 0.7994508147239685, "learning_rate": 0.0007106858948226661, "loss": 3.6908, "step": 34065 }, { "epoch": 2.314852561489333, "grad_norm": 1.0769963264465332, "learning_rate": 0.0007106434298138335, "loss": 3.5231, "step": 34070 }, { "epoch": 2.3151922815599946, "grad_norm": 0.7900683283805847, "learning_rate": 0.0007106009648050007, "loss": 3.3827, "step": 34075 }, { "epoch": 2.315532001630656, "grad_norm": 1.0778696537017822, "learning_rate": 0.0007105584997961679, "loss": 3.7908, "step": 34080 }, { "epoch": 2.3158717217013183, "grad_norm": 0.760898768901825, "learning_rate": 0.0007105160347873352, "loss": 3.7848, "step": 34085 }, { "epoch": 2.31621144177198, "grad_norm": 0.6816806793212891, "learning_rate": 0.0007104735697785026, "loss": 3.6774, "step": 34090 }, { "epoch": 2.3165511618426415, "grad_norm": 0.8084598183631897, "learning_rate": 0.0007104311047696698, "loss": 3.813, "step": 34095 }, { "epoch": 2.3168908819133036, "grad_norm": 1.1742398738861084, "learning_rate": 0.0007103886397608371, "loss": 3.697, "step": 34100 }, { "epoch": 2.3172306019839652, "grad_norm": 0.846848726272583, "learning_rate": 0.0007103461747520044, "loss": 3.3247, "step": 34105 }, { "epoch": 2.317570322054627, "grad_norm": 0.9724783301353455, "learning_rate": 0.0007103037097431716, "loss": 3.7151, "step": 34110 }, { "epoch": 2.317910042125289, "grad_norm": 1.0472772121429443, "learning_rate": 0.0007102612447343389, "loss": 3.7369, "step": 34115 }, { "epoch": 2.3182497621959506, "grad_norm": 1.0491966009140015, "learning_rate": 0.0007102187797255061, "loss": 3.6542, "step": 34120 }, { "epoch": 2.318589482266612, "grad_norm": 0.9557170271873474, "learning_rate": 0.0007101763147166735, "loss": 3.6361, "step": 34125 }, { "epoch": 2.318929202337274, "grad_norm": 0.9192716479301453, "learning_rate": 0.0007101338497078408, "loss": 3.6267, "step": 34130 }, { "epoch": 2.319268922407936, "grad_norm": 0.9863161444664001, "learning_rate": 0.000710091384699008, "loss": 3.7624, "step": 34135 }, { "epoch": 2.3196086424785975, "grad_norm": 0.7052904367446899, "learning_rate": 0.0007100489196901753, "loss": 3.7789, "step": 34140 }, { "epoch": 2.319948362549259, "grad_norm": 1.1891669034957886, "learning_rate": 0.0007100064546813426, "loss": 3.4648, "step": 34145 }, { "epoch": 2.3202880826199213, "grad_norm": 0.9277065396308899, "learning_rate": 0.0007099639896725098, "loss": 3.6379, "step": 34150 }, { "epoch": 2.320627802690583, "grad_norm": 0.9203821420669556, "learning_rate": 0.0007099215246636771, "loss": 3.467, "step": 34155 }, { "epoch": 2.3209675227612445, "grad_norm": 1.2827599048614502, "learning_rate": 0.0007098790596548445, "loss": 3.416, "step": 34160 }, { "epoch": 2.3213072428319066, "grad_norm": 1.0372949838638306, "learning_rate": 0.0007098365946460117, "loss": 3.4516, "step": 34165 }, { "epoch": 2.3216469629025682, "grad_norm": 0.757472813129425, "learning_rate": 0.000709794129637179, "loss": 3.5345, "step": 34170 }, { "epoch": 2.32198668297323, "grad_norm": 0.8775049448013306, "learning_rate": 0.0007097516646283463, "loss": 3.3518, "step": 34175 }, { "epoch": 2.322326403043892, "grad_norm": 0.9360730051994324, "learning_rate": 0.0007097091996195135, "loss": 3.3618, "step": 34180 }, { "epoch": 2.3226661231145536, "grad_norm": 0.9197375774383545, "learning_rate": 0.0007096667346106807, "loss": 3.4241, "step": 34185 }, { "epoch": 2.323005843185215, "grad_norm": 0.8105782270431519, "learning_rate": 0.0007096242696018481, "loss": 3.3682, "step": 34190 }, { "epoch": 2.3233455632558773, "grad_norm": 0.9358724355697632, "learning_rate": 0.0007095818045930154, "loss": 3.6549, "step": 34195 }, { "epoch": 2.323685283326539, "grad_norm": 1.0015637874603271, "learning_rate": 0.0007095393395841826, "loss": 3.4941, "step": 34200 }, { "epoch": 2.3240250033972005, "grad_norm": 1.0032161474227905, "learning_rate": 0.00070949687457535, "loss": 3.4708, "step": 34205 }, { "epoch": 2.3243647234678626, "grad_norm": 0.7717537879943848, "learning_rate": 0.0007094544095665172, "loss": 3.9169, "step": 34210 }, { "epoch": 2.3247044435385242, "grad_norm": 0.7563071250915527, "learning_rate": 0.0007094119445576844, "loss": 3.4856, "step": 34215 }, { "epoch": 2.325044163609186, "grad_norm": 0.9331188797950745, "learning_rate": 0.0007093694795488518, "loss": 3.8392, "step": 34220 }, { "epoch": 2.325383883679848, "grad_norm": 1.0929288864135742, "learning_rate": 0.000709327014540019, "loss": 3.456, "step": 34225 }, { "epoch": 2.3257236037505096, "grad_norm": 0.8375023603439331, "learning_rate": 0.0007092845495311863, "loss": 3.6963, "step": 34230 }, { "epoch": 2.326063323821171, "grad_norm": 0.8193178176879883, "learning_rate": 0.0007092420845223536, "loss": 3.541, "step": 34235 }, { "epoch": 2.3264030438918333, "grad_norm": 0.7918261289596558, "learning_rate": 0.0007091996195135209, "loss": 3.6571, "step": 34240 }, { "epoch": 2.326742763962495, "grad_norm": 1.0284161567687988, "learning_rate": 0.0007091571545046882, "loss": 3.5652, "step": 34245 }, { "epoch": 2.3270824840331565, "grad_norm": 0.9130529761314392, "learning_rate": 0.0007091146894958554, "loss": 3.6421, "step": 34250 }, { "epoch": 2.3274222041038186, "grad_norm": 0.8366966843605042, "learning_rate": 0.0007090722244870227, "loss": 3.6082, "step": 34255 }, { "epoch": 2.3277619241744802, "grad_norm": 1.1698920726776123, "learning_rate": 0.00070902975947819, "loss": 3.6491, "step": 34260 }, { "epoch": 2.328101644245142, "grad_norm": 0.8507300019264221, "learning_rate": 0.0007089872944693573, "loss": 3.3246, "step": 34265 }, { "epoch": 2.328441364315804, "grad_norm": 0.909733235836029, "learning_rate": 0.0007089448294605246, "loss": 3.7497, "step": 34270 }, { "epoch": 2.3287810843864656, "grad_norm": 0.8864739537239075, "learning_rate": 0.0007089023644516919, "loss": 3.3475, "step": 34275 }, { "epoch": 2.329120804457127, "grad_norm": 0.7572029829025269, "learning_rate": 0.0007088598994428591, "loss": 3.9518, "step": 34280 }, { "epoch": 2.3294605245277893, "grad_norm": 0.7196884751319885, "learning_rate": 0.0007088174344340263, "loss": 3.4821, "step": 34285 }, { "epoch": 2.329800244598451, "grad_norm": 1.327818751335144, "learning_rate": 0.0007087749694251937, "loss": 3.3762, "step": 34290 }, { "epoch": 2.3301399646691126, "grad_norm": 0.7785714268684387, "learning_rate": 0.0007087325044163609, "loss": 3.6989, "step": 34295 }, { "epoch": 2.3304796847397746, "grad_norm": 0.7618208527565002, "learning_rate": 0.0007086900394075282, "loss": 3.5059, "step": 34300 }, { "epoch": 2.3308194048104363, "grad_norm": 0.9117652177810669, "learning_rate": 0.0007086475743986956, "loss": 3.762, "step": 34305 }, { "epoch": 2.331159124881098, "grad_norm": 0.7800503969192505, "learning_rate": 0.0007086051093898628, "loss": 3.635, "step": 34310 }, { "epoch": 2.33149884495176, "grad_norm": 0.969446063041687, "learning_rate": 0.00070856264438103, "loss": 3.5902, "step": 34315 }, { "epoch": 2.3318385650224216, "grad_norm": 1.0706977844238281, "learning_rate": 0.0007085201793721974, "loss": 3.2806, "step": 34320 }, { "epoch": 2.3321782850930832, "grad_norm": 0.8545742034912109, "learning_rate": 0.0007084777143633646, "loss": 3.3652, "step": 34325 }, { "epoch": 2.3325180051637453, "grad_norm": 0.9091259837150574, "learning_rate": 0.0007084352493545318, "loss": 3.6613, "step": 34330 }, { "epoch": 2.332857725234407, "grad_norm": 0.8166537284851074, "learning_rate": 0.0007083927843456992, "loss": 3.3974, "step": 34335 }, { "epoch": 2.3331974453050686, "grad_norm": 0.86677086353302, "learning_rate": 0.0007083503193368665, "loss": 3.5774, "step": 34340 }, { "epoch": 2.3335371653757306, "grad_norm": 0.847618818283081, "learning_rate": 0.0007083078543280337, "loss": 3.4753, "step": 34345 }, { "epoch": 2.3338768854463923, "grad_norm": 0.903878390789032, "learning_rate": 0.000708265389319201, "loss": 3.6949, "step": 34350 }, { "epoch": 2.334216605517054, "grad_norm": 1.0377613306045532, "learning_rate": 0.0007082229243103683, "loss": 3.5921, "step": 34355 }, { "epoch": 2.3345563255877155, "grad_norm": 0.682405412197113, "learning_rate": 0.0007081804593015355, "loss": 3.7734, "step": 34360 }, { "epoch": 2.3348960456583776, "grad_norm": 0.8002089262008667, "learning_rate": 0.0007081379942927028, "loss": 3.4987, "step": 34365 }, { "epoch": 2.3352357657290392, "grad_norm": 1.0584214925765991, "learning_rate": 0.0007080955292838702, "loss": 3.6984, "step": 34370 }, { "epoch": 2.335575485799701, "grad_norm": 0.7826297283172607, "learning_rate": 0.0007080530642750374, "loss": 3.632, "step": 34375 }, { "epoch": 2.335915205870363, "grad_norm": 0.9048815369606018, "learning_rate": 0.0007080105992662047, "loss": 3.442, "step": 34380 }, { "epoch": 2.3362549259410246, "grad_norm": 0.8867250084877014, "learning_rate": 0.0007079681342573719, "loss": 3.6201, "step": 34385 }, { "epoch": 2.336594646011686, "grad_norm": 1.0725702047348022, "learning_rate": 0.0007079256692485392, "loss": 3.5104, "step": 34390 }, { "epoch": 2.3369343660823483, "grad_norm": 1.0236880779266357, "learning_rate": 0.0007078832042397065, "loss": 3.7025, "step": 34395 }, { "epoch": 2.33727408615301, "grad_norm": 0.9194849729537964, "learning_rate": 0.0007078407392308737, "loss": 3.7502, "step": 34400 }, { "epoch": 2.3376138062236715, "grad_norm": 1.975117802619934, "learning_rate": 0.0007077982742220411, "loss": 3.6178, "step": 34405 }, { "epoch": 2.3379535262943336, "grad_norm": 0.8757022023200989, "learning_rate": 0.0007077558092132084, "loss": 3.6882, "step": 34410 }, { "epoch": 2.3382932463649952, "grad_norm": 1.0355061292648315, "learning_rate": 0.0007077133442043756, "loss": 3.6011, "step": 34415 }, { "epoch": 2.338632966435657, "grad_norm": 0.9576653242111206, "learning_rate": 0.0007076708791955428, "loss": 3.3939, "step": 34420 }, { "epoch": 2.338972686506319, "grad_norm": 0.7670703530311584, "learning_rate": 0.0007076284141867102, "loss": 3.2081, "step": 34425 }, { "epoch": 2.3393124065769806, "grad_norm": 0.7585484385490417, "learning_rate": 0.0007075859491778774, "loss": 3.6672, "step": 34430 }, { "epoch": 2.339652126647642, "grad_norm": 0.7069069147109985, "learning_rate": 0.0007075434841690446, "loss": 3.7496, "step": 34435 }, { "epoch": 2.3399918467183043, "grad_norm": 1.0143811702728271, "learning_rate": 0.0007075010191602121, "loss": 3.3745, "step": 34440 }, { "epoch": 2.340331566788966, "grad_norm": 0.8294817209243774, "learning_rate": 0.0007074585541513793, "loss": 3.5667, "step": 34445 }, { "epoch": 2.3406712868596276, "grad_norm": 0.8429718613624573, "learning_rate": 0.0007074160891425465, "loss": 3.6083, "step": 34450 }, { "epoch": 2.3410110069302896, "grad_norm": 0.8887079358100891, "learning_rate": 0.0007073736241337139, "loss": 3.784, "step": 34455 }, { "epoch": 2.3413507270009513, "grad_norm": 1.3208699226379395, "learning_rate": 0.0007073311591248811, "loss": 3.676, "step": 34460 }, { "epoch": 2.341690447071613, "grad_norm": 1.106257677078247, "learning_rate": 0.0007072886941160483, "loss": 3.6297, "step": 34465 }, { "epoch": 2.3420301671422745, "grad_norm": 0.7275393009185791, "learning_rate": 0.0007072462291072156, "loss": 3.5263, "step": 34470 }, { "epoch": 2.3423698872129366, "grad_norm": 0.9168503284454346, "learning_rate": 0.000707203764098383, "loss": 3.3449, "step": 34475 }, { "epoch": 2.3427096072835982, "grad_norm": 0.8990557193756104, "learning_rate": 0.0007071612990895502, "loss": 3.6523, "step": 34480 }, { "epoch": 2.34304932735426, "grad_norm": 0.9464854001998901, "learning_rate": 0.0007071188340807175, "loss": 3.5586, "step": 34485 }, { "epoch": 2.343389047424922, "grad_norm": 1.0622926950454712, "learning_rate": 0.0007070763690718848, "loss": 3.6451, "step": 34490 }, { "epoch": 2.3437287674955836, "grad_norm": 4.860132694244385, "learning_rate": 0.000707033904063052, "loss": 3.7712, "step": 34495 }, { "epoch": 2.344068487566245, "grad_norm": 0.9055885076522827, "learning_rate": 0.0007069914390542193, "loss": 3.3985, "step": 34500 }, { "epoch": 2.3444082076369073, "grad_norm": 1.129346489906311, "learning_rate": 0.0007069489740453866, "loss": 3.6186, "step": 34505 }, { "epoch": 2.344747927707569, "grad_norm": 1.1102019548416138, "learning_rate": 0.0007069065090365539, "loss": 3.5941, "step": 34510 }, { "epoch": 2.3450876477782305, "grad_norm": 1.0202596187591553, "learning_rate": 0.0007068640440277212, "loss": 3.4945, "step": 34515 }, { "epoch": 2.3454273678488926, "grad_norm": 0.870394766330719, "learning_rate": 0.0007068215790188884, "loss": 3.4026, "step": 34520 }, { "epoch": 2.3457670879195542, "grad_norm": 0.8878560066223145, "learning_rate": 0.0007067791140100557, "loss": 3.7226, "step": 34525 }, { "epoch": 2.346106807990216, "grad_norm": 1.066633939743042, "learning_rate": 0.000706736649001223, "loss": 3.7562, "step": 34530 }, { "epoch": 2.346446528060878, "grad_norm": 1.0387638807296753, "learning_rate": 0.0007066941839923902, "loss": 3.7419, "step": 34535 }, { "epoch": 2.3467862481315396, "grad_norm": 0.9734619855880737, "learning_rate": 0.0007066517189835575, "loss": 3.3707, "step": 34540 }, { "epoch": 2.347125968202201, "grad_norm": 0.8226066827774048, "learning_rate": 0.0007066092539747249, "loss": 3.5612, "step": 34545 }, { "epoch": 2.3474656882728633, "grad_norm": 1.1062244176864624, "learning_rate": 0.0007065667889658921, "loss": 3.6997, "step": 34550 }, { "epoch": 2.347805408343525, "grad_norm": 0.9129453897476196, "learning_rate": 0.0007065243239570594, "loss": 3.6935, "step": 34555 }, { "epoch": 2.3481451284141865, "grad_norm": 0.8260941505432129, "learning_rate": 0.0007064818589482267, "loss": 3.7246, "step": 34560 }, { "epoch": 2.3484848484848486, "grad_norm": 0.8649405241012573, "learning_rate": 0.0007064393939393939, "loss": 3.7534, "step": 34565 }, { "epoch": 2.3488245685555103, "grad_norm": 0.7330026030540466, "learning_rate": 0.0007063969289305611, "loss": 3.6331, "step": 34570 }, { "epoch": 2.349164288626172, "grad_norm": 1.1282317638397217, "learning_rate": 0.0007063544639217286, "loss": 3.5202, "step": 34575 }, { "epoch": 2.349504008696834, "grad_norm": 0.9743011593818665, "learning_rate": 0.0007063119989128958, "loss": 3.6357, "step": 34580 }, { "epoch": 2.3498437287674956, "grad_norm": 0.921506941318512, "learning_rate": 0.0007062695339040631, "loss": 3.5229, "step": 34585 }, { "epoch": 2.350183448838157, "grad_norm": 0.7268308401107788, "learning_rate": 0.0007062270688952304, "loss": 3.7577, "step": 34590 }, { "epoch": 2.3505231689088193, "grad_norm": 0.8359577059745789, "learning_rate": 0.0007061846038863976, "loss": 3.4376, "step": 34595 }, { "epoch": 2.350862888979481, "grad_norm": 0.7796173095703125, "learning_rate": 0.0007061421388775649, "loss": 3.6381, "step": 34600 }, { "epoch": 2.3512026090501426, "grad_norm": 1.031511902809143, "learning_rate": 0.0007060996738687322, "loss": 3.6355, "step": 34605 }, { "epoch": 2.3515423291208046, "grad_norm": 0.8212525844573975, "learning_rate": 0.0007060572088598995, "loss": 3.7198, "step": 34610 }, { "epoch": 2.3518820491914663, "grad_norm": 0.8388593196868896, "learning_rate": 0.0007060147438510668, "loss": 3.4942, "step": 34615 }, { "epoch": 2.352221769262128, "grad_norm": 0.6859729290008545, "learning_rate": 0.000705972278842234, "loss": 3.4758, "step": 34620 }, { "epoch": 2.35256148933279, "grad_norm": 0.7173684239387512, "learning_rate": 0.0007059298138334013, "loss": 3.6544, "step": 34625 }, { "epoch": 2.3529012094034516, "grad_norm": 0.8629988431930542, "learning_rate": 0.0007058873488245686, "loss": 3.7, "step": 34630 }, { "epoch": 2.3532409294741132, "grad_norm": 1.128861427307129, "learning_rate": 0.0007058448838157358, "loss": 3.8156, "step": 34635 }, { "epoch": 2.3535806495447753, "grad_norm": 0.8494544625282288, "learning_rate": 0.0007058024188069031, "loss": 3.4643, "step": 34640 }, { "epoch": 2.353920369615437, "grad_norm": 0.6636223793029785, "learning_rate": 0.0007057599537980705, "loss": 3.5783, "step": 34645 }, { "epoch": 2.3542600896860986, "grad_norm": 0.7290956974029541, "learning_rate": 0.0007057174887892377, "loss": 3.5841, "step": 34650 }, { "epoch": 2.3545998097567606, "grad_norm": 0.9678652882575989, "learning_rate": 0.000705675023780405, "loss": 3.629, "step": 34655 }, { "epoch": 2.3549395298274223, "grad_norm": 0.7674130201339722, "learning_rate": 0.0007056325587715723, "loss": 3.6045, "step": 34660 }, { "epoch": 2.355279249898084, "grad_norm": 0.902842104434967, "learning_rate": 0.0007055900937627395, "loss": 3.6456, "step": 34665 }, { "epoch": 2.355618969968746, "grad_norm": 0.7511454820632935, "learning_rate": 0.0007055476287539067, "loss": 3.6316, "step": 34670 }, { "epoch": 2.3559586900394076, "grad_norm": 0.6847243309020996, "learning_rate": 0.0007055051637450741, "loss": 3.6069, "step": 34675 }, { "epoch": 2.3562984101100692, "grad_norm": 0.9734264016151428, "learning_rate": 0.0007054626987362414, "loss": 3.5082, "step": 34680 }, { "epoch": 2.3566381301807313, "grad_norm": 0.8817508220672607, "learning_rate": 0.0007054202337274086, "loss": 3.5564, "step": 34685 }, { "epoch": 2.356977850251393, "grad_norm": 0.7149307131767273, "learning_rate": 0.000705377768718576, "loss": 3.5211, "step": 34690 }, { "epoch": 2.3573175703220546, "grad_norm": 0.8015739321708679, "learning_rate": 0.0007053353037097432, "loss": 3.4549, "step": 34695 }, { "epoch": 2.357657290392716, "grad_norm": 0.9455263018608093, "learning_rate": 0.0007052928387009104, "loss": 3.9212, "step": 34700 }, { "epoch": 2.3579970104633783, "grad_norm": 1.5807610750198364, "learning_rate": 0.0007052503736920778, "loss": 3.5016, "step": 34705 }, { "epoch": 2.35833673053404, "grad_norm": 0.8543570637702942, "learning_rate": 0.000705207908683245, "loss": 3.4, "step": 34710 }, { "epoch": 2.3586764506047015, "grad_norm": 0.9081307053565979, "learning_rate": 0.0007051654436744123, "loss": 3.3649, "step": 34715 }, { "epoch": 2.3590161706753636, "grad_norm": 0.8815417289733887, "learning_rate": 0.0007051229786655796, "loss": 3.6709, "step": 34720 }, { "epoch": 2.3593558907460253, "grad_norm": 0.8759732246398926, "learning_rate": 0.0007050805136567469, "loss": 3.6189, "step": 34725 }, { "epoch": 2.359695610816687, "grad_norm": 0.9433268308639526, "learning_rate": 0.0007050380486479141, "loss": 3.4962, "step": 34730 }, { "epoch": 2.360035330887349, "grad_norm": 1.0115960836410522, "learning_rate": 0.0007049955836390814, "loss": 3.5265, "step": 34735 }, { "epoch": 2.3603750509580106, "grad_norm": 0.871773898601532, "learning_rate": 0.0007049531186302487, "loss": 3.2949, "step": 34740 }, { "epoch": 2.360714771028672, "grad_norm": 0.8277033567428589, "learning_rate": 0.0007049106536214159, "loss": 3.5797, "step": 34745 }, { "epoch": 2.3610544910993343, "grad_norm": 0.8331530094146729, "learning_rate": 0.0007048681886125833, "loss": 3.5718, "step": 34750 }, { "epoch": 2.361394211169996, "grad_norm": 0.7831059694290161, "learning_rate": 0.0007048257236037506, "loss": 3.5988, "step": 34755 }, { "epoch": 2.3617339312406576, "grad_norm": 0.7845896482467651, "learning_rate": 0.0007047832585949178, "loss": 3.6117, "step": 34760 }, { "epoch": 2.3620736513113196, "grad_norm": 0.9187370538711548, "learning_rate": 0.0007047407935860851, "loss": 3.5545, "step": 34765 }, { "epoch": 2.3624133713819813, "grad_norm": 0.9950335621833801, "learning_rate": 0.0007046983285772523, "loss": 3.2773, "step": 34770 }, { "epoch": 2.362753091452643, "grad_norm": 1.020647406578064, "learning_rate": 0.0007046558635684196, "loss": 3.543, "step": 34775 }, { "epoch": 2.363092811523305, "grad_norm": 0.8755601644515991, "learning_rate": 0.0007046133985595869, "loss": 3.5004, "step": 34780 }, { "epoch": 2.3634325315939666, "grad_norm": 0.898391604423523, "learning_rate": 0.0007045709335507542, "loss": 3.5907, "step": 34785 }, { "epoch": 2.3637722516646282, "grad_norm": 1.3889415264129639, "learning_rate": 0.0007045284685419215, "loss": 3.7194, "step": 34790 }, { "epoch": 2.3641119717352903, "grad_norm": 0.6181111931800842, "learning_rate": 0.0007044860035330888, "loss": 3.5344, "step": 34795 }, { "epoch": 2.364451691805952, "grad_norm": 0.9931139945983887, "learning_rate": 0.000704443538524256, "loss": 3.7138, "step": 34800 }, { "epoch": 2.3647914118766136, "grad_norm": 0.7110602259635925, "learning_rate": 0.0007044010735154232, "loss": 3.6617, "step": 34805 }, { "epoch": 2.365131131947275, "grad_norm": 4.998938083648682, "learning_rate": 0.0007043586085065906, "loss": 3.6314, "step": 34810 }, { "epoch": 2.3654708520179373, "grad_norm": 0.7764558792114258, "learning_rate": 0.0007043161434977578, "loss": 3.3432, "step": 34815 }, { "epoch": 2.365810572088599, "grad_norm": 0.9735468626022339, "learning_rate": 0.0007042736784889251, "loss": 3.3736, "step": 34820 }, { "epoch": 2.3661502921592605, "grad_norm": 0.7165238261222839, "learning_rate": 0.0007042312134800925, "loss": 3.0851, "step": 34825 }, { "epoch": 2.3664900122299226, "grad_norm": 0.8510459065437317, "learning_rate": 0.0007041887484712597, "loss": 3.1694, "step": 34830 }, { "epoch": 2.3668297323005842, "grad_norm": 0.7846376299858093, "learning_rate": 0.0007041462834624269, "loss": 3.7947, "step": 34835 }, { "epoch": 2.367169452371246, "grad_norm": 0.8124869465827942, "learning_rate": 0.0007041038184535943, "loss": 3.7984, "step": 34840 }, { "epoch": 2.367509172441908, "grad_norm": 0.8498038053512573, "learning_rate": 0.0007040613534447615, "loss": 3.5317, "step": 34845 }, { "epoch": 2.3678488925125696, "grad_norm": 0.7841882705688477, "learning_rate": 0.0007040188884359287, "loss": 3.6931, "step": 34850 }, { "epoch": 2.368188612583231, "grad_norm": 0.7706218957901001, "learning_rate": 0.0007039764234270962, "loss": 3.5774, "step": 34855 }, { "epoch": 2.3685283326538933, "grad_norm": 0.8382828831672668, "learning_rate": 0.0007039339584182634, "loss": 3.5038, "step": 34860 }, { "epoch": 2.368868052724555, "grad_norm": 0.8014661073684692, "learning_rate": 0.0007038914934094306, "loss": 3.561, "step": 34865 }, { "epoch": 2.3692077727952165, "grad_norm": 1.1246716976165771, "learning_rate": 0.0007038490284005979, "loss": 3.6765, "step": 34870 }, { "epoch": 2.3695474928658786, "grad_norm": 0.8412127494812012, "learning_rate": 0.0007038065633917652, "loss": 3.7021, "step": 34875 }, { "epoch": 2.3698872129365403, "grad_norm": 0.8694748878479004, "learning_rate": 0.0007037640983829324, "loss": 3.5457, "step": 34880 }, { "epoch": 2.370226933007202, "grad_norm": 0.829232931137085, "learning_rate": 0.0007037216333740997, "loss": 3.6355, "step": 34885 }, { "epoch": 2.370566653077864, "grad_norm": 1.800268292427063, "learning_rate": 0.0007036791683652671, "loss": 3.5249, "step": 34890 }, { "epoch": 2.3709063731485256, "grad_norm": 0.8035861253738403, "learning_rate": 0.0007036367033564343, "loss": 3.6698, "step": 34895 }, { "epoch": 2.3712460932191872, "grad_norm": 2.7226827144622803, "learning_rate": 0.0007035942383476016, "loss": 3.6353, "step": 34900 }, { "epoch": 2.3715858132898493, "grad_norm": 0.9045145511627197, "learning_rate": 0.0007035517733387688, "loss": 3.5663, "step": 34905 }, { "epoch": 2.371925533360511, "grad_norm": 1.0053484439849854, "learning_rate": 0.0007035093083299361, "loss": 3.3924, "step": 34910 }, { "epoch": 2.3722652534311726, "grad_norm": 0.7027516961097717, "learning_rate": 0.0007034668433211034, "loss": 3.7158, "step": 34915 }, { "epoch": 2.3726049735018346, "grad_norm": 0.878036379814148, "learning_rate": 0.0007034243783122706, "loss": 3.568, "step": 34920 }, { "epoch": 2.3729446935724963, "grad_norm": 1.3156311511993408, "learning_rate": 0.0007033819133034381, "loss": 3.532, "step": 34925 }, { "epoch": 2.373284413643158, "grad_norm": 0.885959267616272, "learning_rate": 0.0007033394482946053, "loss": 3.6234, "step": 34930 }, { "epoch": 2.37362413371382, "grad_norm": 1.0305720567703247, "learning_rate": 0.0007032969832857725, "loss": 3.7252, "step": 34935 }, { "epoch": 2.3739638537844816, "grad_norm": 0.8468777537345886, "learning_rate": 0.0007032545182769399, "loss": 3.7487, "step": 34940 }, { "epoch": 2.3743035738551432, "grad_norm": 1.4521880149841309, "learning_rate": 0.0007032120532681071, "loss": 3.4531, "step": 34945 }, { "epoch": 2.3746432939258053, "grad_norm": 0.9655662178993225, "learning_rate": 0.0007031695882592743, "loss": 3.6747, "step": 34950 }, { "epoch": 2.374983013996467, "grad_norm": 0.9979805946350098, "learning_rate": 0.0007031271232504417, "loss": 3.5536, "step": 34955 }, { "epoch": 2.3753227340671286, "grad_norm": 0.7736377120018005, "learning_rate": 0.000703084658241609, "loss": 3.5414, "step": 34960 }, { "epoch": 2.3756624541377906, "grad_norm": 0.8296526074409485, "learning_rate": 0.0007030421932327762, "loss": 3.6541, "step": 34965 }, { "epoch": 2.3760021742084523, "grad_norm": 0.8666050434112549, "learning_rate": 0.0007029997282239435, "loss": 3.6216, "step": 34970 }, { "epoch": 2.376341894279114, "grad_norm": 0.8654815554618835, "learning_rate": 0.0007029572632151108, "loss": 3.6338, "step": 34975 }, { "epoch": 2.376681614349776, "grad_norm": 0.9888479113578796, "learning_rate": 0.000702914798206278, "loss": 3.4786, "step": 34980 }, { "epoch": 2.3770213344204376, "grad_norm": 1.0325915813446045, "learning_rate": 0.0007028723331974453, "loss": 3.4872, "step": 34985 }, { "epoch": 2.3773610544910992, "grad_norm": 0.7684481739997864, "learning_rate": 0.0007028298681886126, "loss": 3.744, "step": 34990 }, { "epoch": 2.3777007745617613, "grad_norm": 0.9074941873550415, "learning_rate": 0.0007027874031797799, "loss": 3.5123, "step": 34995 }, { "epoch": 2.378040494632423, "grad_norm": 0.8340933322906494, "learning_rate": 0.0007027449381709472, "loss": 3.6753, "step": 35000 }, { "epoch": 2.3783802147030846, "grad_norm": 0.9796059131622314, "learning_rate": 0.0007027024731621145, "loss": 3.4091, "step": 35005 }, { "epoch": 2.3787199347737467, "grad_norm": 0.7915922999382019, "learning_rate": 0.0007026600081532817, "loss": 3.7104, "step": 35010 }, { "epoch": 2.3790596548444083, "grad_norm": 0.8944940567016602, "learning_rate": 0.000702617543144449, "loss": 3.5503, "step": 35015 }, { "epoch": 2.37939937491507, "grad_norm": 0.7971494793891907, "learning_rate": 0.0007025750781356162, "loss": 3.6392, "step": 35020 }, { "epoch": 2.379739094985732, "grad_norm": 1.332970380783081, "learning_rate": 0.0007025326131267835, "loss": 3.7434, "step": 35025 }, { "epoch": 2.3800788150563936, "grad_norm": 1.3748971223831177, "learning_rate": 0.0007024901481179509, "loss": 3.5831, "step": 35030 }, { "epoch": 2.3804185351270553, "grad_norm": 0.8054746985435486, "learning_rate": 0.0007024476831091181, "loss": 3.6861, "step": 35035 }, { "epoch": 2.380758255197717, "grad_norm": 1.1740444898605347, "learning_rate": 0.0007024052181002854, "loss": 3.7624, "step": 35040 }, { "epoch": 2.381097975268379, "grad_norm": 1.0862822532653809, "learning_rate": 0.0007023627530914527, "loss": 3.2013, "step": 35045 }, { "epoch": 2.3814376953390406, "grad_norm": 0.823153555393219, "learning_rate": 0.0007023202880826199, "loss": 3.2775, "step": 35050 }, { "epoch": 2.3817774154097022, "grad_norm": 1.2217553853988647, "learning_rate": 0.0007022778230737871, "loss": 3.6296, "step": 35055 }, { "epoch": 2.3821171354803643, "grad_norm": 0.8057252168655396, "learning_rate": 0.0007022353580649546, "loss": 3.5592, "step": 35060 }, { "epoch": 2.382456855551026, "grad_norm": 0.8286086916923523, "learning_rate": 0.0007021928930561218, "loss": 3.6506, "step": 35065 }, { "epoch": 2.3827965756216876, "grad_norm": 0.9963324069976807, "learning_rate": 0.000702150428047289, "loss": 3.5708, "step": 35070 }, { "epoch": 2.3831362956923496, "grad_norm": 0.8757644295692444, "learning_rate": 0.0007021079630384564, "loss": 3.5466, "step": 35075 }, { "epoch": 2.3834760157630113, "grad_norm": 1.0636075735092163, "learning_rate": 0.0007020654980296236, "loss": 3.5707, "step": 35080 }, { "epoch": 2.383815735833673, "grad_norm": 1.0848828554153442, "learning_rate": 0.0007020230330207908, "loss": 3.422, "step": 35085 }, { "epoch": 2.384155455904335, "grad_norm": 0.8940354585647583, "learning_rate": 0.0007019805680119582, "loss": 3.5293, "step": 35090 }, { "epoch": 2.3844951759749966, "grad_norm": 0.8815000057220459, "learning_rate": 0.0007019381030031255, "loss": 3.8505, "step": 35095 }, { "epoch": 2.3848348960456582, "grad_norm": 0.7502058744430542, "learning_rate": 0.0007018956379942927, "loss": 3.7819, "step": 35100 }, { "epoch": 2.3851746161163203, "grad_norm": 1.832905888557434, "learning_rate": 0.00070185317298546, "loss": 3.8133, "step": 35105 }, { "epoch": 2.385514336186982, "grad_norm": 0.9052382111549377, "learning_rate": 0.0007018107079766273, "loss": 3.7156, "step": 35110 }, { "epoch": 2.3858540562576436, "grad_norm": 0.7821452021598816, "learning_rate": 0.0007017682429677945, "loss": 3.5069, "step": 35115 }, { "epoch": 2.3861937763283056, "grad_norm": 0.8948304057121277, "learning_rate": 0.0007017257779589618, "loss": 3.812, "step": 35120 }, { "epoch": 2.3865334963989673, "grad_norm": 0.8012406826019287, "learning_rate": 0.0007016833129501291, "loss": 3.6965, "step": 35125 }, { "epoch": 2.386873216469629, "grad_norm": 0.762101948261261, "learning_rate": 0.0007016408479412964, "loss": 3.8838, "step": 35130 }, { "epoch": 2.387212936540291, "grad_norm": 0.824510931968689, "learning_rate": 0.0007015983829324637, "loss": 3.5625, "step": 35135 }, { "epoch": 2.3875526566109526, "grad_norm": 0.7094634175300598, "learning_rate": 0.000701555917923631, "loss": 3.5224, "step": 35140 }, { "epoch": 2.3878923766816142, "grad_norm": 0.9212439060211182, "learning_rate": 0.0007015134529147982, "loss": 3.638, "step": 35145 }, { "epoch": 2.388232096752276, "grad_norm": 1.0975406169891357, "learning_rate": 0.0007014709879059655, "loss": 3.6119, "step": 35150 }, { "epoch": 2.388571816822938, "grad_norm": 0.7583838701248169, "learning_rate": 0.0007014285228971327, "loss": 3.6175, "step": 35155 }, { "epoch": 2.3889115368935996, "grad_norm": 0.8665974140167236, "learning_rate": 0.0007013860578883, "loss": 3.75, "step": 35160 }, { "epoch": 2.389251256964261, "grad_norm": 1.0681042671203613, "learning_rate": 0.0007013435928794674, "loss": 3.6799, "step": 35165 }, { "epoch": 2.3895909770349233, "grad_norm": 1.0444397926330566, "learning_rate": 0.0007013011278706346, "loss": 3.4206, "step": 35170 }, { "epoch": 2.389930697105585, "grad_norm": 0.9459745287895203, "learning_rate": 0.0007012586628618019, "loss": 3.6722, "step": 35175 }, { "epoch": 2.3902704171762466, "grad_norm": 1.1398289203643799, "learning_rate": 0.0007012161978529692, "loss": 3.4389, "step": 35180 }, { "epoch": 2.3906101372469086, "grad_norm": 0.9716097712516785, "learning_rate": 0.0007011737328441364, "loss": 3.5042, "step": 35185 }, { "epoch": 2.3909498573175703, "grad_norm": 0.7929019927978516, "learning_rate": 0.0007011312678353037, "loss": 3.5095, "step": 35190 }, { "epoch": 2.391289577388232, "grad_norm": 0.8883367776870728, "learning_rate": 0.000701088802826471, "loss": 3.7362, "step": 35195 }, { "epoch": 2.391629297458894, "grad_norm": 0.8819423913955688, "learning_rate": 0.0007010463378176383, "loss": 3.8422, "step": 35200 }, { "epoch": 2.3919690175295556, "grad_norm": 0.8674731254577637, "learning_rate": 0.0007010038728088055, "loss": 3.1445, "step": 35205 }, { "epoch": 2.3923087376002172, "grad_norm": 0.854587972164154, "learning_rate": 0.0007009614077999729, "loss": 3.9388, "step": 35210 }, { "epoch": 2.3926484576708793, "grad_norm": 0.9078401327133179, "learning_rate": 0.0007009189427911401, "loss": 3.6791, "step": 35215 }, { "epoch": 2.392988177741541, "grad_norm": 0.6886181831359863, "learning_rate": 0.0007008764777823073, "loss": 3.6302, "step": 35220 }, { "epoch": 2.3933278978122026, "grad_norm": 0.8518375754356384, "learning_rate": 0.0007008340127734747, "loss": 3.814, "step": 35225 }, { "epoch": 2.3936676178828646, "grad_norm": 0.9054287672042847, "learning_rate": 0.0007007915477646419, "loss": 3.7222, "step": 35230 }, { "epoch": 2.3940073379535263, "grad_norm": 0.9529050588607788, "learning_rate": 0.0007007490827558092, "loss": 3.4628, "step": 35235 }, { "epoch": 2.394347058024188, "grad_norm": 0.9817489385604858, "learning_rate": 0.0007007066177469766, "loss": 3.7105, "step": 35240 }, { "epoch": 2.39468677809485, "grad_norm": 0.8480692505836487, "learning_rate": 0.0007006641527381438, "loss": 3.4509, "step": 35245 }, { "epoch": 2.3950264981655116, "grad_norm": 0.9525547027587891, "learning_rate": 0.000700621687729311, "loss": 3.4153, "step": 35250 }, { "epoch": 2.3953662182361732, "grad_norm": 0.7224661111831665, "learning_rate": 0.0007005792227204783, "loss": 3.4337, "step": 35255 }, { "epoch": 2.3957059383068353, "grad_norm": 1.3708618879318237, "learning_rate": 0.0007005367577116456, "loss": 3.5217, "step": 35260 }, { "epoch": 2.396045658377497, "grad_norm": 0.7442063093185425, "learning_rate": 0.0007004942927028129, "loss": 3.8185, "step": 35265 }, { "epoch": 2.3963853784481586, "grad_norm": 0.8112321496009827, "learning_rate": 0.0007004518276939802, "loss": 3.6568, "step": 35270 }, { "epoch": 2.3967250985188207, "grad_norm": 0.7316697239875793, "learning_rate": 0.0007004093626851475, "loss": 3.5508, "step": 35275 }, { "epoch": 2.3970648185894823, "grad_norm": 0.8168145418167114, "learning_rate": 0.0007003668976763148, "loss": 3.641, "step": 35280 }, { "epoch": 2.397404538660144, "grad_norm": 0.7948176860809326, "learning_rate": 0.000700324432667482, "loss": 3.6508, "step": 35285 }, { "epoch": 2.397744258730806, "grad_norm": 0.8953681588172913, "learning_rate": 0.0007002819676586493, "loss": 3.6532, "step": 35290 }, { "epoch": 2.3980839788014676, "grad_norm": 0.7659631371498108, "learning_rate": 0.0007002395026498166, "loss": 3.5541, "step": 35295 }, { "epoch": 2.3984236988721293, "grad_norm": 0.8775231838226318, "learning_rate": 0.0007001970376409838, "loss": 3.5622, "step": 35300 }, { "epoch": 2.3987634189427913, "grad_norm": 0.832393229007721, "learning_rate": 0.0007001545726321511, "loss": 3.7594, "step": 35305 }, { "epoch": 2.399103139013453, "grad_norm": 0.8624285459518433, "learning_rate": 0.0007001121076233185, "loss": 3.3551, "step": 35310 }, { "epoch": 2.3994428590841146, "grad_norm": 1.0660191774368286, "learning_rate": 0.0007000696426144857, "loss": 3.4003, "step": 35315 }, { "epoch": 2.3997825791547767, "grad_norm": 1.0410321950912476, "learning_rate": 0.0007000271776056529, "loss": 3.581, "step": 35320 }, { "epoch": 2.4001222992254383, "grad_norm": 0.8879261016845703, "learning_rate": 0.0006999847125968203, "loss": 3.6809, "step": 35325 }, { "epoch": 2.4004620192961, "grad_norm": 0.7513574957847595, "learning_rate": 0.0006999422475879875, "loss": 3.4495, "step": 35330 }, { "epoch": 2.400801739366762, "grad_norm": 0.7090854048728943, "learning_rate": 0.0006998997825791547, "loss": 3.8296, "step": 35335 }, { "epoch": 2.4011414594374236, "grad_norm": 0.7399373650550842, "learning_rate": 0.0006998573175703222, "loss": 3.5137, "step": 35340 }, { "epoch": 2.4014811795080853, "grad_norm": 0.8372071981430054, "learning_rate": 0.0006998148525614894, "loss": 3.5607, "step": 35345 }, { "epoch": 2.4018208995787473, "grad_norm": 1.0791149139404297, "learning_rate": 0.0006997723875526566, "loss": 3.3788, "step": 35350 }, { "epoch": 2.402160619649409, "grad_norm": 0.9231259822845459, "learning_rate": 0.000699729922543824, "loss": 3.5509, "step": 35355 }, { "epoch": 2.4025003397200706, "grad_norm": 1.2656488418579102, "learning_rate": 0.0006996874575349912, "loss": 3.5402, "step": 35360 }, { "epoch": 2.4028400597907327, "grad_norm": 1.1341971158981323, "learning_rate": 0.0006996449925261584, "loss": 3.4829, "step": 35365 }, { "epoch": 2.4031797798613943, "grad_norm": 0.8347205519676208, "learning_rate": 0.0006996025275173257, "loss": 3.7142, "step": 35370 }, { "epoch": 2.403519499932056, "grad_norm": 0.7063478231430054, "learning_rate": 0.0006995600625084931, "loss": 3.4618, "step": 35375 }, { "epoch": 2.4038592200027176, "grad_norm": 0.8571862578392029, "learning_rate": 0.0006995175974996603, "loss": 3.4671, "step": 35380 }, { "epoch": 2.4041989400733796, "grad_norm": 0.8497893214225769, "learning_rate": 0.0006994751324908276, "loss": 3.6183, "step": 35385 }, { "epoch": 2.4045386601440413, "grad_norm": 1.3127068281173706, "learning_rate": 0.0006994326674819949, "loss": 3.5464, "step": 35390 }, { "epoch": 2.404878380214703, "grad_norm": 0.8803237676620483, "learning_rate": 0.0006993902024731621, "loss": 3.5213, "step": 35395 }, { "epoch": 2.405218100285365, "grad_norm": 0.7964141368865967, "learning_rate": 0.0006993477374643294, "loss": 3.5652, "step": 35400 }, { "epoch": 2.4055578203560266, "grad_norm": 0.8695953488349915, "learning_rate": 0.0006993052724554966, "loss": 3.7839, "step": 35405 }, { "epoch": 2.4058975404266882, "grad_norm": 0.9421870112419128, "learning_rate": 0.000699262807446664, "loss": 3.4181, "step": 35410 }, { "epoch": 2.4062372604973503, "grad_norm": 0.807289183139801, "learning_rate": 0.0006992203424378313, "loss": 3.451, "step": 35415 }, { "epoch": 2.406576980568012, "grad_norm": 0.7639554142951965, "learning_rate": 0.0006991778774289985, "loss": 3.5529, "step": 35420 }, { "epoch": 2.4069167006386736, "grad_norm": 1.0106925964355469, "learning_rate": 0.0006991354124201658, "loss": 3.6477, "step": 35425 }, { "epoch": 2.4072564207093357, "grad_norm": 0.9396530389785767, "learning_rate": 0.0006990929474113331, "loss": 3.653, "step": 35430 }, { "epoch": 2.4075961407799973, "grad_norm": 1.131540298461914, "learning_rate": 0.0006990504824025003, "loss": 3.4754, "step": 35435 }, { "epoch": 2.407935860850659, "grad_norm": 0.7946423888206482, "learning_rate": 0.0006990080173936675, "loss": 3.5346, "step": 35440 }, { "epoch": 2.408275580921321, "grad_norm": 0.8676256537437439, "learning_rate": 0.000698965552384835, "loss": 3.7595, "step": 35445 }, { "epoch": 2.4086153009919826, "grad_norm": 0.9540257453918457, "learning_rate": 0.0006989230873760022, "loss": 3.8453, "step": 35450 }, { "epoch": 2.4089550210626443, "grad_norm": 0.8447473645210266, "learning_rate": 0.0006988806223671694, "loss": 3.5738, "step": 35455 }, { "epoch": 2.4092947411333063, "grad_norm": 0.8300119638442993, "learning_rate": 0.0006988381573583368, "loss": 3.755, "step": 35460 }, { "epoch": 2.409634461203968, "grad_norm": 1.1039456129074097, "learning_rate": 0.000698795692349504, "loss": 3.4086, "step": 35465 }, { "epoch": 2.4099741812746296, "grad_norm": 0.6949214339256287, "learning_rate": 0.0006987532273406712, "loss": 3.6585, "step": 35470 }, { "epoch": 2.4103139013452917, "grad_norm": 0.8014123439788818, "learning_rate": 0.0006987107623318386, "loss": 3.5212, "step": 35475 }, { "epoch": 2.4106536214159533, "grad_norm": 0.6742558479309082, "learning_rate": 0.0006986682973230059, "loss": 3.5719, "step": 35480 }, { "epoch": 2.410993341486615, "grad_norm": 0.9560319185256958, "learning_rate": 0.0006986258323141731, "loss": 3.6049, "step": 35485 }, { "epoch": 2.4113330615572766, "grad_norm": 0.7446780800819397, "learning_rate": 0.0006985833673053405, "loss": 3.7649, "step": 35490 }, { "epoch": 2.4116727816279386, "grad_norm": 0.9310965538024902, "learning_rate": 0.0006985409022965077, "loss": 3.7156, "step": 35495 }, { "epoch": 2.4120125016986003, "grad_norm": 0.9593939185142517, "learning_rate": 0.0006984984372876749, "loss": 3.7625, "step": 35500 }, { "epoch": 2.412352221769262, "grad_norm": 0.8953796625137329, "learning_rate": 0.0006984559722788422, "loss": 3.291, "step": 35505 }, { "epoch": 2.412691941839924, "grad_norm": 0.8510547280311584, "learning_rate": 0.0006984135072700095, "loss": 3.5676, "step": 35510 }, { "epoch": 2.4130316619105856, "grad_norm": 0.7069025635719299, "learning_rate": 0.0006983710422611768, "loss": 3.4771, "step": 35515 }, { "epoch": 2.4133713819812472, "grad_norm": 1.0682461261749268, "learning_rate": 0.0006983285772523441, "loss": 3.8241, "step": 35520 }, { "epoch": 2.4137111020519093, "grad_norm": 1.0669869184494019, "learning_rate": 0.0006982861122435114, "loss": 3.5641, "step": 35525 }, { "epoch": 2.414050822122571, "grad_norm": 0.9785784482955933, "learning_rate": 0.0006982436472346786, "loss": 3.7611, "step": 35530 }, { "epoch": 2.4143905421932326, "grad_norm": 0.653236985206604, "learning_rate": 0.0006982011822258459, "loss": 3.706, "step": 35535 }, { "epoch": 2.4147302622638946, "grad_norm": 0.8143099546432495, "learning_rate": 0.0006981587172170131, "loss": 3.6114, "step": 35540 }, { "epoch": 2.4150699823345563, "grad_norm": 0.8603134751319885, "learning_rate": 0.0006981162522081804, "loss": 3.4839, "step": 35545 }, { "epoch": 2.415409702405218, "grad_norm": 0.932726263999939, "learning_rate": 0.0006980737871993478, "loss": 3.397, "step": 35550 }, { "epoch": 2.41574942247588, "grad_norm": 0.9290340542793274, "learning_rate": 0.000698031322190515, "loss": 3.7343, "step": 35555 }, { "epoch": 2.4160891425465416, "grad_norm": 0.7325130105018616, "learning_rate": 0.0006979888571816823, "loss": 3.6894, "step": 35560 }, { "epoch": 2.4164288626172032, "grad_norm": 0.7920358777046204, "learning_rate": 0.0006979463921728496, "loss": 3.4905, "step": 35565 }, { "epoch": 2.4167685826878653, "grad_norm": 0.9487249851226807, "learning_rate": 0.0006979039271640168, "loss": 3.4996, "step": 35570 }, { "epoch": 2.417108302758527, "grad_norm": 0.6337518095970154, "learning_rate": 0.0006978614621551841, "loss": 3.4349, "step": 35575 }, { "epoch": 2.4174480228291886, "grad_norm": 1.0120391845703125, "learning_rate": 0.0006978189971463514, "loss": 3.5619, "step": 35580 }, { "epoch": 2.4177877428998507, "grad_norm": 0.7809683680534363, "learning_rate": 0.0006977765321375187, "loss": 3.7061, "step": 35585 }, { "epoch": 2.4181274629705123, "grad_norm": 0.7638624310493469, "learning_rate": 0.000697734067128686, "loss": 3.5365, "step": 35590 }, { "epoch": 2.418467183041174, "grad_norm": 1.234602928161621, "learning_rate": 0.0006976916021198533, "loss": 3.7469, "step": 35595 }, { "epoch": 2.418806903111836, "grad_norm": 0.982877790927887, "learning_rate": 0.0006976491371110205, "loss": 3.6337, "step": 35600 }, { "epoch": 2.4191466231824976, "grad_norm": 0.6958208084106445, "learning_rate": 0.0006976066721021878, "loss": 3.651, "step": 35605 }, { "epoch": 2.4194863432531593, "grad_norm": 0.7958633303642273, "learning_rate": 0.0006975642070933551, "loss": 3.5346, "step": 35610 }, { "epoch": 2.4198260633238213, "grad_norm": 0.9256461262702942, "learning_rate": 0.0006975217420845223, "loss": 3.696, "step": 35615 }, { "epoch": 2.420165783394483, "grad_norm": 0.7455571889877319, "learning_rate": 0.0006974792770756897, "loss": 3.653, "step": 35620 }, { "epoch": 2.4205055034651446, "grad_norm": 0.8943120837211609, "learning_rate": 0.000697436812066857, "loss": 3.4327, "step": 35625 }, { "epoch": 2.4208452235358067, "grad_norm": 1.0734084844589233, "learning_rate": 0.0006973943470580242, "loss": 3.56, "step": 35630 }, { "epoch": 2.4211849436064683, "grad_norm": 0.7935028076171875, "learning_rate": 0.0006973518820491915, "loss": 3.7503, "step": 35635 }, { "epoch": 2.42152466367713, "grad_norm": 0.8489923477172852, "learning_rate": 0.0006973094170403588, "loss": 3.4397, "step": 35640 }, { "epoch": 2.421864383747792, "grad_norm": 0.6729536056518555, "learning_rate": 0.000697266952031526, "loss": 3.4819, "step": 35645 }, { "epoch": 2.4222041038184536, "grad_norm": 0.845465362071991, "learning_rate": 0.0006972244870226934, "loss": 3.4502, "step": 35650 }, { "epoch": 2.4225438238891153, "grad_norm": 0.7607055306434631, "learning_rate": 0.0006971820220138606, "loss": 3.35, "step": 35655 }, { "epoch": 2.4228835439597773, "grad_norm": 1.330264687538147, "learning_rate": 0.0006971395570050279, "loss": 3.7882, "step": 35660 }, { "epoch": 2.423223264030439, "grad_norm": 0.7368998527526855, "learning_rate": 0.0006970970919961952, "loss": 3.6089, "step": 35665 }, { "epoch": 2.4235629841011006, "grad_norm": 1.0352815389633179, "learning_rate": 0.0006970546269873624, "loss": 3.7088, "step": 35670 }, { "epoch": 2.4239027041717627, "grad_norm": 1.2250632047653198, "learning_rate": 0.0006970121619785297, "loss": 3.499, "step": 35675 }, { "epoch": 2.4242424242424243, "grad_norm": 0.9267789125442505, "learning_rate": 0.000696969696969697, "loss": 3.5728, "step": 35680 }, { "epoch": 2.424582144313086, "grad_norm": 1.1931495666503906, "learning_rate": 0.0006969272319608643, "loss": 3.546, "step": 35685 }, { "epoch": 2.424921864383748, "grad_norm": 0.9957852959632874, "learning_rate": 0.0006968847669520316, "loss": 3.5427, "step": 35690 }, { "epoch": 2.4252615844544096, "grad_norm": 0.9141325950622559, "learning_rate": 0.0006968423019431989, "loss": 3.4356, "step": 35695 }, { "epoch": 2.4256013045250713, "grad_norm": 0.9408026337623596, "learning_rate": 0.0006967998369343661, "loss": 3.5648, "step": 35700 }, { "epoch": 2.4259410245957334, "grad_norm": 0.9678852558135986, "learning_rate": 0.0006967573719255333, "loss": 3.103, "step": 35705 }, { "epoch": 2.426280744666395, "grad_norm": 0.8087712526321411, "learning_rate": 0.0006967149069167007, "loss": 3.8023, "step": 35710 }, { "epoch": 2.4266204647370566, "grad_norm": 0.732836127281189, "learning_rate": 0.0006966724419078679, "loss": 3.4035, "step": 35715 }, { "epoch": 2.4269601848077182, "grad_norm": 0.8847925662994385, "learning_rate": 0.0006966299768990352, "loss": 3.6221, "step": 35720 }, { "epoch": 2.4272999048783803, "grad_norm": 0.7148637771606445, "learning_rate": 0.0006965875118902026, "loss": 3.3436, "step": 35725 }, { "epoch": 2.427639624949042, "grad_norm": 0.869138240814209, "learning_rate": 0.0006965450468813698, "loss": 3.7851, "step": 35730 }, { "epoch": 2.4279793450197036, "grad_norm": 0.785347580909729, "learning_rate": 0.000696502581872537, "loss": 3.5752, "step": 35735 }, { "epoch": 2.4283190650903657, "grad_norm": 0.7496178150177002, "learning_rate": 0.0006964601168637044, "loss": 3.5744, "step": 35740 }, { "epoch": 2.4286587851610273, "grad_norm": 0.8821709156036377, "learning_rate": 0.0006964176518548716, "loss": 3.339, "step": 35745 }, { "epoch": 2.428998505231689, "grad_norm": 0.8500383496284485, "learning_rate": 0.0006963751868460388, "loss": 3.6413, "step": 35750 }, { "epoch": 2.429338225302351, "grad_norm": 0.9530502557754517, "learning_rate": 0.0006963327218372062, "loss": 3.4856, "step": 35755 }, { "epoch": 2.4296779453730126, "grad_norm": 0.887913703918457, "learning_rate": 0.0006962902568283735, "loss": 3.8364, "step": 35760 }, { "epoch": 2.4300176654436743, "grad_norm": 1.0631946325302124, "learning_rate": 0.0006962477918195407, "loss": 3.6963, "step": 35765 }, { "epoch": 2.4303573855143363, "grad_norm": 0.7874326705932617, "learning_rate": 0.000696205326810708, "loss": 3.6464, "step": 35770 }, { "epoch": 2.430697105584998, "grad_norm": 0.8797521591186523, "learning_rate": 0.0006961628618018753, "loss": 3.6598, "step": 35775 }, { "epoch": 2.4310368256556596, "grad_norm": 0.6685046553611755, "learning_rate": 0.0006961203967930425, "loss": 3.1654, "step": 35780 }, { "epoch": 2.4313765457263217, "grad_norm": 0.9128331542015076, "learning_rate": 0.0006960779317842098, "loss": 3.5792, "step": 35785 }, { "epoch": 2.4317162657969833, "grad_norm": 0.8362668752670288, "learning_rate": 0.0006960354667753772, "loss": 3.5145, "step": 35790 }, { "epoch": 2.432055985867645, "grad_norm": 0.8993712067604065, "learning_rate": 0.0006959930017665444, "loss": 3.69, "step": 35795 }, { "epoch": 2.432395705938307, "grad_norm": 0.8033528923988342, "learning_rate": 0.0006959505367577117, "loss": 3.547, "step": 35800 }, { "epoch": 2.4327354260089686, "grad_norm": 1.0936760902404785, "learning_rate": 0.0006959080717488789, "loss": 3.5311, "step": 35805 }, { "epoch": 2.4330751460796303, "grad_norm": 0.7865379452705383, "learning_rate": 0.0006958656067400462, "loss": 3.7302, "step": 35810 }, { "epoch": 2.4334148661502923, "grad_norm": 0.8467683792114258, "learning_rate": 0.0006958231417312135, "loss": 3.4578, "step": 35815 }, { "epoch": 2.433754586220954, "grad_norm": 0.720098614692688, "learning_rate": 0.0006957806767223807, "loss": 3.7984, "step": 35820 }, { "epoch": 2.4340943062916156, "grad_norm": 0.9408493638038635, "learning_rate": 0.0006957382117135481, "loss": 3.6219, "step": 35825 }, { "epoch": 2.4344340263622772, "grad_norm": 0.7647580504417419, "learning_rate": 0.0006956957467047154, "loss": 3.5149, "step": 35830 }, { "epoch": 2.4347737464329393, "grad_norm": 0.7991424798965454, "learning_rate": 0.0006956532816958826, "loss": 3.3739, "step": 35835 }, { "epoch": 2.435113466503601, "grad_norm": 0.8955696225166321, "learning_rate": 0.0006956108166870498, "loss": 3.6552, "step": 35840 }, { "epoch": 2.4354531865742626, "grad_norm": 1.013723611831665, "learning_rate": 0.0006955683516782172, "loss": 3.4633, "step": 35845 }, { "epoch": 2.4357929066449246, "grad_norm": 1.0584102869033813, "learning_rate": 0.0006955258866693844, "loss": 3.4916, "step": 35850 }, { "epoch": 2.4361326267155863, "grad_norm": 0.8208675980567932, "learning_rate": 0.0006954834216605516, "loss": 3.6064, "step": 35855 }, { "epoch": 2.436472346786248, "grad_norm": 1.0219154357910156, "learning_rate": 0.0006954409566517191, "loss": 3.7318, "step": 35860 }, { "epoch": 2.43681206685691, "grad_norm": 0.8373352885246277, "learning_rate": 0.0006953984916428863, "loss": 3.464, "step": 35865 }, { "epoch": 2.4371517869275716, "grad_norm": 0.8732852935791016, "learning_rate": 0.0006953560266340535, "loss": 3.5093, "step": 35870 }, { "epoch": 2.4374915069982332, "grad_norm": 0.8502609133720398, "learning_rate": 0.0006953135616252209, "loss": 3.6487, "step": 35875 }, { "epoch": 2.4378312270688953, "grad_norm": 0.8389297723770142, "learning_rate": 0.0006952710966163881, "loss": 3.5812, "step": 35880 }, { "epoch": 2.438170947139557, "grad_norm": 0.9552708864212036, "learning_rate": 0.0006952286316075553, "loss": 3.3851, "step": 35885 }, { "epoch": 2.4385106672102186, "grad_norm": 1.0844228267669678, "learning_rate": 0.0006951861665987226, "loss": 3.5467, "step": 35890 }, { "epoch": 2.4388503872808807, "grad_norm": 0.8239562511444092, "learning_rate": 0.00069514370158989, "loss": 3.5824, "step": 35895 }, { "epoch": 2.4391901073515423, "grad_norm": 0.8297695517539978, "learning_rate": 0.0006951012365810572, "loss": 3.4096, "step": 35900 }, { "epoch": 2.439529827422204, "grad_norm": 0.9136099815368652, "learning_rate": 0.0006950587715722245, "loss": 3.4727, "step": 35905 }, { "epoch": 2.439869547492866, "grad_norm": 0.657521665096283, "learning_rate": 0.0006950163065633918, "loss": 3.7835, "step": 35910 }, { "epoch": 2.4402092675635276, "grad_norm": 0.933928370475769, "learning_rate": 0.000694973841554559, "loss": 3.478, "step": 35915 }, { "epoch": 2.4405489876341893, "grad_norm": 0.932222306728363, "learning_rate": 0.0006949313765457263, "loss": 3.4499, "step": 35920 }, { "epoch": 2.4408887077048513, "grad_norm": 0.8237214684486389, "learning_rate": 0.0006948889115368936, "loss": 3.3677, "step": 35925 }, { "epoch": 2.441228427775513, "grad_norm": 0.888521671295166, "learning_rate": 0.0006948464465280609, "loss": 3.771, "step": 35930 }, { "epoch": 2.4415681478461746, "grad_norm": 0.9662396311759949, "learning_rate": 0.0006948039815192282, "loss": 3.8254, "step": 35935 }, { "epoch": 2.4419078679168367, "grad_norm": 1.012626051902771, "learning_rate": 0.0006947615165103954, "loss": 3.4814, "step": 35940 }, { "epoch": 2.4422475879874983, "grad_norm": 0.9767229557037354, "learning_rate": 0.0006947190515015628, "loss": 3.5873, "step": 35945 }, { "epoch": 2.44258730805816, "grad_norm": 1.0281695127487183, "learning_rate": 0.00069467658649273, "loss": 3.5043, "step": 35950 }, { "epoch": 2.442927028128822, "grad_norm": 0.7039209604263306, "learning_rate": 0.0006946341214838972, "loss": 3.7777, "step": 35955 }, { "epoch": 2.4432667481994836, "grad_norm": 0.6964161992073059, "learning_rate": 0.0006945916564750646, "loss": 3.3807, "step": 35960 }, { "epoch": 2.4436064682701453, "grad_norm": 0.7231311798095703, "learning_rate": 0.0006945491914662319, "loss": 3.6047, "step": 35965 }, { "epoch": 2.4439461883408073, "grad_norm": 0.9180313944816589, "learning_rate": 0.0006945067264573991, "loss": 3.4751, "step": 35970 }, { "epoch": 2.444285908411469, "grad_norm": 0.6846402287483215, "learning_rate": 0.0006944642614485665, "loss": 3.4168, "step": 35975 }, { "epoch": 2.4446256284821306, "grad_norm": 0.8182682991027832, "learning_rate": 0.0006944217964397337, "loss": 3.2754, "step": 35980 }, { "epoch": 2.4449653485527927, "grad_norm": 0.847457766532898, "learning_rate": 0.0006943793314309009, "loss": 3.1851, "step": 35985 }, { "epoch": 2.4453050686234543, "grad_norm": 1.0363085269927979, "learning_rate": 0.0006943368664220682, "loss": 3.7026, "step": 35990 }, { "epoch": 2.445644788694116, "grad_norm": 0.9380670189857483, "learning_rate": 0.0006942944014132355, "loss": 3.5107, "step": 35995 }, { "epoch": 2.445984508764778, "grad_norm": 1.0116405487060547, "learning_rate": 0.0006942519364044028, "loss": 3.6311, "step": 36000 }, { "epoch": 2.4463242288354397, "grad_norm": 1.0885958671569824, "learning_rate": 0.0006942094713955701, "loss": 3.6876, "step": 36005 }, { "epoch": 2.4466639489061013, "grad_norm": 0.8997837901115417, "learning_rate": 0.0006941670063867374, "loss": 3.6208, "step": 36010 }, { "epoch": 2.4470036689767634, "grad_norm": 0.9498965740203857, "learning_rate": 0.0006941245413779046, "loss": 3.5735, "step": 36015 }, { "epoch": 2.447343389047425, "grad_norm": 0.7127948999404907, "learning_rate": 0.0006940820763690719, "loss": 3.3544, "step": 36020 }, { "epoch": 2.4476831091180866, "grad_norm": 1.0788519382476807, "learning_rate": 0.0006940396113602392, "loss": 3.6056, "step": 36025 }, { "epoch": 2.4480228291887487, "grad_norm": 0.8992520570755005, "learning_rate": 0.0006939971463514064, "loss": 3.4717, "step": 36030 }, { "epoch": 2.4483625492594103, "grad_norm": 0.8551177382469177, "learning_rate": 0.0006939546813425738, "loss": 3.5759, "step": 36035 }, { "epoch": 2.448702269330072, "grad_norm": 0.8903460502624512, "learning_rate": 0.000693912216333741, "loss": 3.4482, "step": 36040 }, { "epoch": 2.449041989400734, "grad_norm": 0.7458207011222839, "learning_rate": 0.0006938697513249083, "loss": 3.7496, "step": 36045 }, { "epoch": 2.4493817094713957, "grad_norm": 0.8646011352539062, "learning_rate": 0.0006938272863160756, "loss": 3.6397, "step": 36050 }, { "epoch": 2.4497214295420573, "grad_norm": 0.9198480844497681, "learning_rate": 0.0006937848213072428, "loss": 3.6274, "step": 36055 }, { "epoch": 2.4500611496127194, "grad_norm": 0.9103124737739563, "learning_rate": 0.0006937423562984101, "loss": 3.5538, "step": 36060 }, { "epoch": 2.450400869683381, "grad_norm": 1.149709939956665, "learning_rate": 0.0006936998912895774, "loss": 3.5288, "step": 36065 }, { "epoch": 2.4507405897540426, "grad_norm": 0.7911571860313416, "learning_rate": 0.0006936574262807447, "loss": 3.5274, "step": 36070 }, { "epoch": 2.4510803098247043, "grad_norm": 0.9497578740119934, "learning_rate": 0.000693614961271912, "loss": 3.5979, "step": 36075 }, { "epoch": 2.4514200298953663, "grad_norm": 0.8061007857322693, "learning_rate": 0.0006935724962630793, "loss": 3.721, "step": 36080 }, { "epoch": 2.451759749966028, "grad_norm": 0.833150327205658, "learning_rate": 0.0006935300312542465, "loss": 3.7713, "step": 36085 }, { "epoch": 2.4520994700366896, "grad_norm": 0.76576828956604, "learning_rate": 0.0006934875662454137, "loss": 3.3959, "step": 36090 }, { "epoch": 2.4524391901073517, "grad_norm": 0.9413695931434631, "learning_rate": 0.0006934451012365811, "loss": 3.44, "step": 36095 }, { "epoch": 2.4527789101780133, "grad_norm": 0.9613192677497864, "learning_rate": 0.0006934026362277483, "loss": 3.4142, "step": 36100 }, { "epoch": 2.453118630248675, "grad_norm": 0.7438663840293884, "learning_rate": 0.0006933601712189156, "loss": 3.3793, "step": 36105 }, { "epoch": 2.453458350319337, "grad_norm": 0.7741310000419617, "learning_rate": 0.000693317706210083, "loss": 3.7323, "step": 36110 }, { "epoch": 2.4537980703899986, "grad_norm": 0.7437799572944641, "learning_rate": 0.0006932752412012502, "loss": 3.7139, "step": 36115 }, { "epoch": 2.4541377904606603, "grad_norm": 0.9267513155937195, "learning_rate": 0.0006932327761924174, "loss": 3.6203, "step": 36120 }, { "epoch": 2.4544775105313223, "grad_norm": 0.9539667367935181, "learning_rate": 0.0006931903111835848, "loss": 3.5079, "step": 36125 }, { "epoch": 2.454817230601984, "grad_norm": 0.8683731555938721, "learning_rate": 0.000693147846174752, "loss": 3.6364, "step": 36130 }, { "epoch": 2.4551569506726456, "grad_norm": 0.8454530835151672, "learning_rate": 0.0006931053811659192, "loss": 3.6136, "step": 36135 }, { "epoch": 2.4554966707433077, "grad_norm": 0.7789711356163025, "learning_rate": 0.0006930629161570866, "loss": 3.2908, "step": 36140 }, { "epoch": 2.4558363908139693, "grad_norm": 0.9197741150856018, "learning_rate": 0.0006930204511482539, "loss": 3.7711, "step": 36145 }, { "epoch": 2.456176110884631, "grad_norm": 0.7262634038925171, "learning_rate": 0.0006929779861394211, "loss": 3.5818, "step": 36150 }, { "epoch": 2.456515830955293, "grad_norm": 0.889327883720398, "learning_rate": 0.0006929355211305884, "loss": 3.7181, "step": 36155 }, { "epoch": 2.4568555510259547, "grad_norm": 1.0491082668304443, "learning_rate": 0.0006928930561217557, "loss": 3.6124, "step": 36160 }, { "epoch": 2.4571952710966163, "grad_norm": 0.8459656834602356, "learning_rate": 0.0006928505911129229, "loss": 3.8499, "step": 36165 }, { "epoch": 2.457534991167278, "grad_norm": 0.8021054863929749, "learning_rate": 0.0006928081261040902, "loss": 3.3295, "step": 36170 }, { "epoch": 2.45787471123794, "grad_norm": 0.7558298707008362, "learning_rate": 0.0006927656610952576, "loss": 3.5904, "step": 36175 }, { "epoch": 2.4582144313086016, "grad_norm": 0.8450889587402344, "learning_rate": 0.0006927231960864248, "loss": 3.548, "step": 36180 }, { "epoch": 2.4585541513792633, "grad_norm": 0.8992059230804443, "learning_rate": 0.0006926807310775921, "loss": 3.6591, "step": 36185 }, { "epoch": 2.4588938714499253, "grad_norm": 0.6901722550392151, "learning_rate": 0.0006926382660687593, "loss": 3.6373, "step": 36190 }, { "epoch": 2.459233591520587, "grad_norm": 0.9824018478393555, "learning_rate": 0.0006925958010599266, "loss": 3.6866, "step": 36195 }, { "epoch": 2.4595733115912486, "grad_norm": 0.9984708428382874, "learning_rate": 0.0006925533360510939, "loss": 3.4464, "step": 36200 }, { "epoch": 2.4599130316619107, "grad_norm": 0.8927099108695984, "learning_rate": 0.0006925108710422611, "loss": 3.909, "step": 36205 }, { "epoch": 2.4602527517325723, "grad_norm": 0.7059174180030823, "learning_rate": 0.0006924684060334285, "loss": 3.6478, "step": 36210 }, { "epoch": 2.460592471803234, "grad_norm": 0.8820169568061829, "learning_rate": 0.0006924259410245958, "loss": 3.3821, "step": 36215 }, { "epoch": 2.460932191873896, "grad_norm": 0.906986653804779, "learning_rate": 0.000692383476015763, "loss": 3.8304, "step": 36220 }, { "epoch": 2.4612719119445576, "grad_norm": 0.8624225854873657, "learning_rate": 0.0006923410110069302, "loss": 3.3044, "step": 36225 }, { "epoch": 2.4616116320152193, "grad_norm": 0.7914595007896423, "learning_rate": 0.0006922985459980976, "loss": 3.7538, "step": 36230 }, { "epoch": 2.4619513520858813, "grad_norm": 0.6971786618232727, "learning_rate": 0.0006922560809892648, "loss": 3.2757, "step": 36235 }, { "epoch": 2.462291072156543, "grad_norm": 0.8726166486740112, "learning_rate": 0.000692213615980432, "loss": 3.497, "step": 36240 }, { "epoch": 2.4626307922272046, "grad_norm": 0.9129696488380432, "learning_rate": 0.0006921711509715995, "loss": 3.6021, "step": 36245 }, { "epoch": 2.4629705122978667, "grad_norm": 0.9540472030639648, "learning_rate": 0.0006921286859627667, "loss": 3.4729, "step": 36250 }, { "epoch": 2.4633102323685283, "grad_norm": 1.0409308671951294, "learning_rate": 0.0006920862209539339, "loss": 3.4031, "step": 36255 }, { "epoch": 2.46364995243919, "grad_norm": 0.8868533968925476, "learning_rate": 0.0006920437559451013, "loss": 3.4979, "step": 36260 }, { "epoch": 2.463989672509852, "grad_norm": 0.9985135793685913, "learning_rate": 0.0006920012909362685, "loss": 3.5361, "step": 36265 }, { "epoch": 2.4643293925805136, "grad_norm": 0.865886926651001, "learning_rate": 0.0006919588259274357, "loss": 3.7461, "step": 36270 }, { "epoch": 2.4646691126511753, "grad_norm": 0.9465520977973938, "learning_rate": 0.0006919163609186032, "loss": 3.5689, "step": 36275 }, { "epoch": 2.4650088327218374, "grad_norm": 0.9770486950874329, "learning_rate": 0.0006918738959097704, "loss": 3.5385, "step": 36280 }, { "epoch": 2.465348552792499, "grad_norm": 0.7647587060928345, "learning_rate": 0.0006918314309009377, "loss": 3.5755, "step": 36285 }, { "epoch": 2.4656882728631606, "grad_norm": 1.102231502532959, "learning_rate": 0.0006917889658921049, "loss": 3.6893, "step": 36290 }, { "epoch": 2.4660279929338227, "grad_norm": 0.7716435790061951, "learning_rate": 0.0006917465008832722, "loss": 3.2765, "step": 36295 }, { "epoch": 2.4663677130044843, "grad_norm": 0.928592324256897, "learning_rate": 0.0006917040358744395, "loss": 3.5901, "step": 36300 }, { "epoch": 2.466707433075146, "grad_norm": 0.67704176902771, "learning_rate": 0.0006916615708656067, "loss": 3.4815, "step": 36305 }, { "epoch": 2.467047153145808, "grad_norm": 1.0540499687194824, "learning_rate": 0.0006916191058567741, "loss": 3.6412, "step": 36310 }, { "epoch": 2.4673868732164697, "grad_norm": 0.7599851489067078, "learning_rate": 0.0006915766408479414, "loss": 3.8823, "step": 36315 }, { "epoch": 2.4677265932871313, "grad_norm": 0.9563323259353638, "learning_rate": 0.0006915341758391086, "loss": 3.7295, "step": 36320 }, { "epoch": 2.4680663133577934, "grad_norm": 0.899450421333313, "learning_rate": 0.0006914917108302758, "loss": 3.6357, "step": 36325 }, { "epoch": 2.468406033428455, "grad_norm": 0.8754828572273254, "learning_rate": 0.0006914492458214432, "loss": 3.5001, "step": 36330 }, { "epoch": 2.4687457534991166, "grad_norm": 0.856401801109314, "learning_rate": 0.0006914067808126104, "loss": 3.7415, "step": 36335 }, { "epoch": 2.4690854735697787, "grad_norm": 0.8319867849349976, "learning_rate": 0.0006913643158037776, "loss": 3.259, "step": 36340 }, { "epoch": 2.4694251936404403, "grad_norm": 0.954565703868866, "learning_rate": 0.0006913218507949451, "loss": 3.621, "step": 36345 }, { "epoch": 2.469764913711102, "grad_norm": 0.9378578662872314, "learning_rate": 0.0006912793857861123, "loss": 3.6769, "step": 36350 }, { "epoch": 2.470104633781764, "grad_norm": 1.0436452627182007, "learning_rate": 0.0006912369207772795, "loss": 3.3329, "step": 36355 }, { "epoch": 2.4704443538524257, "grad_norm": 0.8781203031539917, "learning_rate": 0.0006911944557684469, "loss": 3.4577, "step": 36360 }, { "epoch": 2.4707840739230873, "grad_norm": 0.8413339853286743, "learning_rate": 0.0006911519907596141, "loss": 3.7904, "step": 36365 }, { "epoch": 2.4711237939937494, "grad_norm": 0.8315607905387878, "learning_rate": 0.0006911095257507813, "loss": 3.5522, "step": 36370 }, { "epoch": 2.471463514064411, "grad_norm": 0.9372484087944031, "learning_rate": 0.0006910670607419487, "loss": 3.4998, "step": 36375 }, { "epoch": 2.4718032341350726, "grad_norm": 1.043075442314148, "learning_rate": 0.000691024595733116, "loss": 3.6908, "step": 36380 }, { "epoch": 2.4721429542057347, "grad_norm": 0.8879507184028625, "learning_rate": 0.0006909821307242832, "loss": 3.6829, "step": 36385 }, { "epoch": 2.4724826742763963, "grad_norm": 0.8010611534118652, "learning_rate": 0.0006909396657154505, "loss": 3.4733, "step": 36390 }, { "epoch": 2.472822394347058, "grad_norm": 0.8769936561584473, "learning_rate": 0.0006908972007066178, "loss": 3.6313, "step": 36395 }, { "epoch": 2.47316211441772, "grad_norm": 0.7876511216163635, "learning_rate": 0.000690854735697785, "loss": 3.4161, "step": 36400 }, { "epoch": 2.4735018344883817, "grad_norm": 0.8814708590507507, "learning_rate": 0.0006908122706889523, "loss": 3.6457, "step": 36405 }, { "epoch": 2.4738415545590433, "grad_norm": 0.871041476726532, "learning_rate": 0.0006907698056801196, "loss": 3.6814, "step": 36410 }, { "epoch": 2.474181274629705, "grad_norm": 1.1853734254837036, "learning_rate": 0.0006907273406712869, "loss": 3.5313, "step": 36415 }, { "epoch": 2.474520994700367, "grad_norm": 0.8447781205177307, "learning_rate": 0.0006906848756624542, "loss": 3.5422, "step": 36420 }, { "epoch": 2.4748607147710286, "grad_norm": 0.8156052231788635, "learning_rate": 0.0006906424106536215, "loss": 3.4339, "step": 36425 }, { "epoch": 2.4752004348416903, "grad_norm": 0.7409082055091858, "learning_rate": 0.0006905999456447887, "loss": 3.5654, "step": 36430 }, { "epoch": 2.4755401549123524, "grad_norm": 0.7991155385971069, "learning_rate": 0.000690557480635956, "loss": 3.6512, "step": 36435 }, { "epoch": 2.475879874983014, "grad_norm": 0.8262657523155212, "learning_rate": 0.0006905150156271232, "loss": 3.4858, "step": 36440 }, { "epoch": 2.4762195950536756, "grad_norm": 0.8796854615211487, "learning_rate": 0.0006904725506182905, "loss": 3.7319, "step": 36445 }, { "epoch": 2.4765593151243377, "grad_norm": 0.6967483162879944, "learning_rate": 0.0006904300856094579, "loss": 3.7002, "step": 36450 }, { "epoch": 2.4768990351949993, "grad_norm": 1.10507071018219, "learning_rate": 0.0006903876206006251, "loss": 3.7097, "step": 36455 }, { "epoch": 2.477238755265661, "grad_norm": 0.8796741962432861, "learning_rate": 0.0006903451555917924, "loss": 3.5756, "step": 36460 }, { "epoch": 2.477578475336323, "grad_norm": 0.8185814023017883, "learning_rate": 0.0006903026905829597, "loss": 3.4423, "step": 36465 }, { "epoch": 2.4779181954069847, "grad_norm": 0.8333466649055481, "learning_rate": 0.0006902602255741269, "loss": 3.4952, "step": 36470 }, { "epoch": 2.4782579154776463, "grad_norm": 0.8334892392158508, "learning_rate": 0.0006902177605652941, "loss": 3.6537, "step": 36475 }, { "epoch": 2.4785976355483084, "grad_norm": 0.9300302863121033, "learning_rate": 0.0006901752955564615, "loss": 3.4466, "step": 36480 }, { "epoch": 2.47893735561897, "grad_norm": 0.9987243413925171, "learning_rate": 0.0006901328305476288, "loss": 3.4529, "step": 36485 }, { "epoch": 2.4792770756896316, "grad_norm": 1.0005249977111816, "learning_rate": 0.000690090365538796, "loss": 3.3849, "step": 36490 }, { "epoch": 2.4796167957602937, "grad_norm": 1.2294481992721558, "learning_rate": 0.0006900479005299634, "loss": 3.6634, "step": 36495 }, { "epoch": 2.4799565158309553, "grad_norm": 0.8743250966072083, "learning_rate": 0.0006900054355211306, "loss": 3.3456, "step": 36500 }, { "epoch": 2.480296235901617, "grad_norm": 0.8085228800773621, "learning_rate": 0.0006899629705122978, "loss": 3.6539, "step": 36505 }, { "epoch": 2.4806359559722786, "grad_norm": 0.7113310098648071, "learning_rate": 0.0006899205055034652, "loss": 3.5481, "step": 36510 }, { "epoch": 2.4809756760429407, "grad_norm": 1.0010929107666016, "learning_rate": 0.0006898780404946324, "loss": 3.4007, "step": 36515 }, { "epoch": 2.4813153961136023, "grad_norm": 0.7953394055366516, "learning_rate": 0.0006898355754857997, "loss": 3.4604, "step": 36520 }, { "epoch": 2.481655116184264, "grad_norm": 0.8871358036994934, "learning_rate": 0.000689793110476967, "loss": 3.648, "step": 36525 }, { "epoch": 2.481994836254926, "grad_norm": 0.976520836353302, "learning_rate": 0.0006897506454681343, "loss": 3.6012, "step": 36530 }, { "epoch": 2.4823345563255876, "grad_norm": 0.7046443819999695, "learning_rate": 0.0006897081804593015, "loss": 3.6521, "step": 36535 }, { "epoch": 2.4826742763962493, "grad_norm": 0.7388847470283508, "learning_rate": 0.0006896657154504688, "loss": 3.3058, "step": 36540 }, { "epoch": 2.4830139964669113, "grad_norm": 0.8523498773574829, "learning_rate": 0.0006896232504416361, "loss": 3.5752, "step": 36545 }, { "epoch": 2.483353716537573, "grad_norm": 0.938737154006958, "learning_rate": 0.0006895807854328033, "loss": 3.459, "step": 36550 }, { "epoch": 2.4836934366082346, "grad_norm": 0.8756615519523621, "learning_rate": 0.0006895383204239707, "loss": 3.4447, "step": 36555 }, { "epoch": 2.4840331566788967, "grad_norm": 0.8922083377838135, "learning_rate": 0.000689495855415138, "loss": 3.3873, "step": 36560 }, { "epoch": 2.4843728767495583, "grad_norm": 0.9193309545516968, "learning_rate": 0.0006894533904063052, "loss": 3.3683, "step": 36565 }, { "epoch": 2.48471259682022, "grad_norm": 0.8712266087532043, "learning_rate": 0.0006894109253974725, "loss": 3.3698, "step": 36570 }, { "epoch": 2.485052316890882, "grad_norm": 0.853512704372406, "learning_rate": 0.0006893684603886397, "loss": 3.7681, "step": 36575 }, { "epoch": 2.4853920369615436, "grad_norm": 0.9410825967788696, "learning_rate": 0.000689325995379807, "loss": 3.7854, "step": 36580 }, { "epoch": 2.4857317570322053, "grad_norm": 0.708361029624939, "learning_rate": 0.0006892835303709743, "loss": 3.4626, "step": 36585 }, { "epoch": 2.4860714771028674, "grad_norm": 0.8282793164253235, "learning_rate": 0.0006892410653621416, "loss": 3.6188, "step": 36590 }, { "epoch": 2.486411197173529, "grad_norm": 0.9518354535102844, "learning_rate": 0.0006891986003533089, "loss": 3.2638, "step": 36595 }, { "epoch": 2.4867509172441906, "grad_norm": 0.942774772644043, "learning_rate": 0.0006891561353444762, "loss": 3.6533, "step": 36600 }, { "epoch": 2.4870906373148527, "grad_norm": 0.7877184152603149, "learning_rate": 0.0006891136703356434, "loss": 3.4383, "step": 36605 }, { "epoch": 2.4874303573855143, "grad_norm": 0.8960031867027283, "learning_rate": 0.0006890712053268107, "loss": 3.7348, "step": 36610 }, { "epoch": 2.487770077456176, "grad_norm": 0.8156405091285706, "learning_rate": 0.000689028740317978, "loss": 3.7056, "step": 36615 }, { "epoch": 2.488109797526838, "grad_norm": 1.0419365167617798, "learning_rate": 0.0006889862753091452, "loss": 3.5797, "step": 36620 }, { "epoch": 2.4884495175974997, "grad_norm": 0.7346763610839844, "learning_rate": 0.0006889438103003127, "loss": 3.6299, "step": 36625 }, { "epoch": 2.4887892376681613, "grad_norm": 0.9340589642524719, "learning_rate": 0.0006889013452914799, "loss": 3.4286, "step": 36630 }, { "epoch": 2.4891289577388234, "grad_norm": 0.9242081046104431, "learning_rate": 0.0006888588802826471, "loss": 3.6487, "step": 36635 }, { "epoch": 2.489468677809485, "grad_norm": 1.1686097383499146, "learning_rate": 0.0006888164152738144, "loss": 3.5239, "step": 36640 }, { "epoch": 2.4898083978801466, "grad_norm": 0.7603017091751099, "learning_rate": 0.0006887739502649817, "loss": 3.5929, "step": 36645 }, { "epoch": 2.4901481179508087, "grad_norm": 0.8958903551101685, "learning_rate": 0.0006887314852561489, "loss": 3.4947, "step": 36650 }, { "epoch": 2.4904878380214703, "grad_norm": 0.86673504114151, "learning_rate": 0.0006886890202473162, "loss": 3.5726, "step": 36655 }, { "epoch": 2.490827558092132, "grad_norm": 1.0016387701034546, "learning_rate": 0.0006886465552384836, "loss": 3.3194, "step": 36660 }, { "epoch": 2.491167278162794, "grad_norm": 0.9451571106910706, "learning_rate": 0.0006886040902296508, "loss": 3.3076, "step": 36665 }, { "epoch": 2.4915069982334557, "grad_norm": 1.0089694261550903, "learning_rate": 0.0006885616252208181, "loss": 3.4043, "step": 36670 }, { "epoch": 2.4918467183041173, "grad_norm": 0.8287688493728638, "learning_rate": 0.0006885191602119853, "loss": 3.4409, "step": 36675 }, { "epoch": 2.4921864383747794, "grad_norm": 0.975384533405304, "learning_rate": 0.0006884766952031526, "loss": 3.4429, "step": 36680 }, { "epoch": 2.492526158445441, "grad_norm": 0.8510820269584656, "learning_rate": 0.0006884342301943199, "loss": 3.4138, "step": 36685 }, { "epoch": 2.4928658785161026, "grad_norm": 1.0971518754959106, "learning_rate": 0.0006883917651854871, "loss": 3.3162, "step": 36690 }, { "epoch": 2.4932055985867647, "grad_norm": 0.7318975925445557, "learning_rate": 0.0006883493001766545, "loss": 3.6157, "step": 36695 }, { "epoch": 2.4935453186574263, "grad_norm": 0.8650673627853394, "learning_rate": 0.0006883068351678218, "loss": 3.5632, "step": 36700 }, { "epoch": 2.493885038728088, "grad_norm": 1.056884765625, "learning_rate": 0.000688264370158989, "loss": 3.3088, "step": 36705 }, { "epoch": 2.49422475879875, "grad_norm": 0.9452702403068542, "learning_rate": 0.0006882219051501563, "loss": 3.6457, "step": 36710 }, { "epoch": 2.4945644788694117, "grad_norm": 0.7401376366615295, "learning_rate": 0.0006881794401413236, "loss": 3.7734, "step": 36715 }, { "epoch": 2.4949041989400733, "grad_norm": 0.832613468170166, "learning_rate": 0.0006881369751324908, "loss": 3.3839, "step": 36720 }, { "epoch": 2.4952439190107354, "grad_norm": 1.0046380758285522, "learning_rate": 0.000688094510123658, "loss": 3.2739, "step": 36725 }, { "epoch": 2.495583639081397, "grad_norm": 0.7986001372337341, "learning_rate": 0.0006880520451148255, "loss": 3.6226, "step": 36730 }, { "epoch": 2.4959233591520587, "grad_norm": 1.242040991783142, "learning_rate": 0.0006880095801059927, "loss": 3.8644, "step": 36735 }, { "epoch": 2.4962630792227207, "grad_norm": 0.9378507733345032, "learning_rate": 0.0006879671150971599, "loss": 3.4524, "step": 36740 }, { "epoch": 2.4966027992933824, "grad_norm": 0.9926351308822632, "learning_rate": 0.0006879246500883273, "loss": 3.4312, "step": 36745 }, { "epoch": 2.496942519364044, "grad_norm": 0.7548143863677979, "learning_rate": 0.0006878821850794945, "loss": 3.7, "step": 36750 }, { "epoch": 2.4972822394347056, "grad_norm": 0.8410499691963196, "learning_rate": 0.0006878397200706617, "loss": 3.3711, "step": 36755 }, { "epoch": 2.4976219595053677, "grad_norm": 0.6862537860870361, "learning_rate": 0.0006877972550618292, "loss": 3.4216, "step": 36760 }, { "epoch": 2.4979616795760293, "grad_norm": 0.722764253616333, "learning_rate": 0.0006877547900529964, "loss": 3.4782, "step": 36765 }, { "epoch": 2.498301399646691, "grad_norm": 0.6903204917907715, "learning_rate": 0.0006877123250441636, "loss": 3.5483, "step": 36770 }, { "epoch": 2.498641119717353, "grad_norm": 0.8590174317359924, "learning_rate": 0.000687669860035331, "loss": 3.3938, "step": 36775 }, { "epoch": 2.4989808397880147, "grad_norm": 0.7185794115066528, "learning_rate": 0.0006876273950264982, "loss": 3.3916, "step": 36780 }, { "epoch": 2.4993205598586763, "grad_norm": 0.9417985081672668, "learning_rate": 0.0006875849300176654, "loss": 3.5408, "step": 36785 }, { "epoch": 2.4996602799293384, "grad_norm": 0.9269102811813354, "learning_rate": 0.0006875424650088327, "loss": 3.4685, "step": 36790 }, { "epoch": 2.5, "grad_norm": 0.8657393455505371, "learning_rate": 0.0006875, "loss": 3.48, "step": 36795 }, { "epoch": 2.5003397200706616, "grad_norm": 1.022359013557434, "learning_rate": 0.0006874575349911673, "loss": 3.6529, "step": 36800 }, { "epoch": 2.5006794401413237, "grad_norm": 0.8003290295600891, "learning_rate": 0.0006874150699823346, "loss": 3.7177, "step": 36805 }, { "epoch": 2.5010191602119853, "grad_norm": 0.8137874007225037, "learning_rate": 0.0006873726049735019, "loss": 3.6766, "step": 36810 }, { "epoch": 2.501358880282647, "grad_norm": 0.9338594675064087, "learning_rate": 0.0006873301399646691, "loss": 3.4237, "step": 36815 }, { "epoch": 2.5016986003533086, "grad_norm": 0.8676313161849976, "learning_rate": 0.0006872876749558364, "loss": 3.6658, "step": 36820 }, { "epoch": 2.5020383204239707, "grad_norm": 0.7515265345573425, "learning_rate": 0.0006872452099470036, "loss": 3.5861, "step": 36825 }, { "epoch": 2.5023780404946323, "grad_norm": 1.1176272630691528, "learning_rate": 0.0006872027449381709, "loss": 3.5769, "step": 36830 }, { "epoch": 2.502717760565294, "grad_norm": 0.8808847069740295, "learning_rate": 0.0006871602799293383, "loss": 3.5341, "step": 36835 }, { "epoch": 2.503057480635956, "grad_norm": 0.6433759927749634, "learning_rate": 0.0006871178149205055, "loss": 3.6638, "step": 36840 }, { "epoch": 2.5033972007066176, "grad_norm": 1.1460437774658203, "learning_rate": 0.0006870753499116728, "loss": 3.6047, "step": 36845 }, { "epoch": 2.5037369207772793, "grad_norm": 0.7882977724075317, "learning_rate": 0.0006870328849028401, "loss": 3.546, "step": 36850 }, { "epoch": 2.5040766408479413, "grad_norm": 0.9900385737419128, "learning_rate": 0.0006869904198940073, "loss": 3.3811, "step": 36855 }, { "epoch": 2.504416360918603, "grad_norm": 0.8787567019462585, "learning_rate": 0.0006869479548851745, "loss": 3.7839, "step": 36860 }, { "epoch": 2.5047560809892646, "grad_norm": 0.8779926300048828, "learning_rate": 0.000686905489876342, "loss": 3.6486, "step": 36865 }, { "epoch": 2.5050958010599267, "grad_norm": 0.7147447466850281, "learning_rate": 0.0006868630248675092, "loss": 3.2132, "step": 36870 }, { "epoch": 2.5054355211305883, "grad_norm": 0.9452587366104126, "learning_rate": 0.0006868205598586764, "loss": 3.3614, "step": 36875 }, { "epoch": 2.50577524120125, "grad_norm": 0.7602297067642212, "learning_rate": 0.0006867780948498438, "loss": 3.4661, "step": 36880 }, { "epoch": 2.506114961271912, "grad_norm": 0.8211740851402283, "learning_rate": 0.000686735629841011, "loss": 3.5463, "step": 36885 }, { "epoch": 2.5064546813425737, "grad_norm": 0.9510506987571716, "learning_rate": 0.0006866931648321782, "loss": 3.4778, "step": 36890 }, { "epoch": 2.5067944014132353, "grad_norm": 0.8949901461601257, "learning_rate": 0.0006866506998233456, "loss": 3.3598, "step": 36895 }, { "epoch": 2.5071341214838974, "grad_norm": 1.0402981042861938, "learning_rate": 0.0006866082348145129, "loss": 3.3759, "step": 36900 }, { "epoch": 2.507473841554559, "grad_norm": 0.8432304859161377, "learning_rate": 0.0006865657698056801, "loss": 3.6168, "step": 36905 }, { "epoch": 2.5078135616252206, "grad_norm": 0.6661829352378845, "learning_rate": 0.0006865233047968475, "loss": 3.4575, "step": 36910 }, { "epoch": 2.5081532816958827, "grad_norm": 1.1519736051559448, "learning_rate": 0.0006864808397880147, "loss": 3.5699, "step": 36915 }, { "epoch": 2.5084930017665443, "grad_norm": 0.7281892895698547, "learning_rate": 0.0006864383747791819, "loss": 3.3514, "step": 36920 }, { "epoch": 2.508832721837206, "grad_norm": 0.8577595949172974, "learning_rate": 0.0006863959097703492, "loss": 3.7121, "step": 36925 }, { "epoch": 2.509172441907868, "grad_norm": 0.7311611175537109, "learning_rate": 0.0006863534447615165, "loss": 3.3516, "step": 36930 }, { "epoch": 2.5095121619785297, "grad_norm": 1.0527610778808594, "learning_rate": 0.0006863109797526838, "loss": 3.7124, "step": 36935 }, { "epoch": 2.5098518820491913, "grad_norm": 0.7159387469291687, "learning_rate": 0.0006862685147438511, "loss": 3.6336, "step": 36940 }, { "epoch": 2.5101916021198534, "grad_norm": 0.9110502004623413, "learning_rate": 0.0006862260497350184, "loss": 3.6723, "step": 36945 }, { "epoch": 2.510531322190515, "grad_norm": 1.5016603469848633, "learning_rate": 0.0006861835847261856, "loss": 3.4908, "step": 36950 }, { "epoch": 2.5108710422611766, "grad_norm": 1.0306766033172607, "learning_rate": 0.0006861411197173529, "loss": 3.6121, "step": 36955 }, { "epoch": 2.5112107623318387, "grad_norm": 0.5805943608283997, "learning_rate": 0.0006860986547085201, "loss": 3.5461, "step": 36960 }, { "epoch": 2.5115504824025003, "grad_norm": 0.9453427791595459, "learning_rate": 0.0006860561896996874, "loss": 3.401, "step": 36965 }, { "epoch": 2.511890202473162, "grad_norm": 0.7779866456985474, "learning_rate": 0.0006860137246908548, "loss": 3.5209, "step": 36970 }, { "epoch": 2.512229922543824, "grad_norm": 0.7268861532211304, "learning_rate": 0.000685971259682022, "loss": 3.744, "step": 36975 }, { "epoch": 2.5125696426144857, "grad_norm": 0.7437829971313477, "learning_rate": 0.0006859287946731894, "loss": 3.4672, "step": 36980 }, { "epoch": 2.5129093626851473, "grad_norm": 0.9142969846725464, "learning_rate": 0.0006858863296643566, "loss": 3.588, "step": 36985 }, { "epoch": 2.5132490827558094, "grad_norm": 0.8910629153251648, "learning_rate": 0.0006858438646555238, "loss": 3.896, "step": 36990 }, { "epoch": 2.513588802826471, "grad_norm": 0.7418274879455566, "learning_rate": 0.0006858013996466912, "loss": 3.8323, "step": 36995 }, { "epoch": 2.5139285228971326, "grad_norm": 0.8523584008216858, "learning_rate": 0.0006857589346378584, "loss": 3.517, "step": 37000 }, { "epoch": 2.5142682429677947, "grad_norm": 0.7802397608757019, "learning_rate": 0.0006857164696290257, "loss": 3.9053, "step": 37005 }, { "epoch": 2.5146079630384564, "grad_norm": 0.6620455384254456, "learning_rate": 0.0006856740046201931, "loss": 3.3909, "step": 37010 }, { "epoch": 2.514947683109118, "grad_norm": 0.7681835889816284, "learning_rate": 0.0006856315396113603, "loss": 3.5391, "step": 37015 }, { "epoch": 2.51528740317978, "grad_norm": 0.7635166049003601, "learning_rate": 0.0006855890746025275, "loss": 3.4221, "step": 37020 }, { "epoch": 2.5156271232504417, "grad_norm": 0.865240216255188, "learning_rate": 0.0006855466095936948, "loss": 3.4297, "step": 37025 }, { "epoch": 2.5159668433211033, "grad_norm": 0.6793370246887207, "learning_rate": 0.0006855041445848621, "loss": 3.4611, "step": 37030 }, { "epoch": 2.5163065633917654, "grad_norm": 0.8803624510765076, "learning_rate": 0.0006854616795760293, "loss": 3.4582, "step": 37035 }, { "epoch": 2.516646283462427, "grad_norm": 0.7430471777915955, "learning_rate": 0.0006854192145671967, "loss": 3.5829, "step": 37040 }, { "epoch": 2.5169860035330887, "grad_norm": 0.9532375931739807, "learning_rate": 0.000685376749558364, "loss": 3.4732, "step": 37045 }, { "epoch": 2.5173257236037507, "grad_norm": 1.0152339935302734, "learning_rate": 0.0006853342845495312, "loss": 3.6815, "step": 37050 }, { "epoch": 2.5176654436744124, "grad_norm": 1.0876001119613647, "learning_rate": 0.0006852918195406985, "loss": 3.6239, "step": 37055 }, { "epoch": 2.518005163745074, "grad_norm": 0.9778195023536682, "learning_rate": 0.0006852493545318657, "loss": 3.3743, "step": 37060 }, { "epoch": 2.518344883815736, "grad_norm": 0.7446442246437073, "learning_rate": 0.000685206889523033, "loss": 3.6971, "step": 37065 }, { "epoch": 2.5186846038863977, "grad_norm": 0.823059618473053, "learning_rate": 0.0006851644245142003, "loss": 3.5565, "step": 37070 }, { "epoch": 2.5190243239570593, "grad_norm": 0.7140182256698608, "learning_rate": 0.0006851219595053676, "loss": 3.241, "step": 37075 }, { "epoch": 2.5193640440277214, "grad_norm": 1.0930211544036865, "learning_rate": 0.0006850794944965349, "loss": 3.6895, "step": 37080 }, { "epoch": 2.519703764098383, "grad_norm": 0.733505368232727, "learning_rate": 0.0006850370294877022, "loss": 3.4622, "step": 37085 }, { "epoch": 2.5200434841690447, "grad_norm": 0.7258961200714111, "learning_rate": 0.0006849945644788694, "loss": 3.5916, "step": 37090 }, { "epoch": 2.5203832042397067, "grad_norm": 1.1903328895568848, "learning_rate": 0.0006849520994700367, "loss": 3.7739, "step": 37095 }, { "epoch": 2.5207229243103684, "grad_norm": 0.8272022604942322, "learning_rate": 0.000684909634461204, "loss": 3.455, "step": 37100 }, { "epoch": 2.52106264438103, "grad_norm": 0.9182109832763672, "learning_rate": 0.0006848671694523712, "loss": 3.3313, "step": 37105 }, { "epoch": 2.521402364451692, "grad_norm": 1.2223981618881226, "learning_rate": 0.0006848247044435386, "loss": 3.4623, "step": 37110 }, { "epoch": 2.5217420845223537, "grad_norm": 1.0115257501602173, "learning_rate": 0.0006847822394347059, "loss": 3.5466, "step": 37115 }, { "epoch": 2.5220818045930153, "grad_norm": 0.860014021396637, "learning_rate": 0.0006847397744258731, "loss": 3.6982, "step": 37120 }, { "epoch": 2.522421524663677, "grad_norm": 0.8742210865020752, "learning_rate": 0.0006846973094170403, "loss": 3.626, "step": 37125 }, { "epoch": 2.522761244734339, "grad_norm": 0.8782237768173218, "learning_rate": 0.0006846548444082077, "loss": 3.5397, "step": 37130 }, { "epoch": 2.5231009648050007, "grad_norm": 0.996138870716095, "learning_rate": 0.0006846123793993749, "loss": 3.7882, "step": 37135 }, { "epoch": 2.5234406848756623, "grad_norm": 0.598161518573761, "learning_rate": 0.0006845699143905421, "loss": 3.6471, "step": 37140 }, { "epoch": 2.5237804049463244, "grad_norm": 0.8116862773895264, "learning_rate": 0.0006845274493817096, "loss": 3.4462, "step": 37145 }, { "epoch": 2.524120125016986, "grad_norm": 0.9528845548629761, "learning_rate": 0.0006844849843728768, "loss": 3.746, "step": 37150 }, { "epoch": 2.5244598450876476, "grad_norm": 0.9268794059753418, "learning_rate": 0.000684442519364044, "loss": 3.5414, "step": 37155 }, { "epoch": 2.5247995651583093, "grad_norm": 0.8700067400932312, "learning_rate": 0.0006844000543552114, "loss": 3.4506, "step": 37160 }, { "epoch": 2.5251392852289714, "grad_norm": 0.9306234121322632, "learning_rate": 0.0006843575893463786, "loss": 3.7126, "step": 37165 }, { "epoch": 2.525479005299633, "grad_norm": 0.9183580875396729, "learning_rate": 0.0006843151243375458, "loss": 3.4843, "step": 37170 }, { "epoch": 2.5258187253702946, "grad_norm": 0.9226963520050049, "learning_rate": 0.0006842726593287131, "loss": 3.3709, "step": 37175 }, { "epoch": 2.5261584454409567, "grad_norm": 1.014102578163147, "learning_rate": 0.0006842301943198805, "loss": 3.5456, "step": 37180 }, { "epoch": 2.5264981655116183, "grad_norm": 0.6991927623748779, "learning_rate": 0.0006841877293110477, "loss": 3.4699, "step": 37185 }, { "epoch": 2.52683788558228, "grad_norm": 0.8245767951011658, "learning_rate": 0.000684145264302215, "loss": 3.4473, "step": 37190 }, { "epoch": 2.527177605652942, "grad_norm": 0.9326229095458984, "learning_rate": 0.0006841027992933823, "loss": 3.7456, "step": 37195 }, { "epoch": 2.5275173257236037, "grad_norm": 0.8568160533905029, "learning_rate": 0.0006840603342845495, "loss": 3.4264, "step": 37200 }, { "epoch": 2.5278570457942653, "grad_norm": 0.7080419659614563, "learning_rate": 0.0006840178692757168, "loss": 3.6784, "step": 37205 }, { "epoch": 2.5281967658649274, "grad_norm": 0.8823555111885071, "learning_rate": 0.000683975404266884, "loss": 3.5315, "step": 37210 }, { "epoch": 2.528536485935589, "grad_norm": 0.9758599400520325, "learning_rate": 0.0006839329392580514, "loss": 3.4048, "step": 37215 }, { "epoch": 2.5288762060062506, "grad_norm": 0.6911731958389282, "learning_rate": 0.0006838904742492187, "loss": 3.665, "step": 37220 }, { "epoch": 2.5292159260769127, "grad_norm": 0.8030714988708496, "learning_rate": 0.0006838480092403859, "loss": 3.4843, "step": 37225 }, { "epoch": 2.5295556461475743, "grad_norm": 0.730897843837738, "learning_rate": 0.0006838055442315532, "loss": 3.4001, "step": 37230 }, { "epoch": 2.529895366218236, "grad_norm": 1.1770083904266357, "learning_rate": 0.0006837630792227205, "loss": 3.4259, "step": 37235 }, { "epoch": 2.530235086288898, "grad_norm": 1.0021699666976929, "learning_rate": 0.0006837206142138877, "loss": 3.575, "step": 37240 }, { "epoch": 2.5305748063595597, "grad_norm": 1.0874969959259033, "learning_rate": 0.000683678149205055, "loss": 3.3868, "step": 37245 }, { "epoch": 2.5309145264302213, "grad_norm": 0.8256418704986572, "learning_rate": 0.0006836356841962224, "loss": 3.5362, "step": 37250 }, { "epoch": 2.5312542465008834, "grad_norm": 0.8466256260871887, "learning_rate": 0.0006835932191873896, "loss": 3.6982, "step": 37255 }, { "epoch": 2.531593966571545, "grad_norm": 0.8407155275344849, "learning_rate": 0.0006835507541785568, "loss": 3.6027, "step": 37260 }, { "epoch": 2.5319336866422066, "grad_norm": 0.9318026900291443, "learning_rate": 0.0006835082891697242, "loss": 3.4181, "step": 37265 }, { "epoch": 2.5322734067128687, "grad_norm": 0.8569952249526978, "learning_rate": 0.0006834658241608914, "loss": 3.4468, "step": 37270 }, { "epoch": 2.5326131267835303, "grad_norm": 0.7275272011756897, "learning_rate": 0.0006834233591520586, "loss": 3.55, "step": 37275 }, { "epoch": 2.532952846854192, "grad_norm": 1.004207968711853, "learning_rate": 0.000683380894143226, "loss": 3.5624, "step": 37280 }, { "epoch": 2.533292566924854, "grad_norm": 2.2090513706207275, "learning_rate": 0.0006833384291343933, "loss": 3.4734, "step": 37285 }, { "epoch": 2.5336322869955157, "grad_norm": 0.7665367126464844, "learning_rate": 0.0006832959641255605, "loss": 3.5937, "step": 37290 }, { "epoch": 2.5339720070661773, "grad_norm": 0.857237696647644, "learning_rate": 0.0006832534991167279, "loss": 3.4068, "step": 37295 }, { "epoch": 2.5343117271368394, "grad_norm": 0.9589405655860901, "learning_rate": 0.0006832110341078951, "loss": 3.4182, "step": 37300 }, { "epoch": 2.534651447207501, "grad_norm": 0.8746241927146912, "learning_rate": 0.0006831685690990623, "loss": 3.6515, "step": 37305 }, { "epoch": 2.5349911672781626, "grad_norm": 0.7131111025810242, "learning_rate": 0.0006831261040902296, "loss": 3.398, "step": 37310 }, { "epoch": 2.5353308873488247, "grad_norm": 1.3584858179092407, "learning_rate": 0.0006830836390813969, "loss": 3.2563, "step": 37315 }, { "epoch": 2.5356706074194864, "grad_norm": 0.7180082201957703, "learning_rate": 0.0006830411740725643, "loss": 3.5323, "step": 37320 }, { "epoch": 2.536010327490148, "grad_norm": 0.8817541599273682, "learning_rate": 0.0006829987090637315, "loss": 3.4727, "step": 37325 }, { "epoch": 2.53635004756081, "grad_norm": 0.848643958568573, "learning_rate": 0.0006829562440548988, "loss": 3.6531, "step": 37330 }, { "epoch": 2.5366897676314717, "grad_norm": 0.7705969214439392, "learning_rate": 0.0006829137790460661, "loss": 3.5679, "step": 37335 }, { "epoch": 2.5370294877021333, "grad_norm": 0.6676970720291138, "learning_rate": 0.0006828713140372333, "loss": 3.6485, "step": 37340 }, { "epoch": 2.5373692077727954, "grad_norm": 0.967232346534729, "learning_rate": 0.0006828288490284006, "loss": 3.8227, "step": 37345 }, { "epoch": 2.537708927843457, "grad_norm": 0.8471551537513733, "learning_rate": 0.000682786384019568, "loss": 3.3796, "step": 37350 }, { "epoch": 2.5380486479141187, "grad_norm": 0.6741301417350769, "learning_rate": 0.0006827439190107352, "loss": 3.7094, "step": 37355 }, { "epoch": 2.5383883679847807, "grad_norm": 0.8954216241836548, "learning_rate": 0.0006827014540019024, "loss": 3.628, "step": 37360 }, { "epoch": 2.5387280880554424, "grad_norm": 0.7996103167533875, "learning_rate": 0.0006826589889930698, "loss": 3.3768, "step": 37365 }, { "epoch": 2.539067808126104, "grad_norm": 0.9981301426887512, "learning_rate": 0.000682616523984237, "loss": 3.5798, "step": 37370 }, { "epoch": 2.539407528196766, "grad_norm": 0.7760396003723145, "learning_rate": 0.0006825740589754042, "loss": 3.6395, "step": 37375 }, { "epoch": 2.5397472482674277, "grad_norm": 0.7435452938079834, "learning_rate": 0.0006825315939665716, "loss": 3.5326, "step": 37380 }, { "epoch": 2.5400869683380893, "grad_norm": 0.763060450553894, "learning_rate": 0.0006824891289577389, "loss": 3.6156, "step": 37385 }, { "epoch": 2.5404266884087514, "grad_norm": 0.9341066479682922, "learning_rate": 0.0006824466639489061, "loss": 3.6775, "step": 37390 }, { "epoch": 2.540766408479413, "grad_norm": 0.704109251499176, "learning_rate": 0.0006824041989400735, "loss": 3.6327, "step": 37395 }, { "epoch": 2.5411061285500747, "grad_norm": 0.7881059050559998, "learning_rate": 0.0006823617339312407, "loss": 3.8377, "step": 37400 }, { "epoch": 2.5414458486207367, "grad_norm": 0.7162206172943115, "learning_rate": 0.0006823192689224079, "loss": 3.2856, "step": 37405 }, { "epoch": 2.5417855686913984, "grad_norm": 0.8092921376228333, "learning_rate": 0.0006822768039135752, "loss": 3.5832, "step": 37410 }, { "epoch": 2.54212528876206, "grad_norm": 0.8755738139152527, "learning_rate": 0.0006822343389047425, "loss": 3.8799, "step": 37415 }, { "epoch": 2.542465008832722, "grad_norm": 0.8400694727897644, "learning_rate": 0.0006821918738959098, "loss": 3.8144, "step": 37420 }, { "epoch": 2.5428047289033837, "grad_norm": 0.8859412670135498, "learning_rate": 0.0006821494088870771, "loss": 3.6268, "step": 37425 }, { "epoch": 2.5431444489740453, "grad_norm": 0.7265583872795105, "learning_rate": 0.0006821069438782444, "loss": 3.6034, "step": 37430 }, { "epoch": 2.5434841690447074, "grad_norm": 1.0732241868972778, "learning_rate": 0.0006820644788694116, "loss": 3.5784, "step": 37435 }, { "epoch": 2.543823889115369, "grad_norm": 0.9312456846237183, "learning_rate": 0.0006820220138605789, "loss": 3.4733, "step": 37440 }, { "epoch": 2.5441636091860307, "grad_norm": 0.8461014628410339, "learning_rate": 0.0006819795488517462, "loss": 3.3997, "step": 37445 }, { "epoch": 2.5445033292566928, "grad_norm": 0.8358914256095886, "learning_rate": 0.0006819370838429134, "loss": 3.5168, "step": 37450 }, { "epoch": 2.5448430493273544, "grad_norm": 1.2743711471557617, "learning_rate": 0.0006818946188340808, "loss": 3.4885, "step": 37455 }, { "epoch": 2.545182769398016, "grad_norm": 0.8078498840332031, "learning_rate": 0.000681852153825248, "loss": 3.619, "step": 37460 }, { "epoch": 2.5455224894686777, "grad_norm": 1.0167410373687744, "learning_rate": 0.0006818096888164153, "loss": 3.5456, "step": 37465 }, { "epoch": 2.5458622095393397, "grad_norm": 0.8658894300460815, "learning_rate": 0.0006817672238075826, "loss": 3.6247, "step": 37470 }, { "epoch": 2.5462019296100014, "grad_norm": 0.8564377427101135, "learning_rate": 0.0006817247587987498, "loss": 3.4208, "step": 37475 }, { "epoch": 2.546541649680663, "grad_norm": 0.9480738043785095, "learning_rate": 0.0006816822937899171, "loss": 3.4077, "step": 37480 }, { "epoch": 2.546881369751325, "grad_norm": 0.7453556656837463, "learning_rate": 0.0006816398287810844, "loss": 3.776, "step": 37485 }, { "epoch": 2.5472210898219867, "grad_norm": 0.8725706934928894, "learning_rate": 0.0006815973637722517, "loss": 3.817, "step": 37490 }, { "epoch": 2.5475608098926483, "grad_norm": 0.7744187712669373, "learning_rate": 0.000681554898763419, "loss": 3.3928, "step": 37495 }, { "epoch": 2.54790052996331, "grad_norm": 0.9024271965026855, "learning_rate": 0.0006815124337545863, "loss": 3.5952, "step": 37500 }, { "epoch": 2.548240250033972, "grad_norm": 0.7473392486572266, "learning_rate": 0.0006814699687457535, "loss": 3.7007, "step": 37505 }, { "epoch": 2.5485799701046337, "grad_norm": 1.1972899436950684, "learning_rate": 0.0006814275037369207, "loss": 3.6493, "step": 37510 }, { "epoch": 2.5489196901752953, "grad_norm": 0.7993201017379761, "learning_rate": 0.0006813850387280881, "loss": 3.5256, "step": 37515 }, { "epoch": 2.5492594102459574, "grad_norm": 0.6989744901657104, "learning_rate": 0.0006813425737192553, "loss": 3.4708, "step": 37520 }, { "epoch": 2.549599130316619, "grad_norm": 1.0735396146774292, "learning_rate": 0.0006813001087104226, "loss": 3.7403, "step": 37525 }, { "epoch": 2.5499388503872806, "grad_norm": 0.8045423030853271, "learning_rate": 0.00068125764370159, "loss": 3.5402, "step": 37530 }, { "epoch": 2.5502785704579427, "grad_norm": 0.7121123671531677, "learning_rate": 0.0006812151786927572, "loss": 3.7096, "step": 37535 }, { "epoch": 2.5506182905286043, "grad_norm": 0.8172414898872375, "learning_rate": 0.0006811727136839244, "loss": 3.6282, "step": 37540 }, { "epoch": 2.550958010599266, "grad_norm": 0.8707689642906189, "learning_rate": 0.0006811302486750918, "loss": 3.5806, "step": 37545 }, { "epoch": 2.551297730669928, "grad_norm": 0.8858196139335632, "learning_rate": 0.000681087783666259, "loss": 3.3122, "step": 37550 }, { "epoch": 2.5516374507405897, "grad_norm": 0.8651095628738403, "learning_rate": 0.0006810453186574262, "loss": 3.269, "step": 37555 }, { "epoch": 2.5519771708112513, "grad_norm": 0.7171977162361145, "learning_rate": 0.0006810028536485936, "loss": 3.2971, "step": 37560 }, { "epoch": 2.5523168908819134, "grad_norm": 0.736710250377655, "learning_rate": 0.0006809603886397609, "loss": 3.4751, "step": 37565 }, { "epoch": 2.552656610952575, "grad_norm": 1.1818926334381104, "learning_rate": 0.0006809179236309281, "loss": 3.4499, "step": 37570 }, { "epoch": 2.5529963310232366, "grad_norm": 0.8949518203735352, "learning_rate": 0.0006808754586220954, "loss": 3.4745, "step": 37575 }, { "epoch": 2.5533360510938987, "grad_norm": 0.760863184928894, "learning_rate": 0.0006808329936132627, "loss": 3.828, "step": 37580 }, { "epoch": 2.5536757711645603, "grad_norm": 0.6896313428878784, "learning_rate": 0.0006807905286044299, "loss": 3.1893, "step": 37585 }, { "epoch": 2.554015491235222, "grad_norm": 0.8468573689460754, "learning_rate": 0.0006807480635955972, "loss": 3.5887, "step": 37590 }, { "epoch": 2.554355211305884, "grad_norm": 0.9476075768470764, "learning_rate": 0.0006807055985867646, "loss": 3.6029, "step": 37595 }, { "epoch": 2.5546949313765457, "grad_norm": 0.8353786468505859, "learning_rate": 0.0006806631335779318, "loss": 3.2934, "step": 37600 }, { "epoch": 2.5550346514472073, "grad_norm": 0.705713152885437, "learning_rate": 0.0006806206685690991, "loss": 3.6285, "step": 37605 }, { "epoch": 2.5553743715178694, "grad_norm": 0.9897499680519104, "learning_rate": 0.0006805782035602663, "loss": 3.657, "step": 37610 }, { "epoch": 2.555714091588531, "grad_norm": 1.1788630485534668, "learning_rate": 0.0006805357385514336, "loss": 3.3612, "step": 37615 }, { "epoch": 2.5560538116591927, "grad_norm": 0.8140487670898438, "learning_rate": 0.0006804932735426009, "loss": 3.6974, "step": 37620 }, { "epoch": 2.5563935317298547, "grad_norm": 1.4328302145004272, "learning_rate": 0.0006804508085337681, "loss": 3.7755, "step": 37625 }, { "epoch": 2.5567332518005164, "grad_norm": 0.9705634713172913, "learning_rate": 0.0006804083435249355, "loss": 3.4662, "step": 37630 }, { "epoch": 2.557072971871178, "grad_norm": 0.860119640827179, "learning_rate": 0.0006803658785161028, "loss": 3.717, "step": 37635 }, { "epoch": 2.55741269194184, "grad_norm": 0.9648481607437134, "learning_rate": 0.00068032341350727, "loss": 3.3909, "step": 37640 }, { "epoch": 2.5577524120125017, "grad_norm": 0.8376898765563965, "learning_rate": 0.0006802809484984372, "loss": 3.5654, "step": 37645 }, { "epoch": 2.5580921320831633, "grad_norm": 0.7412816286087036, "learning_rate": 0.0006802384834896046, "loss": 3.4574, "step": 37650 }, { "epoch": 2.5584318521538254, "grad_norm": 0.7815935611724854, "learning_rate": 0.0006801960184807718, "loss": 3.6062, "step": 37655 }, { "epoch": 2.558771572224487, "grad_norm": 0.9171143174171448, "learning_rate": 0.0006801535534719391, "loss": 3.5079, "step": 37660 }, { "epoch": 2.5591112922951487, "grad_norm": 0.7003941535949707, "learning_rate": 0.0006801110884631065, "loss": 3.2882, "step": 37665 }, { "epoch": 2.5594510123658107, "grad_norm": 0.7786941528320312, "learning_rate": 0.0006800686234542737, "loss": 3.5496, "step": 37670 }, { "epoch": 2.5597907324364724, "grad_norm": 0.7433000206947327, "learning_rate": 0.000680026158445441, "loss": 3.6646, "step": 37675 }, { "epoch": 2.560130452507134, "grad_norm": 0.9838359951972961, "learning_rate": 0.0006799836934366083, "loss": 3.6243, "step": 37680 }, { "epoch": 2.560470172577796, "grad_norm": 0.8476574420928955, "learning_rate": 0.0006799412284277755, "loss": 3.6368, "step": 37685 }, { "epoch": 2.5608098926484577, "grad_norm": 1.1009407043457031, "learning_rate": 0.0006798987634189428, "loss": 3.6687, "step": 37690 }, { "epoch": 2.5611496127191193, "grad_norm": 0.7650176882743835, "learning_rate": 0.00067985629841011, "loss": 3.5522, "step": 37695 }, { "epoch": 2.5614893327897814, "grad_norm": 0.9319393038749695, "learning_rate": 0.0006798138334012774, "loss": 3.5043, "step": 37700 }, { "epoch": 2.561829052860443, "grad_norm": 0.9441753029823303, "learning_rate": 0.0006797713683924447, "loss": 3.5682, "step": 37705 }, { "epoch": 2.5621687729311047, "grad_norm": 0.8322954773902893, "learning_rate": 0.0006797289033836119, "loss": 3.4793, "step": 37710 }, { "epoch": 2.5625084930017668, "grad_norm": 0.7383533716201782, "learning_rate": 0.0006796864383747792, "loss": 3.6736, "step": 37715 }, { "epoch": 2.5628482130724284, "grad_norm": 0.9590554237365723, "learning_rate": 0.0006796439733659465, "loss": 3.5587, "step": 37720 }, { "epoch": 2.56318793314309, "grad_norm": 1.002205491065979, "learning_rate": 0.0006796015083571137, "loss": 3.3145, "step": 37725 }, { "epoch": 2.563527653213752, "grad_norm": 0.7824024558067322, "learning_rate": 0.000679559043348281, "loss": 3.7085, "step": 37730 }, { "epoch": 2.5638673732844137, "grad_norm": 0.8643611669540405, "learning_rate": 0.0006795165783394484, "loss": 3.6592, "step": 37735 }, { "epoch": 2.5642070933550754, "grad_norm": 0.7834507822990417, "learning_rate": 0.0006794741133306156, "loss": 3.6691, "step": 37740 }, { "epoch": 2.5645468134257374, "grad_norm": 0.6759098768234253, "learning_rate": 0.0006794316483217828, "loss": 3.6876, "step": 37745 }, { "epoch": 2.564886533496399, "grad_norm": 0.8073316812515259, "learning_rate": 0.0006793891833129502, "loss": 3.3911, "step": 37750 }, { "epoch": 2.5652262535670607, "grad_norm": 0.796397864818573, "learning_rate": 0.0006793467183041174, "loss": 3.5533, "step": 37755 }, { "epoch": 2.5655659736377228, "grad_norm": 0.7748994827270508, "learning_rate": 0.0006793042532952846, "loss": 3.5981, "step": 37760 }, { "epoch": 2.5659056937083844, "grad_norm": 0.837766170501709, "learning_rate": 0.000679261788286452, "loss": 3.6042, "step": 37765 }, { "epoch": 2.566245413779046, "grad_norm": 0.9411345720291138, "learning_rate": 0.0006792193232776193, "loss": 3.4886, "step": 37770 }, { "epoch": 2.566585133849708, "grad_norm": 0.9730944037437439, "learning_rate": 0.0006791768582687865, "loss": 3.7639, "step": 37775 }, { "epoch": 2.5669248539203697, "grad_norm": 0.7867385149002075, "learning_rate": 0.0006791343932599539, "loss": 3.7003, "step": 37780 }, { "epoch": 2.5672645739910314, "grad_norm": 0.7268301248550415, "learning_rate": 0.0006790919282511211, "loss": 3.4639, "step": 37785 }, { "epoch": 2.5676042940616934, "grad_norm": 0.754845917224884, "learning_rate": 0.0006790494632422883, "loss": 3.8566, "step": 37790 }, { "epoch": 2.567944014132355, "grad_norm": 0.8006362915039062, "learning_rate": 0.0006790069982334557, "loss": 3.5566, "step": 37795 }, { "epoch": 2.5682837342030167, "grad_norm": 1.0977997779846191, "learning_rate": 0.0006789645332246229, "loss": 3.7898, "step": 37800 }, { "epoch": 2.5686234542736783, "grad_norm": 0.8390315771102905, "learning_rate": 0.0006789220682157902, "loss": 3.5081, "step": 37805 }, { "epoch": 2.5689631743443404, "grad_norm": 0.7826142311096191, "learning_rate": 0.0006788796032069575, "loss": 3.5884, "step": 37810 }, { "epoch": 2.569302894415002, "grad_norm": 0.77899169921875, "learning_rate": 0.0006788371381981248, "loss": 3.6451, "step": 37815 }, { "epoch": 2.5696426144856637, "grad_norm": 0.9358908534049988, "learning_rate": 0.000678794673189292, "loss": 3.6303, "step": 37820 }, { "epoch": 2.5699823345563257, "grad_norm": 0.8480795621871948, "learning_rate": 0.0006787522081804593, "loss": 3.5702, "step": 37825 }, { "epoch": 2.5703220546269874, "grad_norm": 0.8820386528968811, "learning_rate": 0.0006787097431716266, "loss": 3.2465, "step": 37830 }, { "epoch": 2.570661774697649, "grad_norm": 0.846269428730011, "learning_rate": 0.0006786672781627938, "loss": 3.406, "step": 37835 }, { "epoch": 2.5710014947683106, "grad_norm": 0.8806630373001099, "learning_rate": 0.0006786248131539612, "loss": 3.482, "step": 37840 }, { "epoch": 2.5713412148389727, "grad_norm": 0.9678062200546265, "learning_rate": 0.0006785823481451285, "loss": 3.4212, "step": 37845 }, { "epoch": 2.5716809349096343, "grad_norm": 0.9641690850257874, "learning_rate": 0.0006785398831362957, "loss": 3.7284, "step": 37850 }, { "epoch": 2.572020654980296, "grad_norm": 0.86585533618927, "learning_rate": 0.000678497418127463, "loss": 3.7102, "step": 37855 }, { "epoch": 2.572360375050958, "grad_norm": 0.6904707551002502, "learning_rate": 0.0006784549531186302, "loss": 3.4816, "step": 37860 }, { "epoch": 2.5727000951216197, "grad_norm": 0.9663662910461426, "learning_rate": 0.0006784124881097975, "loss": 3.7681, "step": 37865 }, { "epoch": 2.5730398151922813, "grad_norm": 0.8621360063552856, "learning_rate": 0.0006783700231009648, "loss": 3.5581, "step": 37870 }, { "epoch": 2.5733795352629434, "grad_norm": 0.8445187211036682, "learning_rate": 0.0006783275580921321, "loss": 3.6319, "step": 37875 }, { "epoch": 2.573719255333605, "grad_norm": 1.0300123691558838, "learning_rate": 0.0006782850930832994, "loss": 3.4343, "step": 37880 }, { "epoch": 2.5740589754042666, "grad_norm": 0.8465536236763, "learning_rate": 0.0006782426280744667, "loss": 3.5243, "step": 37885 }, { "epoch": 2.5743986954749287, "grad_norm": 0.7552660703659058, "learning_rate": 0.0006782001630656339, "loss": 3.6014, "step": 37890 }, { "epoch": 2.5747384155455904, "grad_norm": 1.1084339618682861, "learning_rate": 0.0006781576980568011, "loss": 3.634, "step": 37895 }, { "epoch": 2.575078135616252, "grad_norm": 0.8992234468460083, "learning_rate": 0.0006781152330479685, "loss": 3.8, "step": 37900 }, { "epoch": 2.575417855686914, "grad_norm": 0.7705115675926208, "learning_rate": 0.0006780727680391357, "loss": 3.7682, "step": 37905 }, { "epoch": 2.5757575757575757, "grad_norm": 1.2505043745040894, "learning_rate": 0.000678030303030303, "loss": 3.6418, "step": 37910 }, { "epoch": 2.5760972958282373, "grad_norm": 0.9458591938018799, "learning_rate": 0.0006779878380214704, "loss": 3.608, "step": 37915 }, { "epoch": 2.5764370158988994, "grad_norm": 0.7450234293937683, "learning_rate": 0.0006779453730126376, "loss": 3.4988, "step": 37920 }, { "epoch": 2.576776735969561, "grad_norm": 0.9315587282180786, "learning_rate": 0.0006779029080038048, "loss": 3.6405, "step": 37925 }, { "epoch": 2.5771164560402227, "grad_norm": 0.8893061876296997, "learning_rate": 0.0006778604429949722, "loss": 3.3913, "step": 37930 }, { "epoch": 2.5774561761108847, "grad_norm": 0.9576724767684937, "learning_rate": 0.0006778179779861394, "loss": 3.3531, "step": 37935 }, { "epoch": 2.5777958961815464, "grad_norm": 0.8206709027290344, "learning_rate": 0.0006777755129773066, "loss": 3.508, "step": 37940 }, { "epoch": 2.578135616252208, "grad_norm": 0.8342892527580261, "learning_rate": 0.000677733047968474, "loss": 3.4397, "step": 37945 }, { "epoch": 2.57847533632287, "grad_norm": 1.346496343612671, "learning_rate": 0.0006776905829596413, "loss": 3.4765, "step": 37950 }, { "epoch": 2.5788150563935317, "grad_norm": 0.7249206304550171, "learning_rate": 0.0006776481179508085, "loss": 3.45, "step": 37955 }, { "epoch": 2.5791547764641933, "grad_norm": 0.8977625370025635, "learning_rate": 0.0006776056529419758, "loss": 3.2067, "step": 37960 }, { "epoch": 2.5794944965348554, "grad_norm": 0.8230484127998352, "learning_rate": 0.0006775631879331431, "loss": 3.7792, "step": 37965 }, { "epoch": 2.579834216605517, "grad_norm": 0.8896654844284058, "learning_rate": 0.0006775207229243103, "loss": 3.3746, "step": 37970 }, { "epoch": 2.5801739366761787, "grad_norm": 0.8910081386566162, "learning_rate": 0.0006774782579154777, "loss": 3.4598, "step": 37975 }, { "epoch": 2.5805136567468407, "grad_norm": 0.8321078419685364, "learning_rate": 0.000677435792906645, "loss": 3.6167, "step": 37980 }, { "epoch": 2.5808533768175024, "grad_norm": 0.9067173004150391, "learning_rate": 0.0006773933278978122, "loss": 3.374, "step": 37985 }, { "epoch": 2.581193096888164, "grad_norm": 0.9578613638877869, "learning_rate": 0.0006773508628889795, "loss": 3.651, "step": 37990 }, { "epoch": 2.581532816958826, "grad_norm": 0.7611261010169983, "learning_rate": 0.0006773083978801467, "loss": 3.7245, "step": 37995 }, { "epoch": 2.5818725370294877, "grad_norm": 1.0454410314559937, "learning_rate": 0.0006772659328713141, "loss": 3.4177, "step": 38000 }, { "epoch": 2.5822122571001493, "grad_norm": 1.0576915740966797, "learning_rate": 0.0006772234678624813, "loss": 3.4216, "step": 38005 }, { "epoch": 2.5825519771708114, "grad_norm": 0.6573306322097778, "learning_rate": 0.0006771810028536486, "loss": 3.8496, "step": 38010 }, { "epoch": 2.582891697241473, "grad_norm": 0.9440877437591553, "learning_rate": 0.000677138537844816, "loss": 3.6634, "step": 38015 }, { "epoch": 2.5832314173121347, "grad_norm": 0.8111201524734497, "learning_rate": 0.0006770960728359832, "loss": 3.7969, "step": 38020 }, { "epoch": 2.5835711373827968, "grad_norm": 0.6250476241111755, "learning_rate": 0.0006770536078271504, "loss": 3.547, "step": 38025 }, { "epoch": 2.5839108574534584, "grad_norm": 0.8649698495864868, "learning_rate": 0.0006770111428183178, "loss": 3.4831, "step": 38030 }, { "epoch": 2.58425057752412, "grad_norm": 1.282321810722351, "learning_rate": 0.000676968677809485, "loss": 3.592, "step": 38035 }, { "epoch": 2.584590297594782, "grad_norm": 0.939784824848175, "learning_rate": 0.0006769262128006522, "loss": 3.7098, "step": 38040 }, { "epoch": 2.5849300176654437, "grad_norm": 0.8066626191139221, "learning_rate": 0.0006768837477918197, "loss": 3.5911, "step": 38045 }, { "epoch": 2.5852697377361054, "grad_norm": 0.901806652545929, "learning_rate": 0.0006768412827829869, "loss": 3.5094, "step": 38050 }, { "epoch": 2.5856094578067674, "grad_norm": 1.9045625925064087, "learning_rate": 0.0006767988177741541, "loss": 3.7184, "step": 38055 }, { "epoch": 2.585949177877429, "grad_norm": 0.8009050488471985, "learning_rate": 0.0006767563527653214, "loss": 3.5618, "step": 38060 }, { "epoch": 2.5862888979480907, "grad_norm": 1.0326038599014282, "learning_rate": 0.0006767138877564887, "loss": 3.7053, "step": 38065 }, { "epoch": 2.5866286180187528, "grad_norm": 0.6755421161651611, "learning_rate": 0.0006766714227476559, "loss": 3.745, "step": 38070 }, { "epoch": 2.5869683380894144, "grad_norm": 0.8439539074897766, "learning_rate": 0.0006766289577388232, "loss": 3.6241, "step": 38075 }, { "epoch": 2.587308058160076, "grad_norm": 0.8789964318275452, "learning_rate": 0.0006765864927299906, "loss": 3.3594, "step": 38080 }, { "epoch": 2.587647778230738, "grad_norm": 0.5861936211585999, "learning_rate": 0.0006765440277211578, "loss": 3.5959, "step": 38085 }, { "epoch": 2.5879874983013997, "grad_norm": 0.8412250876426697, "learning_rate": 0.0006765015627123251, "loss": 3.5651, "step": 38090 }, { "epoch": 2.5883272183720614, "grad_norm": 0.8585387468338013, "learning_rate": 0.0006764590977034923, "loss": 3.5619, "step": 38095 }, { "epoch": 2.5886669384427234, "grad_norm": 0.7352014780044556, "learning_rate": 0.0006764166326946596, "loss": 3.5438, "step": 38100 }, { "epoch": 2.589006658513385, "grad_norm": 0.884844958782196, "learning_rate": 0.0006763741676858269, "loss": 3.8177, "step": 38105 }, { "epoch": 2.5893463785840467, "grad_norm": 0.9844726324081421, "learning_rate": 0.0006763317026769941, "loss": 3.771, "step": 38110 }, { "epoch": 2.589686098654709, "grad_norm": 0.9863755106925964, "learning_rate": 0.0006762892376681615, "loss": 3.551, "step": 38115 }, { "epoch": 2.5900258187253704, "grad_norm": 0.9825115203857422, "learning_rate": 0.0006762467726593288, "loss": 3.4705, "step": 38120 }, { "epoch": 2.590365538796032, "grad_norm": 0.9233160018920898, "learning_rate": 0.000676204307650496, "loss": 3.7045, "step": 38125 }, { "epoch": 2.590705258866694, "grad_norm": 0.9878427386283875, "learning_rate": 0.0006761618426416633, "loss": 3.6086, "step": 38130 }, { "epoch": 2.5910449789373557, "grad_norm": 0.9156361222267151, "learning_rate": 0.0006761193776328306, "loss": 3.6003, "step": 38135 }, { "epoch": 2.5913846990080174, "grad_norm": 0.9913665056228638, "learning_rate": 0.0006760769126239978, "loss": 3.4969, "step": 38140 }, { "epoch": 2.591724419078679, "grad_norm": 0.8464643955230713, "learning_rate": 0.000676034447615165, "loss": 3.7677, "step": 38145 }, { "epoch": 2.592064139149341, "grad_norm": 1.024053692817688, "learning_rate": 0.0006759919826063325, "loss": 3.507, "step": 38150 }, { "epoch": 2.5924038592200027, "grad_norm": 0.7304444909095764, "learning_rate": 0.0006759495175974997, "loss": 3.556, "step": 38155 }, { "epoch": 2.5927435792906643, "grad_norm": 0.8833540081977844, "learning_rate": 0.0006759070525886669, "loss": 3.6095, "step": 38160 }, { "epoch": 2.5930832993613264, "grad_norm": 0.9241132140159607, "learning_rate": 0.0006758645875798343, "loss": 3.5069, "step": 38165 }, { "epoch": 2.593423019431988, "grad_norm": 0.8799516558647156, "learning_rate": 0.0006758221225710015, "loss": 3.4759, "step": 38170 }, { "epoch": 2.5937627395026497, "grad_norm": 1.264857292175293, "learning_rate": 0.0006757796575621687, "loss": 3.3816, "step": 38175 }, { "epoch": 2.5941024595733113, "grad_norm": 0.7831274271011353, "learning_rate": 0.000675737192553336, "loss": 3.6048, "step": 38180 }, { "epoch": 2.5944421796439734, "grad_norm": 0.8285607099533081, "learning_rate": 0.0006756947275445034, "loss": 3.3295, "step": 38185 }, { "epoch": 2.594781899714635, "grad_norm": 0.8769218325614929, "learning_rate": 0.0006756522625356706, "loss": 3.5181, "step": 38190 }, { "epoch": 2.5951216197852967, "grad_norm": 1.0275450944900513, "learning_rate": 0.000675609797526838, "loss": 3.7917, "step": 38195 }, { "epoch": 2.5954613398559587, "grad_norm": 0.7965763211250305, "learning_rate": 0.0006755673325180052, "loss": 3.3955, "step": 38200 }, { "epoch": 2.5958010599266204, "grad_norm": 0.7361631989479065, "learning_rate": 0.0006755248675091724, "loss": 3.4683, "step": 38205 }, { "epoch": 2.596140779997282, "grad_norm": 0.6477368474006653, "learning_rate": 0.0006754824025003397, "loss": 3.6982, "step": 38210 }, { "epoch": 2.596480500067944, "grad_norm": 0.8972387313842773, "learning_rate": 0.000675439937491507, "loss": 3.4413, "step": 38215 }, { "epoch": 2.5968202201386057, "grad_norm": 0.7857843637466431, "learning_rate": 0.0006753974724826743, "loss": 3.5718, "step": 38220 }, { "epoch": 2.5971599402092673, "grad_norm": 0.8097231984138489, "learning_rate": 0.0006753550074738416, "loss": 3.5958, "step": 38225 }, { "epoch": 2.5974996602799294, "grad_norm": 0.7015044689178467, "learning_rate": 0.0006753125424650089, "loss": 3.6646, "step": 38230 }, { "epoch": 2.597839380350591, "grad_norm": 0.927734911441803, "learning_rate": 0.0006752700774561761, "loss": 3.5838, "step": 38235 }, { "epoch": 2.5981791004212527, "grad_norm": 0.831920325756073, "learning_rate": 0.0006752276124473434, "loss": 3.5729, "step": 38240 }, { "epoch": 2.5985188204919147, "grad_norm": 0.7816717028617859, "learning_rate": 0.0006751851474385106, "loss": 3.6579, "step": 38245 }, { "epoch": 2.5988585405625764, "grad_norm": 1.7639280557632446, "learning_rate": 0.0006751426824296779, "loss": 3.7849, "step": 38250 }, { "epoch": 2.599198260633238, "grad_norm": 0.8847293257713318, "learning_rate": 0.0006751002174208453, "loss": 3.6717, "step": 38255 }, { "epoch": 2.5995379807039, "grad_norm": 0.7849623560905457, "learning_rate": 0.0006750577524120125, "loss": 3.6197, "step": 38260 }, { "epoch": 2.5998777007745617, "grad_norm": 0.8404530882835388, "learning_rate": 0.0006750152874031798, "loss": 3.4211, "step": 38265 }, { "epoch": 2.6002174208452233, "grad_norm": 0.9761221408843994, "learning_rate": 0.0006749728223943471, "loss": 3.8696, "step": 38270 }, { "epoch": 2.6005571409158854, "grad_norm": 0.7586433291435242, "learning_rate": 0.0006749303573855143, "loss": 3.3688, "step": 38275 }, { "epoch": 2.600896860986547, "grad_norm": 0.8199018239974976, "learning_rate": 0.0006748878923766815, "loss": 3.5302, "step": 38280 }, { "epoch": 2.6012365810572087, "grad_norm": 0.7461830377578735, "learning_rate": 0.0006748454273678489, "loss": 3.7262, "step": 38285 }, { "epoch": 2.6015763011278707, "grad_norm": 1.0927621126174927, "learning_rate": 0.0006748029623590162, "loss": 3.3659, "step": 38290 }, { "epoch": 2.6019160211985324, "grad_norm": 0.9734758138656616, "learning_rate": 0.0006747604973501834, "loss": 3.5388, "step": 38295 }, { "epoch": 2.602255741269194, "grad_norm": 1.051708459854126, "learning_rate": 0.0006747180323413508, "loss": 3.7233, "step": 38300 }, { "epoch": 2.602595461339856, "grad_norm": 1.4222198724746704, "learning_rate": 0.000674675567332518, "loss": 3.4036, "step": 38305 }, { "epoch": 2.6029351814105177, "grad_norm": 0.7939115762710571, "learning_rate": 0.0006746331023236852, "loss": 3.4957, "step": 38310 }, { "epoch": 2.6032749014811793, "grad_norm": 1.2306339740753174, "learning_rate": 0.0006745906373148526, "loss": 3.5228, "step": 38315 }, { "epoch": 2.6036146215518414, "grad_norm": 0.8348496556282043, "learning_rate": 0.0006745481723060198, "loss": 3.6153, "step": 38320 }, { "epoch": 2.603954341622503, "grad_norm": 1.0470563173294067, "learning_rate": 0.0006745057072971871, "loss": 3.7779, "step": 38325 }, { "epoch": 2.6042940616931647, "grad_norm": 0.6838955283164978, "learning_rate": 0.0006744632422883545, "loss": 3.7124, "step": 38330 }, { "epoch": 2.6046337817638268, "grad_norm": 0.8200861215591431, "learning_rate": 0.0006744207772795217, "loss": 3.6478, "step": 38335 }, { "epoch": 2.6049735018344884, "grad_norm": 0.9785839915275574, "learning_rate": 0.000674378312270689, "loss": 3.6081, "step": 38340 }, { "epoch": 2.60531322190515, "grad_norm": 0.7723677754402161, "learning_rate": 0.0006743358472618562, "loss": 3.5961, "step": 38345 }, { "epoch": 2.605652941975812, "grad_norm": 1.0171928405761719, "learning_rate": 0.0006742933822530235, "loss": 3.4563, "step": 38350 }, { "epoch": 2.6059926620464737, "grad_norm": 1.064400553703308, "learning_rate": 0.0006742509172441908, "loss": 3.4759, "step": 38355 }, { "epoch": 2.6063323821171354, "grad_norm": 0.9959901571273804, "learning_rate": 0.0006742084522353581, "loss": 3.5084, "step": 38360 }, { "epoch": 2.6066721021877974, "grad_norm": 0.9633559584617615, "learning_rate": 0.0006741659872265254, "loss": 3.3502, "step": 38365 }, { "epoch": 2.607011822258459, "grad_norm": 0.6645146012306213, "learning_rate": 0.0006741235222176927, "loss": 3.7727, "step": 38370 }, { "epoch": 2.6073515423291207, "grad_norm": 0.7200229167938232, "learning_rate": 0.0006740810572088599, "loss": 3.9959, "step": 38375 }, { "epoch": 2.6076912623997828, "grad_norm": 0.7621169686317444, "learning_rate": 0.0006740385922000271, "loss": 3.6673, "step": 38380 }, { "epoch": 2.6080309824704444, "grad_norm": 1.0611259937286377, "learning_rate": 0.0006739961271911945, "loss": 3.6608, "step": 38385 }, { "epoch": 2.608370702541106, "grad_norm": 0.7128794193267822, "learning_rate": 0.0006739536621823617, "loss": 3.4661, "step": 38390 }, { "epoch": 2.608710422611768, "grad_norm": 0.8369954824447632, "learning_rate": 0.000673911197173529, "loss": 3.8463, "step": 38395 }, { "epoch": 2.6090501426824297, "grad_norm": 0.6605397462844849, "learning_rate": 0.0006738687321646964, "loss": 3.6906, "step": 38400 }, { "epoch": 2.6093898627530914, "grad_norm": 0.7816190123558044, "learning_rate": 0.0006738262671558636, "loss": 3.5596, "step": 38405 }, { "epoch": 2.6097295828237534, "grad_norm": 0.7149060368537903, "learning_rate": 0.0006737838021470308, "loss": 3.6477, "step": 38410 }, { "epoch": 2.610069302894415, "grad_norm": 0.830822229385376, "learning_rate": 0.0006737413371381982, "loss": 3.6696, "step": 38415 }, { "epoch": 2.6104090229650767, "grad_norm": 1.726894736289978, "learning_rate": 0.0006736988721293654, "loss": 3.4857, "step": 38420 }, { "epoch": 2.610748743035739, "grad_norm": 0.841387927532196, "learning_rate": 0.0006736564071205326, "loss": 3.5815, "step": 38425 }, { "epoch": 2.6110884631064004, "grad_norm": 0.7283778190612793, "learning_rate": 0.0006736139421117001, "loss": 3.269, "step": 38430 }, { "epoch": 2.611428183177062, "grad_norm": 0.8097230792045593, "learning_rate": 0.0006735714771028673, "loss": 3.3093, "step": 38435 }, { "epoch": 2.611767903247724, "grad_norm": 1.0492138862609863, "learning_rate": 0.0006735290120940345, "loss": 3.5646, "step": 38440 }, { "epoch": 2.6121076233183858, "grad_norm": 1.0851428508758545, "learning_rate": 0.0006734865470852018, "loss": 3.3848, "step": 38445 }, { "epoch": 2.6124473433890474, "grad_norm": 0.7617583274841309, "learning_rate": 0.0006734440820763691, "loss": 3.5219, "step": 38450 }, { "epoch": 2.6127870634597095, "grad_norm": 0.7774679660797119, "learning_rate": 0.0006734016170675363, "loss": 3.5262, "step": 38455 }, { "epoch": 2.613126783530371, "grad_norm": 0.8226400017738342, "learning_rate": 0.0006733591520587036, "loss": 3.6576, "step": 38460 }, { "epoch": 2.6134665036010327, "grad_norm": 0.7864985466003418, "learning_rate": 0.000673316687049871, "loss": 3.6255, "step": 38465 }, { "epoch": 2.613806223671695, "grad_norm": 1.197155237197876, "learning_rate": 0.0006732742220410382, "loss": 3.566, "step": 38470 }, { "epoch": 2.6141459437423564, "grad_norm": 0.927047848701477, "learning_rate": 0.0006732317570322055, "loss": 3.4633, "step": 38475 }, { "epoch": 2.614485663813018, "grad_norm": 1.0048460960388184, "learning_rate": 0.0006731892920233727, "loss": 3.7472, "step": 38480 }, { "epoch": 2.6148253838836797, "grad_norm": 0.7877074480056763, "learning_rate": 0.00067314682701454, "loss": 3.4778, "step": 38485 }, { "epoch": 2.6151651039543418, "grad_norm": 1.1517329216003418, "learning_rate": 0.0006731043620057073, "loss": 3.3867, "step": 38490 }, { "epoch": 2.6155048240250034, "grad_norm": 0.884728193283081, "learning_rate": 0.0006730618969968745, "loss": 3.5884, "step": 38495 }, { "epoch": 2.615844544095665, "grad_norm": 1.135661244392395, "learning_rate": 0.0006730194319880419, "loss": 3.4396, "step": 38500 }, { "epoch": 2.616184264166327, "grad_norm": 0.9094513058662415, "learning_rate": 0.0006729769669792092, "loss": 3.1697, "step": 38505 }, { "epoch": 2.6165239842369887, "grad_norm": 1.1300067901611328, "learning_rate": 0.0006729345019703764, "loss": 3.5583, "step": 38510 }, { "epoch": 2.6168637043076504, "grad_norm": 4.949653625488281, "learning_rate": 0.0006728920369615437, "loss": 3.3656, "step": 38515 }, { "epoch": 2.617203424378312, "grad_norm": 0.8903859257698059, "learning_rate": 0.000672849571952711, "loss": 3.5788, "step": 38520 }, { "epoch": 2.617543144448974, "grad_norm": 0.8498395085334778, "learning_rate": 0.0006728071069438782, "loss": 3.4334, "step": 38525 }, { "epoch": 2.6178828645196357, "grad_norm": 1.0390172004699707, "learning_rate": 0.0006727646419350454, "loss": 3.4975, "step": 38530 }, { "epoch": 2.6182225845902973, "grad_norm": 0.6246967315673828, "learning_rate": 0.0006727221769262129, "loss": 3.2806, "step": 38535 }, { "epoch": 2.6185623046609594, "grad_norm": 0.6392166018486023, "learning_rate": 0.0006726797119173801, "loss": 3.6332, "step": 38540 }, { "epoch": 2.618902024731621, "grad_norm": 0.8629065752029419, "learning_rate": 0.0006726372469085473, "loss": 3.5773, "step": 38545 }, { "epoch": 2.6192417448022827, "grad_norm": 0.6214338541030884, "learning_rate": 0.0006725947818997147, "loss": 3.4667, "step": 38550 }, { "epoch": 2.6195814648729447, "grad_norm": 0.9043987989425659, "learning_rate": 0.0006725523168908819, "loss": 3.6781, "step": 38555 }, { "epoch": 2.6199211849436064, "grad_norm": 0.8652035593986511, "learning_rate": 0.0006725098518820491, "loss": 3.5256, "step": 38560 }, { "epoch": 2.620260905014268, "grad_norm": 0.9345974922180176, "learning_rate": 0.0006724673868732166, "loss": 3.5936, "step": 38565 }, { "epoch": 2.62060062508493, "grad_norm": 0.8196595311164856, "learning_rate": 0.0006724249218643838, "loss": 3.8122, "step": 38570 }, { "epoch": 2.6209403451555917, "grad_norm": 0.9962665438652039, "learning_rate": 0.000672382456855551, "loss": 3.5679, "step": 38575 }, { "epoch": 2.6212800652262533, "grad_norm": 1.7092270851135254, "learning_rate": 0.0006723399918467184, "loss": 3.7124, "step": 38580 }, { "epoch": 2.6216197852969154, "grad_norm": 0.903668224811554, "learning_rate": 0.0006722975268378856, "loss": 3.6283, "step": 38585 }, { "epoch": 2.621959505367577, "grad_norm": 0.8718007206916809, "learning_rate": 0.0006722550618290528, "loss": 3.5193, "step": 38590 }, { "epoch": 2.6222992254382387, "grad_norm": 1.2119618654251099, "learning_rate": 0.0006722125968202201, "loss": 3.6052, "step": 38595 }, { "epoch": 2.6226389455089008, "grad_norm": 0.8903998136520386, "learning_rate": 0.0006721701318113875, "loss": 3.6537, "step": 38600 }, { "epoch": 2.6229786655795624, "grad_norm": 0.8654888868331909, "learning_rate": 0.0006721276668025547, "loss": 3.7145, "step": 38605 }, { "epoch": 2.623318385650224, "grad_norm": 0.8859283924102783, "learning_rate": 0.000672085201793722, "loss": 3.6125, "step": 38610 }, { "epoch": 2.623658105720886, "grad_norm": 1.5566061735153198, "learning_rate": 0.0006720427367848893, "loss": 3.5142, "step": 38615 }, { "epoch": 2.6239978257915477, "grad_norm": 0.9676165580749512, "learning_rate": 0.0006720002717760565, "loss": 3.5339, "step": 38620 }, { "epoch": 2.6243375458622094, "grad_norm": 0.9105085134506226, "learning_rate": 0.0006719578067672238, "loss": 3.8566, "step": 38625 }, { "epoch": 2.6246772659328714, "grad_norm": 0.8054528832435608, "learning_rate": 0.000671915341758391, "loss": 3.7914, "step": 38630 }, { "epoch": 2.625016986003533, "grad_norm": 1.1351432800292969, "learning_rate": 0.0006718728767495584, "loss": 3.9082, "step": 38635 }, { "epoch": 2.6253567060741947, "grad_norm": 1.2112489938735962, "learning_rate": 0.0006718304117407257, "loss": 3.7037, "step": 38640 }, { "epoch": 2.6256964261448568, "grad_norm": 0.7334005832672119, "learning_rate": 0.0006717879467318929, "loss": 3.6933, "step": 38645 }, { "epoch": 2.6260361462155184, "grad_norm": 0.817758321762085, "learning_rate": 0.0006717454817230602, "loss": 3.7132, "step": 38650 }, { "epoch": 2.62637586628618, "grad_norm": 0.9176595211029053, "learning_rate": 0.0006717030167142275, "loss": 3.2833, "step": 38655 }, { "epoch": 2.626715586356842, "grad_norm": 0.941903293132782, "learning_rate": 0.0006716605517053947, "loss": 3.5427, "step": 38660 }, { "epoch": 2.6270553064275037, "grad_norm": 0.8159829378128052, "learning_rate": 0.000671618086696562, "loss": 3.3996, "step": 38665 }, { "epoch": 2.6273950264981654, "grad_norm": 1.0653454065322876, "learning_rate": 0.0006715756216877294, "loss": 3.6255, "step": 38670 }, { "epoch": 2.6277347465688274, "grad_norm": 1.0850754976272583, "learning_rate": 0.0006715331566788966, "loss": 3.4204, "step": 38675 }, { "epoch": 2.628074466639489, "grad_norm": 0.9883540868759155, "learning_rate": 0.000671490691670064, "loss": 3.6716, "step": 38680 }, { "epoch": 2.6284141867101507, "grad_norm": 0.6990470886230469, "learning_rate": 0.0006714482266612312, "loss": 3.7462, "step": 38685 }, { "epoch": 2.6287539067808128, "grad_norm": 0.7547423839569092, "learning_rate": 0.0006714057616523984, "loss": 3.6905, "step": 38690 }, { "epoch": 2.6290936268514744, "grad_norm": 1.020064115524292, "learning_rate": 0.0006713632966435657, "loss": 3.4894, "step": 38695 }, { "epoch": 2.629433346922136, "grad_norm": 0.7413952946662903, "learning_rate": 0.000671320831634733, "loss": 3.5132, "step": 38700 }, { "epoch": 2.629773066992798, "grad_norm": 0.7968048453330994, "learning_rate": 0.0006712783666259003, "loss": 3.7338, "step": 38705 }, { "epoch": 2.6301127870634597, "grad_norm": 0.6279461979866028, "learning_rate": 0.0006712359016170676, "loss": 3.7586, "step": 38710 }, { "epoch": 2.6304525071341214, "grad_norm": 1.3969736099243164, "learning_rate": 0.0006711934366082349, "loss": 3.6384, "step": 38715 }, { "epoch": 2.6307922272047835, "grad_norm": 0.8817132711410522, "learning_rate": 0.0006711509715994021, "loss": 3.562, "step": 38720 }, { "epoch": 2.631131947275445, "grad_norm": 0.7038147449493408, "learning_rate": 0.0006711085065905694, "loss": 3.5306, "step": 38725 }, { "epoch": 2.6314716673461067, "grad_norm": 1.0569236278533936, "learning_rate": 0.0006710660415817366, "loss": 3.4945, "step": 38730 }, { "epoch": 2.631811387416769, "grad_norm": 0.9933321475982666, "learning_rate": 0.0006710235765729039, "loss": 3.5395, "step": 38735 }, { "epoch": 2.6321511074874304, "grad_norm": 0.9027732014656067, "learning_rate": 0.0006709811115640713, "loss": 3.583, "step": 38740 }, { "epoch": 2.632490827558092, "grad_norm": 0.8695491552352905, "learning_rate": 0.0006709386465552385, "loss": 3.6478, "step": 38745 }, { "epoch": 2.632830547628754, "grad_norm": 0.8274911046028137, "learning_rate": 0.0006708961815464058, "loss": 3.5436, "step": 38750 }, { "epoch": 2.6331702676994158, "grad_norm": 1.0487310886383057, "learning_rate": 0.0006708537165375731, "loss": 3.2929, "step": 38755 }, { "epoch": 2.6335099877700774, "grad_norm": 0.7929543852806091, "learning_rate": 0.0006708112515287403, "loss": 3.5429, "step": 38760 }, { "epoch": 2.6338497078407395, "grad_norm": 0.7827832102775574, "learning_rate": 0.0006707687865199076, "loss": 3.4065, "step": 38765 }, { "epoch": 2.634189427911401, "grad_norm": 1.0113779306411743, "learning_rate": 0.0006707263215110749, "loss": 3.6951, "step": 38770 }, { "epoch": 2.6345291479820627, "grad_norm": 0.8027418255805969, "learning_rate": 0.0006706838565022422, "loss": 3.5684, "step": 38775 }, { "epoch": 2.634868868052725, "grad_norm": 0.8337321877479553, "learning_rate": 0.0006706413914934094, "loss": 3.7862, "step": 38780 }, { "epoch": 2.6352085881233864, "grad_norm": 0.9075456261634827, "learning_rate": 0.0006705989264845768, "loss": 3.8197, "step": 38785 }, { "epoch": 2.635548308194048, "grad_norm": 1.1221232414245605, "learning_rate": 0.000670556461475744, "loss": 3.7347, "step": 38790 }, { "epoch": 2.63588802826471, "grad_norm": 0.9430304765701294, "learning_rate": 0.0006705139964669112, "loss": 3.5805, "step": 38795 }, { "epoch": 2.6362277483353718, "grad_norm": 0.8529255390167236, "learning_rate": 0.0006704715314580786, "loss": 3.6401, "step": 38800 }, { "epoch": 2.6365674684060334, "grad_norm": 0.8448794484138489, "learning_rate": 0.0006704290664492458, "loss": 3.48, "step": 38805 }, { "epoch": 2.6369071884766955, "grad_norm": 1.2087451219558716, "learning_rate": 0.0006703866014404131, "loss": 3.4545, "step": 38810 }, { "epoch": 2.637246908547357, "grad_norm": 0.8385733366012573, "learning_rate": 0.0006703441364315805, "loss": 3.3509, "step": 38815 }, { "epoch": 2.6375866286180187, "grad_norm": 0.9531806111335754, "learning_rate": 0.0006703016714227477, "loss": 3.8468, "step": 38820 }, { "epoch": 2.6379263486886804, "grad_norm": 0.9492085576057434, "learning_rate": 0.0006702592064139149, "loss": 3.6461, "step": 38825 }, { "epoch": 2.6382660687593424, "grad_norm": 0.7910784482955933, "learning_rate": 0.0006702167414050822, "loss": 3.5708, "step": 38830 }, { "epoch": 2.638605788830004, "grad_norm": 0.7317068576812744, "learning_rate": 0.0006701742763962495, "loss": 3.7697, "step": 38835 }, { "epoch": 2.6389455089006657, "grad_norm": 0.8129504919052124, "learning_rate": 0.0006701318113874167, "loss": 3.4037, "step": 38840 }, { "epoch": 2.639285228971328, "grad_norm": 0.6434752345085144, "learning_rate": 0.0006700893463785841, "loss": 3.5381, "step": 38845 }, { "epoch": 2.6396249490419894, "grad_norm": 1.0738664865493774, "learning_rate": 0.0006700468813697514, "loss": 3.4779, "step": 38850 }, { "epoch": 2.639964669112651, "grad_norm": 0.9184576869010925, "learning_rate": 0.0006700044163609186, "loss": 3.5823, "step": 38855 }, { "epoch": 2.640304389183313, "grad_norm": 0.8333171606063843, "learning_rate": 0.0006699619513520859, "loss": 3.535, "step": 38860 }, { "epoch": 2.6406441092539747, "grad_norm": 0.8588457107543945, "learning_rate": 0.0006699194863432532, "loss": 3.579, "step": 38865 }, { "epoch": 2.6409838293246364, "grad_norm": 0.7158852815628052, "learning_rate": 0.0006698770213344204, "loss": 3.4853, "step": 38870 }, { "epoch": 2.641323549395298, "grad_norm": 0.9022689461708069, "learning_rate": 0.0006698345563255877, "loss": 3.7643, "step": 38875 }, { "epoch": 2.64166326946596, "grad_norm": 25.67415428161621, "learning_rate": 0.000669792091316755, "loss": 3.6054, "step": 38880 }, { "epoch": 2.6420029895366217, "grad_norm": 0.7990885972976685, "learning_rate": 0.0006697496263079223, "loss": 3.7794, "step": 38885 }, { "epoch": 2.6423427096072833, "grad_norm": 0.7637774348258972, "learning_rate": 0.0006697071612990896, "loss": 3.6493, "step": 38890 }, { "epoch": 2.6426824296779454, "grad_norm": 0.9356614947319031, "learning_rate": 0.0006696646962902568, "loss": 3.5832, "step": 38895 }, { "epoch": 2.643022149748607, "grad_norm": 0.9159225225448608, "learning_rate": 0.0006696222312814241, "loss": 3.2256, "step": 38900 }, { "epoch": 2.6433618698192687, "grad_norm": 0.966389000415802, "learning_rate": 0.0006695797662725914, "loss": 3.4058, "step": 38905 }, { "epoch": 2.6437015898899308, "grad_norm": 0.7136698961257935, "learning_rate": 0.0006695373012637586, "loss": 3.4978, "step": 38910 }, { "epoch": 2.6440413099605924, "grad_norm": 0.8356807827949524, "learning_rate": 0.000669494836254926, "loss": 3.5399, "step": 38915 }, { "epoch": 2.644381030031254, "grad_norm": 0.8544842600822449, "learning_rate": 0.0006694523712460933, "loss": 3.7321, "step": 38920 }, { "epoch": 2.644720750101916, "grad_norm": 0.8490853905677795, "learning_rate": 0.0006694099062372605, "loss": 3.3927, "step": 38925 }, { "epoch": 2.6450604701725777, "grad_norm": 0.8461875319480896, "learning_rate": 0.0006693674412284277, "loss": 3.3326, "step": 38930 }, { "epoch": 2.6454001902432394, "grad_norm": 0.8575462102890015, "learning_rate": 0.0006693249762195951, "loss": 3.3817, "step": 38935 }, { "epoch": 2.6457399103139014, "grad_norm": 0.858337938785553, "learning_rate": 0.0006692825112107623, "loss": 3.4148, "step": 38940 }, { "epoch": 2.646079630384563, "grad_norm": 0.7775519490242004, "learning_rate": 0.0006692400462019295, "loss": 3.6192, "step": 38945 }, { "epoch": 2.6464193504552247, "grad_norm": 1.0075974464416504, "learning_rate": 0.000669197581193097, "loss": 3.4872, "step": 38950 }, { "epoch": 2.6467590705258868, "grad_norm": 0.8635708689689636, "learning_rate": 0.0006691551161842642, "loss": 3.5131, "step": 38955 }, { "epoch": 2.6470987905965484, "grad_norm": 0.9143396615982056, "learning_rate": 0.0006691126511754314, "loss": 3.2962, "step": 38960 }, { "epoch": 2.64743851066721, "grad_norm": 0.8970433473587036, "learning_rate": 0.0006690701861665988, "loss": 3.4906, "step": 38965 }, { "epoch": 2.647778230737872, "grad_norm": 0.838435173034668, "learning_rate": 0.000669027721157766, "loss": 3.5363, "step": 38970 }, { "epoch": 2.6481179508085337, "grad_norm": 0.7684668302536011, "learning_rate": 0.0006689852561489332, "loss": 3.5185, "step": 38975 }, { "epoch": 2.6484576708791954, "grad_norm": 0.9158735275268555, "learning_rate": 0.0006689427911401005, "loss": 3.719, "step": 38980 }, { "epoch": 2.6487973909498574, "grad_norm": 0.908362090587616, "learning_rate": 0.0006689003261312679, "loss": 3.4921, "step": 38985 }, { "epoch": 2.649137111020519, "grad_norm": 0.8787447214126587, "learning_rate": 0.0006688578611224351, "loss": 3.5257, "step": 38990 }, { "epoch": 2.6494768310911807, "grad_norm": 0.9223270416259766, "learning_rate": 0.0006688153961136024, "loss": 3.3577, "step": 38995 }, { "epoch": 2.649816551161843, "grad_norm": 0.9562312960624695, "learning_rate": 0.0006687729311047697, "loss": 3.695, "step": 39000 }, { "epoch": 2.6501562712325044, "grad_norm": 1.0691815614700317, "learning_rate": 0.0006687304660959369, "loss": 3.4451, "step": 39005 }, { "epoch": 2.650495991303166, "grad_norm": 0.8978657722473145, "learning_rate": 0.0006686880010871042, "loss": 3.6595, "step": 39010 }, { "epoch": 2.650835711373828, "grad_norm": 0.9087507724761963, "learning_rate": 0.0006686455360782714, "loss": 3.6368, "step": 39015 }, { "epoch": 2.6511754314444897, "grad_norm": 0.9738811254501343, "learning_rate": 0.0006686030710694389, "loss": 3.3309, "step": 39020 }, { "epoch": 2.6515151515151514, "grad_norm": 0.7656195759773254, "learning_rate": 0.0006685606060606061, "loss": 3.6944, "step": 39025 }, { "epoch": 2.6518548715858135, "grad_norm": 0.8604965806007385, "learning_rate": 0.0006685181410517733, "loss": 3.7169, "step": 39030 }, { "epoch": 2.652194591656475, "grad_norm": 0.8998962044715881, "learning_rate": 0.0006684756760429407, "loss": 3.7117, "step": 39035 }, { "epoch": 2.6525343117271367, "grad_norm": 0.9582512974739075, "learning_rate": 0.0006684332110341079, "loss": 3.646, "step": 39040 }, { "epoch": 2.652874031797799, "grad_norm": 0.8939792513847351, "learning_rate": 0.0006683907460252751, "loss": 3.4811, "step": 39045 }, { "epoch": 2.6532137518684604, "grad_norm": 0.901568591594696, "learning_rate": 0.0006683482810164426, "loss": 3.4898, "step": 39050 }, { "epoch": 2.653553471939122, "grad_norm": 1.177497386932373, "learning_rate": 0.0006683058160076098, "loss": 3.1865, "step": 39055 }, { "epoch": 2.653893192009784, "grad_norm": 1.246700406074524, "learning_rate": 0.000668263350998777, "loss": 3.7413, "step": 39060 }, { "epoch": 2.6542329120804458, "grad_norm": 0.8685649037361145, "learning_rate": 0.0006682208859899444, "loss": 3.2871, "step": 39065 }, { "epoch": 2.6545726321511074, "grad_norm": 0.7652031779289246, "learning_rate": 0.0006681784209811116, "loss": 3.8107, "step": 39070 }, { "epoch": 2.6549123522217695, "grad_norm": 1.0441315174102783, "learning_rate": 0.0006681359559722788, "loss": 3.4456, "step": 39075 }, { "epoch": 2.655252072292431, "grad_norm": 0.8567469120025635, "learning_rate": 0.0006680934909634461, "loss": 3.5856, "step": 39080 }, { "epoch": 2.6555917923630927, "grad_norm": 0.7644731402397156, "learning_rate": 0.0006680510259546135, "loss": 3.6342, "step": 39085 }, { "epoch": 2.655931512433755, "grad_norm": 0.6943089962005615, "learning_rate": 0.0006680085609457807, "loss": 3.5026, "step": 39090 }, { "epoch": 2.6562712325044164, "grad_norm": 0.7880464196205139, "learning_rate": 0.000667966095936948, "loss": 3.5199, "step": 39095 }, { "epoch": 2.656610952575078, "grad_norm": 1.070072889328003, "learning_rate": 0.0006679236309281153, "loss": 3.5301, "step": 39100 }, { "epoch": 2.65695067264574, "grad_norm": 0.7754001617431641, "learning_rate": 0.0006678811659192825, "loss": 3.4531, "step": 39105 }, { "epoch": 2.6572903927164018, "grad_norm": 0.9749760031700134, "learning_rate": 0.0006678387009104498, "loss": 3.2918, "step": 39110 }, { "epoch": 2.6576301127870634, "grad_norm": 0.9373928904533386, "learning_rate": 0.000667796235901617, "loss": 3.4242, "step": 39115 }, { "epoch": 2.6579698328577255, "grad_norm": 0.8050748109817505, "learning_rate": 0.0006677537708927844, "loss": 3.7281, "step": 39120 }, { "epoch": 2.658309552928387, "grad_norm": 0.8123527765274048, "learning_rate": 0.0006677113058839517, "loss": 3.2858, "step": 39125 }, { "epoch": 2.6586492729990487, "grad_norm": 0.9150041937828064, "learning_rate": 0.0006676688408751189, "loss": 3.4277, "step": 39130 }, { "epoch": 2.658988993069711, "grad_norm": 0.8619819283485413, "learning_rate": 0.0006676263758662862, "loss": 3.6669, "step": 39135 }, { "epoch": 2.6593287131403724, "grad_norm": 0.7923856973648071, "learning_rate": 0.0006675839108574535, "loss": 3.4763, "step": 39140 }, { "epoch": 2.659668433211034, "grad_norm": 0.816271960735321, "learning_rate": 0.0006675414458486207, "loss": 3.3649, "step": 39145 }, { "epoch": 2.660008153281696, "grad_norm": 0.7809050679206848, "learning_rate": 0.000667498980839788, "loss": 3.26, "step": 39150 }, { "epoch": 2.660347873352358, "grad_norm": 0.7251778841018677, "learning_rate": 0.0006674565158309554, "loss": 3.9465, "step": 39155 }, { "epoch": 2.6606875934230194, "grad_norm": 0.9981110692024231, "learning_rate": 0.0006674140508221226, "loss": 3.4462, "step": 39160 }, { "epoch": 2.661027313493681, "grad_norm": 1.0442508459091187, "learning_rate": 0.0006673715858132898, "loss": 3.7769, "step": 39165 }, { "epoch": 2.661367033564343, "grad_norm": 0.9589933156967163, "learning_rate": 0.0006673291208044572, "loss": 3.5334, "step": 39170 }, { "epoch": 2.6617067536350048, "grad_norm": 0.8386595845222473, "learning_rate": 0.0006672866557956244, "loss": 3.4662, "step": 39175 }, { "epoch": 2.6620464737056664, "grad_norm": 0.823900580406189, "learning_rate": 0.0006672441907867916, "loss": 3.4005, "step": 39180 }, { "epoch": 2.6623861937763285, "grad_norm": 0.8166152834892273, "learning_rate": 0.000667201725777959, "loss": 3.284, "step": 39185 }, { "epoch": 2.66272591384699, "grad_norm": 1.0028407573699951, "learning_rate": 0.0006671592607691263, "loss": 3.4659, "step": 39190 }, { "epoch": 2.6630656339176517, "grad_norm": 0.8185228109359741, "learning_rate": 0.0006671167957602935, "loss": 3.6145, "step": 39195 }, { "epoch": 2.663405353988314, "grad_norm": 0.9407854080200195, "learning_rate": 0.0006670743307514609, "loss": 3.7606, "step": 39200 }, { "epoch": 2.6637450740589754, "grad_norm": 1.0144007205963135, "learning_rate": 0.0006670318657426281, "loss": 3.4303, "step": 39205 }, { "epoch": 2.664084794129637, "grad_norm": 0.9002841114997864, "learning_rate": 0.0006669894007337953, "loss": 3.5891, "step": 39210 }, { "epoch": 2.6644245142002987, "grad_norm": 0.8895481824874878, "learning_rate": 0.0006669469357249627, "loss": 3.4951, "step": 39215 }, { "epoch": 2.6647642342709608, "grad_norm": 0.8655336499214172, "learning_rate": 0.0006669044707161299, "loss": 3.3381, "step": 39220 }, { "epoch": 2.6651039543416224, "grad_norm": 0.7400758266448975, "learning_rate": 0.0006668620057072972, "loss": 3.7898, "step": 39225 }, { "epoch": 2.665443674412284, "grad_norm": 0.9576754570007324, "learning_rate": 0.0006668195406984645, "loss": 3.4518, "step": 39230 }, { "epoch": 2.665783394482946, "grad_norm": 0.9104257225990295, "learning_rate": 0.0006667770756896318, "loss": 3.4648, "step": 39235 }, { "epoch": 2.6661231145536077, "grad_norm": 0.9224026799201965, "learning_rate": 0.000666734610680799, "loss": 3.4527, "step": 39240 }, { "epoch": 2.6664628346242694, "grad_norm": 0.7793877124786377, "learning_rate": 0.0006666921456719663, "loss": 3.53, "step": 39245 }, { "epoch": 2.6668025546949314, "grad_norm": 0.7612549066543579, "learning_rate": 0.0006666496806631336, "loss": 3.6605, "step": 39250 }, { "epoch": 2.667142274765593, "grad_norm": 0.685893177986145, "learning_rate": 0.0006666072156543008, "loss": 3.3682, "step": 39255 }, { "epoch": 2.6674819948362547, "grad_norm": 1.1559594869613647, "learning_rate": 0.0006665647506454682, "loss": 3.7983, "step": 39260 }, { "epoch": 2.6678217149069168, "grad_norm": 0.9116805195808411, "learning_rate": 0.0006665222856366355, "loss": 3.3838, "step": 39265 }, { "epoch": 2.6681614349775784, "grad_norm": 0.8146975636482239, "learning_rate": 0.0006664798206278027, "loss": 3.4497, "step": 39270 }, { "epoch": 2.66850115504824, "grad_norm": 0.7309738993644714, "learning_rate": 0.00066643735561897, "loss": 3.7783, "step": 39275 }, { "epoch": 2.668840875118902, "grad_norm": 1.1440026760101318, "learning_rate": 0.0006663948906101372, "loss": 3.624, "step": 39280 }, { "epoch": 2.6691805951895637, "grad_norm": 1.3363008499145508, "learning_rate": 0.0006663524256013045, "loss": 3.8108, "step": 39285 }, { "epoch": 2.6695203152602254, "grad_norm": 0.9720861315727234, "learning_rate": 0.0006663099605924718, "loss": 3.5352, "step": 39290 }, { "epoch": 2.6698600353308874, "grad_norm": 0.8297094106674194, "learning_rate": 0.0006662674955836391, "loss": 3.7056, "step": 39295 }, { "epoch": 2.670199755401549, "grad_norm": 0.8896045684814453, "learning_rate": 0.0006662250305748064, "loss": 3.561, "step": 39300 }, { "epoch": 2.6705394754722107, "grad_norm": 0.8745763301849365, "learning_rate": 0.0006661825655659737, "loss": 3.4967, "step": 39305 }, { "epoch": 2.670879195542873, "grad_norm": 0.8838697671890259, "learning_rate": 0.0006661401005571409, "loss": 3.5506, "step": 39310 }, { "epoch": 2.6712189156135344, "grad_norm": 0.9359268546104431, "learning_rate": 0.0006660976355483081, "loss": 3.6081, "step": 39315 }, { "epoch": 2.671558635684196, "grad_norm": 0.9610401391983032, "learning_rate": 0.0006660551705394755, "loss": 3.8735, "step": 39320 }, { "epoch": 2.671898355754858, "grad_norm": 0.7464884519577026, "learning_rate": 0.0006660127055306427, "loss": 3.6112, "step": 39325 }, { "epoch": 2.6722380758255198, "grad_norm": 0.7213006615638733, "learning_rate": 0.00066597024052181, "loss": 3.6616, "step": 39330 }, { "epoch": 2.6725777958961814, "grad_norm": 0.8091405034065247, "learning_rate": 0.0006659277755129774, "loss": 3.4883, "step": 39335 }, { "epoch": 2.6729175159668435, "grad_norm": 0.9522737264633179, "learning_rate": 0.0006658853105041446, "loss": 3.7208, "step": 39340 }, { "epoch": 2.673257236037505, "grad_norm": 1.0663467645645142, "learning_rate": 0.0006658428454953118, "loss": 3.4942, "step": 39345 }, { "epoch": 2.6735969561081667, "grad_norm": 0.8880877494812012, "learning_rate": 0.0006658003804864792, "loss": 3.5162, "step": 39350 }, { "epoch": 2.673936676178829, "grad_norm": 0.7805944085121155, "learning_rate": 0.0006657579154776464, "loss": 3.6987, "step": 39355 }, { "epoch": 2.6742763962494904, "grad_norm": 0.7099069952964783, "learning_rate": 0.0006657154504688137, "loss": 3.3643, "step": 39360 }, { "epoch": 2.674616116320152, "grad_norm": 0.7927857041358948, "learning_rate": 0.000665672985459981, "loss": 3.4248, "step": 39365 }, { "epoch": 2.674955836390814, "grad_norm": 0.8989330530166626, "learning_rate": 0.0006656305204511483, "loss": 3.5511, "step": 39370 }, { "epoch": 2.6752955564614758, "grad_norm": 1.0804892778396606, "learning_rate": 0.0006655880554423156, "loss": 3.6852, "step": 39375 }, { "epoch": 2.6756352765321374, "grad_norm": 0.7740371227264404, "learning_rate": 0.0006655455904334828, "loss": 3.4718, "step": 39380 }, { "epoch": 2.6759749966027995, "grad_norm": 0.7535105347633362, "learning_rate": 0.0006655031254246501, "loss": 3.4063, "step": 39385 }, { "epoch": 2.676314716673461, "grad_norm": 0.8613489270210266, "learning_rate": 0.0006654606604158174, "loss": 3.716, "step": 39390 }, { "epoch": 2.6766544367441227, "grad_norm": 0.9653170704841614, "learning_rate": 0.0006654181954069846, "loss": 3.7586, "step": 39395 }, { "epoch": 2.676994156814785, "grad_norm": 0.7744818329811096, "learning_rate": 0.000665375730398152, "loss": 3.7027, "step": 39400 }, { "epoch": 2.6773338768854464, "grad_norm": 0.8145372867584229, "learning_rate": 0.0006653332653893193, "loss": 3.4915, "step": 39405 }, { "epoch": 2.677673596956108, "grad_norm": 0.9243924021720886, "learning_rate": 0.0006652908003804865, "loss": 3.3064, "step": 39410 }, { "epoch": 2.67801331702677, "grad_norm": 0.7611715197563171, "learning_rate": 0.0006652483353716537, "loss": 3.6374, "step": 39415 }, { "epoch": 2.6783530370974318, "grad_norm": 0.9148041605949402, "learning_rate": 0.0006652058703628211, "loss": 3.6513, "step": 39420 }, { "epoch": 2.6786927571680934, "grad_norm": 0.954589307308197, "learning_rate": 0.0006651634053539883, "loss": 3.6613, "step": 39425 }, { "epoch": 2.6790324772387555, "grad_norm": 1.2141176462173462, "learning_rate": 0.0006651209403451555, "loss": 3.6164, "step": 39430 }, { "epoch": 2.679372197309417, "grad_norm": 0.7941005229949951, "learning_rate": 0.000665078475336323, "loss": 3.5494, "step": 39435 }, { "epoch": 2.6797119173800787, "grad_norm": 0.7552561163902283, "learning_rate": 0.0006650360103274902, "loss": 3.5026, "step": 39440 }, { "epoch": 2.680051637450741, "grad_norm": 0.8646746873855591, "learning_rate": 0.0006649935453186574, "loss": 3.6857, "step": 39445 }, { "epoch": 2.6803913575214025, "grad_norm": 0.719590425491333, "learning_rate": 0.0006649510803098248, "loss": 3.7175, "step": 39450 }, { "epoch": 2.680731077592064, "grad_norm": 0.8356661200523376, "learning_rate": 0.000664908615300992, "loss": 3.5311, "step": 39455 }, { "epoch": 2.681070797662726, "grad_norm": 0.8313671350479126, "learning_rate": 0.0006648661502921592, "loss": 3.5614, "step": 39460 }, { "epoch": 2.681410517733388, "grad_norm": 0.8496318459510803, "learning_rate": 0.0006648236852833265, "loss": 3.4067, "step": 39465 }, { "epoch": 2.6817502378040494, "grad_norm": 0.7232120037078857, "learning_rate": 0.0006647812202744939, "loss": 3.6396, "step": 39470 }, { "epoch": 2.6820899578747115, "grad_norm": 0.7688809037208557, "learning_rate": 0.0006647387552656611, "loss": 3.4844, "step": 39475 }, { "epoch": 2.682429677945373, "grad_norm": 0.7387804388999939, "learning_rate": 0.0006646962902568284, "loss": 3.4331, "step": 39480 }, { "epoch": 2.6827693980160348, "grad_norm": 1.0792258977890015, "learning_rate": 0.0006646538252479957, "loss": 3.6457, "step": 39485 }, { "epoch": 2.683109118086697, "grad_norm": 0.8235520720481873, "learning_rate": 0.0006646113602391629, "loss": 3.7389, "step": 39490 }, { "epoch": 2.6834488381573585, "grad_norm": 0.9406498074531555, "learning_rate": 0.0006645688952303302, "loss": 3.4926, "step": 39495 }, { "epoch": 2.68378855822802, "grad_norm": 0.8052217960357666, "learning_rate": 0.0006645264302214975, "loss": 3.4535, "step": 39500 }, { "epoch": 2.6841282782986817, "grad_norm": 1.107268214225769, "learning_rate": 0.0006644839652126648, "loss": 3.5002, "step": 39505 }, { "epoch": 2.684467998369344, "grad_norm": 0.6901599168777466, "learning_rate": 0.0006644415002038321, "loss": 3.6099, "step": 39510 }, { "epoch": 2.6848077184400054, "grad_norm": 0.9755265712738037, "learning_rate": 0.0006643990351949993, "loss": 3.4136, "step": 39515 }, { "epoch": 2.685147438510667, "grad_norm": 1.1893476247787476, "learning_rate": 0.0006643565701861666, "loss": 3.3938, "step": 39520 }, { "epoch": 2.685487158581329, "grad_norm": 1.077042818069458, "learning_rate": 0.0006643141051773339, "loss": 3.4097, "step": 39525 }, { "epoch": 2.6858268786519908, "grad_norm": 0.9407221078872681, "learning_rate": 0.0006642716401685011, "loss": 3.5794, "step": 39530 }, { "epoch": 2.6861665987226524, "grad_norm": 0.814281165599823, "learning_rate": 0.0006642291751596684, "loss": 3.7497, "step": 39535 }, { "epoch": 2.6865063187933145, "grad_norm": 0.9309245347976685, "learning_rate": 0.0006641867101508358, "loss": 3.5242, "step": 39540 }, { "epoch": 2.686846038863976, "grad_norm": 1.1620299816131592, "learning_rate": 0.000664144245142003, "loss": 3.3924, "step": 39545 }, { "epoch": 2.6871857589346377, "grad_norm": 1.1696833372116089, "learning_rate": 0.0006641017801331703, "loss": 3.5454, "step": 39550 }, { "epoch": 2.6875254790052994, "grad_norm": 1.1456794738769531, "learning_rate": 0.0006640593151243376, "loss": 3.6153, "step": 39555 }, { "epoch": 2.6878651990759614, "grad_norm": 0.8372112512588501, "learning_rate": 0.0006640168501155048, "loss": 3.4789, "step": 39560 }, { "epoch": 2.688204919146623, "grad_norm": 0.7948510646820068, "learning_rate": 0.000663974385106672, "loss": 3.6489, "step": 39565 }, { "epoch": 2.6885446392172847, "grad_norm": 1.6276192665100098, "learning_rate": 0.0006639319200978394, "loss": 3.5652, "step": 39570 }, { "epoch": 2.688884359287947, "grad_norm": 0.7661606669425964, "learning_rate": 0.0006638894550890067, "loss": 3.5418, "step": 39575 }, { "epoch": 2.6892240793586084, "grad_norm": 0.8404340744018555, "learning_rate": 0.0006638469900801739, "loss": 3.4948, "step": 39580 }, { "epoch": 2.68956379942927, "grad_norm": 1.0753207206726074, "learning_rate": 0.0006638045250713413, "loss": 3.854, "step": 39585 }, { "epoch": 2.689903519499932, "grad_norm": 0.9387730360031128, "learning_rate": 0.0006637620600625085, "loss": 3.6577, "step": 39590 }, { "epoch": 2.6902432395705937, "grad_norm": 0.7888733148574829, "learning_rate": 0.0006637195950536757, "loss": 3.6716, "step": 39595 }, { "epoch": 2.6905829596412554, "grad_norm": 0.977247953414917, "learning_rate": 0.000663677130044843, "loss": 3.461, "step": 39600 }, { "epoch": 2.6909226797119175, "grad_norm": 1.0005854368209839, "learning_rate": 0.0006636346650360103, "loss": 3.559, "step": 39605 }, { "epoch": 2.691262399782579, "grad_norm": 0.9771857857704163, "learning_rate": 0.0006635922000271776, "loss": 3.5618, "step": 39610 }, { "epoch": 2.6916021198532407, "grad_norm": 0.7702210545539856, "learning_rate": 0.000663549735018345, "loss": 3.7666, "step": 39615 }, { "epoch": 2.691941839923903, "grad_norm": 0.7041729092597961, "learning_rate": 0.0006635072700095122, "loss": 3.6083, "step": 39620 }, { "epoch": 2.6922815599945644, "grad_norm": 0.9051448106765747, "learning_rate": 0.0006634648050006794, "loss": 3.5937, "step": 39625 }, { "epoch": 2.692621280065226, "grad_norm": 0.9443694353103638, "learning_rate": 0.0006634223399918467, "loss": 3.521, "step": 39630 }, { "epoch": 2.692961000135888, "grad_norm": 0.9342772960662842, "learning_rate": 0.000663379874983014, "loss": 3.4778, "step": 39635 }, { "epoch": 2.6933007202065498, "grad_norm": 0.9229065775871277, "learning_rate": 0.0006633374099741812, "loss": 3.4587, "step": 39640 }, { "epoch": 2.6936404402772114, "grad_norm": 0.9385390281677246, "learning_rate": 0.0006632949449653486, "loss": 3.3121, "step": 39645 }, { "epoch": 2.6939801603478735, "grad_norm": 0.8475236892700195, "learning_rate": 0.0006632524799565159, "loss": 3.5958, "step": 39650 }, { "epoch": 2.694319880418535, "grad_norm": 0.8443287014961243, "learning_rate": 0.0006632100149476831, "loss": 3.4901, "step": 39655 }, { "epoch": 2.6946596004891967, "grad_norm": 1.015817642211914, "learning_rate": 0.0006631675499388504, "loss": 3.6771, "step": 39660 }, { "epoch": 2.694999320559859, "grad_norm": 0.7274404168128967, "learning_rate": 0.0006631250849300176, "loss": 3.6129, "step": 39665 }, { "epoch": 2.6953390406305204, "grad_norm": 0.7245690822601318, "learning_rate": 0.0006630826199211849, "loss": 3.4489, "step": 39670 }, { "epoch": 2.695678760701182, "grad_norm": 1.0628036260604858, "learning_rate": 0.0006630401549123523, "loss": 3.3562, "step": 39675 }, { "epoch": 2.696018480771844, "grad_norm": 0.8156663179397583, "learning_rate": 0.0006629976899035195, "loss": 3.4015, "step": 39680 }, { "epoch": 2.6963582008425058, "grad_norm": 0.961750328540802, "learning_rate": 0.0006629552248946868, "loss": 3.4847, "step": 39685 }, { "epoch": 2.6966979209131674, "grad_norm": 0.8796713948249817, "learning_rate": 0.0006629127598858541, "loss": 3.4862, "step": 39690 }, { "epoch": 2.6970376409838295, "grad_norm": 0.7819221615791321, "learning_rate": 0.0006628702948770213, "loss": 3.5789, "step": 39695 }, { "epoch": 2.697377361054491, "grad_norm": 0.7031370401382446, "learning_rate": 0.0006628278298681887, "loss": 3.6516, "step": 39700 }, { "epoch": 2.6977170811251527, "grad_norm": 0.7477797865867615, "learning_rate": 0.0006627853648593559, "loss": 3.4195, "step": 39705 }, { "epoch": 2.698056801195815, "grad_norm": 0.8148168325424194, "learning_rate": 0.0006627428998505232, "loss": 3.5894, "step": 39710 }, { "epoch": 2.6983965212664764, "grad_norm": 1.1020723581314087, "learning_rate": 0.0006627004348416905, "loss": 3.4635, "step": 39715 }, { "epoch": 2.698736241337138, "grad_norm": 0.9621317982673645, "learning_rate": 0.0006626579698328578, "loss": 3.752, "step": 39720 }, { "epoch": 2.6990759614078, "grad_norm": 0.6750820279121399, "learning_rate": 0.000662615504824025, "loss": 3.5458, "step": 39725 }, { "epoch": 2.699415681478462, "grad_norm": 0.6667659282684326, "learning_rate": 0.0006625730398151923, "loss": 3.4146, "step": 39730 }, { "epoch": 2.6997554015491234, "grad_norm": 1.2838027477264404, "learning_rate": 0.0006625305748063596, "loss": 3.563, "step": 39735 }, { "epoch": 2.7000951216197855, "grad_norm": 0.7612210512161255, "learning_rate": 0.0006624881097975268, "loss": 3.2695, "step": 39740 }, { "epoch": 2.700434841690447, "grad_norm": 0.7487524747848511, "learning_rate": 0.0006624456447886942, "loss": 3.6091, "step": 39745 }, { "epoch": 2.7007745617611087, "grad_norm": 0.9047382473945618, "learning_rate": 0.0006624031797798615, "loss": 3.6756, "step": 39750 }, { "epoch": 2.701114281831771, "grad_norm": 0.7021672129631042, "learning_rate": 0.0006623607147710287, "loss": 3.5139, "step": 39755 }, { "epoch": 2.7014540019024325, "grad_norm": 1.0043939352035522, "learning_rate": 0.000662318249762196, "loss": 3.6311, "step": 39760 }, { "epoch": 2.701793721973094, "grad_norm": 0.7244371771812439, "learning_rate": 0.0006622757847533632, "loss": 3.5967, "step": 39765 }, { "epoch": 2.702133442043756, "grad_norm": 0.9074171185493469, "learning_rate": 0.0006622333197445305, "loss": 3.47, "step": 39770 }, { "epoch": 2.702473162114418, "grad_norm": 0.8315871953964233, "learning_rate": 0.0006621908547356978, "loss": 3.3909, "step": 39775 }, { "epoch": 2.7028128821850794, "grad_norm": 1.0892366170883179, "learning_rate": 0.0006621483897268651, "loss": 3.3348, "step": 39780 }, { "epoch": 2.7031526022557415, "grad_norm": 0.9583085179328918, "learning_rate": 0.0006621059247180324, "loss": 3.4619, "step": 39785 }, { "epoch": 2.703492322326403, "grad_norm": 0.9405403137207031, "learning_rate": 0.0006620634597091997, "loss": 3.7066, "step": 39790 }, { "epoch": 2.7038320423970648, "grad_norm": 1.1698927879333496, "learning_rate": 0.0006620209947003669, "loss": 3.5143, "step": 39795 }, { "epoch": 2.704171762467727, "grad_norm": 1.123669147491455, "learning_rate": 0.0006619785296915341, "loss": 3.5656, "step": 39800 }, { "epoch": 2.7045114825383885, "grad_norm": 0.8114689588546753, "learning_rate": 0.0006619360646827015, "loss": 3.4688, "step": 39805 }, { "epoch": 2.70485120260905, "grad_norm": 0.8444017767906189, "learning_rate": 0.0006618935996738687, "loss": 3.5631, "step": 39810 }, { "epoch": 2.705190922679712, "grad_norm": 0.8161699771881104, "learning_rate": 0.000661851134665036, "loss": 3.4663, "step": 39815 }, { "epoch": 2.705530642750374, "grad_norm": 0.8161278367042542, "learning_rate": 0.0006618086696562034, "loss": 3.6632, "step": 39820 }, { "epoch": 2.7058703628210354, "grad_norm": 0.8805455565452576, "learning_rate": 0.0006617662046473706, "loss": 3.195, "step": 39825 }, { "epoch": 2.7062100828916975, "grad_norm": 0.8140993714332581, "learning_rate": 0.0006617237396385378, "loss": 3.4359, "step": 39830 }, { "epoch": 2.706549802962359, "grad_norm": 0.9666776061058044, "learning_rate": 0.0006616812746297052, "loss": 3.5496, "step": 39835 }, { "epoch": 2.7068895230330208, "grad_norm": 0.9289587736129761, "learning_rate": 0.0006616388096208724, "loss": 3.3168, "step": 39840 }, { "epoch": 2.7072292431036824, "grad_norm": 0.7441180944442749, "learning_rate": 0.0006615963446120396, "loss": 3.4691, "step": 39845 }, { "epoch": 2.7075689631743445, "grad_norm": 0.8969364166259766, "learning_rate": 0.0006615538796032071, "loss": 3.6486, "step": 39850 }, { "epoch": 2.707908683245006, "grad_norm": 0.8045446872711182, "learning_rate": 0.0006615114145943743, "loss": 3.4694, "step": 39855 }, { "epoch": 2.7082484033156677, "grad_norm": 0.8576960563659668, "learning_rate": 0.0006614689495855415, "loss": 3.5353, "step": 39860 }, { "epoch": 2.70858812338633, "grad_norm": 0.8782486319541931, "learning_rate": 0.0006614264845767088, "loss": 3.6126, "step": 39865 }, { "epoch": 2.7089278434569914, "grad_norm": 1.0336476564407349, "learning_rate": 0.0006613840195678761, "loss": 3.6594, "step": 39870 }, { "epoch": 2.709267563527653, "grad_norm": 0.8430631756782532, "learning_rate": 0.0006613415545590433, "loss": 3.48, "step": 39875 }, { "epoch": 2.709607283598315, "grad_norm": 1.1418321132659912, "learning_rate": 0.0006612990895502106, "loss": 3.5315, "step": 39880 }, { "epoch": 2.709947003668977, "grad_norm": 0.7815362811088562, "learning_rate": 0.000661256624541378, "loss": 3.68, "step": 39885 }, { "epoch": 2.7102867237396384, "grad_norm": 0.7009844183921814, "learning_rate": 0.0006612141595325452, "loss": 3.6887, "step": 39890 }, { "epoch": 2.7106264438103, "grad_norm": 0.7809376120567322, "learning_rate": 0.0006611716945237125, "loss": 3.5866, "step": 39895 }, { "epoch": 2.710966163880962, "grad_norm": 0.8068056702613831, "learning_rate": 0.0006611292295148797, "loss": 3.3631, "step": 39900 }, { "epoch": 2.7113058839516238, "grad_norm": 1.1175657510757446, "learning_rate": 0.000661086764506047, "loss": 3.6421, "step": 39905 }, { "epoch": 2.7116456040222854, "grad_norm": 0.8177772760391235, "learning_rate": 0.0006610442994972143, "loss": 3.5571, "step": 39910 }, { "epoch": 2.7119853240929475, "grad_norm": 0.8902101516723633, "learning_rate": 0.0006610018344883815, "loss": 3.5564, "step": 39915 }, { "epoch": 2.712325044163609, "grad_norm": 0.736126720905304, "learning_rate": 0.0006609593694795489, "loss": 3.7114, "step": 39920 }, { "epoch": 2.7126647642342707, "grad_norm": 0.7346240878105164, "learning_rate": 0.0006609169044707162, "loss": 3.7131, "step": 39925 }, { "epoch": 2.713004484304933, "grad_norm": 1.0551403760910034, "learning_rate": 0.0006608744394618834, "loss": 3.7793, "step": 39930 }, { "epoch": 2.7133442043755944, "grad_norm": 1.1357228755950928, "learning_rate": 0.0006608319744530507, "loss": 3.674, "step": 39935 }, { "epoch": 2.713683924446256, "grad_norm": 0.8803273439407349, "learning_rate": 0.000660789509444218, "loss": 3.5907, "step": 39940 }, { "epoch": 2.714023644516918, "grad_norm": 0.8253132104873657, "learning_rate": 0.0006607470444353852, "loss": 3.2503, "step": 39945 }, { "epoch": 2.7143633645875798, "grad_norm": 1.1099398136138916, "learning_rate": 0.0006607045794265524, "loss": 3.4453, "step": 39950 }, { "epoch": 2.7147030846582414, "grad_norm": 0.9525774717330933, "learning_rate": 0.0006606621144177199, "loss": 3.5244, "step": 39955 }, { "epoch": 2.7150428047289035, "grad_norm": 0.7008033394813538, "learning_rate": 0.0006606196494088871, "loss": 3.7403, "step": 39960 }, { "epoch": 2.715382524799565, "grad_norm": 0.8683610558509827, "learning_rate": 0.0006605771844000543, "loss": 3.5985, "step": 39965 }, { "epoch": 2.7157222448702267, "grad_norm": 1.175252079963684, "learning_rate": 0.0006605347193912217, "loss": 3.2575, "step": 39970 }, { "epoch": 2.716061964940889, "grad_norm": 0.888146698474884, "learning_rate": 0.0006604922543823889, "loss": 3.6604, "step": 39975 }, { "epoch": 2.7164016850115504, "grad_norm": 0.9928892254829407, "learning_rate": 0.0006604497893735561, "loss": 3.4821, "step": 39980 }, { "epoch": 2.716741405082212, "grad_norm": 0.8842623829841614, "learning_rate": 0.0006604073243647235, "loss": 3.4429, "step": 39985 }, { "epoch": 2.717081125152874, "grad_norm": 0.7981084585189819, "learning_rate": 0.0006603648593558908, "loss": 3.5934, "step": 39990 }, { "epoch": 2.7174208452235358, "grad_norm": 0.841220498085022, "learning_rate": 0.000660322394347058, "loss": 3.5876, "step": 39995 }, { "epoch": 2.7177605652941974, "grad_norm": 0.7617934942245483, "learning_rate": 0.0006602799293382254, "loss": 3.6606, "step": 40000 }, { "epoch": 2.7181002853648595, "grad_norm": 1.173885464668274, "learning_rate": 0.0006602374643293926, "loss": 3.5908, "step": 40005 }, { "epoch": 2.718440005435521, "grad_norm": 0.8232343196868896, "learning_rate": 0.0006601949993205598, "loss": 3.4696, "step": 40010 }, { "epoch": 2.7187797255061827, "grad_norm": 0.7001729011535645, "learning_rate": 0.0006601525343117271, "loss": 3.4657, "step": 40015 }, { "epoch": 2.719119445576845, "grad_norm": 0.9046725630760193, "learning_rate": 0.0006601100693028944, "loss": 3.6355, "step": 40020 }, { "epoch": 2.7194591656475064, "grad_norm": 0.6880229711532593, "learning_rate": 0.0006600676042940617, "loss": 3.8121, "step": 40025 }, { "epoch": 2.719798885718168, "grad_norm": 1.1041297912597656, "learning_rate": 0.000660025139285229, "loss": 3.6441, "step": 40030 }, { "epoch": 2.72013860578883, "grad_norm": 0.7319654226303101, "learning_rate": 0.0006599826742763963, "loss": 3.3554, "step": 40035 }, { "epoch": 2.720478325859492, "grad_norm": 1.0054905414581299, "learning_rate": 0.0006599402092675636, "loss": 3.6667, "step": 40040 }, { "epoch": 2.7208180459301534, "grad_norm": 0.8685222864151001, "learning_rate": 0.0006598977442587308, "loss": 3.4287, "step": 40045 }, { "epoch": 2.7211577660008155, "grad_norm": 0.8062014579772949, "learning_rate": 0.000659855279249898, "loss": 3.4739, "step": 40050 }, { "epoch": 2.721497486071477, "grad_norm": 0.9211556315422058, "learning_rate": 0.0006598128142410654, "loss": 3.58, "step": 40055 }, { "epoch": 2.7218372061421388, "grad_norm": 1.118658185005188, "learning_rate": 0.0006597703492322327, "loss": 3.3311, "step": 40060 }, { "epoch": 2.722176926212801, "grad_norm": 0.9092445969581604, "learning_rate": 0.0006597278842233999, "loss": 3.6422, "step": 40065 }, { "epoch": 2.7225166462834625, "grad_norm": 0.8178219795227051, "learning_rate": 0.0006596854192145673, "loss": 3.6058, "step": 40070 }, { "epoch": 2.722856366354124, "grad_norm": 0.9425751566886902, "learning_rate": 0.0006596429542057345, "loss": 3.677, "step": 40075 }, { "epoch": 2.723196086424786, "grad_norm": 1.1031423807144165, "learning_rate": 0.0006596004891969017, "loss": 3.4428, "step": 40080 }, { "epoch": 2.723535806495448, "grad_norm": 0.7799941897392273, "learning_rate": 0.0006595580241880691, "loss": 3.7331, "step": 40085 }, { "epoch": 2.7238755265661094, "grad_norm": 0.9952499866485596, "learning_rate": 0.0006595155591792363, "loss": 3.6353, "step": 40090 }, { "epoch": 2.7242152466367715, "grad_norm": 0.7231821417808533, "learning_rate": 0.0006594730941704036, "loss": 3.6645, "step": 40095 }, { "epoch": 2.724554966707433, "grad_norm": 0.885179340839386, "learning_rate": 0.000659430629161571, "loss": 3.5133, "step": 40100 }, { "epoch": 2.7248946867780948, "grad_norm": 0.8990301489830017, "learning_rate": 0.0006593881641527382, "loss": 3.4832, "step": 40105 }, { "epoch": 2.725234406848757, "grad_norm": 2.263291835784912, "learning_rate": 0.0006593456991439054, "loss": 3.6553, "step": 40110 }, { "epoch": 2.7255741269194185, "grad_norm": 0.8630979061126709, "learning_rate": 0.0006593032341350727, "loss": 3.634, "step": 40115 }, { "epoch": 2.72591384699008, "grad_norm": 0.9513310790061951, "learning_rate": 0.00065926076912624, "loss": 3.4474, "step": 40120 }, { "epoch": 2.726253567060742, "grad_norm": 0.8380131721496582, "learning_rate": 0.0006592183041174072, "loss": 3.3844, "step": 40125 }, { "epoch": 2.726593287131404, "grad_norm": 0.7741430401802063, "learning_rate": 0.0006591758391085746, "loss": 3.7444, "step": 40130 }, { "epoch": 2.7269330072020654, "grad_norm": 0.8665879964828491, "learning_rate": 0.0006591333740997419, "loss": 3.5391, "step": 40135 }, { "epoch": 2.7272727272727275, "grad_norm": 0.920905590057373, "learning_rate": 0.0006590909090909091, "loss": 3.4289, "step": 40140 }, { "epoch": 2.727612447343389, "grad_norm": 0.9568038582801819, "learning_rate": 0.0006590484440820764, "loss": 3.5123, "step": 40145 }, { "epoch": 2.7279521674140508, "grad_norm": 1.0718151330947876, "learning_rate": 0.0006590059790732436, "loss": 3.6251, "step": 40150 }, { "epoch": 2.728291887484713, "grad_norm": 0.8815311789512634, "learning_rate": 0.0006589635140644109, "loss": 3.5821, "step": 40155 }, { "epoch": 2.7286316075553745, "grad_norm": 0.8185691833496094, "learning_rate": 0.0006589210490555782, "loss": 3.5243, "step": 40160 }, { "epoch": 2.728971327626036, "grad_norm": 1.332939624786377, "learning_rate": 0.0006588785840467455, "loss": 3.7498, "step": 40165 }, { "epoch": 2.729311047696698, "grad_norm": 1.276260256767273, "learning_rate": 0.0006588361190379128, "loss": 3.5576, "step": 40170 }, { "epoch": 2.72965076776736, "grad_norm": 0.7643992900848389, "learning_rate": 0.0006587936540290801, "loss": 3.6814, "step": 40175 }, { "epoch": 2.7299904878380215, "grad_norm": 1.1474686861038208, "learning_rate": 0.0006587511890202473, "loss": 3.7066, "step": 40180 }, { "epoch": 2.730330207908683, "grad_norm": 0.9082356691360474, "learning_rate": 0.0006587087240114146, "loss": 3.6036, "step": 40185 }, { "epoch": 2.730669927979345, "grad_norm": 0.751785397529602, "learning_rate": 0.0006586662590025819, "loss": 3.7263, "step": 40190 }, { "epoch": 2.731009648050007, "grad_norm": 0.76153165102005, "learning_rate": 0.0006586237939937491, "loss": 3.5915, "step": 40195 }, { "epoch": 2.7313493681206684, "grad_norm": 0.9065309166908264, "learning_rate": 0.0006585813289849164, "loss": 3.7779, "step": 40200 }, { "epoch": 2.7316890881913305, "grad_norm": 1.0192867517471313, "learning_rate": 0.0006585388639760838, "loss": 3.385, "step": 40205 }, { "epoch": 2.732028808261992, "grad_norm": 0.6835248470306396, "learning_rate": 0.000658496398967251, "loss": 3.6086, "step": 40210 }, { "epoch": 2.7323685283326538, "grad_norm": 0.9173712134361267, "learning_rate": 0.0006584539339584182, "loss": 3.5931, "step": 40215 }, { "epoch": 2.732708248403316, "grad_norm": 0.8721328377723694, "learning_rate": 0.0006584114689495856, "loss": 3.6052, "step": 40220 }, { "epoch": 2.7330479684739775, "grad_norm": 0.9276031255722046, "learning_rate": 0.0006583690039407528, "loss": 3.5505, "step": 40225 }, { "epoch": 2.733387688544639, "grad_norm": 1.031188726425171, "learning_rate": 0.00065832653893192, "loss": 3.4408, "step": 40230 }, { "epoch": 2.7337274086153007, "grad_norm": 1.0801630020141602, "learning_rate": 0.0006582840739230875, "loss": 3.5817, "step": 40235 }, { "epoch": 2.734067128685963, "grad_norm": 0.8701567053794861, "learning_rate": 0.0006582416089142547, "loss": 3.625, "step": 40240 }, { "epoch": 2.7344068487566244, "grad_norm": 0.8715426921844482, "learning_rate": 0.0006581991439054219, "loss": 3.4749, "step": 40245 }, { "epoch": 2.734746568827286, "grad_norm": 1.1303719282150269, "learning_rate": 0.0006581566788965892, "loss": 3.7009, "step": 40250 }, { "epoch": 2.735086288897948, "grad_norm": 0.9289172887802124, "learning_rate": 0.0006581142138877565, "loss": 3.7178, "step": 40255 }, { "epoch": 2.7354260089686098, "grad_norm": 0.6357284784317017, "learning_rate": 0.0006580717488789237, "loss": 3.5377, "step": 40260 }, { "epoch": 2.7357657290392714, "grad_norm": 0.8401117920875549, "learning_rate": 0.0006580292838700911, "loss": 3.4947, "step": 40265 }, { "epoch": 2.7361054491099335, "grad_norm": 0.7872085571289062, "learning_rate": 0.0006579868188612584, "loss": 3.5856, "step": 40270 }, { "epoch": 2.736445169180595, "grad_norm": 0.9084161520004272, "learning_rate": 0.0006579443538524256, "loss": 3.2861, "step": 40275 }, { "epoch": 2.7367848892512567, "grad_norm": 0.9328500628471375, "learning_rate": 0.0006579018888435929, "loss": 3.7289, "step": 40280 }, { "epoch": 2.737124609321919, "grad_norm": 0.8494167327880859, "learning_rate": 0.0006578594238347602, "loss": 3.4918, "step": 40285 }, { "epoch": 2.7374643293925804, "grad_norm": 0.7552316188812256, "learning_rate": 0.0006578169588259274, "loss": 3.6883, "step": 40290 }, { "epoch": 2.737804049463242, "grad_norm": 0.6937495470046997, "learning_rate": 0.0006577744938170947, "loss": 3.3879, "step": 40295 }, { "epoch": 2.738143769533904, "grad_norm": 0.8413161039352417, "learning_rate": 0.000657732028808262, "loss": 3.427, "step": 40300 }, { "epoch": 2.738483489604566, "grad_norm": 0.8789914846420288, "learning_rate": 0.0006576895637994293, "loss": 3.4447, "step": 40305 }, { "epoch": 2.7388232096752274, "grad_norm": 0.8155306577682495, "learning_rate": 0.0006576470987905966, "loss": 3.7293, "step": 40310 }, { "epoch": 2.7391629297458895, "grad_norm": 0.9186259508132935, "learning_rate": 0.0006576046337817638, "loss": 3.5566, "step": 40315 }, { "epoch": 2.739502649816551, "grad_norm": 0.9676461815834045, "learning_rate": 0.0006575621687729311, "loss": 3.2586, "step": 40320 }, { "epoch": 2.7398423698872127, "grad_norm": 0.84682297706604, "learning_rate": 0.0006575197037640984, "loss": 3.6196, "step": 40325 }, { "epoch": 2.740182089957875, "grad_norm": 0.8325427770614624, "learning_rate": 0.0006574772387552656, "loss": 3.6705, "step": 40330 }, { "epoch": 2.7405218100285365, "grad_norm": 0.6998556852340698, "learning_rate": 0.000657434773746433, "loss": 3.6178, "step": 40335 }, { "epoch": 2.740861530099198, "grad_norm": 0.8425354957580566, "learning_rate": 0.0006573923087376003, "loss": 3.4702, "step": 40340 }, { "epoch": 2.74120125016986, "grad_norm": 0.9224645495414734, "learning_rate": 0.0006573498437287675, "loss": 3.6163, "step": 40345 }, { "epoch": 2.741540970240522, "grad_norm": 1.0837113857269287, "learning_rate": 0.0006573073787199347, "loss": 3.0764, "step": 40350 }, { "epoch": 2.7418806903111834, "grad_norm": 0.7782437801361084, "learning_rate": 0.0006572649137111021, "loss": 3.3457, "step": 40355 }, { "epoch": 2.7422204103818455, "grad_norm": 0.8085513114929199, "learning_rate": 0.0006572224487022693, "loss": 3.638, "step": 40360 }, { "epoch": 2.742560130452507, "grad_norm": 1.6547608375549316, "learning_rate": 0.0006571799836934365, "loss": 3.638, "step": 40365 }, { "epoch": 2.7428998505231688, "grad_norm": 0.7194579839706421, "learning_rate": 0.000657137518684604, "loss": 3.6916, "step": 40370 }, { "epoch": 2.743239570593831, "grad_norm": 1.110898733139038, "learning_rate": 0.0006570950536757712, "loss": 3.5896, "step": 40375 }, { "epoch": 2.7435792906644925, "grad_norm": 0.8973047733306885, "learning_rate": 0.0006570525886669385, "loss": 3.7063, "step": 40380 }, { "epoch": 2.743919010735154, "grad_norm": 1.019471526145935, "learning_rate": 0.0006570101236581058, "loss": 3.5558, "step": 40385 }, { "epoch": 2.744258730805816, "grad_norm": 0.795246422290802, "learning_rate": 0.000656967658649273, "loss": 3.7662, "step": 40390 }, { "epoch": 2.744598450876478, "grad_norm": 0.8133932948112488, "learning_rate": 0.0006569251936404403, "loss": 3.5625, "step": 40395 }, { "epoch": 2.7449381709471394, "grad_norm": 0.8875547647476196, "learning_rate": 0.0006568827286316075, "loss": 3.2861, "step": 40400 }, { "epoch": 2.7452778910178015, "grad_norm": 0.7816322445869446, "learning_rate": 0.0006568402636227749, "loss": 3.5642, "step": 40405 }, { "epoch": 2.745617611088463, "grad_norm": 0.7662747502326965, "learning_rate": 0.0006567977986139422, "loss": 3.4831, "step": 40410 }, { "epoch": 2.7459573311591248, "grad_norm": 0.7646679282188416, "learning_rate": 0.0006567553336051094, "loss": 3.5403, "step": 40415 }, { "epoch": 2.746297051229787, "grad_norm": 0.6875748634338379, "learning_rate": 0.0006567128685962767, "loss": 3.6506, "step": 40420 }, { "epoch": 2.7466367713004485, "grad_norm": 0.7331896424293518, "learning_rate": 0.000656670403587444, "loss": 3.6811, "step": 40425 }, { "epoch": 2.74697649137111, "grad_norm": 0.7540739178657532, "learning_rate": 0.0006566279385786112, "loss": 3.3072, "step": 40430 }, { "epoch": 2.747316211441772, "grad_norm": 0.788153350353241, "learning_rate": 0.0006565854735697784, "loss": 3.5628, "step": 40435 }, { "epoch": 2.747655931512434, "grad_norm": 0.8513244390487671, "learning_rate": 0.0006565430085609459, "loss": 3.7153, "step": 40440 }, { "epoch": 2.7479956515830954, "grad_norm": 0.8342524170875549, "learning_rate": 0.0006565005435521131, "loss": 3.6279, "step": 40445 }, { "epoch": 2.7483353716537575, "grad_norm": 1.0505750179290771, "learning_rate": 0.0006564580785432803, "loss": 3.636, "step": 40450 }, { "epoch": 2.748675091724419, "grad_norm": 0.9458935260772705, "learning_rate": 0.0006564156135344477, "loss": 3.5757, "step": 40455 }, { "epoch": 2.749014811795081, "grad_norm": 1.0131213665008545, "learning_rate": 0.0006563731485256149, "loss": 3.7699, "step": 40460 }, { "epoch": 2.749354531865743, "grad_norm": 1.017067790031433, "learning_rate": 0.0006563306835167821, "loss": 3.4812, "step": 40465 }, { "epoch": 2.7496942519364045, "grad_norm": 0.8894461393356323, "learning_rate": 0.0006562882185079495, "loss": 3.2047, "step": 40470 }, { "epoch": 2.750033972007066, "grad_norm": 0.8313674330711365, "learning_rate": 0.0006562457534991168, "loss": 3.6895, "step": 40475 }, { "epoch": 2.750373692077728, "grad_norm": 0.9415770769119263, "learning_rate": 0.000656203288490284, "loss": 3.6684, "step": 40480 }, { "epoch": 2.75071341214839, "grad_norm": 0.6898900270462036, "learning_rate": 0.0006561608234814514, "loss": 3.6185, "step": 40485 }, { "epoch": 2.7510531322190515, "grad_norm": 0.7222219109535217, "learning_rate": 0.0006561183584726186, "loss": 3.5232, "step": 40490 }, { "epoch": 2.7513928522897135, "grad_norm": 0.8290750980377197, "learning_rate": 0.0006560758934637858, "loss": 3.5704, "step": 40495 }, { "epoch": 2.751732572360375, "grad_norm": 0.8512561321258545, "learning_rate": 0.0006560334284549531, "loss": 3.5888, "step": 40500 }, { "epoch": 2.752072292431037, "grad_norm": 0.8354743123054504, "learning_rate": 0.0006559909634461204, "loss": 3.4185, "step": 40505 }, { "epoch": 2.752412012501699, "grad_norm": 0.8362182974815369, "learning_rate": 0.0006559484984372877, "loss": 3.5621, "step": 40510 }, { "epoch": 2.7527517325723605, "grad_norm": 0.8976656198501587, "learning_rate": 0.000655906033428455, "loss": 3.57, "step": 40515 }, { "epoch": 2.753091452643022, "grad_norm": 0.8194878697395325, "learning_rate": 0.0006558635684196223, "loss": 3.4224, "step": 40520 }, { "epoch": 2.7534311727136838, "grad_norm": 1.195380687713623, "learning_rate": 0.0006558211034107895, "loss": 3.5948, "step": 40525 }, { "epoch": 2.753770892784346, "grad_norm": 0.8025371432304382, "learning_rate": 0.0006557786384019568, "loss": 3.6011, "step": 40530 }, { "epoch": 2.7541106128550075, "grad_norm": 0.6247618198394775, "learning_rate": 0.000655736173393124, "loss": 3.7089, "step": 40535 }, { "epoch": 2.754450332925669, "grad_norm": 1.003104567527771, "learning_rate": 0.0006556937083842913, "loss": 3.5178, "step": 40540 }, { "epoch": 2.754790052996331, "grad_norm": 0.8687087893486023, "learning_rate": 0.0006556512433754587, "loss": 3.509, "step": 40545 }, { "epoch": 2.755129773066993, "grad_norm": 0.958437442779541, "learning_rate": 0.0006556087783666259, "loss": 3.7245, "step": 40550 }, { "epoch": 2.7554694931376544, "grad_norm": 1.0121073722839355, "learning_rate": 0.0006555663133577932, "loss": 3.5874, "step": 40555 }, { "epoch": 2.7558092132083165, "grad_norm": 1.3760305643081665, "learning_rate": 0.0006555238483489605, "loss": 3.5112, "step": 40560 }, { "epoch": 2.756148933278978, "grad_norm": 0.7471091151237488, "learning_rate": 0.0006554813833401277, "loss": 3.5137, "step": 40565 }, { "epoch": 2.7564886533496398, "grad_norm": 0.6749697327613831, "learning_rate": 0.000655438918331295, "loss": 3.3283, "step": 40570 }, { "epoch": 2.7568283734203014, "grad_norm": 0.9833710789680481, "learning_rate": 0.0006553964533224623, "loss": 3.5585, "step": 40575 }, { "epoch": 2.7571680934909635, "grad_norm": 1.0973074436187744, "learning_rate": 0.0006553539883136296, "loss": 3.4875, "step": 40580 }, { "epoch": 2.757507813561625, "grad_norm": 1.0187256336212158, "learning_rate": 0.0006553115233047968, "loss": 3.4893, "step": 40585 }, { "epoch": 2.7578475336322867, "grad_norm": 0.9151908755302429, "learning_rate": 0.0006552690582959642, "loss": 3.4982, "step": 40590 }, { "epoch": 2.758187253702949, "grad_norm": 0.9743619561195374, "learning_rate": 0.0006552265932871314, "loss": 3.5472, "step": 40595 }, { "epoch": 2.7585269737736104, "grad_norm": 0.8172758221626282, "learning_rate": 0.0006551841282782986, "loss": 3.6355, "step": 40600 }, { "epoch": 2.758866693844272, "grad_norm": 0.8158865571022034, "learning_rate": 0.000655141663269466, "loss": 3.6191, "step": 40605 }, { "epoch": 2.759206413914934, "grad_norm": 0.9475234746932983, "learning_rate": 0.0006550991982606332, "loss": 3.5825, "step": 40610 }, { "epoch": 2.759546133985596, "grad_norm": 0.9995924234390259, "learning_rate": 0.0006550567332518005, "loss": 3.1282, "step": 40615 }, { "epoch": 2.7598858540562574, "grad_norm": 0.925312876701355, "learning_rate": 0.0006550142682429679, "loss": 3.7246, "step": 40620 }, { "epoch": 2.7602255741269195, "grad_norm": 0.855171799659729, "learning_rate": 0.0006549718032341351, "loss": 3.4692, "step": 40625 }, { "epoch": 2.760565294197581, "grad_norm": 1.0137652158737183, "learning_rate": 0.0006549293382253023, "loss": 3.3815, "step": 40630 }, { "epoch": 2.7609050142682428, "grad_norm": 0.8715655207633972, "learning_rate": 0.0006548868732164696, "loss": 3.5159, "step": 40635 }, { "epoch": 2.761244734338905, "grad_norm": 0.6796324253082275, "learning_rate": 0.0006548444082076369, "loss": 3.1392, "step": 40640 }, { "epoch": 2.7615844544095665, "grad_norm": 0.8689772486686707, "learning_rate": 0.0006548019431988041, "loss": 3.7051, "step": 40645 }, { "epoch": 2.761924174480228, "grad_norm": 0.9542728066444397, "learning_rate": 0.0006547594781899715, "loss": 3.4893, "step": 40650 }, { "epoch": 2.76226389455089, "grad_norm": 0.7278113961219788, "learning_rate": 0.0006547170131811388, "loss": 3.5894, "step": 40655 }, { "epoch": 2.762603614621552, "grad_norm": 0.8851398229598999, "learning_rate": 0.000654674548172306, "loss": 3.5513, "step": 40660 }, { "epoch": 2.7629433346922134, "grad_norm": 0.7643380761146545, "learning_rate": 0.0006546320831634733, "loss": 3.5327, "step": 40665 }, { "epoch": 2.7632830547628755, "grad_norm": 0.7789206504821777, "learning_rate": 0.0006545896181546406, "loss": 3.5304, "step": 40670 }, { "epoch": 2.763622774833537, "grad_norm": 0.851741373538971, "learning_rate": 0.0006545471531458078, "loss": 3.3978, "step": 40675 }, { "epoch": 2.7639624949041988, "grad_norm": 0.7207466959953308, "learning_rate": 0.0006545046881369751, "loss": 3.4144, "step": 40680 }, { "epoch": 2.764302214974861, "grad_norm": 1.1106083393096924, "learning_rate": 0.0006544622231281425, "loss": 3.7285, "step": 40685 }, { "epoch": 2.7646419350455225, "grad_norm": 0.7861536741256714, "learning_rate": 0.0006544197581193097, "loss": 3.6548, "step": 40690 }, { "epoch": 2.764981655116184, "grad_norm": 0.853843629360199, "learning_rate": 0.000654377293110477, "loss": 3.5457, "step": 40695 }, { "epoch": 2.765321375186846, "grad_norm": 1.037405252456665, "learning_rate": 0.0006543348281016442, "loss": 3.4414, "step": 40700 }, { "epoch": 2.765661095257508, "grad_norm": 0.806270182132721, "learning_rate": 0.0006542923630928115, "loss": 3.6084, "step": 40705 }, { "epoch": 2.7660008153281694, "grad_norm": 0.971283495426178, "learning_rate": 0.0006542498980839788, "loss": 3.5334, "step": 40710 }, { "epoch": 2.7663405353988315, "grad_norm": 0.7653524875640869, "learning_rate": 0.000654207433075146, "loss": 3.5272, "step": 40715 }, { "epoch": 2.766680255469493, "grad_norm": 1.3915772438049316, "learning_rate": 0.0006541649680663135, "loss": 3.4962, "step": 40720 }, { "epoch": 2.7670199755401548, "grad_norm": 0.9580897092819214, "learning_rate": 0.0006541225030574807, "loss": 3.3549, "step": 40725 }, { "epoch": 2.767359695610817, "grad_norm": 1.2455593347549438, "learning_rate": 0.0006540800380486479, "loss": 3.457, "step": 40730 }, { "epoch": 2.7676994156814785, "grad_norm": 0.7226529121398926, "learning_rate": 0.0006540375730398153, "loss": 3.5481, "step": 40735 }, { "epoch": 2.76803913575214, "grad_norm": 0.7970854043960571, "learning_rate": 0.0006539951080309825, "loss": 3.5547, "step": 40740 }, { "epoch": 2.768378855822802, "grad_norm": 0.7825415134429932, "learning_rate": 0.0006539526430221497, "loss": 3.641, "step": 40745 }, { "epoch": 2.768718575893464, "grad_norm": 0.9059216380119324, "learning_rate": 0.0006539101780133171, "loss": 3.6653, "step": 40750 }, { "epoch": 2.7690582959641254, "grad_norm": 0.903237521648407, "learning_rate": 0.0006538677130044844, "loss": 3.5349, "step": 40755 }, { "epoch": 2.7693980160347875, "grad_norm": 0.8359914422035217, "learning_rate": 0.0006538252479956516, "loss": 3.5534, "step": 40760 }, { "epoch": 2.769737736105449, "grad_norm": 1.1874862909317017, "learning_rate": 0.0006537827829868189, "loss": 3.5867, "step": 40765 }, { "epoch": 2.770077456176111, "grad_norm": 0.7669093012809753, "learning_rate": 0.0006537403179779862, "loss": 3.6542, "step": 40770 }, { "epoch": 2.770417176246773, "grad_norm": 0.949467658996582, "learning_rate": 0.0006536978529691534, "loss": 3.7316, "step": 40775 }, { "epoch": 2.7707568963174345, "grad_norm": 1.008885383605957, "learning_rate": 0.0006536553879603207, "loss": 3.5683, "step": 40780 }, { "epoch": 2.771096616388096, "grad_norm": 0.8699371218681335, "learning_rate": 0.000653612922951488, "loss": 3.4793, "step": 40785 }, { "epoch": 2.771436336458758, "grad_norm": 0.7293240427970886, "learning_rate": 0.0006535704579426553, "loss": 3.7199, "step": 40790 }, { "epoch": 2.77177605652942, "grad_norm": 0.9556752443313599, "learning_rate": 0.0006535279929338226, "loss": 3.7293, "step": 40795 }, { "epoch": 2.7721157766000815, "grad_norm": 0.859874427318573, "learning_rate": 0.0006534855279249898, "loss": 3.4217, "step": 40800 }, { "epoch": 2.7724554966707435, "grad_norm": 0.877276599407196, "learning_rate": 0.0006534430629161571, "loss": 3.4427, "step": 40805 }, { "epoch": 2.772795216741405, "grad_norm": 0.8203192353248596, "learning_rate": 0.0006534005979073244, "loss": 3.3766, "step": 40810 }, { "epoch": 2.773134936812067, "grad_norm": 0.9335929751396179, "learning_rate": 0.0006533581328984916, "loss": 3.6727, "step": 40815 }, { "epoch": 2.773474656882729, "grad_norm": 0.7954270243644714, "learning_rate": 0.000653315667889659, "loss": 3.6444, "step": 40820 }, { "epoch": 2.7738143769533905, "grad_norm": 0.9236946105957031, "learning_rate": 0.0006532732028808263, "loss": 3.39, "step": 40825 }, { "epoch": 2.774154097024052, "grad_norm": 1.0424996614456177, "learning_rate": 0.0006532307378719935, "loss": 3.4322, "step": 40830 }, { "epoch": 2.774493817094714, "grad_norm": 0.8398603796958923, "learning_rate": 0.0006531882728631607, "loss": 3.6222, "step": 40835 }, { "epoch": 2.774833537165376, "grad_norm": 0.7191680669784546, "learning_rate": 0.0006531458078543281, "loss": 3.7335, "step": 40840 }, { "epoch": 2.7751732572360375, "grad_norm": 0.8853099942207336, "learning_rate": 0.0006531033428454953, "loss": 3.8024, "step": 40845 }, { "epoch": 2.7755129773066995, "grad_norm": 1.0183979272842407, "learning_rate": 0.0006530608778366625, "loss": 3.3684, "step": 40850 }, { "epoch": 2.775852697377361, "grad_norm": 1.303118348121643, "learning_rate": 0.00065301841282783, "loss": 3.4345, "step": 40855 }, { "epoch": 2.776192417448023, "grad_norm": 0.9715933799743652, "learning_rate": 0.0006529759478189972, "loss": 3.4978, "step": 40860 }, { "epoch": 2.7765321375186844, "grad_norm": 0.9406045079231262, "learning_rate": 0.0006529334828101644, "loss": 3.7486, "step": 40865 }, { "epoch": 2.7768718575893465, "grad_norm": 0.7973609566688538, "learning_rate": 0.0006528910178013318, "loss": 3.544, "step": 40870 }, { "epoch": 2.777211577660008, "grad_norm": 1.0448392629623413, "learning_rate": 0.000652848552792499, "loss": 3.5673, "step": 40875 }, { "epoch": 2.7775512977306698, "grad_norm": 0.9311828017234802, "learning_rate": 0.0006528060877836662, "loss": 3.6106, "step": 40880 }, { "epoch": 2.777891017801332, "grad_norm": 2.4790256023406982, "learning_rate": 0.0006527636227748335, "loss": 3.301, "step": 40885 }, { "epoch": 2.7782307378719935, "grad_norm": 1.1215834617614746, "learning_rate": 0.0006527211577660009, "loss": 3.6576, "step": 40890 }, { "epoch": 2.778570457942655, "grad_norm": 0.7589175701141357, "learning_rate": 0.0006526786927571681, "loss": 3.7616, "step": 40895 }, { "epoch": 2.778910178013317, "grad_norm": 0.8938860893249512, "learning_rate": 0.0006526362277483354, "loss": 3.1507, "step": 40900 }, { "epoch": 2.779249898083979, "grad_norm": 0.9298586845397949, "learning_rate": 0.0006525937627395027, "loss": 3.7021, "step": 40905 }, { "epoch": 2.7795896181546405, "grad_norm": 0.7274421453475952, "learning_rate": 0.0006525512977306699, "loss": 3.5557, "step": 40910 }, { "epoch": 2.779929338225302, "grad_norm": 1.043718934059143, "learning_rate": 0.0006525088327218372, "loss": 3.6564, "step": 40915 }, { "epoch": 2.780269058295964, "grad_norm": 1.0683753490447998, "learning_rate": 0.0006524663677130045, "loss": 3.1886, "step": 40920 }, { "epoch": 2.780608778366626, "grad_norm": 0.7982569336891174, "learning_rate": 0.0006524239027041718, "loss": 3.7112, "step": 40925 }, { "epoch": 2.7809484984372874, "grad_norm": 0.7379443645477295, "learning_rate": 0.0006523814376953391, "loss": 3.6198, "step": 40930 }, { "epoch": 2.7812882185079495, "grad_norm": 0.7897406816482544, "learning_rate": 0.0006523389726865063, "loss": 3.6165, "step": 40935 }, { "epoch": 2.781627938578611, "grad_norm": 0.6179049015045166, "learning_rate": 0.0006522965076776736, "loss": 3.6887, "step": 40940 }, { "epoch": 2.7819676586492728, "grad_norm": 0.8219224214553833, "learning_rate": 0.0006522540426688409, "loss": 3.3352, "step": 40945 }, { "epoch": 2.782307378719935, "grad_norm": 1.139896273612976, "learning_rate": 0.0006522115776600081, "loss": 3.6948, "step": 40950 }, { "epoch": 2.7826470987905965, "grad_norm": 0.8027321696281433, "learning_rate": 0.0006521691126511754, "loss": 3.5342, "step": 40955 }, { "epoch": 2.782986818861258, "grad_norm": 0.7882165908813477, "learning_rate": 0.0006521266476423428, "loss": 3.6057, "step": 40960 }, { "epoch": 2.78332653893192, "grad_norm": 0.8381488919258118, "learning_rate": 0.00065208418263351, "loss": 3.5081, "step": 40965 }, { "epoch": 2.783666259002582, "grad_norm": 0.6658279895782471, "learning_rate": 0.0006520417176246773, "loss": 3.4365, "step": 40970 }, { "epoch": 2.7840059790732434, "grad_norm": 0.7792349457740784, "learning_rate": 0.0006519992526158446, "loss": 3.6053, "step": 40975 }, { "epoch": 2.7843456991439055, "grad_norm": 0.850094199180603, "learning_rate": 0.0006519567876070118, "loss": 3.322, "step": 40980 }, { "epoch": 2.784685419214567, "grad_norm": 0.8450062274932861, "learning_rate": 0.000651914322598179, "loss": 3.627, "step": 40985 }, { "epoch": 2.7850251392852288, "grad_norm": 0.9091132879257202, "learning_rate": 0.0006518718575893464, "loss": 3.4873, "step": 40990 }, { "epoch": 2.785364859355891, "grad_norm": 0.9887285232543945, "learning_rate": 0.0006518293925805137, "loss": 3.7454, "step": 40995 }, { "epoch": 2.7857045794265525, "grad_norm": 1.0718141794204712, "learning_rate": 0.0006517869275716809, "loss": 3.3801, "step": 41000 }, { "epoch": 2.786044299497214, "grad_norm": 0.6924198269844055, "learning_rate": 0.0006517444625628483, "loss": 3.8928, "step": 41005 }, { "epoch": 2.786384019567876, "grad_norm": 0.8262925744056702, "learning_rate": 0.0006517019975540155, "loss": 3.7331, "step": 41010 }, { "epoch": 2.786723739638538, "grad_norm": 0.7325571179389954, "learning_rate": 0.0006516595325451827, "loss": 3.7014, "step": 41015 }, { "epoch": 2.7870634597091994, "grad_norm": 0.8553724884986877, "learning_rate": 0.00065161706753635, "loss": 3.4751, "step": 41020 }, { "epoch": 2.7874031797798615, "grad_norm": 0.8223779201507568, "learning_rate": 0.0006515746025275173, "loss": 3.7135, "step": 41025 }, { "epoch": 2.787742899850523, "grad_norm": 1.0360852479934692, "learning_rate": 0.0006515321375186846, "loss": 3.2543, "step": 41030 }, { "epoch": 2.788082619921185, "grad_norm": 0.8779467344284058, "learning_rate": 0.000651489672509852, "loss": 3.7755, "step": 41035 }, { "epoch": 2.788422339991847, "grad_norm": 0.8353963494300842, "learning_rate": 0.0006514472075010192, "loss": 3.5913, "step": 41040 }, { "epoch": 2.7887620600625085, "grad_norm": 0.7837998270988464, "learning_rate": 0.0006514047424921864, "loss": 3.1349, "step": 41045 }, { "epoch": 2.78910178013317, "grad_norm": 0.9109930396080017, "learning_rate": 0.0006513622774833537, "loss": 3.2446, "step": 41050 }, { "epoch": 2.789441500203832, "grad_norm": 0.640507698059082, "learning_rate": 0.000651319812474521, "loss": 3.7637, "step": 41055 }, { "epoch": 2.789781220274494, "grad_norm": 0.807341456413269, "learning_rate": 0.0006512773474656883, "loss": 3.7173, "step": 41060 }, { "epoch": 2.7901209403451555, "grad_norm": 0.7839886546134949, "learning_rate": 0.0006512348824568556, "loss": 3.5668, "step": 41065 }, { "epoch": 2.7904606604158175, "grad_norm": 0.7389859557151794, "learning_rate": 0.0006511924174480229, "loss": 3.5327, "step": 41070 }, { "epoch": 2.790800380486479, "grad_norm": 0.8813762068748474, "learning_rate": 0.0006511499524391902, "loss": 3.3964, "step": 41075 }, { "epoch": 2.791140100557141, "grad_norm": 0.8989459276199341, "learning_rate": 0.0006511074874303574, "loss": 3.547, "step": 41080 }, { "epoch": 2.791479820627803, "grad_norm": 1.0809416770935059, "learning_rate": 0.0006510650224215246, "loss": 3.5185, "step": 41085 }, { "epoch": 2.7918195406984645, "grad_norm": 0.739500880241394, "learning_rate": 0.000651022557412692, "loss": 3.5512, "step": 41090 }, { "epoch": 2.792159260769126, "grad_norm": 0.8862757682800293, "learning_rate": 0.0006509800924038592, "loss": 3.6412, "step": 41095 }, { "epoch": 2.792498980839788, "grad_norm": 0.922012448310852, "learning_rate": 0.0006509376273950265, "loss": 3.4056, "step": 41100 }, { "epoch": 2.79283870091045, "grad_norm": 0.7956035733222961, "learning_rate": 0.0006508951623861939, "loss": 3.6471, "step": 41105 }, { "epoch": 2.7931784209811115, "grad_norm": 0.749803900718689, "learning_rate": 0.0006508526973773611, "loss": 3.6001, "step": 41110 }, { "epoch": 2.7935181410517735, "grad_norm": 0.8472878336906433, "learning_rate": 0.0006508102323685283, "loss": 3.7048, "step": 41115 }, { "epoch": 2.793857861122435, "grad_norm": 0.8235501050949097, "learning_rate": 0.0006507677673596957, "loss": 3.4751, "step": 41120 }, { "epoch": 2.794197581193097, "grad_norm": 0.7599331140518188, "learning_rate": 0.0006507253023508629, "loss": 3.6153, "step": 41125 }, { "epoch": 2.794537301263759, "grad_norm": 0.8559428453445435, "learning_rate": 0.0006506828373420301, "loss": 3.2042, "step": 41130 }, { "epoch": 2.7948770213344205, "grad_norm": 0.7228989601135254, "learning_rate": 0.0006506403723331975, "loss": 3.4054, "step": 41135 }, { "epoch": 2.795216741405082, "grad_norm": 0.8533180356025696, "learning_rate": 0.0006505979073243648, "loss": 3.4009, "step": 41140 }, { "epoch": 2.795556461475744, "grad_norm": 1.193645715713501, "learning_rate": 0.000650555442315532, "loss": 3.3261, "step": 41145 }, { "epoch": 2.795896181546406, "grad_norm": 0.8603028655052185, "learning_rate": 0.0006505129773066993, "loss": 3.4895, "step": 41150 }, { "epoch": 2.7962359016170675, "grad_norm": 0.8908913135528564, "learning_rate": 0.0006504705122978666, "loss": 3.4189, "step": 41155 }, { "epoch": 2.7965756216877296, "grad_norm": 0.9131405353546143, "learning_rate": 0.0006504280472890338, "loss": 3.4005, "step": 41160 }, { "epoch": 2.796915341758391, "grad_norm": 1.7250792980194092, "learning_rate": 0.0006503855822802011, "loss": 3.5472, "step": 41165 }, { "epoch": 2.797255061829053, "grad_norm": 0.7639291882514954, "learning_rate": 0.0006503431172713685, "loss": 3.5514, "step": 41170 }, { "epoch": 2.797594781899715, "grad_norm": 1.2182351350784302, "learning_rate": 0.0006503006522625357, "loss": 3.4675, "step": 41175 }, { "epoch": 2.7979345019703765, "grad_norm": 0.8525551557540894, "learning_rate": 0.000650258187253703, "loss": 3.6614, "step": 41180 }, { "epoch": 2.798274222041038, "grad_norm": 0.849395751953125, "learning_rate": 0.0006502157222448702, "loss": 3.425, "step": 41185 }, { "epoch": 2.7986139421117002, "grad_norm": 0.7365756034851074, "learning_rate": 0.0006501732572360375, "loss": 3.637, "step": 41190 }, { "epoch": 2.798953662182362, "grad_norm": 0.7442958950996399, "learning_rate": 0.0006501307922272048, "loss": 3.8011, "step": 41195 }, { "epoch": 2.7992933822530235, "grad_norm": 0.8995551466941833, "learning_rate": 0.000650088327218372, "loss": 3.5012, "step": 41200 }, { "epoch": 2.799633102323685, "grad_norm": 0.7885043621063232, "learning_rate": 0.0006500458622095394, "loss": 3.5857, "step": 41205 }, { "epoch": 2.799972822394347, "grad_norm": 0.8332861661911011, "learning_rate": 0.0006500033972007067, "loss": 3.6609, "step": 41210 }, { "epoch": 2.800312542465009, "grad_norm": 0.7752822041511536, "learning_rate": 0.0006499609321918739, "loss": 3.384, "step": 41215 }, { "epoch": 2.8006522625356705, "grad_norm": 0.9063897132873535, "learning_rate": 0.0006499184671830411, "loss": 3.3559, "step": 41220 }, { "epoch": 2.8009919826063325, "grad_norm": 0.7396212816238403, "learning_rate": 0.0006498760021742085, "loss": 3.5052, "step": 41225 }, { "epoch": 2.801331702676994, "grad_norm": 0.9083309769630432, "learning_rate": 0.0006498335371653757, "loss": 3.5104, "step": 41230 }, { "epoch": 2.801671422747656, "grad_norm": 0.8172621726989746, "learning_rate": 0.0006497910721565429, "loss": 3.4756, "step": 41235 }, { "epoch": 2.802011142818318, "grad_norm": 1.024101972579956, "learning_rate": 0.0006497486071477104, "loss": 3.5534, "step": 41240 }, { "epoch": 2.8023508628889795, "grad_norm": 0.8846065402030945, "learning_rate": 0.0006497061421388776, "loss": 3.3963, "step": 41245 }, { "epoch": 2.802690582959641, "grad_norm": 0.8324682116508484, "learning_rate": 0.0006496636771300448, "loss": 3.4444, "step": 41250 }, { "epoch": 2.8030303030303028, "grad_norm": 0.888038694858551, "learning_rate": 0.0006496212121212122, "loss": 3.5378, "step": 41255 }, { "epoch": 2.803370023100965, "grad_norm": 1.1987608671188354, "learning_rate": 0.0006495787471123794, "loss": 3.9997, "step": 41260 }, { "epoch": 2.8037097431716265, "grad_norm": 0.7669888138771057, "learning_rate": 0.0006495362821035466, "loss": 3.509, "step": 41265 }, { "epoch": 2.804049463242288, "grad_norm": 0.9518877863883972, "learning_rate": 0.000649493817094714, "loss": 3.3775, "step": 41270 }, { "epoch": 2.80438918331295, "grad_norm": 2.1852855682373047, "learning_rate": 0.0006494513520858813, "loss": 3.5079, "step": 41275 }, { "epoch": 2.804728903383612, "grad_norm": 0.8351412415504456, "learning_rate": 0.0006494088870770485, "loss": 3.6044, "step": 41280 }, { "epoch": 2.8050686234542734, "grad_norm": 0.8314439058303833, "learning_rate": 0.0006493664220682158, "loss": 3.4677, "step": 41285 }, { "epoch": 2.8054083435249355, "grad_norm": 0.9713733792304993, "learning_rate": 0.0006493239570593831, "loss": 3.6681, "step": 41290 }, { "epoch": 2.805748063595597, "grad_norm": 1.1723856925964355, "learning_rate": 0.0006492814920505503, "loss": 3.1873, "step": 41295 }, { "epoch": 2.8060877836662588, "grad_norm": 0.8414456844329834, "learning_rate": 0.0006492390270417176, "loss": 3.6145, "step": 41300 }, { "epoch": 2.806427503736921, "grad_norm": 0.7658439874649048, "learning_rate": 0.0006491965620328849, "loss": 3.6697, "step": 41305 }, { "epoch": 2.8067672238075825, "grad_norm": 0.8715805411338806, "learning_rate": 0.0006491540970240522, "loss": 3.4449, "step": 41310 }, { "epoch": 2.807106943878244, "grad_norm": 0.8142425417900085, "learning_rate": 0.0006491116320152195, "loss": 3.4652, "step": 41315 }, { "epoch": 2.807446663948906, "grad_norm": 0.9371588230133057, "learning_rate": 0.0006490691670063867, "loss": 3.4198, "step": 41320 }, { "epoch": 2.807786384019568, "grad_norm": 0.7339078187942505, "learning_rate": 0.000649026701997554, "loss": 3.6215, "step": 41325 }, { "epoch": 2.8081261040902294, "grad_norm": 0.8386957049369812, "learning_rate": 0.0006489842369887213, "loss": 3.4108, "step": 41330 }, { "epoch": 2.8084658241608915, "grad_norm": 1.1156625747680664, "learning_rate": 0.0006489417719798885, "loss": 3.5202, "step": 41335 }, { "epoch": 2.808805544231553, "grad_norm": 0.8288564682006836, "learning_rate": 0.0006488993069710558, "loss": 3.5625, "step": 41340 }, { "epoch": 2.809145264302215, "grad_norm": 0.7618797421455383, "learning_rate": 0.0006488568419622232, "loss": 3.6131, "step": 41345 }, { "epoch": 2.809484984372877, "grad_norm": 1.0272284746170044, "learning_rate": 0.0006488143769533904, "loss": 3.3034, "step": 41350 }, { "epoch": 2.8098247044435385, "grad_norm": 0.7986567616462708, "learning_rate": 0.0006487719119445577, "loss": 3.6474, "step": 41355 }, { "epoch": 2.8101644245142, "grad_norm": 0.9640021920204163, "learning_rate": 0.000648729446935725, "loss": 3.4959, "step": 41360 }, { "epoch": 2.810504144584862, "grad_norm": 1.209046721458435, "learning_rate": 0.0006486869819268922, "loss": 3.512, "step": 41365 }, { "epoch": 2.810843864655524, "grad_norm": 0.9461942315101624, "learning_rate": 0.0006486445169180594, "loss": 3.403, "step": 41370 }, { "epoch": 2.8111835847261855, "grad_norm": 0.9588611125946045, "learning_rate": 0.0006486020519092269, "loss": 3.495, "step": 41375 }, { "epoch": 2.8115233047968475, "grad_norm": 0.7493300437927246, "learning_rate": 0.0006485595869003941, "loss": 3.6506, "step": 41380 }, { "epoch": 2.811863024867509, "grad_norm": 0.8535450100898743, "learning_rate": 0.0006485171218915613, "loss": 3.6834, "step": 41385 }, { "epoch": 2.812202744938171, "grad_norm": 0.8242064118385315, "learning_rate": 0.0006484746568827287, "loss": 3.7053, "step": 41390 }, { "epoch": 2.812542465008833, "grad_norm": 0.8100264668464661, "learning_rate": 0.0006484321918738959, "loss": 3.6562, "step": 41395 }, { "epoch": 2.8128821850794945, "grad_norm": 0.859417200088501, "learning_rate": 0.0006483897268650632, "loss": 3.6942, "step": 41400 }, { "epoch": 2.813221905150156, "grad_norm": 0.8369675278663635, "learning_rate": 0.0006483472618562305, "loss": 3.4838, "step": 41405 }, { "epoch": 2.813561625220818, "grad_norm": 0.7112186551094055, "learning_rate": 0.0006483047968473978, "loss": 3.6894, "step": 41410 }, { "epoch": 2.81390134529148, "grad_norm": 0.9024016261100769, "learning_rate": 0.0006482623318385651, "loss": 3.6715, "step": 41415 }, { "epoch": 2.8142410653621415, "grad_norm": 1.2939252853393555, "learning_rate": 0.0006482198668297324, "loss": 3.8274, "step": 41420 }, { "epoch": 2.8145807854328035, "grad_norm": 1.1902871131896973, "learning_rate": 0.0006481774018208996, "loss": 3.4898, "step": 41425 }, { "epoch": 2.814920505503465, "grad_norm": 1.1284643411636353, "learning_rate": 0.0006481349368120669, "loss": 3.4472, "step": 41430 }, { "epoch": 2.815260225574127, "grad_norm": 0.8643841743469238, "learning_rate": 0.0006480924718032341, "loss": 3.4466, "step": 41435 }, { "epoch": 2.815599945644789, "grad_norm": 0.9510395526885986, "learning_rate": 0.0006480500067944014, "loss": 3.5191, "step": 41440 }, { "epoch": 2.8159396657154505, "grad_norm": 0.954417884349823, "learning_rate": 0.0006480075417855688, "loss": 3.6143, "step": 41445 }, { "epoch": 2.816279385786112, "grad_norm": 0.8017139434814453, "learning_rate": 0.000647965076776736, "loss": 3.8417, "step": 41450 }, { "epoch": 2.816619105856774, "grad_norm": 0.8395540714263916, "learning_rate": 0.0006479226117679033, "loss": 3.5647, "step": 41455 }, { "epoch": 2.816958825927436, "grad_norm": 0.9345386028289795, "learning_rate": 0.0006478801467590706, "loss": 3.7737, "step": 41460 }, { "epoch": 2.8172985459980975, "grad_norm": 0.8753471374511719, "learning_rate": 0.0006478376817502378, "loss": 3.4784, "step": 41465 }, { "epoch": 2.8176382660687596, "grad_norm": 0.722888171672821, "learning_rate": 0.000647795216741405, "loss": 3.5069, "step": 41470 }, { "epoch": 2.817977986139421, "grad_norm": 0.6951571106910706, "learning_rate": 0.0006477527517325724, "loss": 3.7842, "step": 41475 }, { "epoch": 2.818317706210083, "grad_norm": 0.6592711806297302, "learning_rate": 0.0006477102867237397, "loss": 3.604, "step": 41480 }, { "epoch": 2.818657426280745, "grad_norm": 0.8487776517868042, "learning_rate": 0.0006476678217149069, "loss": 3.7043, "step": 41485 }, { "epoch": 2.8189971463514065, "grad_norm": 0.9052252173423767, "learning_rate": 0.0006476253567060743, "loss": 3.5509, "step": 41490 }, { "epoch": 2.819336866422068, "grad_norm": 0.835721492767334, "learning_rate": 0.0006475828916972415, "loss": 3.6987, "step": 41495 }, { "epoch": 2.8196765864927302, "grad_norm": 1.0919418334960938, "learning_rate": 0.0006475404266884087, "loss": 3.4681, "step": 41500 }, { "epoch": 2.820016306563392, "grad_norm": 0.8099721074104309, "learning_rate": 0.0006474979616795761, "loss": 3.65, "step": 41505 }, { "epoch": 2.8203560266340535, "grad_norm": 0.8060786724090576, "learning_rate": 0.0006474554966707433, "loss": 3.6157, "step": 41510 }, { "epoch": 2.8206957467047156, "grad_norm": 0.8671443462371826, "learning_rate": 0.0006474130316619106, "loss": 3.5806, "step": 41515 }, { "epoch": 2.821035466775377, "grad_norm": 0.8679184317588806, "learning_rate": 0.000647370566653078, "loss": 3.6527, "step": 41520 }, { "epoch": 2.821375186846039, "grad_norm": 0.7229198217391968, "learning_rate": 0.0006473281016442452, "loss": 3.5314, "step": 41525 }, { "epoch": 2.821714906916701, "grad_norm": 0.6718996167182922, "learning_rate": 0.0006472856366354124, "loss": 3.6724, "step": 41530 }, { "epoch": 2.8220546269873625, "grad_norm": 0.641571581363678, "learning_rate": 0.0006472431716265797, "loss": 3.697, "step": 41535 }, { "epoch": 2.822394347058024, "grad_norm": 0.9195499420166016, "learning_rate": 0.000647200706617747, "loss": 3.489, "step": 41540 }, { "epoch": 2.822734067128686, "grad_norm": 0.6031072735786438, "learning_rate": 0.0006471582416089142, "loss": 3.3711, "step": 41545 }, { "epoch": 2.823073787199348, "grad_norm": 0.7756946086883545, "learning_rate": 0.0006471157766000816, "loss": 3.5265, "step": 41550 }, { "epoch": 2.8234135072700095, "grad_norm": 0.9457759857177734, "learning_rate": 0.0006470733115912489, "loss": 3.6576, "step": 41555 }, { "epoch": 2.823753227340671, "grad_norm": 0.7569836378097534, "learning_rate": 0.0006470308465824161, "loss": 3.6099, "step": 41560 }, { "epoch": 2.824092947411333, "grad_norm": 0.9484497308731079, "learning_rate": 0.0006469883815735834, "loss": 3.9439, "step": 41565 }, { "epoch": 2.824432667481995, "grad_norm": 0.9010297060012817, "learning_rate": 0.0006469459165647506, "loss": 3.4099, "step": 41570 }, { "epoch": 2.8247723875526565, "grad_norm": 1.157332181930542, "learning_rate": 0.0006469034515559179, "loss": 3.58, "step": 41575 }, { "epoch": 2.8251121076233185, "grad_norm": 0.6770252585411072, "learning_rate": 0.0006468609865470852, "loss": 3.5915, "step": 41580 }, { "epoch": 2.82545182769398, "grad_norm": 1.2056981325149536, "learning_rate": 0.0006468185215382525, "loss": 3.4417, "step": 41585 }, { "epoch": 2.825791547764642, "grad_norm": 1.1649360656738281, "learning_rate": 0.0006467760565294198, "loss": 3.7079, "step": 41590 }, { "epoch": 2.8261312678353034, "grad_norm": 0.8605906963348389, "learning_rate": 0.0006467335915205871, "loss": 3.4819, "step": 41595 }, { "epoch": 2.8264709879059655, "grad_norm": 0.8827203512191772, "learning_rate": 0.0006466911265117543, "loss": 3.7741, "step": 41600 }, { "epoch": 2.826810707976627, "grad_norm": 0.7515257596969604, "learning_rate": 0.0006466486615029216, "loss": 3.6435, "step": 41605 }, { "epoch": 2.8271504280472888, "grad_norm": 0.7814648151397705, "learning_rate": 0.0006466061964940889, "loss": 3.5114, "step": 41610 }, { "epoch": 2.827490148117951, "grad_norm": 0.7758761644363403, "learning_rate": 0.0006465637314852561, "loss": 3.618, "step": 41615 }, { "epoch": 2.8278298681886125, "grad_norm": 0.7390299439430237, "learning_rate": 0.0006465212664764234, "loss": 3.5086, "step": 41620 }, { "epoch": 2.828169588259274, "grad_norm": 0.9217628836631775, "learning_rate": 0.0006464788014675908, "loss": 3.3841, "step": 41625 }, { "epoch": 2.828509308329936, "grad_norm": 0.7836593389511108, "learning_rate": 0.000646436336458758, "loss": 3.7769, "step": 41630 }, { "epoch": 2.828849028400598, "grad_norm": 0.7080562710762024, "learning_rate": 0.0006463938714499252, "loss": 3.4178, "step": 41635 }, { "epoch": 2.8291887484712595, "grad_norm": 0.7039355039596558, "learning_rate": 0.0006463514064410926, "loss": 3.563, "step": 41640 }, { "epoch": 2.8295284685419215, "grad_norm": 0.8547645211219788, "learning_rate": 0.0006463089414322598, "loss": 3.6418, "step": 41645 }, { "epoch": 2.829868188612583, "grad_norm": 0.9048673510551453, "learning_rate": 0.000646266476423427, "loss": 3.676, "step": 41650 }, { "epoch": 2.830207908683245, "grad_norm": 0.9365345239639282, "learning_rate": 0.0006462240114145945, "loss": 3.5881, "step": 41655 }, { "epoch": 2.830547628753907, "grad_norm": 0.939250111579895, "learning_rate": 0.0006461815464057617, "loss": 3.4857, "step": 41660 }, { "epoch": 2.8308873488245685, "grad_norm": 0.9623307585716248, "learning_rate": 0.0006461390813969289, "loss": 3.3108, "step": 41665 }, { "epoch": 2.83122706889523, "grad_norm": 1.0015333890914917, "learning_rate": 0.0006460966163880962, "loss": 3.7509, "step": 41670 }, { "epoch": 2.831566788965892, "grad_norm": 0.9369686841964722, "learning_rate": 0.0006460541513792635, "loss": 3.5462, "step": 41675 }, { "epoch": 2.831906509036554, "grad_norm": 0.8528543710708618, "learning_rate": 0.0006460116863704307, "loss": 3.6148, "step": 41680 }, { "epoch": 2.8322462291072155, "grad_norm": 0.8709750771522522, "learning_rate": 0.000645969221361598, "loss": 3.6066, "step": 41685 }, { "epoch": 2.8325859491778775, "grad_norm": 0.7227299213409424, "learning_rate": 0.0006459267563527654, "loss": 3.6639, "step": 41690 }, { "epoch": 2.832925669248539, "grad_norm": 0.9132999181747437, "learning_rate": 0.0006458842913439326, "loss": 3.7505, "step": 41695 }, { "epoch": 2.833265389319201, "grad_norm": 1.0032469034194946, "learning_rate": 0.0006458418263350999, "loss": 3.5422, "step": 41700 }, { "epoch": 2.833605109389863, "grad_norm": 0.7329068779945374, "learning_rate": 0.0006457993613262672, "loss": 3.6211, "step": 41705 }, { "epoch": 2.8339448294605245, "grad_norm": 1.0247801542282104, "learning_rate": 0.0006457568963174344, "loss": 3.6968, "step": 41710 }, { "epoch": 2.834284549531186, "grad_norm": 0.9737585186958313, "learning_rate": 0.0006457144313086017, "loss": 3.4537, "step": 41715 }, { "epoch": 2.834624269601848, "grad_norm": 0.7124726176261902, "learning_rate": 0.0006456719662997689, "loss": 3.5022, "step": 41720 }, { "epoch": 2.83496398967251, "grad_norm": 0.7406711578369141, "learning_rate": 0.0006456295012909363, "loss": 3.6671, "step": 41725 }, { "epoch": 2.8353037097431715, "grad_norm": 0.8273912668228149, "learning_rate": 0.0006455870362821036, "loss": 3.7653, "step": 41730 }, { "epoch": 2.8356434298138335, "grad_norm": 1.0623247623443604, "learning_rate": 0.0006455445712732708, "loss": 3.3684, "step": 41735 }, { "epoch": 2.835983149884495, "grad_norm": 0.6887548565864563, "learning_rate": 0.0006455021062644382, "loss": 3.5987, "step": 41740 }, { "epoch": 2.836322869955157, "grad_norm": 0.9533951282501221, "learning_rate": 0.0006454596412556054, "loss": 3.7599, "step": 41745 }, { "epoch": 2.836662590025819, "grad_norm": 0.8202416300773621, "learning_rate": 0.0006454171762467726, "loss": 3.5961, "step": 41750 }, { "epoch": 2.8370023100964805, "grad_norm": 0.7496135830879211, "learning_rate": 0.00064537471123794, "loss": 3.4107, "step": 41755 }, { "epoch": 2.837342030167142, "grad_norm": 1.181650161743164, "learning_rate": 0.0006453322462291073, "loss": 3.8101, "step": 41760 }, { "epoch": 2.8376817502378042, "grad_norm": 0.7017676830291748, "learning_rate": 0.0006452897812202745, "loss": 3.5105, "step": 41765 }, { "epoch": 2.838021470308466, "grad_norm": 1.0137461423873901, "learning_rate": 0.0006452473162114418, "loss": 3.7202, "step": 41770 }, { "epoch": 2.8383611903791275, "grad_norm": 0.8122454285621643, "learning_rate": 0.0006452048512026091, "loss": 3.6935, "step": 41775 }, { "epoch": 2.8387009104497896, "grad_norm": 0.7557841539382935, "learning_rate": 0.0006451623861937763, "loss": 3.6296, "step": 41780 }, { "epoch": 2.839040630520451, "grad_norm": 0.7402377128601074, "learning_rate": 0.0006451199211849436, "loss": 3.708, "step": 41785 }, { "epoch": 2.839380350591113, "grad_norm": 0.7256947159767151, "learning_rate": 0.0006450774561761109, "loss": 3.5295, "step": 41790 }, { "epoch": 2.839720070661775, "grad_norm": 0.8619740605354309, "learning_rate": 0.0006450349911672782, "loss": 3.6633, "step": 41795 }, { "epoch": 2.8400597907324365, "grad_norm": 0.8553181290626526, "learning_rate": 0.0006449925261584455, "loss": 3.2943, "step": 41800 }, { "epoch": 2.840399510803098, "grad_norm": 0.8918444514274597, "learning_rate": 0.0006449500611496128, "loss": 3.7773, "step": 41805 }, { "epoch": 2.8407392308737602, "grad_norm": 0.9942034482955933, "learning_rate": 0.00064490759614078, "loss": 3.7759, "step": 41810 }, { "epoch": 2.841078950944422, "grad_norm": 0.674082338809967, "learning_rate": 0.0006448651311319473, "loss": 3.7917, "step": 41815 }, { "epoch": 2.8414186710150835, "grad_norm": 0.8962203860282898, "learning_rate": 0.0006448226661231145, "loss": 3.2653, "step": 41820 }, { "epoch": 2.8417583910857456, "grad_norm": 0.881939172744751, "learning_rate": 0.0006447802011142818, "loss": 3.5808, "step": 41825 }, { "epoch": 2.842098111156407, "grad_norm": 0.8823063969612122, "learning_rate": 0.0006447377361054492, "loss": 3.3298, "step": 41830 }, { "epoch": 2.842437831227069, "grad_norm": 0.7699676752090454, "learning_rate": 0.0006446952710966164, "loss": 3.4607, "step": 41835 }, { "epoch": 2.842777551297731, "grad_norm": 0.8576290011405945, "learning_rate": 0.0006446528060877837, "loss": 3.6437, "step": 41840 }, { "epoch": 2.8431172713683925, "grad_norm": 0.9803963899612427, "learning_rate": 0.000644610341078951, "loss": 3.7071, "step": 41845 }, { "epoch": 2.843456991439054, "grad_norm": 1.2538588047027588, "learning_rate": 0.0006445678760701182, "loss": 3.378, "step": 41850 }, { "epoch": 2.8437967115097162, "grad_norm": 0.9848114252090454, "learning_rate": 0.0006445254110612854, "loss": 3.6173, "step": 41855 }, { "epoch": 2.844136431580378, "grad_norm": 0.8595824837684631, "learning_rate": 0.0006444829460524528, "loss": 3.1623, "step": 41860 }, { "epoch": 2.8444761516510395, "grad_norm": 0.8997084498405457, "learning_rate": 0.0006444404810436201, "loss": 3.335, "step": 41865 }, { "epoch": 2.8448158717217016, "grad_norm": 0.9591615796089172, "learning_rate": 0.0006443980160347873, "loss": 3.5138, "step": 41870 }, { "epoch": 2.845155591792363, "grad_norm": 0.8541892766952515, "learning_rate": 0.0006443555510259547, "loss": 3.4273, "step": 41875 }, { "epoch": 2.845495311863025, "grad_norm": 0.8266332745552063, "learning_rate": 0.0006443130860171219, "loss": 3.384, "step": 41880 }, { "epoch": 2.8458350319336865, "grad_norm": 0.8416678309440613, "learning_rate": 0.0006442706210082891, "loss": 3.4545, "step": 41885 }, { "epoch": 2.8461747520043486, "grad_norm": 0.8054022192955017, "learning_rate": 0.0006442281559994565, "loss": 3.6852, "step": 41890 }, { "epoch": 2.84651447207501, "grad_norm": 1.0356812477111816, "learning_rate": 0.0006441856909906237, "loss": 3.6916, "step": 41895 }, { "epoch": 2.846854192145672, "grad_norm": 1.0687991380691528, "learning_rate": 0.000644143225981791, "loss": 3.5356, "step": 41900 }, { "epoch": 2.847193912216334, "grad_norm": 0.7813867926597595, "learning_rate": 0.0006441007609729584, "loss": 3.5088, "step": 41905 }, { "epoch": 2.8475336322869955, "grad_norm": 1.1813397407531738, "learning_rate": 0.0006440582959641256, "loss": 3.5169, "step": 41910 }, { "epoch": 2.847873352357657, "grad_norm": 0.7769020199775696, "learning_rate": 0.0006440158309552928, "loss": 3.7573, "step": 41915 }, { "epoch": 2.8482130724283192, "grad_norm": 0.8390051126480103, "learning_rate": 0.0006439733659464601, "loss": 3.8694, "step": 41920 }, { "epoch": 2.848552792498981, "grad_norm": 0.8402785062789917, "learning_rate": 0.0006439309009376274, "loss": 3.5078, "step": 41925 }, { "epoch": 2.8488925125696425, "grad_norm": 1.0050921440124512, "learning_rate": 0.0006438884359287946, "loss": 3.6184, "step": 41930 }, { "epoch": 2.849232232640304, "grad_norm": 0.9718076586723328, "learning_rate": 0.000643845970919962, "loss": 3.4694, "step": 41935 }, { "epoch": 2.849571952710966, "grad_norm": 1.109322428703308, "learning_rate": 0.0006438035059111293, "loss": 3.4884, "step": 41940 }, { "epoch": 2.849911672781628, "grad_norm": 0.7805372476577759, "learning_rate": 0.0006437610409022965, "loss": 3.4998, "step": 41945 }, { "epoch": 2.8502513928522895, "grad_norm": 1.2811968326568604, "learning_rate": 0.0006437185758934638, "loss": 3.4424, "step": 41950 }, { "epoch": 2.8505911129229515, "grad_norm": 0.7912372946739197, "learning_rate": 0.000643676110884631, "loss": 3.5649, "step": 41955 }, { "epoch": 2.850930832993613, "grad_norm": 0.7288410663604736, "learning_rate": 0.0006436336458757983, "loss": 3.6666, "step": 41960 }, { "epoch": 2.851270553064275, "grad_norm": 0.8947926759719849, "learning_rate": 0.0006435911808669657, "loss": 3.4914, "step": 41965 }, { "epoch": 2.851610273134937, "grad_norm": 0.6313999891281128, "learning_rate": 0.0006435487158581329, "loss": 3.6639, "step": 41970 }, { "epoch": 2.8519499932055985, "grad_norm": 0.8949326872825623, "learning_rate": 0.0006435062508493002, "loss": 3.6489, "step": 41975 }, { "epoch": 2.85228971327626, "grad_norm": 0.7926784753799438, "learning_rate": 0.0006434637858404675, "loss": 3.3057, "step": 41980 }, { "epoch": 2.852629433346922, "grad_norm": 0.9715356826782227, "learning_rate": 0.0006434213208316347, "loss": 3.7546, "step": 41985 }, { "epoch": 2.852969153417584, "grad_norm": 1.4090635776519775, "learning_rate": 0.000643378855822802, "loss": 3.4772, "step": 41990 }, { "epoch": 2.8533088734882455, "grad_norm": 0.9378007650375366, "learning_rate": 0.0006433363908139693, "loss": 3.6404, "step": 41995 }, { "epoch": 2.8536485935589075, "grad_norm": 0.6559866666793823, "learning_rate": 0.0006432939258051366, "loss": 3.725, "step": 42000 }, { "epoch": 2.853988313629569, "grad_norm": 0.685090959072113, "learning_rate": 0.0006432514607963038, "loss": 3.6177, "step": 42005 }, { "epoch": 2.854328033700231, "grad_norm": 1.0530961751937866, "learning_rate": 0.0006432089957874712, "loss": 3.2503, "step": 42010 }, { "epoch": 2.854667753770893, "grad_norm": 0.7915587425231934, "learning_rate": 0.0006431665307786384, "loss": 3.5532, "step": 42015 }, { "epoch": 2.8550074738415545, "grad_norm": 0.8528828620910645, "learning_rate": 0.0006431240657698056, "loss": 3.6983, "step": 42020 }, { "epoch": 2.855347193912216, "grad_norm": 0.8784139156341553, "learning_rate": 0.000643081600760973, "loss": 3.844, "step": 42025 }, { "epoch": 2.855686913982878, "grad_norm": 0.8080273270606995, "learning_rate": 0.0006430391357521402, "loss": 3.3913, "step": 42030 }, { "epoch": 2.85602663405354, "grad_norm": 0.969186544418335, "learning_rate": 0.0006429966707433075, "loss": 3.2613, "step": 42035 }, { "epoch": 2.8563663541242015, "grad_norm": 0.8907771706581116, "learning_rate": 0.0006429542057344749, "loss": 3.5874, "step": 42040 }, { "epoch": 2.8567060741948636, "grad_norm": 0.9256178736686707, "learning_rate": 0.0006429117407256421, "loss": 3.7473, "step": 42045 }, { "epoch": 2.857045794265525, "grad_norm": 0.8172616362571716, "learning_rate": 0.0006428692757168093, "loss": 3.4484, "step": 42050 }, { "epoch": 2.857385514336187, "grad_norm": 0.8367016315460205, "learning_rate": 0.0006428268107079766, "loss": 3.6257, "step": 42055 }, { "epoch": 2.857725234406849, "grad_norm": 0.7967295050621033, "learning_rate": 0.0006427843456991439, "loss": 3.4888, "step": 42060 }, { "epoch": 2.8580649544775105, "grad_norm": 0.8925996422767639, "learning_rate": 0.0006427418806903111, "loss": 3.6407, "step": 42065 }, { "epoch": 2.858404674548172, "grad_norm": 0.8647528290748596, "learning_rate": 0.0006426994156814785, "loss": 3.5231, "step": 42070 }, { "epoch": 2.8587443946188342, "grad_norm": 0.9978330731391907, "learning_rate": 0.0006426569506726458, "loss": 3.5665, "step": 42075 }, { "epoch": 2.859084114689496, "grad_norm": 0.9879549741744995, "learning_rate": 0.0006426144856638131, "loss": 3.5062, "step": 42080 }, { "epoch": 2.8594238347601575, "grad_norm": 0.7880136966705322, "learning_rate": 0.0006425720206549803, "loss": 3.4226, "step": 42085 }, { "epoch": 2.8597635548308196, "grad_norm": 0.7803234457969666, "learning_rate": 0.0006425295556461476, "loss": 3.6137, "step": 42090 }, { "epoch": 2.860103274901481, "grad_norm": 1.073130488395691, "learning_rate": 0.0006424870906373149, "loss": 3.5561, "step": 42095 }, { "epoch": 2.860442994972143, "grad_norm": 0.9077723622322083, "learning_rate": 0.0006424446256284821, "loss": 3.663, "step": 42100 }, { "epoch": 2.860782715042805, "grad_norm": 0.6709428429603577, "learning_rate": 0.0006424021606196495, "loss": 3.2919, "step": 42105 }, { "epoch": 2.8611224351134665, "grad_norm": 0.9544896483421326, "learning_rate": 0.0006423596956108168, "loss": 3.8351, "step": 42110 }, { "epoch": 2.861462155184128, "grad_norm": 0.9154482483863831, "learning_rate": 0.000642317230601984, "loss": 3.4774, "step": 42115 }, { "epoch": 2.8618018752547902, "grad_norm": 0.8827584385871887, "learning_rate": 0.0006422747655931512, "loss": 3.6207, "step": 42120 }, { "epoch": 2.862141595325452, "grad_norm": 0.8696451187133789, "learning_rate": 0.0006422323005843186, "loss": 3.6162, "step": 42125 }, { "epoch": 2.8624813153961135, "grad_norm": 0.9808441400527954, "learning_rate": 0.0006421898355754858, "loss": 3.5638, "step": 42130 }, { "epoch": 2.8628210354667756, "grad_norm": 1.0594931840896606, "learning_rate": 0.000642147370566653, "loss": 3.5955, "step": 42135 }, { "epoch": 2.863160755537437, "grad_norm": 0.8295979499816895, "learning_rate": 0.0006421049055578205, "loss": 3.2135, "step": 42140 }, { "epoch": 2.863500475608099, "grad_norm": 1.2168315649032593, "learning_rate": 0.0006420624405489877, "loss": 3.5142, "step": 42145 }, { "epoch": 2.863840195678761, "grad_norm": 0.9181390404701233, "learning_rate": 0.0006420199755401549, "loss": 3.4905, "step": 42150 }, { "epoch": 2.8641799157494225, "grad_norm": 0.8940221667289734, "learning_rate": 0.0006419775105313223, "loss": 3.6417, "step": 42155 }, { "epoch": 2.864519635820084, "grad_norm": 0.6922504901885986, "learning_rate": 0.0006419350455224895, "loss": 3.4489, "step": 42160 }, { "epoch": 2.8648593558907463, "grad_norm": 0.7633323073387146, "learning_rate": 0.0006418925805136567, "loss": 3.3769, "step": 42165 }, { "epoch": 2.865199075961408, "grad_norm": 0.910102367401123, "learning_rate": 0.000641850115504824, "loss": 3.5851, "step": 42170 }, { "epoch": 2.8655387960320695, "grad_norm": 0.8751612305641174, "learning_rate": 0.0006418076504959914, "loss": 3.5135, "step": 42175 }, { "epoch": 2.8658785161027316, "grad_norm": 0.8148460984230042, "learning_rate": 0.0006417651854871586, "loss": 3.6535, "step": 42180 }, { "epoch": 2.866218236173393, "grad_norm": 0.8185792565345764, "learning_rate": 0.0006417227204783259, "loss": 3.2767, "step": 42185 }, { "epoch": 2.866557956244055, "grad_norm": 0.7995314598083496, "learning_rate": 0.0006416802554694932, "loss": 3.547, "step": 42190 }, { "epoch": 2.866897676314717, "grad_norm": 1.3204153776168823, "learning_rate": 0.0006416377904606604, "loss": 3.5845, "step": 42195 }, { "epoch": 2.8672373963853786, "grad_norm": 6.705195903778076, "learning_rate": 0.0006415953254518277, "loss": 3.5688, "step": 42200 }, { "epoch": 2.86757711645604, "grad_norm": 0.9194809198379517, "learning_rate": 0.0006415528604429949, "loss": 3.4978, "step": 42205 }, { "epoch": 2.8679168365267023, "grad_norm": 1.4958008527755737, "learning_rate": 0.0006415103954341623, "loss": 3.6392, "step": 42210 }, { "epoch": 2.868256556597364, "grad_norm": 0.7360958456993103, "learning_rate": 0.0006414679304253296, "loss": 3.5015, "step": 42215 }, { "epoch": 2.8685962766680255, "grad_norm": 0.9407960772514343, "learning_rate": 0.0006414254654164968, "loss": 3.2406, "step": 42220 }, { "epoch": 2.868935996738687, "grad_norm": 0.8606539964675903, "learning_rate": 0.0006413830004076641, "loss": 3.7269, "step": 42225 }, { "epoch": 2.8692757168093492, "grad_norm": 1.269915223121643, "learning_rate": 0.0006413405353988314, "loss": 3.3868, "step": 42230 }, { "epoch": 2.869615436880011, "grad_norm": 0.8137083053588867, "learning_rate": 0.0006412980703899986, "loss": 3.5869, "step": 42235 }, { "epoch": 2.8699551569506725, "grad_norm": 0.8669810891151428, "learning_rate": 0.0006412556053811658, "loss": 3.6903, "step": 42240 }, { "epoch": 2.8702948770213346, "grad_norm": 0.9795706272125244, "learning_rate": 0.0006412131403723333, "loss": 3.6634, "step": 42245 }, { "epoch": 2.870634597091996, "grad_norm": 1.6822649240493774, "learning_rate": 0.0006411706753635005, "loss": 3.5108, "step": 42250 }, { "epoch": 2.870974317162658, "grad_norm": 0.968251645565033, "learning_rate": 0.0006411282103546677, "loss": 3.6763, "step": 42255 }, { "epoch": 2.87131403723332, "grad_norm": 0.6832924485206604, "learning_rate": 0.0006410857453458351, "loss": 3.4391, "step": 42260 }, { "epoch": 2.8716537573039815, "grad_norm": 0.8875095844268799, "learning_rate": 0.0006410432803370023, "loss": 3.4593, "step": 42265 }, { "epoch": 2.871993477374643, "grad_norm": 1.0385401248931885, "learning_rate": 0.0006410008153281695, "loss": 3.4715, "step": 42270 }, { "epoch": 2.872333197445305, "grad_norm": 0.9741016626358032, "learning_rate": 0.0006409583503193369, "loss": 3.7553, "step": 42275 }, { "epoch": 2.872672917515967, "grad_norm": 0.7240934371948242, "learning_rate": 0.0006409158853105042, "loss": 3.6119, "step": 42280 }, { "epoch": 2.8730126375866285, "grad_norm": 0.9612627029418945, "learning_rate": 0.0006408734203016714, "loss": 3.4585, "step": 42285 }, { "epoch": 2.87335235765729, "grad_norm": 1.1005984544754028, "learning_rate": 0.0006408309552928388, "loss": 3.4789, "step": 42290 }, { "epoch": 2.873692077727952, "grad_norm": 0.8549172878265381, "learning_rate": 0.000640788490284006, "loss": 3.5615, "step": 42295 }, { "epoch": 2.874031797798614, "grad_norm": 0.7889883518218994, "learning_rate": 0.0006407460252751732, "loss": 3.7247, "step": 42300 }, { "epoch": 2.8743715178692755, "grad_norm": 0.8491808176040649, "learning_rate": 0.0006407035602663405, "loss": 3.5565, "step": 42305 }, { "epoch": 2.8747112379399375, "grad_norm": 1.0010915994644165, "learning_rate": 0.0006406610952575078, "loss": 3.548, "step": 42310 }, { "epoch": 2.875050958010599, "grad_norm": 0.90715491771698, "learning_rate": 0.0006406186302486751, "loss": 3.585, "step": 42315 }, { "epoch": 2.875390678081261, "grad_norm": 0.8811260461807251, "learning_rate": 0.0006405761652398424, "loss": 3.5687, "step": 42320 }, { "epoch": 2.875730398151923, "grad_norm": 0.8841518759727478, "learning_rate": 0.0006405337002310097, "loss": 3.5043, "step": 42325 }, { "epoch": 2.8760701182225845, "grad_norm": 0.9076136946678162, "learning_rate": 0.0006404912352221769, "loss": 3.5579, "step": 42330 }, { "epoch": 2.876409838293246, "grad_norm": 1.0862607955932617, "learning_rate": 0.0006404487702133442, "loss": 3.4655, "step": 42335 }, { "epoch": 2.876749558363908, "grad_norm": 0.7681044340133667, "learning_rate": 0.0006404063052045115, "loss": 3.6666, "step": 42340 }, { "epoch": 2.87708927843457, "grad_norm": 1.1122334003448486, "learning_rate": 0.0006403638401956787, "loss": 3.4034, "step": 42345 }, { "epoch": 2.8774289985052315, "grad_norm": 0.76408851146698, "learning_rate": 0.0006403213751868461, "loss": 3.7245, "step": 42350 }, { "epoch": 2.8777687185758936, "grad_norm": 0.800439178943634, "learning_rate": 0.0006402789101780133, "loss": 3.5469, "step": 42355 }, { "epoch": 2.878108438646555, "grad_norm": 0.7807614803314209, "learning_rate": 0.0006402364451691806, "loss": 3.6776, "step": 42360 }, { "epoch": 2.878448158717217, "grad_norm": 0.7730633020401001, "learning_rate": 0.0006401939801603479, "loss": 3.5172, "step": 42365 }, { "epoch": 2.878787878787879, "grad_norm": 0.946793258190155, "learning_rate": 0.0006401515151515151, "loss": 3.6086, "step": 42370 }, { "epoch": 2.8791275988585405, "grad_norm": 0.8278414607048035, "learning_rate": 0.0006401090501426824, "loss": 3.8165, "step": 42375 }, { "epoch": 2.879467318929202, "grad_norm": 0.8552778959274292, "learning_rate": 0.0006400665851338497, "loss": 3.6534, "step": 42380 }, { "epoch": 2.8798070389998642, "grad_norm": 0.6798197031021118, "learning_rate": 0.000640024120125017, "loss": 3.4707, "step": 42385 }, { "epoch": 2.880146759070526, "grad_norm": 0.7311868071556091, "learning_rate": 0.0006399816551161843, "loss": 3.504, "step": 42390 }, { "epoch": 2.8804864791411875, "grad_norm": 0.8453450202941895, "learning_rate": 0.0006399391901073516, "loss": 3.5748, "step": 42395 }, { "epoch": 2.8808261992118496, "grad_norm": 0.897329568862915, "learning_rate": 0.0006398967250985188, "loss": 3.7554, "step": 42400 }, { "epoch": 2.881165919282511, "grad_norm": 0.9080669283866882, "learning_rate": 0.000639854260089686, "loss": 3.4393, "step": 42405 }, { "epoch": 2.881505639353173, "grad_norm": 0.892851710319519, "learning_rate": 0.0006398117950808534, "loss": 3.6194, "step": 42410 }, { "epoch": 2.881845359423835, "grad_norm": 0.9177255034446716, "learning_rate": 0.0006397693300720206, "loss": 3.6608, "step": 42415 }, { "epoch": 2.8821850794944965, "grad_norm": 1.098097562789917, "learning_rate": 0.000639726865063188, "loss": 3.827, "step": 42420 }, { "epoch": 2.882524799565158, "grad_norm": 1.0553370714187622, "learning_rate": 0.0006396844000543553, "loss": 3.3986, "step": 42425 }, { "epoch": 2.8828645196358202, "grad_norm": 0.9975004196166992, "learning_rate": 0.0006396419350455225, "loss": 3.6784, "step": 42430 }, { "epoch": 2.883204239706482, "grad_norm": 0.7348812818527222, "learning_rate": 0.0006395994700366898, "loss": 3.5928, "step": 42435 }, { "epoch": 2.8835439597771435, "grad_norm": 0.9415812492370605, "learning_rate": 0.000639557005027857, "loss": 3.5491, "step": 42440 }, { "epoch": 2.8838836798478056, "grad_norm": 0.7492969036102295, "learning_rate": 0.0006395145400190243, "loss": 3.8582, "step": 42445 }, { "epoch": 2.884223399918467, "grad_norm": 0.9638067483901978, "learning_rate": 0.0006394720750101916, "loss": 3.3706, "step": 42450 }, { "epoch": 2.884563119989129, "grad_norm": 0.8698820471763611, "learning_rate": 0.000639429610001359, "loss": 3.5963, "step": 42455 }, { "epoch": 2.884902840059791, "grad_norm": 0.7817463874816895, "learning_rate": 0.0006393871449925262, "loss": 3.6897, "step": 42460 }, { "epoch": 2.8852425601304525, "grad_norm": 0.6357342004776001, "learning_rate": 0.0006393446799836935, "loss": 3.631, "step": 42465 }, { "epoch": 2.885582280201114, "grad_norm": 0.9060226678848267, "learning_rate": 0.0006393022149748607, "loss": 3.4765, "step": 42470 }, { "epoch": 2.8859220002717763, "grad_norm": 1.1503204107284546, "learning_rate": 0.000639259749966028, "loss": 3.7089, "step": 42475 }, { "epoch": 2.886261720342438, "grad_norm": 0.8642039895057678, "learning_rate": 0.0006392172849571953, "loss": 3.7822, "step": 42480 }, { "epoch": 2.8866014404130995, "grad_norm": 0.7597491145133972, "learning_rate": 0.0006391748199483625, "loss": 3.5902, "step": 42485 }, { "epoch": 2.8869411604837616, "grad_norm": 0.7859035134315491, "learning_rate": 0.0006391323549395299, "loss": 3.6041, "step": 42490 }, { "epoch": 2.8872808805544232, "grad_norm": 0.851076066493988, "learning_rate": 0.0006390898899306972, "loss": 3.6319, "step": 42495 }, { "epoch": 2.887620600625085, "grad_norm": 0.8372633457183838, "learning_rate": 0.0006390474249218644, "loss": 3.5286, "step": 42500 }, { "epoch": 2.887960320695747, "grad_norm": 0.796132504940033, "learning_rate": 0.0006390049599130316, "loss": 3.5349, "step": 42505 }, { "epoch": 2.8883000407664086, "grad_norm": 1.2908692359924316, "learning_rate": 0.000638962494904199, "loss": 3.4829, "step": 42510 }, { "epoch": 2.88863976083707, "grad_norm": 0.8849920034408569, "learning_rate": 0.0006389200298953662, "loss": 3.2734, "step": 42515 }, { "epoch": 2.8889794809077323, "grad_norm": 0.9792762398719788, "learning_rate": 0.0006388775648865334, "loss": 3.7267, "step": 42520 }, { "epoch": 2.889319200978394, "grad_norm": 1.0214260816574097, "learning_rate": 0.0006388350998777009, "loss": 3.5204, "step": 42525 }, { "epoch": 2.8896589210490555, "grad_norm": 0.9677252769470215, "learning_rate": 0.0006387926348688681, "loss": 3.5951, "step": 42530 }, { "epoch": 2.8899986411197176, "grad_norm": 0.9316784143447876, "learning_rate": 0.0006387501698600353, "loss": 3.2639, "step": 42535 }, { "epoch": 2.8903383611903792, "grad_norm": 0.988141655921936, "learning_rate": 0.0006387077048512027, "loss": 3.7225, "step": 42540 }, { "epoch": 2.890678081261041, "grad_norm": 0.8708176612854004, "learning_rate": 0.0006386652398423699, "loss": 3.4157, "step": 42545 }, { "epoch": 2.891017801331703, "grad_norm": 1.3555513620376587, "learning_rate": 0.0006386227748335371, "loss": 3.3983, "step": 42550 }, { "epoch": 2.8913575214023646, "grad_norm": 1.010156273841858, "learning_rate": 0.0006385803098247045, "loss": 3.4824, "step": 42555 }, { "epoch": 2.891697241473026, "grad_norm": 0.8730334043502808, "learning_rate": 0.0006385378448158718, "loss": 3.6738, "step": 42560 }, { "epoch": 2.8920369615436883, "grad_norm": 0.9817825555801392, "learning_rate": 0.000638495379807039, "loss": 3.6883, "step": 42565 }, { "epoch": 2.89237668161435, "grad_norm": 0.7821949124336243, "learning_rate": 0.0006384529147982063, "loss": 3.5876, "step": 42570 }, { "epoch": 2.8927164016850115, "grad_norm": 0.9443778991699219, "learning_rate": 0.0006384104497893736, "loss": 3.5109, "step": 42575 }, { "epoch": 2.893056121755673, "grad_norm": 0.8248337507247925, "learning_rate": 0.0006383679847805408, "loss": 3.4168, "step": 42580 }, { "epoch": 2.8933958418263352, "grad_norm": 0.7502477765083313, "learning_rate": 0.0006383255197717081, "loss": 3.5999, "step": 42585 }, { "epoch": 2.893735561896997, "grad_norm": 1.1297409534454346, "learning_rate": 0.0006382830547628755, "loss": 3.5377, "step": 42590 }, { "epoch": 2.8940752819676585, "grad_norm": 1.4060485363006592, "learning_rate": 0.0006382405897540427, "loss": 3.626, "step": 42595 }, { "epoch": 2.8944150020383206, "grad_norm": 0.9989623427391052, "learning_rate": 0.00063819812474521, "loss": 3.6374, "step": 42600 }, { "epoch": 2.894754722108982, "grad_norm": 0.9649537801742554, "learning_rate": 0.0006381556597363772, "loss": 3.2851, "step": 42605 }, { "epoch": 2.895094442179644, "grad_norm": 0.8392823338508606, "learning_rate": 0.0006381131947275445, "loss": 3.4792, "step": 42610 }, { "epoch": 2.8954341622503055, "grad_norm": 0.9658290147781372, "learning_rate": 0.0006380707297187118, "loss": 3.3912, "step": 42615 }, { "epoch": 2.8957738823209676, "grad_norm": 0.8299267292022705, "learning_rate": 0.000638028264709879, "loss": 3.5084, "step": 42620 }, { "epoch": 2.896113602391629, "grad_norm": 0.9467092156410217, "learning_rate": 0.0006379857997010464, "loss": 3.414, "step": 42625 }, { "epoch": 2.896453322462291, "grad_norm": 0.9420976638793945, "learning_rate": 0.0006379433346922137, "loss": 3.5193, "step": 42630 }, { "epoch": 2.896793042532953, "grad_norm": 0.9384837746620178, "learning_rate": 0.0006379008696833809, "loss": 3.6153, "step": 42635 }, { "epoch": 2.8971327626036145, "grad_norm": 0.933294951915741, "learning_rate": 0.0006378584046745481, "loss": 3.5723, "step": 42640 }, { "epoch": 2.897472482674276, "grad_norm": 0.8453119993209839, "learning_rate": 0.0006378159396657155, "loss": 3.8758, "step": 42645 }, { "epoch": 2.8978122027449382, "grad_norm": 0.6300462484359741, "learning_rate": 0.0006377734746568827, "loss": 3.5315, "step": 42650 }, { "epoch": 2.8981519228156, "grad_norm": 0.8902947306632996, "learning_rate": 0.0006377310096480499, "loss": 3.4045, "step": 42655 }, { "epoch": 2.8984916428862615, "grad_norm": 0.9883577823638916, "learning_rate": 0.0006376885446392174, "loss": 3.5604, "step": 42660 }, { "epoch": 2.8988313629569236, "grad_norm": 0.9162705540657043, "learning_rate": 0.0006376460796303846, "loss": 3.627, "step": 42665 }, { "epoch": 2.899171083027585, "grad_norm": 0.9432220458984375, "learning_rate": 0.0006376036146215518, "loss": 3.4278, "step": 42670 }, { "epoch": 2.899510803098247, "grad_norm": 0.8451558947563171, "learning_rate": 0.0006375611496127192, "loss": 3.5946, "step": 42675 }, { "epoch": 2.899850523168909, "grad_norm": 0.8453450798988342, "learning_rate": 0.0006375186846038864, "loss": 3.4331, "step": 42680 }, { "epoch": 2.9001902432395705, "grad_norm": 1.061569333076477, "learning_rate": 0.0006374762195950536, "loss": 3.5746, "step": 42685 }, { "epoch": 2.900529963310232, "grad_norm": 0.9832978844642639, "learning_rate": 0.000637433754586221, "loss": 3.4752, "step": 42690 }, { "epoch": 2.9008696833808942, "grad_norm": 1.009527325630188, "learning_rate": 0.0006373912895773883, "loss": 3.6149, "step": 42695 }, { "epoch": 2.901209403451556, "grad_norm": 0.787018895149231, "learning_rate": 0.0006373488245685555, "loss": 3.6025, "step": 42700 }, { "epoch": 2.9015491235222175, "grad_norm": 1.1298191547393799, "learning_rate": 0.0006373063595597228, "loss": 3.8568, "step": 42705 }, { "epoch": 2.9018888435928796, "grad_norm": 0.9390883445739746, "learning_rate": 0.0006372638945508901, "loss": 3.6322, "step": 42710 }, { "epoch": 2.902228563663541, "grad_norm": 0.8891267776489258, "learning_rate": 0.0006372214295420573, "loss": 3.4913, "step": 42715 }, { "epoch": 2.902568283734203, "grad_norm": 0.7465917468070984, "learning_rate": 0.0006371789645332246, "loss": 3.4973, "step": 42720 }, { "epoch": 2.902908003804865, "grad_norm": 0.897925853729248, "learning_rate": 0.0006371364995243919, "loss": 3.5614, "step": 42725 }, { "epoch": 2.9032477238755265, "grad_norm": 1.2403651475906372, "learning_rate": 0.0006370940345155592, "loss": 3.6058, "step": 42730 }, { "epoch": 2.903587443946188, "grad_norm": 0.6772661805152893, "learning_rate": 0.0006370515695067265, "loss": 3.6434, "step": 42735 }, { "epoch": 2.9039271640168502, "grad_norm": 1.0289537906646729, "learning_rate": 0.0006370091044978937, "loss": 3.5642, "step": 42740 }, { "epoch": 2.904266884087512, "grad_norm": 0.84995037317276, "learning_rate": 0.000636966639489061, "loss": 3.3625, "step": 42745 }, { "epoch": 2.9046066041581735, "grad_norm": 0.8925580382347107, "learning_rate": 0.0006369241744802283, "loss": 3.4654, "step": 42750 }, { "epoch": 2.9049463242288356, "grad_norm": 0.859586775302887, "learning_rate": 0.0006368817094713955, "loss": 3.5087, "step": 42755 }, { "epoch": 2.905286044299497, "grad_norm": 0.9112608432769775, "learning_rate": 0.0006368392444625629, "loss": 3.5589, "step": 42760 }, { "epoch": 2.905625764370159, "grad_norm": 0.8865873217582703, "learning_rate": 0.0006367967794537302, "loss": 3.5169, "step": 42765 }, { "epoch": 2.905965484440821, "grad_norm": 0.6465127468109131, "learning_rate": 0.0006367543144448974, "loss": 3.5673, "step": 42770 }, { "epoch": 2.9063052045114826, "grad_norm": 0.7113096714019775, "learning_rate": 0.0006367118494360648, "loss": 3.734, "step": 42775 }, { "epoch": 2.906644924582144, "grad_norm": 1.079235553741455, "learning_rate": 0.000636669384427232, "loss": 3.6069, "step": 42780 }, { "epoch": 2.9069846446528063, "grad_norm": 0.8976610898971558, "learning_rate": 0.0006366269194183992, "loss": 3.2982, "step": 42785 }, { "epoch": 2.907324364723468, "grad_norm": 0.872797429561615, "learning_rate": 0.0006365844544095666, "loss": 3.5512, "step": 42790 }, { "epoch": 2.9076640847941295, "grad_norm": 0.8457685708999634, "learning_rate": 0.0006365419894007338, "loss": 3.3455, "step": 42795 }, { "epoch": 2.9080038048647916, "grad_norm": 0.7257461547851562, "learning_rate": 0.0006364995243919011, "loss": 3.6599, "step": 42800 }, { "epoch": 2.9083435249354532, "grad_norm": 1.4832161664962769, "learning_rate": 0.0006364570593830684, "loss": 3.4785, "step": 42805 }, { "epoch": 2.908683245006115, "grad_norm": 0.7436572909355164, "learning_rate": 0.0006364145943742357, "loss": 3.7304, "step": 42810 }, { "epoch": 2.909022965076777, "grad_norm": 0.8569587469100952, "learning_rate": 0.0006363721293654029, "loss": 3.6556, "step": 42815 }, { "epoch": 2.9093626851474386, "grad_norm": 0.6778474450111389, "learning_rate": 0.0006363296643565702, "loss": 3.5356, "step": 42820 }, { "epoch": 2.9097024052181, "grad_norm": 0.7488446235656738, "learning_rate": 0.0006362871993477375, "loss": 3.5043, "step": 42825 }, { "epoch": 2.9100421252887623, "grad_norm": 0.6841427683830261, "learning_rate": 0.0006362447343389047, "loss": 3.4015, "step": 42830 }, { "epoch": 2.910381845359424, "grad_norm": 0.947187066078186, "learning_rate": 0.0006362022693300721, "loss": 3.7585, "step": 42835 }, { "epoch": 2.9107215654300855, "grad_norm": 0.817452609539032, "learning_rate": 0.0006361598043212394, "loss": 3.6684, "step": 42840 }, { "epoch": 2.9110612855007476, "grad_norm": 0.6989634037017822, "learning_rate": 0.0006361173393124066, "loss": 3.4543, "step": 42845 }, { "epoch": 2.9114010055714092, "grad_norm": 0.9328689575195312, "learning_rate": 0.0006360748743035739, "loss": 3.5661, "step": 42850 }, { "epoch": 2.911740725642071, "grad_norm": 1.1119638681411743, "learning_rate": 0.0006360324092947411, "loss": 3.5179, "step": 42855 }, { "epoch": 2.912080445712733, "grad_norm": 0.8440215587615967, "learning_rate": 0.0006359899442859084, "loss": 3.6756, "step": 42860 }, { "epoch": 2.9124201657833946, "grad_norm": 0.9562031626701355, "learning_rate": 0.0006359474792770757, "loss": 3.5806, "step": 42865 }, { "epoch": 2.912759885854056, "grad_norm": 0.827194333076477, "learning_rate": 0.000635905014268243, "loss": 3.688, "step": 42870 }, { "epoch": 2.9130996059247183, "grad_norm": 0.8765027523040771, "learning_rate": 0.0006358625492594103, "loss": 3.5777, "step": 42875 }, { "epoch": 2.91343932599538, "grad_norm": 0.7576547861099243, "learning_rate": 0.0006358200842505776, "loss": 3.5804, "step": 42880 }, { "epoch": 2.9137790460660415, "grad_norm": 0.7906240224838257, "learning_rate": 0.0006357776192417448, "loss": 3.8544, "step": 42885 }, { "epoch": 2.9141187661367036, "grad_norm": 0.810924768447876, "learning_rate": 0.000635735154232912, "loss": 3.5058, "step": 42890 }, { "epoch": 2.9144584862073653, "grad_norm": 0.8478354215621948, "learning_rate": 0.0006356926892240794, "loss": 3.5976, "step": 42895 }, { "epoch": 2.914798206278027, "grad_norm": 0.8921616077423096, "learning_rate": 0.0006356502242152466, "loss": 3.3183, "step": 42900 }, { "epoch": 2.915137926348689, "grad_norm": 0.7466316223144531, "learning_rate": 0.0006356077592064139, "loss": 3.684, "step": 42905 }, { "epoch": 2.9154776464193506, "grad_norm": 0.8609406352043152, "learning_rate": 0.0006355652941975813, "loss": 3.4325, "step": 42910 }, { "epoch": 2.915817366490012, "grad_norm": 0.6886776685714722, "learning_rate": 0.0006355228291887485, "loss": 3.6301, "step": 42915 }, { "epoch": 2.916157086560674, "grad_norm": 1.0339919328689575, "learning_rate": 0.0006354803641799157, "loss": 3.5278, "step": 42920 }, { "epoch": 2.916496806631336, "grad_norm": 0.8952237367630005, "learning_rate": 0.0006354378991710831, "loss": 3.6656, "step": 42925 }, { "epoch": 2.9168365267019976, "grad_norm": 0.8915913701057434, "learning_rate": 0.0006353954341622503, "loss": 3.442, "step": 42930 }, { "epoch": 2.917176246772659, "grad_norm": 0.9781518578529358, "learning_rate": 0.0006353529691534175, "loss": 3.6552, "step": 42935 }, { "epoch": 2.9175159668433213, "grad_norm": 0.6803771257400513, "learning_rate": 0.000635310504144585, "loss": 3.7985, "step": 42940 }, { "epoch": 2.917855686913983, "grad_norm": 0.7576701045036316, "learning_rate": 0.0006352680391357522, "loss": 3.7594, "step": 42945 }, { "epoch": 2.9181954069846445, "grad_norm": 0.850638747215271, "learning_rate": 0.0006352255741269194, "loss": 3.4467, "step": 42950 }, { "epoch": 2.918535127055306, "grad_norm": 1.1481186151504517, "learning_rate": 0.0006351831091180867, "loss": 3.429, "step": 42955 }, { "epoch": 2.9188748471259682, "grad_norm": 0.7756760120391846, "learning_rate": 0.000635140644109254, "loss": 3.4955, "step": 42960 }, { "epoch": 2.91921456719663, "grad_norm": 0.7409411072731018, "learning_rate": 0.0006350981791004212, "loss": 3.6201, "step": 42965 }, { "epoch": 2.9195542872672915, "grad_norm": 0.9981425404548645, "learning_rate": 0.0006350557140915885, "loss": 3.6949, "step": 42970 }, { "epoch": 2.9198940073379536, "grad_norm": 0.6571142077445984, "learning_rate": 0.0006350132490827559, "loss": 3.4643, "step": 42975 }, { "epoch": 2.920233727408615, "grad_norm": 0.7857457995414734, "learning_rate": 0.0006349707840739231, "loss": 3.708, "step": 42980 }, { "epoch": 2.920573447479277, "grad_norm": 1.0771361589431763, "learning_rate": 0.0006349283190650904, "loss": 3.3373, "step": 42985 }, { "epoch": 2.920913167549939, "grad_norm": 0.9270572066307068, "learning_rate": 0.0006348858540562576, "loss": 3.5306, "step": 42990 }, { "epoch": 2.9212528876206005, "grad_norm": 0.9011585116386414, "learning_rate": 0.0006348433890474249, "loss": 3.5624, "step": 42995 }, { "epoch": 2.921592607691262, "grad_norm": 0.8225682973861694, "learning_rate": 0.0006348009240385922, "loss": 3.5298, "step": 43000 }, { "epoch": 2.9219323277619242, "grad_norm": 0.8072992563247681, "learning_rate": 0.0006347584590297594, "loss": 3.7955, "step": 43005 }, { "epoch": 2.922272047832586, "grad_norm": 0.8372965455055237, "learning_rate": 0.0006347159940209268, "loss": 3.5219, "step": 43010 }, { "epoch": 2.9226117679032475, "grad_norm": 0.890300989151001, "learning_rate": 0.0006346735290120941, "loss": 3.2631, "step": 43015 }, { "epoch": 2.9229514879739096, "grad_norm": 0.9603749513626099, "learning_rate": 0.0006346310640032613, "loss": 3.6234, "step": 43020 }, { "epoch": 2.923291208044571, "grad_norm": 0.8113959431648254, "learning_rate": 0.0006345885989944286, "loss": 3.7105, "step": 43025 }, { "epoch": 2.923630928115233, "grad_norm": 0.9562439918518066, "learning_rate": 0.0006345461339855959, "loss": 3.6015, "step": 43030 }, { "epoch": 2.923970648185895, "grad_norm": 0.6846343874931335, "learning_rate": 0.0006345036689767631, "loss": 3.5917, "step": 43035 }, { "epoch": 2.9243103682565565, "grad_norm": 0.7247750163078308, "learning_rate": 0.0006344612039679303, "loss": 3.5819, "step": 43040 }, { "epoch": 2.924650088327218, "grad_norm": 0.7040160298347473, "learning_rate": 0.0006344187389590978, "loss": 3.6997, "step": 43045 }, { "epoch": 2.9249898083978803, "grad_norm": 0.8270775675773621, "learning_rate": 0.000634376273950265, "loss": 3.7975, "step": 43050 }, { "epoch": 2.925329528468542, "grad_norm": 0.9861984252929688, "learning_rate": 0.0006343338089414322, "loss": 3.4485, "step": 43055 }, { "epoch": 2.9256692485392035, "grad_norm": 0.9797326922416687, "learning_rate": 0.0006342913439325996, "loss": 3.5411, "step": 43060 }, { "epoch": 2.9260089686098656, "grad_norm": 0.8767942786216736, "learning_rate": 0.0006342488789237668, "loss": 3.7338, "step": 43065 }, { "epoch": 2.926348688680527, "grad_norm": 0.8385321497917175, "learning_rate": 0.000634206413914934, "loss": 3.3743, "step": 43070 }, { "epoch": 2.926688408751189, "grad_norm": 0.8651174902915955, "learning_rate": 0.0006341639489061015, "loss": 3.569, "step": 43075 }, { "epoch": 2.927028128821851, "grad_norm": 1.0775728225708008, "learning_rate": 0.0006341214838972687, "loss": 3.6486, "step": 43080 }, { "epoch": 2.9273678488925126, "grad_norm": 0.8994227647781372, "learning_rate": 0.0006340790188884359, "loss": 3.6417, "step": 43085 }, { "epoch": 2.927707568963174, "grad_norm": 0.785743772983551, "learning_rate": 0.0006340365538796032, "loss": 3.3074, "step": 43090 }, { "epoch": 2.9280472890338363, "grad_norm": 0.9374095797538757, "learning_rate": 0.0006339940888707705, "loss": 3.3838, "step": 43095 }, { "epoch": 2.928387009104498, "grad_norm": 0.8134713768959045, "learning_rate": 0.0006339516238619378, "loss": 3.4064, "step": 43100 }, { "epoch": 2.9287267291751595, "grad_norm": 1.0632164478302002, "learning_rate": 0.000633909158853105, "loss": 3.4691, "step": 43105 }, { "epoch": 2.9290664492458216, "grad_norm": 0.678580641746521, "learning_rate": 0.0006338666938442724, "loss": 3.4899, "step": 43110 }, { "epoch": 2.9294061693164832, "grad_norm": 0.6749963760375977, "learning_rate": 0.0006338242288354397, "loss": 3.6664, "step": 43115 }, { "epoch": 2.929745889387145, "grad_norm": 1.077505111694336, "learning_rate": 0.0006337817638266069, "loss": 3.2458, "step": 43120 }, { "epoch": 2.930085609457807, "grad_norm": 0.745525598526001, "learning_rate": 0.0006337392988177742, "loss": 3.6514, "step": 43125 }, { "epoch": 2.9304253295284686, "grad_norm": 0.808663010597229, "learning_rate": 0.0006336968338089415, "loss": 3.5602, "step": 43130 }, { "epoch": 2.93076504959913, "grad_norm": 0.94858318567276, "learning_rate": 0.0006336543688001087, "loss": 3.1466, "step": 43135 }, { "epoch": 2.9311047696697923, "grad_norm": 0.8936737775802612, "learning_rate": 0.0006336119037912759, "loss": 3.527, "step": 43140 }, { "epoch": 2.931444489740454, "grad_norm": 0.6836514472961426, "learning_rate": 0.0006335694387824434, "loss": 3.7765, "step": 43145 }, { "epoch": 2.9317842098111155, "grad_norm": 0.7926419377326965, "learning_rate": 0.0006335269737736106, "loss": 3.8441, "step": 43150 }, { "epoch": 2.9321239298817776, "grad_norm": 0.9745867252349854, "learning_rate": 0.0006334845087647778, "loss": 3.6393, "step": 43155 }, { "epoch": 2.9324636499524392, "grad_norm": 0.9688714146614075, "learning_rate": 0.0006334420437559452, "loss": 3.5635, "step": 43160 }, { "epoch": 2.932803370023101, "grad_norm": 0.7379640936851501, "learning_rate": 0.0006333995787471124, "loss": 3.6406, "step": 43165 }, { "epoch": 2.933143090093763, "grad_norm": 1.6365584135055542, "learning_rate": 0.0006333571137382796, "loss": 3.2963, "step": 43170 }, { "epoch": 2.9334828101644246, "grad_norm": 0.9489404559135437, "learning_rate": 0.000633314648729447, "loss": 3.5189, "step": 43175 }, { "epoch": 2.933822530235086, "grad_norm": 1.082284688949585, "learning_rate": 0.0006332721837206143, "loss": 3.6844, "step": 43180 }, { "epoch": 2.9341622503057483, "grad_norm": 0.6399793028831482, "learning_rate": 0.0006332297187117815, "loss": 3.4025, "step": 43185 }, { "epoch": 2.93450197037641, "grad_norm": 0.8884056210517883, "learning_rate": 0.0006331872537029488, "loss": 3.5939, "step": 43190 }, { "epoch": 2.9348416904470715, "grad_norm": 1.1194565296173096, "learning_rate": 0.0006331447886941161, "loss": 3.6128, "step": 43195 }, { "epoch": 2.9351814105177336, "grad_norm": 1.199790596961975, "learning_rate": 0.0006331023236852833, "loss": 3.6194, "step": 43200 }, { "epoch": 2.9355211305883953, "grad_norm": 0.9104600548744202, "learning_rate": 0.0006330598586764506, "loss": 3.6906, "step": 43205 }, { "epoch": 2.935860850659057, "grad_norm": 0.7115840911865234, "learning_rate": 0.0006330173936676179, "loss": 3.6423, "step": 43210 }, { "epoch": 2.936200570729719, "grad_norm": 0.7971370816230774, "learning_rate": 0.0006329749286587852, "loss": 3.5575, "step": 43215 }, { "epoch": 2.9365402908003806, "grad_norm": 0.7459076046943665, "learning_rate": 0.0006329324636499525, "loss": 3.4956, "step": 43220 }, { "epoch": 2.9368800108710422, "grad_norm": 0.8045793771743774, "learning_rate": 0.0006328899986411198, "loss": 3.4865, "step": 43225 }, { "epoch": 2.9372197309417043, "grad_norm": 0.8700524568557739, "learning_rate": 0.000632847533632287, "loss": 3.6058, "step": 43230 }, { "epoch": 2.937559451012366, "grad_norm": 1.0913031101226807, "learning_rate": 0.0006328050686234543, "loss": 3.4015, "step": 43235 }, { "epoch": 2.9378991710830276, "grad_norm": 0.9350839853286743, "learning_rate": 0.0006327626036146215, "loss": 3.5714, "step": 43240 }, { "epoch": 2.9382388911536896, "grad_norm": 0.8130491971969604, "learning_rate": 0.0006327201386057888, "loss": 3.3993, "step": 43245 }, { "epoch": 2.9385786112243513, "grad_norm": 0.7840045690536499, "learning_rate": 0.0006326776735969562, "loss": 3.4859, "step": 43250 }, { "epoch": 2.938918331295013, "grad_norm": 0.8669129014015198, "learning_rate": 0.0006326352085881234, "loss": 3.7256, "step": 43255 }, { "epoch": 2.9392580513656745, "grad_norm": 0.9097908735275269, "learning_rate": 0.0006325927435792907, "loss": 3.4386, "step": 43260 }, { "epoch": 2.9395977714363366, "grad_norm": 0.9442394971847534, "learning_rate": 0.000632550278570458, "loss": 3.4204, "step": 43265 }, { "epoch": 2.9399374915069982, "grad_norm": 0.7750453352928162, "learning_rate": 0.0006325078135616252, "loss": 3.5287, "step": 43270 }, { "epoch": 2.94027721157766, "grad_norm": 0.7245613932609558, "learning_rate": 0.0006324653485527924, "loss": 3.4228, "step": 43275 }, { "epoch": 2.940616931648322, "grad_norm": 0.9449433088302612, "learning_rate": 0.0006324228835439598, "loss": 3.6402, "step": 43280 }, { "epoch": 2.9409566517189836, "grad_norm": 0.9120093584060669, "learning_rate": 0.0006323804185351271, "loss": 3.7134, "step": 43285 }, { "epoch": 2.941296371789645, "grad_norm": 0.6628315448760986, "learning_rate": 0.0006323379535262943, "loss": 3.5407, "step": 43290 }, { "epoch": 2.941636091860307, "grad_norm": 0.729255735874176, "learning_rate": 0.0006322954885174617, "loss": 3.7114, "step": 43295 }, { "epoch": 2.941975811930969, "grad_norm": 0.9126265645027161, "learning_rate": 0.0006322530235086289, "loss": 3.7376, "step": 43300 }, { "epoch": 2.9423155320016305, "grad_norm": 0.7074252367019653, "learning_rate": 0.0006322105584997961, "loss": 3.329, "step": 43305 }, { "epoch": 2.942655252072292, "grad_norm": 0.8255859613418579, "learning_rate": 0.0006321680934909635, "loss": 3.5726, "step": 43310 }, { "epoch": 2.9429949721429542, "grad_norm": 1.0203677415847778, "learning_rate": 0.0006321256284821307, "loss": 3.3668, "step": 43315 }, { "epoch": 2.943334692213616, "grad_norm": 1.047762393951416, "learning_rate": 0.000632083163473298, "loss": 3.7313, "step": 43320 }, { "epoch": 2.9436744122842775, "grad_norm": 0.9265753030776978, "learning_rate": 0.0006320406984644654, "loss": 3.3701, "step": 43325 }, { "epoch": 2.9440141323549396, "grad_norm": 0.9602901339530945, "learning_rate": 0.0006319982334556326, "loss": 3.4059, "step": 43330 }, { "epoch": 2.944353852425601, "grad_norm": 0.7473616003990173, "learning_rate": 0.0006319557684467998, "loss": 3.5092, "step": 43335 }, { "epoch": 2.944693572496263, "grad_norm": 0.6957234740257263, "learning_rate": 0.0006319133034379671, "loss": 3.853, "step": 43340 }, { "epoch": 2.945033292566925, "grad_norm": 1.0074269771575928, "learning_rate": 0.0006318708384291344, "loss": 3.5768, "step": 43345 }, { "epoch": 2.9453730126375866, "grad_norm": 0.769325315952301, "learning_rate": 0.0006318283734203016, "loss": 3.5487, "step": 43350 }, { "epoch": 2.945712732708248, "grad_norm": 0.835279107093811, "learning_rate": 0.000631785908411469, "loss": 3.7238, "step": 43355 }, { "epoch": 2.9460524527789103, "grad_norm": 0.748982310295105, "learning_rate": 0.0006317434434026363, "loss": 3.5139, "step": 43360 }, { "epoch": 2.946392172849572, "grad_norm": 0.8109654784202576, "learning_rate": 0.0006317009783938035, "loss": 3.4353, "step": 43365 }, { "epoch": 2.9467318929202335, "grad_norm": 1.2224628925323486, "learning_rate": 0.0006316585133849708, "loss": 3.7055, "step": 43370 }, { "epoch": 2.9470716129908956, "grad_norm": 0.8102120161056519, "learning_rate": 0.000631616048376138, "loss": 3.8043, "step": 43375 }, { "epoch": 2.9474113330615572, "grad_norm": 0.8882004618644714, "learning_rate": 0.0006315735833673053, "loss": 3.3616, "step": 43380 }, { "epoch": 2.947751053132219, "grad_norm": 0.9587160348892212, "learning_rate": 0.0006315311183584726, "loss": 3.4486, "step": 43385 }, { "epoch": 2.948090773202881, "grad_norm": 0.8002419471740723, "learning_rate": 0.0006314886533496399, "loss": 3.7185, "step": 43390 }, { "epoch": 2.9484304932735426, "grad_norm": 0.8356196284294128, "learning_rate": 0.0006314461883408072, "loss": 3.555, "step": 43395 }, { "epoch": 2.948770213344204, "grad_norm": 0.9570494890213013, "learning_rate": 0.0006314037233319745, "loss": 3.5738, "step": 43400 }, { "epoch": 2.9491099334148663, "grad_norm": 0.7428749203681946, "learning_rate": 0.0006313612583231417, "loss": 3.3779, "step": 43405 }, { "epoch": 2.949449653485528, "grad_norm": 1.1080286502838135, "learning_rate": 0.000631318793314309, "loss": 3.28, "step": 43410 }, { "epoch": 2.9497893735561895, "grad_norm": 0.8242689967155457, "learning_rate": 0.0006312763283054763, "loss": 3.5895, "step": 43415 }, { "epoch": 2.9501290936268516, "grad_norm": 0.8062947988510132, "learning_rate": 0.0006312338632966435, "loss": 3.4197, "step": 43420 }, { "epoch": 2.9504688136975132, "grad_norm": 0.7636348009109497, "learning_rate": 0.0006311913982878108, "loss": 3.7236, "step": 43425 }, { "epoch": 2.950808533768175, "grad_norm": 1.1278858184814453, "learning_rate": 0.0006311489332789782, "loss": 3.8628, "step": 43430 }, { "epoch": 2.951148253838837, "grad_norm": 1.1522893905639648, "learning_rate": 0.0006311064682701454, "loss": 3.7794, "step": 43435 }, { "epoch": 2.9514879739094986, "grad_norm": 0.8423879742622375, "learning_rate": 0.0006310640032613127, "loss": 3.6291, "step": 43440 }, { "epoch": 2.95182769398016, "grad_norm": 0.7841156125068665, "learning_rate": 0.00063102153825248, "loss": 3.7412, "step": 43445 }, { "epoch": 2.9521674140508223, "grad_norm": 0.9093571305274963, "learning_rate": 0.0006309790732436472, "loss": 3.4045, "step": 43450 }, { "epoch": 2.952507134121484, "grad_norm": 1.0720208883285522, "learning_rate": 0.0006309366082348145, "loss": 3.4602, "step": 43455 }, { "epoch": 2.9528468541921455, "grad_norm": 0.7797314524650574, "learning_rate": 0.0006308941432259819, "loss": 3.5052, "step": 43460 }, { "epoch": 2.9531865742628076, "grad_norm": 0.9134397506713867, "learning_rate": 0.0006308516782171491, "loss": 3.524, "step": 43465 }, { "epoch": 2.9535262943334692, "grad_norm": 0.9190880656242371, "learning_rate": 0.0006308092132083164, "loss": 3.7669, "step": 43470 }, { "epoch": 2.953866014404131, "grad_norm": 0.9022619128227234, "learning_rate": 0.0006307667481994836, "loss": 3.4313, "step": 43475 }, { "epoch": 2.954205734474793, "grad_norm": 1.0890593528747559, "learning_rate": 0.0006307242831906509, "loss": 3.6823, "step": 43480 }, { "epoch": 2.9545454545454546, "grad_norm": 0.6224093437194824, "learning_rate": 0.0006306818181818182, "loss": 3.6398, "step": 43485 }, { "epoch": 2.954885174616116, "grad_norm": 0.7774902582168579, "learning_rate": 0.0006306393531729854, "loss": 3.7479, "step": 43490 }, { "epoch": 2.9552248946867783, "grad_norm": 0.7682737112045288, "learning_rate": 0.0006305968881641528, "loss": 3.3711, "step": 43495 }, { "epoch": 2.95556461475744, "grad_norm": 0.9445845484733582, "learning_rate": 0.0006305544231553201, "loss": 3.5381, "step": 43500 }, { "epoch": 2.9559043348281016, "grad_norm": 0.8026590347290039, "learning_rate": 0.0006305119581464873, "loss": 3.2857, "step": 43505 }, { "epoch": 2.9562440548987636, "grad_norm": 0.8016337156295776, "learning_rate": 0.0006304694931376546, "loss": 3.6065, "step": 43510 }, { "epoch": 2.9565837749694253, "grad_norm": 0.8848589062690735, "learning_rate": 0.0006304270281288219, "loss": 3.2338, "step": 43515 }, { "epoch": 2.956923495040087, "grad_norm": 0.6999162435531616, "learning_rate": 0.0006303845631199891, "loss": 3.4057, "step": 43520 }, { "epoch": 2.957263215110749, "grad_norm": 0.8389745354652405, "learning_rate": 0.0006303420981111563, "loss": 3.5429, "step": 43525 }, { "epoch": 2.9576029351814106, "grad_norm": 0.8223007917404175, "learning_rate": 0.0006302996331023238, "loss": 3.7101, "step": 43530 }, { "epoch": 2.9579426552520722, "grad_norm": 1.1538479328155518, "learning_rate": 0.000630257168093491, "loss": 3.4019, "step": 43535 }, { "epoch": 2.9582823753227343, "grad_norm": 0.8646745085716248, "learning_rate": 0.0006302147030846582, "loss": 3.4104, "step": 43540 }, { "epoch": 2.958622095393396, "grad_norm": 0.8348680734634399, "learning_rate": 0.0006301722380758256, "loss": 3.6123, "step": 43545 }, { "epoch": 2.9589618154640576, "grad_norm": 0.8467800617218018, "learning_rate": 0.0006301297730669928, "loss": 3.6381, "step": 43550 }, { "epoch": 2.9593015355347196, "grad_norm": 0.9674025177955627, "learning_rate": 0.00063008730805816, "loss": 3.2129, "step": 43555 }, { "epoch": 2.9596412556053813, "grad_norm": 0.8191869258880615, "learning_rate": 0.0006300448430493274, "loss": 3.5761, "step": 43560 }, { "epoch": 2.959980975676043, "grad_norm": 0.7879136800765991, "learning_rate": 0.0006300023780404947, "loss": 3.3012, "step": 43565 }, { "epoch": 2.960320695746705, "grad_norm": 0.98897784948349, "learning_rate": 0.0006299599130316619, "loss": 3.4116, "step": 43570 }, { "epoch": 2.9606604158173666, "grad_norm": 0.9606609344482422, "learning_rate": 0.0006299174480228293, "loss": 3.7332, "step": 43575 }, { "epoch": 2.9610001358880282, "grad_norm": 0.8893470764160156, "learning_rate": 0.0006298749830139965, "loss": 3.6162, "step": 43580 }, { "epoch": 2.9613398559586903, "grad_norm": 0.8940730094909668, "learning_rate": 0.0006298325180051637, "loss": 3.6049, "step": 43585 }, { "epoch": 2.961679576029352, "grad_norm": 0.6304094791412354, "learning_rate": 0.000629790052996331, "loss": 3.7876, "step": 43590 }, { "epoch": 2.9620192961000136, "grad_norm": 0.8289564251899719, "learning_rate": 0.0006297475879874983, "loss": 3.5264, "step": 43595 }, { "epoch": 2.962359016170675, "grad_norm": 0.7271533608436584, "learning_rate": 0.0006297051229786656, "loss": 3.7018, "step": 43600 }, { "epoch": 2.9626987362413373, "grad_norm": 0.6673784852027893, "learning_rate": 0.0006296626579698329, "loss": 3.6634, "step": 43605 }, { "epoch": 2.963038456311999, "grad_norm": 0.8496295213699341, "learning_rate": 0.0006296201929610002, "loss": 3.6089, "step": 43610 }, { "epoch": 2.9633781763826605, "grad_norm": 0.9031476974487305, "learning_rate": 0.0006295777279521674, "loss": 3.3457, "step": 43615 }, { "epoch": 2.9637178964533226, "grad_norm": 0.8204229474067688, "learning_rate": 0.0006295352629433347, "loss": 3.6942, "step": 43620 }, { "epoch": 2.9640576165239843, "grad_norm": 0.8116554617881775, "learning_rate": 0.0006294927979345019, "loss": 3.6558, "step": 43625 }, { "epoch": 2.964397336594646, "grad_norm": 1.00860595703125, "learning_rate": 0.0006294503329256692, "loss": 3.3386, "step": 43630 }, { "epoch": 2.9647370566653075, "grad_norm": 0.9073499441146851, "learning_rate": 0.0006294078679168366, "loss": 3.5925, "step": 43635 }, { "epoch": 2.9650767767359696, "grad_norm": 0.9822063446044922, "learning_rate": 0.0006293654029080038, "loss": 3.5747, "step": 43640 }, { "epoch": 2.965416496806631, "grad_norm": 0.7479739785194397, "learning_rate": 0.0006293229378991711, "loss": 3.6546, "step": 43645 }, { "epoch": 2.965756216877293, "grad_norm": 1.19801926612854, "learning_rate": 0.0006292804728903384, "loss": 3.581, "step": 43650 }, { "epoch": 2.966095936947955, "grad_norm": 0.680182158946991, "learning_rate": 0.0006292380078815056, "loss": 3.7808, "step": 43655 }, { "epoch": 2.9664356570186166, "grad_norm": 0.9670102596282959, "learning_rate": 0.0006291955428726728, "loss": 3.711, "step": 43660 }, { "epoch": 2.966775377089278, "grad_norm": 0.8214612007141113, "learning_rate": 0.0006291530778638403, "loss": 3.5466, "step": 43665 }, { "epoch": 2.9671150971599403, "grad_norm": 0.775115966796875, "learning_rate": 0.0006291106128550075, "loss": 3.7463, "step": 43670 }, { "epoch": 2.967454817230602, "grad_norm": 0.7591709494590759, "learning_rate": 0.0006290681478461747, "loss": 3.5534, "step": 43675 }, { "epoch": 2.9677945373012635, "grad_norm": 0.9011418223381042, "learning_rate": 0.0006290256828373421, "loss": 3.6353, "step": 43680 }, { "epoch": 2.9681342573719256, "grad_norm": 0.836404025554657, "learning_rate": 0.0006289832178285093, "loss": 3.625, "step": 43685 }, { "epoch": 2.9684739774425872, "grad_norm": 0.7148934006690979, "learning_rate": 0.0006289407528196765, "loss": 3.4055, "step": 43690 }, { "epoch": 2.968813697513249, "grad_norm": 1.003218650817871, "learning_rate": 0.0006288982878108439, "loss": 3.7591, "step": 43695 }, { "epoch": 2.969153417583911, "grad_norm": 0.9518901109695435, "learning_rate": 0.0006288558228020112, "loss": 3.5449, "step": 43700 }, { "epoch": 2.9694931376545726, "grad_norm": 1.126798391342163, "learning_rate": 0.0006288133577931784, "loss": 3.5021, "step": 43705 }, { "epoch": 2.969832857725234, "grad_norm": 0.8072933554649353, "learning_rate": 0.0006287708927843458, "loss": 3.5632, "step": 43710 }, { "epoch": 2.9701725777958963, "grad_norm": 1.0812022686004639, "learning_rate": 0.000628728427775513, "loss": 3.3619, "step": 43715 }, { "epoch": 2.970512297866558, "grad_norm": 0.7577311396598816, "learning_rate": 0.0006286859627666802, "loss": 3.3946, "step": 43720 }, { "epoch": 2.9708520179372195, "grad_norm": 0.9825789332389832, "learning_rate": 0.0006286434977578475, "loss": 3.4622, "step": 43725 }, { "epoch": 2.9711917380078816, "grad_norm": 1.282747507095337, "learning_rate": 0.0006286010327490148, "loss": 3.6503, "step": 43730 }, { "epoch": 2.9715314580785432, "grad_norm": 0.8374155759811401, "learning_rate": 0.0006285585677401821, "loss": 3.545, "step": 43735 }, { "epoch": 2.971871178149205, "grad_norm": 0.8747990727424622, "learning_rate": 0.0006285161027313494, "loss": 3.4923, "step": 43740 }, { "epoch": 2.972210898219867, "grad_norm": 0.8463892936706543, "learning_rate": 0.0006284736377225167, "loss": 3.7538, "step": 43745 }, { "epoch": 2.9725506182905286, "grad_norm": 0.7155724167823792, "learning_rate": 0.0006284311727136839, "loss": 3.6562, "step": 43750 }, { "epoch": 2.97289033836119, "grad_norm": 1.040749192237854, "learning_rate": 0.0006283887077048512, "loss": 3.4812, "step": 43755 }, { "epoch": 2.9732300584318523, "grad_norm": 1.0344443321228027, "learning_rate": 0.0006283462426960185, "loss": 3.8361, "step": 43760 }, { "epoch": 2.973569778502514, "grad_norm": 1.138869047164917, "learning_rate": 0.0006283037776871857, "loss": 3.369, "step": 43765 }, { "epoch": 2.9739094985731755, "grad_norm": 0.701641857624054, "learning_rate": 0.0006282613126783531, "loss": 3.6449, "step": 43770 }, { "epoch": 2.9742492186438376, "grad_norm": 0.9684972763061523, "learning_rate": 0.0006282188476695203, "loss": 3.3405, "step": 43775 }, { "epoch": 2.9745889387144993, "grad_norm": 0.9837685823440552, "learning_rate": 0.0006281763826606877, "loss": 3.679, "step": 43780 }, { "epoch": 2.974928658785161, "grad_norm": 0.7664158940315247, "learning_rate": 0.0006281339176518549, "loss": 3.5599, "step": 43785 }, { "epoch": 2.975268378855823, "grad_norm": 0.7252779603004456, "learning_rate": 0.0006280914526430221, "loss": 3.6912, "step": 43790 }, { "epoch": 2.9756080989264846, "grad_norm": 1.0869415998458862, "learning_rate": 0.0006280489876341895, "loss": 3.5824, "step": 43795 }, { "epoch": 2.975947818997146, "grad_norm": 0.7292291522026062, "learning_rate": 0.0006280065226253567, "loss": 3.5245, "step": 43800 }, { "epoch": 2.9762875390678083, "grad_norm": 0.7518904209136963, "learning_rate": 0.000627964057616524, "loss": 3.5599, "step": 43805 }, { "epoch": 2.97662725913847, "grad_norm": 0.9415013790130615, "learning_rate": 0.0006279215926076914, "loss": 3.4884, "step": 43810 }, { "epoch": 2.9769669792091316, "grad_norm": 0.8391397595405579, "learning_rate": 0.0006278791275988586, "loss": 3.6852, "step": 43815 }, { "epoch": 2.9773066992797936, "grad_norm": 1.0144782066345215, "learning_rate": 0.0006278366625900258, "loss": 3.8149, "step": 43820 }, { "epoch": 2.9776464193504553, "grad_norm": 0.6430363059043884, "learning_rate": 0.0006277941975811931, "loss": 3.506, "step": 43825 }, { "epoch": 2.977986139421117, "grad_norm": 0.7965458035469055, "learning_rate": 0.0006277517325723604, "loss": 3.4789, "step": 43830 }, { "epoch": 2.978325859491779, "grad_norm": 0.9330983757972717, "learning_rate": 0.0006277092675635276, "loss": 3.1209, "step": 43835 }, { "epoch": 2.9786655795624406, "grad_norm": 0.7246177792549133, "learning_rate": 0.000627666802554695, "loss": 3.6196, "step": 43840 }, { "epoch": 2.9790052996331022, "grad_norm": 0.7585707902908325, "learning_rate": 0.0006276243375458623, "loss": 3.6361, "step": 43845 }, { "epoch": 2.9793450197037643, "grad_norm": 0.9207903742790222, "learning_rate": 0.0006275818725370295, "loss": 3.6421, "step": 43850 }, { "epoch": 2.979684739774426, "grad_norm": 0.6925026774406433, "learning_rate": 0.0006275394075281968, "loss": 3.3962, "step": 43855 }, { "epoch": 2.9800244598450876, "grad_norm": 1.051146149635315, "learning_rate": 0.000627496942519364, "loss": 3.5245, "step": 43860 }, { "epoch": 2.9803641799157496, "grad_norm": 0.8130782842636108, "learning_rate": 0.0006274544775105313, "loss": 3.4172, "step": 43865 }, { "epoch": 2.9807038999864113, "grad_norm": 0.8268676996231079, "learning_rate": 0.0006274120125016986, "loss": 3.5554, "step": 43870 }, { "epoch": 2.981043620057073, "grad_norm": 0.9722691774368286, "learning_rate": 0.000627369547492866, "loss": 3.3905, "step": 43875 }, { "epoch": 2.981383340127735, "grad_norm": 0.8878602981567383, "learning_rate": 0.0006273270824840332, "loss": 3.7108, "step": 43880 }, { "epoch": 2.9817230601983966, "grad_norm": 0.9585939049720764, "learning_rate": 0.0006272846174752005, "loss": 3.4167, "step": 43885 }, { "epoch": 2.9820627802690582, "grad_norm": 0.9558597207069397, "learning_rate": 0.0006272421524663677, "loss": 3.3112, "step": 43890 }, { "epoch": 2.9824025003397203, "grad_norm": 0.8682817220687866, "learning_rate": 0.000627199687457535, "loss": 3.9436, "step": 43895 }, { "epoch": 2.982742220410382, "grad_norm": 1.224189043045044, "learning_rate": 0.0006271572224487023, "loss": 3.5899, "step": 43900 }, { "epoch": 2.9830819404810436, "grad_norm": 0.8706996440887451, "learning_rate": 0.0006271147574398695, "loss": 3.544, "step": 43905 }, { "epoch": 2.9834216605517057, "grad_norm": 1.1097068786621094, "learning_rate": 0.0006270722924310369, "loss": 3.5458, "step": 43910 }, { "epoch": 2.9837613806223673, "grad_norm": 0.7391234636306763, "learning_rate": 0.0006270298274222042, "loss": 3.6481, "step": 43915 }, { "epoch": 2.984101100693029, "grad_norm": 0.8417186141014099, "learning_rate": 0.0006269873624133714, "loss": 3.5111, "step": 43920 }, { "epoch": 2.984440820763691, "grad_norm": 0.7816815376281738, "learning_rate": 0.0006269448974045386, "loss": 3.582, "step": 43925 }, { "epoch": 2.9847805408343526, "grad_norm": 0.7922887802124023, "learning_rate": 0.000626902432395706, "loss": 3.3545, "step": 43930 }, { "epoch": 2.9851202609050143, "grad_norm": 1.2046717405319214, "learning_rate": 0.0006268599673868732, "loss": 3.2082, "step": 43935 }, { "epoch": 2.985459980975676, "grad_norm": 0.8437392711639404, "learning_rate": 0.0006268175023780404, "loss": 3.4684, "step": 43940 }, { "epoch": 2.985799701046338, "grad_norm": 0.8711814880371094, "learning_rate": 0.0006267750373692079, "loss": 3.493, "step": 43945 }, { "epoch": 2.9861394211169996, "grad_norm": 0.7585463523864746, "learning_rate": 0.0006267325723603751, "loss": 3.5958, "step": 43950 }, { "epoch": 2.9864791411876612, "grad_norm": 0.9516391158103943, "learning_rate": 0.0006266901073515423, "loss": 3.5987, "step": 43955 }, { "epoch": 2.9868188612583233, "grad_norm": 1.0616626739501953, "learning_rate": 0.0006266476423427097, "loss": 3.734, "step": 43960 }, { "epoch": 2.987158581328985, "grad_norm": 0.9060606360435486, "learning_rate": 0.0006266051773338769, "loss": 3.5885, "step": 43965 }, { "epoch": 2.9874983013996466, "grad_norm": 0.8912849426269531, "learning_rate": 0.0006265627123250441, "loss": 3.7553, "step": 43970 }, { "epoch": 2.987838021470308, "grad_norm": 0.6663211584091187, "learning_rate": 0.0006265202473162114, "loss": 3.4388, "step": 43975 }, { "epoch": 2.9881777415409703, "grad_norm": 0.9011244177818298, "learning_rate": 0.0006264777823073788, "loss": 3.5546, "step": 43980 }, { "epoch": 2.988517461611632, "grad_norm": 0.7722564935684204, "learning_rate": 0.000626435317298546, "loss": 3.403, "step": 43985 }, { "epoch": 2.9888571816822935, "grad_norm": 1.4766318798065186, "learning_rate": 0.0006263928522897133, "loss": 3.7331, "step": 43990 }, { "epoch": 2.9891969017529556, "grad_norm": 0.9727010130882263, "learning_rate": 0.0006263503872808806, "loss": 3.786, "step": 43995 }, { "epoch": 2.9895366218236172, "grad_norm": 0.8336761593818665, "learning_rate": 0.0006263079222720478, "loss": 3.4902, "step": 44000 }, { "epoch": 2.989876341894279, "grad_norm": 0.9994022846221924, "learning_rate": 0.0006262654572632151, "loss": 3.555, "step": 44005 }, { "epoch": 2.990216061964941, "grad_norm": 1.0250581502914429, "learning_rate": 0.0006262229922543823, "loss": 3.345, "step": 44010 }, { "epoch": 2.9905557820356026, "grad_norm": 0.913670003414154, "learning_rate": 0.0006261805272455497, "loss": 3.7434, "step": 44015 }, { "epoch": 2.990895502106264, "grad_norm": 0.9201777577400208, "learning_rate": 0.000626138062236717, "loss": 3.5661, "step": 44020 }, { "epoch": 2.9912352221769263, "grad_norm": 0.8307444453239441, "learning_rate": 0.0006260955972278842, "loss": 3.528, "step": 44025 }, { "epoch": 2.991574942247588, "grad_norm": 0.9045605063438416, "learning_rate": 0.0006260531322190515, "loss": 4.1383, "step": 44030 }, { "epoch": 2.9919146623182495, "grad_norm": 0.7937968969345093, "learning_rate": 0.0006260106672102188, "loss": 3.4655, "step": 44035 }, { "epoch": 2.9922543823889116, "grad_norm": 0.7280620336532593, "learning_rate": 0.000625968202201386, "loss": 3.4974, "step": 44040 }, { "epoch": 2.9925941024595732, "grad_norm": 0.6923540830612183, "learning_rate": 0.0006259257371925533, "loss": 3.6847, "step": 44045 }, { "epoch": 2.992933822530235, "grad_norm": 0.8300819993019104, "learning_rate": 0.0006258832721837207, "loss": 3.6558, "step": 44050 }, { "epoch": 2.993273542600897, "grad_norm": 1.0668988227844238, "learning_rate": 0.0006258408071748879, "loss": 3.7649, "step": 44055 }, { "epoch": 2.9936132626715586, "grad_norm": 0.9185171127319336, "learning_rate": 0.0006257983421660551, "loss": 3.6659, "step": 44060 }, { "epoch": 2.99395298274222, "grad_norm": 1.0113152265548706, "learning_rate": 0.0006257558771572225, "loss": 3.5088, "step": 44065 }, { "epoch": 2.9942927028128823, "grad_norm": 1.111271858215332, "learning_rate": 0.0006257134121483897, "loss": 3.6718, "step": 44070 }, { "epoch": 2.994632422883544, "grad_norm": 0.8769662380218506, "learning_rate": 0.0006256709471395569, "loss": 3.5358, "step": 44075 }, { "epoch": 2.9949721429542056, "grad_norm": 1.0024797916412354, "learning_rate": 0.0006256284821307243, "loss": 3.3745, "step": 44080 }, { "epoch": 2.9953118630248676, "grad_norm": 1.2478286027908325, "learning_rate": 0.0006255860171218916, "loss": 3.6312, "step": 44085 }, { "epoch": 2.9956515830955293, "grad_norm": 0.8599347472190857, "learning_rate": 0.0006255435521130588, "loss": 3.4607, "step": 44090 }, { "epoch": 2.995991303166191, "grad_norm": 0.7776811122894287, "learning_rate": 0.0006255010871042262, "loss": 3.7538, "step": 44095 }, { "epoch": 2.996331023236853, "grad_norm": 0.8967260122299194, "learning_rate": 0.0006254586220953934, "loss": 3.648, "step": 44100 }, { "epoch": 2.9966707433075146, "grad_norm": 0.7885599136352539, "learning_rate": 0.0006254161570865606, "loss": 3.754, "step": 44105 }, { "epoch": 2.9970104633781762, "grad_norm": 0.8298906683921814, "learning_rate": 0.000625373692077728, "loss": 3.5024, "step": 44110 }, { "epoch": 2.9973501834488383, "grad_norm": 0.9467365741729736, "learning_rate": 0.0006253312270688952, "loss": 3.3715, "step": 44115 }, { "epoch": 2.9976899035195, "grad_norm": 0.8111266493797302, "learning_rate": 0.0006252887620600626, "loss": 3.686, "step": 44120 }, { "epoch": 2.9980296235901616, "grad_norm": 0.7424034476280212, "learning_rate": 0.0006252462970512298, "loss": 3.7784, "step": 44125 }, { "epoch": 2.9983693436608236, "grad_norm": 1.104880452156067, "learning_rate": 0.0006252038320423971, "loss": 3.72, "step": 44130 }, { "epoch": 2.9987090637314853, "grad_norm": 0.9103474020957947, "learning_rate": 0.0006251613670335644, "loss": 3.657, "step": 44135 }, { "epoch": 2.999048783802147, "grad_norm": 0.8004710078239441, "learning_rate": 0.0006251189020247316, "loss": 3.2919, "step": 44140 }, { "epoch": 2.999388503872809, "grad_norm": 0.8446131348609924, "learning_rate": 0.0006250764370158989, "loss": 3.5441, "step": 44145 }, { "epoch": 2.9997282239434706, "grad_norm": 0.9181636571884155, "learning_rate": 0.0006250339720070662, "loss": 3.5484, "step": 44150 }, { "epoch": 3.0, "eval_bertscore": { "f1": 0.8418314553806632, "precision": 0.8450208992641071, "recall": 0.8394055590690574 }, "eval_bleu_4": 0.016728719458788406, "eval_exact_match": 0.00048454307587944567, "eval_loss": 3.4487621784210205, "eval_meteor": 0.08368355167368755, "eval_rouge": { "rouge1": 0.1103591106566483, "rouge2": 0.015271913591662111, "rougeL": 0.09662826870291116, "rougeLsum": 0.09671776163818707 }, "eval_runtime": 2871.8923, "eval_samples_per_second": 3.593, "eval_steps_per_second": 0.449, "step": 44154 }, { "epoch": 3.0000679440141322, "grad_norm": 0.7587734460830688, "learning_rate": 0.0006249915069982335, "loss": 3.4551, "step": 44155 }, { "epoch": 3.0004076640847943, "grad_norm": 0.8537425994873047, "learning_rate": 0.0006249490419894007, "loss": 3.6393, "step": 44160 }, { "epoch": 3.000747384155456, "grad_norm": 0.9793977737426758, "learning_rate": 0.0006249065769805681, "loss": 3.5345, "step": 44165 }, { "epoch": 3.0010871042261176, "grad_norm": 0.8857905268669128, "learning_rate": 0.0006248641119717353, "loss": 3.3382, "step": 44170 }, { "epoch": 3.0014268242967796, "grad_norm": 1.116859793663025, "learning_rate": 0.0006248216469629025, "loss": 3.5566, "step": 44175 }, { "epoch": 3.0017665443674413, "grad_norm": 0.8154410123825073, "learning_rate": 0.0006247791819540699, "loss": 3.4416, "step": 44180 }, { "epoch": 3.002106264438103, "grad_norm": 0.9563935399055481, "learning_rate": 0.0006247367169452371, "loss": 3.3575, "step": 44185 }, { "epoch": 3.002445984508765, "grad_norm": 0.7413280606269836, "learning_rate": 0.0006246942519364044, "loss": 3.7411, "step": 44190 }, { "epoch": 3.0027857045794266, "grad_norm": 0.9423155188560486, "learning_rate": 0.0006246517869275718, "loss": 3.2664, "step": 44195 }, { "epoch": 3.0031254246500882, "grad_norm": 0.8401862382888794, "learning_rate": 0.000624609321918739, "loss": 3.3992, "step": 44200 }, { "epoch": 3.0034651447207503, "grad_norm": 0.9264494776725769, "learning_rate": 0.0006245668569099062, "loss": 3.5958, "step": 44205 }, { "epoch": 3.003804864791412, "grad_norm": 0.7941858172416687, "learning_rate": 0.0006245243919010735, "loss": 3.5103, "step": 44210 }, { "epoch": 3.0041445848620736, "grad_norm": 0.7986721992492676, "learning_rate": 0.0006244819268922408, "loss": 3.5613, "step": 44215 }, { "epoch": 3.004484304932735, "grad_norm": 1.05695641040802, "learning_rate": 0.000624439461883408, "loss": 3.4487, "step": 44220 }, { "epoch": 3.0048240250033973, "grad_norm": 1.0200462341308594, "learning_rate": 0.0006243969968745754, "loss": 3.5782, "step": 44225 }, { "epoch": 3.005163745074059, "grad_norm": 0.9126034379005432, "learning_rate": 0.0006243545318657427, "loss": 3.353, "step": 44230 }, { "epoch": 3.0055034651447206, "grad_norm": 1.036638617515564, "learning_rate": 0.0006243120668569099, "loss": 3.3516, "step": 44235 }, { "epoch": 3.0058431852153826, "grad_norm": 0.8657658696174622, "learning_rate": 0.0006242696018480772, "loss": 3.5713, "step": 44240 }, { "epoch": 3.0061829052860443, "grad_norm": 0.8542855381965637, "learning_rate": 0.0006242271368392445, "loss": 3.6116, "step": 44245 }, { "epoch": 3.006522625356706, "grad_norm": 0.9780679941177368, "learning_rate": 0.0006241846718304117, "loss": 3.5583, "step": 44250 }, { "epoch": 3.006862345427368, "grad_norm": 0.9350326061248779, "learning_rate": 0.0006241422068215791, "loss": 3.4318, "step": 44255 }, { "epoch": 3.0072020654980296, "grad_norm": 0.910668671131134, "learning_rate": 0.0006240997418127464, "loss": 3.8419, "step": 44260 }, { "epoch": 3.0075417855686912, "grad_norm": 0.9529253244400024, "learning_rate": 0.0006240572768039136, "loss": 3.6944, "step": 44265 }, { "epoch": 3.0078815056393533, "grad_norm": 0.9596734642982483, "learning_rate": 0.0006240148117950809, "loss": 3.1237, "step": 44270 }, { "epoch": 3.008221225710015, "grad_norm": 0.9969460368156433, "learning_rate": 0.0006239723467862481, "loss": 3.4173, "step": 44275 }, { "epoch": 3.0085609457806766, "grad_norm": 0.7946327924728394, "learning_rate": 0.0006239298817774154, "loss": 3.721, "step": 44280 }, { "epoch": 3.0089006658513386, "grad_norm": 0.9425920248031616, "learning_rate": 0.0006238874167685827, "loss": 3.4195, "step": 44285 }, { "epoch": 3.0092403859220003, "grad_norm": 0.9533142447471619, "learning_rate": 0.00062384495175975, "loss": 3.4596, "step": 44290 }, { "epoch": 3.009580105992662, "grad_norm": 0.7369396686553955, "learning_rate": 0.0006238024867509173, "loss": 3.5571, "step": 44295 }, { "epoch": 3.009919826063324, "grad_norm": 0.743521511554718, "learning_rate": 0.0006237600217420846, "loss": 3.4584, "step": 44300 }, { "epoch": 3.0102595461339856, "grad_norm": 0.8170987367630005, "learning_rate": 0.0006237175567332518, "loss": 3.4312, "step": 44305 }, { "epoch": 3.0105992662046472, "grad_norm": 0.8228139877319336, "learning_rate": 0.000623675091724419, "loss": 3.7053, "step": 44310 }, { "epoch": 3.0109389862753093, "grad_norm": 1.0465143918991089, "learning_rate": 0.0006236326267155864, "loss": 3.5666, "step": 44315 }, { "epoch": 3.011278706345971, "grad_norm": 0.7851721048355103, "learning_rate": 0.0006235901617067536, "loss": 3.4488, "step": 44320 }, { "epoch": 3.0116184264166326, "grad_norm": 1.0019341707229614, "learning_rate": 0.0006235476966979209, "loss": 3.5466, "step": 44325 }, { "epoch": 3.0119581464872947, "grad_norm": 0.8480458855628967, "learning_rate": 0.0006235052316890883, "loss": 3.7905, "step": 44330 }, { "epoch": 3.0122978665579563, "grad_norm": 0.7701349258422852, "learning_rate": 0.0006234627666802555, "loss": 3.4941, "step": 44335 }, { "epoch": 3.012637586628618, "grad_norm": 0.814443051815033, "learning_rate": 0.0006234203016714227, "loss": 3.4202, "step": 44340 }, { "epoch": 3.01297730669928, "grad_norm": 1.0867106914520264, "learning_rate": 0.0006233778366625901, "loss": 3.6937, "step": 44345 }, { "epoch": 3.0133170267699416, "grad_norm": 1.02762770652771, "learning_rate": 0.0006233353716537573, "loss": 3.5081, "step": 44350 }, { "epoch": 3.0136567468406033, "grad_norm": 0.9240368008613586, "learning_rate": 0.0006232929066449245, "loss": 3.6061, "step": 44355 }, { "epoch": 3.0139964669112653, "grad_norm": 0.7748308181762695, "learning_rate": 0.000623250441636092, "loss": 3.5716, "step": 44360 }, { "epoch": 3.014336186981927, "grad_norm": 0.8593391180038452, "learning_rate": 0.0006232079766272592, "loss": 3.4778, "step": 44365 }, { "epoch": 3.0146759070525886, "grad_norm": 0.8019334077835083, "learning_rate": 0.0006231655116184264, "loss": 3.532, "step": 44370 }, { "epoch": 3.01501562712325, "grad_norm": 0.7535484433174133, "learning_rate": 0.0006231230466095937, "loss": 3.6166, "step": 44375 }, { "epoch": 3.0153553471939123, "grad_norm": 0.9959530234336853, "learning_rate": 0.000623080581600761, "loss": 3.3402, "step": 44380 }, { "epoch": 3.015695067264574, "grad_norm": 1.0316100120544434, "learning_rate": 0.0006230381165919282, "loss": 3.6328, "step": 44385 }, { "epoch": 3.0160347873352356, "grad_norm": 1.038744568824768, "learning_rate": 0.0006229956515830955, "loss": 3.5862, "step": 44390 }, { "epoch": 3.0163745074058976, "grad_norm": 0.7308980226516724, "learning_rate": 0.0006229531865742629, "loss": 3.3996, "step": 44395 }, { "epoch": 3.0167142274765593, "grad_norm": 0.9256865978240967, "learning_rate": 0.0006229107215654301, "loss": 3.7534, "step": 44400 }, { "epoch": 3.017053947547221, "grad_norm": 0.7495373487472534, "learning_rate": 0.0006228682565565974, "loss": 3.4057, "step": 44405 }, { "epoch": 3.017393667617883, "grad_norm": 0.9175406098365784, "learning_rate": 0.0006228257915477646, "loss": 3.4197, "step": 44410 }, { "epoch": 3.0177333876885446, "grad_norm": 0.9846746921539307, "learning_rate": 0.0006227833265389319, "loss": 3.7763, "step": 44415 }, { "epoch": 3.0180731077592062, "grad_norm": 0.9462878108024597, "learning_rate": 0.0006227408615300992, "loss": 3.65, "step": 44420 }, { "epoch": 3.0184128278298683, "grad_norm": 0.9233075380325317, "learning_rate": 0.0006226983965212664, "loss": 3.7797, "step": 44425 }, { "epoch": 3.01875254790053, "grad_norm": 0.6709443926811218, "learning_rate": 0.0006226559315124338, "loss": 3.6611, "step": 44430 }, { "epoch": 3.0190922679711916, "grad_norm": 0.8499719500541687, "learning_rate": 0.0006226134665036011, "loss": 3.2718, "step": 44435 }, { "epoch": 3.0194319880418536, "grad_norm": 0.7731772661209106, "learning_rate": 0.0006225710014947683, "loss": 3.824, "step": 44440 }, { "epoch": 3.0197717081125153, "grad_norm": 0.8898109197616577, "learning_rate": 0.0006225285364859356, "loss": 3.5272, "step": 44445 }, { "epoch": 3.020111428183177, "grad_norm": 0.8469430208206177, "learning_rate": 0.0006224860714771029, "loss": 3.5453, "step": 44450 }, { "epoch": 3.020451148253839, "grad_norm": 1.038163661956787, "learning_rate": 0.0006224436064682701, "loss": 3.4011, "step": 44455 }, { "epoch": 3.0207908683245006, "grad_norm": 0.850507915019989, "learning_rate": 0.0006224011414594373, "loss": 3.4436, "step": 44460 }, { "epoch": 3.0211305883951622, "grad_norm": 1.0557423830032349, "learning_rate": 0.0006223586764506048, "loss": 3.5964, "step": 44465 }, { "epoch": 3.0214703084658243, "grad_norm": 1.007834792137146, "learning_rate": 0.000622316211441772, "loss": 3.374, "step": 44470 }, { "epoch": 3.021810028536486, "grad_norm": 0.9377970695495605, "learning_rate": 0.0006222737464329393, "loss": 3.5325, "step": 44475 }, { "epoch": 3.0221497486071476, "grad_norm": 0.9997595548629761, "learning_rate": 0.0006222312814241066, "loss": 3.662, "step": 44480 }, { "epoch": 3.0224894686778097, "grad_norm": 0.9916901588439941, "learning_rate": 0.0006221888164152738, "loss": 3.7871, "step": 44485 }, { "epoch": 3.0228291887484713, "grad_norm": 0.7340381145477295, "learning_rate": 0.0006221463514064411, "loss": 3.2203, "step": 44490 }, { "epoch": 3.023168908819133, "grad_norm": 0.9175915122032166, "learning_rate": 0.0006221038863976084, "loss": 3.5158, "step": 44495 }, { "epoch": 3.023508628889795, "grad_norm": 0.8127618432044983, "learning_rate": 0.0006220614213887757, "loss": 3.5503, "step": 44500 }, { "epoch": 3.0238483489604566, "grad_norm": 1.0173509120941162, "learning_rate": 0.000622018956379943, "loss": 3.37, "step": 44505 }, { "epoch": 3.0241880690311183, "grad_norm": 0.8375577926635742, "learning_rate": 0.0006219764913711102, "loss": 3.5834, "step": 44510 }, { "epoch": 3.0245277891017803, "grad_norm": 0.9129244685173035, "learning_rate": 0.0006219340263622775, "loss": 3.4842, "step": 44515 }, { "epoch": 3.024867509172442, "grad_norm": 0.8662765622138977, "learning_rate": 0.0006218915613534448, "loss": 3.4254, "step": 44520 }, { "epoch": 3.0252072292431036, "grad_norm": 0.9503839612007141, "learning_rate": 0.000621849096344612, "loss": 3.4832, "step": 44525 }, { "epoch": 3.0255469493137657, "grad_norm": 1.7057368755340576, "learning_rate": 0.0006218066313357793, "loss": 3.6556, "step": 44530 }, { "epoch": 3.0258866693844273, "grad_norm": 0.8857652544975281, "learning_rate": 0.0006217641663269467, "loss": 3.4592, "step": 44535 }, { "epoch": 3.026226389455089, "grad_norm": 0.8442726135253906, "learning_rate": 0.0006217217013181139, "loss": 4.0455, "step": 44540 }, { "epoch": 3.026566109525751, "grad_norm": 0.8663811087608337, "learning_rate": 0.0006216792363092812, "loss": 3.1774, "step": 44545 }, { "epoch": 3.0269058295964126, "grad_norm": 1.2360427379608154, "learning_rate": 0.0006216367713004485, "loss": 3.5238, "step": 44550 }, { "epoch": 3.0272455496670743, "grad_norm": 0.6989754438400269, "learning_rate": 0.0006215943062916157, "loss": 3.5719, "step": 44555 }, { "epoch": 3.027585269737736, "grad_norm": 1.040585994720459, "learning_rate": 0.0006215518412827829, "loss": 3.4666, "step": 44560 }, { "epoch": 3.027924989808398, "grad_norm": 0.7921519875526428, "learning_rate": 0.0006215093762739503, "loss": 3.6157, "step": 44565 }, { "epoch": 3.0282647098790596, "grad_norm": 0.7128247022628784, "learning_rate": 0.0006214669112651176, "loss": 3.5776, "step": 44570 }, { "epoch": 3.0286044299497212, "grad_norm": 0.9293463230133057, "learning_rate": 0.0006214244462562848, "loss": 3.5391, "step": 44575 }, { "epoch": 3.0289441500203833, "grad_norm": 1.028183102607727, "learning_rate": 0.0006213819812474522, "loss": 3.3579, "step": 44580 }, { "epoch": 3.029283870091045, "grad_norm": 1.0128626823425293, "learning_rate": 0.0006213395162386194, "loss": 3.5641, "step": 44585 }, { "epoch": 3.0296235901617066, "grad_norm": 0.9808063507080078, "learning_rate": 0.0006212970512297866, "loss": 3.3048, "step": 44590 }, { "epoch": 3.0299633102323686, "grad_norm": 1.132185459136963, "learning_rate": 0.000621254586220954, "loss": 3.5066, "step": 44595 }, { "epoch": 3.0303030303030303, "grad_norm": 0.8683560490608215, "learning_rate": 0.0006212121212121212, "loss": 3.5271, "step": 44600 }, { "epoch": 3.030642750373692, "grad_norm": 0.9837058782577515, "learning_rate": 0.0006211696562032885, "loss": 3.4809, "step": 44605 }, { "epoch": 3.030982470444354, "grad_norm": 0.7621415257453918, "learning_rate": 0.0006211271911944558, "loss": 3.445, "step": 44610 }, { "epoch": 3.0313221905150156, "grad_norm": 0.7445390820503235, "learning_rate": 0.0006210847261856231, "loss": 3.4419, "step": 44615 }, { "epoch": 3.0316619105856772, "grad_norm": 0.7593072056770325, "learning_rate": 0.0006210422611767903, "loss": 3.3905, "step": 44620 }, { "epoch": 3.0320016306563393, "grad_norm": 1.0786997079849243, "learning_rate": 0.0006209997961679576, "loss": 3.548, "step": 44625 }, { "epoch": 3.032341350727001, "grad_norm": 0.9297082424163818, "learning_rate": 0.0006209573311591249, "loss": 3.7538, "step": 44630 }, { "epoch": 3.0326810707976626, "grad_norm": 0.9149011969566345, "learning_rate": 0.0006209148661502921, "loss": 3.4998, "step": 44635 }, { "epoch": 3.0330207908683247, "grad_norm": 0.6716964840888977, "learning_rate": 0.0006208724011414595, "loss": 3.2348, "step": 44640 }, { "epoch": 3.0333605109389863, "grad_norm": 0.9719027280807495, "learning_rate": 0.0006208299361326268, "loss": 3.6899, "step": 44645 }, { "epoch": 3.033700231009648, "grad_norm": 0.7154822945594788, "learning_rate": 0.000620787471123794, "loss": 3.1278, "step": 44650 }, { "epoch": 3.03403995108031, "grad_norm": 0.9694234132766724, "learning_rate": 0.0006207450061149613, "loss": 3.5738, "step": 44655 }, { "epoch": 3.0343796711509716, "grad_norm": 0.9827795028686523, "learning_rate": 0.0006207025411061285, "loss": 3.4599, "step": 44660 }, { "epoch": 3.0347193912216333, "grad_norm": 0.8312759399414062, "learning_rate": 0.0006206600760972958, "loss": 3.7417, "step": 44665 }, { "epoch": 3.0350591112922953, "grad_norm": 1.3861132860183716, "learning_rate": 0.0006206176110884631, "loss": 3.2752, "step": 44670 }, { "epoch": 3.035398831362957, "grad_norm": 0.7960460186004639, "learning_rate": 0.0006205751460796304, "loss": 3.348, "step": 44675 }, { "epoch": 3.0357385514336186, "grad_norm": 1.1006197929382324, "learning_rate": 0.0006205326810707977, "loss": 3.5713, "step": 44680 }, { "epoch": 3.0360782715042807, "grad_norm": 0.8786075115203857, "learning_rate": 0.000620490216061965, "loss": 3.6173, "step": 44685 }, { "epoch": 3.0364179915749423, "grad_norm": 0.7931172847747803, "learning_rate": 0.0006204477510531322, "loss": 3.483, "step": 44690 }, { "epoch": 3.036757711645604, "grad_norm": 1.0337727069854736, "learning_rate": 0.0006204052860442994, "loss": 3.3107, "step": 44695 }, { "epoch": 3.037097431716266, "grad_norm": 0.8015468120574951, "learning_rate": 0.0006203628210354668, "loss": 3.5872, "step": 44700 }, { "epoch": 3.0374371517869276, "grad_norm": 0.9476364254951477, "learning_rate": 0.000620320356026634, "loss": 3.3736, "step": 44705 }, { "epoch": 3.0377768718575893, "grad_norm": 0.7108371257781982, "learning_rate": 0.0006202778910178013, "loss": 3.7601, "step": 44710 }, { "epoch": 3.038116591928251, "grad_norm": 0.9984156489372253, "learning_rate": 0.0006202354260089687, "loss": 3.237, "step": 44715 }, { "epoch": 3.038456311998913, "grad_norm": 0.7695490121841431, "learning_rate": 0.0006201929610001359, "loss": 3.4121, "step": 44720 }, { "epoch": 3.0387960320695746, "grad_norm": 0.7655357122421265, "learning_rate": 0.0006201504959913031, "loss": 3.4028, "step": 44725 }, { "epoch": 3.0391357521402362, "grad_norm": 1.0114009380340576, "learning_rate": 0.0006201080309824705, "loss": 3.5339, "step": 44730 }, { "epoch": 3.0394754722108983, "grad_norm": 0.9525306820869446, "learning_rate": 0.0006200655659736377, "loss": 3.6553, "step": 44735 }, { "epoch": 3.03981519228156, "grad_norm": 1.0761363506317139, "learning_rate": 0.0006200231009648049, "loss": 3.178, "step": 44740 }, { "epoch": 3.0401549123522216, "grad_norm": 0.9626399278640747, "learning_rate": 0.0006199806359559724, "loss": 3.6965, "step": 44745 }, { "epoch": 3.0404946324228836, "grad_norm": 0.7250663042068481, "learning_rate": 0.0006199381709471396, "loss": 3.4678, "step": 44750 }, { "epoch": 3.0408343524935453, "grad_norm": 0.7076701521873474, "learning_rate": 0.0006198957059383068, "loss": 3.1853, "step": 44755 }, { "epoch": 3.041174072564207, "grad_norm": 0.9442560076713562, "learning_rate": 0.0006198532409294741, "loss": 3.3991, "step": 44760 }, { "epoch": 3.041513792634869, "grad_norm": 0.7688484191894531, "learning_rate": 0.0006198107759206414, "loss": 3.3604, "step": 44765 }, { "epoch": 3.0418535127055306, "grad_norm": 0.9175731539726257, "learning_rate": 0.0006197683109118086, "loss": 3.1811, "step": 44770 }, { "epoch": 3.0421932327761922, "grad_norm": 1.1826454401016235, "learning_rate": 0.000619725845902976, "loss": 3.5752, "step": 44775 }, { "epoch": 3.0425329528468543, "grad_norm": 1.0257331132888794, "learning_rate": 0.0006196833808941433, "loss": 3.4285, "step": 44780 }, { "epoch": 3.042872672917516, "grad_norm": 0.8988550901412964, "learning_rate": 0.0006196409158853105, "loss": 3.4096, "step": 44785 }, { "epoch": 3.0432123929881776, "grad_norm": 0.9561294913291931, "learning_rate": 0.0006195984508764778, "loss": 3.2367, "step": 44790 }, { "epoch": 3.0435521130588397, "grad_norm": 0.9324541091918945, "learning_rate": 0.000619555985867645, "loss": 3.4837, "step": 44795 }, { "epoch": 3.0438918331295013, "grad_norm": 0.7877375483512878, "learning_rate": 0.0006195135208588123, "loss": 3.5689, "step": 44800 }, { "epoch": 3.044231553200163, "grad_norm": 1.051604151725769, "learning_rate": 0.0006194710558499796, "loss": 3.4706, "step": 44805 }, { "epoch": 3.044571273270825, "grad_norm": 0.761008083820343, "learning_rate": 0.0006194285908411469, "loss": 3.2802, "step": 44810 }, { "epoch": 3.0449109933414866, "grad_norm": 0.7640891075134277, "learning_rate": 0.0006193861258323143, "loss": 3.7687, "step": 44815 }, { "epoch": 3.0452507134121483, "grad_norm": 1.0195801258087158, "learning_rate": 0.0006193436608234815, "loss": 3.5593, "step": 44820 }, { "epoch": 3.0455904334828103, "grad_norm": 0.6773487329483032, "learning_rate": 0.0006193011958146487, "loss": 3.5398, "step": 44825 }, { "epoch": 3.045930153553472, "grad_norm": 0.7669513821601868, "learning_rate": 0.0006192587308058161, "loss": 3.5649, "step": 44830 }, { "epoch": 3.0462698736241336, "grad_norm": 0.8625152111053467, "learning_rate": 0.0006192162657969833, "loss": 3.2235, "step": 44835 }, { "epoch": 3.0466095936947957, "grad_norm": 0.8469083905220032, "learning_rate": 0.0006191738007881505, "loss": 3.3657, "step": 44840 }, { "epoch": 3.0469493137654573, "grad_norm": 0.8547325134277344, "learning_rate": 0.000619131335779318, "loss": 3.3513, "step": 44845 }, { "epoch": 3.047289033836119, "grad_norm": 0.8656178116798401, "learning_rate": 0.0006190888707704852, "loss": 3.6527, "step": 44850 }, { "epoch": 3.047628753906781, "grad_norm": 0.7473421692848206, "learning_rate": 0.0006190464057616524, "loss": 3.5475, "step": 44855 }, { "epoch": 3.0479684739774426, "grad_norm": 0.8366896510124207, "learning_rate": 0.0006190039407528197, "loss": 3.3631, "step": 44860 }, { "epoch": 3.0483081940481043, "grad_norm": 1.0271514654159546, "learning_rate": 0.000618961475743987, "loss": 3.47, "step": 44865 }, { "epoch": 3.0486479141187663, "grad_norm": 0.8512331247329712, "learning_rate": 0.0006189190107351542, "loss": 3.4168, "step": 44870 }, { "epoch": 3.048987634189428, "grad_norm": 0.9701826572418213, "learning_rate": 0.0006188765457263215, "loss": 3.4582, "step": 44875 }, { "epoch": 3.0493273542600896, "grad_norm": 0.9623094797134399, "learning_rate": 0.0006188340807174889, "loss": 3.3999, "step": 44880 }, { "epoch": 3.0496670743307517, "grad_norm": 1.053808331489563, "learning_rate": 0.0006187916157086561, "loss": 3.5541, "step": 44885 }, { "epoch": 3.0500067944014133, "grad_norm": 0.8328718543052673, "learning_rate": 0.0006187491506998234, "loss": 3.6691, "step": 44890 }, { "epoch": 3.050346514472075, "grad_norm": 0.88230299949646, "learning_rate": 0.0006187066856909906, "loss": 3.4575, "step": 44895 }, { "epoch": 3.0506862345427366, "grad_norm": 1.1323318481445312, "learning_rate": 0.0006186642206821579, "loss": 3.5895, "step": 44900 }, { "epoch": 3.0510259546133986, "grad_norm": 0.7941529750823975, "learning_rate": 0.0006186217556733252, "loss": 3.2563, "step": 44905 }, { "epoch": 3.0513656746840603, "grad_norm": 1.0307403802871704, "learning_rate": 0.0006185792906644924, "loss": 3.4624, "step": 44910 }, { "epoch": 3.051705394754722, "grad_norm": 0.8485110998153687, "learning_rate": 0.0006185368256556598, "loss": 3.7035, "step": 44915 }, { "epoch": 3.052045114825384, "grad_norm": 0.7861713171005249, "learning_rate": 0.0006184943606468271, "loss": 3.6012, "step": 44920 }, { "epoch": 3.0523848348960456, "grad_norm": 0.9029018878936768, "learning_rate": 0.0006184518956379943, "loss": 3.5309, "step": 44925 }, { "epoch": 3.0527245549667072, "grad_norm": 1.1556962728500366, "learning_rate": 0.0006184094306291616, "loss": 3.5723, "step": 44930 }, { "epoch": 3.0530642750373693, "grad_norm": 1.5281010866165161, "learning_rate": 0.0006183669656203289, "loss": 3.2678, "step": 44935 }, { "epoch": 3.053403995108031, "grad_norm": 0.921668529510498, "learning_rate": 0.0006183245006114961, "loss": 3.464, "step": 44940 }, { "epoch": 3.0537437151786926, "grad_norm": 0.7961602807044983, "learning_rate": 0.0006182820356026633, "loss": 3.6467, "step": 44945 }, { "epoch": 3.0540834352493547, "grad_norm": 0.8791989684104919, "learning_rate": 0.0006182395705938308, "loss": 3.5686, "step": 44950 }, { "epoch": 3.0544231553200163, "grad_norm": 0.7829093337059021, "learning_rate": 0.000618197105584998, "loss": 3.3888, "step": 44955 }, { "epoch": 3.054762875390678, "grad_norm": 0.6417112350463867, "learning_rate": 0.0006181546405761652, "loss": 3.5382, "step": 44960 }, { "epoch": 3.05510259546134, "grad_norm": 0.9146685600280762, "learning_rate": 0.0006181121755673326, "loss": 3.4807, "step": 44965 }, { "epoch": 3.0554423155320016, "grad_norm": 0.8432337045669556, "learning_rate": 0.0006180697105584998, "loss": 3.3869, "step": 44970 }, { "epoch": 3.0557820356026633, "grad_norm": 0.9609442353248596, "learning_rate": 0.000618027245549667, "loss": 3.5435, "step": 44975 }, { "epoch": 3.0561217556733253, "grad_norm": 0.9016304016113281, "learning_rate": 0.0006179847805408344, "loss": 3.5718, "step": 44980 }, { "epoch": 3.056461475743987, "grad_norm": 0.7896712422370911, "learning_rate": 0.0006179423155320017, "loss": 3.5571, "step": 44985 }, { "epoch": 3.0568011958146486, "grad_norm": 0.8888314366340637, "learning_rate": 0.0006178998505231689, "loss": 3.5939, "step": 44990 }, { "epoch": 3.0571409158853107, "grad_norm": 0.8216575384140015, "learning_rate": 0.0006178573855143363, "loss": 3.237, "step": 44995 }, { "epoch": 3.0574806359559723, "grad_norm": 0.8404461145401001, "learning_rate": 0.0006178149205055035, "loss": 3.5068, "step": 45000 }, { "epoch": 3.057820356026634, "grad_norm": 0.8509763479232788, "learning_rate": 0.0006177724554966707, "loss": 3.3418, "step": 45005 }, { "epoch": 3.058160076097296, "grad_norm": 0.8443201184272766, "learning_rate": 0.000617729990487838, "loss": 3.2105, "step": 45010 }, { "epoch": 3.0584997961679576, "grad_norm": 0.8886515498161316, "learning_rate": 0.0006176875254790053, "loss": 3.6226, "step": 45015 }, { "epoch": 3.0588395162386193, "grad_norm": 0.9703799486160278, "learning_rate": 0.0006176450604701726, "loss": 3.2671, "step": 45020 }, { "epoch": 3.0591792363092813, "grad_norm": 0.7728033661842346, "learning_rate": 0.0006176025954613399, "loss": 3.5969, "step": 45025 }, { "epoch": 3.059518956379943, "grad_norm": 0.8360174894332886, "learning_rate": 0.0006175601304525072, "loss": 3.6248, "step": 45030 }, { "epoch": 3.0598586764506046, "grad_norm": 1.0041223764419556, "learning_rate": 0.0006175176654436744, "loss": 3.4198, "step": 45035 }, { "epoch": 3.0601983965212667, "grad_norm": 0.9259546995162964, "learning_rate": 0.0006174752004348417, "loss": 3.6459, "step": 45040 }, { "epoch": 3.0605381165919283, "grad_norm": 0.8048959374427795, "learning_rate": 0.0006174327354260089, "loss": 3.3272, "step": 45045 }, { "epoch": 3.06087783666259, "grad_norm": 0.9225819706916809, "learning_rate": 0.0006173902704171762, "loss": 3.4202, "step": 45050 }, { "epoch": 3.0612175567332516, "grad_norm": 0.8617943525314331, "learning_rate": 0.0006173478054083436, "loss": 3.4648, "step": 45055 }, { "epoch": 3.0615572768039137, "grad_norm": 0.7335138916969299, "learning_rate": 0.0006173053403995108, "loss": 3.5822, "step": 45060 }, { "epoch": 3.0618969968745753, "grad_norm": 1.068841814994812, "learning_rate": 0.0006172628753906781, "loss": 3.5074, "step": 45065 }, { "epoch": 3.062236716945237, "grad_norm": 1.107946515083313, "learning_rate": 0.0006172204103818454, "loss": 3.3981, "step": 45070 }, { "epoch": 3.062576437015899, "grad_norm": 0.8038787245750427, "learning_rate": 0.0006171779453730126, "loss": 3.8388, "step": 45075 }, { "epoch": 3.0629161570865606, "grad_norm": 0.6807218790054321, "learning_rate": 0.0006171354803641798, "loss": 3.5894, "step": 45080 }, { "epoch": 3.0632558771572223, "grad_norm": 0.8259105682373047, "learning_rate": 0.0006170930153553472, "loss": 3.7078, "step": 45085 }, { "epoch": 3.0635955972278843, "grad_norm": 0.7785531878471375, "learning_rate": 0.0006170505503465145, "loss": 3.4476, "step": 45090 }, { "epoch": 3.063935317298546, "grad_norm": 0.7262071967124939, "learning_rate": 0.0006170080853376817, "loss": 3.4877, "step": 45095 }, { "epoch": 3.0642750373692076, "grad_norm": 0.9480195045471191, "learning_rate": 0.0006169656203288491, "loss": 3.4708, "step": 45100 }, { "epoch": 3.0646147574398697, "grad_norm": 1.0308184623718262, "learning_rate": 0.0006169231553200163, "loss": 3.6492, "step": 45105 }, { "epoch": 3.0649544775105313, "grad_norm": 0.7712064981460571, "learning_rate": 0.0006168806903111835, "loss": 3.7732, "step": 45110 }, { "epoch": 3.065294197581193, "grad_norm": 0.9747133851051331, "learning_rate": 0.0006168382253023509, "loss": 3.4881, "step": 45115 }, { "epoch": 3.065633917651855, "grad_norm": 0.7935348749160767, "learning_rate": 0.0006167957602935181, "loss": 3.5671, "step": 45120 }, { "epoch": 3.0659736377225166, "grad_norm": 0.9908781051635742, "learning_rate": 0.0006167532952846854, "loss": 3.4903, "step": 45125 }, { "epoch": 3.0663133577931783, "grad_norm": 0.9632322192192078, "learning_rate": 0.0006167108302758528, "loss": 3.481, "step": 45130 }, { "epoch": 3.0666530778638403, "grad_norm": 0.8346583843231201, "learning_rate": 0.00061666836526702, "loss": 3.4845, "step": 45135 }, { "epoch": 3.066992797934502, "grad_norm": 1.064267635345459, "learning_rate": 0.0006166259002581872, "loss": 3.3632, "step": 45140 }, { "epoch": 3.0673325180051636, "grad_norm": 0.9516655802726746, "learning_rate": 0.0006165834352493545, "loss": 3.3366, "step": 45145 }, { "epoch": 3.0676722380758257, "grad_norm": 0.9191951155662537, "learning_rate": 0.0006165409702405218, "loss": 3.5985, "step": 45150 }, { "epoch": 3.0680119581464873, "grad_norm": 1.1472865343093872, "learning_rate": 0.0006164985052316891, "loss": 3.4945, "step": 45155 }, { "epoch": 3.068351678217149, "grad_norm": 0.7719565033912659, "learning_rate": 0.0006164560402228564, "loss": 3.449, "step": 45160 }, { "epoch": 3.068691398287811, "grad_norm": 0.7928420305252075, "learning_rate": 0.0006164135752140237, "loss": 3.6857, "step": 45165 }, { "epoch": 3.0690311183584726, "grad_norm": 0.8342414498329163, "learning_rate": 0.000616371110205191, "loss": 3.531, "step": 45170 }, { "epoch": 3.0693708384291343, "grad_norm": 1.037482500076294, "learning_rate": 0.0006163286451963582, "loss": 3.4296, "step": 45175 }, { "epoch": 3.0697105584997963, "grad_norm": 5.440883159637451, "learning_rate": 0.0006162861801875255, "loss": 3.4942, "step": 45180 }, { "epoch": 3.070050278570458, "grad_norm": 0.9358147382736206, "learning_rate": 0.0006162437151786928, "loss": 3.5077, "step": 45185 }, { "epoch": 3.0703899986411196, "grad_norm": 0.8425736427307129, "learning_rate": 0.00061620125016986, "loss": 3.6558, "step": 45190 }, { "epoch": 3.0707297187117817, "grad_norm": 0.9440665245056152, "learning_rate": 0.0006161587851610273, "loss": 3.3705, "step": 45195 }, { "epoch": 3.0710694387824433, "grad_norm": 0.8631463050842285, "learning_rate": 0.0006161163201521947, "loss": 3.4484, "step": 45200 }, { "epoch": 3.071409158853105, "grad_norm": 0.8975459933280945, "learning_rate": 0.0006160738551433619, "loss": 3.1227, "step": 45205 }, { "epoch": 3.071748878923767, "grad_norm": 1.028167486190796, "learning_rate": 0.0006160313901345291, "loss": 3.5941, "step": 45210 }, { "epoch": 3.0720885989944287, "grad_norm": 0.9432021379470825, "learning_rate": 0.0006159889251256965, "loss": 3.5403, "step": 45215 }, { "epoch": 3.0724283190650903, "grad_norm": 0.8812975883483887, "learning_rate": 0.0006159464601168637, "loss": 3.529, "step": 45220 }, { "epoch": 3.0727680391357524, "grad_norm": 0.9139883518218994, "learning_rate": 0.0006159039951080309, "loss": 3.5489, "step": 45225 }, { "epoch": 3.073107759206414, "grad_norm": 0.7881546020507812, "learning_rate": 0.0006158615300991984, "loss": 3.3667, "step": 45230 }, { "epoch": 3.0734474792770756, "grad_norm": 0.9506675004959106, "learning_rate": 0.0006158190650903656, "loss": 3.1087, "step": 45235 }, { "epoch": 3.0737871993477373, "grad_norm": 0.8200773000717163, "learning_rate": 0.0006157766000815328, "loss": 3.5376, "step": 45240 }, { "epoch": 3.0741269194183993, "grad_norm": 0.9803846478462219, "learning_rate": 0.0006157341350727001, "loss": 3.759, "step": 45245 }, { "epoch": 3.074466639489061, "grad_norm": 1.1714228391647339, "learning_rate": 0.0006156916700638674, "loss": 3.8305, "step": 45250 }, { "epoch": 3.0748063595597226, "grad_norm": 0.7777289748191833, "learning_rate": 0.0006156492050550346, "loss": 3.5223, "step": 45255 }, { "epoch": 3.0751460796303847, "grad_norm": 1.1189665794372559, "learning_rate": 0.0006156067400462019, "loss": 3.2781, "step": 45260 }, { "epoch": 3.0754857997010463, "grad_norm": 1.408313512802124, "learning_rate": 0.0006155642750373693, "loss": 3.4674, "step": 45265 }, { "epoch": 3.075825519771708, "grad_norm": 1.066909670829773, "learning_rate": 0.0006155218100285365, "loss": 3.2782, "step": 45270 }, { "epoch": 3.07616523984237, "grad_norm": 0.8093449473381042, "learning_rate": 0.0006154793450197038, "loss": 3.2382, "step": 45275 }, { "epoch": 3.0765049599130316, "grad_norm": 0.7433966398239136, "learning_rate": 0.000615436880010871, "loss": 3.346, "step": 45280 }, { "epoch": 3.0768446799836933, "grad_norm": 0.7567658424377441, "learning_rate": 0.0006153944150020383, "loss": 3.726, "step": 45285 }, { "epoch": 3.0771844000543553, "grad_norm": 0.6373672485351562, "learning_rate": 0.0006153519499932056, "loss": 3.4846, "step": 45290 }, { "epoch": 3.077524120125017, "grad_norm": 1.0138905048370361, "learning_rate": 0.0006153094849843728, "loss": 3.5036, "step": 45295 }, { "epoch": 3.0778638401956786, "grad_norm": 0.9293628931045532, "learning_rate": 0.0006152670199755402, "loss": 3.6881, "step": 45300 }, { "epoch": 3.0782035602663407, "grad_norm": 0.6633815765380859, "learning_rate": 0.0006152245549667075, "loss": 3.4591, "step": 45305 }, { "epoch": 3.0785432803370023, "grad_norm": 1.136802315711975, "learning_rate": 0.0006151820899578747, "loss": 3.5295, "step": 45310 }, { "epoch": 3.078883000407664, "grad_norm": 0.6393746733665466, "learning_rate": 0.000615139624949042, "loss": 3.6466, "step": 45315 }, { "epoch": 3.079222720478326, "grad_norm": 0.9364288449287415, "learning_rate": 0.0006150971599402093, "loss": 3.4708, "step": 45320 }, { "epoch": 3.0795624405489876, "grad_norm": 0.7833111882209778, "learning_rate": 0.0006150546949313765, "loss": 3.4949, "step": 45325 }, { "epoch": 3.0799021606196493, "grad_norm": 0.864564061164856, "learning_rate": 0.0006150122299225437, "loss": 3.516, "step": 45330 }, { "epoch": 3.0802418806903114, "grad_norm": 0.9636841416358948, "learning_rate": 0.0006149697649137112, "loss": 3.629, "step": 45335 }, { "epoch": 3.080581600760973, "grad_norm": 0.8058896660804749, "learning_rate": 0.0006149272999048784, "loss": 3.4274, "step": 45340 }, { "epoch": 3.0809213208316346, "grad_norm": 0.8056949973106384, "learning_rate": 0.0006148848348960456, "loss": 3.7142, "step": 45345 }, { "epoch": 3.0812610409022967, "grad_norm": 0.9493510127067566, "learning_rate": 0.000614842369887213, "loss": 3.5336, "step": 45350 }, { "epoch": 3.0816007609729583, "grad_norm": 1.0686918497085571, "learning_rate": 0.0006147999048783802, "loss": 3.4349, "step": 45355 }, { "epoch": 3.08194048104362, "grad_norm": 0.9191470742225647, "learning_rate": 0.0006147574398695474, "loss": 3.6214, "step": 45360 }, { "epoch": 3.082280201114282, "grad_norm": 0.9779872298240662, "learning_rate": 0.0006147149748607149, "loss": 3.4851, "step": 45365 }, { "epoch": 3.0826199211849437, "grad_norm": 0.9575986266136169, "learning_rate": 0.0006146725098518821, "loss": 3.5434, "step": 45370 }, { "epoch": 3.0829596412556053, "grad_norm": 0.8877205848693848, "learning_rate": 0.0006146300448430493, "loss": 3.0983, "step": 45375 }, { "epoch": 3.0832993613262674, "grad_norm": 0.849320113658905, "learning_rate": 0.0006145875798342167, "loss": 3.5135, "step": 45380 }, { "epoch": 3.083639081396929, "grad_norm": 0.857899010181427, "learning_rate": 0.0006145451148253839, "loss": 3.5719, "step": 45385 }, { "epoch": 3.0839788014675906, "grad_norm": 1.0104761123657227, "learning_rate": 0.0006145026498165511, "loss": 3.5767, "step": 45390 }, { "epoch": 3.0843185215382523, "grad_norm": 0.7928483486175537, "learning_rate": 0.0006144601848077184, "loss": 3.5407, "step": 45395 }, { "epoch": 3.0846582416089143, "grad_norm": 1.0758967399597168, "learning_rate": 0.0006144177197988858, "loss": 3.5006, "step": 45400 }, { "epoch": 3.084997961679576, "grad_norm": 1.118409276008606, "learning_rate": 0.000614375254790053, "loss": 3.4327, "step": 45405 }, { "epoch": 3.0853376817502376, "grad_norm": 0.9949268698692322, "learning_rate": 0.0006143327897812203, "loss": 3.3565, "step": 45410 }, { "epoch": 3.0856774018208997, "grad_norm": 0.956587553024292, "learning_rate": 0.0006142903247723876, "loss": 3.3409, "step": 45415 }, { "epoch": 3.0860171218915613, "grad_norm": 1.0234997272491455, "learning_rate": 0.0006142478597635548, "loss": 3.4243, "step": 45420 }, { "epoch": 3.086356841962223, "grad_norm": 1.0361543893814087, "learning_rate": 0.0006142053947547221, "loss": 3.5781, "step": 45425 }, { "epoch": 3.086696562032885, "grad_norm": 0.8830130696296692, "learning_rate": 0.0006141629297458893, "loss": 3.5968, "step": 45430 }, { "epoch": 3.0870362821035466, "grad_norm": 0.8168507218360901, "learning_rate": 0.0006141204647370567, "loss": 3.6369, "step": 45435 }, { "epoch": 3.0873760021742083, "grad_norm": 0.866477906703949, "learning_rate": 0.000614077999728224, "loss": 3.4407, "step": 45440 }, { "epoch": 3.0877157222448703, "grad_norm": 0.9973773956298828, "learning_rate": 0.0006140355347193912, "loss": 3.4483, "step": 45445 }, { "epoch": 3.088055442315532, "grad_norm": 0.9263100028038025, "learning_rate": 0.0006139930697105585, "loss": 3.8532, "step": 45450 }, { "epoch": 3.0883951623861936, "grad_norm": 0.827989935874939, "learning_rate": 0.0006139506047017258, "loss": 3.5145, "step": 45455 }, { "epoch": 3.0887348824568557, "grad_norm": 0.887234628200531, "learning_rate": 0.000613908139692893, "loss": 3.5755, "step": 45460 }, { "epoch": 3.0890746025275173, "grad_norm": 0.909027636051178, "learning_rate": 0.0006138656746840603, "loss": 3.3482, "step": 45465 }, { "epoch": 3.089414322598179, "grad_norm": 0.7998340725898743, "learning_rate": 0.0006138232096752277, "loss": 3.3738, "step": 45470 }, { "epoch": 3.089754042668841, "grad_norm": 0.8746528625488281, "learning_rate": 0.0006137807446663949, "loss": 3.282, "step": 45475 }, { "epoch": 3.0900937627395026, "grad_norm": 0.8293395042419434, "learning_rate": 0.0006137382796575621, "loss": 3.2905, "step": 45480 }, { "epoch": 3.0904334828101643, "grad_norm": 0.7804747223854065, "learning_rate": 0.0006136958146487295, "loss": 3.5026, "step": 45485 }, { "epoch": 3.0907732028808264, "grad_norm": 0.7489993572235107, "learning_rate": 0.0006136533496398967, "loss": 3.6158, "step": 45490 }, { "epoch": 3.091112922951488, "grad_norm": 0.9320643544197083, "learning_rate": 0.000613610884631064, "loss": 3.5148, "step": 45495 }, { "epoch": 3.0914526430221496, "grad_norm": 1.1442352533340454, "learning_rate": 0.0006135684196222313, "loss": 3.5386, "step": 45500 }, { "epoch": 3.0917923630928117, "grad_norm": 1.1635397672653198, "learning_rate": 0.0006135259546133986, "loss": 3.3985, "step": 45505 }, { "epoch": 3.0921320831634733, "grad_norm": 0.9988688826560974, "learning_rate": 0.0006134834896045659, "loss": 3.6746, "step": 45510 }, { "epoch": 3.092471803234135, "grad_norm": 1.1425533294677734, "learning_rate": 0.0006134410245957332, "loss": 3.6726, "step": 45515 }, { "epoch": 3.092811523304797, "grad_norm": 0.5993607044219971, "learning_rate": 0.0006133985595869004, "loss": 3.6583, "step": 45520 }, { "epoch": 3.0931512433754587, "grad_norm": 1.0234966278076172, "learning_rate": 0.0006133560945780677, "loss": 3.5852, "step": 45525 }, { "epoch": 3.0934909634461203, "grad_norm": 0.9165229797363281, "learning_rate": 0.000613313629569235, "loss": 3.6828, "step": 45530 }, { "epoch": 3.0938306835167824, "grad_norm": 1.1545401811599731, "learning_rate": 0.0006132711645604022, "loss": 3.4452, "step": 45535 }, { "epoch": 3.094170403587444, "grad_norm": 0.6971798539161682, "learning_rate": 0.0006132286995515696, "loss": 3.5657, "step": 45540 }, { "epoch": 3.0945101236581056, "grad_norm": 0.8443059325218201, "learning_rate": 0.0006131862345427368, "loss": 3.7617, "step": 45545 }, { "epoch": 3.0948498437287677, "grad_norm": 0.9233357310295105, "learning_rate": 0.0006131437695339041, "loss": 3.5182, "step": 45550 }, { "epoch": 3.0951895637994293, "grad_norm": 0.827242374420166, "learning_rate": 0.0006131013045250714, "loss": 3.6712, "step": 45555 }, { "epoch": 3.095529283870091, "grad_norm": 0.8903420567512512, "learning_rate": 0.0006130588395162386, "loss": 3.5544, "step": 45560 }, { "epoch": 3.095869003940753, "grad_norm": 0.952860414981842, "learning_rate": 0.0006130163745074059, "loss": 3.5172, "step": 45565 }, { "epoch": 3.0962087240114147, "grad_norm": 0.9107601046562195, "learning_rate": 0.0006129739094985732, "loss": 3.5609, "step": 45570 }, { "epoch": 3.0965484440820763, "grad_norm": 0.7514654994010925, "learning_rate": 0.0006129314444897405, "loss": 3.5451, "step": 45575 }, { "epoch": 3.096888164152738, "grad_norm": 0.8514201641082764, "learning_rate": 0.0006128889794809077, "loss": 3.3305, "step": 45580 }, { "epoch": 3.0972278842234, "grad_norm": 0.7876724004745483, "learning_rate": 0.0006128465144720751, "loss": 3.6559, "step": 45585 }, { "epoch": 3.0975676042940616, "grad_norm": 1.1478502750396729, "learning_rate": 0.0006128040494632423, "loss": 3.4563, "step": 45590 }, { "epoch": 3.0979073243647233, "grad_norm": 0.6941608190536499, "learning_rate": 0.0006127615844544095, "loss": 3.3395, "step": 45595 }, { "epoch": 3.0982470444353853, "grad_norm": 0.9790254235267639, "learning_rate": 0.0006127191194455769, "loss": 3.4416, "step": 45600 }, { "epoch": 3.098586764506047, "grad_norm": 0.6391562819480896, "learning_rate": 0.0006126766544367441, "loss": 3.394, "step": 45605 }, { "epoch": 3.0989264845767086, "grad_norm": 0.7648009657859802, "learning_rate": 0.0006126341894279114, "loss": 3.3705, "step": 45610 }, { "epoch": 3.0992662046473707, "grad_norm": 0.9162180423736572, "learning_rate": 0.0006125917244190788, "loss": 3.2821, "step": 45615 }, { "epoch": 3.0996059247180323, "grad_norm": 0.7310871481895447, "learning_rate": 0.000612549259410246, "loss": 3.5377, "step": 45620 }, { "epoch": 3.099945644788694, "grad_norm": 1.382814884185791, "learning_rate": 0.0006125067944014132, "loss": 3.2878, "step": 45625 }, { "epoch": 3.100285364859356, "grad_norm": 0.8591187000274658, "learning_rate": 0.0006124643293925805, "loss": 3.487, "step": 45630 }, { "epoch": 3.1006250849300176, "grad_norm": 1.040737271308899, "learning_rate": 0.0006124218643837478, "loss": 3.5998, "step": 45635 }, { "epoch": 3.1009648050006793, "grad_norm": 0.8227304220199585, "learning_rate": 0.000612379399374915, "loss": 3.2118, "step": 45640 }, { "epoch": 3.1013045250713414, "grad_norm": 1.0637708902359009, "learning_rate": 0.0006123369343660824, "loss": 3.592, "step": 45645 }, { "epoch": 3.101644245142003, "grad_norm": 0.8912635445594788, "learning_rate": 0.0006122944693572497, "loss": 3.4011, "step": 45650 }, { "epoch": 3.1019839652126646, "grad_norm": 0.8216953873634338, "learning_rate": 0.0006122520043484169, "loss": 3.5048, "step": 45655 }, { "epoch": 3.1023236852833267, "grad_norm": 1.0509198904037476, "learning_rate": 0.0006122095393395842, "loss": 3.7288, "step": 45660 }, { "epoch": 3.1026634053539883, "grad_norm": 1.0647850036621094, "learning_rate": 0.0006121670743307515, "loss": 3.3626, "step": 45665 }, { "epoch": 3.10300312542465, "grad_norm": 1.048405647277832, "learning_rate": 0.0006121246093219187, "loss": 3.3651, "step": 45670 }, { "epoch": 3.103342845495312, "grad_norm": 0.8326548933982849, "learning_rate": 0.000612082144313086, "loss": 3.126, "step": 45675 }, { "epoch": 3.1036825655659737, "grad_norm": 1.2499514818191528, "learning_rate": 0.0006120396793042534, "loss": 3.6108, "step": 45680 }, { "epoch": 3.1040222856366353, "grad_norm": 0.7635319828987122, "learning_rate": 0.0006119972142954206, "loss": 3.709, "step": 45685 }, { "epoch": 3.1043620057072974, "grad_norm": 1.0022763013839722, "learning_rate": 0.0006119547492865879, "loss": 3.4879, "step": 45690 }, { "epoch": 3.104701725777959, "grad_norm": 1.2274727821350098, "learning_rate": 0.0006119122842777551, "loss": 3.4572, "step": 45695 }, { "epoch": 3.1050414458486206, "grad_norm": 1.105048418045044, "learning_rate": 0.0006118698192689224, "loss": 3.7056, "step": 45700 }, { "epoch": 3.1053811659192827, "grad_norm": 1.2544480562210083, "learning_rate": 0.0006118273542600897, "loss": 3.5978, "step": 45705 }, { "epoch": 3.1057208859899443, "grad_norm": 0.956779956817627, "learning_rate": 0.0006117848892512569, "loss": 3.7558, "step": 45710 }, { "epoch": 3.106060606060606, "grad_norm": 1.2315346002578735, "learning_rate": 0.0006117424242424243, "loss": 3.3869, "step": 45715 }, { "epoch": 3.106400326131268, "grad_norm": 0.772576093673706, "learning_rate": 0.0006116999592335916, "loss": 3.4572, "step": 45720 }, { "epoch": 3.1067400462019297, "grad_norm": 0.7742746472358704, "learning_rate": 0.0006116574942247588, "loss": 3.4549, "step": 45725 }, { "epoch": 3.1070797662725913, "grad_norm": 0.7628903985023499, "learning_rate": 0.000611615029215926, "loss": 3.3794, "step": 45730 }, { "epoch": 3.107419486343253, "grad_norm": 0.9067809581756592, "learning_rate": 0.0006115725642070934, "loss": 3.4851, "step": 45735 }, { "epoch": 3.107759206413915, "grad_norm": 1.1830251216888428, "learning_rate": 0.0006115300991982606, "loss": 3.4939, "step": 45740 }, { "epoch": 3.1080989264845766, "grad_norm": 0.9798082709312439, "learning_rate": 0.0006114876341894278, "loss": 3.6278, "step": 45745 }, { "epoch": 3.1084386465552383, "grad_norm": 1.1009111404418945, "learning_rate": 0.0006114451691805953, "loss": 3.665, "step": 45750 }, { "epoch": 3.1087783666259003, "grad_norm": 1.0659257173538208, "learning_rate": 0.0006114027041717625, "loss": 3.4826, "step": 45755 }, { "epoch": 3.109118086696562, "grad_norm": 0.9402558207511902, "learning_rate": 0.0006113602391629297, "loss": 3.5599, "step": 45760 }, { "epoch": 3.1094578067672236, "grad_norm": 0.9770012497901917, "learning_rate": 0.0006113177741540971, "loss": 3.2657, "step": 45765 }, { "epoch": 3.1097975268378857, "grad_norm": 1.1731699705123901, "learning_rate": 0.0006112753091452643, "loss": 3.4244, "step": 45770 }, { "epoch": 3.1101372469085473, "grad_norm": 0.7399146556854248, "learning_rate": 0.0006112328441364315, "loss": 3.4765, "step": 45775 }, { "epoch": 3.110476966979209, "grad_norm": 0.8070579171180725, "learning_rate": 0.0006111903791275988, "loss": 3.6267, "step": 45780 }, { "epoch": 3.110816687049871, "grad_norm": 0.8490104675292969, "learning_rate": 0.0006111479141187662, "loss": 3.577, "step": 45785 }, { "epoch": 3.1111564071205327, "grad_norm": 0.879550039768219, "learning_rate": 0.0006111054491099334, "loss": 3.788, "step": 45790 }, { "epoch": 3.1114961271911943, "grad_norm": 1.026548981666565, "learning_rate": 0.0006110629841011007, "loss": 3.6224, "step": 45795 }, { "epoch": 3.1118358472618564, "grad_norm": 0.884977400302887, "learning_rate": 0.000611020519092268, "loss": 3.7323, "step": 45800 }, { "epoch": 3.112175567332518, "grad_norm": 0.9593492746353149, "learning_rate": 0.0006109780540834352, "loss": 3.4578, "step": 45805 }, { "epoch": 3.1125152874031796, "grad_norm": 0.7511401176452637, "learning_rate": 0.0006109355890746025, "loss": 3.3578, "step": 45810 }, { "epoch": 3.1128550074738417, "grad_norm": 0.7133017778396606, "learning_rate": 0.0006108931240657697, "loss": 3.4851, "step": 45815 }, { "epoch": 3.1131947275445033, "grad_norm": 0.8954891562461853, "learning_rate": 0.0006108506590569371, "loss": 3.749, "step": 45820 }, { "epoch": 3.113534447615165, "grad_norm": 0.8989901542663574, "learning_rate": 0.0006108081940481044, "loss": 3.3813, "step": 45825 }, { "epoch": 3.113874167685827, "grad_norm": 0.687868058681488, "learning_rate": 0.0006107657290392716, "loss": 3.6056, "step": 45830 }, { "epoch": 3.1142138877564887, "grad_norm": 0.8543491363525391, "learning_rate": 0.000610723264030439, "loss": 3.5718, "step": 45835 }, { "epoch": 3.1145536078271503, "grad_norm": 0.8791754245758057, "learning_rate": 0.0006106807990216062, "loss": 3.5718, "step": 45840 }, { "epoch": 3.1148933278978124, "grad_norm": 0.822270393371582, "learning_rate": 0.0006106383340127734, "loss": 3.5301, "step": 45845 }, { "epoch": 3.115233047968474, "grad_norm": 0.8403809070587158, "learning_rate": 0.0006105958690039408, "loss": 3.5613, "step": 45850 }, { "epoch": 3.1155727680391356, "grad_norm": 1.1512744426727295, "learning_rate": 0.0006105534039951081, "loss": 3.466, "step": 45855 }, { "epoch": 3.1159124881097977, "grad_norm": 0.8929910063743591, "learning_rate": 0.0006105109389862753, "loss": 3.3816, "step": 45860 }, { "epoch": 3.1162522081804593, "grad_norm": 0.7213155031204224, "learning_rate": 0.0006104684739774427, "loss": 3.5908, "step": 45865 }, { "epoch": 3.116591928251121, "grad_norm": 0.9248718619346619, "learning_rate": 0.0006104260089686099, "loss": 3.4261, "step": 45870 }, { "epoch": 3.116931648321783, "grad_norm": 0.8567730188369751, "learning_rate": 0.0006103835439597771, "loss": 3.5589, "step": 45875 }, { "epoch": 3.1172713683924447, "grad_norm": 0.7617050409317017, "learning_rate": 0.0006103410789509444, "loss": 3.7152, "step": 45880 }, { "epoch": 3.1176110884631063, "grad_norm": 0.8798654079437256, "learning_rate": 0.0006102986139421117, "loss": 3.477, "step": 45885 }, { "epoch": 3.1179508085337684, "grad_norm": 1.1022757291793823, "learning_rate": 0.000610256148933279, "loss": 3.5538, "step": 45890 }, { "epoch": 3.11829052860443, "grad_norm": 1.0818129777908325, "learning_rate": 0.0006102136839244463, "loss": 3.3807, "step": 45895 }, { "epoch": 3.1186302486750916, "grad_norm": 1.0450913906097412, "learning_rate": 0.0006101712189156136, "loss": 3.7009, "step": 45900 }, { "epoch": 3.1189699687457537, "grad_norm": 0.7491384148597717, "learning_rate": 0.0006101287539067808, "loss": 3.7027, "step": 45905 }, { "epoch": 3.1193096888164153, "grad_norm": 1.2240276336669922, "learning_rate": 0.0006100862888979481, "loss": 3.7465, "step": 45910 }, { "epoch": 3.119649408887077, "grad_norm": 0.9320614337921143, "learning_rate": 0.0006100438238891154, "loss": 3.5809, "step": 45915 }, { "epoch": 3.1199891289577386, "grad_norm": 0.833908200263977, "learning_rate": 0.0006100013588802826, "loss": 3.4359, "step": 45920 }, { "epoch": 3.1203288490284007, "grad_norm": 0.8332535028457642, "learning_rate": 0.00060995889387145, "loss": 3.3515, "step": 45925 }, { "epoch": 3.1206685690990623, "grad_norm": 0.8754799962043762, "learning_rate": 0.0006099164288626172, "loss": 3.5041, "step": 45930 }, { "epoch": 3.121008289169724, "grad_norm": 0.9463571310043335, "learning_rate": 0.0006098739638537845, "loss": 3.4861, "step": 45935 }, { "epoch": 3.121348009240386, "grad_norm": 0.8511512279510498, "learning_rate": 0.0006098314988449518, "loss": 3.2937, "step": 45940 }, { "epoch": 3.1216877293110477, "grad_norm": 1.0166258811950684, "learning_rate": 0.000609789033836119, "loss": 3.5524, "step": 45945 }, { "epoch": 3.1220274493817093, "grad_norm": 0.8162824511528015, "learning_rate": 0.0006097465688272863, "loss": 3.6029, "step": 45950 }, { "epoch": 3.1223671694523714, "grad_norm": 0.8140731453895569, "learning_rate": 0.0006097041038184537, "loss": 3.2698, "step": 45955 }, { "epoch": 3.122706889523033, "grad_norm": 0.929046630859375, "learning_rate": 0.0006096616388096209, "loss": 3.5996, "step": 45960 }, { "epoch": 3.1230466095936946, "grad_norm": 0.7766615152359009, "learning_rate": 0.0006096191738007882, "loss": 3.2856, "step": 45965 }, { "epoch": 3.1233863296643567, "grad_norm": 1.1617099046707153, "learning_rate": 0.0006095767087919555, "loss": 3.5837, "step": 45970 }, { "epoch": 3.1237260497350183, "grad_norm": 1.1074469089508057, "learning_rate": 0.0006095342437831227, "loss": 3.6085, "step": 45975 }, { "epoch": 3.12406576980568, "grad_norm": 0.878135621547699, "learning_rate": 0.0006094917787742899, "loss": 3.3347, "step": 45980 }, { "epoch": 3.124405489876342, "grad_norm": 0.7617248296737671, "learning_rate": 0.0006094493137654573, "loss": 3.7015, "step": 45985 }, { "epoch": 3.1247452099470037, "grad_norm": 0.7472463250160217, "learning_rate": 0.0006094068487566246, "loss": 3.2382, "step": 45990 }, { "epoch": 3.1250849300176653, "grad_norm": 0.7606608867645264, "learning_rate": 0.0006093643837477918, "loss": 3.5554, "step": 45995 }, { "epoch": 3.1254246500883274, "grad_norm": 0.87429279088974, "learning_rate": 0.0006093219187389592, "loss": 3.7741, "step": 46000 }, { "epoch": 3.125764370158989, "grad_norm": 0.8357203602790833, "learning_rate": 0.0006092794537301264, "loss": 3.4156, "step": 46005 }, { "epoch": 3.1261040902296506, "grad_norm": 0.8043299317359924, "learning_rate": 0.0006092369887212936, "loss": 3.6339, "step": 46010 }, { "epoch": 3.1264438103003127, "grad_norm": 0.9077935814857483, "learning_rate": 0.000609194523712461, "loss": 3.4129, "step": 46015 }, { "epoch": 3.1267835303709743, "grad_norm": 0.9562990069389343, "learning_rate": 0.0006091520587036282, "loss": 3.4865, "step": 46020 }, { "epoch": 3.127123250441636, "grad_norm": 0.7083468437194824, "learning_rate": 0.0006091095936947955, "loss": 3.4868, "step": 46025 }, { "epoch": 3.127462970512298, "grad_norm": 0.7912203669548035, "learning_rate": 0.0006090671286859628, "loss": 3.5057, "step": 46030 }, { "epoch": 3.1278026905829597, "grad_norm": 0.9937058091163635, "learning_rate": 0.0006090246636771301, "loss": 3.2897, "step": 46035 }, { "epoch": 3.1281424106536213, "grad_norm": 1.2250436544418335, "learning_rate": 0.0006089821986682973, "loss": 3.5508, "step": 46040 }, { "epoch": 3.1284821307242834, "grad_norm": 0.8452025651931763, "learning_rate": 0.0006089397336594646, "loss": 3.3022, "step": 46045 }, { "epoch": 3.128821850794945, "grad_norm": 1.0547435283660889, "learning_rate": 0.0006088972686506319, "loss": 3.5506, "step": 46050 }, { "epoch": 3.1291615708656066, "grad_norm": 5.125038146972656, "learning_rate": 0.0006088548036417991, "loss": 3.3906, "step": 46055 }, { "epoch": 3.1295012909362687, "grad_norm": 0.8804455399513245, "learning_rate": 0.0006088123386329665, "loss": 3.5465, "step": 46060 }, { "epoch": 3.1298410110069304, "grad_norm": 1.298341989517212, "learning_rate": 0.0006087698736241338, "loss": 3.5442, "step": 46065 }, { "epoch": 3.130180731077592, "grad_norm": 0.8561143279075623, "learning_rate": 0.000608727408615301, "loss": 3.6698, "step": 46070 }, { "epoch": 3.1305204511482536, "grad_norm": 1.1191811561584473, "learning_rate": 0.0006086849436064683, "loss": 3.4043, "step": 46075 }, { "epoch": 3.1308601712189157, "grad_norm": 1.1392382383346558, "learning_rate": 0.0006086424785976355, "loss": 3.5465, "step": 46080 }, { "epoch": 3.1311998912895773, "grad_norm": 1.0232793092727661, "learning_rate": 0.0006086000135888028, "loss": 3.3369, "step": 46085 }, { "epoch": 3.131539611360239, "grad_norm": 0.8181219100952148, "learning_rate": 0.0006085575485799701, "loss": 3.7747, "step": 46090 }, { "epoch": 3.131879331430901, "grad_norm": 0.9445599317550659, "learning_rate": 0.0006085150835711374, "loss": 3.6791, "step": 46095 }, { "epoch": 3.1322190515015627, "grad_norm": 0.8215139508247375, "learning_rate": 0.0006084726185623047, "loss": 3.6856, "step": 46100 }, { "epoch": 3.1325587715722243, "grad_norm": 1.0595476627349854, "learning_rate": 0.000608430153553472, "loss": 3.82, "step": 46105 }, { "epoch": 3.1328984916428864, "grad_norm": 0.8977946639060974, "learning_rate": 0.0006083876885446392, "loss": 3.5782, "step": 46110 }, { "epoch": 3.133238211713548, "grad_norm": 1.048440933227539, "learning_rate": 0.0006083452235358064, "loss": 3.3939, "step": 46115 }, { "epoch": 3.1335779317842096, "grad_norm": 0.8835228681564331, "learning_rate": 0.0006083027585269738, "loss": 3.4892, "step": 46120 }, { "epoch": 3.1339176518548717, "grad_norm": 0.9006416201591492, "learning_rate": 0.000608260293518141, "loss": 3.758, "step": 46125 }, { "epoch": 3.1342573719255333, "grad_norm": 0.8280627727508545, "learning_rate": 0.0006082178285093083, "loss": 3.1895, "step": 46130 }, { "epoch": 3.134597091996195, "grad_norm": 1.0463484525680542, "learning_rate": 0.0006081753635004757, "loss": 3.4391, "step": 46135 }, { "epoch": 3.134936812066857, "grad_norm": 0.7998212575912476, "learning_rate": 0.0006081328984916429, "loss": 3.3238, "step": 46140 }, { "epoch": 3.1352765321375187, "grad_norm": 1.1017510890960693, "learning_rate": 0.0006080904334828101, "loss": 3.5266, "step": 46145 }, { "epoch": 3.1356162522081803, "grad_norm": 1.0030502080917358, "learning_rate": 0.0006080479684739775, "loss": 3.5316, "step": 46150 }, { "epoch": 3.1359559722788424, "grad_norm": 0.7428054809570312, "learning_rate": 0.0006080055034651447, "loss": 3.5298, "step": 46155 }, { "epoch": 3.136295692349504, "grad_norm": 0.9752693176269531, "learning_rate": 0.0006079630384563119, "loss": 3.4351, "step": 46160 }, { "epoch": 3.1366354124201656, "grad_norm": 1.1509345769882202, "learning_rate": 0.0006079205734474794, "loss": 3.5448, "step": 46165 }, { "epoch": 3.1369751324908277, "grad_norm": 0.7178778648376465, "learning_rate": 0.0006078781084386466, "loss": 3.6176, "step": 46170 }, { "epoch": 3.1373148525614893, "grad_norm": 0.9408407211303711, "learning_rate": 0.0006078356434298139, "loss": 3.6918, "step": 46175 }, { "epoch": 3.137654572632151, "grad_norm": 0.8537754416465759, "learning_rate": 0.0006077931784209811, "loss": 3.5029, "step": 46180 }, { "epoch": 3.137994292702813, "grad_norm": 0.9286121726036072, "learning_rate": 0.0006077507134121484, "loss": 3.5665, "step": 46185 }, { "epoch": 3.1383340127734747, "grad_norm": 1.01081383228302, "learning_rate": 0.0006077082484033157, "loss": 3.6354, "step": 46190 }, { "epoch": 3.1386737328441363, "grad_norm": 0.7832593321800232, "learning_rate": 0.0006076657833944829, "loss": 3.6438, "step": 46195 }, { "epoch": 3.1390134529147984, "grad_norm": 1.0232324600219727, "learning_rate": 0.0006076233183856503, "loss": 3.5045, "step": 46200 }, { "epoch": 3.13935317298546, "grad_norm": 0.9431591033935547, "learning_rate": 0.0006075808533768176, "loss": 3.4027, "step": 46205 }, { "epoch": 3.1396928930561216, "grad_norm": 0.930108368396759, "learning_rate": 0.0006075383883679848, "loss": 3.4724, "step": 46210 }, { "epoch": 3.1400326131267837, "grad_norm": 0.7250892519950867, "learning_rate": 0.000607495923359152, "loss": 3.8049, "step": 46215 }, { "epoch": 3.1403723331974454, "grad_norm": 0.74888676404953, "learning_rate": 0.0006074534583503194, "loss": 3.5611, "step": 46220 }, { "epoch": 3.140712053268107, "grad_norm": 0.9732818007469177, "learning_rate": 0.0006074109933414866, "loss": 3.4848, "step": 46225 }, { "epoch": 3.141051773338769, "grad_norm": 0.9598218202590942, "learning_rate": 0.0006073685283326538, "loss": 3.3701, "step": 46230 }, { "epoch": 3.1413914934094307, "grad_norm": 1.0637699365615845, "learning_rate": 0.0006073260633238213, "loss": 3.544, "step": 46235 }, { "epoch": 3.1417312134800923, "grad_norm": 0.7196893692016602, "learning_rate": 0.0006072835983149885, "loss": 3.4909, "step": 46240 }, { "epoch": 3.1420709335507544, "grad_norm": 1.0989863872528076, "learning_rate": 0.0006072411333061557, "loss": 3.411, "step": 46245 }, { "epoch": 3.142410653621416, "grad_norm": 0.8415891528129578, "learning_rate": 0.0006071986682973231, "loss": 3.5753, "step": 46250 }, { "epoch": 3.1427503736920777, "grad_norm": 1.1494345664978027, "learning_rate": 0.0006071562032884903, "loss": 3.4642, "step": 46255 }, { "epoch": 3.1430900937627397, "grad_norm": 1.1215225458145142, "learning_rate": 0.0006071137382796575, "loss": 3.5905, "step": 46260 }, { "epoch": 3.1434298138334014, "grad_norm": 1.0338077545166016, "learning_rate": 0.0006070712732708248, "loss": 3.5728, "step": 46265 }, { "epoch": 3.143769533904063, "grad_norm": 1.2264279127120972, "learning_rate": 0.0006070288082619922, "loss": 3.6859, "step": 46270 }, { "epoch": 3.1441092539747246, "grad_norm": 0.9312006831169128, "learning_rate": 0.0006069863432531594, "loss": 3.6933, "step": 46275 }, { "epoch": 3.1444489740453867, "grad_norm": 0.9472574591636658, "learning_rate": 0.0006069438782443267, "loss": 3.575, "step": 46280 }, { "epoch": 3.1447886941160483, "grad_norm": 0.8425294756889343, "learning_rate": 0.000606901413235494, "loss": 3.5546, "step": 46285 }, { "epoch": 3.14512841418671, "grad_norm": 0.7385185360908508, "learning_rate": 0.0006068589482266612, "loss": 3.6576, "step": 46290 }, { "epoch": 3.145468134257372, "grad_norm": 0.9139735698699951, "learning_rate": 0.0006068164832178285, "loss": 3.3996, "step": 46295 }, { "epoch": 3.1458078543280337, "grad_norm": 0.8780132532119751, "learning_rate": 0.0006067740182089958, "loss": 3.7442, "step": 46300 }, { "epoch": 3.1461475743986953, "grad_norm": 0.9901491403579712, "learning_rate": 0.0006067315532001631, "loss": 3.7015, "step": 46305 }, { "epoch": 3.1464872944693574, "grad_norm": 0.9189825654029846, "learning_rate": 0.0006066890881913304, "loss": 3.3848, "step": 46310 }, { "epoch": 3.146827014540019, "grad_norm": 0.7744531035423279, "learning_rate": 0.0006066466231824976, "loss": 3.3679, "step": 46315 }, { "epoch": 3.1471667346106806, "grad_norm": 0.9594285488128662, "learning_rate": 0.0006066041581736649, "loss": 3.3517, "step": 46320 }, { "epoch": 3.1475064546813427, "grad_norm": 1.1310527324676514, "learning_rate": 0.0006065616931648322, "loss": 3.7523, "step": 46325 }, { "epoch": 3.1478461747520043, "grad_norm": 0.8826537728309631, "learning_rate": 0.0006065192281559994, "loss": 3.6548, "step": 46330 }, { "epoch": 3.148185894822666, "grad_norm": 0.9914456009864807, "learning_rate": 0.0006064767631471667, "loss": 3.5523, "step": 46335 }, { "epoch": 3.148525614893328, "grad_norm": 0.9719395637512207, "learning_rate": 0.0006064342981383341, "loss": 3.3755, "step": 46340 }, { "epoch": 3.1488653349639897, "grad_norm": 1.081804633140564, "learning_rate": 0.0006063918331295013, "loss": 3.6586, "step": 46345 }, { "epoch": 3.1492050550346513, "grad_norm": 0.7768450975418091, "learning_rate": 0.0006063493681206686, "loss": 3.4705, "step": 46350 }, { "epoch": 3.1495447751053134, "grad_norm": 0.9053600430488586, "learning_rate": 0.0006063069031118359, "loss": 3.3648, "step": 46355 }, { "epoch": 3.149884495175975, "grad_norm": 0.9429015517234802, "learning_rate": 0.0006062644381030031, "loss": 3.6208, "step": 46360 }, { "epoch": 3.1502242152466366, "grad_norm": 0.9059349298477173, "learning_rate": 0.0006062219730941703, "loss": 3.725, "step": 46365 }, { "epoch": 3.1505639353172987, "grad_norm": 1.1260673999786377, "learning_rate": 0.0006061795080853377, "loss": 3.3564, "step": 46370 }, { "epoch": 3.1509036553879604, "grad_norm": 0.7665789127349854, "learning_rate": 0.000606137043076505, "loss": 3.3842, "step": 46375 }, { "epoch": 3.151243375458622, "grad_norm": 0.9315463900566101, "learning_rate": 0.0006060945780676722, "loss": 3.4877, "step": 46380 }, { "epoch": 3.151583095529284, "grad_norm": 0.8784552812576294, "learning_rate": 0.0006060521130588396, "loss": 3.6311, "step": 46385 }, { "epoch": 3.1519228155999457, "grad_norm": 0.7249850630760193, "learning_rate": 0.0006060096480500068, "loss": 3.4901, "step": 46390 }, { "epoch": 3.1522625356706073, "grad_norm": 0.8721307516098022, "learning_rate": 0.000605967183041174, "loss": 3.5998, "step": 46395 }, { "epoch": 3.1526022557412694, "grad_norm": 0.92719966173172, "learning_rate": 0.0006059247180323414, "loss": 3.7339, "step": 46400 }, { "epoch": 3.152941975811931, "grad_norm": 1.2453628778457642, "learning_rate": 0.0006058822530235086, "loss": 3.4902, "step": 46405 }, { "epoch": 3.1532816958825927, "grad_norm": 0.8595046401023865, "learning_rate": 0.0006058397880146759, "loss": 3.5458, "step": 46410 }, { "epoch": 3.1536214159532543, "grad_norm": 0.7776077389717102, "learning_rate": 0.0006057973230058433, "loss": 3.3416, "step": 46415 }, { "epoch": 3.1539611360239164, "grad_norm": 0.9832218885421753, "learning_rate": 0.0006057548579970105, "loss": 3.5176, "step": 46420 }, { "epoch": 3.154300856094578, "grad_norm": 0.9439659118652344, "learning_rate": 0.0006057123929881777, "loss": 3.7549, "step": 46425 }, { "epoch": 3.1546405761652396, "grad_norm": 0.8922276496887207, "learning_rate": 0.000605669927979345, "loss": 3.5491, "step": 46430 }, { "epoch": 3.1549802962359017, "grad_norm": 1.005068302154541, "learning_rate": 0.0006056274629705123, "loss": 3.4233, "step": 46435 }, { "epoch": 3.1553200163065633, "grad_norm": 0.9090473055839539, "learning_rate": 0.0006055849979616795, "loss": 3.56, "step": 46440 }, { "epoch": 3.155659736377225, "grad_norm": 0.9250444769859314, "learning_rate": 0.0006055425329528469, "loss": 3.6162, "step": 46445 }, { "epoch": 3.155999456447887, "grad_norm": 0.7810159921646118, "learning_rate": 0.0006055000679440142, "loss": 3.7067, "step": 46450 }, { "epoch": 3.1563391765185487, "grad_norm": 0.8447842597961426, "learning_rate": 0.0006054576029351814, "loss": 3.7874, "step": 46455 }, { "epoch": 3.1566788965892103, "grad_norm": 0.935393214225769, "learning_rate": 0.0006054151379263487, "loss": 3.4429, "step": 46460 }, { "epoch": 3.1570186166598724, "grad_norm": 0.6645131707191467, "learning_rate": 0.0006053726729175159, "loss": 3.4582, "step": 46465 }, { "epoch": 3.157358336730534, "grad_norm": 1.0213868618011475, "learning_rate": 0.0006053302079086832, "loss": 3.6913, "step": 46470 }, { "epoch": 3.1576980568011956, "grad_norm": 1.238684058189392, "learning_rate": 0.0006052877428998506, "loss": 3.2454, "step": 46475 }, { "epoch": 3.1580377768718577, "grad_norm": 0.8169494271278381, "learning_rate": 0.0006052452778910178, "loss": 3.3102, "step": 46480 }, { "epoch": 3.1583774969425193, "grad_norm": 0.8555706739425659, "learning_rate": 0.0006052028128821851, "loss": 3.4806, "step": 46485 }, { "epoch": 3.158717217013181, "grad_norm": 0.9451950788497925, "learning_rate": 0.0006051603478733524, "loss": 3.4342, "step": 46490 }, { "epoch": 3.159056937083843, "grad_norm": 0.9904257655143738, "learning_rate": 0.0006051178828645196, "loss": 3.5535, "step": 46495 }, { "epoch": 3.1593966571545047, "grad_norm": 0.6905050277709961, "learning_rate": 0.0006050754178556868, "loss": 3.041, "step": 46500 }, { "epoch": 3.1597363772251663, "grad_norm": 0.9902119040489197, "learning_rate": 0.0006050329528468542, "loss": 3.6276, "step": 46505 }, { "epoch": 3.1600760972958284, "grad_norm": 1.1795539855957031, "learning_rate": 0.0006049904878380215, "loss": 3.6598, "step": 46510 }, { "epoch": 3.16041581736649, "grad_norm": 0.9729307293891907, "learning_rate": 0.0006049480228291889, "loss": 3.7301, "step": 46515 }, { "epoch": 3.1607555374371517, "grad_norm": 0.9324309825897217, "learning_rate": 0.0006049055578203561, "loss": 3.6959, "step": 46520 }, { "epoch": 3.1610952575078137, "grad_norm": 1.0121296644210815, "learning_rate": 0.0006048630928115233, "loss": 3.4641, "step": 46525 }, { "epoch": 3.1614349775784754, "grad_norm": 0.9177004098892212, "learning_rate": 0.0006048206278026906, "loss": 3.5441, "step": 46530 }, { "epoch": 3.161774697649137, "grad_norm": 0.7553184628486633, "learning_rate": 0.0006047781627938579, "loss": 3.6605, "step": 46535 }, { "epoch": 3.162114417719799, "grad_norm": 1.1297813653945923, "learning_rate": 0.0006047356977850251, "loss": 3.4095, "step": 46540 }, { "epoch": 3.1624541377904607, "grad_norm": 1.040817141532898, "learning_rate": 0.0006046932327761925, "loss": 3.4169, "step": 46545 }, { "epoch": 3.1627938578611223, "grad_norm": 0.6253998279571533, "learning_rate": 0.0006046507677673598, "loss": 3.6725, "step": 46550 }, { "epoch": 3.1631335779317844, "grad_norm": 1.7424874305725098, "learning_rate": 0.000604608302758527, "loss": 3.6132, "step": 46555 }, { "epoch": 3.163473298002446, "grad_norm": 1.038636565208435, "learning_rate": 0.0006045658377496943, "loss": 3.5153, "step": 46560 }, { "epoch": 3.1638130180731077, "grad_norm": 0.6953262686729431, "learning_rate": 0.0006045233727408615, "loss": 3.6992, "step": 46565 }, { "epoch": 3.1641527381437697, "grad_norm": 0.9195459485054016, "learning_rate": 0.0006044809077320288, "loss": 3.4486, "step": 46570 }, { "epoch": 3.1644924582144314, "grad_norm": 0.9000627994537354, "learning_rate": 0.0006044384427231961, "loss": 3.7722, "step": 46575 }, { "epoch": 3.164832178285093, "grad_norm": 0.7390528917312622, "learning_rate": 0.0006043959777143634, "loss": 3.754, "step": 46580 }, { "epoch": 3.165171898355755, "grad_norm": 1.1858088970184326, "learning_rate": 0.0006043535127055307, "loss": 3.3887, "step": 46585 }, { "epoch": 3.1655116184264167, "grad_norm": 1.1932612657546997, "learning_rate": 0.000604311047696698, "loss": 3.302, "step": 46590 }, { "epoch": 3.1658513384970783, "grad_norm": 0.8553206324577332, "learning_rate": 0.0006042685826878652, "loss": 3.7434, "step": 46595 }, { "epoch": 3.1661910585677404, "grad_norm": 0.9183288812637329, "learning_rate": 0.0006042261176790325, "loss": 3.5441, "step": 46600 }, { "epoch": 3.166530778638402, "grad_norm": 0.9383103251457214, "learning_rate": 0.0006041836526701998, "loss": 3.6497, "step": 46605 }, { "epoch": 3.1668704987090637, "grad_norm": 0.9877063632011414, "learning_rate": 0.000604141187661367, "loss": 3.4506, "step": 46610 }, { "epoch": 3.1672102187797253, "grad_norm": 0.9763429760932922, "learning_rate": 0.0006040987226525343, "loss": 3.4583, "step": 46615 }, { "epoch": 3.1675499388503874, "grad_norm": 0.9898585081100464, "learning_rate": 0.0006040562576437017, "loss": 3.6222, "step": 46620 }, { "epoch": 3.167889658921049, "grad_norm": 0.8736823797225952, "learning_rate": 0.0006040137926348689, "loss": 3.8507, "step": 46625 }, { "epoch": 3.1682293789917106, "grad_norm": 0.7745020389556885, "learning_rate": 0.0006039713276260361, "loss": 3.2142, "step": 46630 }, { "epoch": 3.1685690990623727, "grad_norm": 0.8240602612495422, "learning_rate": 0.0006039288626172035, "loss": 3.6573, "step": 46635 }, { "epoch": 3.1689088191330343, "grad_norm": 0.756883978843689, "learning_rate": 0.0006038863976083707, "loss": 3.5787, "step": 46640 }, { "epoch": 3.169248539203696, "grad_norm": 1.2295364141464233, "learning_rate": 0.0006038439325995379, "loss": 3.6238, "step": 46645 }, { "epoch": 3.169588259274358, "grad_norm": 1.1505990028381348, "learning_rate": 0.0006038014675907054, "loss": 3.6274, "step": 46650 }, { "epoch": 3.1699279793450197, "grad_norm": 0.9199177026748657, "learning_rate": 0.0006037590025818726, "loss": 3.5468, "step": 46655 }, { "epoch": 3.1702676994156813, "grad_norm": 0.9085102677345276, "learning_rate": 0.0006037165375730398, "loss": 3.6878, "step": 46660 }, { "epoch": 3.1706074194863434, "grad_norm": 0.6793073415756226, "learning_rate": 0.0006036740725642071, "loss": 3.5002, "step": 46665 }, { "epoch": 3.170947139557005, "grad_norm": 0.9024698138237, "learning_rate": 0.0006036316075553744, "loss": 3.5565, "step": 46670 }, { "epoch": 3.1712868596276667, "grad_norm": 0.9943620562553406, "learning_rate": 0.0006035891425465416, "loss": 3.669, "step": 46675 }, { "epoch": 3.1716265796983287, "grad_norm": 0.8209461569786072, "learning_rate": 0.0006035466775377089, "loss": 3.6526, "step": 46680 }, { "epoch": 3.1719662997689904, "grad_norm": 1.0510286092758179, "learning_rate": 0.0006035042125288763, "loss": 3.3729, "step": 46685 }, { "epoch": 3.172306019839652, "grad_norm": 1.8927001953125, "learning_rate": 0.0006034617475200435, "loss": 3.5641, "step": 46690 }, { "epoch": 3.172645739910314, "grad_norm": 1.046531081199646, "learning_rate": 0.0006034192825112108, "loss": 3.771, "step": 46695 }, { "epoch": 3.1729854599809757, "grad_norm": 0.9411276578903198, "learning_rate": 0.000603376817502378, "loss": 3.3155, "step": 46700 }, { "epoch": 3.1733251800516373, "grad_norm": 0.9345074892044067, "learning_rate": 0.0006033343524935453, "loss": 3.4648, "step": 46705 }, { "epoch": 3.1736649001222994, "grad_norm": 0.9831835627555847, "learning_rate": 0.0006032918874847126, "loss": 3.5108, "step": 46710 }, { "epoch": 3.174004620192961, "grad_norm": 1.223319411277771, "learning_rate": 0.0006032494224758798, "loss": 3.713, "step": 46715 }, { "epoch": 3.1743443402636227, "grad_norm": 0.8318305611610413, "learning_rate": 0.0006032069574670472, "loss": 3.5504, "step": 46720 }, { "epoch": 3.1746840603342847, "grad_norm": 0.8005041480064392, "learning_rate": 0.0006031644924582145, "loss": 3.4345, "step": 46725 }, { "epoch": 3.1750237804049464, "grad_norm": 0.7797107100486755, "learning_rate": 0.0006031220274493817, "loss": 3.5251, "step": 46730 }, { "epoch": 3.175363500475608, "grad_norm": 0.889167845249176, "learning_rate": 0.000603079562440549, "loss": 3.47, "step": 46735 }, { "epoch": 3.17570322054627, "grad_norm": 0.9055701494216919, "learning_rate": 0.0006030370974317163, "loss": 3.5907, "step": 46740 }, { "epoch": 3.1760429406169317, "grad_norm": 0.942388653755188, "learning_rate": 0.0006029946324228835, "loss": 3.7345, "step": 46745 }, { "epoch": 3.1763826606875933, "grad_norm": 0.8008630275726318, "learning_rate": 0.0006029521674140507, "loss": 3.4447, "step": 46750 }, { "epoch": 3.176722380758255, "grad_norm": 0.7601468563079834, "learning_rate": 0.0006029097024052182, "loss": 3.1252, "step": 46755 }, { "epoch": 3.177062100828917, "grad_norm": 1.0411310195922852, "learning_rate": 0.0006028672373963854, "loss": 3.5155, "step": 46760 }, { "epoch": 3.1774018208995787, "grad_norm": 0.7391448616981506, "learning_rate": 0.0006028247723875526, "loss": 3.389, "step": 46765 }, { "epoch": 3.1777415409702403, "grad_norm": 0.8726449608802795, "learning_rate": 0.00060278230737872, "loss": 3.5627, "step": 46770 }, { "epoch": 3.1780812610409024, "grad_norm": 1.0630723237991333, "learning_rate": 0.0006027398423698872, "loss": 3.3517, "step": 46775 }, { "epoch": 3.178420981111564, "grad_norm": 0.9158639311790466, "learning_rate": 0.0006026973773610544, "loss": 3.5598, "step": 46780 }, { "epoch": 3.1787607011822256, "grad_norm": 0.8446755409240723, "learning_rate": 0.0006026549123522218, "loss": 3.4093, "step": 46785 }, { "epoch": 3.1791004212528877, "grad_norm": 0.836820125579834, "learning_rate": 0.0006026124473433891, "loss": 3.814, "step": 46790 }, { "epoch": 3.1794401413235494, "grad_norm": 0.769568145275116, "learning_rate": 0.0006025699823345563, "loss": 3.6058, "step": 46795 }, { "epoch": 3.179779861394211, "grad_norm": 1.3094507455825806, "learning_rate": 0.0006025275173257237, "loss": 3.3002, "step": 46800 }, { "epoch": 3.180119581464873, "grad_norm": 0.7064046859741211, "learning_rate": 0.0006024850523168909, "loss": 3.352, "step": 46805 }, { "epoch": 3.1804593015355347, "grad_norm": 1.1609294414520264, "learning_rate": 0.0006024425873080581, "loss": 3.5385, "step": 46810 }, { "epoch": 3.1807990216061963, "grad_norm": 0.8500900268554688, "learning_rate": 0.0006024001222992254, "loss": 3.4063, "step": 46815 }, { "epoch": 3.1811387416768584, "grad_norm": 0.8350246548652649, "learning_rate": 0.0006023576572903927, "loss": 3.4049, "step": 46820 }, { "epoch": 3.18147846174752, "grad_norm": 0.7162759900093079, "learning_rate": 0.00060231519228156, "loss": 3.5785, "step": 46825 }, { "epoch": 3.1818181818181817, "grad_norm": 0.7550603151321411, "learning_rate": 0.0006022727272727273, "loss": 3.5449, "step": 46830 }, { "epoch": 3.1821579018888437, "grad_norm": 0.8434705138206482, "learning_rate": 0.0006022302622638946, "loss": 3.5738, "step": 46835 }, { "epoch": 3.1824976219595054, "grad_norm": 1.0039403438568115, "learning_rate": 0.0006021877972550618, "loss": 3.3661, "step": 46840 }, { "epoch": 3.182837342030167, "grad_norm": 0.8391554951667786, "learning_rate": 0.0006021453322462291, "loss": 3.5292, "step": 46845 }, { "epoch": 3.183177062100829, "grad_norm": 0.9582727551460266, "learning_rate": 0.0006021028672373963, "loss": 3.5577, "step": 46850 }, { "epoch": 3.1835167821714907, "grad_norm": 0.8422686457633972, "learning_rate": 0.0006020604022285637, "loss": 3.5484, "step": 46855 }, { "epoch": 3.1838565022421523, "grad_norm": 1.2003206014633179, "learning_rate": 0.000602017937219731, "loss": 3.6119, "step": 46860 }, { "epoch": 3.1841962223128144, "grad_norm": 1.0818209648132324, "learning_rate": 0.0006019754722108982, "loss": 3.4672, "step": 46865 }, { "epoch": 3.184535942383476, "grad_norm": 0.8151135444641113, "learning_rate": 0.0006019330072020656, "loss": 3.676, "step": 46870 }, { "epoch": 3.1848756624541377, "grad_norm": 0.9601738452911377, "learning_rate": 0.0006018905421932328, "loss": 3.6208, "step": 46875 }, { "epoch": 3.1852153825247997, "grad_norm": 0.7576903700828552, "learning_rate": 0.0006018480771844, "loss": 3.5858, "step": 46880 }, { "epoch": 3.1855551025954614, "grad_norm": 0.8233585953712463, "learning_rate": 0.0006018056121755674, "loss": 3.748, "step": 46885 }, { "epoch": 3.185894822666123, "grad_norm": 0.9840953946113586, "learning_rate": 0.0006017631471667346, "loss": 3.488, "step": 46890 }, { "epoch": 3.186234542736785, "grad_norm": 1.1098753213882446, "learning_rate": 0.0006017206821579019, "loss": 3.4388, "step": 46895 }, { "epoch": 3.1865742628074467, "grad_norm": 1.1684784889221191, "learning_rate": 0.0006016782171490693, "loss": 3.6886, "step": 46900 }, { "epoch": 3.1869139828781083, "grad_norm": 0.8859899044036865, "learning_rate": 0.0006016357521402365, "loss": 3.3408, "step": 46905 }, { "epoch": 3.1872537029487704, "grad_norm": 0.8561869859695435, "learning_rate": 0.0006015932871314037, "loss": 3.6252, "step": 46910 }, { "epoch": 3.187593423019432, "grad_norm": 0.8533803224563599, "learning_rate": 0.000601550822122571, "loss": 3.4019, "step": 46915 }, { "epoch": 3.1879331430900937, "grad_norm": 1.049674153327942, "learning_rate": 0.0006015083571137383, "loss": 3.6391, "step": 46920 }, { "epoch": 3.1882728631607558, "grad_norm": 0.8303041458129883, "learning_rate": 0.0006014658921049055, "loss": 3.6925, "step": 46925 }, { "epoch": 3.1886125832314174, "grad_norm": 0.8091197609901428, "learning_rate": 0.0006014234270960729, "loss": 3.5285, "step": 46930 }, { "epoch": 3.188952303302079, "grad_norm": 0.9443579316139221, "learning_rate": 0.0006013809620872402, "loss": 3.5063, "step": 46935 }, { "epoch": 3.189292023372741, "grad_norm": 1.035934329032898, "learning_rate": 0.0006013384970784074, "loss": 3.6002, "step": 46940 }, { "epoch": 3.1896317434434027, "grad_norm": 0.6452840566635132, "learning_rate": 0.0006012960320695747, "loss": 3.4307, "step": 46945 }, { "epoch": 3.1899714635140644, "grad_norm": 0.7430636286735535, "learning_rate": 0.000601253567060742, "loss": 3.5211, "step": 46950 }, { "epoch": 3.190311183584726, "grad_norm": 1.0702970027923584, "learning_rate": 0.0006012111020519092, "loss": 3.3982, "step": 46955 }, { "epoch": 3.190650903655388, "grad_norm": 0.9080306887626648, "learning_rate": 0.0006011686370430765, "loss": 3.324, "step": 46960 }, { "epoch": 3.1909906237260497, "grad_norm": 1.035197138786316, "learning_rate": 0.0006011261720342438, "loss": 3.4461, "step": 46965 }, { "epoch": 3.1913303437967113, "grad_norm": 1.2540777921676636, "learning_rate": 0.0006010837070254111, "loss": 3.3924, "step": 46970 }, { "epoch": 3.1916700638673734, "grad_norm": 0.9723957777023315, "learning_rate": 0.0006010412420165784, "loss": 3.576, "step": 46975 }, { "epoch": 3.192009783938035, "grad_norm": 1.0530803203582764, "learning_rate": 0.0006009987770077456, "loss": 3.4289, "step": 46980 }, { "epoch": 3.1923495040086967, "grad_norm": 0.9055414795875549, "learning_rate": 0.0006009563119989129, "loss": 3.4464, "step": 46985 }, { "epoch": 3.1926892240793587, "grad_norm": 0.9550784230232239, "learning_rate": 0.0006009138469900802, "loss": 3.5365, "step": 46990 }, { "epoch": 3.1930289441500204, "grad_norm": 0.7817603349685669, "learning_rate": 0.0006008713819812474, "loss": 3.4384, "step": 46995 }, { "epoch": 3.193368664220682, "grad_norm": 0.8422337174415588, "learning_rate": 0.0006008289169724147, "loss": 3.4229, "step": 47000 }, { "epoch": 3.193708384291344, "grad_norm": 0.7332444190979004, "learning_rate": 0.0006007864519635821, "loss": 3.7348, "step": 47005 }, { "epoch": 3.1940481043620057, "grad_norm": 0.8244227170944214, "learning_rate": 0.0006007439869547493, "loss": 3.3134, "step": 47010 }, { "epoch": 3.1943878244326673, "grad_norm": 0.9194483757019043, "learning_rate": 0.0006007015219459165, "loss": 3.5259, "step": 47015 }, { "epoch": 3.1947275445033294, "grad_norm": 0.820408821105957, "learning_rate": 0.0006006590569370839, "loss": 3.5827, "step": 47020 }, { "epoch": 3.195067264573991, "grad_norm": 0.8288946151733398, "learning_rate": 0.0006006165919282511, "loss": 3.6084, "step": 47025 }, { "epoch": 3.1954069846446527, "grad_norm": 0.8105474710464478, "learning_rate": 0.0006005741269194183, "loss": 3.4301, "step": 47030 }, { "epoch": 3.1957467047153147, "grad_norm": 0.7377923727035522, "learning_rate": 0.0006005316619105858, "loss": 3.4609, "step": 47035 }, { "epoch": 3.1960864247859764, "grad_norm": 1.0504072904586792, "learning_rate": 0.000600489196901753, "loss": 3.5594, "step": 47040 }, { "epoch": 3.196426144856638, "grad_norm": 0.7716879844665527, "learning_rate": 0.0006004467318929202, "loss": 3.2592, "step": 47045 }, { "epoch": 3.1967658649273, "grad_norm": 1.0174421072006226, "learning_rate": 0.0006004042668840875, "loss": 3.3935, "step": 47050 }, { "epoch": 3.1971055849979617, "grad_norm": 0.7798956632614136, "learning_rate": 0.0006003618018752548, "loss": 3.5128, "step": 47055 }, { "epoch": 3.1974453050686233, "grad_norm": 0.9326068162918091, "learning_rate": 0.000600319336866422, "loss": 3.2722, "step": 47060 }, { "epoch": 3.1977850251392854, "grad_norm": 0.8306412696838379, "learning_rate": 0.0006002768718575894, "loss": 3.7013, "step": 47065 }, { "epoch": 3.198124745209947, "grad_norm": 0.9422821998596191, "learning_rate": 0.0006002344068487567, "loss": 3.5461, "step": 47070 }, { "epoch": 3.1984644652806087, "grad_norm": 0.7829583287239075, "learning_rate": 0.0006001919418399239, "loss": 3.7749, "step": 47075 }, { "epoch": 3.1988041853512708, "grad_norm": 0.8829296231269836, "learning_rate": 0.0006001494768310912, "loss": 3.3466, "step": 47080 }, { "epoch": 3.1991439054219324, "grad_norm": 1.0024772882461548, "learning_rate": 0.0006001070118222585, "loss": 3.6069, "step": 47085 }, { "epoch": 3.199483625492594, "grad_norm": 0.8746809363365173, "learning_rate": 0.0006000645468134257, "loss": 3.6644, "step": 47090 }, { "epoch": 3.1998233455632556, "grad_norm": 0.8248069286346436, "learning_rate": 0.000600022081804593, "loss": 3.3991, "step": 47095 }, { "epoch": 3.2001630656339177, "grad_norm": 0.7885226011276245, "learning_rate": 0.0005999796167957604, "loss": 3.4272, "step": 47100 }, { "epoch": 3.2005027857045794, "grad_norm": 0.9073771834373474, "learning_rate": 0.0005999371517869276, "loss": 3.4027, "step": 47105 }, { "epoch": 3.200842505775241, "grad_norm": 0.7669292688369751, "learning_rate": 0.0005998946867780949, "loss": 3.8691, "step": 47110 }, { "epoch": 3.201182225845903, "grad_norm": 0.8867843151092529, "learning_rate": 0.0005998522217692621, "loss": 3.5143, "step": 47115 }, { "epoch": 3.2015219459165647, "grad_norm": 0.8381679654121399, "learning_rate": 0.0005998097567604294, "loss": 3.5298, "step": 47120 }, { "epoch": 3.2018616659872263, "grad_norm": 0.8222712874412537, "learning_rate": 0.0005997672917515967, "loss": 3.1479, "step": 47125 }, { "epoch": 3.2022013860578884, "grad_norm": 0.8474553823471069, "learning_rate": 0.0005997248267427639, "loss": 3.6948, "step": 47130 }, { "epoch": 3.20254110612855, "grad_norm": 1.05476975440979, "learning_rate": 0.0005996823617339313, "loss": 3.6951, "step": 47135 }, { "epoch": 3.2028808261992117, "grad_norm": 1.0135157108306885, "learning_rate": 0.0005996398967250986, "loss": 3.5089, "step": 47140 }, { "epoch": 3.2032205462698737, "grad_norm": 0.82040935754776, "learning_rate": 0.0005995974317162658, "loss": 3.5641, "step": 47145 }, { "epoch": 3.2035602663405354, "grad_norm": 0.8681055903434753, "learning_rate": 0.000599554966707433, "loss": 3.5092, "step": 47150 }, { "epoch": 3.203899986411197, "grad_norm": 0.7641761302947998, "learning_rate": 0.0005995125016986004, "loss": 3.6961, "step": 47155 }, { "epoch": 3.204239706481859, "grad_norm": 0.9352573752403259, "learning_rate": 0.0005994700366897676, "loss": 3.598, "step": 47160 }, { "epoch": 3.2045794265525207, "grad_norm": 1.0525802373886108, "learning_rate": 0.0005994275716809348, "loss": 3.5342, "step": 47165 }, { "epoch": 3.2049191466231823, "grad_norm": 0.8156723976135254, "learning_rate": 0.0005993851066721023, "loss": 3.7183, "step": 47170 }, { "epoch": 3.2052588666938444, "grad_norm": 0.7460827231407166, "learning_rate": 0.0005993426416632695, "loss": 3.3779, "step": 47175 }, { "epoch": 3.205598586764506, "grad_norm": 0.7945560216903687, "learning_rate": 0.0005993001766544367, "loss": 3.6999, "step": 47180 }, { "epoch": 3.2059383068351677, "grad_norm": 0.8378802537918091, "learning_rate": 0.0005992577116456041, "loss": 3.3785, "step": 47185 }, { "epoch": 3.2062780269058297, "grad_norm": 0.9115859866142273, "learning_rate": 0.0005992152466367713, "loss": 3.3966, "step": 47190 }, { "epoch": 3.2066177469764914, "grad_norm": 0.9437305927276611, "learning_rate": 0.0005991727816279386, "loss": 3.4793, "step": 47195 }, { "epoch": 3.206957467047153, "grad_norm": 0.7891079187393188, "learning_rate": 0.0005991303166191058, "loss": 3.2899, "step": 47200 }, { "epoch": 3.207297187117815, "grad_norm": 1.0026376247406006, "learning_rate": 0.0005990878516102732, "loss": 3.3531, "step": 47205 }, { "epoch": 3.2076369071884767, "grad_norm": 0.752679169178009, "learning_rate": 0.0005990453866014405, "loss": 3.6478, "step": 47210 }, { "epoch": 3.2079766272591383, "grad_norm": 0.9419549107551575, "learning_rate": 0.0005990029215926077, "loss": 3.222, "step": 47215 }, { "epoch": 3.2083163473298004, "grad_norm": 1.06291925907135, "learning_rate": 0.000598960456583775, "loss": 3.3734, "step": 47220 }, { "epoch": 3.208656067400462, "grad_norm": 0.7885763645172119, "learning_rate": 0.0005989179915749423, "loss": 3.4591, "step": 47225 }, { "epoch": 3.2089957874711237, "grad_norm": 0.804499626159668, "learning_rate": 0.0005988755265661095, "loss": 3.1605, "step": 47230 }, { "epoch": 3.2093355075417858, "grad_norm": 1.008347988128662, "learning_rate": 0.0005988330615572767, "loss": 3.8184, "step": 47235 }, { "epoch": 3.2096752276124474, "grad_norm": 0.9152479767799377, "learning_rate": 0.0005987905965484442, "loss": 3.5769, "step": 47240 }, { "epoch": 3.210014947683109, "grad_norm": 0.908177375793457, "learning_rate": 0.0005987481315396114, "loss": 3.6251, "step": 47245 }, { "epoch": 3.210354667753771, "grad_norm": 0.9669256806373596, "learning_rate": 0.0005987056665307786, "loss": 3.7742, "step": 47250 }, { "epoch": 3.2106943878244327, "grad_norm": 0.8498631715774536, "learning_rate": 0.000598663201521946, "loss": 3.4866, "step": 47255 }, { "epoch": 3.2110341078950944, "grad_norm": 0.8783431053161621, "learning_rate": 0.0005986207365131132, "loss": 3.5807, "step": 47260 }, { "epoch": 3.2113738279657564, "grad_norm": 1.0012271404266357, "learning_rate": 0.0005985782715042804, "loss": 3.2675, "step": 47265 }, { "epoch": 3.211713548036418, "grad_norm": 0.6955933570861816, "learning_rate": 0.0005985358064954478, "loss": 3.59, "step": 47270 }, { "epoch": 3.2120532681070797, "grad_norm": 1.4229991436004639, "learning_rate": 0.0005984933414866151, "loss": 3.4889, "step": 47275 }, { "epoch": 3.2123929881777418, "grad_norm": 1.0758135318756104, "learning_rate": 0.0005984508764777823, "loss": 3.6058, "step": 47280 }, { "epoch": 3.2127327082484034, "grad_norm": 0.7102364897727966, "learning_rate": 0.0005984084114689497, "loss": 3.4476, "step": 47285 }, { "epoch": 3.213072428319065, "grad_norm": 1.1684587001800537, "learning_rate": 0.0005983659464601169, "loss": 3.5806, "step": 47290 }, { "epoch": 3.2134121483897267, "grad_norm": 0.9978742003440857, "learning_rate": 0.0005983234814512841, "loss": 3.3936, "step": 47295 }, { "epoch": 3.2137518684603887, "grad_norm": 1.1747196912765503, "learning_rate": 0.0005982810164424514, "loss": 3.7275, "step": 47300 }, { "epoch": 3.2140915885310504, "grad_norm": 0.713520884513855, "learning_rate": 0.0005982385514336187, "loss": 3.1827, "step": 47305 }, { "epoch": 3.214431308601712, "grad_norm": 0.760844349861145, "learning_rate": 0.000598196086424786, "loss": 3.2023, "step": 47310 }, { "epoch": 3.214771028672374, "grad_norm": 0.7904848456382751, "learning_rate": 0.0005981536214159533, "loss": 3.5108, "step": 47315 }, { "epoch": 3.2151107487430357, "grad_norm": 0.7856009602546692, "learning_rate": 0.0005981111564071206, "loss": 3.673, "step": 47320 }, { "epoch": 3.2154504688136973, "grad_norm": 0.7806258201599121, "learning_rate": 0.0005980686913982878, "loss": 3.6654, "step": 47325 }, { "epoch": 3.2157901888843594, "grad_norm": 1.455320119857788, "learning_rate": 0.0005980262263894551, "loss": 3.3671, "step": 47330 }, { "epoch": 3.216129908955021, "grad_norm": 0.9934332370758057, "learning_rate": 0.0005979837613806224, "loss": 3.4671, "step": 47335 }, { "epoch": 3.2164696290256827, "grad_norm": 0.7893030643463135, "learning_rate": 0.0005979412963717896, "loss": 3.4871, "step": 47340 }, { "epoch": 3.2168093490963448, "grad_norm": 0.9301550984382629, "learning_rate": 0.000597898831362957, "loss": 3.2587, "step": 47345 }, { "epoch": 3.2171490691670064, "grad_norm": 0.8590857982635498, "learning_rate": 0.0005978563663541242, "loss": 3.5407, "step": 47350 }, { "epoch": 3.217488789237668, "grad_norm": 0.7223317623138428, "learning_rate": 0.0005978139013452915, "loss": 3.3956, "step": 47355 }, { "epoch": 3.21782850930833, "grad_norm": 0.9383039474487305, "learning_rate": 0.0005977714363364588, "loss": 3.7168, "step": 47360 }, { "epoch": 3.2181682293789917, "grad_norm": 1.3038867712020874, "learning_rate": 0.000597728971327626, "loss": 3.3965, "step": 47365 }, { "epoch": 3.2185079494496533, "grad_norm": 0.7732632160186768, "learning_rate": 0.0005976865063187933, "loss": 3.6644, "step": 47370 }, { "epoch": 3.2188476695203154, "grad_norm": 0.8270350694656372, "learning_rate": 0.0005976440413099606, "loss": 3.383, "step": 47375 }, { "epoch": 3.219187389590977, "grad_norm": 0.7388333678245544, "learning_rate": 0.0005976015763011279, "loss": 3.7236, "step": 47380 }, { "epoch": 3.2195271096616387, "grad_norm": 0.7632237672805786, "learning_rate": 0.0005975591112922952, "loss": 3.1926, "step": 47385 }, { "epoch": 3.2198668297323008, "grad_norm": 0.7406578660011292, "learning_rate": 0.0005975166462834625, "loss": 3.6323, "step": 47390 }, { "epoch": 3.2202065498029624, "grad_norm": 1.0568454265594482, "learning_rate": 0.0005974741812746297, "loss": 3.5822, "step": 47395 }, { "epoch": 3.220546269873624, "grad_norm": 0.7848625779151917, "learning_rate": 0.0005974317162657969, "loss": 3.4369, "step": 47400 }, { "epoch": 3.220885989944286, "grad_norm": 1.4528298377990723, "learning_rate": 0.0005973892512569643, "loss": 3.4255, "step": 47405 }, { "epoch": 3.2212257100149477, "grad_norm": 0.819679319858551, "learning_rate": 0.0005973467862481315, "loss": 3.4595, "step": 47410 }, { "epoch": 3.2215654300856094, "grad_norm": 1.0311625003814697, "learning_rate": 0.0005973043212392988, "loss": 3.6325, "step": 47415 }, { "epoch": 3.2219051501562714, "grad_norm": 1.0439918041229248, "learning_rate": 0.0005972618562304662, "loss": 3.4917, "step": 47420 }, { "epoch": 3.222244870226933, "grad_norm": 1.1609846353530884, "learning_rate": 0.0005972193912216334, "loss": 3.4052, "step": 47425 }, { "epoch": 3.2225845902975947, "grad_norm": 1.8750985860824585, "learning_rate": 0.0005971769262128006, "loss": 3.3674, "step": 47430 }, { "epoch": 3.2229243103682563, "grad_norm": 1.1035839319229126, "learning_rate": 0.000597134461203968, "loss": 3.527, "step": 47435 }, { "epoch": 3.2232640304389184, "grad_norm": 1.4744371175765991, "learning_rate": 0.0005970919961951352, "loss": 3.4153, "step": 47440 }, { "epoch": 3.22360375050958, "grad_norm": 1.0677400827407837, "learning_rate": 0.0005970495311863024, "loss": 3.6763, "step": 47445 }, { "epoch": 3.2239434705802417, "grad_norm": 1.136963129043579, "learning_rate": 0.0005970070661774698, "loss": 3.4439, "step": 47450 }, { "epoch": 3.2242831906509037, "grad_norm": 0.8413625359535217, "learning_rate": 0.0005969646011686371, "loss": 3.6596, "step": 47455 }, { "epoch": 3.2246229107215654, "grad_norm": 0.9899761080741882, "learning_rate": 0.0005969221361598043, "loss": 3.5833, "step": 47460 }, { "epoch": 3.224962630792227, "grad_norm": 0.8009094595909119, "learning_rate": 0.0005968796711509716, "loss": 3.5576, "step": 47465 }, { "epoch": 3.225302350862889, "grad_norm": 0.8916170597076416, "learning_rate": 0.0005968372061421389, "loss": 3.4667, "step": 47470 }, { "epoch": 3.2256420709335507, "grad_norm": 0.7639058828353882, "learning_rate": 0.0005967947411333061, "loss": 3.5045, "step": 47475 }, { "epoch": 3.2259817910042123, "grad_norm": 0.7537293434143066, "learning_rate": 0.0005967522761244734, "loss": 3.713, "step": 47480 }, { "epoch": 3.2263215110748744, "grad_norm": 0.7929649353027344, "learning_rate": 0.0005967098111156408, "loss": 3.7627, "step": 47485 }, { "epoch": 3.226661231145536, "grad_norm": 0.8976273536682129, "learning_rate": 0.000596667346106808, "loss": 3.5703, "step": 47490 }, { "epoch": 3.2270009512161977, "grad_norm": 1.0552699565887451, "learning_rate": 0.0005966248810979753, "loss": 3.2194, "step": 47495 }, { "epoch": 3.2273406712868598, "grad_norm": 0.9746345281600952, "learning_rate": 0.0005965824160891425, "loss": 3.2445, "step": 47500 }, { "epoch": 3.2276803913575214, "grad_norm": 0.8705211281776428, "learning_rate": 0.0005965399510803098, "loss": 3.7214, "step": 47505 }, { "epoch": 3.228020111428183, "grad_norm": 0.8772838711738586, "learning_rate": 0.0005964974860714771, "loss": 3.6214, "step": 47510 }, { "epoch": 3.228359831498845, "grad_norm": 0.8401340842247009, "learning_rate": 0.0005964550210626443, "loss": 3.8197, "step": 47515 }, { "epoch": 3.2286995515695067, "grad_norm": 0.7370373010635376, "learning_rate": 0.0005964125560538117, "loss": 3.2784, "step": 47520 }, { "epoch": 3.2290392716401684, "grad_norm": 0.7231046557426453, "learning_rate": 0.000596370091044979, "loss": 3.5432, "step": 47525 }, { "epoch": 3.2293789917108304, "grad_norm": 0.8223814964294434, "learning_rate": 0.0005963276260361462, "loss": 3.6303, "step": 47530 }, { "epoch": 3.229718711781492, "grad_norm": 0.9232456088066101, "learning_rate": 0.0005962851610273136, "loss": 3.1634, "step": 47535 }, { "epoch": 3.2300584318521537, "grad_norm": 0.9933381080627441, "learning_rate": 0.0005962426960184808, "loss": 3.4103, "step": 47540 }, { "epoch": 3.2303981519228158, "grad_norm": 0.7173306345939636, "learning_rate": 0.000596200231009648, "loss": 3.4353, "step": 47545 }, { "epoch": 3.2307378719934774, "grad_norm": 0.9653517007827759, "learning_rate": 0.0005961577660008153, "loss": 3.1973, "step": 47550 }, { "epoch": 3.231077592064139, "grad_norm": 1.0191166400909424, "learning_rate": 0.0005961153009919827, "loss": 3.5182, "step": 47555 }, { "epoch": 3.231417312134801, "grad_norm": 0.8312004804611206, "learning_rate": 0.0005960728359831499, "loss": 3.273, "step": 47560 }, { "epoch": 3.2317570322054627, "grad_norm": 0.7020907402038574, "learning_rate": 0.0005960303709743172, "loss": 3.2968, "step": 47565 }, { "epoch": 3.2320967522761244, "grad_norm": 0.7730855941772461, "learning_rate": 0.0005959879059654845, "loss": 3.467, "step": 47570 }, { "epoch": 3.2324364723467864, "grad_norm": 0.8848169445991516, "learning_rate": 0.0005959454409566517, "loss": 3.5819, "step": 47575 }, { "epoch": 3.232776192417448, "grad_norm": 0.9852948188781738, "learning_rate": 0.000595902975947819, "loss": 3.5139, "step": 47580 }, { "epoch": 3.2331159124881097, "grad_norm": 0.971278190612793, "learning_rate": 0.0005958605109389862, "loss": 3.745, "step": 47585 }, { "epoch": 3.2334556325587718, "grad_norm": 0.7775343656539917, "learning_rate": 0.0005958180459301536, "loss": 3.6919, "step": 47590 }, { "epoch": 3.2337953526294334, "grad_norm": 0.8719583749771118, "learning_rate": 0.0005957755809213209, "loss": 3.3408, "step": 47595 }, { "epoch": 3.234135072700095, "grad_norm": 0.8771272897720337, "learning_rate": 0.0005957331159124881, "loss": 3.8052, "step": 47600 }, { "epoch": 3.234474792770757, "grad_norm": 1.0511424541473389, "learning_rate": 0.0005956906509036554, "loss": 3.669, "step": 47605 }, { "epoch": 3.2348145128414187, "grad_norm": 0.7543144822120667, "learning_rate": 0.0005956481858948227, "loss": 3.3067, "step": 47610 }, { "epoch": 3.2351542329120804, "grad_norm": 0.9410807490348816, "learning_rate": 0.0005956057208859899, "loss": 3.6167, "step": 47615 }, { "epoch": 3.2354939529827424, "grad_norm": 0.7431885004043579, "learning_rate": 0.0005955632558771572, "loss": 3.7184, "step": 47620 }, { "epoch": 3.235833673053404, "grad_norm": 1.0892412662506104, "learning_rate": 0.0005955207908683246, "loss": 3.3993, "step": 47625 }, { "epoch": 3.2361733931240657, "grad_norm": 0.7989431619644165, "learning_rate": 0.0005954783258594918, "loss": 3.5031, "step": 47630 }, { "epoch": 3.2365131131947273, "grad_norm": 0.8208248615264893, "learning_rate": 0.000595435860850659, "loss": 3.508, "step": 47635 }, { "epoch": 3.2368528332653894, "grad_norm": 0.6892132759094238, "learning_rate": 0.0005953933958418264, "loss": 3.6028, "step": 47640 }, { "epoch": 3.237192553336051, "grad_norm": 0.869514524936676, "learning_rate": 0.0005953509308329936, "loss": 3.5991, "step": 47645 }, { "epoch": 3.2375322734067127, "grad_norm": 0.8583365678787231, "learning_rate": 0.0005953084658241608, "loss": 3.3453, "step": 47650 }, { "epoch": 3.2378719934773748, "grad_norm": 1.061503291130066, "learning_rate": 0.0005952660008153283, "loss": 3.4741, "step": 47655 }, { "epoch": 3.2382117135480364, "grad_norm": 0.8906923532485962, "learning_rate": 0.0005952235358064955, "loss": 3.7001, "step": 47660 }, { "epoch": 3.238551433618698, "grad_norm": 0.9585073590278625, "learning_rate": 0.0005951810707976627, "loss": 3.5161, "step": 47665 }, { "epoch": 3.23889115368936, "grad_norm": 0.6733388304710388, "learning_rate": 0.0005951386057888301, "loss": 3.5677, "step": 47670 }, { "epoch": 3.2392308737600217, "grad_norm": 1.0495548248291016, "learning_rate": 0.0005950961407799973, "loss": 3.541, "step": 47675 }, { "epoch": 3.2395705938306834, "grad_norm": 0.7816379070281982, "learning_rate": 0.0005950536757711645, "loss": 3.4436, "step": 47680 }, { "epoch": 3.2399103139013454, "grad_norm": 0.9670003652572632, "learning_rate": 0.0005950112107623318, "loss": 3.8514, "step": 47685 }, { "epoch": 3.240250033972007, "grad_norm": 0.8924698233604431, "learning_rate": 0.0005949687457534992, "loss": 3.4297, "step": 47690 }, { "epoch": 3.2405897540426687, "grad_norm": 0.7472886443138123, "learning_rate": 0.0005949262807446664, "loss": 3.8485, "step": 47695 }, { "epoch": 3.2409294741133308, "grad_norm": 0.7940961718559265, "learning_rate": 0.0005948838157358337, "loss": 3.4862, "step": 47700 }, { "epoch": 3.2412691941839924, "grad_norm": 0.7485201358795166, "learning_rate": 0.000594841350727001, "loss": 3.6177, "step": 47705 }, { "epoch": 3.241608914254654, "grad_norm": 1.0085467100143433, "learning_rate": 0.0005947988857181682, "loss": 3.6709, "step": 47710 }, { "epoch": 3.241948634325316, "grad_norm": 1.01823091506958, "learning_rate": 0.0005947564207093355, "loss": 3.5782, "step": 47715 }, { "epoch": 3.2422883543959777, "grad_norm": 1.1326749324798584, "learning_rate": 0.0005947139557005028, "loss": 3.5941, "step": 47720 }, { "epoch": 3.2426280744666394, "grad_norm": 0.9730358123779297, "learning_rate": 0.0005946714906916701, "loss": 3.5624, "step": 47725 }, { "epoch": 3.2429677945373014, "grad_norm": 1.0460444688796997, "learning_rate": 0.0005946290256828374, "loss": 3.4828, "step": 47730 }, { "epoch": 3.243307514607963, "grad_norm": 0.9467706680297852, "learning_rate": 0.0005945865606740046, "loss": 3.5464, "step": 47735 }, { "epoch": 3.2436472346786247, "grad_norm": 0.9326621890068054, "learning_rate": 0.0005945440956651719, "loss": 3.6374, "step": 47740 }, { "epoch": 3.2439869547492868, "grad_norm": 0.7779879570007324, "learning_rate": 0.0005945016306563392, "loss": 3.5103, "step": 47745 }, { "epoch": 3.2443266748199484, "grad_norm": 0.9772298336029053, "learning_rate": 0.0005944591656475064, "loss": 3.447, "step": 47750 }, { "epoch": 3.24466639489061, "grad_norm": 0.9856424331665039, "learning_rate": 0.0005944167006386737, "loss": 3.711, "step": 47755 }, { "epoch": 3.245006114961272, "grad_norm": 1.587141752243042, "learning_rate": 0.0005943742356298411, "loss": 3.5275, "step": 47760 }, { "epoch": 3.2453458350319337, "grad_norm": 0.801163911819458, "learning_rate": 0.0005943317706210083, "loss": 3.663, "step": 47765 }, { "epoch": 3.2456855551025954, "grad_norm": 0.8324598670005798, "learning_rate": 0.0005942893056121756, "loss": 3.5451, "step": 47770 }, { "epoch": 3.246025275173257, "grad_norm": 0.856397807598114, "learning_rate": 0.0005942468406033429, "loss": 3.4862, "step": 47775 }, { "epoch": 3.246364995243919, "grad_norm": 0.852536141872406, "learning_rate": 0.0005942043755945101, "loss": 3.5828, "step": 47780 }, { "epoch": 3.2467047153145807, "grad_norm": 0.8401280641555786, "learning_rate": 0.0005941619105856773, "loss": 3.1456, "step": 47785 }, { "epoch": 3.2470444353852423, "grad_norm": 0.9983728528022766, "learning_rate": 0.0005941194455768447, "loss": 3.4288, "step": 47790 }, { "epoch": 3.2473841554559044, "grad_norm": 1.0214403867721558, "learning_rate": 0.000594076980568012, "loss": 3.4486, "step": 47795 }, { "epoch": 3.247723875526566, "grad_norm": 0.8960226774215698, "learning_rate": 0.0005940345155591792, "loss": 3.4927, "step": 47800 }, { "epoch": 3.2480635955972277, "grad_norm": 0.9636126160621643, "learning_rate": 0.0005939920505503466, "loss": 3.3291, "step": 47805 }, { "epoch": 3.2484033156678898, "grad_norm": 0.7597578763961792, "learning_rate": 0.0005939495855415138, "loss": 3.6918, "step": 47810 }, { "epoch": 3.2487430357385514, "grad_norm": 0.8202013373374939, "learning_rate": 0.000593907120532681, "loss": 3.2596, "step": 47815 }, { "epoch": 3.249082755809213, "grad_norm": 0.7173140048980713, "learning_rate": 0.0005938646555238484, "loss": 3.7939, "step": 47820 }, { "epoch": 3.249422475879875, "grad_norm": 1.1240214109420776, "learning_rate": 0.0005938221905150156, "loss": 3.5145, "step": 47825 }, { "epoch": 3.2497621959505367, "grad_norm": 0.8800768256187439, "learning_rate": 0.0005937797255061829, "loss": 3.4274, "step": 47830 }, { "epoch": 3.2501019160211984, "grad_norm": 0.8554653525352478, "learning_rate": 0.0005937372604973503, "loss": 3.611, "step": 47835 }, { "epoch": 3.2504416360918604, "grad_norm": 0.9862136840820312, "learning_rate": 0.0005936947954885175, "loss": 3.7074, "step": 47840 }, { "epoch": 3.250781356162522, "grad_norm": 0.7944707274436951, "learning_rate": 0.0005936523304796847, "loss": 3.5169, "step": 47845 }, { "epoch": 3.2511210762331837, "grad_norm": 0.8265283703804016, "learning_rate": 0.000593609865470852, "loss": 3.7306, "step": 47850 }, { "epoch": 3.2514607963038458, "grad_norm": 1.030619740486145, "learning_rate": 0.0005935674004620193, "loss": 3.6605, "step": 47855 }, { "epoch": 3.2518005163745074, "grad_norm": 0.6259447932243347, "learning_rate": 0.0005935249354531865, "loss": 3.575, "step": 47860 }, { "epoch": 3.252140236445169, "grad_norm": 0.8588775992393494, "learning_rate": 0.0005934824704443539, "loss": 3.319, "step": 47865 }, { "epoch": 3.252479956515831, "grad_norm": 0.8911780714988708, "learning_rate": 0.0005934400054355212, "loss": 3.4639, "step": 47870 }, { "epoch": 3.2528196765864927, "grad_norm": 0.9665262699127197, "learning_rate": 0.0005933975404266885, "loss": 3.297, "step": 47875 }, { "epoch": 3.2531593966571544, "grad_norm": 0.9011222720146179, "learning_rate": 0.0005933550754178557, "loss": 3.6044, "step": 47880 }, { "epoch": 3.2534991167278164, "grad_norm": 0.869540810585022, "learning_rate": 0.0005933126104090229, "loss": 3.5505, "step": 47885 }, { "epoch": 3.253838836798478, "grad_norm": 0.9042954444885254, "learning_rate": 0.0005932701454001903, "loss": 3.6101, "step": 47890 }, { "epoch": 3.2541785568691397, "grad_norm": 1.0394214391708374, "learning_rate": 0.0005932276803913575, "loss": 3.558, "step": 47895 }, { "epoch": 3.254518276939802, "grad_norm": 0.8977407813072205, "learning_rate": 0.0005931852153825248, "loss": 3.367, "step": 47900 }, { "epoch": 3.2548579970104634, "grad_norm": 1.0113568305969238, "learning_rate": 0.0005931427503736922, "loss": 3.4587, "step": 47905 }, { "epoch": 3.255197717081125, "grad_norm": 0.8451798558235168, "learning_rate": 0.0005931002853648594, "loss": 3.4611, "step": 47910 }, { "epoch": 3.255537437151787, "grad_norm": 1.0124350786209106, "learning_rate": 0.0005930578203560266, "loss": 3.3668, "step": 47915 }, { "epoch": 3.2558771572224487, "grad_norm": 0.8732821345329285, "learning_rate": 0.000593015355347194, "loss": 3.8955, "step": 47920 }, { "epoch": 3.2562168772931104, "grad_norm": 0.6830423474311829, "learning_rate": 0.0005929728903383612, "loss": 3.5467, "step": 47925 }, { "epoch": 3.2565565973637725, "grad_norm": 0.90553879737854, "learning_rate": 0.0005929304253295284, "loss": 3.6289, "step": 47930 }, { "epoch": 3.256896317434434, "grad_norm": 0.9308068156242371, "learning_rate": 0.0005928879603206959, "loss": 3.8293, "step": 47935 }, { "epoch": 3.2572360375050957, "grad_norm": 1.0142700672149658, "learning_rate": 0.0005928454953118631, "loss": 3.4183, "step": 47940 }, { "epoch": 3.257575757575758, "grad_norm": 0.826784074306488, "learning_rate": 0.0005928030303030303, "loss": 3.5251, "step": 47945 }, { "epoch": 3.2579154776464194, "grad_norm": 0.9642004370689392, "learning_rate": 0.0005927605652941976, "loss": 3.5878, "step": 47950 }, { "epoch": 3.258255197717081, "grad_norm": 0.7544974088668823, "learning_rate": 0.0005927181002853649, "loss": 3.6442, "step": 47955 }, { "epoch": 3.258594917787743, "grad_norm": 0.8166085481643677, "learning_rate": 0.0005926756352765321, "loss": 3.7246, "step": 47960 }, { "epoch": 3.2589346378584048, "grad_norm": 0.9310194253921509, "learning_rate": 0.0005926331702676994, "loss": 3.5062, "step": 47965 }, { "epoch": 3.2592743579290664, "grad_norm": 0.8698039650917053, "learning_rate": 0.0005925907052588668, "loss": 3.6505, "step": 47970 }, { "epoch": 3.2596140779997285, "grad_norm": 1.0251821279525757, "learning_rate": 0.000592548240250034, "loss": 3.3749, "step": 47975 }, { "epoch": 3.25995379807039, "grad_norm": 0.8241837620735168, "learning_rate": 0.0005925057752412013, "loss": 3.3739, "step": 47980 }, { "epoch": 3.2602935181410517, "grad_norm": 0.7880303263664246, "learning_rate": 0.0005924633102323685, "loss": 3.6489, "step": 47985 }, { "epoch": 3.2606332382117134, "grad_norm": 0.8381921648979187, "learning_rate": 0.0005924208452235358, "loss": 3.5077, "step": 47990 }, { "epoch": 3.2609729582823754, "grad_norm": 0.9544795751571655, "learning_rate": 0.0005923783802147031, "loss": 3.7188, "step": 47995 }, { "epoch": 3.261312678353037, "grad_norm": 0.7482354044914246, "learning_rate": 0.0005923359152058703, "loss": 3.6199, "step": 48000 }, { "epoch": 3.2616523984236987, "grad_norm": 0.9611053466796875, "learning_rate": 0.0005922934501970377, "loss": 3.5024, "step": 48005 }, { "epoch": 3.2619921184943608, "grad_norm": 0.7979375720024109, "learning_rate": 0.000592250985188205, "loss": 3.7431, "step": 48010 }, { "epoch": 3.2623318385650224, "grad_norm": 0.9857934713363647, "learning_rate": 0.0005922085201793722, "loss": 3.5621, "step": 48015 }, { "epoch": 3.262671558635684, "grad_norm": 1.0658526420593262, "learning_rate": 0.0005921660551705395, "loss": 3.4895, "step": 48020 }, { "epoch": 3.263011278706346, "grad_norm": 0.8188419342041016, "learning_rate": 0.0005921235901617068, "loss": 3.7967, "step": 48025 }, { "epoch": 3.2633509987770077, "grad_norm": 0.6816413998603821, "learning_rate": 0.000592081125152874, "loss": 3.5721, "step": 48030 }, { "epoch": 3.2636907188476694, "grad_norm": 0.8533735871315002, "learning_rate": 0.0005920386601440412, "loss": 3.4847, "step": 48035 }, { "epoch": 3.2640304389183314, "grad_norm": 0.6705483794212341, "learning_rate": 0.0005919961951352087, "loss": 3.5714, "step": 48040 }, { "epoch": 3.264370158988993, "grad_norm": 1.1123254299163818, "learning_rate": 0.0005919537301263759, "loss": 3.3895, "step": 48045 }, { "epoch": 3.2647098790596547, "grad_norm": 0.9214597940444946, "learning_rate": 0.0005919112651175431, "loss": 3.7081, "step": 48050 }, { "epoch": 3.265049599130317, "grad_norm": 0.9863991141319275, "learning_rate": 0.0005918688001087105, "loss": 3.2857, "step": 48055 }, { "epoch": 3.2653893192009784, "grad_norm": 1.1323281526565552, "learning_rate": 0.0005918263350998777, "loss": 3.2503, "step": 48060 }, { "epoch": 3.26572903927164, "grad_norm": 0.9129437208175659, "learning_rate": 0.0005917838700910449, "loss": 3.4975, "step": 48065 }, { "epoch": 3.266068759342302, "grad_norm": 0.8707402348518372, "learning_rate": 0.0005917414050822123, "loss": 3.5394, "step": 48070 }, { "epoch": 3.2664084794129638, "grad_norm": 0.8104270100593567, "learning_rate": 0.0005916989400733796, "loss": 3.557, "step": 48075 }, { "epoch": 3.2667481994836254, "grad_norm": 0.7981496453285217, "learning_rate": 0.0005916564750645468, "loss": 3.3854, "step": 48080 }, { "epoch": 3.2670879195542875, "grad_norm": 1.0782002210617065, "learning_rate": 0.0005916140100557141, "loss": 3.3512, "step": 48085 }, { "epoch": 3.267427639624949, "grad_norm": 1.2251445055007935, "learning_rate": 0.0005915715450468814, "loss": 3.5906, "step": 48090 }, { "epoch": 3.2677673596956107, "grad_norm": 1.0092848539352417, "learning_rate": 0.0005915290800380486, "loss": 3.5482, "step": 48095 }, { "epoch": 3.2681070797662723, "grad_norm": 0.7547249794006348, "learning_rate": 0.0005914866150292159, "loss": 3.579, "step": 48100 }, { "epoch": 3.2684467998369344, "grad_norm": 0.799691379070282, "learning_rate": 0.0005914441500203832, "loss": 3.5736, "step": 48105 }, { "epoch": 3.268786519907596, "grad_norm": 1.1969653367996216, "learning_rate": 0.0005914016850115505, "loss": 3.5798, "step": 48110 }, { "epoch": 3.2691262399782577, "grad_norm": 0.9269769191741943, "learning_rate": 0.0005913592200027178, "loss": 3.528, "step": 48115 }, { "epoch": 3.2694659600489198, "grad_norm": 0.6286087036132812, "learning_rate": 0.000591316754993885, "loss": 3.5958, "step": 48120 }, { "epoch": 3.2698056801195814, "grad_norm": 0.9626367688179016, "learning_rate": 0.0005912742899850523, "loss": 3.6998, "step": 48125 }, { "epoch": 3.270145400190243, "grad_norm": 0.8893303871154785, "learning_rate": 0.0005912318249762196, "loss": 3.3395, "step": 48130 }, { "epoch": 3.270485120260905, "grad_norm": 1.2342472076416016, "learning_rate": 0.0005911893599673868, "loss": 3.3006, "step": 48135 }, { "epoch": 3.2708248403315667, "grad_norm": 1.2438536882400513, "learning_rate": 0.0005911468949585541, "loss": 3.508, "step": 48140 }, { "epoch": 3.2711645604022284, "grad_norm": 1.0036423206329346, "learning_rate": 0.0005911044299497215, "loss": 3.6891, "step": 48145 }, { "epoch": 3.2715042804728904, "grad_norm": 0.7225595712661743, "learning_rate": 0.0005910619649408887, "loss": 3.7253, "step": 48150 }, { "epoch": 3.271844000543552, "grad_norm": 1.049621343612671, "learning_rate": 0.000591019499932056, "loss": 3.6803, "step": 48155 }, { "epoch": 3.2721837206142137, "grad_norm": 0.9969396591186523, "learning_rate": 0.0005909770349232233, "loss": 3.0746, "step": 48160 }, { "epoch": 3.2725234406848758, "grad_norm": 1.0230343341827393, "learning_rate": 0.0005909345699143905, "loss": 3.4988, "step": 48165 }, { "epoch": 3.2728631607555374, "grad_norm": 1.0106868743896484, "learning_rate": 0.0005908921049055577, "loss": 3.2487, "step": 48170 }, { "epoch": 3.273202880826199, "grad_norm": 0.988264262676239, "learning_rate": 0.0005908496398967251, "loss": 3.4339, "step": 48175 }, { "epoch": 3.273542600896861, "grad_norm": 0.8124376535415649, "learning_rate": 0.0005908071748878924, "loss": 3.5168, "step": 48180 }, { "epoch": 3.2738823209675227, "grad_norm": 1.035197377204895, "learning_rate": 0.0005907647098790596, "loss": 3.5886, "step": 48185 }, { "epoch": 3.2742220410381844, "grad_norm": 1.0544496774673462, "learning_rate": 0.000590722244870227, "loss": 3.5541, "step": 48190 }, { "epoch": 3.2745617611088464, "grad_norm": 0.9685846567153931, "learning_rate": 0.0005906797798613942, "loss": 3.6486, "step": 48195 }, { "epoch": 3.274901481179508, "grad_norm": 0.9912694692611694, "learning_rate": 0.0005906373148525614, "loss": 3.5064, "step": 48200 }, { "epoch": 3.2752412012501697, "grad_norm": 1.1322304010391235, "learning_rate": 0.0005905948498437288, "loss": 3.4654, "step": 48205 }, { "epoch": 3.275580921320832, "grad_norm": 0.982255220413208, "learning_rate": 0.000590552384834896, "loss": 3.6384, "step": 48210 }, { "epoch": 3.2759206413914934, "grad_norm": 0.859764575958252, "learning_rate": 0.0005905099198260634, "loss": 3.4107, "step": 48215 }, { "epoch": 3.276260361462155, "grad_norm": 0.6648565530776978, "learning_rate": 0.0005904674548172307, "loss": 3.5586, "step": 48220 }, { "epoch": 3.276600081532817, "grad_norm": 0.8247596025466919, "learning_rate": 0.0005904249898083979, "loss": 3.5166, "step": 48225 }, { "epoch": 3.2769398016034788, "grad_norm": 1.1701301336288452, "learning_rate": 0.0005903825247995652, "loss": 3.5289, "step": 48230 }, { "epoch": 3.2772795216741404, "grad_norm": 1.1855719089508057, "learning_rate": 0.0005903400597907324, "loss": 3.5381, "step": 48235 }, { "epoch": 3.2776192417448025, "grad_norm": 0.9332132339477539, "learning_rate": 0.0005902975947818997, "loss": 3.5439, "step": 48240 }, { "epoch": 3.277958961815464, "grad_norm": 0.7846083641052246, "learning_rate": 0.0005902551297730671, "loss": 3.7167, "step": 48245 }, { "epoch": 3.2782986818861257, "grad_norm": 0.8178573846817017, "learning_rate": 0.0005902126647642343, "loss": 3.6252, "step": 48250 }, { "epoch": 3.278638401956788, "grad_norm": 0.866978645324707, "learning_rate": 0.0005901701997554016, "loss": 3.3862, "step": 48255 }, { "epoch": 3.2789781220274494, "grad_norm": 1.1222516298294067, "learning_rate": 0.0005901277347465689, "loss": 3.4811, "step": 48260 }, { "epoch": 3.279317842098111, "grad_norm": 0.8810768723487854, "learning_rate": 0.0005900852697377361, "loss": 3.5017, "step": 48265 }, { "epoch": 3.279657562168773, "grad_norm": 0.7866187691688538, "learning_rate": 0.0005900428047289033, "loss": 3.4903, "step": 48270 }, { "epoch": 3.2799972822394348, "grad_norm": 1.19028902053833, "learning_rate": 0.0005900003397200707, "loss": 3.4481, "step": 48275 }, { "epoch": 3.2803370023100964, "grad_norm": 3.2851321697235107, "learning_rate": 0.000589957874711238, "loss": 3.5865, "step": 48280 }, { "epoch": 3.2806767223807585, "grad_norm": 0.8071588277816772, "learning_rate": 0.0005899154097024052, "loss": 3.5645, "step": 48285 }, { "epoch": 3.28101644245142, "grad_norm": 0.916167140007019, "learning_rate": 0.0005898729446935726, "loss": 3.3681, "step": 48290 }, { "epoch": 3.2813561625220817, "grad_norm": 0.7808096408843994, "learning_rate": 0.0005898304796847398, "loss": 3.6605, "step": 48295 }, { "epoch": 3.281695882592744, "grad_norm": 0.9146581888198853, "learning_rate": 0.000589788014675907, "loss": 3.4266, "step": 48300 }, { "epoch": 3.2820356026634054, "grad_norm": 1.1291333436965942, "learning_rate": 0.0005897455496670744, "loss": 3.5305, "step": 48305 }, { "epoch": 3.282375322734067, "grad_norm": 1.1976310014724731, "learning_rate": 0.0005897030846582416, "loss": 3.7032, "step": 48310 }, { "epoch": 3.282715042804729, "grad_norm": 0.9564703702926636, "learning_rate": 0.0005896606196494089, "loss": 3.5537, "step": 48315 }, { "epoch": 3.2830547628753908, "grad_norm": 0.9660830497741699, "learning_rate": 0.0005896181546405763, "loss": 3.5807, "step": 48320 }, { "epoch": 3.2833944829460524, "grad_norm": 1.0186069011688232, "learning_rate": 0.0005895756896317435, "loss": 3.5968, "step": 48325 }, { "epoch": 3.283734203016714, "grad_norm": 0.880481481552124, "learning_rate": 0.0005895332246229107, "loss": 3.5364, "step": 48330 }, { "epoch": 3.284073923087376, "grad_norm": 0.9628724455833435, "learning_rate": 0.000589490759614078, "loss": 3.4647, "step": 48335 }, { "epoch": 3.2844136431580377, "grad_norm": 0.8436428308486938, "learning_rate": 0.0005894482946052453, "loss": 3.4925, "step": 48340 }, { "epoch": 3.2847533632286994, "grad_norm": 0.7321258187294006, "learning_rate": 0.0005894058295964125, "loss": 3.1267, "step": 48345 }, { "epoch": 3.2850930832993614, "grad_norm": 1.0233181715011597, "learning_rate": 0.0005893633645875799, "loss": 3.5618, "step": 48350 }, { "epoch": 3.285432803370023, "grad_norm": 0.7987353205680847, "learning_rate": 0.0005893208995787472, "loss": 3.4351, "step": 48355 }, { "epoch": 3.2857725234406847, "grad_norm": 1.008324384689331, "learning_rate": 0.0005892784345699144, "loss": 3.4012, "step": 48360 }, { "epoch": 3.286112243511347, "grad_norm": 0.8408792614936829, "learning_rate": 0.0005892359695610817, "loss": 3.5417, "step": 48365 }, { "epoch": 3.2864519635820084, "grad_norm": 2.9090662002563477, "learning_rate": 0.000589193504552249, "loss": 3.5862, "step": 48370 }, { "epoch": 3.28679168365267, "grad_norm": 1.4570045471191406, "learning_rate": 0.0005891510395434162, "loss": 3.6157, "step": 48375 }, { "epoch": 3.287131403723332, "grad_norm": 0.8945032954216003, "learning_rate": 0.0005891085745345835, "loss": 3.6317, "step": 48380 }, { "epoch": 3.2874711237939938, "grad_norm": 0.7966932058334351, "learning_rate": 0.0005890661095257508, "loss": 3.4614, "step": 48385 }, { "epoch": 3.2878108438646554, "grad_norm": 0.9876346588134766, "learning_rate": 0.0005890236445169181, "loss": 3.6183, "step": 48390 }, { "epoch": 3.2881505639353175, "grad_norm": 0.8333478569984436, "learning_rate": 0.0005889811795080854, "loss": 3.5255, "step": 48395 }, { "epoch": 3.288490284005979, "grad_norm": 0.7316277623176575, "learning_rate": 0.0005889387144992526, "loss": 3.6183, "step": 48400 }, { "epoch": 3.2888300040766407, "grad_norm": 0.900107204914093, "learning_rate": 0.0005888962494904199, "loss": 3.861, "step": 48405 }, { "epoch": 3.289169724147303, "grad_norm": 0.7224419713020325, "learning_rate": 0.0005888537844815872, "loss": 3.668, "step": 48410 }, { "epoch": 3.2895094442179644, "grad_norm": 1.0782051086425781, "learning_rate": 0.0005888113194727544, "loss": 3.4122, "step": 48415 }, { "epoch": 3.289849164288626, "grad_norm": 0.9715402126312256, "learning_rate": 0.0005887688544639217, "loss": 3.3546, "step": 48420 }, { "epoch": 3.290188884359288, "grad_norm": 0.8950231075286865, "learning_rate": 0.0005887263894550891, "loss": 3.4969, "step": 48425 }, { "epoch": 3.2905286044299498, "grad_norm": 1.0084583759307861, "learning_rate": 0.0005886839244462563, "loss": 3.6045, "step": 48430 }, { "epoch": 3.2908683245006114, "grad_norm": 0.931252658367157, "learning_rate": 0.0005886414594374235, "loss": 3.3763, "step": 48435 }, { "epoch": 3.291208044571273, "grad_norm": 1.1343733072280884, "learning_rate": 0.0005885989944285909, "loss": 3.8747, "step": 48440 }, { "epoch": 3.291547764641935, "grad_norm": 0.8119016289710999, "learning_rate": 0.0005885565294197581, "loss": 3.8975, "step": 48445 }, { "epoch": 3.2918874847125967, "grad_norm": 0.708626925945282, "learning_rate": 0.0005885140644109253, "loss": 3.7945, "step": 48450 }, { "epoch": 3.2922272047832584, "grad_norm": 0.7924911379814148, "learning_rate": 0.0005884715994020928, "loss": 3.1025, "step": 48455 }, { "epoch": 3.2925669248539204, "grad_norm": 0.815510630607605, "learning_rate": 0.00058842913439326, "loss": 3.2525, "step": 48460 }, { "epoch": 3.292906644924582, "grad_norm": 0.875045120716095, "learning_rate": 0.0005883866693844272, "loss": 3.5468, "step": 48465 }, { "epoch": 3.2932463649952437, "grad_norm": 1.8612422943115234, "learning_rate": 0.0005883442043755945, "loss": 3.6783, "step": 48470 }, { "epoch": 3.2935860850659058, "grad_norm": 1.1833158731460571, "learning_rate": 0.0005883017393667618, "loss": 3.4675, "step": 48475 }, { "epoch": 3.2939258051365674, "grad_norm": 1.1374233961105347, "learning_rate": 0.000588259274357929, "loss": 3.7298, "step": 48480 }, { "epoch": 3.294265525207229, "grad_norm": 0.8647292852401733, "learning_rate": 0.0005882168093490963, "loss": 3.4268, "step": 48485 }, { "epoch": 3.294605245277891, "grad_norm": 0.9696346521377563, "learning_rate": 0.0005881743443402637, "loss": 3.6202, "step": 48490 }, { "epoch": 3.2949449653485527, "grad_norm": 1.0303493738174438, "learning_rate": 0.0005881318793314309, "loss": 3.4976, "step": 48495 }, { "epoch": 3.2952846854192144, "grad_norm": 0.8287239670753479, "learning_rate": 0.0005880894143225982, "loss": 3.4932, "step": 48500 }, { "epoch": 3.2956244054898765, "grad_norm": 0.7907270789146423, "learning_rate": 0.0005880469493137655, "loss": 3.6521, "step": 48505 }, { "epoch": 3.295964125560538, "grad_norm": 0.8741850256919861, "learning_rate": 0.0005880044843049327, "loss": 3.3853, "step": 48510 }, { "epoch": 3.2963038456311997, "grad_norm": 0.8967990875244141, "learning_rate": 0.0005879620192961, "loss": 3.5932, "step": 48515 }, { "epoch": 3.296643565701862, "grad_norm": 0.79707932472229, "learning_rate": 0.0005879195542872672, "loss": 3.392, "step": 48520 }, { "epoch": 3.2969832857725234, "grad_norm": 0.9180774092674255, "learning_rate": 0.0005878770892784346, "loss": 3.346, "step": 48525 }, { "epoch": 3.297323005843185, "grad_norm": 2.717019557952881, "learning_rate": 0.0005878346242696019, "loss": 3.4252, "step": 48530 }, { "epoch": 3.297662725913847, "grad_norm": 0.8125959634780884, "learning_rate": 0.0005877921592607691, "loss": 3.2545, "step": 48535 }, { "epoch": 3.2980024459845088, "grad_norm": 0.921142041683197, "learning_rate": 0.0005877496942519364, "loss": 3.2168, "step": 48540 }, { "epoch": 3.2983421660551704, "grad_norm": 0.9875665307044983, "learning_rate": 0.0005877072292431037, "loss": 3.6412, "step": 48545 }, { "epoch": 3.2986818861258325, "grad_norm": 1.061631202697754, "learning_rate": 0.0005876647642342709, "loss": 3.4933, "step": 48550 }, { "epoch": 3.299021606196494, "grad_norm": 0.8779887557029724, "learning_rate": 0.0005876222992254383, "loss": 3.6356, "step": 48555 }, { "epoch": 3.2993613262671557, "grad_norm": 1.2114033699035645, "learning_rate": 0.0005875798342166056, "loss": 3.5949, "step": 48560 }, { "epoch": 3.299701046337818, "grad_norm": 0.8092819452285767, "learning_rate": 0.0005875373692077728, "loss": 3.3185, "step": 48565 }, { "epoch": 3.3000407664084794, "grad_norm": 0.7472546100616455, "learning_rate": 0.0005874949041989402, "loss": 3.6334, "step": 48570 }, { "epoch": 3.300380486479141, "grad_norm": 0.9886726140975952, "learning_rate": 0.0005874524391901074, "loss": 3.5992, "step": 48575 }, { "epoch": 3.300720206549803, "grad_norm": 0.8509883284568787, "learning_rate": 0.0005874099741812746, "loss": 3.6199, "step": 48580 }, { "epoch": 3.3010599266204648, "grad_norm": 0.7618286609649658, "learning_rate": 0.0005873675091724419, "loss": 3.6605, "step": 48585 }, { "epoch": 3.3013996466911264, "grad_norm": 0.9415149092674255, "learning_rate": 0.0005873250441636092, "loss": 3.363, "step": 48590 }, { "epoch": 3.3017393667617885, "grad_norm": 0.8691444993019104, "learning_rate": 0.0005872825791547765, "loss": 3.6451, "step": 48595 }, { "epoch": 3.30207908683245, "grad_norm": 0.7674158215522766, "learning_rate": 0.0005872401141459438, "loss": 3.4965, "step": 48600 }, { "epoch": 3.3024188069031117, "grad_norm": 0.978895366191864, "learning_rate": 0.0005871976491371111, "loss": 3.3084, "step": 48605 }, { "epoch": 3.302758526973774, "grad_norm": 0.9119603037834167, "learning_rate": 0.0005871551841282783, "loss": 3.5772, "step": 48610 }, { "epoch": 3.3030982470444354, "grad_norm": 0.9546215534210205, "learning_rate": 0.0005871127191194456, "loss": 3.8092, "step": 48615 }, { "epoch": 3.303437967115097, "grad_norm": 0.8560947775840759, "learning_rate": 0.0005870702541106128, "loss": 3.5182, "step": 48620 }, { "epoch": 3.303777687185759, "grad_norm": 1.0189309120178223, "learning_rate": 0.0005870277891017801, "loss": 3.3749, "step": 48625 }, { "epoch": 3.304117407256421, "grad_norm": 0.9473175406455994, "learning_rate": 0.0005869853240929475, "loss": 3.3947, "step": 48630 }, { "epoch": 3.3044571273270824, "grad_norm": 0.9259925484657288, "learning_rate": 0.0005869428590841147, "loss": 3.7293, "step": 48635 }, { "epoch": 3.3047968473977445, "grad_norm": 0.9136019349098206, "learning_rate": 0.000586900394075282, "loss": 3.7313, "step": 48640 }, { "epoch": 3.305136567468406, "grad_norm": 1.0143972635269165, "learning_rate": 0.0005868579290664493, "loss": 3.609, "step": 48645 }, { "epoch": 3.3054762875390677, "grad_norm": 0.757497251033783, "learning_rate": 0.0005868154640576165, "loss": 3.6623, "step": 48650 }, { "epoch": 3.30581600760973, "grad_norm": 0.7379955649375916, "learning_rate": 0.0005867729990487837, "loss": 3.4996, "step": 48655 }, { "epoch": 3.3061557276803915, "grad_norm": 0.7923427820205688, "learning_rate": 0.0005867305340399511, "loss": 3.7755, "step": 48660 }, { "epoch": 3.306495447751053, "grad_norm": 0.7765454649925232, "learning_rate": 0.0005866880690311184, "loss": 3.4495, "step": 48665 }, { "epoch": 3.3068351678217147, "grad_norm": 0.9614738821983337, "learning_rate": 0.0005866456040222856, "loss": 3.5063, "step": 48670 }, { "epoch": 3.307174887892377, "grad_norm": 0.8087530732154846, "learning_rate": 0.000586603139013453, "loss": 3.4781, "step": 48675 }, { "epoch": 3.3075146079630384, "grad_norm": 0.8304346203804016, "learning_rate": 0.0005865606740046202, "loss": 3.3196, "step": 48680 }, { "epoch": 3.3078543280337, "grad_norm": 1.115853190422058, "learning_rate": 0.0005865182089957874, "loss": 3.2299, "step": 48685 }, { "epoch": 3.308194048104362, "grad_norm": 0.9241434931755066, "learning_rate": 0.0005864757439869548, "loss": 3.3211, "step": 48690 }, { "epoch": 3.3085337681750238, "grad_norm": 0.9389660358428955, "learning_rate": 0.000586433278978122, "loss": 3.5039, "step": 48695 }, { "epoch": 3.3088734882456854, "grad_norm": 0.8963150978088379, "learning_rate": 0.0005863908139692893, "loss": 3.7154, "step": 48700 }, { "epoch": 3.3092132083163475, "grad_norm": 0.7866663336753845, "learning_rate": 0.0005863483489604567, "loss": 3.4228, "step": 48705 }, { "epoch": 3.309552928387009, "grad_norm": 0.9301358461380005, "learning_rate": 0.0005863058839516239, "loss": 3.3827, "step": 48710 }, { "epoch": 3.3098926484576707, "grad_norm": 0.8656550645828247, "learning_rate": 0.0005862634189427911, "loss": 3.4802, "step": 48715 }, { "epoch": 3.310232368528333, "grad_norm": 0.8601676225662231, "learning_rate": 0.0005862209539339584, "loss": 3.316, "step": 48720 }, { "epoch": 3.3105720885989944, "grad_norm": 1.144139289855957, "learning_rate": 0.0005861784889251257, "loss": 3.582, "step": 48725 }, { "epoch": 3.310911808669656, "grad_norm": 0.9279145002365112, "learning_rate": 0.0005861360239162929, "loss": 3.4837, "step": 48730 }, { "epoch": 3.311251528740318, "grad_norm": 0.8956047296524048, "learning_rate": 0.0005860935589074603, "loss": 3.3379, "step": 48735 }, { "epoch": 3.3115912488109798, "grad_norm": 0.8557221293449402, "learning_rate": 0.0005860510938986276, "loss": 3.6164, "step": 48740 }, { "epoch": 3.3119309688816414, "grad_norm": 1.053959608078003, "learning_rate": 0.0005860086288897948, "loss": 3.3273, "step": 48745 }, { "epoch": 3.3122706889523035, "grad_norm": 0.8266658782958984, "learning_rate": 0.0005859661638809621, "loss": 3.7829, "step": 48750 }, { "epoch": 3.312610409022965, "grad_norm": 0.7742833495140076, "learning_rate": 0.0005859236988721294, "loss": 3.2963, "step": 48755 }, { "epoch": 3.3129501290936267, "grad_norm": 0.7619365453720093, "learning_rate": 0.0005858812338632966, "loss": 3.399, "step": 48760 }, { "epoch": 3.313289849164289, "grad_norm": 0.8914700150489807, "learning_rate": 0.000585838768854464, "loss": 3.5502, "step": 48765 }, { "epoch": 3.3136295692349504, "grad_norm": 1.0287597179412842, "learning_rate": 0.0005857963038456312, "loss": 3.371, "step": 48770 }, { "epoch": 3.313969289305612, "grad_norm": 0.7766013741493225, "learning_rate": 0.0005857538388367985, "loss": 3.4013, "step": 48775 }, { "epoch": 3.3143090093762737, "grad_norm": 0.9402775168418884, "learning_rate": 0.0005857113738279658, "loss": 3.6048, "step": 48780 }, { "epoch": 3.314648729446936, "grad_norm": 1.0506435632705688, "learning_rate": 0.000585668908819133, "loss": 3.5098, "step": 48785 }, { "epoch": 3.3149884495175974, "grad_norm": 0.8823086023330688, "learning_rate": 0.0005856264438103003, "loss": 3.5752, "step": 48790 }, { "epoch": 3.315328169588259, "grad_norm": 0.7824456691741943, "learning_rate": 0.0005855839788014676, "loss": 3.6757, "step": 48795 }, { "epoch": 3.315667889658921, "grad_norm": 0.9050862193107605, "learning_rate": 0.0005855415137926349, "loss": 3.4355, "step": 48800 }, { "epoch": 3.3160076097295828, "grad_norm": 0.9255902767181396, "learning_rate": 0.0005854990487838022, "loss": 3.6004, "step": 48805 }, { "epoch": 3.3163473298002444, "grad_norm": 0.7035595178604126, "learning_rate": 0.0005854565837749695, "loss": 3.4515, "step": 48810 }, { "epoch": 3.3166870498709065, "grad_norm": 1.0416179895401, "learning_rate": 0.0005854141187661367, "loss": 3.5305, "step": 48815 }, { "epoch": 3.317026769941568, "grad_norm": 1.0944303274154663, "learning_rate": 0.0005853716537573039, "loss": 3.4974, "step": 48820 }, { "epoch": 3.3173664900122297, "grad_norm": 0.9998109340667725, "learning_rate": 0.0005853291887484713, "loss": 3.6343, "step": 48825 }, { "epoch": 3.317706210082892, "grad_norm": 0.9304737448692322, "learning_rate": 0.0005852867237396385, "loss": 3.3724, "step": 48830 }, { "epoch": 3.3180459301535534, "grad_norm": 0.6992810368537903, "learning_rate": 0.0005852442587308058, "loss": 3.7925, "step": 48835 }, { "epoch": 3.318385650224215, "grad_norm": 0.7170081734657288, "learning_rate": 0.0005852017937219732, "loss": 3.638, "step": 48840 }, { "epoch": 3.318725370294877, "grad_norm": 0.7346291542053223, "learning_rate": 0.0005851593287131404, "loss": 3.7804, "step": 48845 }, { "epoch": 3.3190650903655388, "grad_norm": 0.8415378928184509, "learning_rate": 0.0005851168637043076, "loss": 3.1964, "step": 48850 }, { "epoch": 3.3194048104362004, "grad_norm": 0.8232410550117493, "learning_rate": 0.000585074398695475, "loss": 3.663, "step": 48855 }, { "epoch": 3.3197445305068625, "grad_norm": 0.8720488548278809, "learning_rate": 0.0005850319336866422, "loss": 3.126, "step": 48860 }, { "epoch": 3.320084250577524, "grad_norm": 1.0773766040802002, "learning_rate": 0.0005849894686778094, "loss": 3.5334, "step": 48865 }, { "epoch": 3.3204239706481857, "grad_norm": 0.8816829919815063, "learning_rate": 0.0005849470036689768, "loss": 3.7275, "step": 48870 }, { "epoch": 3.320763690718848, "grad_norm": 0.8054467439651489, "learning_rate": 0.0005849045386601441, "loss": 3.4183, "step": 48875 }, { "epoch": 3.3211034107895094, "grad_norm": 0.9802054166793823, "learning_rate": 0.0005848620736513113, "loss": 3.5446, "step": 48880 }, { "epoch": 3.321443130860171, "grad_norm": 0.9185819625854492, "learning_rate": 0.0005848196086424786, "loss": 3.496, "step": 48885 }, { "epoch": 3.321782850930833, "grad_norm": 1.0449241399765015, "learning_rate": 0.0005847771436336459, "loss": 3.5153, "step": 48890 }, { "epoch": 3.3221225710014948, "grad_norm": 0.9002196192741394, "learning_rate": 0.0005847346786248132, "loss": 3.5827, "step": 48895 }, { "epoch": 3.3224622910721564, "grad_norm": 0.7623147368431091, "learning_rate": 0.0005846922136159804, "loss": 3.6484, "step": 48900 }, { "epoch": 3.3228020111428185, "grad_norm": 0.9724432229995728, "learning_rate": 0.0005846497486071478, "loss": 3.4639, "step": 48905 }, { "epoch": 3.32314173121348, "grad_norm": 0.7406185865402222, "learning_rate": 0.0005846072835983151, "loss": 3.8827, "step": 48910 }, { "epoch": 3.3234814512841417, "grad_norm": 0.9950745105743408, "learning_rate": 0.0005845648185894823, "loss": 3.5762, "step": 48915 }, { "epoch": 3.323821171354804, "grad_norm": 0.9344888925552368, "learning_rate": 0.0005845223535806495, "loss": 3.4181, "step": 48920 }, { "epoch": 3.3241608914254654, "grad_norm": 0.8858740925788879, "learning_rate": 0.0005844798885718169, "loss": 3.5536, "step": 48925 }, { "epoch": 3.324500611496127, "grad_norm": 1.044658899307251, "learning_rate": 0.0005844374235629841, "loss": 3.5687, "step": 48930 }, { "epoch": 3.324840331566789, "grad_norm": 0.9449087381362915, "learning_rate": 0.0005843949585541513, "loss": 3.5733, "step": 48935 }, { "epoch": 3.325180051637451, "grad_norm": 1.0380264520645142, "learning_rate": 0.0005843524935453188, "loss": 3.4844, "step": 48940 }, { "epoch": 3.3255197717081124, "grad_norm": 0.7972650527954102, "learning_rate": 0.000584310028536486, "loss": 3.5664, "step": 48945 }, { "epoch": 3.3258594917787745, "grad_norm": 0.9937728047370911, "learning_rate": 0.0005842675635276532, "loss": 3.6499, "step": 48950 }, { "epoch": 3.326199211849436, "grad_norm": 0.8365567326545715, "learning_rate": 0.0005842250985188206, "loss": 3.639, "step": 48955 }, { "epoch": 3.3265389319200978, "grad_norm": 0.8307557106018066, "learning_rate": 0.0005841826335099878, "loss": 3.526, "step": 48960 }, { "epoch": 3.32687865199076, "grad_norm": 0.9083724617958069, "learning_rate": 0.000584140168501155, "loss": 3.7752, "step": 48965 }, { "epoch": 3.3272183720614215, "grad_norm": 0.9049288034439087, "learning_rate": 0.0005840977034923223, "loss": 3.5597, "step": 48970 }, { "epoch": 3.327558092132083, "grad_norm": 0.8801900744438171, "learning_rate": 0.0005840552384834897, "loss": 3.2409, "step": 48975 }, { "epoch": 3.327897812202745, "grad_norm": 0.8086159229278564, "learning_rate": 0.0005840127734746569, "loss": 3.4845, "step": 48980 }, { "epoch": 3.328237532273407, "grad_norm": 0.8561450839042664, "learning_rate": 0.0005839703084658242, "loss": 3.4352, "step": 48985 }, { "epoch": 3.3285772523440684, "grad_norm": 1.0218385457992554, "learning_rate": 0.0005839278434569915, "loss": 3.3732, "step": 48990 }, { "epoch": 3.3289169724147305, "grad_norm": 1.2099497318267822, "learning_rate": 0.0005838853784481587, "loss": 3.5525, "step": 48995 }, { "epoch": 3.329256692485392, "grad_norm": 0.970750093460083, "learning_rate": 0.000583842913439326, "loss": 3.6337, "step": 49000 }, { "epoch": 3.3295964125560538, "grad_norm": 0.9754088521003723, "learning_rate": 0.0005838004484304932, "loss": 3.3792, "step": 49005 }, { "epoch": 3.3299361326267154, "grad_norm": 1.0598530769348145, "learning_rate": 0.0005837579834216606, "loss": 3.5458, "step": 49010 }, { "epoch": 3.3302758526973775, "grad_norm": 1.1472526788711548, "learning_rate": 0.0005837155184128279, "loss": 3.3822, "step": 49015 }, { "epoch": 3.330615572768039, "grad_norm": 1.1263474225997925, "learning_rate": 0.0005836730534039951, "loss": 3.4454, "step": 49020 }, { "epoch": 3.3309552928387007, "grad_norm": 0.912329912185669, "learning_rate": 0.0005836305883951624, "loss": 3.732, "step": 49025 }, { "epoch": 3.331295012909363, "grad_norm": 0.931022047996521, "learning_rate": 0.0005835881233863297, "loss": 3.7318, "step": 49030 }, { "epoch": 3.3316347329800244, "grad_norm": 0.7978280782699585, "learning_rate": 0.0005835456583774969, "loss": 3.7725, "step": 49035 }, { "epoch": 3.331974453050686, "grad_norm": 0.8076285123825073, "learning_rate": 0.0005835031933686642, "loss": 3.7428, "step": 49040 }, { "epoch": 3.332314173121348, "grad_norm": 0.71431565284729, "learning_rate": 0.0005834607283598316, "loss": 3.438, "step": 49045 }, { "epoch": 3.3326538931920098, "grad_norm": 1.2527812719345093, "learning_rate": 0.0005834182633509988, "loss": 3.6554, "step": 49050 }, { "epoch": 3.3329936132626714, "grad_norm": 0.6672350168228149, "learning_rate": 0.000583375798342166, "loss": 3.6194, "step": 49055 }, { "epoch": 3.3333333333333335, "grad_norm": 0.7781176567077637, "learning_rate": 0.0005833333333333334, "loss": 3.6281, "step": 49060 }, { "epoch": 3.333673053403995, "grad_norm": 0.9181593656539917, "learning_rate": 0.0005832908683245006, "loss": 3.1371, "step": 49065 }, { "epoch": 3.3340127734746567, "grad_norm": 1.114783763885498, "learning_rate": 0.0005832484033156678, "loss": 3.2219, "step": 49070 }, { "epoch": 3.334352493545319, "grad_norm": 0.8489429354667664, "learning_rate": 0.0005832059383068352, "loss": 3.561, "step": 49075 }, { "epoch": 3.3346922136159804, "grad_norm": 0.9623716473579407, "learning_rate": 0.0005831634732980025, "loss": 3.4225, "step": 49080 }, { "epoch": 3.335031933686642, "grad_norm": 0.8397031426429749, "learning_rate": 0.0005831210082891697, "loss": 3.5534, "step": 49085 }, { "epoch": 3.335371653757304, "grad_norm": 0.83551025390625, "learning_rate": 0.0005830785432803371, "loss": 3.2837, "step": 49090 }, { "epoch": 3.335711373827966, "grad_norm": 0.8887967467308044, "learning_rate": 0.0005830360782715043, "loss": 3.8307, "step": 49095 }, { "epoch": 3.3360510938986274, "grad_norm": 0.8023266792297363, "learning_rate": 0.0005829936132626715, "loss": 3.5649, "step": 49100 }, { "epoch": 3.3363908139692895, "grad_norm": 0.9357014298439026, "learning_rate": 0.0005829511482538388, "loss": 3.3846, "step": 49105 }, { "epoch": 3.336730534039951, "grad_norm": 0.8584401607513428, "learning_rate": 0.0005829086832450061, "loss": 3.7953, "step": 49110 }, { "epoch": 3.3370702541106128, "grad_norm": 0.897729754447937, "learning_rate": 0.0005828662182361734, "loss": 3.4747, "step": 49115 }, { "epoch": 3.3374099741812744, "grad_norm": 0.7875022888183594, "learning_rate": 0.0005828237532273407, "loss": 3.4458, "step": 49120 }, { "epoch": 3.3377496942519365, "grad_norm": 1.0723532438278198, "learning_rate": 0.000582781288218508, "loss": 3.5319, "step": 49125 }, { "epoch": 3.338089414322598, "grad_norm": 0.8995426893234253, "learning_rate": 0.0005827388232096752, "loss": 3.5665, "step": 49130 }, { "epoch": 3.3384291343932597, "grad_norm": 0.7588154077529907, "learning_rate": 0.0005826963582008425, "loss": 3.5394, "step": 49135 }, { "epoch": 3.338768854463922, "grad_norm": 1.076982021331787, "learning_rate": 0.0005826538931920098, "loss": 3.8499, "step": 49140 }, { "epoch": 3.3391085745345834, "grad_norm": 0.9514582753181458, "learning_rate": 0.000582611428183177, "loss": 3.6433, "step": 49145 }, { "epoch": 3.339448294605245, "grad_norm": 0.7473044991493225, "learning_rate": 0.0005825689631743444, "loss": 3.5236, "step": 49150 }, { "epoch": 3.339788014675907, "grad_norm": 1.0321956872940063, "learning_rate": 0.0005825264981655116, "loss": 3.3804, "step": 49155 }, { "epoch": 3.3401277347465688, "grad_norm": 0.9432067275047302, "learning_rate": 0.0005824840331566789, "loss": 3.4241, "step": 49160 }, { "epoch": 3.3404674548172304, "grad_norm": 1.1034932136535645, "learning_rate": 0.0005824415681478462, "loss": 3.4387, "step": 49165 }, { "epoch": 3.3408071748878925, "grad_norm": 0.8038483262062073, "learning_rate": 0.0005823991031390134, "loss": 3.5473, "step": 49170 }, { "epoch": 3.341146894958554, "grad_norm": 0.8671546578407288, "learning_rate": 0.0005823566381301807, "loss": 3.5384, "step": 49175 }, { "epoch": 3.3414866150292157, "grad_norm": 0.9980518817901611, "learning_rate": 0.000582314173121348, "loss": 3.3211, "step": 49180 }, { "epoch": 3.341826335099878, "grad_norm": 0.9877550601959229, "learning_rate": 0.0005822717081125153, "loss": 3.154, "step": 49185 }, { "epoch": 3.3421660551705394, "grad_norm": 0.7359760403633118, "learning_rate": 0.0005822292431036826, "loss": 3.3558, "step": 49190 }, { "epoch": 3.342505775241201, "grad_norm": 0.9286291599273682, "learning_rate": 0.0005821867780948499, "loss": 3.3735, "step": 49195 }, { "epoch": 3.342845495311863, "grad_norm": 1.0428755283355713, "learning_rate": 0.0005821443130860171, "loss": 3.5466, "step": 49200 }, { "epoch": 3.3431852153825248, "grad_norm": 0.8222222924232483, "learning_rate": 0.0005821018480771843, "loss": 3.6782, "step": 49205 }, { "epoch": 3.3435249354531864, "grad_norm": 1.1202727556228638, "learning_rate": 0.0005820593830683517, "loss": 3.652, "step": 49210 }, { "epoch": 3.3438646555238485, "grad_norm": 1.102121114730835, "learning_rate": 0.0005820169180595189, "loss": 3.5105, "step": 49215 }, { "epoch": 3.34420437559451, "grad_norm": 0.9617036581039429, "learning_rate": 0.0005819744530506862, "loss": 3.4512, "step": 49220 }, { "epoch": 3.3445440956651717, "grad_norm": 0.963440477848053, "learning_rate": 0.0005819319880418536, "loss": 3.4761, "step": 49225 }, { "epoch": 3.344883815735834, "grad_norm": 0.82782381772995, "learning_rate": 0.0005818895230330208, "loss": 3.6558, "step": 49230 }, { "epoch": 3.3452235358064955, "grad_norm": 0.6698066592216492, "learning_rate": 0.0005818470580241881, "loss": 3.1863, "step": 49235 }, { "epoch": 3.345563255877157, "grad_norm": 0.9416516423225403, "learning_rate": 0.0005818045930153554, "loss": 3.4994, "step": 49240 }, { "epoch": 3.345902975947819, "grad_norm": 0.6994809508323669, "learning_rate": 0.0005817621280065226, "loss": 3.5688, "step": 49245 }, { "epoch": 3.346242696018481, "grad_norm": 2.019428253173828, "learning_rate": 0.0005817196629976899, "loss": 3.602, "step": 49250 }, { "epoch": 3.3465824160891424, "grad_norm": 1.1064766645431519, "learning_rate": 0.0005816771979888573, "loss": 3.6118, "step": 49255 }, { "epoch": 3.3469221361598045, "grad_norm": 0.8234317302703857, "learning_rate": 0.0005816347329800245, "loss": 3.6022, "step": 49260 }, { "epoch": 3.347261856230466, "grad_norm": 1.0805864334106445, "learning_rate": 0.0005815922679711918, "loss": 3.5319, "step": 49265 }, { "epoch": 3.3476015763011278, "grad_norm": 0.9605377316474915, "learning_rate": 0.000581549802962359, "loss": 3.7473, "step": 49270 }, { "epoch": 3.34794129637179, "grad_norm": 0.7837151885032654, "learning_rate": 0.0005815073379535263, "loss": 3.64, "step": 49275 }, { "epoch": 3.3482810164424515, "grad_norm": 1.0982580184936523, "learning_rate": 0.0005814648729446936, "loss": 3.7129, "step": 49280 }, { "epoch": 3.348620736513113, "grad_norm": 0.8103988170623779, "learning_rate": 0.0005814224079358608, "loss": 3.6508, "step": 49285 }, { "epoch": 3.348960456583775, "grad_norm": 0.8725731372833252, "learning_rate": 0.0005813799429270282, "loss": 3.5725, "step": 49290 }, { "epoch": 3.349300176654437, "grad_norm": 0.7639144062995911, "learning_rate": 0.0005813374779181955, "loss": 3.5523, "step": 49295 }, { "epoch": 3.3496398967250984, "grad_norm": 0.856993556022644, "learning_rate": 0.0005812950129093627, "loss": 3.4821, "step": 49300 }, { "epoch": 3.3499796167957605, "grad_norm": 1.027595043182373, "learning_rate": 0.0005812525479005299, "loss": 3.5007, "step": 49305 }, { "epoch": 3.350319336866422, "grad_norm": 0.9101130962371826, "learning_rate": 0.0005812100828916973, "loss": 3.5386, "step": 49310 }, { "epoch": 3.3506590569370838, "grad_norm": 1.6726362705230713, "learning_rate": 0.0005811676178828645, "loss": 3.7531, "step": 49315 }, { "epoch": 3.350998777007746, "grad_norm": 0.8654756546020508, "learning_rate": 0.0005811251528740317, "loss": 3.6461, "step": 49320 }, { "epoch": 3.3513384970784075, "grad_norm": 0.9322261810302734, "learning_rate": 0.0005810826878651992, "loss": 3.3982, "step": 49325 }, { "epoch": 3.351678217149069, "grad_norm": 1.7551757097244263, "learning_rate": 0.0005810402228563664, "loss": 3.3868, "step": 49330 }, { "epoch": 3.352017937219731, "grad_norm": 1.2007821798324585, "learning_rate": 0.0005809977578475336, "loss": 3.4701, "step": 49335 }, { "epoch": 3.352357657290393, "grad_norm": 0.9446572065353394, "learning_rate": 0.000580955292838701, "loss": 3.6011, "step": 49340 }, { "epoch": 3.3526973773610544, "grad_norm": 1.7760640382766724, "learning_rate": 0.0005809128278298682, "loss": 3.3534, "step": 49345 }, { "epoch": 3.353037097431716, "grad_norm": 0.7445230484008789, "learning_rate": 0.0005808703628210354, "loss": 3.4277, "step": 49350 }, { "epoch": 3.353376817502378, "grad_norm": 0.9552304148674011, "learning_rate": 0.0005808278978122029, "loss": 3.2389, "step": 49355 }, { "epoch": 3.35371653757304, "grad_norm": 1.0730969905853271, "learning_rate": 0.0005807854328033701, "loss": 3.3811, "step": 49360 }, { "epoch": 3.3540562576437014, "grad_norm": 0.9461817145347595, "learning_rate": 0.0005807429677945373, "loss": 3.4797, "step": 49365 }, { "epoch": 3.3543959777143635, "grad_norm": 0.8003263473510742, "learning_rate": 0.0005807005027857046, "loss": 3.4392, "step": 49370 }, { "epoch": 3.354735697785025, "grad_norm": 0.8905390501022339, "learning_rate": 0.0005806580377768719, "loss": 3.4122, "step": 49375 }, { "epoch": 3.3550754178556867, "grad_norm": 0.9331502914428711, "learning_rate": 0.0005806155727680391, "loss": 3.4942, "step": 49380 }, { "epoch": 3.355415137926349, "grad_norm": 1.1724779605865479, "learning_rate": 0.0005805731077592064, "loss": 3.5529, "step": 49385 }, { "epoch": 3.3557548579970105, "grad_norm": 0.7212226390838623, "learning_rate": 0.0005805306427503738, "loss": 3.3232, "step": 49390 }, { "epoch": 3.356094578067672, "grad_norm": 0.9879406690597534, "learning_rate": 0.000580488177741541, "loss": 3.6626, "step": 49395 }, { "epoch": 3.356434298138334, "grad_norm": 0.9071311354637146, "learning_rate": 0.0005804457127327083, "loss": 3.5355, "step": 49400 }, { "epoch": 3.356774018208996, "grad_norm": 0.7567989230155945, "learning_rate": 0.0005804032477238755, "loss": 3.4091, "step": 49405 }, { "epoch": 3.3571137382796574, "grad_norm": 0.8235266208648682, "learning_rate": 0.0005803607827150428, "loss": 3.8166, "step": 49410 }, { "epoch": 3.3574534583503195, "grad_norm": 0.8578166365623474, "learning_rate": 0.0005803183177062101, "loss": 3.3705, "step": 49415 }, { "epoch": 3.357793178420981, "grad_norm": 0.7126883268356323, "learning_rate": 0.0005802758526973773, "loss": 3.5085, "step": 49420 }, { "epoch": 3.3581328984916428, "grad_norm": 0.8250067234039307, "learning_rate": 0.0005802333876885447, "loss": 3.5275, "step": 49425 }, { "epoch": 3.358472618562305, "grad_norm": 0.7594175934791565, "learning_rate": 0.000580190922679712, "loss": 3.4701, "step": 49430 }, { "epoch": 3.3588123386329665, "grad_norm": 0.87702476978302, "learning_rate": 0.0005801484576708792, "loss": 3.1108, "step": 49435 }, { "epoch": 3.359152058703628, "grad_norm": 1.1399182081222534, "learning_rate": 0.0005801059926620465, "loss": 3.1993, "step": 49440 }, { "epoch": 3.35949177877429, "grad_norm": 1.0692555904388428, "learning_rate": 0.0005800635276532138, "loss": 3.2399, "step": 49445 }, { "epoch": 3.359831498844952, "grad_norm": 0.9493743181228638, "learning_rate": 0.000580021062644381, "loss": 3.3503, "step": 49450 }, { "epoch": 3.3601712189156134, "grad_norm": 1.017435908317566, "learning_rate": 0.0005799785976355482, "loss": 3.8118, "step": 49455 }, { "epoch": 3.360510938986275, "grad_norm": 0.9139876961708069, "learning_rate": 0.0005799361326267157, "loss": 3.4476, "step": 49460 }, { "epoch": 3.360850659056937, "grad_norm": 1.0009920597076416, "learning_rate": 0.0005798936676178829, "loss": 3.3259, "step": 49465 }, { "epoch": 3.3611903791275988, "grad_norm": 0.8961812257766724, "learning_rate": 0.0005798512026090501, "loss": 3.5671, "step": 49470 }, { "epoch": 3.3615300991982604, "grad_norm": 0.9386341571807861, "learning_rate": 0.0005798087376002175, "loss": 3.5515, "step": 49475 }, { "epoch": 3.3618698192689225, "grad_norm": 0.8955754637718201, "learning_rate": 0.0005797662725913847, "loss": 3.5784, "step": 49480 }, { "epoch": 3.362209539339584, "grad_norm": 1.1475889682769775, "learning_rate": 0.0005797238075825519, "loss": 3.3413, "step": 49485 }, { "epoch": 3.3625492594102457, "grad_norm": 0.8356018662452698, "learning_rate": 0.0005796813425737193, "loss": 3.6213, "step": 49490 }, { "epoch": 3.362888979480908, "grad_norm": 1.1528613567352295, "learning_rate": 0.0005796388775648866, "loss": 3.6929, "step": 49495 }, { "epoch": 3.3632286995515694, "grad_norm": 0.9828994274139404, "learning_rate": 0.0005795964125560538, "loss": 3.5448, "step": 49500 }, { "epoch": 3.363568419622231, "grad_norm": 0.9633510708808899, "learning_rate": 0.0005795539475472211, "loss": 3.3844, "step": 49505 }, { "epoch": 3.363908139692893, "grad_norm": 0.9110558032989502, "learning_rate": 0.0005795114825383884, "loss": 3.4923, "step": 49510 }, { "epoch": 3.364247859763555, "grad_norm": 1.036224603652954, "learning_rate": 0.0005794690175295556, "loss": 3.8053, "step": 49515 }, { "epoch": 3.3645875798342164, "grad_norm": 0.8948330879211426, "learning_rate": 0.0005794265525207229, "loss": 3.3891, "step": 49520 }, { "epoch": 3.3649272999048785, "grad_norm": 0.9554365873336792, "learning_rate": 0.0005793840875118902, "loss": 3.5588, "step": 49525 }, { "epoch": 3.36526701997554, "grad_norm": 1.0419509410858154, "learning_rate": 0.0005793416225030575, "loss": 3.6099, "step": 49530 }, { "epoch": 3.3656067400462018, "grad_norm": 0.8801946640014648, "learning_rate": 0.0005792991574942248, "loss": 3.8076, "step": 49535 }, { "epoch": 3.365946460116864, "grad_norm": 0.8852800726890564, "learning_rate": 0.000579256692485392, "loss": 3.5506, "step": 49540 }, { "epoch": 3.3662861801875255, "grad_norm": 0.9366156458854675, "learning_rate": 0.0005792142274765593, "loss": 3.5611, "step": 49545 }, { "epoch": 3.366625900258187, "grad_norm": 0.768460214138031, "learning_rate": 0.0005791717624677266, "loss": 3.6667, "step": 49550 }, { "epoch": 3.366965620328849, "grad_norm": 0.7904855012893677, "learning_rate": 0.0005791292974588938, "loss": 3.6792, "step": 49555 }, { "epoch": 3.367305340399511, "grad_norm": 0.849022626876831, "learning_rate": 0.0005790868324500611, "loss": 3.2532, "step": 49560 }, { "epoch": 3.3676450604701724, "grad_norm": 0.9996537566184998, "learning_rate": 0.0005790443674412285, "loss": 3.5101, "step": 49565 }, { "epoch": 3.3679847805408345, "grad_norm": 0.9901342988014221, "learning_rate": 0.0005790019024323957, "loss": 3.5807, "step": 49570 }, { "epoch": 3.368324500611496, "grad_norm": 0.760643482208252, "learning_rate": 0.0005789594374235631, "loss": 3.4887, "step": 49575 }, { "epoch": 3.3686642206821578, "grad_norm": 1.0973819494247437, "learning_rate": 0.0005789169724147303, "loss": 3.5991, "step": 49580 }, { "epoch": 3.36900394075282, "grad_norm": 1.0243356227874756, "learning_rate": 0.0005788745074058975, "loss": 3.2717, "step": 49585 }, { "epoch": 3.3693436608234815, "grad_norm": 0.6387946605682373, "learning_rate": 0.0005788320423970649, "loss": 3.4676, "step": 49590 }, { "epoch": 3.369683380894143, "grad_norm": 0.9899793863296509, "learning_rate": 0.0005787895773882321, "loss": 3.5605, "step": 49595 }, { "epoch": 3.370023100964805, "grad_norm": 1.023699164390564, "learning_rate": 0.0005787471123793994, "loss": 3.5338, "step": 49600 }, { "epoch": 3.370362821035467, "grad_norm": 0.8970270156860352, "learning_rate": 0.0005787046473705667, "loss": 3.5108, "step": 49605 }, { "epoch": 3.3707025411061284, "grad_norm": 0.8073155879974365, "learning_rate": 0.000578662182361734, "loss": 3.5912, "step": 49610 }, { "epoch": 3.3710422611767905, "grad_norm": 0.687433123588562, "learning_rate": 0.0005786197173529012, "loss": 3.3209, "step": 49615 }, { "epoch": 3.371381981247452, "grad_norm": 0.8400617241859436, "learning_rate": 0.0005785772523440685, "loss": 3.6146, "step": 49620 }, { "epoch": 3.3717217013181138, "grad_norm": 0.9855444431304932, "learning_rate": 0.0005785347873352358, "loss": 3.4652, "step": 49625 }, { "epoch": 3.372061421388776, "grad_norm": 1.0187273025512695, "learning_rate": 0.000578492322326403, "loss": 3.5558, "step": 49630 }, { "epoch": 3.3724011414594375, "grad_norm": 1.021185278892517, "learning_rate": 0.0005784498573175704, "loss": 3.5024, "step": 49635 }, { "epoch": 3.372740861530099, "grad_norm": 0.7996768355369568, "learning_rate": 0.0005784073923087377, "loss": 3.4078, "step": 49640 }, { "epoch": 3.373080581600761, "grad_norm": 0.8960021734237671, "learning_rate": 0.0005783649272999049, "loss": 3.4692, "step": 49645 }, { "epoch": 3.373420301671423, "grad_norm": 0.7097633481025696, "learning_rate": 0.0005783224622910722, "loss": 3.5738, "step": 49650 }, { "epoch": 3.3737600217420844, "grad_norm": 0.9948644638061523, "learning_rate": 0.0005782799972822394, "loss": 3.6191, "step": 49655 }, { "epoch": 3.3740997418127465, "grad_norm": 1.1178803443908691, "learning_rate": 0.0005782375322734067, "loss": 3.3585, "step": 49660 }, { "epoch": 3.374439461883408, "grad_norm": 1.4558680057525635, "learning_rate": 0.000578195067264574, "loss": 3.5182, "step": 49665 }, { "epoch": 3.37477918195407, "grad_norm": 0.7311971187591553, "learning_rate": 0.0005781526022557413, "loss": 3.5622, "step": 49670 }, { "epoch": 3.375118902024732, "grad_norm": 0.8076525926589966, "learning_rate": 0.0005781101372469086, "loss": 3.4742, "step": 49675 }, { "epoch": 3.3754586220953935, "grad_norm": 1.0293940305709839, "learning_rate": 0.0005780676722380759, "loss": 3.3569, "step": 49680 }, { "epoch": 3.375798342166055, "grad_norm": 0.7506266832351685, "learning_rate": 0.0005780252072292431, "loss": 3.5488, "step": 49685 }, { "epoch": 3.3761380622367168, "grad_norm": 1.185490608215332, "learning_rate": 0.0005779827422204103, "loss": 3.4223, "step": 49690 }, { "epoch": 3.376477782307379, "grad_norm": 0.8698786497116089, "learning_rate": 0.0005779402772115777, "loss": 3.7503, "step": 49695 }, { "epoch": 3.3768175023780405, "grad_norm": 1.3655766248703003, "learning_rate": 0.0005778978122027449, "loss": 3.5298, "step": 49700 }, { "epoch": 3.377157222448702, "grad_norm": 0.8765692114830017, "learning_rate": 0.0005778553471939122, "loss": 3.4266, "step": 49705 }, { "epoch": 3.377496942519364, "grad_norm": 0.8900591731071472, "learning_rate": 0.0005778128821850796, "loss": 3.423, "step": 49710 }, { "epoch": 3.377836662590026, "grad_norm": 0.7250720262527466, "learning_rate": 0.0005777704171762468, "loss": 3.4104, "step": 49715 }, { "epoch": 3.3781763826606874, "grad_norm": 0.9090709090232849, "learning_rate": 0.000577727952167414, "loss": 3.6717, "step": 49720 }, { "epoch": 3.3785161027313495, "grad_norm": 0.9523364305496216, "learning_rate": 0.0005776854871585814, "loss": 3.4284, "step": 49725 }, { "epoch": 3.378855822802011, "grad_norm": 0.9779092669487, "learning_rate": 0.0005776430221497486, "loss": 3.5203, "step": 49730 }, { "epoch": 3.3791955428726728, "grad_norm": 0.9906118512153625, "learning_rate": 0.0005776005571409158, "loss": 3.5643, "step": 49735 }, { "epoch": 3.379535262943335, "grad_norm": 0.9301699995994568, "learning_rate": 0.0005775580921320833, "loss": 3.6165, "step": 49740 }, { "epoch": 3.3798749830139965, "grad_norm": 1.045536756515503, "learning_rate": 0.0005775156271232505, "loss": 3.4772, "step": 49745 }, { "epoch": 3.380214703084658, "grad_norm": 0.9242030382156372, "learning_rate": 0.0005774731621144177, "loss": 3.4709, "step": 49750 }, { "epoch": 3.38055442315532, "grad_norm": 0.8237740397453308, "learning_rate": 0.000577430697105585, "loss": 3.485, "step": 49755 }, { "epoch": 3.380894143225982, "grad_norm": 1.0817036628723145, "learning_rate": 0.0005773882320967523, "loss": 3.3025, "step": 49760 }, { "epoch": 3.3812338632966434, "grad_norm": 1.1393084526062012, "learning_rate": 0.0005773457670879195, "loss": 3.5676, "step": 49765 }, { "epoch": 3.3815735833673055, "grad_norm": 0.827745258808136, "learning_rate": 0.0005773033020790868, "loss": 3.366, "step": 49770 }, { "epoch": 3.381913303437967, "grad_norm": 0.8032366037368774, "learning_rate": 0.0005772608370702542, "loss": 3.641, "step": 49775 }, { "epoch": 3.3822530235086288, "grad_norm": 0.9849168062210083, "learning_rate": 0.0005772183720614214, "loss": 3.6279, "step": 49780 }, { "epoch": 3.382592743579291, "grad_norm": 0.7085905075073242, "learning_rate": 0.0005771759070525887, "loss": 3.5161, "step": 49785 }, { "epoch": 3.3829324636499525, "grad_norm": 0.7520939707756042, "learning_rate": 0.000577133442043756, "loss": 3.6268, "step": 49790 }, { "epoch": 3.383272183720614, "grad_norm": 0.8060070872306824, "learning_rate": 0.0005770909770349232, "loss": 3.56, "step": 49795 }, { "epoch": 3.3836119037912757, "grad_norm": 0.7752734422683716, "learning_rate": 0.0005770485120260905, "loss": 3.6275, "step": 49800 }, { "epoch": 3.383951623861938, "grad_norm": 0.751921534538269, "learning_rate": 0.0005770060470172577, "loss": 3.2834, "step": 49805 }, { "epoch": 3.3842913439325994, "grad_norm": 0.8303234577178955, "learning_rate": 0.0005769635820084251, "loss": 3.6246, "step": 49810 }, { "epoch": 3.384631064003261, "grad_norm": 0.884976863861084, "learning_rate": 0.0005769211169995924, "loss": 3.5424, "step": 49815 }, { "epoch": 3.384970784073923, "grad_norm": 0.8058900833129883, "learning_rate": 0.0005768786519907596, "loss": 3.4267, "step": 49820 }, { "epoch": 3.385310504144585, "grad_norm": 0.8419861793518066, "learning_rate": 0.0005768361869819269, "loss": 3.5548, "step": 49825 }, { "epoch": 3.3856502242152464, "grad_norm": 1.2954851388931274, "learning_rate": 0.0005767937219730942, "loss": 3.7868, "step": 49830 }, { "epoch": 3.3859899442859085, "grad_norm": 1.1548715829849243, "learning_rate": 0.0005767512569642614, "loss": 3.8986, "step": 49835 }, { "epoch": 3.38632966435657, "grad_norm": 0.8057295083999634, "learning_rate": 0.0005767087919554286, "loss": 3.3758, "step": 49840 }, { "epoch": 3.3866693844272318, "grad_norm": 0.8722151517868042, "learning_rate": 0.0005766663269465961, "loss": 3.3405, "step": 49845 }, { "epoch": 3.387009104497894, "grad_norm": 0.851099967956543, "learning_rate": 0.0005766238619377633, "loss": 3.7131, "step": 49850 }, { "epoch": 3.3873488245685555, "grad_norm": 0.7858754992485046, "learning_rate": 0.0005765813969289305, "loss": 3.5018, "step": 49855 }, { "epoch": 3.387688544639217, "grad_norm": 1.0244842767715454, "learning_rate": 0.0005765389319200979, "loss": 3.528, "step": 49860 }, { "epoch": 3.388028264709879, "grad_norm": 0.8234319686889648, "learning_rate": 0.0005764964669112651, "loss": 3.5328, "step": 49865 }, { "epoch": 3.388367984780541, "grad_norm": 0.9315900206565857, "learning_rate": 0.0005764540019024323, "loss": 3.6246, "step": 49870 }, { "epoch": 3.3887077048512024, "grad_norm": 0.8407338857650757, "learning_rate": 0.0005764115368935997, "loss": 3.7096, "step": 49875 }, { "epoch": 3.3890474249218645, "grad_norm": 0.8791391849517822, "learning_rate": 0.000576369071884767, "loss": 3.6277, "step": 49880 }, { "epoch": 3.389387144992526, "grad_norm": 0.9804511070251465, "learning_rate": 0.0005763266068759342, "loss": 3.461, "step": 49885 }, { "epoch": 3.3897268650631878, "grad_norm": 1.0360069274902344, "learning_rate": 0.0005762841418671015, "loss": 3.3306, "step": 49890 }, { "epoch": 3.39006658513385, "grad_norm": 0.7966498732566833, "learning_rate": 0.0005762416768582688, "loss": 3.6058, "step": 49895 }, { "epoch": 3.3904063052045115, "grad_norm": 1.0146933794021606, "learning_rate": 0.000576199211849436, "loss": 3.7228, "step": 49900 }, { "epoch": 3.390746025275173, "grad_norm": 1.0984506607055664, "learning_rate": 0.0005761567468406033, "loss": 3.394, "step": 49905 }, { "epoch": 3.391085745345835, "grad_norm": 0.9458656907081604, "learning_rate": 0.0005761142818317706, "loss": 3.456, "step": 49910 }, { "epoch": 3.391425465416497, "grad_norm": 1.1642011404037476, "learning_rate": 0.000576071816822938, "loss": 3.43, "step": 49915 }, { "epoch": 3.3917651854871584, "grad_norm": 1.1110413074493408, "learning_rate": 0.0005760293518141052, "loss": 3.2909, "step": 49920 }, { "epoch": 3.3921049055578205, "grad_norm": 0.9257332682609558, "learning_rate": 0.0005759868868052725, "loss": 3.5936, "step": 49925 }, { "epoch": 3.392444625628482, "grad_norm": 0.8919389247894287, "learning_rate": 0.0005759444217964398, "loss": 3.5385, "step": 49930 }, { "epoch": 3.3927843456991438, "grad_norm": 0.9872719645500183, "learning_rate": 0.000575901956787607, "loss": 3.7157, "step": 49935 }, { "epoch": 3.393124065769806, "grad_norm": 14.682124137878418, "learning_rate": 0.0005758594917787742, "loss": 3.4595, "step": 49940 }, { "epoch": 3.3934637858404675, "grad_norm": 0.8736067414283752, "learning_rate": 0.0005758170267699417, "loss": 3.5281, "step": 49945 }, { "epoch": 3.393803505911129, "grad_norm": 0.8613869547843933, "learning_rate": 0.0005757745617611089, "loss": 3.5113, "step": 49950 }, { "epoch": 3.394143225981791, "grad_norm": 0.8032278418540955, "learning_rate": 0.0005757320967522761, "loss": 3.6965, "step": 49955 }, { "epoch": 3.394482946052453, "grad_norm": 0.7194338440895081, "learning_rate": 0.0005756896317434435, "loss": 3.5288, "step": 49960 }, { "epoch": 3.3948226661231145, "grad_norm": 0.8321855664253235, "learning_rate": 0.0005756471667346107, "loss": 3.5619, "step": 49965 }, { "epoch": 3.3951623861937765, "grad_norm": 0.9656549096107483, "learning_rate": 0.0005756047017257779, "loss": 3.3194, "step": 49970 }, { "epoch": 3.395502106264438, "grad_norm": 0.7215351462364197, "learning_rate": 0.0005755622367169453, "loss": 3.4955, "step": 49975 }, { "epoch": 3.3958418263351, "grad_norm": 0.948928713798523, "learning_rate": 0.0005755197717081126, "loss": 3.6232, "step": 49980 }, { "epoch": 3.396181546405762, "grad_norm": 0.7622394561767578, "learning_rate": 0.0005754773066992798, "loss": 3.4489, "step": 49985 }, { "epoch": 3.3965212664764235, "grad_norm": 0.7495489120483398, "learning_rate": 0.0005754348416904472, "loss": 3.7415, "step": 49990 }, { "epoch": 3.396860986547085, "grad_norm": 0.7207707166671753, "learning_rate": 0.0005753923766816144, "loss": 3.342, "step": 49995 }, { "epoch": 3.397200706617747, "grad_norm": 0.8471470475196838, "learning_rate": 0.0005753499116727816, "loss": 3.6707, "step": 50000 }, { "epoch": 3.397540426688409, "grad_norm": 1.2151710987091064, "learning_rate": 0.0005753074466639489, "loss": 3.6174, "step": 50005 }, { "epoch": 3.3978801467590705, "grad_norm": 0.9039880037307739, "learning_rate": 0.0005752649816551162, "loss": 3.6957, "step": 50010 }, { "epoch": 3.3982198668297325, "grad_norm": 0.9146090745925903, "learning_rate": 0.0005752225166462835, "loss": 3.6, "step": 50015 }, { "epoch": 3.398559586900394, "grad_norm": 0.9287815093994141, "learning_rate": 0.0005751800516374508, "loss": 3.435, "step": 50020 }, { "epoch": 3.398899306971056, "grad_norm": 1.2030836343765259, "learning_rate": 0.0005751375866286181, "loss": 3.7039, "step": 50025 }, { "epoch": 3.3992390270417174, "grad_norm": 0.7052626609802246, "learning_rate": 0.0005750951216197853, "loss": 3.4021, "step": 50030 }, { "epoch": 3.3995787471123795, "grad_norm": 0.8509361147880554, "learning_rate": 0.0005750526566109526, "loss": 3.5227, "step": 50035 }, { "epoch": 3.399918467183041, "grad_norm": 0.9617079496383667, "learning_rate": 0.0005750101916021198, "loss": 3.6982, "step": 50040 }, { "epoch": 3.4002581872537028, "grad_norm": 0.9806979894638062, "learning_rate": 0.0005749677265932871, "loss": 3.4404, "step": 50045 }, { "epoch": 3.400597907324365, "grad_norm": 0.7971715331077576, "learning_rate": 0.0005749252615844545, "loss": 3.5532, "step": 50050 }, { "epoch": 3.4009376273950265, "grad_norm": 0.9662729501724243, "learning_rate": 0.0005748827965756217, "loss": 3.5923, "step": 50055 }, { "epoch": 3.401277347465688, "grad_norm": 0.6705602407455444, "learning_rate": 0.000574840331566789, "loss": 3.3271, "step": 50060 }, { "epoch": 3.40161706753635, "grad_norm": 0.8430922031402588, "learning_rate": 0.0005747978665579563, "loss": 3.5735, "step": 50065 }, { "epoch": 3.401956787607012, "grad_norm": 0.9195185899734497, "learning_rate": 0.0005747554015491235, "loss": 3.4749, "step": 50070 }, { "epoch": 3.4022965076776734, "grad_norm": 1.0209014415740967, "learning_rate": 0.0005747129365402907, "loss": 3.5843, "step": 50075 }, { "epoch": 3.4026362277483355, "grad_norm": 0.8268157243728638, "learning_rate": 0.0005746704715314581, "loss": 3.2724, "step": 50080 }, { "epoch": 3.402975947818997, "grad_norm": 0.8612752556800842, "learning_rate": 0.0005746280065226254, "loss": 3.8027, "step": 50085 }, { "epoch": 3.403315667889659, "grad_norm": 0.7740690112113953, "learning_rate": 0.0005745855415137926, "loss": 3.4838, "step": 50090 }, { "epoch": 3.403655387960321, "grad_norm": 1.2786152362823486, "learning_rate": 0.00057454307650496, "loss": 3.735, "step": 50095 }, { "epoch": 3.4039951080309825, "grad_norm": 0.8987647294998169, "learning_rate": 0.0005745006114961272, "loss": 3.5005, "step": 50100 }, { "epoch": 3.404334828101644, "grad_norm": 0.9870299100875854, "learning_rate": 0.0005744581464872944, "loss": 3.588, "step": 50105 }, { "epoch": 3.404674548172306, "grad_norm": 0.8393194079399109, "learning_rate": 0.0005744156814784618, "loss": 3.7215, "step": 50110 }, { "epoch": 3.405014268242968, "grad_norm": 0.9247740507125854, "learning_rate": 0.000574373216469629, "loss": 3.6053, "step": 50115 }, { "epoch": 3.4053539883136295, "grad_norm": 0.859395444393158, "learning_rate": 0.0005743307514607963, "loss": 3.6522, "step": 50120 }, { "epoch": 3.4056937083842915, "grad_norm": 0.9788318276405334, "learning_rate": 0.0005742882864519637, "loss": 3.5815, "step": 50125 }, { "epoch": 3.406033428454953, "grad_norm": 0.8912394642829895, "learning_rate": 0.0005742458214431309, "loss": 3.7071, "step": 50130 }, { "epoch": 3.406373148525615, "grad_norm": 0.9066275358200073, "learning_rate": 0.0005742033564342981, "loss": 3.4305, "step": 50135 }, { "epoch": 3.4067128685962764, "grad_norm": 0.8749808073043823, "learning_rate": 0.0005741608914254654, "loss": 3.595, "step": 50140 }, { "epoch": 3.4070525886669385, "grad_norm": 0.939750611782074, "learning_rate": 0.0005741184264166327, "loss": 3.4813, "step": 50145 }, { "epoch": 3.4073923087376, "grad_norm": 0.9295110702514648, "learning_rate": 0.0005740759614077999, "loss": 3.5136, "step": 50150 }, { "epoch": 3.4077320288082618, "grad_norm": 0.9714298844337463, "learning_rate": 0.0005740334963989673, "loss": 3.7413, "step": 50155 }, { "epoch": 3.408071748878924, "grad_norm": 0.8394303917884827, "learning_rate": 0.0005739910313901346, "loss": 3.4702, "step": 50160 }, { "epoch": 3.4084114689495855, "grad_norm": 0.9525669813156128, "learning_rate": 0.0005739485663813018, "loss": 3.7708, "step": 50165 }, { "epoch": 3.408751189020247, "grad_norm": 0.8759309649467468, "learning_rate": 0.0005739061013724691, "loss": 3.5869, "step": 50170 }, { "epoch": 3.409090909090909, "grad_norm": 0.7641010284423828, "learning_rate": 0.0005738636363636364, "loss": 3.4793, "step": 50175 }, { "epoch": 3.409430629161571, "grad_norm": 0.768287718296051, "learning_rate": 0.0005738211713548036, "loss": 3.4763, "step": 50180 }, { "epoch": 3.4097703492322324, "grad_norm": 0.9428665637969971, "learning_rate": 0.0005737787063459709, "loss": 3.524, "step": 50185 }, { "epoch": 3.4101100693028945, "grad_norm": 0.9918295741081238, "learning_rate": 0.0005737362413371382, "loss": 3.4502, "step": 50190 }, { "epoch": 3.410449789373556, "grad_norm": 0.814432680606842, "learning_rate": 0.0005736937763283055, "loss": 3.3258, "step": 50195 }, { "epoch": 3.4107895094442178, "grad_norm": 1.043674349784851, "learning_rate": 0.0005736513113194728, "loss": 3.7549, "step": 50200 }, { "epoch": 3.41112922951488, "grad_norm": 1.3936353921890259, "learning_rate": 0.00057360884631064, "loss": 3.7159, "step": 50205 }, { "epoch": 3.4114689495855415, "grad_norm": 0.7998273372650146, "learning_rate": 0.0005735663813018073, "loss": 3.2901, "step": 50210 }, { "epoch": 3.411808669656203, "grad_norm": 0.8622553944587708, "learning_rate": 0.0005735239162929746, "loss": 3.6911, "step": 50215 }, { "epoch": 3.412148389726865, "grad_norm": 0.8285670876502991, "learning_rate": 0.0005734814512841418, "loss": 3.5615, "step": 50220 }, { "epoch": 3.412488109797527, "grad_norm": 1.0378657579421997, "learning_rate": 0.0005734389862753092, "loss": 3.439, "step": 50225 }, { "epoch": 3.4128278298681884, "grad_norm": 1.3652055263519287, "learning_rate": 0.0005733965212664765, "loss": 3.4319, "step": 50230 }, { "epoch": 3.4131675499388505, "grad_norm": 0.9098742008209229, "learning_rate": 0.0005733540562576437, "loss": 3.3168, "step": 50235 }, { "epoch": 3.413507270009512, "grad_norm": 0.8177469968795776, "learning_rate": 0.0005733115912488109, "loss": 3.5476, "step": 50240 }, { "epoch": 3.413846990080174, "grad_norm": 0.9842658638954163, "learning_rate": 0.0005732691262399783, "loss": 3.3489, "step": 50245 }, { "epoch": 3.414186710150836, "grad_norm": 0.8692606687545776, "learning_rate": 0.0005732266612311455, "loss": 3.6411, "step": 50250 }, { "epoch": 3.4145264302214975, "grad_norm": 0.7734696269035339, "learning_rate": 0.0005731841962223128, "loss": 3.39, "step": 50255 }, { "epoch": 3.414866150292159, "grad_norm": 0.9279956817626953, "learning_rate": 0.0005731417312134802, "loss": 3.5193, "step": 50260 }, { "epoch": 3.415205870362821, "grad_norm": 0.8650059103965759, "learning_rate": 0.0005730992662046474, "loss": 3.7323, "step": 50265 }, { "epoch": 3.415545590433483, "grad_norm": 0.9717267751693726, "learning_rate": 0.0005730568011958147, "loss": 3.6155, "step": 50270 }, { "epoch": 3.4158853105041445, "grad_norm": 1.0183500051498413, "learning_rate": 0.000573014336186982, "loss": 3.5613, "step": 50275 }, { "epoch": 3.4162250305748065, "grad_norm": 0.7673061490058899, "learning_rate": 0.0005729718711781492, "loss": 3.7083, "step": 50280 }, { "epoch": 3.416564750645468, "grad_norm": 0.8956186771392822, "learning_rate": 0.0005729294061693165, "loss": 3.4953, "step": 50285 }, { "epoch": 3.41690447071613, "grad_norm": 0.8078187704086304, "learning_rate": 0.0005728869411604837, "loss": 3.6639, "step": 50290 }, { "epoch": 3.417244190786792, "grad_norm": 0.8220173120498657, "learning_rate": 0.0005728444761516511, "loss": 3.4842, "step": 50295 }, { "epoch": 3.4175839108574535, "grad_norm": 0.846361517906189, "learning_rate": 0.0005728020111428184, "loss": 3.4141, "step": 50300 }, { "epoch": 3.417923630928115, "grad_norm": 0.7822868227958679, "learning_rate": 0.0005727595461339856, "loss": 3.3812, "step": 50305 }, { "epoch": 3.418263350998777, "grad_norm": 0.8164373636245728, "learning_rate": 0.0005727170811251529, "loss": 3.635, "step": 50310 }, { "epoch": 3.418603071069439, "grad_norm": 0.876943826675415, "learning_rate": 0.0005726746161163202, "loss": 3.3361, "step": 50315 }, { "epoch": 3.4189427911401005, "grad_norm": 0.909516453742981, "learning_rate": 0.0005726321511074874, "loss": 3.604, "step": 50320 }, { "epoch": 3.4192825112107625, "grad_norm": 0.977600634098053, "learning_rate": 0.0005725896860986546, "loss": 3.4293, "step": 50325 }, { "epoch": 3.419622231281424, "grad_norm": 0.9885809421539307, "learning_rate": 0.0005725472210898221, "loss": 3.7721, "step": 50330 }, { "epoch": 3.419961951352086, "grad_norm": 0.7592830657958984, "learning_rate": 0.0005725047560809893, "loss": 3.362, "step": 50335 }, { "epoch": 3.420301671422748, "grad_norm": 0.9529237151145935, "learning_rate": 0.0005724622910721565, "loss": 3.7546, "step": 50340 }, { "epoch": 3.4206413914934095, "grad_norm": 1.0991103649139404, "learning_rate": 0.0005724198260633239, "loss": 3.6324, "step": 50345 }, { "epoch": 3.420981111564071, "grad_norm": 0.9099186658859253, "learning_rate": 0.0005723773610544911, "loss": 3.5146, "step": 50350 }, { "epoch": 3.421320831634733, "grad_norm": 0.9755991101264954, "learning_rate": 0.0005723348960456583, "loss": 3.5894, "step": 50355 }, { "epoch": 3.421660551705395, "grad_norm": 1.043871283531189, "learning_rate": 0.0005722924310368257, "loss": 3.4235, "step": 50360 }, { "epoch": 3.4220002717760565, "grad_norm": 0.7539039850234985, "learning_rate": 0.000572249966027993, "loss": 3.497, "step": 50365 }, { "epoch": 3.422339991846718, "grad_norm": 0.9017581939697266, "learning_rate": 0.0005722075010191602, "loss": 3.4785, "step": 50370 }, { "epoch": 3.42267971191738, "grad_norm": 0.9821982979774475, "learning_rate": 0.0005721650360103276, "loss": 3.6954, "step": 50375 }, { "epoch": 3.423019431988042, "grad_norm": 1.0090309381484985, "learning_rate": 0.0005721225710014948, "loss": 3.2802, "step": 50380 }, { "epoch": 3.4233591520587034, "grad_norm": 1.425331711769104, "learning_rate": 0.000572080105992662, "loss": 3.7522, "step": 50385 }, { "epoch": 3.4236988721293655, "grad_norm": 0.8659571409225464, "learning_rate": 0.0005720376409838293, "loss": 3.4554, "step": 50390 }, { "epoch": 3.424038592200027, "grad_norm": 0.767653226852417, "learning_rate": 0.0005719951759749966, "loss": 3.4911, "step": 50395 }, { "epoch": 3.424378312270689, "grad_norm": 0.8002367615699768, "learning_rate": 0.0005719527109661639, "loss": 3.5028, "step": 50400 }, { "epoch": 3.424718032341351, "grad_norm": 0.8083398938179016, "learning_rate": 0.0005719102459573312, "loss": 3.3622, "step": 50405 }, { "epoch": 3.4250577524120125, "grad_norm": 1.0708001852035522, "learning_rate": 0.0005718677809484985, "loss": 3.5909, "step": 50410 }, { "epoch": 3.425397472482674, "grad_norm": 0.9617190361022949, "learning_rate": 0.0005718253159396657, "loss": 3.3842, "step": 50415 }, { "epoch": 3.425737192553336, "grad_norm": 0.8474251627922058, "learning_rate": 0.000571782850930833, "loss": 3.4895, "step": 50420 }, { "epoch": 3.426076912623998, "grad_norm": 0.9659502506256104, "learning_rate": 0.0005717403859220002, "loss": 3.6542, "step": 50425 }, { "epoch": 3.4264166326946595, "grad_norm": 0.8797829747200012, "learning_rate": 0.0005716979209131675, "loss": 3.5711, "step": 50430 }, { "epoch": 3.4267563527653215, "grad_norm": 0.7151371240615845, "learning_rate": 0.0005716554559043349, "loss": 3.3297, "step": 50435 }, { "epoch": 3.427096072835983, "grad_norm": 1.003941297531128, "learning_rate": 0.0005716129908955021, "loss": 3.3435, "step": 50440 }, { "epoch": 3.427435792906645, "grad_norm": 0.868050217628479, "learning_rate": 0.0005715705258866694, "loss": 3.5076, "step": 50445 }, { "epoch": 3.427775512977307, "grad_norm": 0.9177842140197754, "learning_rate": 0.0005715280608778367, "loss": 3.3433, "step": 50450 }, { "epoch": 3.4281152330479685, "grad_norm": 0.8501611948013306, "learning_rate": 0.0005714855958690039, "loss": 3.4678, "step": 50455 }, { "epoch": 3.42845495311863, "grad_norm": 0.7744552493095398, "learning_rate": 0.0005714431308601712, "loss": 3.7013, "step": 50460 }, { "epoch": 3.428794673189292, "grad_norm": 1.0888409614562988, "learning_rate": 0.0005714006658513386, "loss": 3.2259, "step": 50465 }, { "epoch": 3.429134393259954, "grad_norm": 0.8661327362060547, "learning_rate": 0.0005713582008425058, "loss": 3.5107, "step": 50470 }, { "epoch": 3.4294741133306155, "grad_norm": 0.8520111441612244, "learning_rate": 0.000571315735833673, "loss": 3.5699, "step": 50475 }, { "epoch": 3.429813833401277, "grad_norm": 0.8589596152305603, "learning_rate": 0.0005712732708248404, "loss": 3.5752, "step": 50480 }, { "epoch": 3.430153553471939, "grad_norm": 0.8942217826843262, "learning_rate": 0.0005712308058160076, "loss": 3.5982, "step": 50485 }, { "epoch": 3.430493273542601, "grad_norm": 0.6301111578941345, "learning_rate": 0.0005711883408071748, "loss": 3.5326, "step": 50490 }, { "epoch": 3.4308329936132624, "grad_norm": 1.0892735719680786, "learning_rate": 0.0005711458757983422, "loss": 3.3951, "step": 50495 }, { "epoch": 3.4311727136839245, "grad_norm": 0.7894288301467896, "learning_rate": 0.0005711034107895095, "loss": 3.7086, "step": 50500 }, { "epoch": 3.431512433754586, "grad_norm": 0.74152672290802, "learning_rate": 0.0005710609457806767, "loss": 3.4952, "step": 50505 }, { "epoch": 3.4318521538252478, "grad_norm": 0.8731594681739807, "learning_rate": 0.0005710184807718441, "loss": 3.3273, "step": 50510 }, { "epoch": 3.43219187389591, "grad_norm": 1.0777064561843872, "learning_rate": 0.0005709760157630113, "loss": 3.5356, "step": 50515 }, { "epoch": 3.4325315939665715, "grad_norm": 1.0710819959640503, "learning_rate": 0.0005709335507541785, "loss": 3.5413, "step": 50520 }, { "epoch": 3.432871314037233, "grad_norm": 0.9023421406745911, "learning_rate": 0.0005708910857453458, "loss": 3.1763, "step": 50525 }, { "epoch": 3.433211034107895, "grad_norm": 0.8025294542312622, "learning_rate": 0.0005708486207365131, "loss": 3.5811, "step": 50530 }, { "epoch": 3.433550754178557, "grad_norm": 1.0256314277648926, "learning_rate": 0.0005708061557276804, "loss": 3.286, "step": 50535 }, { "epoch": 3.4338904742492184, "grad_norm": 0.9729945063591003, "learning_rate": 0.0005707636907188477, "loss": 3.5473, "step": 50540 }, { "epoch": 3.4342301943198805, "grad_norm": 0.8616268634796143, "learning_rate": 0.000570721225710015, "loss": 3.4677, "step": 50545 }, { "epoch": 3.434569914390542, "grad_norm": 0.8979142904281616, "learning_rate": 0.0005706787607011822, "loss": 3.6041, "step": 50550 }, { "epoch": 3.434909634461204, "grad_norm": 0.8089253902435303, "learning_rate": 0.0005706362956923495, "loss": 3.455, "step": 50555 }, { "epoch": 3.435249354531866, "grad_norm": 0.9664849042892456, "learning_rate": 0.0005705938306835168, "loss": 3.7483, "step": 50560 }, { "epoch": 3.4355890746025275, "grad_norm": 0.8747547268867493, "learning_rate": 0.000570551365674684, "loss": 3.4744, "step": 50565 }, { "epoch": 3.435928794673189, "grad_norm": 0.8132421970367432, "learning_rate": 0.0005705089006658514, "loss": 3.4626, "step": 50570 }, { "epoch": 3.436268514743851, "grad_norm": 0.9073557257652283, "learning_rate": 0.0005704664356570186, "loss": 3.4309, "step": 50575 }, { "epoch": 3.436608234814513, "grad_norm": 1.06233549118042, "learning_rate": 0.0005704239706481859, "loss": 3.4554, "step": 50580 }, { "epoch": 3.4369479548851745, "grad_norm": 1.2398866415023804, "learning_rate": 0.0005703815056393532, "loss": 3.6278, "step": 50585 }, { "epoch": 3.4372876749558365, "grad_norm": 1.1009074449539185, "learning_rate": 0.0005703390406305204, "loss": 3.5756, "step": 50590 }, { "epoch": 3.437627395026498, "grad_norm": 0.9235627055168152, "learning_rate": 0.0005702965756216878, "loss": 3.6776, "step": 50595 }, { "epoch": 3.43796711509716, "grad_norm": 0.7246569395065308, "learning_rate": 0.000570254110612855, "loss": 3.7002, "step": 50600 }, { "epoch": 3.438306835167822, "grad_norm": 0.7154413461685181, "learning_rate": 0.0005702116456040223, "loss": 3.6034, "step": 50605 }, { "epoch": 3.4386465552384835, "grad_norm": 0.8884794116020203, "learning_rate": 0.0005701691805951897, "loss": 3.5206, "step": 50610 }, { "epoch": 3.438986275309145, "grad_norm": 0.8134838342666626, "learning_rate": 0.0005701267155863569, "loss": 3.2, "step": 50615 }, { "epoch": 3.439325995379807, "grad_norm": 0.9034074544906616, "learning_rate": 0.0005700842505775241, "loss": 3.6551, "step": 50620 }, { "epoch": 3.439665715450469, "grad_norm": 0.8287516832351685, "learning_rate": 0.0005700417855686914, "loss": 3.5064, "step": 50625 }, { "epoch": 3.4400054355211305, "grad_norm": 0.853687584400177, "learning_rate": 0.0005699993205598587, "loss": 3.7684, "step": 50630 }, { "epoch": 3.4403451555917925, "grad_norm": 0.8786543607711792, "learning_rate": 0.0005699568555510259, "loss": 3.4802, "step": 50635 }, { "epoch": 3.440684875662454, "grad_norm": 0.811477780342102, "learning_rate": 0.0005699143905421933, "loss": 3.5606, "step": 50640 }, { "epoch": 3.441024595733116, "grad_norm": 1.0445820093154907, "learning_rate": 0.0005698719255333606, "loss": 3.7642, "step": 50645 }, { "epoch": 3.441364315803778, "grad_norm": 0.8884252309799194, "learning_rate": 0.0005698294605245278, "loss": 3.4542, "step": 50650 }, { "epoch": 3.4417040358744395, "grad_norm": 1.108083963394165, "learning_rate": 0.0005697869955156951, "loss": 3.7055, "step": 50655 }, { "epoch": 3.442043755945101, "grad_norm": 0.7667628526687622, "learning_rate": 0.0005697445305068624, "loss": 3.5844, "step": 50660 }, { "epoch": 3.442383476015763, "grad_norm": 0.5906009078025818, "learning_rate": 0.0005697020654980296, "loss": 3.764, "step": 50665 }, { "epoch": 3.442723196086425, "grad_norm": 0.8602977395057678, "learning_rate": 0.0005696596004891969, "loss": 3.5556, "step": 50670 }, { "epoch": 3.4430629161570865, "grad_norm": 0.8311915993690491, "learning_rate": 0.0005696171354803643, "loss": 3.4764, "step": 50675 }, { "epoch": 3.4434026362277486, "grad_norm": 0.8537005186080933, "learning_rate": 0.0005695746704715315, "loss": 3.5944, "step": 50680 }, { "epoch": 3.44374235629841, "grad_norm": 0.8184798955917358, "learning_rate": 0.0005695322054626988, "loss": 3.6164, "step": 50685 }, { "epoch": 3.444082076369072, "grad_norm": 0.9553748965263367, "learning_rate": 0.000569489740453866, "loss": 3.6228, "step": 50690 }, { "epoch": 3.444421796439734, "grad_norm": 0.8695975542068481, "learning_rate": 0.0005694472754450333, "loss": 3.5587, "step": 50695 }, { "epoch": 3.4447615165103955, "grad_norm": 0.7541364431381226, "learning_rate": 0.0005694048104362006, "loss": 3.4542, "step": 50700 }, { "epoch": 3.445101236581057, "grad_norm": 0.8585219383239746, "learning_rate": 0.0005693623454273678, "loss": 3.4124, "step": 50705 }, { "epoch": 3.4454409566517192, "grad_norm": 0.9003470540046692, "learning_rate": 0.0005693198804185352, "loss": 3.6391, "step": 50710 }, { "epoch": 3.445780676722381, "grad_norm": 1.042048692703247, "learning_rate": 0.0005692774154097025, "loss": 3.7485, "step": 50715 }, { "epoch": 3.4461203967930425, "grad_norm": 0.8912600874900818, "learning_rate": 0.0005692349504008697, "loss": 3.6337, "step": 50720 }, { "epoch": 3.446460116863704, "grad_norm": 0.7305474281311035, "learning_rate": 0.0005691924853920369, "loss": 3.7329, "step": 50725 }, { "epoch": 3.446799836934366, "grad_norm": 0.8587639927864075, "learning_rate": 0.0005691500203832043, "loss": 3.8979, "step": 50730 }, { "epoch": 3.447139557005028, "grad_norm": 1.2681081295013428, "learning_rate": 0.0005691075553743715, "loss": 3.3609, "step": 50735 }, { "epoch": 3.4474792770756895, "grad_norm": 0.7709375619888306, "learning_rate": 0.0005690650903655387, "loss": 3.5286, "step": 50740 }, { "epoch": 3.4478189971463515, "grad_norm": 0.7426081299781799, "learning_rate": 0.0005690226253567062, "loss": 3.2639, "step": 50745 }, { "epoch": 3.448158717217013, "grad_norm": 1.4819930791854858, "learning_rate": 0.0005689801603478734, "loss": 3.539, "step": 50750 }, { "epoch": 3.448498437287675, "grad_norm": 0.8509702682495117, "learning_rate": 0.0005689376953390406, "loss": 3.4811, "step": 50755 }, { "epoch": 3.448838157358337, "grad_norm": 0.8367165327072144, "learning_rate": 0.000568895230330208, "loss": 3.3323, "step": 50760 }, { "epoch": 3.4491778774289985, "grad_norm": 0.9964286684989929, "learning_rate": 0.0005688527653213752, "loss": 3.239, "step": 50765 }, { "epoch": 3.44951759749966, "grad_norm": 1.1147127151489258, "learning_rate": 0.0005688103003125424, "loss": 3.5745, "step": 50770 }, { "epoch": 3.449857317570322, "grad_norm": 0.9712830781936646, "learning_rate": 0.0005687678353037097, "loss": 3.3486, "step": 50775 }, { "epoch": 3.450197037640984, "grad_norm": 0.8550812005996704, "learning_rate": 0.0005687253702948771, "loss": 3.3398, "step": 50780 }, { "epoch": 3.4505367577116455, "grad_norm": 0.8620516061782837, "learning_rate": 0.0005686829052860443, "loss": 3.5049, "step": 50785 }, { "epoch": 3.4508764777823075, "grad_norm": 0.8977075219154358, "learning_rate": 0.0005686404402772116, "loss": 3.8658, "step": 50790 }, { "epoch": 3.451216197852969, "grad_norm": 1.2573661804199219, "learning_rate": 0.0005685979752683789, "loss": 3.4595, "step": 50795 }, { "epoch": 3.451555917923631, "grad_norm": 0.8379766941070557, "learning_rate": 0.0005685555102595461, "loss": 3.7034, "step": 50800 }, { "epoch": 3.451895637994293, "grad_norm": 0.9348949193954468, "learning_rate": 0.0005685130452507134, "loss": 3.574, "step": 50805 }, { "epoch": 3.4522353580649545, "grad_norm": 0.8916747570037842, "learning_rate": 0.0005684705802418806, "loss": 3.5446, "step": 50810 }, { "epoch": 3.452575078135616, "grad_norm": 0.7410023808479309, "learning_rate": 0.000568428115233048, "loss": 3.692, "step": 50815 }, { "epoch": 3.452914798206278, "grad_norm": 0.8096827268600464, "learning_rate": 0.0005683856502242153, "loss": 3.4863, "step": 50820 }, { "epoch": 3.45325451827694, "grad_norm": 0.701348066329956, "learning_rate": 0.0005683431852153825, "loss": 3.5798, "step": 50825 }, { "epoch": 3.4535942383476015, "grad_norm": 0.9151264429092407, "learning_rate": 0.0005683007202065498, "loss": 3.5321, "step": 50830 }, { "epoch": 3.453933958418263, "grad_norm": 0.8722145557403564, "learning_rate": 0.0005682582551977171, "loss": 3.3863, "step": 50835 }, { "epoch": 3.454273678488925, "grad_norm": 0.9006122946739197, "learning_rate": 0.0005682157901888843, "loss": 3.5009, "step": 50840 }, { "epoch": 3.454613398559587, "grad_norm": 0.8197031021118164, "learning_rate": 0.0005681733251800516, "loss": 3.4763, "step": 50845 }, { "epoch": 3.4549531186302485, "grad_norm": 0.8744727969169617, "learning_rate": 0.000568130860171219, "loss": 3.5832, "step": 50850 }, { "epoch": 3.4552928387009105, "grad_norm": 1.117315649986267, "learning_rate": 0.0005680883951623862, "loss": 3.6203, "step": 50855 }, { "epoch": 3.455632558771572, "grad_norm": 0.7963263988494873, "learning_rate": 0.0005680459301535535, "loss": 3.3261, "step": 50860 }, { "epoch": 3.455972278842234, "grad_norm": 0.9327932596206665, "learning_rate": 0.0005680034651447208, "loss": 3.4591, "step": 50865 }, { "epoch": 3.456311998912896, "grad_norm": 0.9840716123580933, "learning_rate": 0.000567961000135888, "loss": 3.557, "step": 50870 }, { "epoch": 3.4566517189835575, "grad_norm": 1.1771697998046875, "learning_rate": 0.0005679185351270552, "loss": 3.5785, "step": 50875 }, { "epoch": 3.456991439054219, "grad_norm": 0.9164397716522217, "learning_rate": 0.0005678760701182226, "loss": 3.5117, "step": 50880 }, { "epoch": 3.457331159124881, "grad_norm": 0.9693806767463684, "learning_rate": 0.0005678336051093899, "loss": 3.4547, "step": 50885 }, { "epoch": 3.457670879195543, "grad_norm": 0.9002485275268555, "learning_rate": 0.0005677911401005571, "loss": 3.6228, "step": 50890 }, { "epoch": 3.4580105992662045, "grad_norm": 1.138275384902954, "learning_rate": 0.0005677486750917245, "loss": 3.297, "step": 50895 }, { "epoch": 3.4583503193368665, "grad_norm": 0.8543419241905212, "learning_rate": 0.0005677062100828917, "loss": 3.6483, "step": 50900 }, { "epoch": 3.458690039407528, "grad_norm": 0.8848923444747925, "learning_rate": 0.0005676637450740589, "loss": 3.6183, "step": 50905 }, { "epoch": 3.45902975947819, "grad_norm": 0.9100379943847656, "learning_rate": 0.0005676212800652263, "loss": 3.521, "step": 50910 }, { "epoch": 3.459369479548852, "grad_norm": 1.7573810815811157, "learning_rate": 0.0005675788150563935, "loss": 3.5461, "step": 50915 }, { "epoch": 3.4597091996195135, "grad_norm": 1.0431642532348633, "learning_rate": 0.0005675363500475608, "loss": 3.4086, "step": 50920 }, { "epoch": 3.460048919690175, "grad_norm": 0.9521357417106628, "learning_rate": 0.0005674938850387281, "loss": 3.3599, "step": 50925 }, { "epoch": 3.460388639760837, "grad_norm": 0.7850194573402405, "learning_rate": 0.0005674514200298954, "loss": 3.2531, "step": 50930 }, { "epoch": 3.460728359831499, "grad_norm": 1.3494582176208496, "learning_rate": 0.0005674089550210627, "loss": 3.6451, "step": 50935 }, { "epoch": 3.4610680799021605, "grad_norm": 0.869604766368866, "learning_rate": 0.0005673664900122299, "loss": 3.4525, "step": 50940 }, { "epoch": 3.4614077999728226, "grad_norm": 0.8547475337982178, "learning_rate": 0.0005673240250033972, "loss": 3.6296, "step": 50945 }, { "epoch": 3.461747520043484, "grad_norm": 0.8615207076072693, "learning_rate": 0.0005672815599945645, "loss": 3.5508, "step": 50950 }, { "epoch": 3.462087240114146, "grad_norm": 0.8000222444534302, "learning_rate": 0.0005672390949857318, "loss": 3.4841, "step": 50955 }, { "epoch": 3.462426960184808, "grad_norm": 0.7710859179496765, "learning_rate": 0.000567196629976899, "loss": 3.7641, "step": 50960 }, { "epoch": 3.4627666802554695, "grad_norm": 1.0300705432891846, "learning_rate": 0.0005671541649680664, "loss": 3.5524, "step": 50965 }, { "epoch": 3.463106400326131, "grad_norm": 0.837216317653656, "learning_rate": 0.0005671116999592336, "loss": 3.4492, "step": 50970 }, { "epoch": 3.4634461203967932, "grad_norm": 1.0632215738296509, "learning_rate": 0.0005670692349504008, "loss": 3.636, "step": 50975 }, { "epoch": 3.463785840467455, "grad_norm": 0.9279181957244873, "learning_rate": 0.0005670267699415682, "loss": 3.5595, "step": 50980 }, { "epoch": 3.4641255605381165, "grad_norm": 0.8463853597640991, "learning_rate": 0.0005669843049327354, "loss": 3.5197, "step": 50985 }, { "epoch": 3.4644652806087786, "grad_norm": 1.0096808671951294, "learning_rate": 0.0005669418399239027, "loss": 3.2733, "step": 50990 }, { "epoch": 3.46480500067944, "grad_norm": 0.8177899122238159, "learning_rate": 0.0005668993749150701, "loss": 3.5502, "step": 50995 }, { "epoch": 3.465144720750102, "grad_norm": 0.6265968680381775, "learning_rate": 0.0005668569099062373, "loss": 3.8016, "step": 51000 }, { "epoch": 3.465484440820764, "grad_norm": 0.7518759965896606, "learning_rate": 0.0005668144448974045, "loss": 3.8827, "step": 51005 }, { "epoch": 3.4658241608914255, "grad_norm": 0.9135801196098328, "learning_rate": 0.0005667719798885719, "loss": 3.4915, "step": 51010 }, { "epoch": 3.466163880962087, "grad_norm": 1.0301766395568848, "learning_rate": 0.0005667295148797391, "loss": 3.5549, "step": 51015 }, { "epoch": 3.4665036010327492, "grad_norm": 1.0821129083633423, "learning_rate": 0.0005666870498709063, "loss": 3.4086, "step": 51020 }, { "epoch": 3.466843321103411, "grad_norm": 1.0367754697799683, "learning_rate": 0.0005666445848620737, "loss": 3.5576, "step": 51025 }, { "epoch": 3.4671830411740725, "grad_norm": 0.6967639923095703, "learning_rate": 0.000566602119853241, "loss": 3.5741, "step": 51030 }, { "epoch": 3.4675227612447346, "grad_norm": 0.9032452702522278, "learning_rate": 0.0005665596548444082, "loss": 3.6342, "step": 51035 }, { "epoch": 3.467862481315396, "grad_norm": 1.0133156776428223, "learning_rate": 0.0005665171898355755, "loss": 3.6274, "step": 51040 }, { "epoch": 3.468202201386058, "grad_norm": 0.9498702883720398, "learning_rate": 0.0005664747248267428, "loss": 3.4234, "step": 51045 }, { "epoch": 3.46854192145672, "grad_norm": 0.9075527191162109, "learning_rate": 0.00056643225981791, "loss": 3.6376, "step": 51050 }, { "epoch": 3.4688816415273815, "grad_norm": 0.8078995943069458, "learning_rate": 0.0005663897948090774, "loss": 3.7091, "step": 51055 }, { "epoch": 3.469221361598043, "grad_norm": 1.065435767173767, "learning_rate": 0.0005663473298002447, "loss": 3.4035, "step": 51060 }, { "epoch": 3.469561081668705, "grad_norm": 0.8498144745826721, "learning_rate": 0.0005663048647914119, "loss": 3.3855, "step": 51065 }, { "epoch": 3.469900801739367, "grad_norm": 0.9964304566383362, "learning_rate": 0.0005662623997825792, "loss": 3.4021, "step": 51070 }, { "epoch": 3.4702405218100285, "grad_norm": 0.815377414226532, "learning_rate": 0.0005662199347737464, "loss": 3.3272, "step": 51075 }, { "epoch": 3.47058024188069, "grad_norm": 1.0796416997909546, "learning_rate": 0.0005661774697649137, "loss": 3.5905, "step": 51080 }, { "epoch": 3.470919961951352, "grad_norm": 0.999350368976593, "learning_rate": 0.000566135004756081, "loss": 3.5277, "step": 51085 }, { "epoch": 3.471259682022014, "grad_norm": 0.9466595649719238, "learning_rate": 0.0005660925397472483, "loss": 3.7873, "step": 51090 }, { "epoch": 3.4715994020926755, "grad_norm": 0.7036259174346924, "learning_rate": 0.0005660500747384156, "loss": 3.4771, "step": 51095 }, { "epoch": 3.4719391221633376, "grad_norm": 0.982236385345459, "learning_rate": 0.0005660076097295829, "loss": 3.5734, "step": 51100 }, { "epoch": 3.472278842233999, "grad_norm": 0.9965671300888062, "learning_rate": 0.0005659651447207501, "loss": 3.3784, "step": 51105 }, { "epoch": 3.472618562304661, "grad_norm": 0.7863239645957947, "learning_rate": 0.0005659226797119173, "loss": 3.5013, "step": 51110 }, { "epoch": 3.472958282375323, "grad_norm": 0.7466006278991699, "learning_rate": 0.0005658802147030847, "loss": 3.5878, "step": 51115 }, { "epoch": 3.4732980024459845, "grad_norm": 0.8626487851142883, "learning_rate": 0.0005658377496942519, "loss": 3.6068, "step": 51120 }, { "epoch": 3.473637722516646, "grad_norm": 1.3672266006469727, "learning_rate": 0.0005657952846854192, "loss": 3.5629, "step": 51125 }, { "epoch": 3.4739774425873082, "grad_norm": 0.9647884964942932, "learning_rate": 0.0005657528196765866, "loss": 3.5314, "step": 51130 }, { "epoch": 3.47431716265797, "grad_norm": 0.8028362989425659, "learning_rate": 0.0005657103546677538, "loss": 3.5949, "step": 51135 }, { "epoch": 3.4746568827286315, "grad_norm": 1.1468398571014404, "learning_rate": 0.000565667889658921, "loss": 3.4245, "step": 51140 }, { "epoch": 3.4749966027992936, "grad_norm": 0.8467814326286316, "learning_rate": 0.0005656254246500884, "loss": 3.5896, "step": 51145 }, { "epoch": 3.475336322869955, "grad_norm": 0.6715965867042542, "learning_rate": 0.0005655829596412556, "loss": 3.6635, "step": 51150 }, { "epoch": 3.475676042940617, "grad_norm": 1.0916974544525146, "learning_rate": 0.0005655404946324228, "loss": 3.4277, "step": 51155 }, { "epoch": 3.4760157630112785, "grad_norm": 0.8410515785217285, "learning_rate": 0.0005654980296235903, "loss": 3.6335, "step": 51160 }, { "epoch": 3.4763554830819405, "grad_norm": 0.8266076445579529, "learning_rate": 0.0005654555646147575, "loss": 3.3921, "step": 51165 }, { "epoch": 3.476695203152602, "grad_norm": 0.7660947442054749, "learning_rate": 0.0005654130996059247, "loss": 3.3394, "step": 51170 }, { "epoch": 3.477034923223264, "grad_norm": 0.8132218718528748, "learning_rate": 0.000565370634597092, "loss": 3.3233, "step": 51175 }, { "epoch": 3.477374643293926, "grad_norm": 0.9864709377288818, "learning_rate": 0.0005653281695882593, "loss": 3.6753, "step": 51180 }, { "epoch": 3.4777143633645875, "grad_norm": 0.8380324244499207, "learning_rate": 0.0005652857045794265, "loss": 3.7001, "step": 51185 }, { "epoch": 3.478054083435249, "grad_norm": 0.6473556160926819, "learning_rate": 0.0005652432395705938, "loss": 3.6258, "step": 51190 }, { "epoch": 3.478393803505911, "grad_norm": 0.7766239643096924, "learning_rate": 0.0005652007745617612, "loss": 3.5542, "step": 51195 }, { "epoch": 3.478733523576573, "grad_norm": 0.8013241291046143, "learning_rate": 0.0005651583095529284, "loss": 3.3974, "step": 51200 }, { "epoch": 3.4790732436472345, "grad_norm": 0.7030622363090515, "learning_rate": 0.0005651158445440957, "loss": 3.5066, "step": 51205 }, { "epoch": 3.4794129637178965, "grad_norm": 1.1094800233840942, "learning_rate": 0.000565073379535263, "loss": 3.1997, "step": 51210 }, { "epoch": 3.479752683788558, "grad_norm": 0.9074397683143616, "learning_rate": 0.0005650309145264302, "loss": 3.7866, "step": 51215 }, { "epoch": 3.48009240385922, "grad_norm": 1.0409674644470215, "learning_rate": 0.0005649884495175975, "loss": 3.5543, "step": 51220 }, { "epoch": 3.480432123929882, "grad_norm": 0.9041967988014221, "learning_rate": 0.0005649459845087647, "loss": 3.567, "step": 51225 }, { "epoch": 3.4807718440005435, "grad_norm": 1.072012186050415, "learning_rate": 0.0005649035194999321, "loss": 3.5292, "step": 51230 }, { "epoch": 3.481111564071205, "grad_norm": 0.9603410363197327, "learning_rate": 0.0005648610544910994, "loss": 3.6805, "step": 51235 }, { "epoch": 3.481451284141867, "grad_norm": 0.7793058753013611, "learning_rate": 0.0005648185894822666, "loss": 3.4742, "step": 51240 }, { "epoch": 3.481791004212529, "grad_norm": 0.9922502040863037, "learning_rate": 0.0005647761244734339, "loss": 3.3514, "step": 51245 }, { "epoch": 3.4821307242831905, "grad_norm": 0.9909048676490784, "learning_rate": 0.0005647336594646012, "loss": 3.377, "step": 51250 }, { "epoch": 3.4824704443538526, "grad_norm": 0.7987185716629028, "learning_rate": 0.0005646911944557684, "loss": 3.7446, "step": 51255 }, { "epoch": 3.482810164424514, "grad_norm": 0.8964027762413025, "learning_rate": 0.0005646487294469356, "loss": 3.5682, "step": 51260 }, { "epoch": 3.483149884495176, "grad_norm": 0.7689936757087708, "learning_rate": 0.0005646062644381031, "loss": 3.5389, "step": 51265 }, { "epoch": 3.483489604565838, "grad_norm": 0.8254878520965576, "learning_rate": 0.0005645637994292703, "loss": 3.8442, "step": 51270 }, { "epoch": 3.4838293246364995, "grad_norm": 0.8959947228431702, "learning_rate": 0.0005645213344204376, "loss": 3.3892, "step": 51275 }, { "epoch": 3.484169044707161, "grad_norm": 0.7640753388404846, "learning_rate": 0.0005644788694116049, "loss": 3.6694, "step": 51280 }, { "epoch": 3.4845087647778232, "grad_norm": 0.7767606377601624, "learning_rate": 0.0005644364044027721, "loss": 3.4289, "step": 51285 }, { "epoch": 3.484848484848485, "grad_norm": 1.109019160270691, "learning_rate": 0.0005643939393939394, "loss": 3.4552, "step": 51290 }, { "epoch": 3.4851882049191465, "grad_norm": 0.9672184586524963, "learning_rate": 0.0005643514743851067, "loss": 3.4566, "step": 51295 }, { "epoch": 3.4855279249898086, "grad_norm": 0.7632936239242554, "learning_rate": 0.000564309009376274, "loss": 3.4585, "step": 51300 }, { "epoch": 3.48586764506047, "grad_norm": 0.8213794827461243, "learning_rate": 0.0005642665443674413, "loss": 3.8212, "step": 51305 }, { "epoch": 3.486207365131132, "grad_norm": 0.9411114454269409, "learning_rate": 0.0005642240793586085, "loss": 3.3897, "step": 51310 }, { "epoch": 3.486547085201794, "grad_norm": 0.8775694370269775, "learning_rate": 0.0005641816143497758, "loss": 3.6353, "step": 51315 }, { "epoch": 3.4868868052724555, "grad_norm": 0.8697211146354675, "learning_rate": 0.0005641391493409431, "loss": 3.1997, "step": 51320 }, { "epoch": 3.487226525343117, "grad_norm": 0.9775707125663757, "learning_rate": 0.0005640966843321103, "loss": 3.6579, "step": 51325 }, { "epoch": 3.4875662454137792, "grad_norm": 0.840651273727417, "learning_rate": 0.0005640542193232776, "loss": 3.6888, "step": 51330 }, { "epoch": 3.487905965484441, "grad_norm": 0.7616641521453857, "learning_rate": 0.000564011754314445, "loss": 3.6693, "step": 51335 }, { "epoch": 3.4882456855551025, "grad_norm": 0.9862614274024963, "learning_rate": 0.0005639692893056122, "loss": 3.5066, "step": 51340 }, { "epoch": 3.4885854056257646, "grad_norm": 0.7202863097190857, "learning_rate": 0.0005639268242967795, "loss": 3.6784, "step": 51345 }, { "epoch": 3.488925125696426, "grad_norm": 0.8098672032356262, "learning_rate": 0.0005638843592879468, "loss": 3.613, "step": 51350 }, { "epoch": 3.489264845767088, "grad_norm": 0.7527655959129333, "learning_rate": 0.000563841894279114, "loss": 3.4663, "step": 51355 }, { "epoch": 3.48960456583775, "grad_norm": 0.8376598358154297, "learning_rate": 0.0005637994292702812, "loss": 3.3015, "step": 51360 }, { "epoch": 3.4899442859084115, "grad_norm": 0.8786569833755493, "learning_rate": 0.0005637569642614486, "loss": 3.6006, "step": 51365 }, { "epoch": 3.490284005979073, "grad_norm": 0.9088473916053772, "learning_rate": 0.0005637144992526159, "loss": 3.5948, "step": 51370 }, { "epoch": 3.4906237260497353, "grad_norm": 0.7834986448287964, "learning_rate": 0.0005636720342437831, "loss": 3.6424, "step": 51375 }, { "epoch": 3.490963446120397, "grad_norm": 0.8700459003448486, "learning_rate": 0.0005636295692349505, "loss": 3.5948, "step": 51380 }, { "epoch": 3.4913031661910585, "grad_norm": 0.8020461797714233, "learning_rate": 0.0005635871042261177, "loss": 3.4943, "step": 51385 }, { "epoch": 3.4916428862617206, "grad_norm": 0.9292786717414856, "learning_rate": 0.0005635446392172849, "loss": 3.3214, "step": 51390 }, { "epoch": 3.491982606332382, "grad_norm": 0.765859842300415, "learning_rate": 0.0005635021742084523, "loss": 3.635, "step": 51395 }, { "epoch": 3.492322326403044, "grad_norm": 0.9569457769393921, "learning_rate": 0.0005634597091996195, "loss": 3.5607, "step": 51400 }, { "epoch": 3.4926620464737055, "grad_norm": 0.9257526397705078, "learning_rate": 0.0005634172441907868, "loss": 3.5275, "step": 51405 }, { "epoch": 3.4930017665443676, "grad_norm": 1.1237123012542725, "learning_rate": 0.0005633747791819542, "loss": 3.5096, "step": 51410 }, { "epoch": 3.493341486615029, "grad_norm": 0.906174898147583, "learning_rate": 0.0005633323141731214, "loss": 3.3716, "step": 51415 }, { "epoch": 3.493681206685691, "grad_norm": 0.9369825720787048, "learning_rate": 0.0005632898491642886, "loss": 3.9223, "step": 51420 }, { "epoch": 3.494020926756353, "grad_norm": 0.9653894901275635, "learning_rate": 0.0005632473841554559, "loss": 3.6036, "step": 51425 }, { "epoch": 3.4943606468270145, "grad_norm": 0.8812628388404846, "learning_rate": 0.0005632049191466232, "loss": 3.5882, "step": 51430 }, { "epoch": 3.494700366897676, "grad_norm": 0.720146119594574, "learning_rate": 0.0005631624541377904, "loss": 3.7216, "step": 51435 }, { "epoch": 3.4950400869683382, "grad_norm": 0.7779410481452942, "learning_rate": 0.0005631199891289578, "loss": 3.3995, "step": 51440 }, { "epoch": 3.495379807039, "grad_norm": 0.9413473606109619, "learning_rate": 0.0005630775241201251, "loss": 3.4882, "step": 51445 }, { "epoch": 3.4957195271096615, "grad_norm": 0.9943309426307678, "learning_rate": 0.0005630350591112923, "loss": 3.3377, "step": 51450 }, { "epoch": 3.4960592471803236, "grad_norm": 1.2260239124298096, "learning_rate": 0.0005629925941024596, "loss": 3.5222, "step": 51455 }, { "epoch": 3.496398967250985, "grad_norm": 0.774575412273407, "learning_rate": 0.0005629501290936268, "loss": 3.6552, "step": 51460 }, { "epoch": 3.496738687321647, "grad_norm": 0.805324912071228, "learning_rate": 0.0005629076640847941, "loss": 3.4472, "step": 51465 }, { "epoch": 3.497078407392309, "grad_norm": 0.8504440188407898, "learning_rate": 0.0005628651990759614, "loss": 3.5397, "step": 51470 }, { "epoch": 3.4974181274629705, "grad_norm": 0.8804094791412354, "learning_rate": 0.0005628227340671287, "loss": 3.3276, "step": 51475 }, { "epoch": 3.497757847533632, "grad_norm": 0.7801376581192017, "learning_rate": 0.000562780269058296, "loss": 3.692, "step": 51480 }, { "epoch": 3.4980975676042942, "grad_norm": 0.9864916801452637, "learning_rate": 0.0005627378040494633, "loss": 3.3714, "step": 51485 }, { "epoch": 3.498437287674956, "grad_norm": 0.8376511931419373, "learning_rate": 0.0005626953390406305, "loss": 3.5077, "step": 51490 }, { "epoch": 3.4987770077456175, "grad_norm": 0.8168814182281494, "learning_rate": 0.0005626528740317977, "loss": 3.4695, "step": 51495 }, { "epoch": 3.499116727816279, "grad_norm": 0.7951503396034241, "learning_rate": 0.0005626104090229651, "loss": 3.3997, "step": 51500 }, { "epoch": 3.499456447886941, "grad_norm": 1.0576317310333252, "learning_rate": 0.0005625679440141323, "loss": 3.5215, "step": 51505 }, { "epoch": 3.499796167957603, "grad_norm": 0.7913901209831238, "learning_rate": 0.0005625254790052996, "loss": 3.7335, "step": 51510 }, { "epoch": 3.5001358880282645, "grad_norm": 0.8944619297981262, "learning_rate": 0.000562483013996467, "loss": 3.4656, "step": 51515 }, { "epoch": 3.5004756080989265, "grad_norm": 0.7629538178443909, "learning_rate": 0.0005624405489876342, "loss": 3.2441, "step": 51520 }, { "epoch": 3.500815328169588, "grad_norm": 0.8104501962661743, "learning_rate": 0.0005623980839788014, "loss": 3.3974, "step": 51525 }, { "epoch": 3.50115504824025, "grad_norm": 0.841345489025116, "learning_rate": 0.0005623556189699688, "loss": 3.542, "step": 51530 }, { "epoch": 3.501494768310912, "grad_norm": 0.7238110899925232, "learning_rate": 0.000562313153961136, "loss": 3.2091, "step": 51535 }, { "epoch": 3.5018344883815735, "grad_norm": 0.8902817368507385, "learning_rate": 0.0005622706889523032, "loss": 3.5345, "step": 51540 }, { "epoch": 3.502174208452235, "grad_norm": 1.0520728826522827, "learning_rate": 0.0005622282239434707, "loss": 3.4738, "step": 51545 }, { "epoch": 3.5025139285228972, "grad_norm": 0.8984050154685974, "learning_rate": 0.0005621857589346379, "loss": 3.5549, "step": 51550 }, { "epoch": 3.502853648593559, "grad_norm": 0.8637600541114807, "learning_rate": 0.0005621432939258051, "loss": 3.6015, "step": 51555 }, { "epoch": 3.5031933686642205, "grad_norm": 0.8415752053260803, "learning_rate": 0.0005621008289169724, "loss": 3.3714, "step": 51560 }, { "epoch": 3.5035330887348826, "grad_norm": 0.7472608089447021, "learning_rate": 0.0005620583639081397, "loss": 3.5942, "step": 51565 }, { "epoch": 3.503872808805544, "grad_norm": 0.7850767374038696, "learning_rate": 0.0005620158988993069, "loss": 3.2448, "step": 51570 }, { "epoch": 3.504212528876206, "grad_norm": 1.0866000652313232, "learning_rate": 0.0005619734338904742, "loss": 3.7759, "step": 51575 }, { "epoch": 3.504552248946868, "grad_norm": 0.9364721179008484, "learning_rate": 0.0005619309688816416, "loss": 3.4446, "step": 51580 }, { "epoch": 3.5048919690175295, "grad_norm": 0.7977840304374695, "learning_rate": 0.0005618885038728088, "loss": 3.4841, "step": 51585 }, { "epoch": 3.505231689088191, "grad_norm": 0.769893229007721, "learning_rate": 0.0005618460388639761, "loss": 3.3831, "step": 51590 }, { "epoch": 3.5055714091588532, "grad_norm": 1.0044397115707397, "learning_rate": 0.0005618035738551434, "loss": 3.8222, "step": 51595 }, { "epoch": 3.505911129229515, "grad_norm": 0.9172252416610718, "learning_rate": 0.0005617611088463106, "loss": 3.2412, "step": 51600 }, { "epoch": 3.5062508493001765, "grad_norm": 0.7814190983772278, "learning_rate": 0.0005617186438374779, "loss": 3.1614, "step": 51605 }, { "epoch": 3.5065905693708386, "grad_norm": 0.8668371438980103, "learning_rate": 0.0005616761788286451, "loss": 3.3322, "step": 51610 }, { "epoch": 3.5069302894415, "grad_norm": 0.7890636920928955, "learning_rate": 0.0005616337138198125, "loss": 3.2095, "step": 51615 }, { "epoch": 3.507270009512162, "grad_norm": 0.8138740658760071, "learning_rate": 0.0005615912488109798, "loss": 3.7045, "step": 51620 }, { "epoch": 3.507609729582824, "grad_norm": 1.4448821544647217, "learning_rate": 0.000561548783802147, "loss": 3.8606, "step": 51625 }, { "epoch": 3.5079494496534855, "grad_norm": 1.2234591245651245, "learning_rate": 0.0005615063187933144, "loss": 3.5153, "step": 51630 }, { "epoch": 3.508289169724147, "grad_norm": 0.8405647873878479, "learning_rate": 0.0005614638537844816, "loss": 3.4182, "step": 51635 }, { "epoch": 3.5086288897948092, "grad_norm": 1.1307792663574219, "learning_rate": 0.0005614213887756488, "loss": 3.7185, "step": 51640 }, { "epoch": 3.508968609865471, "grad_norm": 0.9317634105682373, "learning_rate": 0.0005613789237668163, "loss": 3.6125, "step": 51645 }, { "epoch": 3.5093083299361325, "grad_norm": 0.9644767045974731, "learning_rate": 0.0005613364587579835, "loss": 3.7728, "step": 51650 }, { "epoch": 3.5096480500067946, "grad_norm": 0.8064932823181152, "learning_rate": 0.0005612939937491507, "loss": 3.3894, "step": 51655 }, { "epoch": 3.509987770077456, "grad_norm": 0.8022134900093079, "learning_rate": 0.000561251528740318, "loss": 3.4637, "step": 51660 }, { "epoch": 3.510327490148118, "grad_norm": 0.6569642424583435, "learning_rate": 0.0005612090637314853, "loss": 3.4312, "step": 51665 }, { "epoch": 3.51066721021878, "grad_norm": 0.7643089890480042, "learning_rate": 0.0005611665987226525, "loss": 3.5662, "step": 51670 }, { "epoch": 3.5110069302894416, "grad_norm": 0.8506712913513184, "learning_rate": 0.0005611241337138198, "loss": 3.558, "step": 51675 }, { "epoch": 3.511346650360103, "grad_norm": 0.9346274733543396, "learning_rate": 0.0005610816687049872, "loss": 3.4561, "step": 51680 }, { "epoch": 3.5116863704307653, "grad_norm": 0.6653513312339783, "learning_rate": 0.0005610392036961544, "loss": 3.5411, "step": 51685 }, { "epoch": 3.512026090501427, "grad_norm": 0.8930595517158508, "learning_rate": 0.0005609967386873217, "loss": 3.3545, "step": 51690 }, { "epoch": 3.5123658105720885, "grad_norm": 1.024877667427063, "learning_rate": 0.000560954273678489, "loss": 3.7135, "step": 51695 }, { "epoch": 3.5127055306427506, "grad_norm": 1.0778228044509888, "learning_rate": 0.0005609118086696562, "loss": 3.7303, "step": 51700 }, { "epoch": 3.5130452507134122, "grad_norm": 0.9148048758506775, "learning_rate": 0.0005608693436608235, "loss": 3.3472, "step": 51705 }, { "epoch": 3.513384970784074, "grad_norm": 0.7239642143249512, "learning_rate": 0.0005608268786519907, "loss": 3.618, "step": 51710 }, { "epoch": 3.513724690854736, "grad_norm": 0.8533495664596558, "learning_rate": 0.0005607844136431581, "loss": 3.3132, "step": 51715 }, { "epoch": 3.5140644109253976, "grad_norm": 0.9184256196022034, "learning_rate": 0.0005607419486343254, "loss": 3.581, "step": 51720 }, { "epoch": 3.514404130996059, "grad_norm": 0.7706084847450256, "learning_rate": 0.0005606994836254926, "loss": 3.3683, "step": 51725 }, { "epoch": 3.5147438510667213, "grad_norm": 0.9520463347434998, "learning_rate": 0.0005606570186166599, "loss": 3.2902, "step": 51730 }, { "epoch": 3.515083571137383, "grad_norm": 0.7837512493133545, "learning_rate": 0.0005606145536078272, "loss": 3.4524, "step": 51735 }, { "epoch": 3.5154232912080445, "grad_norm": 0.9515770077705383, "learning_rate": 0.0005605720885989944, "loss": 3.5433, "step": 51740 }, { "epoch": 3.5157630112787066, "grad_norm": 1.1817463636398315, "learning_rate": 0.0005605296235901616, "loss": 3.6943, "step": 51745 }, { "epoch": 3.5161027313493682, "grad_norm": 0.9184385538101196, "learning_rate": 0.0005604871585813291, "loss": 3.843, "step": 51750 }, { "epoch": 3.51644245142003, "grad_norm": 0.8847455978393555, "learning_rate": 0.0005604446935724963, "loss": 3.5419, "step": 51755 }, { "epoch": 3.516782171490692, "grad_norm": 0.8379064798355103, "learning_rate": 0.0005604022285636635, "loss": 3.5398, "step": 51760 }, { "epoch": 3.5171218915613536, "grad_norm": 0.7975239157676697, "learning_rate": 0.0005603597635548309, "loss": 3.4863, "step": 51765 }, { "epoch": 3.517461611632015, "grad_norm": 0.9701249599456787, "learning_rate": 0.0005603172985459981, "loss": 3.5517, "step": 51770 }, { "epoch": 3.517801331702677, "grad_norm": 0.9126126766204834, "learning_rate": 0.0005602748335371653, "loss": 3.4602, "step": 51775 }, { "epoch": 3.518141051773339, "grad_norm": 0.9058125615119934, "learning_rate": 0.0005602323685283327, "loss": 3.5894, "step": 51780 }, { "epoch": 3.5184807718440005, "grad_norm": 1.140618920326233, "learning_rate": 0.0005601899035195, "loss": 3.3426, "step": 51785 }, { "epoch": 3.518820491914662, "grad_norm": 0.9155729413032532, "learning_rate": 0.0005601474385106672, "loss": 3.7154, "step": 51790 }, { "epoch": 3.5191602119853242, "grad_norm": 0.9906036853790283, "learning_rate": 0.0005601049735018346, "loss": 3.298, "step": 51795 }, { "epoch": 3.519499932055986, "grad_norm": 0.9347759485244751, "learning_rate": 0.0005600625084930018, "loss": 3.3577, "step": 51800 }, { "epoch": 3.5198396521266475, "grad_norm": 0.7696252465248108, "learning_rate": 0.000560020043484169, "loss": 3.7412, "step": 51805 }, { "epoch": 3.520179372197309, "grad_norm": 0.7300373315811157, "learning_rate": 0.0005599775784753363, "loss": 3.7743, "step": 51810 }, { "epoch": 3.520519092267971, "grad_norm": 0.9151380062103271, "learning_rate": 0.0005599351134665036, "loss": 3.4998, "step": 51815 }, { "epoch": 3.520858812338633, "grad_norm": 0.919293999671936, "learning_rate": 0.0005598926484576709, "loss": 3.3316, "step": 51820 }, { "epoch": 3.5211985324092945, "grad_norm": 0.813542366027832, "learning_rate": 0.0005598501834488382, "loss": 3.406, "step": 51825 }, { "epoch": 3.5215382524799566, "grad_norm": 0.918489933013916, "learning_rate": 0.0005598077184400055, "loss": 3.5884, "step": 51830 }, { "epoch": 3.521877972550618, "grad_norm": 0.8201988935470581, "learning_rate": 0.0005597652534311727, "loss": 3.7181, "step": 51835 }, { "epoch": 3.52221769262128, "grad_norm": 1.016832709312439, "learning_rate": 0.00055972278842234, "loss": 3.306, "step": 51840 }, { "epoch": 3.522557412691942, "grad_norm": 0.9122205972671509, "learning_rate": 0.0005596803234135072, "loss": 3.4174, "step": 51845 }, { "epoch": 3.5228971327626035, "grad_norm": 0.9363826513290405, "learning_rate": 0.0005596378584046745, "loss": 3.5525, "step": 51850 }, { "epoch": 3.523236852833265, "grad_norm": 0.806104302406311, "learning_rate": 0.0005595953933958419, "loss": 3.4359, "step": 51855 }, { "epoch": 3.5235765729039272, "grad_norm": 0.9297754764556885, "learning_rate": 0.0005595529283870091, "loss": 3.6022, "step": 51860 }, { "epoch": 3.523916292974589, "grad_norm": 0.7797461152076721, "learning_rate": 0.0005595104633781764, "loss": 3.4858, "step": 51865 }, { "epoch": 3.5242560130452505, "grad_norm": 0.7945965528488159, "learning_rate": 0.0005594679983693437, "loss": 3.3539, "step": 51870 }, { "epoch": 3.5245957331159126, "grad_norm": 0.8169232606887817, "learning_rate": 0.0005594255333605109, "loss": 3.6442, "step": 51875 }, { "epoch": 3.524935453186574, "grad_norm": 0.9521108269691467, "learning_rate": 0.0005593830683516782, "loss": 3.2598, "step": 51880 }, { "epoch": 3.525275173257236, "grad_norm": 0.7855775356292725, "learning_rate": 0.0005593406033428455, "loss": 3.2925, "step": 51885 }, { "epoch": 3.525614893327898, "grad_norm": 0.804915189743042, "learning_rate": 0.0005592981383340128, "loss": 3.665, "step": 51890 }, { "epoch": 3.5259546133985595, "grad_norm": 0.8565942049026489, "learning_rate": 0.00055925567332518, "loss": 3.5115, "step": 51895 }, { "epoch": 3.526294333469221, "grad_norm": 0.9097862243652344, "learning_rate": 0.0005592132083163474, "loss": 3.5318, "step": 51900 }, { "epoch": 3.5266340535398832, "grad_norm": 0.8703302145004272, "learning_rate": 0.0005591707433075146, "loss": 3.6007, "step": 51905 }, { "epoch": 3.526973773610545, "grad_norm": 0.8829666376113892, "learning_rate": 0.0005591282782986818, "loss": 3.6236, "step": 51910 }, { "epoch": 3.5273134936812065, "grad_norm": 1.082809329032898, "learning_rate": 0.0005590858132898492, "loss": 3.6727, "step": 51915 }, { "epoch": 3.5276532137518686, "grad_norm": 0.8204389810562134, "learning_rate": 0.0005590433482810164, "loss": 3.2575, "step": 51920 }, { "epoch": 3.52799293382253, "grad_norm": 0.8753173351287842, "learning_rate": 0.0005590008832721837, "loss": 3.5779, "step": 51925 }, { "epoch": 3.528332653893192, "grad_norm": 0.7334783673286438, "learning_rate": 0.0005589584182633511, "loss": 3.7181, "step": 51930 }, { "epoch": 3.528672373963854, "grad_norm": 0.8596217632293701, "learning_rate": 0.0005589159532545183, "loss": 3.4511, "step": 51935 }, { "epoch": 3.5290120940345155, "grad_norm": 1.0429655313491821, "learning_rate": 0.0005588734882456855, "loss": 3.4838, "step": 51940 }, { "epoch": 3.529351814105177, "grad_norm": 1.1335219144821167, "learning_rate": 0.0005588310232368528, "loss": 3.4396, "step": 51945 }, { "epoch": 3.5296915341758393, "grad_norm": 0.9223446249961853, "learning_rate": 0.0005587885582280201, "loss": 3.4127, "step": 51950 }, { "epoch": 3.530031254246501, "grad_norm": 0.8735531568527222, "learning_rate": 0.0005587460932191873, "loss": 3.4319, "step": 51955 }, { "epoch": 3.5303709743171625, "grad_norm": 1.1868902444839478, "learning_rate": 0.0005587036282103547, "loss": 3.5537, "step": 51960 }, { "epoch": 3.5307106943878246, "grad_norm": 0.8958865404129028, "learning_rate": 0.000558661163201522, "loss": 3.4669, "step": 51965 }, { "epoch": 3.531050414458486, "grad_norm": 0.7688196897506714, "learning_rate": 0.0005586186981926893, "loss": 3.6802, "step": 51970 }, { "epoch": 3.531390134529148, "grad_norm": 0.828703761100769, "learning_rate": 0.0005585762331838565, "loss": 3.1564, "step": 51975 }, { "epoch": 3.53172985459981, "grad_norm": 0.8530594110488892, "learning_rate": 0.0005585337681750238, "loss": 3.3192, "step": 51980 }, { "epoch": 3.5320695746704716, "grad_norm": 0.9764853119850159, "learning_rate": 0.0005584913031661911, "loss": 3.6971, "step": 51985 }, { "epoch": 3.532409294741133, "grad_norm": 0.8271609544754028, "learning_rate": 0.0005584488381573583, "loss": 3.5326, "step": 51990 }, { "epoch": 3.5327490148117953, "grad_norm": 1.083512783050537, "learning_rate": 0.0005584063731485256, "loss": 3.6448, "step": 51995 }, { "epoch": 3.533088734882457, "grad_norm": 1.0156619548797607, "learning_rate": 0.000558363908139693, "loss": 3.5835, "step": 52000 }, { "epoch": 3.5334284549531185, "grad_norm": 0.9077045321464539, "learning_rate": 0.0005583214431308602, "loss": 3.8323, "step": 52005 }, { "epoch": 3.5337681750237806, "grad_norm": 0.9944930672645569, "learning_rate": 0.0005582789781220274, "loss": 3.2179, "step": 52010 }, { "epoch": 3.5341078950944422, "grad_norm": 0.9028391242027283, "learning_rate": 0.0005582365131131948, "loss": 3.5602, "step": 52015 }, { "epoch": 3.534447615165104, "grad_norm": 0.8399282097816467, "learning_rate": 0.000558194048104362, "loss": 3.4665, "step": 52020 }, { "epoch": 3.534787335235766, "grad_norm": 0.73624587059021, "learning_rate": 0.0005581515830955292, "loss": 3.5786, "step": 52025 }, { "epoch": 3.5351270553064276, "grad_norm": 0.8361068964004517, "learning_rate": 0.0005581091180866967, "loss": 3.7046, "step": 52030 }, { "epoch": 3.535466775377089, "grad_norm": 1.0157029628753662, "learning_rate": 0.0005580666530778639, "loss": 3.6703, "step": 52035 }, { "epoch": 3.5358064954477513, "grad_norm": 1.007638931274414, "learning_rate": 0.0005580241880690311, "loss": 3.5216, "step": 52040 }, { "epoch": 3.536146215518413, "grad_norm": 0.7872365713119507, "learning_rate": 0.0005579817230601984, "loss": 3.6176, "step": 52045 }, { "epoch": 3.5364859355890745, "grad_norm": 0.8825936913490295, "learning_rate": 0.0005579392580513657, "loss": 3.5919, "step": 52050 }, { "epoch": 3.5368256556597366, "grad_norm": 0.9555568695068359, "learning_rate": 0.0005578967930425329, "loss": 3.6951, "step": 52055 }, { "epoch": 3.5371653757303982, "grad_norm": 0.9356720447540283, "learning_rate": 0.0005578543280337002, "loss": 3.4531, "step": 52060 }, { "epoch": 3.53750509580106, "grad_norm": 0.8546147346496582, "learning_rate": 0.0005578118630248676, "loss": 3.4239, "step": 52065 }, { "epoch": 3.537844815871722, "grad_norm": 1.025626540184021, "learning_rate": 0.0005577693980160348, "loss": 3.3635, "step": 52070 }, { "epoch": 3.5381845359423836, "grad_norm": 0.9694139361381531, "learning_rate": 0.0005577269330072021, "loss": 3.6169, "step": 52075 }, { "epoch": 3.538524256013045, "grad_norm": 1.0546377897262573, "learning_rate": 0.0005576844679983694, "loss": 3.6338, "step": 52080 }, { "epoch": 3.5388639760837073, "grad_norm": 1.0432416200637817, "learning_rate": 0.0005576420029895366, "loss": 3.3604, "step": 52085 }, { "epoch": 3.539203696154369, "grad_norm": 1.0540459156036377, "learning_rate": 0.0005575995379807039, "loss": 3.3928, "step": 52090 }, { "epoch": 3.5395434162250305, "grad_norm": 1.0021926164627075, "learning_rate": 0.0005575570729718711, "loss": 3.8106, "step": 52095 }, { "epoch": 3.5398831362956926, "grad_norm": 0.8311357498168945, "learning_rate": 0.0005575146079630385, "loss": 3.4701, "step": 52100 }, { "epoch": 3.5402228563663543, "grad_norm": 0.6501753330230713, "learning_rate": 0.0005574721429542058, "loss": 3.7566, "step": 52105 }, { "epoch": 3.540562576437016, "grad_norm": 0.8300336003303528, "learning_rate": 0.000557429677945373, "loss": 3.2969, "step": 52110 }, { "epoch": 3.5409022965076775, "grad_norm": 0.9344176650047302, "learning_rate": 0.0005573872129365403, "loss": 3.7353, "step": 52115 }, { "epoch": 3.5412420165783396, "grad_norm": 0.8325251936912537, "learning_rate": 0.0005573447479277076, "loss": 3.4615, "step": 52120 }, { "epoch": 3.541581736649001, "grad_norm": 0.7268164157867432, "learning_rate": 0.0005573022829188748, "loss": 3.3834, "step": 52125 }, { "epoch": 3.541921456719663, "grad_norm": 0.9894169569015503, "learning_rate": 0.000557259817910042, "loss": 3.5761, "step": 52130 }, { "epoch": 3.542261176790325, "grad_norm": 0.9691335558891296, "learning_rate": 0.0005572173529012095, "loss": 3.5849, "step": 52135 }, { "epoch": 3.5426008968609866, "grad_norm": 0.7903117537498474, "learning_rate": 0.0005571748878923767, "loss": 3.4318, "step": 52140 }, { "epoch": 3.542940616931648, "grad_norm": 0.7900323271751404, "learning_rate": 0.0005571324228835439, "loss": 3.588, "step": 52145 }, { "epoch": 3.54328033700231, "grad_norm": 0.6701492667198181, "learning_rate": 0.0005570899578747113, "loss": 3.447, "step": 52150 }, { "epoch": 3.543620057072972, "grad_norm": 0.9259559512138367, "learning_rate": 0.0005570474928658785, "loss": 3.7044, "step": 52155 }, { "epoch": 3.5439597771436335, "grad_norm": 0.9695510864257812, "learning_rate": 0.0005570050278570457, "loss": 3.656, "step": 52160 }, { "epoch": 3.544299497214295, "grad_norm": 0.7857998609542847, "learning_rate": 0.0005569625628482131, "loss": 3.5088, "step": 52165 }, { "epoch": 3.5446392172849572, "grad_norm": 0.8709975481033325, "learning_rate": 0.0005569200978393804, "loss": 3.4101, "step": 52170 }, { "epoch": 3.544978937355619, "grad_norm": 0.9217280149459839, "learning_rate": 0.0005568776328305476, "loss": 3.3223, "step": 52175 }, { "epoch": 3.5453186574262805, "grad_norm": 0.8068895936012268, "learning_rate": 0.000556835167821715, "loss": 3.3833, "step": 52180 }, { "epoch": 3.5456583774969426, "grad_norm": 0.7545412182807922, "learning_rate": 0.0005567927028128822, "loss": 3.0967, "step": 52185 }, { "epoch": 3.545998097567604, "grad_norm": 0.7863417267799377, "learning_rate": 0.0005567502378040494, "loss": 3.5885, "step": 52190 }, { "epoch": 3.546337817638266, "grad_norm": 0.8601597547531128, "learning_rate": 0.0005567077727952167, "loss": 3.2991, "step": 52195 }, { "epoch": 3.546677537708928, "grad_norm": 1.1049659252166748, "learning_rate": 0.000556665307786384, "loss": 3.3829, "step": 52200 }, { "epoch": 3.5470172577795895, "grad_norm": 1.1074718236923218, "learning_rate": 0.0005566228427775513, "loss": 3.4737, "step": 52205 }, { "epoch": 3.547356977850251, "grad_norm": 0.7934736609458923, "learning_rate": 0.0005565803777687186, "loss": 3.5343, "step": 52210 }, { "epoch": 3.5476966979209132, "grad_norm": 0.9247459173202515, "learning_rate": 0.0005565379127598859, "loss": 3.6102, "step": 52215 }, { "epoch": 3.548036417991575, "grad_norm": 0.8647847175598145, "learning_rate": 0.0005564954477510531, "loss": 3.5223, "step": 52220 }, { "epoch": 3.5483761380622365, "grad_norm": 1.0227409601211548, "learning_rate": 0.0005564529827422204, "loss": 3.4552, "step": 52225 }, { "epoch": 3.5487158581328986, "grad_norm": 0.9131997227668762, "learning_rate": 0.0005564105177333876, "loss": 3.6005, "step": 52230 }, { "epoch": 3.54905557820356, "grad_norm": 0.7534160614013672, "learning_rate": 0.0005563680527245549, "loss": 3.6412, "step": 52235 }, { "epoch": 3.549395298274222, "grad_norm": 0.873165488243103, "learning_rate": 0.0005563255877157223, "loss": 3.5216, "step": 52240 }, { "epoch": 3.549735018344884, "grad_norm": 0.9282477498054504, "learning_rate": 0.0005562831227068895, "loss": 3.5907, "step": 52245 }, { "epoch": 3.5500747384155455, "grad_norm": 0.7818275094032288, "learning_rate": 0.0005562406576980568, "loss": 3.5158, "step": 52250 }, { "epoch": 3.550414458486207, "grad_norm": 0.7446109056472778, "learning_rate": 0.0005561981926892241, "loss": 3.4593, "step": 52255 }, { "epoch": 3.5507541785568693, "grad_norm": 1.063023328781128, "learning_rate": 0.0005561557276803913, "loss": 3.3804, "step": 52260 }, { "epoch": 3.551093898627531, "grad_norm": 0.8629617094993591, "learning_rate": 0.0005561132626715586, "loss": 3.4822, "step": 52265 }, { "epoch": 3.5514336186981925, "grad_norm": 0.7897460460662842, "learning_rate": 0.000556070797662726, "loss": 3.4463, "step": 52270 }, { "epoch": 3.5517733387688546, "grad_norm": 0.7240408062934875, "learning_rate": 0.0005560283326538932, "loss": 3.7213, "step": 52275 }, { "epoch": 3.5521130588395162, "grad_norm": 1.0587586164474487, "learning_rate": 0.0005559858676450605, "loss": 3.5715, "step": 52280 }, { "epoch": 3.552452778910178, "grad_norm": 0.9337775111198425, "learning_rate": 0.0005559434026362278, "loss": 3.5866, "step": 52285 }, { "epoch": 3.55279249898084, "grad_norm": 1.1152263879776, "learning_rate": 0.000555900937627395, "loss": 3.5777, "step": 52290 }, { "epoch": 3.5531322190515016, "grad_norm": 1.1544393301010132, "learning_rate": 0.0005558584726185622, "loss": 3.8881, "step": 52295 }, { "epoch": 3.553471939122163, "grad_norm": 0.8033807277679443, "learning_rate": 0.0005558160076097296, "loss": 3.3601, "step": 52300 }, { "epoch": 3.5538116591928253, "grad_norm": 0.8602167963981628, "learning_rate": 0.0005557735426008969, "loss": 3.7696, "step": 52305 }, { "epoch": 3.554151379263487, "grad_norm": 0.8778760433197021, "learning_rate": 0.0005557310775920642, "loss": 3.4121, "step": 52310 }, { "epoch": 3.5544910993341485, "grad_norm": 0.734488308429718, "learning_rate": 0.0005556886125832315, "loss": 3.4768, "step": 52315 }, { "epoch": 3.5548308194048106, "grad_norm": 0.9240022897720337, "learning_rate": 0.0005556461475743987, "loss": 3.5798, "step": 52320 }, { "epoch": 3.5551705394754722, "grad_norm": 0.6525583267211914, "learning_rate": 0.000555603682565566, "loss": 3.6312, "step": 52325 }, { "epoch": 3.555510259546134, "grad_norm": 0.9384711384773254, "learning_rate": 0.0005555612175567333, "loss": 3.5772, "step": 52330 }, { "epoch": 3.555849979616796, "grad_norm": 1.2145386934280396, "learning_rate": 0.0005555187525479005, "loss": 3.6883, "step": 52335 }, { "epoch": 3.5561896996874576, "grad_norm": 0.8730262517929077, "learning_rate": 0.0005554762875390679, "loss": 3.4113, "step": 52340 }, { "epoch": 3.556529419758119, "grad_norm": 1.1078976392745972, "learning_rate": 0.0005554338225302351, "loss": 3.1874, "step": 52345 }, { "epoch": 3.5568691398287813, "grad_norm": 0.7804155945777893, "learning_rate": 0.0005553913575214024, "loss": 3.4113, "step": 52350 }, { "epoch": 3.557208859899443, "grad_norm": 1.106882095336914, "learning_rate": 0.0005553488925125697, "loss": 3.556, "step": 52355 }, { "epoch": 3.5575485799701045, "grad_norm": 0.8432283997535706, "learning_rate": 0.0005553064275037369, "loss": 3.5888, "step": 52360 }, { "epoch": 3.5578883000407666, "grad_norm": 0.8163414001464844, "learning_rate": 0.0005552639624949042, "loss": 3.4466, "step": 52365 }, { "epoch": 3.5582280201114282, "grad_norm": 0.9727712273597717, "learning_rate": 0.0005552214974860715, "loss": 3.4272, "step": 52370 }, { "epoch": 3.55856774018209, "grad_norm": 0.8184770345687866, "learning_rate": 0.0005551790324772388, "loss": 3.4395, "step": 52375 }, { "epoch": 3.558907460252752, "grad_norm": 0.7420095801353455, "learning_rate": 0.000555136567468406, "loss": 3.531, "step": 52380 }, { "epoch": 3.5592471803234136, "grad_norm": 0.7178859710693359, "learning_rate": 0.0005550941024595734, "loss": 3.3636, "step": 52385 }, { "epoch": 3.559586900394075, "grad_norm": 1.0624867677688599, "learning_rate": 0.0005550516374507406, "loss": 3.3615, "step": 52390 }, { "epoch": 3.5599266204647373, "grad_norm": 1.3558964729309082, "learning_rate": 0.0005550091724419078, "loss": 3.3387, "step": 52395 }, { "epoch": 3.560266340535399, "grad_norm": 1.0010141134262085, "learning_rate": 0.0005549667074330752, "loss": 3.5343, "step": 52400 }, { "epoch": 3.5606060606060606, "grad_norm": 0.847794771194458, "learning_rate": 0.0005549242424242424, "loss": 3.395, "step": 52405 }, { "epoch": 3.5609457806767226, "grad_norm": 0.7556495666503906, "learning_rate": 0.0005548817774154097, "loss": 3.532, "step": 52410 }, { "epoch": 3.5612855007473843, "grad_norm": 0.8140825629234314, "learning_rate": 0.0005548393124065771, "loss": 3.573, "step": 52415 }, { "epoch": 3.561625220818046, "grad_norm": 0.8537191152572632, "learning_rate": 0.0005547968473977443, "loss": 3.6089, "step": 52420 }, { "epoch": 3.561964940888708, "grad_norm": 0.8600668907165527, "learning_rate": 0.0005547543823889115, "loss": 3.6491, "step": 52425 }, { "epoch": 3.5623046609593696, "grad_norm": 0.8466275334358215, "learning_rate": 0.0005547119173800789, "loss": 3.3413, "step": 52430 }, { "epoch": 3.5626443810300312, "grad_norm": 0.8884152770042419, "learning_rate": 0.0005546694523712461, "loss": 3.7404, "step": 52435 }, { "epoch": 3.5629841011006933, "grad_norm": 1.2633672952651978, "learning_rate": 0.0005546269873624133, "loss": 3.2408, "step": 52440 }, { "epoch": 3.563323821171355, "grad_norm": 1.0530993938446045, "learning_rate": 0.0005545845223535807, "loss": 3.4943, "step": 52445 }, { "epoch": 3.5636635412420166, "grad_norm": 1.009983777999878, "learning_rate": 0.000554542057344748, "loss": 3.3403, "step": 52450 }, { "epoch": 3.564003261312678, "grad_norm": 1.1613073348999023, "learning_rate": 0.0005544995923359152, "loss": 3.39, "step": 52455 }, { "epoch": 3.5643429813833403, "grad_norm": 0.8669741153717041, "learning_rate": 0.0005544571273270825, "loss": 3.6979, "step": 52460 }, { "epoch": 3.564682701454002, "grad_norm": 0.8397752046585083, "learning_rate": 0.0005544146623182498, "loss": 3.3534, "step": 52465 }, { "epoch": 3.5650224215246635, "grad_norm": 0.9142210483551025, "learning_rate": 0.000554372197309417, "loss": 3.4304, "step": 52470 }, { "epoch": 3.5653621415953256, "grad_norm": 1.0372178554534912, "learning_rate": 0.0005543297323005843, "loss": 3.5681, "step": 52475 }, { "epoch": 3.5657018616659872, "grad_norm": 0.9111691117286682, "learning_rate": 0.0005542872672917517, "loss": 3.5819, "step": 52480 }, { "epoch": 3.566041581736649, "grad_norm": 0.8053564429283142, "learning_rate": 0.0005542448022829189, "loss": 3.6201, "step": 52485 }, { "epoch": 3.5663813018073105, "grad_norm": 0.9566690325737, "learning_rate": 0.0005542023372740862, "loss": 3.5511, "step": 52490 }, { "epoch": 3.5667210218779726, "grad_norm": 0.8295654654502869, "learning_rate": 0.0005541598722652534, "loss": 3.7962, "step": 52495 }, { "epoch": 3.567060741948634, "grad_norm": 0.7827464938163757, "learning_rate": 0.0005541174072564207, "loss": 3.3857, "step": 52500 }, { "epoch": 3.567400462019296, "grad_norm": 0.7262658476829529, "learning_rate": 0.000554074942247588, "loss": 3.3328, "step": 52505 }, { "epoch": 3.567740182089958, "grad_norm": 0.6547884345054626, "learning_rate": 0.0005540324772387552, "loss": 3.3839, "step": 52510 }, { "epoch": 3.5680799021606195, "grad_norm": 0.7867702841758728, "learning_rate": 0.0005539900122299226, "loss": 3.2648, "step": 52515 }, { "epoch": 3.568419622231281, "grad_norm": 0.8483701348304749, "learning_rate": 0.0005539475472210899, "loss": 3.6283, "step": 52520 }, { "epoch": 3.5687593423019432, "grad_norm": 1.0494918823242188, "learning_rate": 0.0005539050822122571, "loss": 3.8617, "step": 52525 }, { "epoch": 3.569099062372605, "grad_norm": 0.8794864416122437, "learning_rate": 0.0005538626172034243, "loss": 3.4755, "step": 52530 }, { "epoch": 3.5694387824432665, "grad_norm": 0.9872075319290161, "learning_rate": 0.0005538201521945917, "loss": 3.2696, "step": 52535 }, { "epoch": 3.5697785025139286, "grad_norm": 0.994526743888855, "learning_rate": 0.0005537776871857589, "loss": 3.9037, "step": 52540 }, { "epoch": 3.57011822258459, "grad_norm": 1.0126667022705078, "learning_rate": 0.0005537352221769261, "loss": 3.5087, "step": 52545 }, { "epoch": 3.570457942655252, "grad_norm": 0.9972997307777405, "learning_rate": 0.0005536927571680936, "loss": 3.5819, "step": 52550 }, { "epoch": 3.570797662725914, "grad_norm": 0.9009750485420227, "learning_rate": 0.0005536502921592608, "loss": 3.5678, "step": 52555 }, { "epoch": 3.5711373827965756, "grad_norm": 0.9232698678970337, "learning_rate": 0.000553607827150428, "loss": 3.4753, "step": 52560 }, { "epoch": 3.571477102867237, "grad_norm": 0.7926644086837769, "learning_rate": 0.0005535653621415954, "loss": 3.3699, "step": 52565 }, { "epoch": 3.5718168229378993, "grad_norm": 0.795653760433197, "learning_rate": 0.0005535228971327626, "loss": 3.5941, "step": 52570 }, { "epoch": 3.572156543008561, "grad_norm": 0.9408392310142517, "learning_rate": 0.0005534804321239298, "loss": 3.7129, "step": 52575 }, { "epoch": 3.5724962630792225, "grad_norm": 0.8918939828872681, "learning_rate": 0.0005534379671150971, "loss": 3.5735, "step": 52580 }, { "epoch": 3.5728359831498846, "grad_norm": 0.8696001768112183, "learning_rate": 0.0005533955021062645, "loss": 3.7956, "step": 52585 }, { "epoch": 3.5731757032205462, "grad_norm": 0.862492561340332, "learning_rate": 0.0005533530370974317, "loss": 3.6723, "step": 52590 }, { "epoch": 3.573515423291208, "grad_norm": 1.1803942918777466, "learning_rate": 0.000553310572088599, "loss": 3.4395, "step": 52595 }, { "epoch": 3.57385514336187, "grad_norm": 0.8333913683891296, "learning_rate": 0.0005532681070797663, "loss": 3.5296, "step": 52600 }, { "epoch": 3.5741948634325316, "grad_norm": 0.595969021320343, "learning_rate": 0.0005532256420709335, "loss": 3.4462, "step": 52605 }, { "epoch": 3.574534583503193, "grad_norm": 1.1871641874313354, "learning_rate": 0.0005531831770621008, "loss": 3.7298, "step": 52610 }, { "epoch": 3.5748743035738553, "grad_norm": 1.0818650722503662, "learning_rate": 0.000553140712053268, "loss": 3.6356, "step": 52615 }, { "epoch": 3.575214023644517, "grad_norm": 0.8001614212989807, "learning_rate": 0.0005530982470444354, "loss": 3.5454, "step": 52620 }, { "epoch": 3.5755537437151785, "grad_norm": 0.9451671242713928, "learning_rate": 0.0005530557820356027, "loss": 3.6534, "step": 52625 }, { "epoch": 3.5758934637858406, "grad_norm": 0.9551347494125366, "learning_rate": 0.00055301331702677, "loss": 3.7384, "step": 52630 }, { "epoch": 3.5762331838565022, "grad_norm": 1.0258092880249023, "learning_rate": 0.0005529708520179372, "loss": 3.3459, "step": 52635 }, { "epoch": 3.576572903927164, "grad_norm": 0.9507967829704285, "learning_rate": 0.0005529283870091045, "loss": 3.4059, "step": 52640 }, { "epoch": 3.576912623997826, "grad_norm": 0.803168773651123, "learning_rate": 0.0005528859220002717, "loss": 3.4028, "step": 52645 }, { "epoch": 3.5772523440684876, "grad_norm": 0.9862895011901855, "learning_rate": 0.0005528434569914391, "loss": 3.7526, "step": 52650 }, { "epoch": 3.577592064139149, "grad_norm": 0.8800660967826843, "learning_rate": 0.0005528009919826064, "loss": 3.4973, "step": 52655 }, { "epoch": 3.5779317842098113, "grad_norm": 0.9139227271080017, "learning_rate": 0.0005527585269737736, "loss": 3.4352, "step": 52660 }, { "epoch": 3.578271504280473, "grad_norm": 0.8356056809425354, "learning_rate": 0.000552716061964941, "loss": 3.4296, "step": 52665 }, { "epoch": 3.5786112243511345, "grad_norm": 0.8394856452941895, "learning_rate": 0.0005526735969561082, "loss": 3.4842, "step": 52670 }, { "epoch": 3.5789509444217966, "grad_norm": 0.9028449058532715, "learning_rate": 0.0005526311319472754, "loss": 3.5303, "step": 52675 }, { "epoch": 3.5792906644924583, "grad_norm": 0.6592805981636047, "learning_rate": 0.0005525886669384427, "loss": 3.5797, "step": 52680 }, { "epoch": 3.57963038456312, "grad_norm": 0.8321759104728699, "learning_rate": 0.00055254620192961, "loss": 3.6278, "step": 52685 }, { "epoch": 3.579970104633782, "grad_norm": 0.8286914825439453, "learning_rate": 0.0005525037369207773, "loss": 3.6836, "step": 52690 }, { "epoch": 3.5803098247044436, "grad_norm": 0.9556105136871338, "learning_rate": 0.0005524612719119446, "loss": 3.4488, "step": 52695 }, { "epoch": 3.580649544775105, "grad_norm": 0.8201723694801331, "learning_rate": 0.0005524188069031119, "loss": 3.8296, "step": 52700 }, { "epoch": 3.5809892648457673, "grad_norm": 0.8736295104026794, "learning_rate": 0.0005523763418942791, "loss": 3.5008, "step": 52705 }, { "epoch": 3.581328984916429, "grad_norm": 0.7972049117088318, "learning_rate": 0.0005523338768854464, "loss": 3.5393, "step": 52710 }, { "epoch": 3.5816687049870906, "grad_norm": 0.6801161766052246, "learning_rate": 0.0005522914118766137, "loss": 3.6074, "step": 52715 }, { "epoch": 3.5820084250577526, "grad_norm": 0.9474793076515198, "learning_rate": 0.0005522489468677809, "loss": 3.6357, "step": 52720 }, { "epoch": 3.5823481451284143, "grad_norm": 0.8200023174285889, "learning_rate": 0.0005522064818589483, "loss": 3.552, "step": 52725 }, { "epoch": 3.582687865199076, "grad_norm": 0.778630256652832, "learning_rate": 0.0005521640168501155, "loss": 3.4114, "step": 52730 }, { "epoch": 3.583027585269738, "grad_norm": 0.857019305229187, "learning_rate": 0.0005521215518412828, "loss": 3.5997, "step": 52735 }, { "epoch": 3.5833673053403996, "grad_norm": 0.9865216612815857, "learning_rate": 0.0005520790868324501, "loss": 3.464, "step": 52740 }, { "epoch": 3.5837070254110612, "grad_norm": 0.8849552273750305, "learning_rate": 0.0005520366218236173, "loss": 3.2026, "step": 52745 }, { "epoch": 3.5840467454817233, "grad_norm": 0.7534281611442566, "learning_rate": 0.0005519941568147846, "loss": 3.4803, "step": 52750 }, { "epoch": 3.584386465552385, "grad_norm": 0.8372395634651184, "learning_rate": 0.000551951691805952, "loss": 3.4433, "step": 52755 }, { "epoch": 3.5847261856230466, "grad_norm": 1.0841163396835327, "learning_rate": 0.0005519092267971192, "loss": 3.7106, "step": 52760 }, { "epoch": 3.5850659056937086, "grad_norm": 0.9874895215034485, "learning_rate": 0.0005518667617882865, "loss": 3.4888, "step": 52765 }, { "epoch": 3.5854056257643703, "grad_norm": 0.7308298349380493, "learning_rate": 0.0005518242967794538, "loss": 3.5205, "step": 52770 }, { "epoch": 3.585745345835032, "grad_norm": 0.9006463885307312, "learning_rate": 0.000551781831770621, "loss": 3.5123, "step": 52775 }, { "epoch": 3.586085065905694, "grad_norm": 0.6696062684059143, "learning_rate": 0.0005517393667617882, "loss": 3.5431, "step": 52780 }, { "epoch": 3.5864247859763556, "grad_norm": 1.0323081016540527, "learning_rate": 0.0005516969017529556, "loss": 3.2308, "step": 52785 }, { "epoch": 3.5867645060470172, "grad_norm": 0.7446427345275879, "learning_rate": 0.0005516544367441229, "loss": 3.7679, "step": 52790 }, { "epoch": 3.587104226117679, "grad_norm": 0.8995482921600342, "learning_rate": 0.0005516119717352901, "loss": 3.6302, "step": 52795 }, { "epoch": 3.587443946188341, "grad_norm": 0.7484413981437683, "learning_rate": 0.0005515695067264575, "loss": 3.8534, "step": 52800 }, { "epoch": 3.5877836662590026, "grad_norm": 0.9605672955513, "learning_rate": 0.0005515270417176247, "loss": 3.5409, "step": 52805 }, { "epoch": 3.588123386329664, "grad_norm": 0.9126277565956116, "learning_rate": 0.0005514845767087919, "loss": 3.9055, "step": 52810 }, { "epoch": 3.5884631064003263, "grad_norm": 0.8671048283576965, "learning_rate": 0.0005514421116999593, "loss": 3.6186, "step": 52815 }, { "epoch": 3.588802826470988, "grad_norm": 1.0121009349822998, "learning_rate": 0.0005513996466911265, "loss": 3.6075, "step": 52820 }, { "epoch": 3.5891425465416495, "grad_norm": 1.0537192821502686, "learning_rate": 0.0005513571816822938, "loss": 3.606, "step": 52825 }, { "epoch": 3.589482266612311, "grad_norm": 0.8901971578598022, "learning_rate": 0.0005513147166734612, "loss": 3.7166, "step": 52830 }, { "epoch": 3.5898219866829733, "grad_norm": 0.8190103769302368, "learning_rate": 0.0005512722516646284, "loss": 3.4991, "step": 52835 }, { "epoch": 3.590161706753635, "grad_norm": 0.7430129647254944, "learning_rate": 0.0005512297866557956, "loss": 3.7339, "step": 52840 }, { "epoch": 3.5905014268242965, "grad_norm": 0.865852952003479, "learning_rate": 0.0005511873216469629, "loss": 3.5387, "step": 52845 }, { "epoch": 3.5908411468949586, "grad_norm": 0.9233421683311462, "learning_rate": 0.0005511448566381302, "loss": 3.5489, "step": 52850 }, { "epoch": 3.59118086696562, "grad_norm": 1.1758742332458496, "learning_rate": 0.0005511023916292974, "loss": 3.5101, "step": 52855 }, { "epoch": 3.591520587036282, "grad_norm": 0.759839653968811, "learning_rate": 0.0005510599266204648, "loss": 3.4364, "step": 52860 }, { "epoch": 3.591860307106944, "grad_norm": 0.8615610003471375, "learning_rate": 0.0005510174616116321, "loss": 3.5271, "step": 52865 }, { "epoch": 3.5922000271776056, "grad_norm": 2.8105292320251465, "learning_rate": 0.0005509749966027993, "loss": 3.5537, "step": 52870 }, { "epoch": 3.592539747248267, "grad_norm": 0.8419066667556763, "learning_rate": 0.0005509325315939666, "loss": 3.2907, "step": 52875 }, { "epoch": 3.5928794673189293, "grad_norm": 1.0476595163345337, "learning_rate": 0.0005508900665851338, "loss": 3.5564, "step": 52880 }, { "epoch": 3.593219187389591, "grad_norm": 0.8517836332321167, "learning_rate": 0.0005508476015763011, "loss": 3.5576, "step": 52885 }, { "epoch": 3.5935589074602525, "grad_norm": 0.648360550403595, "learning_rate": 0.0005508051365674684, "loss": 3.4788, "step": 52890 }, { "epoch": 3.5938986275309146, "grad_norm": 0.912376880645752, "learning_rate": 0.0005507626715586357, "loss": 3.5519, "step": 52895 }, { "epoch": 3.5942383476015762, "grad_norm": 0.9318661689758301, "learning_rate": 0.000550720206549803, "loss": 3.3773, "step": 52900 }, { "epoch": 3.594578067672238, "grad_norm": 0.8613086342811584, "learning_rate": 0.0005506777415409703, "loss": 3.5423, "step": 52905 }, { "epoch": 3.5949177877429, "grad_norm": 1.0651054382324219, "learning_rate": 0.0005506352765321375, "loss": 3.5235, "step": 52910 }, { "epoch": 3.5952575078135616, "grad_norm": 0.8997407555580139, "learning_rate": 0.0005505928115233047, "loss": 3.7567, "step": 52915 }, { "epoch": 3.595597227884223, "grad_norm": 0.73149174451828, "learning_rate": 0.0005505503465144721, "loss": 3.5582, "step": 52920 }, { "epoch": 3.5959369479548853, "grad_norm": 0.8050642013549805, "learning_rate": 0.0005505078815056393, "loss": 3.1826, "step": 52925 }, { "epoch": 3.596276668025547, "grad_norm": 0.982751727104187, "learning_rate": 0.0005504654164968066, "loss": 3.532, "step": 52930 }, { "epoch": 3.5966163880962085, "grad_norm": 0.7732133865356445, "learning_rate": 0.000550422951487974, "loss": 3.3858, "step": 52935 }, { "epoch": 3.5969561081668706, "grad_norm": 1.0908161401748657, "learning_rate": 0.0005503804864791412, "loss": 3.3649, "step": 52940 }, { "epoch": 3.5972958282375322, "grad_norm": 0.8185614347457886, "learning_rate": 0.0005503380214703084, "loss": 3.3561, "step": 52945 }, { "epoch": 3.597635548308194, "grad_norm": 0.8773089647293091, "learning_rate": 0.0005502955564614758, "loss": 3.5795, "step": 52950 }, { "epoch": 3.597975268378856, "grad_norm": 1.0958915948867798, "learning_rate": 0.000550253091452643, "loss": 3.4933, "step": 52955 }, { "epoch": 3.5983149884495176, "grad_norm": 0.8125223517417908, "learning_rate": 0.0005502106264438102, "loss": 3.4812, "step": 52960 }, { "epoch": 3.598654708520179, "grad_norm": 0.8647769093513489, "learning_rate": 0.0005501681614349777, "loss": 3.5902, "step": 52965 }, { "epoch": 3.5989944285908413, "grad_norm": 0.8423730134963989, "learning_rate": 0.0005501256964261449, "loss": 3.5398, "step": 52970 }, { "epoch": 3.599334148661503, "grad_norm": 0.6446899771690369, "learning_rate": 0.0005500832314173121, "loss": 3.649, "step": 52975 }, { "epoch": 3.5996738687321646, "grad_norm": 0.8032547235488892, "learning_rate": 0.0005500407664084794, "loss": 3.3662, "step": 52980 }, { "epoch": 3.6000135888028266, "grad_norm": 1.1905312538146973, "learning_rate": 0.0005499983013996467, "loss": 3.4394, "step": 52985 }, { "epoch": 3.6003533088734883, "grad_norm": 0.8894798755645752, "learning_rate": 0.000549955836390814, "loss": 3.5324, "step": 52990 }, { "epoch": 3.60069302894415, "grad_norm": 0.9026572108268738, "learning_rate": 0.0005499133713819812, "loss": 3.7997, "step": 52995 }, { "epoch": 3.601032749014812, "grad_norm": 0.7525251507759094, "learning_rate": 0.0005498709063731486, "loss": 3.4542, "step": 53000 }, { "epoch": 3.6013724690854736, "grad_norm": 0.6907527446746826, "learning_rate": 0.0005498284413643159, "loss": 3.5605, "step": 53005 }, { "epoch": 3.6017121891561352, "grad_norm": 0.8454573750495911, "learning_rate": 0.0005497859763554831, "loss": 3.4105, "step": 53010 }, { "epoch": 3.6020519092267973, "grad_norm": 0.8242977261543274, "learning_rate": 0.0005497435113466504, "loss": 3.5047, "step": 53015 }, { "epoch": 3.602391629297459, "grad_norm": 0.8369054198265076, "learning_rate": 0.0005497010463378177, "loss": 3.4958, "step": 53020 }, { "epoch": 3.6027313493681206, "grad_norm": 0.9210314750671387, "learning_rate": 0.0005496585813289849, "loss": 3.4334, "step": 53025 }, { "epoch": 3.6030710694387826, "grad_norm": 0.912879228591919, "learning_rate": 0.0005496161163201521, "loss": 3.4379, "step": 53030 }, { "epoch": 3.6034107895094443, "grad_norm": 0.8952475190162659, "learning_rate": 0.0005495736513113196, "loss": 3.4311, "step": 53035 }, { "epoch": 3.603750509580106, "grad_norm": 0.9238948822021484, "learning_rate": 0.0005495311863024868, "loss": 3.3197, "step": 53040 }, { "epoch": 3.604090229650768, "grad_norm": 0.9383625388145447, "learning_rate": 0.000549488721293654, "loss": 3.483, "step": 53045 }, { "epoch": 3.6044299497214296, "grad_norm": 0.9761332869529724, "learning_rate": 0.0005494462562848214, "loss": 3.361, "step": 53050 }, { "epoch": 3.6047696697920912, "grad_norm": 0.6417686939239502, "learning_rate": 0.0005494037912759886, "loss": 3.5815, "step": 53055 }, { "epoch": 3.6051093898627533, "grad_norm": 0.8139020800590515, "learning_rate": 0.0005493613262671558, "loss": 3.6731, "step": 53060 }, { "epoch": 3.605449109933415, "grad_norm": 0.9614754915237427, "learning_rate": 0.0005493188612583232, "loss": 3.5456, "step": 53065 }, { "epoch": 3.6057888300040766, "grad_norm": 0.6282964944839478, "learning_rate": 0.0005492763962494905, "loss": 3.5653, "step": 53070 }, { "epoch": 3.6061285500747386, "grad_norm": 1.0063493251800537, "learning_rate": 0.0005492339312406577, "loss": 3.4016, "step": 53075 }, { "epoch": 3.6064682701454003, "grad_norm": 0.8474700450897217, "learning_rate": 0.000549191466231825, "loss": 3.4977, "step": 53080 }, { "epoch": 3.606807990216062, "grad_norm": 0.9115608334541321, "learning_rate": 0.0005491490012229923, "loss": 3.2934, "step": 53085 }, { "epoch": 3.607147710286724, "grad_norm": 0.9855828881263733, "learning_rate": 0.0005491065362141595, "loss": 3.4119, "step": 53090 }, { "epoch": 3.6074874303573856, "grad_norm": 0.7172865271568298, "learning_rate": 0.0005490640712053268, "loss": 3.4175, "step": 53095 }, { "epoch": 3.6078271504280472, "grad_norm": 0.8368202447891235, "learning_rate": 0.0005490216061964941, "loss": 3.562, "step": 53100 }, { "epoch": 3.6081668704987093, "grad_norm": 0.6668981909751892, "learning_rate": 0.0005489791411876614, "loss": 3.6515, "step": 53105 }, { "epoch": 3.608506590569371, "grad_norm": 0.8732035160064697, "learning_rate": 0.0005489366761788287, "loss": 3.1435, "step": 53110 }, { "epoch": 3.6088463106400326, "grad_norm": 0.754576563835144, "learning_rate": 0.000548894211169996, "loss": 3.585, "step": 53115 }, { "epoch": 3.6091860307106947, "grad_norm": 0.877511739730835, "learning_rate": 0.0005488517461611632, "loss": 3.5862, "step": 53120 }, { "epoch": 3.6095257507813563, "grad_norm": 1.0535022020339966, "learning_rate": 0.0005488092811523305, "loss": 3.6688, "step": 53125 }, { "epoch": 3.609865470852018, "grad_norm": 0.9201755523681641, "learning_rate": 0.0005487668161434977, "loss": 3.6635, "step": 53130 }, { "epoch": 3.6102051909226796, "grad_norm": 0.9185606837272644, "learning_rate": 0.000548724351134665, "loss": 3.8283, "step": 53135 }, { "epoch": 3.6105449109933416, "grad_norm": 0.7873866558074951, "learning_rate": 0.0005486818861258324, "loss": 3.3596, "step": 53140 }, { "epoch": 3.6108846310640033, "grad_norm": 0.8282515406608582, "learning_rate": 0.0005486394211169996, "loss": 3.4813, "step": 53145 }, { "epoch": 3.611224351134665, "grad_norm": 0.9264218211174011, "learning_rate": 0.0005485969561081669, "loss": 3.5958, "step": 53150 }, { "epoch": 3.611564071205327, "grad_norm": 0.7441644668579102, "learning_rate": 0.0005485544910993342, "loss": 3.5879, "step": 53155 }, { "epoch": 3.6119037912759886, "grad_norm": 1.0126665830612183, "learning_rate": 0.0005485120260905014, "loss": 3.4628, "step": 53160 }, { "epoch": 3.6122435113466502, "grad_norm": 0.9869412183761597, "learning_rate": 0.0005484695610816686, "loss": 3.7263, "step": 53165 }, { "epoch": 3.612583231417312, "grad_norm": 0.7897995114326477, "learning_rate": 0.000548427096072836, "loss": 3.3806, "step": 53170 }, { "epoch": 3.612922951487974, "grad_norm": 0.9668745994567871, "learning_rate": 0.0005483846310640033, "loss": 3.4123, "step": 53175 }, { "epoch": 3.6132626715586356, "grad_norm": 0.9032324552536011, "learning_rate": 0.0005483421660551705, "loss": 3.5822, "step": 53180 }, { "epoch": 3.613602391629297, "grad_norm": 0.9826297163963318, "learning_rate": 0.0005482997010463379, "loss": 3.4616, "step": 53185 }, { "epoch": 3.6139421116999593, "grad_norm": 1.0273572206497192, "learning_rate": 0.0005482572360375051, "loss": 3.5533, "step": 53190 }, { "epoch": 3.614281831770621, "grad_norm": 0.8396711349487305, "learning_rate": 0.0005482147710286723, "loss": 3.5326, "step": 53195 }, { "epoch": 3.6146215518412825, "grad_norm": 1.0667747259140015, "learning_rate": 0.0005481723060198397, "loss": 3.5191, "step": 53200 }, { "epoch": 3.6149612719119446, "grad_norm": 1.0240811109542847, "learning_rate": 0.0005481298410110069, "loss": 3.3831, "step": 53205 }, { "epoch": 3.6153009919826062, "grad_norm": 0.8221215009689331, "learning_rate": 0.0005480873760021742, "loss": 3.7308, "step": 53210 }, { "epoch": 3.615640712053268, "grad_norm": 1.0272125005722046, "learning_rate": 0.0005480449109933416, "loss": 3.6804, "step": 53215 }, { "epoch": 3.61598043212393, "grad_norm": 0.8021442294120789, "learning_rate": 0.0005480024459845088, "loss": 3.5299, "step": 53220 }, { "epoch": 3.6163201521945916, "grad_norm": 0.8069726824760437, "learning_rate": 0.000547959980975676, "loss": 3.3171, "step": 53225 }, { "epoch": 3.616659872265253, "grad_norm": 0.7607616186141968, "learning_rate": 0.0005479175159668433, "loss": 3.4704, "step": 53230 }, { "epoch": 3.6169995923359153, "grad_norm": 0.7912532091140747, "learning_rate": 0.0005478750509580106, "loss": 3.5246, "step": 53235 }, { "epoch": 3.617339312406577, "grad_norm": 0.9379469156265259, "learning_rate": 0.0005478325859491778, "loss": 3.6859, "step": 53240 }, { "epoch": 3.6176790324772385, "grad_norm": 1.0929460525512695, "learning_rate": 0.0005477901209403452, "loss": 3.3808, "step": 53245 }, { "epoch": 3.6180187525479006, "grad_norm": 0.9988842010498047, "learning_rate": 0.0005477476559315125, "loss": 3.5633, "step": 53250 }, { "epoch": 3.6183584726185622, "grad_norm": 1.0579338073730469, "learning_rate": 0.0005477051909226797, "loss": 3.5524, "step": 53255 }, { "epoch": 3.618698192689224, "grad_norm": 0.875450849533081, "learning_rate": 0.000547662725913847, "loss": 3.4359, "step": 53260 }, { "epoch": 3.619037912759886, "grad_norm": 0.8895996809005737, "learning_rate": 0.0005476202609050142, "loss": 3.3988, "step": 53265 }, { "epoch": 3.6193776328305476, "grad_norm": 0.9649412631988525, "learning_rate": 0.0005475777958961815, "loss": 3.4005, "step": 53270 }, { "epoch": 3.619717352901209, "grad_norm": 0.933057963848114, "learning_rate": 0.0005475353308873488, "loss": 3.487, "step": 53275 }, { "epoch": 3.6200570729718713, "grad_norm": 0.911983072757721, "learning_rate": 0.0005474928658785161, "loss": 3.6928, "step": 53280 }, { "epoch": 3.620396793042533, "grad_norm": 1.1191974878311157, "learning_rate": 0.0005474504008696834, "loss": 3.4711, "step": 53285 }, { "epoch": 3.6207365131131946, "grad_norm": 0.7322220802307129, "learning_rate": 0.0005474079358608507, "loss": 3.4827, "step": 53290 }, { "epoch": 3.6210762331838566, "grad_norm": 0.90591961145401, "learning_rate": 0.0005473654708520179, "loss": 3.4569, "step": 53295 }, { "epoch": 3.6214159532545183, "grad_norm": 1.0407510995864868, "learning_rate": 0.0005473230058431852, "loss": 3.4327, "step": 53300 }, { "epoch": 3.62175567332518, "grad_norm": 0.7971212863922119, "learning_rate": 0.0005472805408343525, "loss": 3.5798, "step": 53305 }, { "epoch": 3.622095393395842, "grad_norm": 0.9153208136558533, "learning_rate": 0.0005472380758255197, "loss": 3.6678, "step": 53310 }, { "epoch": 3.6224351134665036, "grad_norm": 0.789109468460083, "learning_rate": 0.000547195610816687, "loss": 3.4553, "step": 53315 }, { "epoch": 3.6227748335371652, "grad_norm": 1.2554986476898193, "learning_rate": 0.0005471531458078544, "loss": 3.1857, "step": 53320 }, { "epoch": 3.6231145536078273, "grad_norm": 0.778866708278656, "learning_rate": 0.0005471106807990216, "loss": 3.4443, "step": 53325 }, { "epoch": 3.623454273678489, "grad_norm": 0.9483287930488586, "learning_rate": 0.0005470682157901889, "loss": 3.6143, "step": 53330 }, { "epoch": 3.6237939937491506, "grad_norm": 0.7796667218208313, "learning_rate": 0.0005470257507813562, "loss": 3.4762, "step": 53335 }, { "epoch": 3.6241337138198126, "grad_norm": 0.8498959541320801, "learning_rate": 0.0005469832857725234, "loss": 3.6811, "step": 53340 }, { "epoch": 3.6244734338904743, "grad_norm": 0.9770117998123169, "learning_rate": 0.0005469408207636908, "loss": 3.2819, "step": 53345 }, { "epoch": 3.624813153961136, "grad_norm": 0.7897945046424866, "learning_rate": 0.0005468983557548581, "loss": 3.3054, "step": 53350 }, { "epoch": 3.625152874031798, "grad_norm": 0.7605648636817932, "learning_rate": 0.0005468558907460253, "loss": 3.5257, "step": 53355 }, { "epoch": 3.6254925941024596, "grad_norm": 1.0064584016799927, "learning_rate": 0.0005468134257371926, "loss": 3.8269, "step": 53360 }, { "epoch": 3.6258323141731212, "grad_norm": 0.6775883436203003, "learning_rate": 0.0005467709607283598, "loss": 3.4411, "step": 53365 }, { "epoch": 3.6261720342437833, "grad_norm": 0.7251072525978088, "learning_rate": 0.0005467284957195271, "loss": 3.6757, "step": 53370 }, { "epoch": 3.626511754314445, "grad_norm": 0.8246013522148132, "learning_rate": 0.0005466860307106944, "loss": 3.3034, "step": 53375 }, { "epoch": 3.6268514743851066, "grad_norm": 0.9590049982070923, "learning_rate": 0.0005466435657018617, "loss": 3.6785, "step": 53380 }, { "epoch": 3.6271911944557687, "grad_norm": 0.8394210338592529, "learning_rate": 0.000546601100693029, "loss": 3.661, "step": 53385 }, { "epoch": 3.6275309145264303, "grad_norm": 1.4547656774520874, "learning_rate": 0.0005465586356841963, "loss": 3.406, "step": 53390 }, { "epoch": 3.627870634597092, "grad_norm": 1.062156319618225, "learning_rate": 0.0005465161706753635, "loss": 3.4511, "step": 53395 }, { "epoch": 3.628210354667754, "grad_norm": 0.8980326056480408, "learning_rate": 0.0005464737056665308, "loss": 3.406, "step": 53400 }, { "epoch": 3.6285500747384156, "grad_norm": 0.9707894325256348, "learning_rate": 0.0005464312406576981, "loss": 3.4491, "step": 53405 }, { "epoch": 3.6288897948090773, "grad_norm": 1.3499656915664673, "learning_rate": 0.0005463887756488653, "loss": 3.7684, "step": 53410 }, { "epoch": 3.6292295148797393, "grad_norm": 0.7425840497016907, "learning_rate": 0.0005463463106400326, "loss": 3.4067, "step": 53415 }, { "epoch": 3.629569234950401, "grad_norm": 0.7487313151359558, "learning_rate": 0.0005463038456312, "loss": 3.3259, "step": 53420 }, { "epoch": 3.6299089550210626, "grad_norm": 0.865043580532074, "learning_rate": 0.0005462613806223672, "loss": 3.6391, "step": 53425 }, { "epoch": 3.6302486750917247, "grad_norm": 1.0164997577667236, "learning_rate": 0.0005462189156135344, "loss": 3.4808, "step": 53430 }, { "epoch": 3.6305883951623863, "grad_norm": 0.9972447156906128, "learning_rate": 0.0005461764506047018, "loss": 3.6799, "step": 53435 }, { "epoch": 3.630928115233048, "grad_norm": 1.0803797245025635, "learning_rate": 0.000546133985595869, "loss": 3.5883, "step": 53440 }, { "epoch": 3.63126783530371, "grad_norm": 0.814445972442627, "learning_rate": 0.0005460915205870362, "loss": 3.4415, "step": 53445 }, { "epoch": 3.6316075553743716, "grad_norm": 0.6653794050216675, "learning_rate": 0.0005460490555782037, "loss": 3.5498, "step": 53450 }, { "epoch": 3.6319472754450333, "grad_norm": 0.9789270758628845, "learning_rate": 0.0005460065905693709, "loss": 3.6238, "step": 53455 }, { "epoch": 3.6322869955156953, "grad_norm": 1.101654291152954, "learning_rate": 0.0005459641255605381, "loss": 3.1739, "step": 53460 }, { "epoch": 3.632626715586357, "grad_norm": 0.9288154244422913, "learning_rate": 0.0005459216605517054, "loss": 3.5879, "step": 53465 }, { "epoch": 3.6329664356570186, "grad_norm": 0.6950439214706421, "learning_rate": 0.0005458791955428727, "loss": 3.4351, "step": 53470 }, { "epoch": 3.6333061557276802, "grad_norm": 1.08597731590271, "learning_rate": 0.0005458367305340399, "loss": 3.5762, "step": 53475 }, { "epoch": 3.6336458757983423, "grad_norm": 1.059578537940979, "learning_rate": 0.0005457942655252072, "loss": 3.563, "step": 53480 }, { "epoch": 3.633985595869004, "grad_norm": 0.833982527256012, "learning_rate": 0.0005457518005163746, "loss": 3.5576, "step": 53485 }, { "epoch": 3.6343253159396656, "grad_norm": 1.1361275911331177, "learning_rate": 0.0005457093355075418, "loss": 3.5342, "step": 53490 }, { "epoch": 3.6346650360103276, "grad_norm": 0.881190836429596, "learning_rate": 0.0005456668704987091, "loss": 3.5084, "step": 53495 }, { "epoch": 3.6350047560809893, "grad_norm": 0.6833690404891968, "learning_rate": 0.0005456244054898764, "loss": 3.576, "step": 53500 }, { "epoch": 3.635344476151651, "grad_norm": 0.8589301705360413, "learning_rate": 0.0005455819404810436, "loss": 3.4652, "step": 53505 }, { "epoch": 3.635684196222313, "grad_norm": 0.7770761847496033, "learning_rate": 0.0005455394754722109, "loss": 3.6121, "step": 53510 }, { "epoch": 3.6360239162929746, "grad_norm": 0.7699763774871826, "learning_rate": 0.0005454970104633781, "loss": 3.4324, "step": 53515 }, { "epoch": 3.6363636363636362, "grad_norm": 0.8171278834342957, "learning_rate": 0.0005454545454545455, "loss": 3.7349, "step": 53520 }, { "epoch": 3.636703356434298, "grad_norm": 1.2641348838806152, "learning_rate": 0.0005454120804457128, "loss": 3.302, "step": 53525 }, { "epoch": 3.63704307650496, "grad_norm": 0.8112537860870361, "learning_rate": 0.00054536961543688, "loss": 3.8005, "step": 53530 }, { "epoch": 3.6373827965756216, "grad_norm": 0.7227480411529541, "learning_rate": 0.0005453271504280473, "loss": 3.8045, "step": 53535 }, { "epoch": 3.637722516646283, "grad_norm": 0.9976720213890076, "learning_rate": 0.0005452846854192146, "loss": 3.633, "step": 53540 }, { "epoch": 3.6380622367169453, "grad_norm": 0.7704304456710815, "learning_rate": 0.0005452422204103818, "loss": 3.5517, "step": 53545 }, { "epoch": 3.638401956787607, "grad_norm": 0.8514612913131714, "learning_rate": 0.000545199755401549, "loss": 3.6164, "step": 53550 }, { "epoch": 3.6387416768582685, "grad_norm": 0.7663012146949768, "learning_rate": 0.0005451572903927165, "loss": 3.7757, "step": 53555 }, { "epoch": 3.6390813969289306, "grad_norm": 0.9894139170646667, "learning_rate": 0.0005451148253838837, "loss": 3.302, "step": 53560 }, { "epoch": 3.6394211169995923, "grad_norm": 0.8416471481323242, "learning_rate": 0.0005450723603750509, "loss": 3.5129, "step": 53565 }, { "epoch": 3.639760837070254, "grad_norm": 0.8176944851875305, "learning_rate": 0.0005450298953662183, "loss": 3.4485, "step": 53570 }, { "epoch": 3.640100557140916, "grad_norm": 0.9307337999343872, "learning_rate": 0.0005449874303573855, "loss": 3.7807, "step": 53575 }, { "epoch": 3.6404402772115776, "grad_norm": 0.7537496089935303, "learning_rate": 0.0005449449653485527, "loss": 3.6988, "step": 53580 }, { "epoch": 3.6407799972822392, "grad_norm": 0.7648143172264099, "learning_rate": 0.0005449025003397201, "loss": 3.3825, "step": 53585 }, { "epoch": 3.6411197173529013, "grad_norm": 0.8649531602859497, "learning_rate": 0.0005448600353308874, "loss": 3.7286, "step": 53590 }, { "epoch": 3.641459437423563, "grad_norm": 1.1034812927246094, "learning_rate": 0.0005448175703220546, "loss": 3.4015, "step": 53595 }, { "epoch": 3.6417991574942246, "grad_norm": 1.0745538473129272, "learning_rate": 0.000544775105313222, "loss": 3.6461, "step": 53600 }, { "epoch": 3.6421388775648866, "grad_norm": 0.7815353870391846, "learning_rate": 0.0005447326403043892, "loss": 3.4524, "step": 53605 }, { "epoch": 3.6424785976355483, "grad_norm": 1.0037766695022583, "learning_rate": 0.0005446901752955564, "loss": 3.4103, "step": 53610 }, { "epoch": 3.64281831770621, "grad_norm": 0.9824424386024475, "learning_rate": 0.0005446477102867237, "loss": 3.5805, "step": 53615 }, { "epoch": 3.643158037776872, "grad_norm": 0.8942891359329224, "learning_rate": 0.000544605245277891, "loss": 3.4682, "step": 53620 }, { "epoch": 3.6434977578475336, "grad_norm": 0.8717586994171143, "learning_rate": 0.0005445627802690583, "loss": 3.6373, "step": 53625 }, { "epoch": 3.6438374779181952, "grad_norm": 0.9676894545555115, "learning_rate": 0.0005445203152602256, "loss": 3.8116, "step": 53630 }, { "epoch": 3.6441771979888573, "grad_norm": 0.8737943172454834, "learning_rate": 0.0005444778502513929, "loss": 3.3956, "step": 53635 }, { "epoch": 3.644516918059519, "grad_norm": 0.850583553314209, "learning_rate": 0.0005444353852425601, "loss": 3.6765, "step": 53640 }, { "epoch": 3.6448566381301806, "grad_norm": 0.8469993472099304, "learning_rate": 0.0005443929202337274, "loss": 3.3704, "step": 53645 }, { "epoch": 3.6451963582008426, "grad_norm": 0.8102148175239563, "learning_rate": 0.0005443504552248946, "loss": 3.5487, "step": 53650 }, { "epoch": 3.6455360782715043, "grad_norm": 0.9210461378097534, "learning_rate": 0.0005443079902160619, "loss": 3.4939, "step": 53655 }, { "epoch": 3.645875798342166, "grad_norm": 0.9317465424537659, "learning_rate": 0.0005442655252072293, "loss": 3.5968, "step": 53660 }, { "epoch": 3.646215518412828, "grad_norm": 1.086358666419983, "learning_rate": 0.0005442230601983965, "loss": 3.5471, "step": 53665 }, { "epoch": 3.6465552384834896, "grad_norm": 0.8703083992004395, "learning_rate": 0.0005441805951895639, "loss": 3.6222, "step": 53670 }, { "epoch": 3.6468949585541512, "grad_norm": 1.1880327463150024, "learning_rate": 0.0005441381301807311, "loss": 3.4471, "step": 53675 }, { "epoch": 3.6472346786248133, "grad_norm": 0.7977964878082275, "learning_rate": 0.0005440956651718983, "loss": 3.397, "step": 53680 }, { "epoch": 3.647574398695475, "grad_norm": 0.7841601371765137, "learning_rate": 0.0005440532001630657, "loss": 3.5529, "step": 53685 }, { "epoch": 3.6479141187661366, "grad_norm": 0.8847991824150085, "learning_rate": 0.0005440107351542329, "loss": 3.6659, "step": 53690 }, { "epoch": 3.6482538388367987, "grad_norm": 0.8309943079948425, "learning_rate": 0.0005439682701454002, "loss": 3.5767, "step": 53695 }, { "epoch": 3.6485935589074603, "grad_norm": 0.8960649371147156, "learning_rate": 0.0005439258051365676, "loss": 3.5285, "step": 53700 }, { "epoch": 3.648933278978122, "grad_norm": 1.1906756162643433, "learning_rate": 0.0005438833401277348, "loss": 3.5093, "step": 53705 }, { "epoch": 3.649272999048784, "grad_norm": 0.9441413879394531, "learning_rate": 0.000543840875118902, "loss": 3.7124, "step": 53710 }, { "epoch": 3.6496127191194456, "grad_norm": 0.7278782725334167, "learning_rate": 0.0005437984101100693, "loss": 3.7387, "step": 53715 }, { "epoch": 3.6499524391901073, "grad_norm": 0.8797478079795837, "learning_rate": 0.0005437559451012366, "loss": 3.5415, "step": 53720 }, { "epoch": 3.6502921592607693, "grad_norm": 0.7714616060256958, "learning_rate": 0.0005437134800924038, "loss": 3.5377, "step": 53725 }, { "epoch": 3.650631879331431, "grad_norm": 1.0111420154571533, "learning_rate": 0.0005436710150835712, "loss": 3.5683, "step": 53730 }, { "epoch": 3.6509715994020926, "grad_norm": 1.0158363580703735, "learning_rate": 0.0005436285500747385, "loss": 3.3675, "step": 53735 }, { "epoch": 3.6513113194727547, "grad_norm": 0.9356029629707336, "learning_rate": 0.0005435860850659057, "loss": 3.4682, "step": 53740 }, { "epoch": 3.6516510395434163, "grad_norm": 0.9091253876686096, "learning_rate": 0.000543543620057073, "loss": 3.3034, "step": 53745 }, { "epoch": 3.651990759614078, "grad_norm": 0.6773715615272522, "learning_rate": 0.0005435011550482403, "loss": 3.4817, "step": 53750 }, { "epoch": 3.65233047968474, "grad_norm": 0.7366059422492981, "learning_rate": 0.0005434586900394075, "loss": 3.6884, "step": 53755 }, { "epoch": 3.6526701997554016, "grad_norm": 0.8861567378044128, "learning_rate": 0.0005434162250305748, "loss": 3.4124, "step": 53760 }, { "epoch": 3.6530099198260633, "grad_norm": 1.0470925569534302, "learning_rate": 0.0005433737600217421, "loss": 3.4808, "step": 53765 }, { "epoch": 3.6533496398967253, "grad_norm": 0.8748601078987122, "learning_rate": 0.0005433312950129094, "loss": 3.3202, "step": 53770 }, { "epoch": 3.653689359967387, "grad_norm": 0.860626757144928, "learning_rate": 0.0005432888300040767, "loss": 3.6189, "step": 53775 }, { "epoch": 3.6540290800380486, "grad_norm": 0.8967095017433167, "learning_rate": 0.0005432463649952439, "loss": 3.548, "step": 53780 }, { "epoch": 3.6543688001087107, "grad_norm": 0.8716451525688171, "learning_rate": 0.0005432038999864112, "loss": 3.5785, "step": 53785 }, { "epoch": 3.6547085201793723, "grad_norm": 0.9621819257736206, "learning_rate": 0.0005431614349775785, "loss": 3.4762, "step": 53790 }, { "epoch": 3.655048240250034, "grad_norm": 0.8762222528457642, "learning_rate": 0.0005431189699687457, "loss": 3.5719, "step": 53795 }, { "epoch": 3.655387960320696, "grad_norm": 0.7597521543502808, "learning_rate": 0.000543076504959913, "loss": 3.5219, "step": 53800 }, { "epoch": 3.6557276803913576, "grad_norm": 1.2984267473220825, "learning_rate": 0.0005430340399510804, "loss": 3.6098, "step": 53805 }, { "epoch": 3.6560674004620193, "grad_norm": 1.042135238647461, "learning_rate": 0.0005429915749422476, "loss": 3.3923, "step": 53810 }, { "epoch": 3.656407120532681, "grad_norm": 0.7758339643478394, "learning_rate": 0.0005429491099334148, "loss": 3.5901, "step": 53815 }, { "epoch": 3.656746840603343, "grad_norm": 1.0324559211730957, "learning_rate": 0.0005429066449245822, "loss": 3.6956, "step": 53820 }, { "epoch": 3.6570865606740046, "grad_norm": 0.9685511589050293, "learning_rate": 0.0005428641799157494, "loss": 3.6136, "step": 53825 }, { "epoch": 3.6574262807446662, "grad_norm": 0.827765941619873, "learning_rate": 0.0005428217149069166, "loss": 3.3311, "step": 53830 }, { "epoch": 3.6577660008153283, "grad_norm": 0.8985838890075684, "learning_rate": 0.0005427792498980841, "loss": 3.0632, "step": 53835 }, { "epoch": 3.65810572088599, "grad_norm": 1.1034096479415894, "learning_rate": 0.0005427367848892513, "loss": 3.2518, "step": 53840 }, { "epoch": 3.6584454409566516, "grad_norm": 1.1418403387069702, "learning_rate": 0.0005426943198804185, "loss": 3.5205, "step": 53845 }, { "epoch": 3.6587851610273137, "grad_norm": 1.030767560005188, "learning_rate": 0.0005426518548715859, "loss": 3.5257, "step": 53850 }, { "epoch": 3.6591248810979753, "grad_norm": 1.037742018699646, "learning_rate": 0.0005426093898627531, "loss": 3.6175, "step": 53855 }, { "epoch": 3.659464601168637, "grad_norm": 0.906934916973114, "learning_rate": 0.0005425669248539203, "loss": 3.4753, "step": 53860 }, { "epoch": 3.6598043212392986, "grad_norm": 0.7545692920684814, "learning_rate": 0.0005425244598450876, "loss": 3.197, "step": 53865 }, { "epoch": 3.6601440413099606, "grad_norm": 0.7415077090263367, "learning_rate": 0.000542481994836255, "loss": 3.4878, "step": 53870 }, { "epoch": 3.6604837613806223, "grad_norm": 0.8560155034065247, "learning_rate": 0.0005424395298274222, "loss": 3.5776, "step": 53875 }, { "epoch": 3.660823481451284, "grad_norm": 0.8856314420700073, "learning_rate": 0.0005423970648185895, "loss": 3.5032, "step": 53880 }, { "epoch": 3.661163201521946, "grad_norm": 0.7570082545280457, "learning_rate": 0.0005423545998097568, "loss": 3.2862, "step": 53885 }, { "epoch": 3.6615029215926076, "grad_norm": 0.8551943302154541, "learning_rate": 0.000542312134800924, "loss": 3.5629, "step": 53890 }, { "epoch": 3.6618426416632692, "grad_norm": 0.9009014964103699, "learning_rate": 0.0005422696697920913, "loss": 3.7592, "step": 53895 }, { "epoch": 3.6621823617339313, "grad_norm": 0.7558007836341858, "learning_rate": 0.0005422272047832585, "loss": 3.5012, "step": 53900 }, { "epoch": 3.662522081804593, "grad_norm": 0.6824040412902832, "learning_rate": 0.0005421847397744259, "loss": 3.7095, "step": 53905 }, { "epoch": 3.6628618018752546, "grad_norm": 0.8432365655899048, "learning_rate": 0.0005421422747655932, "loss": 3.5574, "step": 53910 }, { "epoch": 3.6632015219459166, "grad_norm": 1.1616342067718506, "learning_rate": 0.0005420998097567604, "loss": 3.3939, "step": 53915 }, { "epoch": 3.6635412420165783, "grad_norm": 0.8574005365371704, "learning_rate": 0.0005420573447479277, "loss": 3.6149, "step": 53920 }, { "epoch": 3.66388096208724, "grad_norm": 0.8057312369346619, "learning_rate": 0.000542014879739095, "loss": 3.6741, "step": 53925 }, { "epoch": 3.664220682157902, "grad_norm": 0.727634072303772, "learning_rate": 0.0005419724147302622, "loss": 3.7926, "step": 53930 }, { "epoch": 3.6645604022285636, "grad_norm": 0.8224619626998901, "learning_rate": 0.0005419299497214295, "loss": 3.5678, "step": 53935 }, { "epoch": 3.6649001222992252, "grad_norm": 0.7493871450424194, "learning_rate": 0.0005418874847125969, "loss": 3.6014, "step": 53940 }, { "epoch": 3.6652398423698873, "grad_norm": 0.8806480169296265, "learning_rate": 0.0005418450197037641, "loss": 3.4166, "step": 53945 }, { "epoch": 3.665579562440549, "grad_norm": 0.8685665130615234, "learning_rate": 0.0005418025546949313, "loss": 3.3437, "step": 53950 }, { "epoch": 3.6659192825112106, "grad_norm": 1.0424593687057495, "learning_rate": 0.0005417600896860987, "loss": 3.7589, "step": 53955 }, { "epoch": 3.6662590025818727, "grad_norm": 0.7586832642555237, "learning_rate": 0.0005417176246772659, "loss": 3.5474, "step": 53960 }, { "epoch": 3.6665987226525343, "grad_norm": 0.9218701720237732, "learning_rate": 0.0005416751596684331, "loss": 3.7302, "step": 53965 }, { "epoch": 3.666938442723196, "grad_norm": 1.33937668800354, "learning_rate": 0.0005416326946596006, "loss": 3.5739, "step": 53970 }, { "epoch": 3.667278162793858, "grad_norm": 0.7950059771537781, "learning_rate": 0.0005415902296507678, "loss": 3.8805, "step": 53975 }, { "epoch": 3.6676178828645196, "grad_norm": 0.8059768080711365, "learning_rate": 0.000541547764641935, "loss": 3.6719, "step": 53980 }, { "epoch": 3.6679576029351812, "grad_norm": 1.020132064819336, "learning_rate": 0.0005415052996331024, "loss": 3.6698, "step": 53985 }, { "epoch": 3.6682973230058433, "grad_norm": 0.8819435238838196, "learning_rate": 0.0005414628346242696, "loss": 3.2554, "step": 53990 }, { "epoch": 3.668637043076505, "grad_norm": 0.8289917707443237, "learning_rate": 0.0005414203696154368, "loss": 3.5836, "step": 53995 }, { "epoch": 3.6689767631471666, "grad_norm": 1.0032011270523071, "learning_rate": 0.0005413779046066041, "loss": 3.5997, "step": 54000 }, { "epoch": 3.6693164832178287, "grad_norm": 0.8256915807723999, "learning_rate": 0.0005413354395977715, "loss": 3.5211, "step": 54005 }, { "epoch": 3.6696562032884903, "grad_norm": 0.6539381742477417, "learning_rate": 0.0005412929745889388, "loss": 3.4966, "step": 54010 }, { "epoch": 3.669995923359152, "grad_norm": 0.6959814429283142, "learning_rate": 0.000541250509580106, "loss": 3.6032, "step": 54015 }, { "epoch": 3.670335643429814, "grad_norm": 0.7760059833526611, "learning_rate": 0.0005412080445712733, "loss": 3.5113, "step": 54020 }, { "epoch": 3.6706753635004756, "grad_norm": 0.9195011258125305, "learning_rate": 0.0005411655795624406, "loss": 3.5846, "step": 54025 }, { "epoch": 3.6710150835711373, "grad_norm": 0.7785404324531555, "learning_rate": 0.0005411231145536078, "loss": 3.605, "step": 54030 }, { "epoch": 3.6713548036417993, "grad_norm": 0.7625938057899475, "learning_rate": 0.000541080649544775, "loss": 3.5765, "step": 54035 }, { "epoch": 3.671694523712461, "grad_norm": 0.7925326228141785, "learning_rate": 0.0005410381845359425, "loss": 3.6898, "step": 54040 }, { "epoch": 3.6720342437831226, "grad_norm": 0.8791647553443909, "learning_rate": 0.0005409957195271097, "loss": 3.6223, "step": 54045 }, { "epoch": 3.6723739638537847, "grad_norm": 0.737472653388977, "learning_rate": 0.000540953254518277, "loss": 3.5384, "step": 54050 }, { "epoch": 3.6727136839244463, "grad_norm": 1.0030609369277954, "learning_rate": 0.0005409107895094443, "loss": 3.6356, "step": 54055 }, { "epoch": 3.673053403995108, "grad_norm": 0.9176384806632996, "learning_rate": 0.0005408683245006115, "loss": 3.716, "step": 54060 }, { "epoch": 3.67339312406577, "grad_norm": 0.8432513475418091, "learning_rate": 0.0005408258594917787, "loss": 3.5127, "step": 54065 }, { "epoch": 3.6737328441364316, "grad_norm": 1.087085247039795, "learning_rate": 0.0005407833944829461, "loss": 3.6224, "step": 54070 }, { "epoch": 3.6740725642070933, "grad_norm": 0.7479074001312256, "learning_rate": 0.0005407409294741134, "loss": 3.809, "step": 54075 }, { "epoch": 3.6744122842777553, "grad_norm": 0.7922296524047852, "learning_rate": 0.0005406984644652806, "loss": 3.6853, "step": 54080 }, { "epoch": 3.674752004348417, "grad_norm": 1.0359100103378296, "learning_rate": 0.000540655999456448, "loss": 3.7038, "step": 54085 }, { "epoch": 3.6750917244190786, "grad_norm": 0.7226911187171936, "learning_rate": 0.0005406135344476152, "loss": 3.56, "step": 54090 }, { "epoch": 3.6754314444897407, "grad_norm": 0.924033522605896, "learning_rate": 0.0005405710694387824, "loss": 3.1501, "step": 54095 }, { "epoch": 3.6757711645604023, "grad_norm": 0.9478504657745361, "learning_rate": 0.0005405286044299497, "loss": 3.6716, "step": 54100 }, { "epoch": 3.676110884631064, "grad_norm": 0.9186589121818542, "learning_rate": 0.000540486139421117, "loss": 3.4695, "step": 54105 }, { "epoch": 3.676450604701726, "grad_norm": 1.0014567375183105, "learning_rate": 0.0005404436744122843, "loss": 3.5712, "step": 54110 }, { "epoch": 3.6767903247723877, "grad_norm": 0.8400470018386841, "learning_rate": 0.0005404012094034516, "loss": 3.4606, "step": 54115 }, { "epoch": 3.6771300448430493, "grad_norm": 0.9921585917472839, "learning_rate": 0.0005403587443946189, "loss": 3.5689, "step": 54120 }, { "epoch": 3.6774697649137114, "grad_norm": 0.9346041679382324, "learning_rate": 0.0005403162793857861, "loss": 3.5926, "step": 54125 }, { "epoch": 3.677809484984373, "grad_norm": 0.7197134494781494, "learning_rate": 0.0005402738143769534, "loss": 3.399, "step": 54130 }, { "epoch": 3.6781492050550346, "grad_norm": 0.8586878180503845, "learning_rate": 0.0005402313493681207, "loss": 3.6801, "step": 54135 }, { "epoch": 3.6784889251256967, "grad_norm": 0.9854698181152344, "learning_rate": 0.0005401888843592879, "loss": 3.6226, "step": 54140 }, { "epoch": 3.6788286451963583, "grad_norm": 0.8480338454246521, "learning_rate": 0.0005401464193504553, "loss": 3.6068, "step": 54145 }, { "epoch": 3.67916836526702, "grad_norm": 0.8386926651000977, "learning_rate": 0.0005401039543416225, "loss": 3.5964, "step": 54150 }, { "epoch": 3.6795080853376816, "grad_norm": 0.8429489135742188, "learning_rate": 0.0005400614893327898, "loss": 3.562, "step": 54155 }, { "epoch": 3.6798478054083437, "grad_norm": 0.8664031028747559, "learning_rate": 0.0005400190243239571, "loss": 3.505, "step": 54160 }, { "epoch": 3.6801875254790053, "grad_norm": 1.0885629653930664, "learning_rate": 0.0005399765593151243, "loss": 3.6498, "step": 54165 }, { "epoch": 3.680527245549667, "grad_norm": 0.8554604649543762, "learning_rate": 0.0005399340943062916, "loss": 3.1622, "step": 54170 }, { "epoch": 3.680866965620329, "grad_norm": 0.8289306163787842, "learning_rate": 0.0005398916292974589, "loss": 3.8338, "step": 54175 }, { "epoch": 3.6812066856909906, "grad_norm": 0.8493819832801819, "learning_rate": 0.0005398491642886262, "loss": 3.3331, "step": 54180 }, { "epoch": 3.6815464057616523, "grad_norm": 0.7965759038925171, "learning_rate": 0.0005398066992797935, "loss": 3.5322, "step": 54185 }, { "epoch": 3.6818861258323143, "grad_norm": 0.7904520630836487, "learning_rate": 0.0005397642342709608, "loss": 3.5402, "step": 54190 }, { "epoch": 3.682225845902976, "grad_norm": 0.7146794199943542, "learning_rate": 0.000539721769262128, "loss": 3.3843, "step": 54195 }, { "epoch": 3.6825655659736376, "grad_norm": 0.8724516034126282, "learning_rate": 0.0005396793042532952, "loss": 3.5765, "step": 54200 }, { "epoch": 3.6829052860442992, "grad_norm": 0.9540440440177917, "learning_rate": 0.0005396368392444626, "loss": 3.5208, "step": 54205 }, { "epoch": 3.6832450061149613, "grad_norm": 1.0120679140090942, "learning_rate": 0.0005395943742356298, "loss": 3.6354, "step": 54210 }, { "epoch": 3.683584726185623, "grad_norm": 0.8571012020111084, "learning_rate": 0.0005395519092267971, "loss": 3.5432, "step": 54215 }, { "epoch": 3.6839244462562846, "grad_norm": 0.7572273015975952, "learning_rate": 0.0005395094442179645, "loss": 3.4351, "step": 54220 }, { "epoch": 3.6842641663269466, "grad_norm": 1.0118818283081055, "learning_rate": 0.0005394669792091317, "loss": 3.4619, "step": 54225 }, { "epoch": 3.6846038863976083, "grad_norm": 0.8593848347663879, "learning_rate": 0.0005394245142002989, "loss": 3.6336, "step": 54230 }, { "epoch": 3.68494360646827, "grad_norm": 0.7839584946632385, "learning_rate": 0.0005393820491914663, "loss": 3.6019, "step": 54235 }, { "epoch": 3.685283326538932, "grad_norm": 0.9891354441642761, "learning_rate": 0.0005393395841826335, "loss": 3.244, "step": 54240 }, { "epoch": 3.6856230466095936, "grad_norm": 1.0445905923843384, "learning_rate": 0.0005392971191738007, "loss": 3.6093, "step": 54245 }, { "epoch": 3.6859627666802552, "grad_norm": 1.0240458250045776, "learning_rate": 0.0005392546541649682, "loss": 3.7381, "step": 54250 }, { "epoch": 3.6863024867509173, "grad_norm": 1.0918055772781372, "learning_rate": 0.0005392121891561354, "loss": 3.6994, "step": 54255 }, { "epoch": 3.686642206821579, "grad_norm": 0.715413510799408, "learning_rate": 0.0005391697241473026, "loss": 3.3235, "step": 54260 }, { "epoch": 3.6869819268922406, "grad_norm": 0.5910052061080933, "learning_rate": 0.0005391272591384699, "loss": 3.6812, "step": 54265 }, { "epoch": 3.6873216469629027, "grad_norm": 0.796925961971283, "learning_rate": 0.0005390847941296372, "loss": 3.5641, "step": 54270 }, { "epoch": 3.6876613670335643, "grad_norm": 0.8092347979545593, "learning_rate": 0.0005390423291208044, "loss": 3.373, "step": 54275 }, { "epoch": 3.688001087104226, "grad_norm": 0.6617710590362549, "learning_rate": 0.0005389998641119717, "loss": 3.6916, "step": 54280 }, { "epoch": 3.688340807174888, "grad_norm": 0.8136577010154724, "learning_rate": 0.0005389573991031391, "loss": 3.59, "step": 54285 }, { "epoch": 3.6886805272455496, "grad_norm": 1.155113697052002, "learning_rate": 0.0005389149340943063, "loss": 3.5284, "step": 54290 }, { "epoch": 3.6890202473162113, "grad_norm": 0.7612754702568054, "learning_rate": 0.0005388724690854736, "loss": 3.415, "step": 54295 }, { "epoch": 3.6893599673868733, "grad_norm": 1.0148868560791016, "learning_rate": 0.0005388300040766408, "loss": 3.4938, "step": 54300 }, { "epoch": 3.689699687457535, "grad_norm": 0.9893033504486084, "learning_rate": 0.0005387875390678081, "loss": 3.6539, "step": 54305 }, { "epoch": 3.6900394075281966, "grad_norm": 0.9492884874343872, "learning_rate": 0.0005387450740589754, "loss": 3.2321, "step": 54310 }, { "epoch": 3.6903791275988587, "grad_norm": 0.8600270748138428, "learning_rate": 0.0005387026090501426, "loss": 3.6231, "step": 54315 }, { "epoch": 3.6907188476695203, "grad_norm": 0.9982551336288452, "learning_rate": 0.00053866014404131, "loss": 3.7628, "step": 54320 }, { "epoch": 3.691058567740182, "grad_norm": 0.978286623954773, "learning_rate": 0.0005386176790324773, "loss": 3.4764, "step": 54325 }, { "epoch": 3.691398287810844, "grad_norm": 0.7374218702316284, "learning_rate": 0.0005385752140236445, "loss": 3.3236, "step": 54330 }, { "epoch": 3.6917380078815056, "grad_norm": 0.8285622000694275, "learning_rate": 0.0005385327490148117, "loss": 3.5261, "step": 54335 }, { "epoch": 3.6920777279521673, "grad_norm": 0.8415969610214233, "learning_rate": 0.0005384902840059791, "loss": 3.294, "step": 54340 }, { "epoch": 3.6924174480228293, "grad_norm": 0.8668003082275391, "learning_rate": 0.0005384478189971463, "loss": 3.4581, "step": 54345 }, { "epoch": 3.692757168093491, "grad_norm": 0.7305575013160706, "learning_rate": 0.0005384053539883136, "loss": 3.762, "step": 54350 }, { "epoch": 3.6930968881641526, "grad_norm": 0.7651715874671936, "learning_rate": 0.000538362888979481, "loss": 3.6299, "step": 54355 }, { "epoch": 3.6934366082348147, "grad_norm": 0.8194260597229004, "learning_rate": 0.0005383204239706482, "loss": 3.5059, "step": 54360 }, { "epoch": 3.6937763283054763, "grad_norm": 0.7573187351226807, "learning_rate": 0.0005382779589618155, "loss": 3.4695, "step": 54365 }, { "epoch": 3.694116048376138, "grad_norm": 0.7754042148590088, "learning_rate": 0.0005382354939529828, "loss": 3.4687, "step": 54370 }, { "epoch": 3.6944557684468, "grad_norm": 1.3281773328781128, "learning_rate": 0.00053819302894415, "loss": 3.636, "step": 54375 }, { "epoch": 3.6947954885174616, "grad_norm": 0.8469622731208801, "learning_rate": 0.0005381505639353173, "loss": 3.1267, "step": 54380 }, { "epoch": 3.6951352085881233, "grad_norm": 0.947311520576477, "learning_rate": 0.0005381080989264845, "loss": 3.4138, "step": 54385 }, { "epoch": 3.6954749286587854, "grad_norm": 0.9367074370384216, "learning_rate": 0.0005380656339176519, "loss": 3.5586, "step": 54390 }, { "epoch": 3.695814648729447, "grad_norm": 0.812942385673523, "learning_rate": 0.0005380231689088192, "loss": 3.3624, "step": 54395 }, { "epoch": 3.6961543688001086, "grad_norm": 1.1334902048110962, "learning_rate": 0.0005379807038999864, "loss": 3.3966, "step": 54400 }, { "epoch": 3.6964940888707707, "grad_norm": 0.8955584764480591, "learning_rate": 0.0005379382388911537, "loss": 3.7357, "step": 54405 }, { "epoch": 3.6968338089414323, "grad_norm": 0.8426429033279419, "learning_rate": 0.000537895773882321, "loss": 3.6096, "step": 54410 }, { "epoch": 3.697173529012094, "grad_norm": 0.9208871722221375, "learning_rate": 0.0005378533088734882, "loss": 3.507, "step": 54415 }, { "epoch": 3.697513249082756, "grad_norm": 0.9474171996116638, "learning_rate": 0.0005378108438646555, "loss": 3.5504, "step": 54420 }, { "epoch": 3.6978529691534177, "grad_norm": 0.8176616430282593, "learning_rate": 0.0005377683788558229, "loss": 3.4164, "step": 54425 }, { "epoch": 3.6981926892240793, "grad_norm": 0.8975966572761536, "learning_rate": 0.0005377259138469901, "loss": 3.3556, "step": 54430 }, { "epoch": 3.6985324092947414, "grad_norm": 0.9108996987342834, "learning_rate": 0.0005376834488381574, "loss": 3.5254, "step": 54435 }, { "epoch": 3.698872129365403, "grad_norm": 1.0248826742172241, "learning_rate": 0.0005376409838293247, "loss": 3.4814, "step": 54440 }, { "epoch": 3.6992118494360646, "grad_norm": 0.9548715353012085, "learning_rate": 0.0005375985188204919, "loss": 3.5446, "step": 54445 }, { "epoch": 3.6995515695067267, "grad_norm": 0.8565791249275208, "learning_rate": 0.0005375560538116591, "loss": 3.4506, "step": 54450 }, { "epoch": 3.6998912895773883, "grad_norm": 1.0968135595321655, "learning_rate": 0.0005375135888028266, "loss": 3.7325, "step": 54455 }, { "epoch": 3.70023100964805, "grad_norm": 0.7906412482261658, "learning_rate": 0.0005374711237939938, "loss": 3.5085, "step": 54460 }, { "epoch": 3.700570729718712, "grad_norm": 0.7648587226867676, "learning_rate": 0.000537428658785161, "loss": 3.4002, "step": 54465 }, { "epoch": 3.7009104497893737, "grad_norm": 0.7430204153060913, "learning_rate": 0.0005373861937763284, "loss": 3.5223, "step": 54470 }, { "epoch": 3.7012501698600353, "grad_norm": 0.7937458157539368, "learning_rate": 0.0005373437287674956, "loss": 3.5971, "step": 54475 }, { "epoch": 3.7015898899306974, "grad_norm": 0.8830466270446777, "learning_rate": 0.0005373012637586628, "loss": 3.5855, "step": 54480 }, { "epoch": 3.701929610001359, "grad_norm": 0.7268261313438416, "learning_rate": 0.0005372587987498302, "loss": 3.5045, "step": 54485 }, { "epoch": 3.7022693300720206, "grad_norm": 0.8419830799102783, "learning_rate": 0.0005372163337409975, "loss": 3.5513, "step": 54490 }, { "epoch": 3.7026090501426823, "grad_norm": 0.9575291872024536, "learning_rate": 0.0005371738687321647, "loss": 3.5601, "step": 54495 }, { "epoch": 3.7029487702133443, "grad_norm": 0.7267882227897644, "learning_rate": 0.000537131403723332, "loss": 3.5419, "step": 54500 }, { "epoch": 3.703288490284006, "grad_norm": 0.8549809455871582, "learning_rate": 0.0005370889387144993, "loss": 3.4817, "step": 54505 }, { "epoch": 3.7036282103546676, "grad_norm": 1.1098554134368896, "learning_rate": 0.0005370464737056665, "loss": 3.7328, "step": 54510 }, { "epoch": 3.7039679304253297, "grad_norm": 0.760221540927887, "learning_rate": 0.0005370040086968338, "loss": 3.2132, "step": 54515 }, { "epoch": 3.7043076504959913, "grad_norm": 1.0236623287200928, "learning_rate": 0.0005369615436880011, "loss": 3.3995, "step": 54520 }, { "epoch": 3.704647370566653, "grad_norm": 0.7703146934509277, "learning_rate": 0.0005369190786791684, "loss": 3.6967, "step": 54525 }, { "epoch": 3.704987090637315, "grad_norm": 0.7915014028549194, "learning_rate": 0.0005368766136703357, "loss": 3.5892, "step": 54530 }, { "epoch": 3.7053268107079766, "grad_norm": 0.8315061926841736, "learning_rate": 0.000536834148661503, "loss": 3.4938, "step": 54535 }, { "epoch": 3.7056665307786383, "grad_norm": 0.9444687962532043, "learning_rate": 0.0005367916836526702, "loss": 3.5801, "step": 54540 }, { "epoch": 3.7060062508493, "grad_norm": 0.7458621263504028, "learning_rate": 0.0005367492186438375, "loss": 3.6295, "step": 54545 }, { "epoch": 3.706345970919962, "grad_norm": 0.9720023274421692, "learning_rate": 0.0005367067536350047, "loss": 3.5545, "step": 54550 }, { "epoch": 3.7066856909906236, "grad_norm": 0.9546652436256409, "learning_rate": 0.000536664288626172, "loss": 3.4564, "step": 54555 }, { "epoch": 3.7070254110612852, "grad_norm": 1.1242135763168335, "learning_rate": 0.0005366218236173394, "loss": 3.5586, "step": 54560 }, { "epoch": 3.7073651311319473, "grad_norm": 0.8512378334999084, "learning_rate": 0.0005365793586085066, "loss": 3.7682, "step": 54565 }, { "epoch": 3.707704851202609, "grad_norm": 1.0664117336273193, "learning_rate": 0.0005365368935996739, "loss": 3.3629, "step": 54570 }, { "epoch": 3.7080445712732706, "grad_norm": 0.849688708782196, "learning_rate": 0.0005364944285908412, "loss": 3.4796, "step": 54575 }, { "epoch": 3.7083842913439327, "grad_norm": 0.7968799471855164, "learning_rate": 0.0005364519635820084, "loss": 3.53, "step": 54580 }, { "epoch": 3.7087240114145943, "grad_norm": 0.8794351816177368, "learning_rate": 0.0005364094985731756, "loss": 3.2573, "step": 54585 }, { "epoch": 3.709063731485256, "grad_norm": 1.0231249332427979, "learning_rate": 0.000536367033564343, "loss": 3.763, "step": 54590 }, { "epoch": 3.709403451555918, "grad_norm": 0.78081876039505, "learning_rate": 0.0005363245685555103, "loss": 3.545, "step": 54595 }, { "epoch": 3.7097431716265796, "grad_norm": 0.8613443374633789, "learning_rate": 0.0005362821035466775, "loss": 3.4478, "step": 54600 }, { "epoch": 3.7100828916972413, "grad_norm": 0.8601430654525757, "learning_rate": 0.0005362396385378449, "loss": 3.5354, "step": 54605 }, { "epoch": 3.7104226117679033, "grad_norm": 2.1507205963134766, "learning_rate": 0.0005361971735290121, "loss": 3.3419, "step": 54610 }, { "epoch": 3.710762331838565, "grad_norm": 0.8295083045959473, "learning_rate": 0.0005361547085201793, "loss": 3.2124, "step": 54615 }, { "epoch": 3.7111020519092266, "grad_norm": 0.7322379946708679, "learning_rate": 0.0005361122435113467, "loss": 3.309, "step": 54620 }, { "epoch": 3.7114417719798887, "grad_norm": 0.8152580857276917, "learning_rate": 0.0005360697785025139, "loss": 3.4282, "step": 54625 }, { "epoch": 3.7117814920505503, "grad_norm": 1.366969347000122, "learning_rate": 0.0005360273134936812, "loss": 3.8529, "step": 54630 }, { "epoch": 3.712121212121212, "grad_norm": 0.7198229432106018, "learning_rate": 0.0005359848484848486, "loss": 3.637, "step": 54635 }, { "epoch": 3.712460932191874, "grad_norm": 0.9979026913642883, "learning_rate": 0.0005359423834760158, "loss": 3.5131, "step": 54640 }, { "epoch": 3.7128006522625356, "grad_norm": 0.925813615322113, "learning_rate": 0.000535899918467183, "loss": 3.5843, "step": 54645 }, { "epoch": 3.7131403723331973, "grad_norm": 0.8099024891853333, "learning_rate": 0.0005358574534583503, "loss": 3.4138, "step": 54650 }, { "epoch": 3.7134800924038593, "grad_norm": 1.0962743759155273, "learning_rate": 0.0005358149884495176, "loss": 3.5525, "step": 54655 }, { "epoch": 3.713819812474521, "grad_norm": 0.8154871463775635, "learning_rate": 0.0005357725234406848, "loss": 3.3077, "step": 54660 }, { "epoch": 3.7141595325451826, "grad_norm": 0.8442808389663696, "learning_rate": 0.0005357300584318522, "loss": 3.3505, "step": 54665 }, { "epoch": 3.7144992526158447, "grad_norm": 0.7866305112838745, "learning_rate": 0.0005356875934230195, "loss": 3.5951, "step": 54670 }, { "epoch": 3.7148389726865063, "grad_norm": 0.741134762763977, "learning_rate": 0.0005356451284141867, "loss": 3.4746, "step": 54675 }, { "epoch": 3.715178692757168, "grad_norm": 1.075347900390625, "learning_rate": 0.000535602663405354, "loss": 3.3624, "step": 54680 }, { "epoch": 3.71551841282783, "grad_norm": 0.9806501865386963, "learning_rate": 0.0005355601983965212, "loss": 3.5982, "step": 54685 }, { "epoch": 3.7158581328984917, "grad_norm": 1.0076103210449219, "learning_rate": 0.0005355177333876886, "loss": 3.6586, "step": 54690 }, { "epoch": 3.7161978529691533, "grad_norm": 0.8461204767227173, "learning_rate": 0.0005354752683788558, "loss": 3.5973, "step": 54695 }, { "epoch": 3.7165375730398154, "grad_norm": 0.935477614402771, "learning_rate": 0.0005354328033700231, "loss": 3.6131, "step": 54700 }, { "epoch": 3.716877293110477, "grad_norm": 0.9814581274986267, "learning_rate": 0.0005353903383611905, "loss": 3.6125, "step": 54705 }, { "epoch": 3.7172170131811386, "grad_norm": 0.7364605665206909, "learning_rate": 0.0005353478733523577, "loss": 3.6075, "step": 54710 }, { "epoch": 3.7175567332518007, "grad_norm": 0.7956974506378174, "learning_rate": 0.0005353054083435249, "loss": 3.4073, "step": 54715 }, { "epoch": 3.7178964533224623, "grad_norm": 0.7337539196014404, "learning_rate": 0.0005352629433346923, "loss": 3.4736, "step": 54720 }, { "epoch": 3.718236173393124, "grad_norm": 0.8920583128929138, "learning_rate": 0.0005352204783258595, "loss": 3.4496, "step": 54725 }, { "epoch": 3.718575893463786, "grad_norm": 0.9275780320167542, "learning_rate": 0.0005351780133170267, "loss": 3.5347, "step": 54730 }, { "epoch": 3.7189156135344477, "grad_norm": 0.816299557685852, "learning_rate": 0.0005351355483081942, "loss": 3.2479, "step": 54735 }, { "epoch": 3.7192553336051093, "grad_norm": 1.1205774545669556, "learning_rate": 0.0005350930832993614, "loss": 3.6161, "step": 54740 }, { "epoch": 3.7195950536757714, "grad_norm": 0.8610690832138062, "learning_rate": 0.0005350506182905286, "loss": 3.5051, "step": 54745 }, { "epoch": 3.719934773746433, "grad_norm": 0.8933074474334717, "learning_rate": 0.0005350081532816959, "loss": 3.622, "step": 54750 }, { "epoch": 3.7202744938170946, "grad_norm": 0.6731392741203308, "learning_rate": 0.0005349656882728632, "loss": 3.5856, "step": 54755 }, { "epoch": 3.7206142138877567, "grad_norm": 0.6292549967765808, "learning_rate": 0.0005349232232640304, "loss": 3.5979, "step": 54760 }, { "epoch": 3.7209539339584183, "grad_norm": 0.749682605266571, "learning_rate": 0.0005348807582551977, "loss": 3.3863, "step": 54765 }, { "epoch": 3.72129365402908, "grad_norm": 0.8586956262588501, "learning_rate": 0.0005348382932463651, "loss": 3.4992, "step": 54770 }, { "epoch": 3.721633374099742, "grad_norm": 0.9240660667419434, "learning_rate": 0.0005347958282375323, "loss": 3.3668, "step": 54775 }, { "epoch": 3.7219730941704037, "grad_norm": 0.9373857378959656, "learning_rate": 0.0005347533632286996, "loss": 3.6412, "step": 54780 }, { "epoch": 3.7223128142410653, "grad_norm": 0.8075558543205261, "learning_rate": 0.0005347108982198668, "loss": 3.3116, "step": 54785 }, { "epoch": 3.7226525343117274, "grad_norm": 0.8918603658676147, "learning_rate": 0.0005346684332110341, "loss": 3.7121, "step": 54790 }, { "epoch": 3.722992254382389, "grad_norm": 1.1032791137695312, "learning_rate": 0.0005346259682022014, "loss": 3.581, "step": 54795 }, { "epoch": 3.7233319744530506, "grad_norm": 0.7940346002578735, "learning_rate": 0.0005345835031933686, "loss": 3.2447, "step": 54800 }, { "epoch": 3.7236716945237127, "grad_norm": 1.2446506023406982, "learning_rate": 0.000534541038184536, "loss": 3.4916, "step": 54805 }, { "epoch": 3.7240114145943743, "grad_norm": 1.3039567470550537, "learning_rate": 0.0005344985731757033, "loss": 3.4576, "step": 54810 }, { "epoch": 3.724351134665036, "grad_norm": 0.8366376161575317, "learning_rate": 0.0005344561081668705, "loss": 3.3213, "step": 54815 }, { "epoch": 3.724690854735698, "grad_norm": 0.9179151058197021, "learning_rate": 0.0005344136431580378, "loss": 3.7871, "step": 54820 }, { "epoch": 3.7250305748063597, "grad_norm": 0.9598586559295654, "learning_rate": 0.0005343711781492051, "loss": 3.348, "step": 54825 }, { "epoch": 3.7253702948770213, "grad_norm": 1.0367140769958496, "learning_rate": 0.0005343287131403723, "loss": 3.3409, "step": 54830 }, { "epoch": 3.725710014947683, "grad_norm": 0.8543326258659363, "learning_rate": 0.0005342862481315395, "loss": 3.4418, "step": 54835 }, { "epoch": 3.726049735018345, "grad_norm": 0.9478472471237183, "learning_rate": 0.000534243783122707, "loss": 3.4504, "step": 54840 }, { "epoch": 3.7263894550890067, "grad_norm": 0.833121657371521, "learning_rate": 0.0005342013181138742, "loss": 3.5395, "step": 54845 }, { "epoch": 3.7267291751596683, "grad_norm": 0.7631881833076477, "learning_rate": 0.0005341588531050414, "loss": 3.2038, "step": 54850 }, { "epoch": 3.7270688952303304, "grad_norm": 0.8639206290245056, "learning_rate": 0.0005341163880962088, "loss": 3.5329, "step": 54855 }, { "epoch": 3.727408615300992, "grad_norm": 0.7909950613975525, "learning_rate": 0.000534073923087376, "loss": 3.6553, "step": 54860 }, { "epoch": 3.7277483353716536, "grad_norm": 0.7943188548088074, "learning_rate": 0.0005340314580785432, "loss": 3.7804, "step": 54865 }, { "epoch": 3.7280880554423157, "grad_norm": 0.9541133642196655, "learning_rate": 0.0005339889930697106, "loss": 3.5663, "step": 54870 }, { "epoch": 3.7284277755129773, "grad_norm": 0.8442268967628479, "learning_rate": 0.0005339465280608779, "loss": 3.3964, "step": 54875 }, { "epoch": 3.728767495583639, "grad_norm": 0.8577226400375366, "learning_rate": 0.0005339040630520451, "loss": 3.448, "step": 54880 }, { "epoch": 3.7291072156543006, "grad_norm": 0.9387079477310181, "learning_rate": 0.0005338615980432124, "loss": 3.5225, "step": 54885 }, { "epoch": 3.7294469357249627, "grad_norm": 1.030052900314331, "learning_rate": 0.0005338191330343797, "loss": 3.6208, "step": 54890 }, { "epoch": 3.7297866557956243, "grad_norm": 1.0430415868759155, "learning_rate": 0.0005337766680255469, "loss": 3.671, "step": 54895 }, { "epoch": 3.730126375866286, "grad_norm": 1.1088014841079712, "learning_rate": 0.0005337342030167142, "loss": 3.4393, "step": 54900 }, { "epoch": 3.730466095936948, "grad_norm": 0.9064332246780396, "learning_rate": 0.0005336917380078815, "loss": 3.3686, "step": 54905 }, { "epoch": 3.7308058160076096, "grad_norm": 0.8930426239967346, "learning_rate": 0.0005336492729990488, "loss": 3.5592, "step": 54910 }, { "epoch": 3.7311455360782713, "grad_norm": 0.9819005131721497, "learning_rate": 0.0005336068079902161, "loss": 3.4059, "step": 54915 }, { "epoch": 3.7314852561489333, "grad_norm": 0.8169376254081726, "learning_rate": 0.0005335643429813834, "loss": 3.405, "step": 54920 }, { "epoch": 3.731824976219595, "grad_norm": 0.864361584186554, "learning_rate": 0.0005335218779725506, "loss": 3.5455, "step": 54925 }, { "epoch": 3.7321646962902566, "grad_norm": 0.9856473803520203, "learning_rate": 0.0005334794129637179, "loss": 3.5071, "step": 54930 }, { "epoch": 3.7325044163609187, "grad_norm": 0.8088100552558899, "learning_rate": 0.0005334369479548851, "loss": 3.596, "step": 54935 }, { "epoch": 3.7328441364315803, "grad_norm": 0.8745824098587036, "learning_rate": 0.0005333944829460524, "loss": 3.4903, "step": 54940 }, { "epoch": 3.733183856502242, "grad_norm": 1.0077687501907349, "learning_rate": 0.0005333520179372198, "loss": 3.3983, "step": 54945 }, { "epoch": 3.733523576572904, "grad_norm": 1.019166350364685, "learning_rate": 0.000533309552928387, "loss": 3.3717, "step": 54950 }, { "epoch": 3.7338632966435656, "grad_norm": 1.2827134132385254, "learning_rate": 0.0005332670879195543, "loss": 3.6747, "step": 54955 }, { "epoch": 3.7342030167142273, "grad_norm": 0.8667526245117188, "learning_rate": 0.0005332246229107216, "loss": 3.5614, "step": 54960 }, { "epoch": 3.7345427367848893, "grad_norm": 0.9789865612983704, "learning_rate": 0.0005331821579018888, "loss": 3.4407, "step": 54965 }, { "epoch": 3.734882456855551, "grad_norm": 0.7592118978500366, "learning_rate": 0.000533139692893056, "loss": 3.5635, "step": 54970 }, { "epoch": 3.7352221769262126, "grad_norm": 0.9635940194129944, "learning_rate": 0.0005330972278842234, "loss": 3.6552, "step": 54975 }, { "epoch": 3.7355618969968747, "grad_norm": 0.8601370453834534, "learning_rate": 0.0005330547628753907, "loss": 3.5822, "step": 54980 }, { "epoch": 3.7359016170675363, "grad_norm": 0.7420938014984131, "learning_rate": 0.0005330122978665579, "loss": 3.6643, "step": 54985 }, { "epoch": 3.736241337138198, "grad_norm": 0.8298119306564331, "learning_rate": 0.0005329698328577253, "loss": 3.4935, "step": 54990 }, { "epoch": 3.73658105720886, "grad_norm": 0.8837483525276184, "learning_rate": 0.0005329273678488925, "loss": 3.6411, "step": 54995 }, { "epoch": 3.7369207772795217, "grad_norm": 1.6148351430892944, "learning_rate": 0.0005328849028400597, "loss": 3.4252, "step": 55000 }, { "epoch": 3.7372604973501833, "grad_norm": 0.7759456038475037, "learning_rate": 0.0005328424378312271, "loss": 3.4859, "step": 55005 }, { "epoch": 3.7376002174208454, "grad_norm": 0.9428431391716003, "learning_rate": 0.0005327999728223943, "loss": 3.5675, "step": 55010 }, { "epoch": 3.737939937491507, "grad_norm": 0.8304556012153625, "learning_rate": 0.0005327575078135616, "loss": 3.7123, "step": 55015 }, { "epoch": 3.7382796575621686, "grad_norm": 0.9272135496139526, "learning_rate": 0.000532715042804729, "loss": 3.7683, "step": 55020 }, { "epoch": 3.7386193776328307, "grad_norm": 1.6169096231460571, "learning_rate": 0.0005326725777958962, "loss": 3.4087, "step": 55025 }, { "epoch": 3.7389590977034923, "grad_norm": 1.0453542470932007, "learning_rate": 0.0005326301127870635, "loss": 3.497, "step": 55030 }, { "epoch": 3.739298817774154, "grad_norm": 0.9136646389961243, "learning_rate": 0.0005325876477782307, "loss": 3.3562, "step": 55035 }, { "epoch": 3.739638537844816, "grad_norm": 1.4324803352355957, "learning_rate": 0.000532545182769398, "loss": 3.5394, "step": 55040 }, { "epoch": 3.7399782579154777, "grad_norm": 0.9308119416236877, "learning_rate": 0.0005325027177605654, "loss": 3.0882, "step": 55045 }, { "epoch": 3.7403179779861393, "grad_norm": 0.9755516648292542, "learning_rate": 0.0005324602527517326, "loss": 3.3318, "step": 55050 }, { "epoch": 3.7406576980568014, "grad_norm": 0.8834554553031921, "learning_rate": 0.0005324177877428999, "loss": 3.6192, "step": 55055 }, { "epoch": 3.740997418127463, "grad_norm": 1.0033539533615112, "learning_rate": 0.0005323753227340672, "loss": 3.615, "step": 55060 }, { "epoch": 3.7413371381981246, "grad_norm": 0.9126165509223938, "learning_rate": 0.0005323328577252344, "loss": 3.5484, "step": 55065 }, { "epoch": 3.7416768582687867, "grad_norm": 0.7196186184883118, "learning_rate": 0.0005322903927164016, "loss": 3.6829, "step": 55070 }, { "epoch": 3.7420165783394483, "grad_norm": 0.8776150941848755, "learning_rate": 0.000532247927707569, "loss": 3.8863, "step": 55075 }, { "epoch": 3.74235629841011, "grad_norm": 0.7584242820739746, "learning_rate": 0.0005322054626987363, "loss": 3.3319, "step": 55080 }, { "epoch": 3.742696018480772, "grad_norm": 0.8096224665641785, "learning_rate": 0.0005321629976899035, "loss": 3.3404, "step": 55085 }, { "epoch": 3.7430357385514337, "grad_norm": 1.1541545391082764, "learning_rate": 0.0005321205326810709, "loss": 3.3947, "step": 55090 }, { "epoch": 3.7433754586220953, "grad_norm": 0.8094286322593689, "learning_rate": 0.0005320780676722381, "loss": 3.5208, "step": 55095 }, { "epoch": 3.7437151786927574, "grad_norm": 1.1090295314788818, "learning_rate": 0.0005320356026634053, "loss": 3.4305, "step": 55100 }, { "epoch": 3.744054898763419, "grad_norm": 1.0830841064453125, "learning_rate": 0.0005319931376545727, "loss": 3.2967, "step": 55105 }, { "epoch": 3.7443946188340806, "grad_norm": 0.9869200587272644, "learning_rate": 0.0005319506726457399, "loss": 3.4402, "step": 55110 }, { "epoch": 3.7447343389047427, "grad_norm": 0.9945034980773926, "learning_rate": 0.0005319082076369072, "loss": 3.6205, "step": 55115 }, { "epoch": 3.7450740589754044, "grad_norm": 0.7808508276939392, "learning_rate": 0.0005318657426280746, "loss": 3.1969, "step": 55120 }, { "epoch": 3.745413779046066, "grad_norm": 1.0889617204666138, "learning_rate": 0.0005318232776192418, "loss": 3.085, "step": 55125 }, { "epoch": 3.745753499116728, "grad_norm": 0.8158285617828369, "learning_rate": 0.000531780812610409, "loss": 3.5704, "step": 55130 }, { "epoch": 3.7460932191873897, "grad_norm": 0.8806131482124329, "learning_rate": 0.0005317383476015763, "loss": 3.5143, "step": 55135 }, { "epoch": 3.7464329392580513, "grad_norm": 0.685218095779419, "learning_rate": 0.0005316958825927436, "loss": 3.3815, "step": 55140 }, { "epoch": 3.7467726593287134, "grad_norm": 0.9201647639274597, "learning_rate": 0.0005316534175839108, "loss": 3.2945, "step": 55145 }, { "epoch": 3.747112379399375, "grad_norm": 1.0250530242919922, "learning_rate": 0.0005316109525750782, "loss": 3.5713, "step": 55150 }, { "epoch": 3.7474520994700367, "grad_norm": 1.0740998983383179, "learning_rate": 0.0005315684875662455, "loss": 3.5787, "step": 55155 }, { "epoch": 3.7477918195406987, "grad_norm": 0.891015887260437, "learning_rate": 0.0005315260225574127, "loss": 3.4705, "step": 55160 }, { "epoch": 3.7481315396113604, "grad_norm": 0.862984299659729, "learning_rate": 0.00053148355754858, "loss": 3.6164, "step": 55165 }, { "epoch": 3.748471259682022, "grad_norm": 0.8843459486961365, "learning_rate": 0.0005314410925397473, "loss": 3.3594, "step": 55170 }, { "epoch": 3.7488109797526836, "grad_norm": 0.9694676995277405, "learning_rate": 0.0005313986275309145, "loss": 3.444, "step": 55175 }, { "epoch": 3.7491506998233457, "grad_norm": 1.276371717453003, "learning_rate": 0.0005313561625220818, "loss": 3.5675, "step": 55180 }, { "epoch": 3.7494904198940073, "grad_norm": 0.8703802824020386, "learning_rate": 0.0005313136975132491, "loss": 3.3623, "step": 55185 }, { "epoch": 3.749830139964669, "grad_norm": 0.814553439617157, "learning_rate": 0.0005312712325044164, "loss": 3.6685, "step": 55190 }, { "epoch": 3.750169860035331, "grad_norm": 0.9540380835533142, "learning_rate": 0.0005312287674955837, "loss": 3.2456, "step": 55195 }, { "epoch": 3.7505095801059927, "grad_norm": 0.9192229509353638, "learning_rate": 0.0005311863024867509, "loss": 3.3748, "step": 55200 }, { "epoch": 3.7508493001766543, "grad_norm": 0.768900990486145, "learning_rate": 0.0005311438374779182, "loss": 3.6243, "step": 55205 }, { "epoch": 3.7511890202473164, "grad_norm": 1.1297773122787476, "learning_rate": 0.0005311013724690855, "loss": 3.5125, "step": 55210 }, { "epoch": 3.751528740317978, "grad_norm": 0.7569725513458252, "learning_rate": 0.0005310589074602527, "loss": 3.476, "step": 55215 }, { "epoch": 3.7518684603886396, "grad_norm": 1.0698707103729248, "learning_rate": 0.00053101644245142, "loss": 3.3171, "step": 55220 }, { "epoch": 3.7522081804593013, "grad_norm": 0.9384840726852417, "learning_rate": 0.0005309739774425874, "loss": 3.4894, "step": 55225 }, { "epoch": 3.7525479005299633, "grad_norm": 0.7603185176849365, "learning_rate": 0.0005309315124337546, "loss": 3.7406, "step": 55230 }, { "epoch": 3.752887620600625, "grad_norm": 0.8559585213661194, "learning_rate": 0.0005308890474249218, "loss": 3.5188, "step": 55235 }, { "epoch": 3.7532273406712866, "grad_norm": 0.769366979598999, "learning_rate": 0.0005308465824160892, "loss": 3.2712, "step": 55240 }, { "epoch": 3.7535670607419487, "grad_norm": 1.2153433561325073, "learning_rate": 0.0005308041174072564, "loss": 3.815, "step": 55245 }, { "epoch": 3.7539067808126103, "grad_norm": 0.8166508078575134, "learning_rate": 0.0005307616523984236, "loss": 3.7103, "step": 55250 }, { "epoch": 3.754246500883272, "grad_norm": 0.6838829517364502, "learning_rate": 0.0005307191873895911, "loss": 3.3757, "step": 55255 }, { "epoch": 3.754586220953934, "grad_norm": 1.6971923112869263, "learning_rate": 0.0005306767223807583, "loss": 3.5624, "step": 55260 }, { "epoch": 3.7549259410245956, "grad_norm": 0.8775597214698792, "learning_rate": 0.0005306342573719255, "loss": 3.6253, "step": 55265 }, { "epoch": 3.7552656610952573, "grad_norm": 0.8145177960395813, "learning_rate": 0.0005305917923630929, "loss": 3.5181, "step": 55270 }, { "epoch": 3.7556053811659194, "grad_norm": 0.8092663884162903, "learning_rate": 0.0005305493273542601, "loss": 3.5185, "step": 55275 }, { "epoch": 3.755945101236581, "grad_norm": 0.9875311255455017, "learning_rate": 0.0005305068623454273, "loss": 3.4841, "step": 55280 }, { "epoch": 3.7562848213072426, "grad_norm": 0.9346238970756531, "learning_rate": 0.0005304643973365946, "loss": 3.5433, "step": 55285 }, { "epoch": 3.7566245413779047, "grad_norm": 0.8469724059104919, "learning_rate": 0.000530421932327762, "loss": 3.5228, "step": 55290 }, { "epoch": 3.7569642614485663, "grad_norm": 0.9983519911766052, "learning_rate": 0.0005303794673189292, "loss": 3.5077, "step": 55295 }, { "epoch": 3.757303981519228, "grad_norm": 1.0515414476394653, "learning_rate": 0.0005303370023100965, "loss": 3.6276, "step": 55300 }, { "epoch": 3.75764370158989, "grad_norm": 0.9465078115463257, "learning_rate": 0.0005302945373012638, "loss": 3.5428, "step": 55305 }, { "epoch": 3.7579834216605517, "grad_norm": 1.003580093383789, "learning_rate": 0.000530252072292431, "loss": 3.6825, "step": 55310 }, { "epoch": 3.7583231417312133, "grad_norm": 0.9517799615859985, "learning_rate": 0.0005302096072835983, "loss": 3.6813, "step": 55315 }, { "epoch": 3.7586628618018754, "grad_norm": 0.922330915927887, "learning_rate": 0.0005301671422747655, "loss": 3.3574, "step": 55320 }, { "epoch": 3.759002581872537, "grad_norm": 0.7383594512939453, "learning_rate": 0.0005301246772659329, "loss": 3.6515, "step": 55325 }, { "epoch": 3.7593423019431986, "grad_norm": 1.2569410800933838, "learning_rate": 0.0005300822122571002, "loss": 3.5015, "step": 55330 }, { "epoch": 3.7596820220138607, "grad_norm": 0.7399452328681946, "learning_rate": 0.0005300397472482674, "loss": 3.43, "step": 55335 }, { "epoch": 3.7600217420845223, "grad_norm": 0.7153240442276001, "learning_rate": 0.0005299972822394347, "loss": 3.6163, "step": 55340 }, { "epoch": 3.760361462155184, "grad_norm": 0.9509831070899963, "learning_rate": 0.000529954817230602, "loss": 3.6635, "step": 55345 }, { "epoch": 3.760701182225846, "grad_norm": 0.9036169648170471, "learning_rate": 0.0005299123522217692, "loss": 3.28, "step": 55350 }, { "epoch": 3.7610409022965077, "grad_norm": 0.8787235617637634, "learning_rate": 0.0005298698872129365, "loss": 3.7538, "step": 55355 }, { "epoch": 3.7613806223671693, "grad_norm": 0.9020276069641113, "learning_rate": 0.0005298274222041039, "loss": 3.7184, "step": 55360 }, { "epoch": 3.7617203424378314, "grad_norm": 0.8400426506996155, "learning_rate": 0.0005297849571952711, "loss": 3.5743, "step": 55365 }, { "epoch": 3.762060062508493, "grad_norm": 1.2143874168395996, "learning_rate": 0.0005297424921864385, "loss": 3.2048, "step": 55370 }, { "epoch": 3.7623997825791546, "grad_norm": 0.9159396290779114, "learning_rate": 0.0005297000271776057, "loss": 3.5762, "step": 55375 }, { "epoch": 3.7627395026498167, "grad_norm": 0.8436747193336487, "learning_rate": 0.0005296575621687729, "loss": 3.5981, "step": 55380 }, { "epoch": 3.7630792227204783, "grad_norm": 0.7627342343330383, "learning_rate": 0.0005296150971599402, "loss": 3.3867, "step": 55385 }, { "epoch": 3.76341894279114, "grad_norm": 0.8818159699440002, "learning_rate": 0.0005295726321511075, "loss": 3.5034, "step": 55390 }, { "epoch": 3.763758662861802, "grad_norm": 1.0074577331542969, "learning_rate": 0.0005295301671422748, "loss": 4.0743, "step": 55395 }, { "epoch": 3.7640983829324637, "grad_norm": 0.8862717747688293, "learning_rate": 0.0005294877021334421, "loss": 3.4943, "step": 55400 }, { "epoch": 3.7644381030031253, "grad_norm": 0.7939552068710327, "learning_rate": 0.0005294452371246094, "loss": 3.3237, "step": 55405 }, { "epoch": 3.7647778230737874, "grad_norm": 0.7286713719367981, "learning_rate": 0.0005294027721157766, "loss": 3.5104, "step": 55410 }, { "epoch": 3.765117543144449, "grad_norm": 0.7083745002746582, "learning_rate": 0.0005293603071069439, "loss": 3.4383, "step": 55415 }, { "epoch": 3.7654572632151107, "grad_norm": 1.279093861579895, "learning_rate": 0.0005293178420981111, "loss": 3.6801, "step": 55420 }, { "epoch": 3.7657969832857727, "grad_norm": 0.7574336528778076, "learning_rate": 0.0005292753770892784, "loss": 3.6631, "step": 55425 }, { "epoch": 3.7661367033564344, "grad_norm": 0.9487844109535217, "learning_rate": 0.0005292329120804458, "loss": 3.4574, "step": 55430 }, { "epoch": 3.766476423427096, "grad_norm": 0.9485490322113037, "learning_rate": 0.000529190447071613, "loss": 3.7347, "step": 55435 }, { "epoch": 3.766816143497758, "grad_norm": 0.8379043340682983, "learning_rate": 0.0005291479820627803, "loss": 3.4706, "step": 55440 }, { "epoch": 3.7671558635684197, "grad_norm": 0.9543617963790894, "learning_rate": 0.0005291055170539476, "loss": 3.4108, "step": 55445 }, { "epoch": 3.7674955836390813, "grad_norm": 0.9282843470573425, "learning_rate": 0.0005290630520451148, "loss": 3.6889, "step": 55450 }, { "epoch": 3.7678353037097434, "grad_norm": 0.8928327560424805, "learning_rate": 0.000529020587036282, "loss": 3.6793, "step": 55455 }, { "epoch": 3.768175023780405, "grad_norm": 1.122862696647644, "learning_rate": 0.0005289781220274494, "loss": 3.422, "step": 55460 }, { "epoch": 3.7685147438510667, "grad_norm": 0.8912630677223206, "learning_rate": 0.0005289356570186167, "loss": 3.2884, "step": 55465 }, { "epoch": 3.7688544639217287, "grad_norm": 1.162979006767273, "learning_rate": 0.000528893192009784, "loss": 3.3164, "step": 55470 }, { "epoch": 3.7691941839923904, "grad_norm": 1.2106269598007202, "learning_rate": 0.0005288507270009513, "loss": 3.4485, "step": 55475 }, { "epoch": 3.769533904063052, "grad_norm": 0.845757782459259, "learning_rate": 0.0005288082619921185, "loss": 3.4758, "step": 55480 }, { "epoch": 3.769873624133714, "grad_norm": 0.8576749563217163, "learning_rate": 0.0005287657969832857, "loss": 3.512, "step": 55485 }, { "epoch": 3.7702133442043757, "grad_norm": 0.7643755674362183, "learning_rate": 0.0005287233319744531, "loss": 3.5678, "step": 55490 }, { "epoch": 3.7705530642750373, "grad_norm": 0.8214532136917114, "learning_rate": 0.0005286808669656203, "loss": 3.576, "step": 55495 }, { "epoch": 3.7708927843456994, "grad_norm": 0.8651350736618042, "learning_rate": 0.0005286384019567876, "loss": 3.5838, "step": 55500 }, { "epoch": 3.771232504416361, "grad_norm": 0.9427428841590881, "learning_rate": 0.000528595936947955, "loss": 3.3835, "step": 55505 }, { "epoch": 3.7715722244870227, "grad_norm": 0.887864351272583, "learning_rate": 0.0005285534719391222, "loss": 3.353, "step": 55510 }, { "epoch": 3.7719119445576843, "grad_norm": 0.9157028198242188, "learning_rate": 0.0005285110069302894, "loss": 3.7803, "step": 55515 }, { "epoch": 3.7722516646283464, "grad_norm": 0.6996296644210815, "learning_rate": 0.0005284685419214567, "loss": 3.696, "step": 55520 }, { "epoch": 3.772591384699008, "grad_norm": 0.986216127872467, "learning_rate": 0.000528426076912624, "loss": 3.6178, "step": 55525 }, { "epoch": 3.7729311047696696, "grad_norm": 0.7205371260643005, "learning_rate": 0.0005283836119037912, "loss": 3.2717, "step": 55530 }, { "epoch": 3.7732708248403317, "grad_norm": 0.7644385099411011, "learning_rate": 0.0005283411468949586, "loss": 3.373, "step": 55535 }, { "epoch": 3.7736105449109933, "grad_norm": 0.80385822057724, "learning_rate": 0.0005282986818861259, "loss": 3.4645, "step": 55540 }, { "epoch": 3.773950264981655, "grad_norm": 0.9187750816345215, "learning_rate": 0.0005282562168772931, "loss": 3.6276, "step": 55545 }, { "epoch": 3.774289985052317, "grad_norm": 0.8756825923919678, "learning_rate": 0.0005282137518684604, "loss": 3.6302, "step": 55550 }, { "epoch": 3.7746297051229787, "grad_norm": 1.081735372543335, "learning_rate": 0.0005281712868596277, "loss": 3.5186, "step": 55555 }, { "epoch": 3.7749694251936403, "grad_norm": 1.0715614557266235, "learning_rate": 0.0005281288218507949, "loss": 3.5001, "step": 55560 }, { "epoch": 3.775309145264302, "grad_norm": 0.8683094382286072, "learning_rate": 0.0005280863568419622, "loss": 3.2655, "step": 55565 }, { "epoch": 3.775648865334964, "grad_norm": 0.916874349117279, "learning_rate": 0.0005280438918331295, "loss": 3.4946, "step": 55570 }, { "epoch": 3.7759885854056257, "grad_norm": 0.861598551273346, "learning_rate": 0.0005280014268242968, "loss": 3.8905, "step": 55575 }, { "epoch": 3.7763283054762873, "grad_norm": 0.8323996067047119, "learning_rate": 0.0005279589618154641, "loss": 3.4692, "step": 55580 }, { "epoch": 3.7766680255469494, "grad_norm": 1.052626371383667, "learning_rate": 0.0005279164968066313, "loss": 3.5184, "step": 55585 }, { "epoch": 3.777007745617611, "grad_norm": 0.983048677444458, "learning_rate": 0.0005278740317977986, "loss": 3.6364, "step": 55590 }, { "epoch": 3.7773474656882726, "grad_norm": 1.1843783855438232, "learning_rate": 0.0005278315667889659, "loss": 3.6942, "step": 55595 }, { "epoch": 3.7776871857589347, "grad_norm": 0.8867509365081787, "learning_rate": 0.0005277891017801331, "loss": 3.6293, "step": 55600 }, { "epoch": 3.7780269058295963, "grad_norm": 0.9513036012649536, "learning_rate": 0.0005277466367713005, "loss": 3.4245, "step": 55605 }, { "epoch": 3.778366625900258, "grad_norm": 0.8840452432632446, "learning_rate": 0.0005277041717624678, "loss": 3.5631, "step": 55610 }, { "epoch": 3.77870634597092, "grad_norm": 0.6065431833267212, "learning_rate": 0.000527661706753635, "loss": 3.6214, "step": 55615 }, { "epoch": 3.7790460660415817, "grad_norm": 0.8780247569084167, "learning_rate": 0.0005276192417448022, "loss": 3.5385, "step": 55620 }, { "epoch": 3.7793857861122433, "grad_norm": 0.758277177810669, "learning_rate": 0.0005275767767359696, "loss": 3.6123, "step": 55625 }, { "epoch": 3.7797255061829054, "grad_norm": 0.9010626673698425, "learning_rate": 0.0005275343117271368, "loss": 3.7336, "step": 55630 }, { "epoch": 3.780065226253567, "grad_norm": 0.7693651914596558, "learning_rate": 0.000527491846718304, "loss": 3.7174, "step": 55635 }, { "epoch": 3.7804049463242286, "grad_norm": 0.8228757977485657, "learning_rate": 0.0005274493817094715, "loss": 3.7287, "step": 55640 }, { "epoch": 3.7807446663948907, "grad_norm": 0.6740391850471497, "learning_rate": 0.0005274069167006387, "loss": 3.4981, "step": 55645 }, { "epoch": 3.7810843864655523, "grad_norm": 0.8589305281639099, "learning_rate": 0.0005273644516918059, "loss": 3.4607, "step": 55650 }, { "epoch": 3.781424106536214, "grad_norm": 0.7811538577079773, "learning_rate": 0.0005273219866829733, "loss": 3.6053, "step": 55655 }, { "epoch": 3.781763826606876, "grad_norm": 0.9331864714622498, "learning_rate": 0.0005272795216741405, "loss": 3.5713, "step": 55660 }, { "epoch": 3.7821035466775377, "grad_norm": 1.2087920904159546, "learning_rate": 0.0005272370566653077, "loss": 3.3601, "step": 55665 }, { "epoch": 3.7824432667481993, "grad_norm": 0.8008260726928711, "learning_rate": 0.0005271945916564752, "loss": 3.705, "step": 55670 }, { "epoch": 3.7827829868188614, "grad_norm": 0.9629670977592468, "learning_rate": 0.0005271521266476424, "loss": 3.4364, "step": 55675 }, { "epoch": 3.783122706889523, "grad_norm": 0.7897980809211731, "learning_rate": 0.0005271096616388096, "loss": 3.5066, "step": 55680 }, { "epoch": 3.7834624269601846, "grad_norm": 1.0639491081237793, "learning_rate": 0.0005270671966299769, "loss": 3.5499, "step": 55685 }, { "epoch": 3.7838021470308467, "grad_norm": 1.1327036619186401, "learning_rate": 0.0005270247316211442, "loss": 3.3359, "step": 55690 }, { "epoch": 3.7841418671015083, "grad_norm": 0.9999335408210754, "learning_rate": 0.0005269822666123114, "loss": 3.831, "step": 55695 }, { "epoch": 3.78448158717217, "grad_norm": 0.8211253881454468, "learning_rate": 0.0005269398016034787, "loss": 3.3374, "step": 55700 }, { "epoch": 3.784821307242832, "grad_norm": 0.9879361987113953, "learning_rate": 0.0005268973365946461, "loss": 3.4373, "step": 55705 }, { "epoch": 3.7851610273134937, "grad_norm": 0.8416990637779236, "learning_rate": 0.0005268548715858134, "loss": 3.528, "step": 55710 }, { "epoch": 3.7855007473841553, "grad_norm": 0.9534209966659546, "learning_rate": 0.0005268124065769806, "loss": 3.4609, "step": 55715 }, { "epoch": 3.7858404674548174, "grad_norm": 0.9439724683761597, "learning_rate": 0.0005267699415681478, "loss": 3.5412, "step": 55720 }, { "epoch": 3.786180187525479, "grad_norm": 0.8800684809684753, "learning_rate": 0.0005267274765593152, "loss": 3.4106, "step": 55725 }, { "epoch": 3.7865199075961407, "grad_norm": 0.9825429916381836, "learning_rate": 0.0005266850115504824, "loss": 3.788, "step": 55730 }, { "epoch": 3.7868596276668027, "grad_norm": 0.7447264790534973, "learning_rate": 0.0005266425465416496, "loss": 3.3975, "step": 55735 }, { "epoch": 3.7871993477374644, "grad_norm": 0.882600724697113, "learning_rate": 0.0005266000815328171, "loss": 3.6908, "step": 55740 }, { "epoch": 3.787539067808126, "grad_norm": 0.7813102006912231, "learning_rate": 0.0005265576165239843, "loss": 3.5993, "step": 55745 }, { "epoch": 3.787878787878788, "grad_norm": 0.9392270445823669, "learning_rate": 0.0005265151515151515, "loss": 3.4765, "step": 55750 }, { "epoch": 3.7882185079494497, "grad_norm": 0.8283581137657166, "learning_rate": 0.0005264726865063189, "loss": 3.6835, "step": 55755 }, { "epoch": 3.7885582280201113, "grad_norm": 0.7559888958930969, "learning_rate": 0.0005264302214974861, "loss": 3.485, "step": 55760 }, { "epoch": 3.7888979480907734, "grad_norm": 0.7605040669441223, "learning_rate": 0.0005263877564886533, "loss": 3.4826, "step": 55765 }, { "epoch": 3.789237668161435, "grad_norm": 0.8659229278564453, "learning_rate": 0.0005263452914798206, "loss": 3.6602, "step": 55770 }, { "epoch": 3.7895773882320967, "grad_norm": 0.910800039768219, "learning_rate": 0.000526302826470988, "loss": 3.556, "step": 55775 }, { "epoch": 3.7899171083027587, "grad_norm": 1.0667128562927246, "learning_rate": 0.0005262603614621552, "loss": 3.6387, "step": 55780 }, { "epoch": 3.7902568283734204, "grad_norm": 0.8082834482192993, "learning_rate": 0.0005262178964533225, "loss": 3.8618, "step": 55785 }, { "epoch": 3.790596548444082, "grad_norm": 0.8752985596656799, "learning_rate": 0.0005261754314444898, "loss": 3.4569, "step": 55790 }, { "epoch": 3.790936268514744, "grad_norm": 0.7948899269104004, "learning_rate": 0.000526132966435657, "loss": 3.5214, "step": 55795 }, { "epoch": 3.7912759885854057, "grad_norm": 0.9071364998817444, "learning_rate": 0.0005260905014268243, "loss": 3.2972, "step": 55800 }, { "epoch": 3.7916157086560673, "grad_norm": 1.055819034576416, "learning_rate": 0.0005260480364179915, "loss": 3.4413, "step": 55805 }, { "epoch": 3.7919554287267294, "grad_norm": 0.7746694087982178, "learning_rate": 0.0005260055714091589, "loss": 3.4812, "step": 55810 }, { "epoch": 3.792295148797391, "grad_norm": 0.8002044558525085, "learning_rate": 0.0005259631064003262, "loss": 3.4137, "step": 55815 }, { "epoch": 3.7926348688680527, "grad_norm": 0.8296111226081848, "learning_rate": 0.0005259206413914934, "loss": 3.7835, "step": 55820 }, { "epoch": 3.7929745889387148, "grad_norm": 0.9452680945396423, "learning_rate": 0.0005258781763826607, "loss": 3.4387, "step": 55825 }, { "epoch": 3.7933143090093764, "grad_norm": 0.8038287162780762, "learning_rate": 0.000525835711373828, "loss": 3.669, "step": 55830 }, { "epoch": 3.793654029080038, "grad_norm": 1.1663395166397095, "learning_rate": 0.0005257932463649952, "loss": 3.4569, "step": 55835 }, { "epoch": 3.7939937491507, "grad_norm": 1.2264289855957031, "learning_rate": 0.0005257507813561625, "loss": 3.543, "step": 55840 }, { "epoch": 3.7943334692213617, "grad_norm": 0.8041330575942993, "learning_rate": 0.0005257083163473299, "loss": 3.5046, "step": 55845 }, { "epoch": 3.7946731892920234, "grad_norm": 0.918626606464386, "learning_rate": 0.0005256658513384971, "loss": 3.3014, "step": 55850 }, { "epoch": 3.795012909362685, "grad_norm": 0.9897686243057251, "learning_rate": 0.0005256233863296644, "loss": 3.4, "step": 55855 }, { "epoch": 3.795352629433347, "grad_norm": 0.8054947853088379, "learning_rate": 0.0005255809213208317, "loss": 3.5353, "step": 55860 }, { "epoch": 3.7956923495040087, "grad_norm": 0.9137596487998962, "learning_rate": 0.0005255384563119989, "loss": 3.4681, "step": 55865 }, { "epoch": 3.7960320695746703, "grad_norm": 0.9184945225715637, "learning_rate": 0.0005254959913031661, "loss": 3.6371, "step": 55870 }, { "epoch": 3.7963717896453324, "grad_norm": 1.0417685508728027, "learning_rate": 0.0005254535262943335, "loss": 3.3319, "step": 55875 }, { "epoch": 3.796711509715994, "grad_norm": 0.769091784954071, "learning_rate": 0.0005254110612855008, "loss": 3.6697, "step": 55880 }, { "epoch": 3.7970512297866557, "grad_norm": 0.8785198926925659, "learning_rate": 0.000525368596276668, "loss": 3.6896, "step": 55885 }, { "epoch": 3.7973909498573177, "grad_norm": 0.9343380928039551, "learning_rate": 0.0005253261312678354, "loss": 3.6572, "step": 55890 }, { "epoch": 3.7977306699279794, "grad_norm": 0.9476320743560791, "learning_rate": 0.0005252836662590026, "loss": 3.3724, "step": 55895 }, { "epoch": 3.798070389998641, "grad_norm": 1.531165599822998, "learning_rate": 0.0005252412012501698, "loss": 3.6377, "step": 55900 }, { "epoch": 3.7984101100693026, "grad_norm": 1.2817524671554565, "learning_rate": 0.0005251987362413372, "loss": 3.7212, "step": 55905 }, { "epoch": 3.7987498301399647, "grad_norm": 0.9495688080787659, "learning_rate": 0.0005251562712325044, "loss": 3.0868, "step": 55910 }, { "epoch": 3.7990895502106263, "grad_norm": 0.716037929058075, "learning_rate": 0.0005251138062236717, "loss": 3.4624, "step": 55915 }, { "epoch": 3.799429270281288, "grad_norm": 1.2376518249511719, "learning_rate": 0.000525071341214839, "loss": 3.6066, "step": 55920 }, { "epoch": 3.79976899035195, "grad_norm": 0.7719739675521851, "learning_rate": 0.0005250288762060063, "loss": 3.6685, "step": 55925 }, { "epoch": 3.8001087104226117, "grad_norm": 0.8896769881248474, "learning_rate": 0.0005249864111971735, "loss": 3.6196, "step": 55930 }, { "epoch": 3.8004484304932733, "grad_norm": 0.694242537021637, "learning_rate": 0.0005249439461883408, "loss": 3.7168, "step": 55935 }, { "epoch": 3.8007881505639354, "grad_norm": 0.826335072517395, "learning_rate": 0.0005249014811795081, "loss": 3.3261, "step": 55940 }, { "epoch": 3.801127870634597, "grad_norm": 0.9762623906135559, "learning_rate": 0.0005248590161706753, "loss": 3.5885, "step": 55945 }, { "epoch": 3.8014675907052586, "grad_norm": 0.9352214932441711, "learning_rate": 0.0005248165511618427, "loss": 3.5257, "step": 55950 }, { "epoch": 3.8018073107759207, "grad_norm": 0.826858639717102, "learning_rate": 0.00052477408615301, "loss": 3.1532, "step": 55955 }, { "epoch": 3.8021470308465823, "grad_norm": 0.7236220240592957, "learning_rate": 0.0005247316211441772, "loss": 3.534, "step": 55960 }, { "epoch": 3.802486750917244, "grad_norm": 0.8774734139442444, "learning_rate": 0.0005246891561353445, "loss": 3.4565, "step": 55965 }, { "epoch": 3.802826470987906, "grad_norm": 1.298330307006836, "learning_rate": 0.0005246466911265117, "loss": 3.6107, "step": 55970 }, { "epoch": 3.8031661910585677, "grad_norm": 1.1160427331924438, "learning_rate": 0.000524604226117679, "loss": 3.5668, "step": 55975 }, { "epoch": 3.8035059111292293, "grad_norm": 0.8108828663825989, "learning_rate": 0.0005245617611088463, "loss": 3.4341, "step": 55980 }, { "epoch": 3.8038456311998914, "grad_norm": 0.838599681854248, "learning_rate": 0.0005245192961000136, "loss": 3.6294, "step": 55985 }, { "epoch": 3.804185351270553, "grad_norm": 0.8860453367233276, "learning_rate": 0.0005244768310911809, "loss": 3.4263, "step": 55990 }, { "epoch": 3.8045250713412146, "grad_norm": 0.9518068432807922, "learning_rate": 0.0005244343660823482, "loss": 3.4511, "step": 55995 }, { "epoch": 3.8048647914118767, "grad_norm": 0.9574953317642212, "learning_rate": 0.0005243919010735154, "loss": 3.394, "step": 56000 }, { "epoch": 3.8052045114825384, "grad_norm": 0.9196003079414368, "learning_rate": 0.0005243494360646826, "loss": 3.3477, "step": 56005 }, { "epoch": 3.8055442315532, "grad_norm": 1.246414303779602, "learning_rate": 0.00052430697105585, "loss": 3.4007, "step": 56010 }, { "epoch": 3.805883951623862, "grad_norm": 0.7405118942260742, "learning_rate": 0.0005242645060470172, "loss": 3.446, "step": 56015 }, { "epoch": 3.8062236716945237, "grad_norm": 0.7583832740783691, "learning_rate": 0.0005242220410381845, "loss": 3.6075, "step": 56020 }, { "epoch": 3.8065633917651853, "grad_norm": 0.9404184222221375, "learning_rate": 0.0005241795760293519, "loss": 3.4668, "step": 56025 }, { "epoch": 3.8069031118358474, "grad_norm": 0.7318946123123169, "learning_rate": 0.0005241371110205191, "loss": 3.6657, "step": 56030 }, { "epoch": 3.807242831906509, "grad_norm": 0.8825598359107971, "learning_rate": 0.0005240946460116863, "loss": 3.3975, "step": 56035 }, { "epoch": 3.8075825519771707, "grad_norm": 0.6793971657752991, "learning_rate": 0.0005240521810028537, "loss": 3.5602, "step": 56040 }, { "epoch": 3.8079222720478327, "grad_norm": 0.831713855266571, "learning_rate": 0.0005240097159940209, "loss": 3.4235, "step": 56045 }, { "epoch": 3.8082619921184944, "grad_norm": 1.0683919191360474, "learning_rate": 0.0005239672509851882, "loss": 3.5368, "step": 56050 }, { "epoch": 3.808601712189156, "grad_norm": 1.0225378274917603, "learning_rate": 0.0005239247859763556, "loss": 3.6813, "step": 56055 }, { "epoch": 3.808941432259818, "grad_norm": 0.851654052734375, "learning_rate": 0.0005238823209675228, "loss": 3.3987, "step": 56060 }, { "epoch": 3.8092811523304797, "grad_norm": 0.7730807065963745, "learning_rate": 0.0005238398559586901, "loss": 3.6825, "step": 56065 }, { "epoch": 3.8096208724011413, "grad_norm": 0.9623993039131165, "learning_rate": 0.0005237973909498573, "loss": 3.5484, "step": 56070 }, { "epoch": 3.8099605924718034, "grad_norm": 1.2083771228790283, "learning_rate": 0.0005237549259410246, "loss": 3.7746, "step": 56075 }, { "epoch": 3.810300312542465, "grad_norm": 1.0333926677703857, "learning_rate": 0.0005237124609321919, "loss": 3.6467, "step": 56080 }, { "epoch": 3.8106400326131267, "grad_norm": 0.9033041596412659, "learning_rate": 0.0005236699959233591, "loss": 3.5035, "step": 56085 }, { "epoch": 3.8109797526837887, "grad_norm": 1.1437511444091797, "learning_rate": 0.0005236275309145265, "loss": 3.3793, "step": 56090 }, { "epoch": 3.8113194727544504, "grad_norm": 1.888250708580017, "learning_rate": 0.0005235850659056938, "loss": 3.6034, "step": 56095 }, { "epoch": 3.811659192825112, "grad_norm": 0.8686556816101074, "learning_rate": 0.000523542600896861, "loss": 3.6128, "step": 56100 }, { "epoch": 3.811998912895774, "grad_norm": 1.0905582904815674, "learning_rate": 0.0005235001358880282, "loss": 3.6279, "step": 56105 }, { "epoch": 3.8123386329664357, "grad_norm": 1.099255084991455, "learning_rate": 0.0005234576708791956, "loss": 3.7917, "step": 56110 }, { "epoch": 3.8126783530370973, "grad_norm": 0.9166022539138794, "learning_rate": 0.0005234152058703628, "loss": 3.7595, "step": 56115 }, { "epoch": 3.8130180731077594, "grad_norm": 0.8268341422080994, "learning_rate": 0.00052337274086153, "loss": 3.5227, "step": 56120 }, { "epoch": 3.813357793178421, "grad_norm": 0.8269185423851013, "learning_rate": 0.0005233302758526975, "loss": 3.634, "step": 56125 }, { "epoch": 3.8136975132490827, "grad_norm": 1.073610544204712, "learning_rate": 0.0005232878108438647, "loss": 3.494, "step": 56130 }, { "epoch": 3.8140372333197448, "grad_norm": 0.9536941647529602, "learning_rate": 0.0005232453458350319, "loss": 3.3434, "step": 56135 }, { "epoch": 3.8143769533904064, "grad_norm": 1.1666574478149414, "learning_rate": 0.0005232028808261993, "loss": 3.348, "step": 56140 }, { "epoch": 3.814716673461068, "grad_norm": 1.099188208580017, "learning_rate": 0.0005231604158173665, "loss": 3.5287, "step": 56145 }, { "epoch": 3.81505639353173, "grad_norm": 0.8057329654693604, "learning_rate": 0.0005231179508085337, "loss": 3.7105, "step": 56150 }, { "epoch": 3.8153961136023917, "grad_norm": 1.3922618627548218, "learning_rate": 0.0005230754857997012, "loss": 3.6472, "step": 56155 }, { "epoch": 3.8157358336730534, "grad_norm": 1.201080322265625, "learning_rate": 0.0005230330207908684, "loss": 3.3811, "step": 56160 }, { "epoch": 3.8160755537437154, "grad_norm": 1.0848336219787598, "learning_rate": 0.0005229905557820356, "loss": 3.6336, "step": 56165 }, { "epoch": 3.816415273814377, "grad_norm": 0.9715064167976379, "learning_rate": 0.0005229480907732029, "loss": 3.5574, "step": 56170 }, { "epoch": 3.8167549938850387, "grad_norm": 0.7345954179763794, "learning_rate": 0.0005229056257643702, "loss": 3.4054, "step": 56175 }, { "epoch": 3.8170947139557008, "grad_norm": 0.7796631455421448, "learning_rate": 0.0005228631607555374, "loss": 3.6312, "step": 56180 }, { "epoch": 3.8174344340263624, "grad_norm": 0.7440600991249084, "learning_rate": 0.0005228206957467047, "loss": 3.6778, "step": 56185 }, { "epoch": 3.817774154097024, "grad_norm": 0.9003073573112488, "learning_rate": 0.0005227782307378721, "loss": 3.6073, "step": 56190 }, { "epoch": 3.8181138741676857, "grad_norm": 1.0145468711853027, "learning_rate": 0.0005227357657290393, "loss": 3.7774, "step": 56195 }, { "epoch": 3.8184535942383477, "grad_norm": 0.8687780499458313, "learning_rate": 0.0005226933007202066, "loss": 3.6586, "step": 56200 }, { "epoch": 3.8187933143090094, "grad_norm": 0.8589841723442078, "learning_rate": 0.0005226508357113738, "loss": 3.1222, "step": 56205 }, { "epoch": 3.819133034379671, "grad_norm": 0.843184769153595, "learning_rate": 0.0005226083707025411, "loss": 3.5567, "step": 56210 }, { "epoch": 3.819472754450333, "grad_norm": 1.095578670501709, "learning_rate": 0.0005225659056937084, "loss": 3.5497, "step": 56215 }, { "epoch": 3.8198124745209947, "grad_norm": 0.9289934039115906, "learning_rate": 0.0005225234406848756, "loss": 3.7642, "step": 56220 }, { "epoch": 3.8201521945916563, "grad_norm": 0.9652898907661438, "learning_rate": 0.000522480975676043, "loss": 3.4436, "step": 56225 }, { "epoch": 3.8204919146623184, "grad_norm": 0.7541878819465637, "learning_rate": 0.0005224385106672103, "loss": 3.3053, "step": 56230 }, { "epoch": 3.82083163473298, "grad_norm": 0.7974632978439331, "learning_rate": 0.0005223960456583775, "loss": 3.6799, "step": 56235 }, { "epoch": 3.8211713548036417, "grad_norm": 0.8026483654975891, "learning_rate": 0.0005223535806495448, "loss": 3.6149, "step": 56240 }, { "epoch": 3.8215110748743033, "grad_norm": 0.8584368824958801, "learning_rate": 0.0005223111156407121, "loss": 3.4489, "step": 56245 }, { "epoch": 3.8218507949449654, "grad_norm": 1.3426589965820312, "learning_rate": 0.0005222686506318793, "loss": 3.3665, "step": 56250 }, { "epoch": 3.822190515015627, "grad_norm": 1.054150104522705, "learning_rate": 0.0005222261856230465, "loss": 3.7507, "step": 56255 }, { "epoch": 3.8225302350862886, "grad_norm": 0.9774046540260315, "learning_rate": 0.000522183720614214, "loss": 3.4517, "step": 56260 }, { "epoch": 3.8228699551569507, "grad_norm": 1.6141369342803955, "learning_rate": 0.0005221412556053812, "loss": 3.5137, "step": 56265 }, { "epoch": 3.8232096752276123, "grad_norm": 0.7604255080223083, "learning_rate": 0.0005220987905965484, "loss": 3.2613, "step": 56270 }, { "epoch": 3.823549395298274, "grad_norm": 0.7882590293884277, "learning_rate": 0.0005220563255877158, "loss": 3.5131, "step": 56275 }, { "epoch": 3.823889115368936, "grad_norm": 1.00392746925354, "learning_rate": 0.000522013860578883, "loss": 3.6647, "step": 56280 }, { "epoch": 3.8242288354395977, "grad_norm": 1.240463376045227, "learning_rate": 0.0005219713955700502, "loss": 3.6492, "step": 56285 }, { "epoch": 3.8245685555102593, "grad_norm": 0.9950423240661621, "learning_rate": 0.0005219289305612176, "loss": 3.6989, "step": 56290 }, { "epoch": 3.8249082755809214, "grad_norm": 1.0007741451263428, "learning_rate": 0.0005218864655523849, "loss": 3.6167, "step": 56295 }, { "epoch": 3.825247995651583, "grad_norm": 0.6978954672813416, "learning_rate": 0.0005218440005435521, "loss": 3.5706, "step": 56300 }, { "epoch": 3.8255877157222447, "grad_norm": 0.8745890259742737, "learning_rate": 0.0005218015355347194, "loss": 3.526, "step": 56305 }, { "epoch": 3.8259274357929067, "grad_norm": 4.693668842315674, "learning_rate": 0.0005217590705258867, "loss": 3.7068, "step": 56310 }, { "epoch": 3.8262671558635684, "grad_norm": 0.716612696647644, "learning_rate": 0.0005217166055170539, "loss": 3.6121, "step": 56315 }, { "epoch": 3.82660687593423, "grad_norm": 0.8291292190551758, "learning_rate": 0.0005216741405082212, "loss": 3.2219, "step": 56320 }, { "epoch": 3.826946596004892, "grad_norm": 0.8352263569831848, "learning_rate": 0.0005216316754993885, "loss": 3.503, "step": 56325 }, { "epoch": 3.8272863160755537, "grad_norm": 0.7407249808311462, "learning_rate": 0.0005215892104905558, "loss": 3.4592, "step": 56330 }, { "epoch": 3.8276260361462153, "grad_norm": 0.8932452201843262, "learning_rate": 0.0005215467454817231, "loss": 3.5913, "step": 56335 }, { "epoch": 3.8279657562168774, "grad_norm": 0.7984388470649719, "learning_rate": 0.0005215042804728904, "loss": 3.5978, "step": 56340 }, { "epoch": 3.828305476287539, "grad_norm": 0.8221063017845154, "learning_rate": 0.0005214618154640576, "loss": 3.505, "step": 56345 }, { "epoch": 3.8286451963582007, "grad_norm": 0.8480583429336548, "learning_rate": 0.0005214193504552249, "loss": 3.7177, "step": 56350 }, { "epoch": 3.8289849164288627, "grad_norm": 1.1518019437789917, "learning_rate": 0.0005213768854463921, "loss": 3.1518, "step": 56355 }, { "epoch": 3.8293246364995244, "grad_norm": 0.896429181098938, "learning_rate": 0.0005213344204375594, "loss": 3.4817, "step": 56360 }, { "epoch": 3.829664356570186, "grad_norm": 0.9401424527168274, "learning_rate": 0.0005212919554287268, "loss": 3.6466, "step": 56365 }, { "epoch": 3.830004076640848, "grad_norm": 0.7908754944801331, "learning_rate": 0.000521249490419894, "loss": 3.5004, "step": 56370 }, { "epoch": 3.8303437967115097, "grad_norm": 1.1310920715332031, "learning_rate": 0.0005212070254110613, "loss": 3.4532, "step": 56375 }, { "epoch": 3.8306835167821713, "grad_norm": 0.9895877242088318, "learning_rate": 0.0005211645604022286, "loss": 3.4025, "step": 56380 }, { "epoch": 3.8310232368528334, "grad_norm": 0.9851517677307129, "learning_rate": 0.0005211220953933958, "loss": 3.4035, "step": 56385 }, { "epoch": 3.831362956923495, "grad_norm": 1.023783564567566, "learning_rate": 0.0005210796303845632, "loss": 3.4235, "step": 56390 }, { "epoch": 3.8317026769941567, "grad_norm": 1.093160629272461, "learning_rate": 0.0005210371653757304, "loss": 3.5946, "step": 56395 }, { "epoch": 3.8320423970648188, "grad_norm": 0.7521139979362488, "learning_rate": 0.0005209947003668977, "loss": 3.5783, "step": 56400 }, { "epoch": 3.8323821171354804, "grad_norm": 0.8869144916534424, "learning_rate": 0.000520952235358065, "loss": 3.6292, "step": 56405 }, { "epoch": 3.832721837206142, "grad_norm": 0.7833171486854553, "learning_rate": 0.0005209097703492323, "loss": 3.6418, "step": 56410 }, { "epoch": 3.833061557276804, "grad_norm": 0.864266574382782, "learning_rate": 0.0005208673053403995, "loss": 3.3513, "step": 56415 }, { "epoch": 3.8334012773474657, "grad_norm": 0.9043874144554138, "learning_rate": 0.0005208248403315668, "loss": 3.3321, "step": 56420 }, { "epoch": 3.8337409974181273, "grad_norm": 0.8984978199005127, "learning_rate": 0.0005207823753227341, "loss": 3.4075, "step": 56425 }, { "epoch": 3.8340807174887894, "grad_norm": 0.9726295471191406, "learning_rate": 0.0005207399103139013, "loss": 3.4485, "step": 56430 }, { "epoch": 3.834420437559451, "grad_norm": 0.948248028755188, "learning_rate": 0.0005206974453050687, "loss": 3.4806, "step": 56435 }, { "epoch": 3.8347601576301127, "grad_norm": 0.9400249123573303, "learning_rate": 0.000520654980296236, "loss": 3.2897, "step": 56440 }, { "epoch": 3.8350998777007748, "grad_norm": 0.8516218662261963, "learning_rate": 0.0005206125152874032, "loss": 3.5123, "step": 56445 }, { "epoch": 3.8354395977714364, "grad_norm": 0.8428704142570496, "learning_rate": 0.0005205700502785705, "loss": 3.5174, "step": 56450 }, { "epoch": 3.835779317842098, "grad_norm": 0.7582258582115173, "learning_rate": 0.0005205275852697377, "loss": 3.64, "step": 56455 }, { "epoch": 3.83611903791276, "grad_norm": 0.7963328957557678, "learning_rate": 0.000520485120260905, "loss": 3.6281, "step": 56460 }, { "epoch": 3.8364587579834217, "grad_norm": 0.8122186064720154, "learning_rate": 0.0005204426552520723, "loss": 3.4776, "step": 56465 }, { "epoch": 3.8367984780540834, "grad_norm": 0.8091825842857361, "learning_rate": 0.0005204001902432396, "loss": 3.4745, "step": 56470 }, { "epoch": 3.8371381981247454, "grad_norm": 0.9374104738235474, "learning_rate": 0.0005203577252344069, "loss": 3.6179, "step": 56475 }, { "epoch": 3.837477918195407, "grad_norm": 1.0840269327163696, "learning_rate": 0.0005203152602255742, "loss": 3.5384, "step": 56480 }, { "epoch": 3.8378176382660687, "grad_norm": 0.8359546065330505, "learning_rate": 0.0005202727952167414, "loss": 3.693, "step": 56485 }, { "epoch": 3.8381573583367308, "grad_norm": 1.3815990686416626, "learning_rate": 0.0005202303302079086, "loss": 3.5633, "step": 56490 }, { "epoch": 3.8384970784073924, "grad_norm": 0.7971181869506836, "learning_rate": 0.000520187865199076, "loss": 3.8695, "step": 56495 }, { "epoch": 3.838836798478054, "grad_norm": 1.3229036331176758, "learning_rate": 0.0005201454001902432, "loss": 3.4365, "step": 56500 }, { "epoch": 3.839176518548716, "grad_norm": 0.737005352973938, "learning_rate": 0.0005201029351814105, "loss": 3.5118, "step": 56505 }, { "epoch": 3.8395162386193777, "grad_norm": 0.9250162243843079, "learning_rate": 0.0005200604701725779, "loss": 3.7278, "step": 56510 }, { "epoch": 3.8398559586900394, "grad_norm": 0.8831732273101807, "learning_rate": 0.0005200180051637451, "loss": 3.614, "step": 56515 }, { "epoch": 3.8401956787607014, "grad_norm": 1.194557547569275, "learning_rate": 0.0005199755401549123, "loss": 3.64, "step": 56520 }, { "epoch": 3.840535398831363, "grad_norm": 0.7807213068008423, "learning_rate": 0.0005199330751460797, "loss": 3.3128, "step": 56525 }, { "epoch": 3.8408751189020247, "grad_norm": 0.9032267332077026, "learning_rate": 0.0005198906101372469, "loss": 3.5767, "step": 56530 }, { "epoch": 3.8412148389726863, "grad_norm": 1.3909187316894531, "learning_rate": 0.0005198481451284141, "loss": 3.3888, "step": 56535 }, { "epoch": 3.8415545590433484, "grad_norm": 0.9029501676559448, "learning_rate": 0.0005198056801195816, "loss": 3.7472, "step": 56540 }, { "epoch": 3.84189427911401, "grad_norm": 1.0491210222244263, "learning_rate": 0.0005197632151107488, "loss": 3.6895, "step": 56545 }, { "epoch": 3.8422339991846717, "grad_norm": 0.7877796292304993, "learning_rate": 0.000519720750101916, "loss": 3.6207, "step": 56550 }, { "epoch": 3.8425737192553338, "grad_norm": 0.9900450706481934, "learning_rate": 0.0005196782850930833, "loss": 3.711, "step": 56555 }, { "epoch": 3.8429134393259954, "grad_norm": 0.8556744456291199, "learning_rate": 0.0005196358200842506, "loss": 3.6598, "step": 56560 }, { "epoch": 3.843253159396657, "grad_norm": 0.8430207371711731, "learning_rate": 0.0005195933550754178, "loss": 3.4114, "step": 56565 }, { "epoch": 3.843592879467319, "grad_norm": 0.9363009333610535, "learning_rate": 0.0005195508900665851, "loss": 3.4848, "step": 56570 }, { "epoch": 3.8439325995379807, "grad_norm": 0.8533961772918701, "learning_rate": 0.0005195084250577525, "loss": 3.4063, "step": 56575 }, { "epoch": 3.8442723196086424, "grad_norm": 0.6958030462265015, "learning_rate": 0.0005194659600489197, "loss": 3.5669, "step": 56580 }, { "epoch": 3.844612039679304, "grad_norm": 0.9663972854614258, "learning_rate": 0.000519423495040087, "loss": 3.7466, "step": 56585 }, { "epoch": 3.844951759749966, "grad_norm": 1.4495911598205566, "learning_rate": 0.0005193810300312543, "loss": 3.3904, "step": 56590 }, { "epoch": 3.8452914798206277, "grad_norm": 0.7534974217414856, "learning_rate": 0.0005193385650224215, "loss": 3.3819, "step": 56595 }, { "epoch": 3.8456311998912893, "grad_norm": 1.2467442750930786, "learning_rate": 0.0005192961000135888, "loss": 3.6057, "step": 56600 }, { "epoch": 3.8459709199619514, "grad_norm": 0.8596462607383728, "learning_rate": 0.000519253635004756, "loss": 3.3968, "step": 56605 }, { "epoch": 3.846310640032613, "grad_norm": 0.7018900513648987, "learning_rate": 0.0005192111699959234, "loss": 3.3342, "step": 56610 }, { "epoch": 3.8466503601032747, "grad_norm": 1.3635196685791016, "learning_rate": 0.0005191687049870907, "loss": 3.7929, "step": 56615 }, { "epoch": 3.8469900801739367, "grad_norm": 0.7594895958900452, "learning_rate": 0.0005191262399782579, "loss": 3.3432, "step": 56620 }, { "epoch": 3.8473298002445984, "grad_norm": 0.751255989074707, "learning_rate": 0.0005190837749694252, "loss": 3.3112, "step": 56625 }, { "epoch": 3.84766952031526, "grad_norm": 1.0504775047302246, "learning_rate": 0.0005190413099605925, "loss": 3.6206, "step": 56630 }, { "epoch": 3.848009240385922, "grad_norm": 1.0043764114379883, "learning_rate": 0.0005189988449517597, "loss": 3.7035, "step": 56635 }, { "epoch": 3.8483489604565837, "grad_norm": 0.7751656174659729, "learning_rate": 0.0005189563799429269, "loss": 3.5102, "step": 56640 }, { "epoch": 3.8486886805272453, "grad_norm": 0.7378255724906921, "learning_rate": 0.0005189139149340944, "loss": 3.5598, "step": 56645 }, { "epoch": 3.8490284005979074, "grad_norm": 0.6752565503120422, "learning_rate": 0.0005188714499252616, "loss": 3.4617, "step": 56650 }, { "epoch": 3.849368120668569, "grad_norm": 0.8378898501396179, "learning_rate": 0.0005188289849164288, "loss": 3.4722, "step": 56655 }, { "epoch": 3.8497078407392307, "grad_norm": 0.7867488265037537, "learning_rate": 0.0005187865199075962, "loss": 3.6478, "step": 56660 }, { "epoch": 3.8500475608098927, "grad_norm": 1.0509885549545288, "learning_rate": 0.0005187440548987634, "loss": 3.41, "step": 56665 }, { "epoch": 3.8503872808805544, "grad_norm": 0.7209457159042358, "learning_rate": 0.0005187015898899306, "loss": 3.2945, "step": 56670 }, { "epoch": 3.850727000951216, "grad_norm": 0.8037388324737549, "learning_rate": 0.000518659124881098, "loss": 3.6782, "step": 56675 }, { "epoch": 3.851066721021878, "grad_norm": 1.1898324489593506, "learning_rate": 0.0005186166598722653, "loss": 3.456, "step": 56680 }, { "epoch": 3.8514064410925397, "grad_norm": 0.9519771933555603, "learning_rate": 0.0005185741948634325, "loss": 3.2943, "step": 56685 }, { "epoch": 3.8517461611632013, "grad_norm": 0.8820488452911377, "learning_rate": 0.0005185317298545999, "loss": 3.516, "step": 56690 }, { "epoch": 3.8520858812338634, "grad_norm": 1.0607048273086548, "learning_rate": 0.0005184892648457671, "loss": 3.7124, "step": 56695 }, { "epoch": 3.852425601304525, "grad_norm": 0.9673507809638977, "learning_rate": 0.0005184467998369343, "loss": 3.3364, "step": 56700 }, { "epoch": 3.8527653213751867, "grad_norm": 0.9544147253036499, "learning_rate": 0.0005184043348281016, "loss": 3.2988, "step": 56705 }, { "epoch": 3.8531050414458488, "grad_norm": 1.161620020866394, "learning_rate": 0.0005183618698192689, "loss": 3.4615, "step": 56710 }, { "epoch": 3.8534447615165104, "grad_norm": 0.9028362035751343, "learning_rate": 0.0005183194048104362, "loss": 3.4849, "step": 56715 }, { "epoch": 3.853784481587172, "grad_norm": 0.6639185547828674, "learning_rate": 0.0005182769398016035, "loss": 3.4402, "step": 56720 }, { "epoch": 3.854124201657834, "grad_norm": 0.8959195613861084, "learning_rate": 0.0005182344747927708, "loss": 3.5263, "step": 56725 }, { "epoch": 3.8544639217284957, "grad_norm": 0.9006872177124023, "learning_rate": 0.0005181920097839381, "loss": 3.5691, "step": 56730 }, { "epoch": 3.8548036417991574, "grad_norm": 0.7706353664398193, "learning_rate": 0.0005181495447751053, "loss": 3.4538, "step": 56735 }, { "epoch": 3.8551433618698194, "grad_norm": 1.0322017669677734, "learning_rate": 0.0005181070797662725, "loss": 3.6076, "step": 56740 }, { "epoch": 3.855483081940481, "grad_norm": 1.0281637907028198, "learning_rate": 0.00051806461475744, "loss": 3.3085, "step": 56745 }, { "epoch": 3.8558228020111427, "grad_norm": 0.7633823156356812, "learning_rate": 0.0005180221497486072, "loss": 3.5971, "step": 56750 }, { "epoch": 3.8561625220818048, "grad_norm": 0.7422765493392944, "learning_rate": 0.0005179796847397744, "loss": 3.5819, "step": 56755 }, { "epoch": 3.8565022421524664, "grad_norm": 0.7503259778022766, "learning_rate": 0.0005179372197309418, "loss": 3.5966, "step": 56760 }, { "epoch": 3.856841962223128, "grad_norm": 0.7888094186782837, "learning_rate": 0.000517894754722109, "loss": 3.3633, "step": 56765 }, { "epoch": 3.85718168229379, "grad_norm": 1.1008520126342773, "learning_rate": 0.0005178522897132762, "loss": 3.7545, "step": 56770 }, { "epoch": 3.8575214023644517, "grad_norm": 0.7092001438140869, "learning_rate": 0.0005178098247044436, "loss": 3.5847, "step": 56775 }, { "epoch": 3.8578611224351134, "grad_norm": 0.8815507292747498, "learning_rate": 0.0005177673596956109, "loss": 3.4544, "step": 56780 }, { "epoch": 3.8582008425057754, "grad_norm": 0.7847651839256287, "learning_rate": 0.0005177248946867781, "loss": 3.4706, "step": 56785 }, { "epoch": 3.858540562576437, "grad_norm": 0.8807682394981384, "learning_rate": 0.0005176824296779455, "loss": 3.3279, "step": 56790 }, { "epoch": 3.8588802826470987, "grad_norm": 0.6724677681922913, "learning_rate": 0.0005176399646691127, "loss": 3.6405, "step": 56795 }, { "epoch": 3.8592200027177608, "grad_norm": 0.9270042181015015, "learning_rate": 0.0005175974996602799, "loss": 3.3612, "step": 56800 }, { "epoch": 3.8595597227884224, "grad_norm": 0.9784954190254211, "learning_rate": 0.0005175550346514472, "loss": 3.4072, "step": 56805 }, { "epoch": 3.859899442859084, "grad_norm": 0.834963858127594, "learning_rate": 0.0005175125696426145, "loss": 3.5618, "step": 56810 }, { "epoch": 3.860239162929746, "grad_norm": 2.808793067932129, "learning_rate": 0.0005174701046337818, "loss": 3.403, "step": 56815 }, { "epoch": 3.8605788830004077, "grad_norm": 0.9575893878936768, "learning_rate": 0.0005174276396249491, "loss": 3.6383, "step": 56820 }, { "epoch": 3.8609186030710694, "grad_norm": 0.8541870713233948, "learning_rate": 0.0005173851746161164, "loss": 3.5109, "step": 56825 }, { "epoch": 3.8612583231417315, "grad_norm": 1.168784737586975, "learning_rate": 0.0005173427096072836, "loss": 3.4009, "step": 56830 }, { "epoch": 3.861598043212393, "grad_norm": 0.8494080901145935, "learning_rate": 0.0005173002445984509, "loss": 3.3637, "step": 56835 }, { "epoch": 3.8619377632830547, "grad_norm": 0.9269217252731323, "learning_rate": 0.0005172577795896181, "loss": 3.3499, "step": 56840 }, { "epoch": 3.862277483353717, "grad_norm": 0.9536073207855225, "learning_rate": 0.0005172153145807854, "loss": 3.265, "step": 56845 }, { "epoch": 3.8626172034243784, "grad_norm": 0.8348523378372192, "learning_rate": 0.0005171728495719528, "loss": 3.2388, "step": 56850 }, { "epoch": 3.86295692349504, "grad_norm": 0.9283326268196106, "learning_rate": 0.00051713038456312, "loss": 3.6327, "step": 56855 }, { "epoch": 3.863296643565702, "grad_norm": 0.7020705342292786, "learning_rate": 0.0005170879195542873, "loss": 3.2453, "step": 56860 }, { "epoch": 3.8636363636363638, "grad_norm": 1.2104182243347168, "learning_rate": 0.0005170454545454546, "loss": 3.2154, "step": 56865 }, { "epoch": 3.8639760837070254, "grad_norm": 0.8381550908088684, "learning_rate": 0.0005170029895366218, "loss": 3.5047, "step": 56870 }, { "epoch": 3.864315803777687, "grad_norm": 0.8751277327537537, "learning_rate": 0.000516960524527789, "loss": 3.8065, "step": 56875 }, { "epoch": 3.864655523848349, "grad_norm": 0.9942311644554138, "learning_rate": 0.0005169180595189564, "loss": 3.7295, "step": 56880 }, { "epoch": 3.8649952439190107, "grad_norm": 1.1221792697906494, "learning_rate": 0.0005168755945101237, "loss": 3.4266, "step": 56885 }, { "epoch": 3.8653349639896724, "grad_norm": 0.8594583868980408, "learning_rate": 0.000516833129501291, "loss": 3.5325, "step": 56890 }, { "epoch": 3.8656746840603344, "grad_norm": 0.8620944619178772, "learning_rate": 0.0005167906644924583, "loss": 3.5142, "step": 56895 }, { "epoch": 3.866014404130996, "grad_norm": 0.8854902982711792, "learning_rate": 0.0005167481994836255, "loss": 3.5443, "step": 56900 }, { "epoch": 3.8663541242016577, "grad_norm": 0.8330186605453491, "learning_rate": 0.0005167057344747927, "loss": 3.4999, "step": 56905 }, { "epoch": 3.8666938442723198, "grad_norm": 0.971399188041687, "learning_rate": 0.0005166632694659601, "loss": 3.7473, "step": 56910 }, { "epoch": 3.8670335643429814, "grad_norm": 1.0026673078536987, "learning_rate": 0.0005166208044571273, "loss": 3.5594, "step": 56915 }, { "epoch": 3.867373284413643, "grad_norm": 0.8057916760444641, "learning_rate": 0.0005165783394482946, "loss": 3.5488, "step": 56920 }, { "epoch": 3.8677130044843047, "grad_norm": 0.8402663469314575, "learning_rate": 0.000516535874439462, "loss": 3.3995, "step": 56925 }, { "epoch": 3.8680527245549667, "grad_norm": 0.9617754220962524, "learning_rate": 0.0005164934094306292, "loss": 3.5485, "step": 56930 }, { "epoch": 3.8683924446256284, "grad_norm": 0.9858745336532593, "learning_rate": 0.0005164509444217964, "loss": 3.3319, "step": 56935 }, { "epoch": 3.86873216469629, "grad_norm": 1.46211576461792, "learning_rate": 0.0005164084794129637, "loss": 3.6278, "step": 56940 }, { "epoch": 3.869071884766952, "grad_norm": 0.9968897700309753, "learning_rate": 0.000516366014404131, "loss": 3.7801, "step": 56945 }, { "epoch": 3.8694116048376137, "grad_norm": 0.8682659268379211, "learning_rate": 0.0005163235493952982, "loss": 3.4278, "step": 56950 }, { "epoch": 3.8697513249082753, "grad_norm": 0.9458279013633728, "learning_rate": 0.0005162810843864656, "loss": 3.499, "step": 56955 }, { "epoch": 3.8700910449789374, "grad_norm": 1.1017022132873535, "learning_rate": 0.0005162386193776329, "loss": 3.6844, "step": 56960 }, { "epoch": 3.870430765049599, "grad_norm": 0.8443915843963623, "learning_rate": 0.0005161961543688001, "loss": 3.6566, "step": 56965 }, { "epoch": 3.8707704851202607, "grad_norm": 0.9440657496452332, "learning_rate": 0.0005161536893599674, "loss": 3.5465, "step": 56970 }, { "epoch": 3.8711102051909227, "grad_norm": 1.0590320825576782, "learning_rate": 0.0005161112243511347, "loss": 3.435, "step": 56975 }, { "epoch": 3.8714499252615844, "grad_norm": 0.7551957368850708, "learning_rate": 0.0005160687593423019, "loss": 3.5542, "step": 56980 }, { "epoch": 3.871789645332246, "grad_norm": 1.1121883392333984, "learning_rate": 0.0005160262943334692, "loss": 3.1299, "step": 56985 }, { "epoch": 3.872129365402908, "grad_norm": 1.3470536470413208, "learning_rate": 0.0005159838293246365, "loss": 3.5815, "step": 56990 }, { "epoch": 3.8724690854735697, "grad_norm": 0.8728624582290649, "learning_rate": 0.0005159413643158038, "loss": 3.4315, "step": 56995 }, { "epoch": 3.8728088055442313, "grad_norm": 0.7523467540740967, "learning_rate": 0.0005158988993069711, "loss": 3.4652, "step": 57000 }, { "epoch": 3.8731485256148934, "grad_norm": 0.7703983783721924, "learning_rate": 0.0005158564342981383, "loss": 3.3774, "step": 57005 }, { "epoch": 3.873488245685555, "grad_norm": 1.0352245569229126, "learning_rate": 0.0005158139692893056, "loss": 3.4683, "step": 57010 }, { "epoch": 3.8738279657562167, "grad_norm": 1.3914529085159302, "learning_rate": 0.0005157715042804729, "loss": 3.4494, "step": 57015 }, { "epoch": 3.8741676858268788, "grad_norm": 0.9471056461334229, "learning_rate": 0.0005157290392716401, "loss": 3.6172, "step": 57020 }, { "epoch": 3.8745074058975404, "grad_norm": 0.8569105267524719, "learning_rate": 0.0005156865742628075, "loss": 3.4887, "step": 57025 }, { "epoch": 3.874847125968202, "grad_norm": 0.9105156064033508, "learning_rate": 0.0005156441092539748, "loss": 3.3621, "step": 57030 }, { "epoch": 3.875186846038864, "grad_norm": 1.0738773345947266, "learning_rate": 0.000515601644245142, "loss": 3.5846, "step": 57035 }, { "epoch": 3.8755265661095257, "grad_norm": 0.7471365928649902, "learning_rate": 0.0005155591792363092, "loss": 3.365, "step": 57040 }, { "epoch": 3.8758662861801874, "grad_norm": 0.8757900595664978, "learning_rate": 0.0005155167142274766, "loss": 3.3868, "step": 57045 }, { "epoch": 3.8762060062508494, "grad_norm": 1.2710239887237549, "learning_rate": 0.0005154742492186438, "loss": 3.465, "step": 57050 }, { "epoch": 3.876545726321511, "grad_norm": 1.083652138710022, "learning_rate": 0.000515431784209811, "loss": 3.3321, "step": 57055 }, { "epoch": 3.8768854463921727, "grad_norm": 0.7637492418289185, "learning_rate": 0.0005153893192009785, "loss": 3.5403, "step": 57060 }, { "epoch": 3.8772251664628348, "grad_norm": 0.913613498210907, "learning_rate": 0.0005153468541921457, "loss": 3.219, "step": 57065 }, { "epoch": 3.8775648865334964, "grad_norm": 0.7874297499656677, "learning_rate": 0.000515304389183313, "loss": 3.3399, "step": 57070 }, { "epoch": 3.877904606604158, "grad_norm": 0.9019875526428223, "learning_rate": 0.0005152619241744803, "loss": 3.7198, "step": 57075 }, { "epoch": 3.87824432667482, "grad_norm": 0.9436920881271362, "learning_rate": 0.0005152194591656475, "loss": 3.4572, "step": 57080 }, { "epoch": 3.8785840467454817, "grad_norm": 0.9355970621109009, "learning_rate": 0.0005151769941568148, "loss": 3.8913, "step": 57085 }, { "epoch": 3.8789237668161434, "grad_norm": 1.0530781745910645, "learning_rate": 0.000515134529147982, "loss": 3.7054, "step": 57090 }, { "epoch": 3.8792634868868054, "grad_norm": 1.028910517692566, "learning_rate": 0.0005150920641391494, "loss": 3.4881, "step": 57095 }, { "epoch": 3.879603206957467, "grad_norm": 0.9683576822280884, "learning_rate": 0.0005150495991303167, "loss": 3.4541, "step": 57100 }, { "epoch": 3.8799429270281287, "grad_norm": 0.8740150332450867, "learning_rate": 0.0005150071341214839, "loss": 3.5283, "step": 57105 }, { "epoch": 3.880282647098791, "grad_norm": 0.8849396109580994, "learning_rate": 0.0005149646691126512, "loss": 3.4975, "step": 57110 }, { "epoch": 3.8806223671694524, "grad_norm": 0.7530997395515442, "learning_rate": 0.0005149222041038185, "loss": 3.5231, "step": 57115 }, { "epoch": 3.880962087240114, "grad_norm": 0.9675373435020447, "learning_rate": 0.0005148797390949857, "loss": 3.4312, "step": 57120 }, { "epoch": 3.881301807310776, "grad_norm": 0.8525757193565369, "learning_rate": 0.000514837274086153, "loss": 3.4738, "step": 57125 }, { "epoch": 3.8816415273814378, "grad_norm": 0.8331127762794495, "learning_rate": 0.0005147948090773204, "loss": 3.6299, "step": 57130 }, { "epoch": 3.8819812474520994, "grad_norm": 0.8812710046768188, "learning_rate": 0.0005147523440684876, "loss": 3.3453, "step": 57135 }, { "epoch": 3.8823209675227615, "grad_norm": 0.8526292443275452, "learning_rate": 0.0005147098790596548, "loss": 3.4998, "step": 57140 }, { "epoch": 3.882660687593423, "grad_norm": 0.7922732830047607, "learning_rate": 0.0005146674140508222, "loss": 3.3101, "step": 57145 }, { "epoch": 3.8830004076640847, "grad_norm": 0.7912677526473999, "learning_rate": 0.0005146249490419894, "loss": 3.4137, "step": 57150 }, { "epoch": 3.883340127734747, "grad_norm": 2.3813555240631104, "learning_rate": 0.0005145824840331566, "loss": 3.3924, "step": 57155 }, { "epoch": 3.8836798478054084, "grad_norm": 0.8601946234703064, "learning_rate": 0.000514540019024324, "loss": 3.8621, "step": 57160 }, { "epoch": 3.88401956787607, "grad_norm": 0.9431272149085999, "learning_rate": 0.0005144975540154913, "loss": 3.6281, "step": 57165 }, { "epoch": 3.884359287946732, "grad_norm": 0.7753309011459351, "learning_rate": 0.0005144550890066585, "loss": 3.5319, "step": 57170 }, { "epoch": 3.8846990080173938, "grad_norm": 0.938681960105896, "learning_rate": 0.0005144126239978259, "loss": 3.3856, "step": 57175 }, { "epoch": 3.8850387280880554, "grad_norm": 0.9258414506912231, "learning_rate": 0.0005143701589889931, "loss": 3.2998, "step": 57180 }, { "epoch": 3.8853784481587175, "grad_norm": 0.8434944748878479, "learning_rate": 0.0005143276939801603, "loss": 3.5003, "step": 57185 }, { "epoch": 3.885718168229379, "grad_norm": 0.7840365767478943, "learning_rate": 0.0005142852289713276, "loss": 3.5619, "step": 57190 }, { "epoch": 3.8860578883000407, "grad_norm": 0.7357649207115173, "learning_rate": 0.0005142427639624949, "loss": 3.5696, "step": 57195 }, { "epoch": 3.886397608370703, "grad_norm": 0.7472124695777893, "learning_rate": 0.0005142002989536622, "loss": 3.2473, "step": 57200 }, { "epoch": 3.8867373284413644, "grad_norm": 0.8440210819244385, "learning_rate": 0.0005141578339448295, "loss": 3.5664, "step": 57205 }, { "epoch": 3.887077048512026, "grad_norm": 0.7241885662078857, "learning_rate": 0.0005141153689359968, "loss": 3.6276, "step": 57210 }, { "epoch": 3.887416768582688, "grad_norm": 1.3372856378555298, "learning_rate": 0.000514072903927164, "loss": 3.6902, "step": 57215 }, { "epoch": 3.8877564886533498, "grad_norm": 0.82537841796875, "learning_rate": 0.0005140304389183313, "loss": 3.5656, "step": 57220 }, { "epoch": 3.8880962087240114, "grad_norm": 0.9292195439338684, "learning_rate": 0.0005139879739094985, "loss": 3.4186, "step": 57225 }, { "epoch": 3.888435928794673, "grad_norm": 1.121232032775879, "learning_rate": 0.0005139455089006658, "loss": 3.3884, "step": 57230 }, { "epoch": 3.888775648865335, "grad_norm": 0.8992807865142822, "learning_rate": 0.0005139030438918332, "loss": 3.5825, "step": 57235 }, { "epoch": 3.8891153689359967, "grad_norm": 0.7218926548957825, "learning_rate": 0.0005138605788830004, "loss": 3.361, "step": 57240 }, { "epoch": 3.8894550890066584, "grad_norm": 0.8430278897285461, "learning_rate": 0.0005138181138741677, "loss": 3.418, "step": 57245 }, { "epoch": 3.8897948090773204, "grad_norm": 0.7378146648406982, "learning_rate": 0.000513775648865335, "loss": 3.3837, "step": 57250 }, { "epoch": 3.890134529147982, "grad_norm": 0.8218130469322205, "learning_rate": 0.0005137331838565022, "loss": 3.573, "step": 57255 }, { "epoch": 3.8904742492186437, "grad_norm": 0.9248813390731812, "learning_rate": 0.0005136907188476695, "loss": 3.5349, "step": 57260 }, { "epoch": 3.8908139692893053, "grad_norm": 1.049005389213562, "learning_rate": 0.0005136482538388368, "loss": 3.6043, "step": 57265 }, { "epoch": 3.8911536893599674, "grad_norm": 0.904442310333252, "learning_rate": 0.0005136057888300041, "loss": 3.3792, "step": 57270 }, { "epoch": 3.891493409430629, "grad_norm": 1.3326131105422974, "learning_rate": 0.0005135633238211714, "loss": 3.5085, "step": 57275 }, { "epoch": 3.8918331295012907, "grad_norm": 0.8558046221733093, "learning_rate": 0.0005135208588123387, "loss": 3.3705, "step": 57280 }, { "epoch": 3.8921728495719528, "grad_norm": 0.8421690464019775, "learning_rate": 0.0005134783938035059, "loss": 3.5558, "step": 57285 }, { "epoch": 3.8925125696426144, "grad_norm": 0.8939539194107056, "learning_rate": 0.0005134359287946731, "loss": 3.6272, "step": 57290 }, { "epoch": 3.892852289713276, "grad_norm": 0.8592181205749512, "learning_rate": 0.0005133934637858405, "loss": 3.6238, "step": 57295 }, { "epoch": 3.893192009783938, "grad_norm": 1.068464994430542, "learning_rate": 0.0005133509987770077, "loss": 3.5182, "step": 57300 }, { "epoch": 3.8935317298545997, "grad_norm": 0.8556610345840454, "learning_rate": 0.000513308533768175, "loss": 3.7974, "step": 57305 }, { "epoch": 3.8938714499252614, "grad_norm": 1.110970377922058, "learning_rate": 0.0005132660687593424, "loss": 3.5784, "step": 57310 }, { "epoch": 3.8942111699959234, "grad_norm": 1.2706059217453003, "learning_rate": 0.0005132236037505096, "loss": 3.5925, "step": 57315 }, { "epoch": 3.894550890066585, "grad_norm": 1.3484011888504028, "learning_rate": 0.0005131811387416768, "loss": 3.6465, "step": 57320 }, { "epoch": 3.8948906101372467, "grad_norm": 0.8560666441917419, "learning_rate": 0.0005131386737328442, "loss": 3.5376, "step": 57325 }, { "epoch": 3.8952303302079088, "grad_norm": 0.7510702013969421, "learning_rate": 0.0005130962087240114, "loss": 3.5653, "step": 57330 }, { "epoch": 3.8955700502785704, "grad_norm": 0.9162725210189819, "learning_rate": 0.0005130537437151786, "loss": 3.5778, "step": 57335 }, { "epoch": 3.895909770349232, "grad_norm": 0.8164543509483337, "learning_rate": 0.000513011278706346, "loss": 3.3287, "step": 57340 }, { "epoch": 3.896249490419894, "grad_norm": 1.0346342325210571, "learning_rate": 0.0005129688136975133, "loss": 3.5921, "step": 57345 }, { "epoch": 3.8965892104905557, "grad_norm": 0.6747474670410156, "learning_rate": 0.0005129263486886805, "loss": 3.8719, "step": 57350 }, { "epoch": 3.8969289305612174, "grad_norm": 0.7605888247489929, "learning_rate": 0.0005128838836798478, "loss": 3.6179, "step": 57355 }, { "epoch": 3.8972686506318794, "grad_norm": 0.8844541907310486, "learning_rate": 0.0005128414186710151, "loss": 3.5693, "step": 57360 }, { "epoch": 3.897608370702541, "grad_norm": 0.8584783673286438, "learning_rate": 0.0005127989536621823, "loss": 3.6423, "step": 57365 }, { "epoch": 3.8979480907732027, "grad_norm": 0.8506049513816833, "learning_rate": 0.0005127564886533497, "loss": 3.6482, "step": 57370 }, { "epoch": 3.8982878108438648, "grad_norm": 1.076507806777954, "learning_rate": 0.000512714023644517, "loss": 3.7152, "step": 57375 }, { "epoch": 3.8986275309145264, "grad_norm": 1.1912646293640137, "learning_rate": 0.0005126715586356842, "loss": 3.7478, "step": 57380 }, { "epoch": 3.898967250985188, "grad_norm": 0.7257078886032104, "learning_rate": 0.0005126290936268515, "loss": 3.4808, "step": 57385 }, { "epoch": 3.89930697105585, "grad_norm": 0.994621217250824, "learning_rate": 0.0005125866286180187, "loss": 3.6285, "step": 57390 }, { "epoch": 3.8996466911265117, "grad_norm": 1.2263237237930298, "learning_rate": 0.000512544163609186, "loss": 3.4403, "step": 57395 }, { "epoch": 3.8999864111971734, "grad_norm": 1.171305775642395, "learning_rate": 0.0005125016986003533, "loss": 3.3856, "step": 57400 }, { "epoch": 3.9003261312678354, "grad_norm": 0.9217913150787354, "learning_rate": 0.0005124592335915206, "loss": 3.3311, "step": 57405 }, { "epoch": 3.900665851338497, "grad_norm": 0.7820585370063782, "learning_rate": 0.000512416768582688, "loss": 3.3996, "step": 57410 }, { "epoch": 3.9010055714091587, "grad_norm": 0.8422624468803406, "learning_rate": 0.0005123743035738552, "loss": 3.3056, "step": 57415 }, { "epoch": 3.901345291479821, "grad_norm": 0.9750136137008667, "learning_rate": 0.0005123318385650224, "loss": 3.4811, "step": 57420 }, { "epoch": 3.9016850115504824, "grad_norm": 0.9572112560272217, "learning_rate": 0.0005122893735561898, "loss": 3.575, "step": 57425 }, { "epoch": 3.902024731621144, "grad_norm": 0.9061694741249084, "learning_rate": 0.000512246908547357, "loss": 3.5866, "step": 57430 }, { "epoch": 3.902364451691806, "grad_norm": 1.2927359342575073, "learning_rate": 0.0005122044435385242, "loss": 3.424, "step": 57435 }, { "epoch": 3.9027041717624678, "grad_norm": 0.7477893233299255, "learning_rate": 0.0005121619785296916, "loss": 3.5954, "step": 57440 }, { "epoch": 3.9030438918331294, "grad_norm": 1.0782461166381836, "learning_rate": 0.0005121195135208589, "loss": 3.2305, "step": 57445 }, { "epoch": 3.9033836119037915, "grad_norm": 0.7344518303871155, "learning_rate": 0.0005120770485120261, "loss": 3.3471, "step": 57450 }, { "epoch": 3.903723331974453, "grad_norm": 0.8119267225265503, "learning_rate": 0.0005120345835031934, "loss": 3.7283, "step": 57455 }, { "epoch": 3.9040630520451147, "grad_norm": 0.7091952562332153, "learning_rate": 0.0005119921184943607, "loss": 3.5513, "step": 57460 }, { "epoch": 3.904402772115777, "grad_norm": 0.7994626760482788, "learning_rate": 0.0005119496534855279, "loss": 3.602, "step": 57465 }, { "epoch": 3.9047424921864384, "grad_norm": 1.053463339805603, "learning_rate": 0.0005119071884766952, "loss": 3.5018, "step": 57470 }, { "epoch": 3.9050822122571, "grad_norm": 0.8438632488250732, "learning_rate": 0.0005118647234678626, "loss": 3.3809, "step": 57475 }, { "epoch": 3.905421932327762, "grad_norm": 1.4400862455368042, "learning_rate": 0.0005118222584590298, "loss": 3.5545, "step": 57480 }, { "epoch": 3.9057616523984238, "grad_norm": 1.162955641746521, "learning_rate": 0.0005117797934501971, "loss": 3.4222, "step": 57485 }, { "epoch": 3.9061013724690854, "grad_norm": 1.1513211727142334, "learning_rate": 0.0005117373284413643, "loss": 3.0722, "step": 57490 }, { "epoch": 3.9064410925397475, "grad_norm": 1.0799967050552368, "learning_rate": 0.0005116948634325316, "loss": 3.5244, "step": 57495 }, { "epoch": 3.906780812610409, "grad_norm": 0.7801952958106995, "learning_rate": 0.0005116523984236989, "loss": 3.5727, "step": 57500 }, { "epoch": 3.9071205326810707, "grad_norm": 0.731644868850708, "learning_rate": 0.0005116099334148661, "loss": 3.593, "step": 57505 }, { "epoch": 3.907460252751733, "grad_norm": 0.7126206159591675, "learning_rate": 0.0005115674684060335, "loss": 3.3499, "step": 57510 }, { "epoch": 3.9077999728223944, "grad_norm": 0.9671463966369629, "learning_rate": 0.0005115250033972008, "loss": 3.5789, "step": 57515 }, { "epoch": 3.908139692893056, "grad_norm": 0.8821979761123657, "learning_rate": 0.000511482538388368, "loss": 3.5914, "step": 57520 }, { "epoch": 3.908479412963718, "grad_norm": 1.115835428237915, "learning_rate": 0.0005114400733795352, "loss": 3.278, "step": 57525 }, { "epoch": 3.90881913303438, "grad_norm": 1.2433019876480103, "learning_rate": 0.0005113976083707026, "loss": 3.4583, "step": 57530 }, { "epoch": 3.9091588531050414, "grad_norm": 0.9704692363739014, "learning_rate": 0.0005113551433618698, "loss": 3.628, "step": 57535 }, { "epoch": 3.9094985731757035, "grad_norm": 0.9200735688209534, "learning_rate": 0.000511312678353037, "loss": 3.4703, "step": 57540 }, { "epoch": 3.909838293246365, "grad_norm": 0.7929883599281311, "learning_rate": 0.0005112702133442045, "loss": 3.5966, "step": 57545 }, { "epoch": 3.9101780133170267, "grad_norm": 1.006364107131958, "learning_rate": 0.0005112277483353717, "loss": 3.5069, "step": 57550 }, { "epoch": 3.910517733387689, "grad_norm": 0.947656512260437, "learning_rate": 0.0005111852833265389, "loss": 3.395, "step": 57555 }, { "epoch": 3.9108574534583505, "grad_norm": 0.8188271522521973, "learning_rate": 0.0005111428183177063, "loss": 3.6131, "step": 57560 }, { "epoch": 3.911197173529012, "grad_norm": 0.8153405785560608, "learning_rate": 0.0005111003533088735, "loss": 3.5333, "step": 57565 }, { "epoch": 3.9115368935996737, "grad_norm": 0.7255359292030334, "learning_rate": 0.0005110578883000407, "loss": 3.4606, "step": 57570 }, { "epoch": 3.911876613670336, "grad_norm": 0.9502444863319397, "learning_rate": 0.000511015423291208, "loss": 3.4896, "step": 57575 }, { "epoch": 3.9122163337409974, "grad_norm": 0.7664276361465454, "learning_rate": 0.0005109729582823754, "loss": 3.555, "step": 57580 }, { "epoch": 3.912556053811659, "grad_norm": 0.8619721531867981, "learning_rate": 0.0005109304932735426, "loss": 3.4382, "step": 57585 }, { "epoch": 3.912895773882321, "grad_norm": 0.9719002842903137, "learning_rate": 0.0005108880282647099, "loss": 3.7332, "step": 57590 }, { "epoch": 3.9132354939529828, "grad_norm": 1.02799654006958, "learning_rate": 0.0005108455632558772, "loss": 3.3307, "step": 57595 }, { "epoch": 3.9135752140236444, "grad_norm": 1.0915124416351318, "learning_rate": 0.0005108030982470444, "loss": 3.5541, "step": 57600 }, { "epoch": 3.913914934094306, "grad_norm": 0.9150426983833313, "learning_rate": 0.0005107606332382117, "loss": 3.6145, "step": 57605 }, { "epoch": 3.914254654164968, "grad_norm": 0.8408659100532532, "learning_rate": 0.000510718168229379, "loss": 3.473, "step": 57610 }, { "epoch": 3.9145943742356297, "grad_norm": 1.0726178884506226, "learning_rate": 0.0005106757032205463, "loss": 3.5011, "step": 57615 }, { "epoch": 3.9149340943062914, "grad_norm": 0.8507474660873413, "learning_rate": 0.0005106332382117136, "loss": 3.5434, "step": 57620 }, { "epoch": 3.9152738143769534, "grad_norm": 0.895517885684967, "learning_rate": 0.0005105907732028808, "loss": 3.7419, "step": 57625 }, { "epoch": 3.915613534447615, "grad_norm": 1.185979962348938, "learning_rate": 0.0005105483081940481, "loss": 3.4073, "step": 57630 }, { "epoch": 3.9159532545182767, "grad_norm": 0.8075200319290161, "learning_rate": 0.0005105058431852154, "loss": 3.6822, "step": 57635 }, { "epoch": 3.9162929745889388, "grad_norm": 0.9030041098594666, "learning_rate": 0.0005104633781763826, "loss": 3.627, "step": 57640 }, { "epoch": 3.9166326946596004, "grad_norm": 0.8038302659988403, "learning_rate": 0.0005104209131675499, "loss": 3.7073, "step": 57645 }, { "epoch": 3.916972414730262, "grad_norm": 0.7335073947906494, "learning_rate": 0.0005103784481587173, "loss": 3.3522, "step": 57650 }, { "epoch": 3.917312134800924, "grad_norm": 0.9355706572532654, "learning_rate": 0.0005103359831498845, "loss": 3.3473, "step": 57655 }, { "epoch": 3.9176518548715857, "grad_norm": 1.360446572303772, "learning_rate": 0.0005102935181410518, "loss": 3.2673, "step": 57660 }, { "epoch": 3.9179915749422474, "grad_norm": 0.6115610599517822, "learning_rate": 0.0005102510531322191, "loss": 3.6815, "step": 57665 }, { "epoch": 3.9183312950129094, "grad_norm": 1.0875880718231201, "learning_rate": 0.0005102085881233863, "loss": 3.4687, "step": 57670 }, { "epoch": 3.918671015083571, "grad_norm": 0.7535082101821899, "learning_rate": 0.0005101661231145535, "loss": 3.7056, "step": 57675 }, { "epoch": 3.9190107351542327, "grad_norm": 0.7272796630859375, "learning_rate": 0.0005101236581057209, "loss": 3.5094, "step": 57680 }, { "epoch": 3.919350455224895, "grad_norm": 0.8328291773796082, "learning_rate": 0.0005100811930968882, "loss": 3.5007, "step": 57685 }, { "epoch": 3.9196901752955564, "grad_norm": 0.7181466221809387, "learning_rate": 0.0005100387280880554, "loss": 3.547, "step": 57690 }, { "epoch": 3.920029895366218, "grad_norm": 0.9408383369445801, "learning_rate": 0.0005099962630792228, "loss": 3.8982, "step": 57695 }, { "epoch": 3.92036961543688, "grad_norm": 0.8429552316665649, "learning_rate": 0.00050995379807039, "loss": 3.7638, "step": 57700 }, { "epoch": 3.9207093355075417, "grad_norm": 0.7025629281997681, "learning_rate": 0.0005099113330615572, "loss": 3.6959, "step": 57705 }, { "epoch": 3.9210490555782034, "grad_norm": 1.2279465198516846, "learning_rate": 0.0005098688680527246, "loss": 3.4879, "step": 57710 }, { "epoch": 3.9213887756488655, "grad_norm": 1.05165433883667, "learning_rate": 0.0005098264030438918, "loss": 3.3845, "step": 57715 }, { "epoch": 3.921728495719527, "grad_norm": 1.0895110368728638, "learning_rate": 0.0005097839380350591, "loss": 3.2854, "step": 57720 }, { "epoch": 3.9220682157901887, "grad_norm": 0.7939515709877014, "learning_rate": 0.0005097414730262264, "loss": 3.4283, "step": 57725 }, { "epoch": 3.922407935860851, "grad_norm": 1.3596566915512085, "learning_rate": 0.0005096990080173937, "loss": 3.5282, "step": 57730 }, { "epoch": 3.9227476559315124, "grad_norm": 0.8200575113296509, "learning_rate": 0.0005096565430085609, "loss": 3.5375, "step": 57735 }, { "epoch": 3.923087376002174, "grad_norm": 0.9607060551643372, "learning_rate": 0.0005096140779997282, "loss": 3.3503, "step": 57740 }, { "epoch": 3.923427096072836, "grad_norm": 0.8489375710487366, "learning_rate": 0.0005095716129908955, "loss": 3.394, "step": 57745 }, { "epoch": 3.9237668161434978, "grad_norm": 0.9171178936958313, "learning_rate": 0.0005095291479820628, "loss": 3.6719, "step": 57750 }, { "epoch": 3.9241065362141594, "grad_norm": 1.2545490264892578, "learning_rate": 0.0005094866829732301, "loss": 3.5052, "step": 57755 }, { "epoch": 3.9244462562848215, "grad_norm": 0.8707087635993958, "learning_rate": 0.0005094442179643974, "loss": 3.5498, "step": 57760 }, { "epoch": 3.924785976355483, "grad_norm": 0.6219019293785095, "learning_rate": 0.0005094017529555647, "loss": 3.539, "step": 57765 }, { "epoch": 3.9251256964261447, "grad_norm": 0.8875517845153809, "learning_rate": 0.0005093592879467319, "loss": 3.3529, "step": 57770 }, { "epoch": 3.925465416496807, "grad_norm": 0.7593134045600891, "learning_rate": 0.0005093168229378991, "loss": 3.5859, "step": 57775 }, { "epoch": 3.9258051365674684, "grad_norm": 0.8748698830604553, "learning_rate": 0.0005092743579290665, "loss": 3.3756, "step": 57780 }, { "epoch": 3.92614485663813, "grad_norm": 1.0114917755126953, "learning_rate": 0.0005092318929202337, "loss": 3.5962, "step": 57785 }, { "epoch": 3.926484576708792, "grad_norm": 0.7679345607757568, "learning_rate": 0.000509189427911401, "loss": 3.5556, "step": 57790 }, { "epoch": 3.9268242967794538, "grad_norm": 0.7611069679260254, "learning_rate": 0.0005091469629025684, "loss": 3.2607, "step": 57795 }, { "epoch": 3.9271640168501154, "grad_norm": 0.9605770707130432, "learning_rate": 0.0005091044978937356, "loss": 3.6002, "step": 57800 }, { "epoch": 3.9275037369207775, "grad_norm": 0.9018978476524353, "learning_rate": 0.0005090620328849028, "loss": 3.6232, "step": 57805 }, { "epoch": 3.927843456991439, "grad_norm": 1.0532951354980469, "learning_rate": 0.0005090195678760702, "loss": 3.4671, "step": 57810 }, { "epoch": 3.9281831770621007, "grad_norm": 1.0418877601623535, "learning_rate": 0.0005089771028672374, "loss": 3.4496, "step": 57815 }, { "epoch": 3.928522897132763, "grad_norm": 1.046834111213684, "learning_rate": 0.0005089346378584046, "loss": 3.3334, "step": 57820 }, { "epoch": 3.9288626172034244, "grad_norm": 1.0155375003814697, "learning_rate": 0.000508892172849572, "loss": 3.6435, "step": 57825 }, { "epoch": 3.929202337274086, "grad_norm": 0.7824139595031738, "learning_rate": 0.0005088497078407393, "loss": 3.5666, "step": 57830 }, { "epoch": 3.929542057344748, "grad_norm": 0.8723594546318054, "learning_rate": 0.0005088072428319065, "loss": 3.2868, "step": 57835 }, { "epoch": 3.92988177741541, "grad_norm": 0.828716516494751, "learning_rate": 0.0005087647778230738, "loss": 3.2986, "step": 57840 }, { "epoch": 3.9302214974860714, "grad_norm": 0.954928994178772, "learning_rate": 0.0005087223128142411, "loss": 3.5946, "step": 57845 }, { "epoch": 3.9305612175567335, "grad_norm": 0.779865026473999, "learning_rate": 0.0005086798478054083, "loss": 3.4754, "step": 57850 }, { "epoch": 3.930900937627395, "grad_norm": 0.7327186465263367, "learning_rate": 0.0005086373827965756, "loss": 3.5298, "step": 57855 }, { "epoch": 3.9312406576980568, "grad_norm": 0.918663501739502, "learning_rate": 0.000508594917787743, "loss": 3.6597, "step": 57860 }, { "epoch": 3.931580377768719, "grad_norm": 0.9499974846839905, "learning_rate": 0.0005085524527789102, "loss": 3.4245, "step": 57865 }, { "epoch": 3.9319200978393805, "grad_norm": 0.9082774519920349, "learning_rate": 0.0005085099877700775, "loss": 3.3617, "step": 57870 }, { "epoch": 3.932259817910042, "grad_norm": 0.8721675872802734, "learning_rate": 0.0005084675227612447, "loss": 3.6073, "step": 57875 }, { "epoch": 3.932599537980704, "grad_norm": 0.7511876821517944, "learning_rate": 0.000508425057752412, "loss": 3.4622, "step": 57880 }, { "epoch": 3.932939258051366, "grad_norm": 0.9155673384666443, "learning_rate": 0.0005083825927435793, "loss": 3.6787, "step": 57885 }, { "epoch": 3.9332789781220274, "grad_norm": 0.6869795918464661, "learning_rate": 0.0005083401277347465, "loss": 3.4943, "step": 57890 }, { "epoch": 3.9336186981926895, "grad_norm": 0.7228953838348389, "learning_rate": 0.0005082976627259139, "loss": 3.4925, "step": 57895 }, { "epoch": 3.933958418263351, "grad_norm": 0.7208672761917114, "learning_rate": 0.0005082551977170812, "loss": 3.3846, "step": 57900 }, { "epoch": 3.9342981383340128, "grad_norm": 1.0521990060806274, "learning_rate": 0.0005082127327082484, "loss": 3.6357, "step": 57905 }, { "epoch": 3.9346378584046744, "grad_norm": 0.849660336971283, "learning_rate": 0.0005081702676994156, "loss": 3.6777, "step": 57910 }, { "epoch": 3.9349775784753365, "grad_norm": 0.778304934501648, "learning_rate": 0.000508127802690583, "loss": 3.6254, "step": 57915 }, { "epoch": 3.935317298545998, "grad_norm": 1.0945446491241455, "learning_rate": 0.0005080853376817502, "loss": 3.2789, "step": 57920 }, { "epoch": 3.9356570186166597, "grad_norm": 1.1343942880630493, "learning_rate": 0.0005080428726729174, "loss": 3.6489, "step": 57925 }, { "epoch": 3.935996738687322, "grad_norm": 0.9594082832336426, "learning_rate": 0.0005080004076640849, "loss": 3.3318, "step": 57930 }, { "epoch": 3.9363364587579834, "grad_norm": 0.8449326753616333, "learning_rate": 0.0005079579426552521, "loss": 3.5973, "step": 57935 }, { "epoch": 3.936676178828645, "grad_norm": 1.0172070264816284, "learning_rate": 0.0005079154776464193, "loss": 3.4541, "step": 57940 }, { "epoch": 3.9370158988993067, "grad_norm": 0.766942024230957, "learning_rate": 0.0005078730126375867, "loss": 3.1327, "step": 57945 }, { "epoch": 3.9373556189699688, "grad_norm": 0.9737443327903748, "learning_rate": 0.0005078305476287539, "loss": 3.4705, "step": 57950 }, { "epoch": 3.9376953390406304, "grad_norm": 0.8648078441619873, "learning_rate": 0.0005077880826199211, "loss": 3.5477, "step": 57955 }, { "epoch": 3.938035059111292, "grad_norm": 0.9023715853691101, "learning_rate": 0.0005077456176110886, "loss": 3.2744, "step": 57960 }, { "epoch": 3.938374779181954, "grad_norm": 0.9019187688827515, "learning_rate": 0.0005077031526022558, "loss": 3.6515, "step": 57965 }, { "epoch": 3.9387144992526157, "grad_norm": 1.0867276191711426, "learning_rate": 0.000507660687593423, "loss": 3.7183, "step": 57970 }, { "epoch": 3.9390542193232774, "grad_norm": 0.812721848487854, "learning_rate": 0.0005076182225845903, "loss": 3.489, "step": 57975 }, { "epoch": 3.9393939393939394, "grad_norm": 0.8396128416061401, "learning_rate": 0.0005075757575757576, "loss": 3.2889, "step": 57980 }, { "epoch": 3.939733659464601, "grad_norm": 0.9133723378181458, "learning_rate": 0.0005075332925669248, "loss": 3.5939, "step": 57985 }, { "epoch": 3.9400733795352627, "grad_norm": 0.8736927509307861, "learning_rate": 0.0005074908275580921, "loss": 3.4874, "step": 57990 }, { "epoch": 3.940413099605925, "grad_norm": 0.8803768754005432, "learning_rate": 0.0005074483625492595, "loss": 3.3504, "step": 57995 }, { "epoch": 3.9407528196765864, "grad_norm": 0.9039371609687805, "learning_rate": 0.0005074058975404267, "loss": 3.3516, "step": 58000 }, { "epoch": 3.941092539747248, "grad_norm": 1.075278878211975, "learning_rate": 0.000507363432531594, "loss": 3.2157, "step": 58005 }, { "epoch": 3.94143225981791, "grad_norm": 0.9838477373123169, "learning_rate": 0.0005073209675227613, "loss": 3.5814, "step": 58010 }, { "epoch": 3.9417719798885718, "grad_norm": 1.0882760286331177, "learning_rate": 0.0005072785025139285, "loss": 3.4352, "step": 58015 }, { "epoch": 3.9421116999592334, "grad_norm": 0.798846423625946, "learning_rate": 0.0005072360375050958, "loss": 3.5862, "step": 58020 }, { "epoch": 3.9424514200298955, "grad_norm": 0.9038563370704651, "learning_rate": 0.000507193572496263, "loss": 3.4691, "step": 58025 }, { "epoch": 3.942791140100557, "grad_norm": 1.0672730207443237, "learning_rate": 0.0005071511074874304, "loss": 3.4586, "step": 58030 }, { "epoch": 3.9431308601712187, "grad_norm": 0.7741675972938538, "learning_rate": 0.0005071086424785977, "loss": 3.427, "step": 58035 }, { "epoch": 3.943470580241881, "grad_norm": 0.8939539790153503, "learning_rate": 0.0005070661774697649, "loss": 3.5251, "step": 58040 }, { "epoch": 3.9438103003125424, "grad_norm": 0.8015594482421875, "learning_rate": 0.0005070237124609322, "loss": 3.5034, "step": 58045 }, { "epoch": 3.944150020383204, "grad_norm": 0.8641104102134705, "learning_rate": 0.0005069812474520995, "loss": 3.4, "step": 58050 }, { "epoch": 3.944489740453866, "grad_norm": 0.7646827101707458, "learning_rate": 0.0005069387824432667, "loss": 3.6448, "step": 58055 }, { "epoch": 3.9448294605245278, "grad_norm": 1.0285121202468872, "learning_rate": 0.0005068963174344339, "loss": 3.5994, "step": 58060 }, { "epoch": 3.9451691805951894, "grad_norm": 0.9637117385864258, "learning_rate": 0.0005068538524256014, "loss": 3.4345, "step": 58065 }, { "epoch": 3.9455089006658515, "grad_norm": 0.8554577827453613, "learning_rate": 0.0005068113874167686, "loss": 3.5788, "step": 58070 }, { "epoch": 3.945848620736513, "grad_norm": 0.8484289646148682, "learning_rate": 0.0005067689224079358, "loss": 3.6474, "step": 58075 }, { "epoch": 3.9461883408071747, "grad_norm": 0.7393940091133118, "learning_rate": 0.0005067264573991032, "loss": 3.5786, "step": 58080 }, { "epoch": 3.946528060877837, "grad_norm": 0.8794519901275635, "learning_rate": 0.0005066839923902704, "loss": 3.5425, "step": 58085 }, { "epoch": 3.9468677809484984, "grad_norm": 0.8264247179031372, "learning_rate": 0.0005066415273814377, "loss": 3.4844, "step": 58090 }, { "epoch": 3.94720750101916, "grad_norm": 0.830420732498169, "learning_rate": 0.000506599062372605, "loss": 3.4909, "step": 58095 }, { "epoch": 3.947547221089822, "grad_norm": 1.3286415338516235, "learning_rate": 0.0005065565973637723, "loss": 3.7214, "step": 58100 }, { "epoch": 3.9478869411604838, "grad_norm": 1.4399476051330566, "learning_rate": 0.0005065141323549396, "loss": 3.7089, "step": 58105 }, { "epoch": 3.9482266612311454, "grad_norm": 1.0080400705337524, "learning_rate": 0.0005064716673461069, "loss": 3.2644, "step": 58110 }, { "epoch": 3.9485663813018075, "grad_norm": 0.9388505220413208, "learning_rate": 0.0005064292023372741, "loss": 3.5571, "step": 58115 }, { "epoch": 3.948906101372469, "grad_norm": 0.9111339449882507, "learning_rate": 0.0005063867373284414, "loss": 3.2708, "step": 58120 }, { "epoch": 3.9492458214431307, "grad_norm": 0.8298900723457336, "learning_rate": 0.0005063442723196086, "loss": 3.339, "step": 58125 }, { "epoch": 3.949585541513793, "grad_norm": 0.9932072758674622, "learning_rate": 0.0005063018073107759, "loss": 3.8228, "step": 58130 }, { "epoch": 3.9499252615844545, "grad_norm": 0.8818564414978027, "learning_rate": 0.0005062593423019433, "loss": 3.5516, "step": 58135 }, { "epoch": 3.950264981655116, "grad_norm": 1.029739499092102, "learning_rate": 0.0005062168772931105, "loss": 3.5174, "step": 58140 }, { "epoch": 3.950604701725778, "grad_norm": 1.0952411890029907, "learning_rate": 0.0005061744122842778, "loss": 3.634, "step": 58145 }, { "epoch": 3.95094442179644, "grad_norm": 0.8502510786056519, "learning_rate": 0.0005061319472754451, "loss": 3.5604, "step": 58150 }, { "epoch": 3.9512841418671014, "grad_norm": 0.7648395299911499, "learning_rate": 0.0005060894822666123, "loss": 3.5383, "step": 58155 }, { "epoch": 3.9516238619377635, "grad_norm": 0.828384518623352, "learning_rate": 0.0005060470172577795, "loss": 3.7654, "step": 58160 }, { "epoch": 3.951963582008425, "grad_norm": 1.8672354221343994, "learning_rate": 0.0005060045522489469, "loss": 3.8266, "step": 58165 }, { "epoch": 3.9523033020790868, "grad_norm": 0.9821152091026306, "learning_rate": 0.0005059620872401142, "loss": 3.4282, "step": 58170 }, { "epoch": 3.952643022149749, "grad_norm": 0.8536328077316284, "learning_rate": 0.0005059196222312814, "loss": 3.4142, "step": 58175 }, { "epoch": 3.9529827422204105, "grad_norm": 0.7489006519317627, "learning_rate": 0.0005058771572224488, "loss": 3.3939, "step": 58180 }, { "epoch": 3.953322462291072, "grad_norm": 0.903169572353363, "learning_rate": 0.000505834692213616, "loss": 3.501, "step": 58185 }, { "epoch": 3.953662182361734, "grad_norm": 0.7916556596755981, "learning_rate": 0.0005057922272047832, "loss": 3.4626, "step": 58190 }, { "epoch": 3.954001902432396, "grad_norm": 0.8764162659645081, "learning_rate": 0.0005057497621959506, "loss": 3.4875, "step": 58195 }, { "epoch": 3.9543416225030574, "grad_norm": 0.8005889654159546, "learning_rate": 0.0005057072971871178, "loss": 3.4745, "step": 58200 }, { "epoch": 3.9546813425737195, "grad_norm": 1.0455639362335205, "learning_rate": 0.0005056648321782851, "loss": 3.6033, "step": 58205 }, { "epoch": 3.955021062644381, "grad_norm": 0.8559653759002686, "learning_rate": 0.0005056223671694525, "loss": 3.2944, "step": 58210 }, { "epoch": 3.9553607827150428, "grad_norm": 0.9199241995811462, "learning_rate": 0.0005055799021606197, "loss": 3.261, "step": 58215 }, { "epoch": 3.955700502785705, "grad_norm": 0.9811674356460571, "learning_rate": 0.0005055374371517869, "loss": 3.7129, "step": 58220 }, { "epoch": 3.9560402228563665, "grad_norm": 0.8099051117897034, "learning_rate": 0.0005054949721429542, "loss": 3.3218, "step": 58225 }, { "epoch": 3.956379942927028, "grad_norm": 0.8604681491851807, "learning_rate": 0.0005054525071341215, "loss": 3.3551, "step": 58230 }, { "epoch": 3.95671966299769, "grad_norm": 0.7500579357147217, "learning_rate": 0.0005054100421252887, "loss": 3.7321, "step": 58235 }, { "epoch": 3.957059383068352, "grad_norm": 0.837807834148407, "learning_rate": 0.0005053675771164561, "loss": 3.6117, "step": 58240 }, { "epoch": 3.9573991031390134, "grad_norm": 0.9512894749641418, "learning_rate": 0.0005053251121076234, "loss": 3.6947, "step": 58245 }, { "epoch": 3.957738823209675, "grad_norm": 0.8467386364936829, "learning_rate": 0.0005052826470987906, "loss": 3.5233, "step": 58250 }, { "epoch": 3.958078543280337, "grad_norm": 0.9090461730957031, "learning_rate": 0.0005052401820899579, "loss": 3.3133, "step": 58255 }, { "epoch": 3.958418263350999, "grad_norm": 0.9531348347663879, "learning_rate": 0.0005051977170811251, "loss": 3.3717, "step": 58260 }, { "epoch": 3.9587579834216604, "grad_norm": 0.9099034667015076, "learning_rate": 0.0005051552520722924, "loss": 3.68, "step": 58265 }, { "epoch": 3.9590977034923225, "grad_norm": 0.9285187721252441, "learning_rate": 0.0005051127870634597, "loss": 3.5716, "step": 58270 }, { "epoch": 3.959437423562984, "grad_norm": 0.934465229511261, "learning_rate": 0.000505070322054627, "loss": 3.2589, "step": 58275 }, { "epoch": 3.9597771436336457, "grad_norm": 0.7939115762710571, "learning_rate": 0.0005050278570457943, "loss": 3.5726, "step": 58280 }, { "epoch": 3.9601168637043074, "grad_norm": 0.9468801617622375, "learning_rate": 0.0005049853920369616, "loss": 3.5406, "step": 58285 }, { "epoch": 3.9604565837749695, "grad_norm": 0.7870660424232483, "learning_rate": 0.0005049429270281288, "loss": 3.7892, "step": 58290 }, { "epoch": 3.960796303845631, "grad_norm": 0.8129415512084961, "learning_rate": 0.000504900462019296, "loss": 3.5256, "step": 58295 }, { "epoch": 3.9611360239162927, "grad_norm": 2.075011968612671, "learning_rate": 0.0005048579970104634, "loss": 3.4665, "step": 58300 }, { "epoch": 3.961475743986955, "grad_norm": 0.9316096305847168, "learning_rate": 0.0005048155320016306, "loss": 3.542, "step": 58305 }, { "epoch": 3.9618154640576164, "grad_norm": 0.8207512497901917, "learning_rate": 0.000504773066992798, "loss": 3.3907, "step": 58310 }, { "epoch": 3.962155184128278, "grad_norm": 0.8904290795326233, "learning_rate": 0.0005047306019839653, "loss": 3.7004, "step": 58315 }, { "epoch": 3.96249490419894, "grad_norm": 0.8550782203674316, "learning_rate": 0.0005046881369751325, "loss": 3.5086, "step": 58320 }, { "epoch": 3.9628346242696018, "grad_norm": 0.7565914392471313, "learning_rate": 0.0005046456719662997, "loss": 3.4796, "step": 58325 }, { "epoch": 3.9631743443402634, "grad_norm": 1.092847466468811, "learning_rate": 0.0005046032069574671, "loss": 3.3838, "step": 58330 }, { "epoch": 3.9635140644109255, "grad_norm": 0.8022302389144897, "learning_rate": 0.0005045607419486343, "loss": 3.5696, "step": 58335 }, { "epoch": 3.963853784481587, "grad_norm": 0.9650247693061829, "learning_rate": 0.0005045182769398015, "loss": 3.5475, "step": 58340 }, { "epoch": 3.9641935045522487, "grad_norm": 0.9735673069953918, "learning_rate": 0.000504475811930969, "loss": 3.1982, "step": 58345 }, { "epoch": 3.964533224622911, "grad_norm": 0.8439900875091553, "learning_rate": 0.0005044333469221362, "loss": 3.8076, "step": 58350 }, { "epoch": 3.9648729446935724, "grad_norm": 0.9903268218040466, "learning_rate": 0.0005043908819133034, "loss": 3.2138, "step": 58355 }, { "epoch": 3.965212664764234, "grad_norm": 0.8935691118240356, "learning_rate": 0.0005043484169044707, "loss": 3.402, "step": 58360 }, { "epoch": 3.965552384834896, "grad_norm": 0.8377406597137451, "learning_rate": 0.000504305951895638, "loss": 3.6499, "step": 58365 }, { "epoch": 3.9658921049055578, "grad_norm": 0.9769845604896545, "learning_rate": 0.0005042634868868052, "loss": 3.4396, "step": 58370 }, { "epoch": 3.9662318249762194, "grad_norm": 0.8500931262969971, "learning_rate": 0.0005042210218779725, "loss": 3.4506, "step": 58375 }, { "epoch": 3.9665715450468815, "grad_norm": 0.9201495051383972, "learning_rate": 0.0005041785568691399, "loss": 3.571, "step": 58380 }, { "epoch": 3.966911265117543, "grad_norm": 0.8753264546394348, "learning_rate": 0.0005041360918603071, "loss": 3.6226, "step": 58385 }, { "epoch": 3.9672509851882047, "grad_norm": 1.1276609897613525, "learning_rate": 0.0005040936268514744, "loss": 3.6304, "step": 58390 }, { "epoch": 3.967590705258867, "grad_norm": 0.7851729989051819, "learning_rate": 0.0005040511618426417, "loss": 3.6937, "step": 58395 }, { "epoch": 3.9679304253295284, "grad_norm": 0.803649365901947, "learning_rate": 0.0005040086968338089, "loss": 3.4422, "step": 58400 }, { "epoch": 3.96827014540019, "grad_norm": 0.9867132306098938, "learning_rate": 0.0005039662318249762, "loss": 3.454, "step": 58405 }, { "epoch": 3.968609865470852, "grad_norm": 0.8968766927719116, "learning_rate": 0.0005039237668161434, "loss": 3.4482, "step": 58410 }, { "epoch": 3.968949585541514, "grad_norm": 0.9215635061264038, "learning_rate": 0.0005038813018073108, "loss": 3.265, "step": 58415 }, { "epoch": 3.9692893056121754, "grad_norm": 0.9452293515205383, "learning_rate": 0.0005038388367984781, "loss": 3.4326, "step": 58420 }, { "epoch": 3.9696290256828375, "grad_norm": 0.9856594204902649, "learning_rate": 0.0005037963717896453, "loss": 3.7966, "step": 58425 }, { "epoch": 3.969968745753499, "grad_norm": 1.2272056341171265, "learning_rate": 0.0005037539067808127, "loss": 3.3756, "step": 58430 }, { "epoch": 3.9703084658241607, "grad_norm": 0.759789764881134, "learning_rate": 0.0005037114417719799, "loss": 3.3371, "step": 58435 }, { "epoch": 3.970648185894823, "grad_norm": 0.8865832090377808, "learning_rate": 0.0005036689767631471, "loss": 3.4559, "step": 58440 }, { "epoch": 3.9709879059654845, "grad_norm": 1.0445077419281006, "learning_rate": 0.0005036265117543146, "loss": 3.4061, "step": 58445 }, { "epoch": 3.971327626036146, "grad_norm": 1.873594045639038, "learning_rate": 0.0005035840467454818, "loss": 3.1775, "step": 58450 }, { "epoch": 3.971667346106808, "grad_norm": 1.0005426406860352, "learning_rate": 0.000503541581736649, "loss": 3.5394, "step": 58455 }, { "epoch": 3.97200706617747, "grad_norm": 2.5633442401885986, "learning_rate": 0.0005034991167278163, "loss": 3.4697, "step": 58460 }, { "epoch": 3.9723467862481314, "grad_norm": 0.9882625341415405, "learning_rate": 0.0005034566517189836, "loss": 3.4715, "step": 58465 }, { "epoch": 3.9726865063187935, "grad_norm": 0.9934515357017517, "learning_rate": 0.0005034141867101508, "loss": 3.2818, "step": 58470 }, { "epoch": 3.973026226389455, "grad_norm": 0.9851252436637878, "learning_rate": 0.0005033717217013181, "loss": 3.4467, "step": 58475 }, { "epoch": 3.9733659464601168, "grad_norm": 0.8459964990615845, "learning_rate": 0.0005033292566924855, "loss": 3.5555, "step": 58480 }, { "epoch": 3.973705666530779, "grad_norm": 0.8251769542694092, "learning_rate": 0.0005032867916836527, "loss": 3.1989, "step": 58485 }, { "epoch": 3.9740453866014405, "grad_norm": 0.8321053385734558, "learning_rate": 0.00050324432667482, "loss": 3.6527, "step": 58490 }, { "epoch": 3.974385106672102, "grad_norm": 0.8952816724777222, "learning_rate": 0.0005032018616659873, "loss": 3.3618, "step": 58495 }, { "epoch": 3.974724826742764, "grad_norm": 0.972129225730896, "learning_rate": 0.0005031593966571545, "loss": 3.4188, "step": 58500 }, { "epoch": 3.975064546813426, "grad_norm": 1.0175836086273193, "learning_rate": 0.0005031169316483218, "loss": 3.2086, "step": 58505 }, { "epoch": 3.9754042668840874, "grad_norm": 0.7839510440826416, "learning_rate": 0.000503074466639489, "loss": 3.5327, "step": 58510 }, { "epoch": 3.9757439869547495, "grad_norm": 0.8590485453605652, "learning_rate": 0.0005030320016306564, "loss": 3.6787, "step": 58515 }, { "epoch": 3.976083707025411, "grad_norm": 1.0174731016159058, "learning_rate": 0.0005029895366218237, "loss": 3.5586, "step": 58520 }, { "epoch": 3.9764234270960728, "grad_norm": 0.8730679154396057, "learning_rate": 0.0005029470716129909, "loss": 3.6173, "step": 58525 }, { "epoch": 3.976763147166735, "grad_norm": 1.154018759727478, "learning_rate": 0.0005029046066041582, "loss": 3.3978, "step": 58530 }, { "epoch": 3.9771028672373965, "grad_norm": 1.2623858451843262, "learning_rate": 0.0005028621415953255, "loss": 3.5161, "step": 58535 }, { "epoch": 3.977442587308058, "grad_norm": 0.8604353070259094, "learning_rate": 0.0005028196765864927, "loss": 3.5032, "step": 58540 }, { "epoch": 3.97778230737872, "grad_norm": 1.0804920196533203, "learning_rate": 0.00050277721157766, "loss": 3.4831, "step": 58545 }, { "epoch": 3.978122027449382, "grad_norm": 1.0577651262283325, "learning_rate": 0.0005027347465688274, "loss": 3.6307, "step": 58550 }, { "epoch": 3.9784617475200434, "grad_norm": 0.9079492688179016, "learning_rate": 0.0005026922815599946, "loss": 3.7143, "step": 58555 }, { "epoch": 3.9788014675907055, "grad_norm": 0.8260047435760498, "learning_rate": 0.0005026498165511618, "loss": 3.682, "step": 58560 }, { "epoch": 3.979141187661367, "grad_norm": 0.7071512937545776, "learning_rate": 0.0005026073515423292, "loss": 3.5207, "step": 58565 }, { "epoch": 3.979480907732029, "grad_norm": 0.9244656562805176, "learning_rate": 0.0005025648865334964, "loss": 3.542, "step": 58570 }, { "epoch": 3.979820627802691, "grad_norm": 1.1806243658065796, "learning_rate": 0.0005025224215246636, "loss": 3.3665, "step": 58575 }, { "epoch": 3.9801603478733525, "grad_norm": 0.7310066223144531, "learning_rate": 0.000502479956515831, "loss": 3.5364, "step": 58580 }, { "epoch": 3.980500067944014, "grad_norm": 0.8309284448623657, "learning_rate": 0.0005024374915069983, "loss": 3.5225, "step": 58585 }, { "epoch": 3.9808397880146758, "grad_norm": 0.9056680202484131, "learning_rate": 0.0005023950264981655, "loss": 3.468, "step": 58590 }, { "epoch": 3.981179508085338, "grad_norm": 0.9902760982513428, "learning_rate": 0.0005023525614893329, "loss": 3.546, "step": 58595 }, { "epoch": 3.9815192281559995, "grad_norm": 0.6969340443611145, "learning_rate": 0.0005023100964805001, "loss": 3.578, "step": 58600 }, { "epoch": 3.981858948226661, "grad_norm": 0.660993754863739, "learning_rate": 0.0005022676314716673, "loss": 3.528, "step": 58605 }, { "epoch": 3.982198668297323, "grad_norm": 0.872743546962738, "learning_rate": 0.0005022251664628346, "loss": 3.5203, "step": 58610 }, { "epoch": 3.982538388367985, "grad_norm": 0.8260291814804077, "learning_rate": 0.0005021827014540019, "loss": 3.4115, "step": 58615 }, { "epoch": 3.9828781084386464, "grad_norm": 0.7985127568244934, "learning_rate": 0.0005021402364451692, "loss": 3.6226, "step": 58620 }, { "epoch": 3.983217828509308, "grad_norm": 0.8280341029167175, "learning_rate": 0.0005020977714363365, "loss": 3.4645, "step": 58625 }, { "epoch": 3.98355754857997, "grad_norm": 0.7678314447402954, "learning_rate": 0.0005020553064275038, "loss": 3.5384, "step": 58630 }, { "epoch": 3.9838972686506318, "grad_norm": 0.8187561631202698, "learning_rate": 0.000502012841418671, "loss": 3.2806, "step": 58635 }, { "epoch": 3.9842369887212934, "grad_norm": 0.886323869228363, "learning_rate": 0.0005019703764098383, "loss": 3.6184, "step": 58640 }, { "epoch": 3.9845767087919555, "grad_norm": 0.9094097018241882, "learning_rate": 0.0005019279114010055, "loss": 3.5141, "step": 58645 }, { "epoch": 3.984916428862617, "grad_norm": 0.9438634514808655, "learning_rate": 0.0005018854463921728, "loss": 3.6474, "step": 58650 }, { "epoch": 3.9852561489332787, "grad_norm": 0.7943500876426697, "learning_rate": 0.0005018429813833402, "loss": 3.5123, "step": 58655 }, { "epoch": 3.985595869003941, "grad_norm": 0.8282130360603333, "learning_rate": 0.0005018005163745074, "loss": 3.3624, "step": 58660 }, { "epoch": 3.9859355890746024, "grad_norm": 0.843784749507904, "learning_rate": 0.0005017580513656747, "loss": 3.5491, "step": 58665 }, { "epoch": 3.986275309145264, "grad_norm": 0.8805307745933533, "learning_rate": 0.000501715586356842, "loss": 3.3268, "step": 58670 }, { "epoch": 3.986615029215926, "grad_norm": 0.7629668712615967, "learning_rate": 0.0005016731213480092, "loss": 3.563, "step": 58675 }, { "epoch": 3.9869547492865878, "grad_norm": 0.9756906032562256, "learning_rate": 0.0005016306563391765, "loss": 3.4587, "step": 58680 }, { "epoch": 3.9872944693572494, "grad_norm": 1.0828956365585327, "learning_rate": 0.0005015881913303438, "loss": 3.2026, "step": 58685 }, { "epoch": 3.9876341894279115, "grad_norm": 1.0435839891433716, "learning_rate": 0.0005015457263215111, "loss": 3.5432, "step": 58690 }, { "epoch": 3.987973909498573, "grad_norm": 0.6745830178260803, "learning_rate": 0.0005015032613126783, "loss": 3.6717, "step": 58695 }, { "epoch": 3.9883136295692347, "grad_norm": 0.8049938678741455, "learning_rate": 0.0005014607963038457, "loss": 3.3084, "step": 58700 }, { "epoch": 3.988653349639897, "grad_norm": 0.776008665561676, "learning_rate": 0.0005014183312950129, "loss": 3.5405, "step": 58705 }, { "epoch": 3.9889930697105584, "grad_norm": 0.9933795928955078, "learning_rate": 0.0005013758662861801, "loss": 3.4247, "step": 58710 }, { "epoch": 3.98933278978122, "grad_norm": 0.7957560420036316, "learning_rate": 0.0005013334012773475, "loss": 3.6848, "step": 58715 }, { "epoch": 3.989672509851882, "grad_norm": 1.2339847087860107, "learning_rate": 0.0005012909362685147, "loss": 3.7956, "step": 58720 }, { "epoch": 3.990012229922544, "grad_norm": 1.210588812828064, "learning_rate": 0.000501248471259682, "loss": 3.1582, "step": 58725 }, { "epoch": 3.9903519499932054, "grad_norm": 1.0566565990447998, "learning_rate": 0.0005012060062508494, "loss": 3.3669, "step": 58730 }, { "epoch": 3.9906916700638675, "grad_norm": 1.2095428705215454, "learning_rate": 0.0005011635412420166, "loss": 3.3136, "step": 58735 }, { "epoch": 3.991031390134529, "grad_norm": 0.6907721161842346, "learning_rate": 0.0005011210762331838, "loss": 3.4939, "step": 58740 }, { "epoch": 3.9913711102051908, "grad_norm": 0.8217974901199341, "learning_rate": 0.0005010786112243512, "loss": 3.4108, "step": 58745 }, { "epoch": 3.991710830275853, "grad_norm": 0.8160417079925537, "learning_rate": 0.0005010361462155184, "loss": 3.3811, "step": 58750 }, { "epoch": 3.9920505503465145, "grad_norm": 0.8994930386543274, "learning_rate": 0.0005009936812066856, "loss": 3.5395, "step": 58755 }, { "epoch": 3.992390270417176, "grad_norm": 0.9312856793403625, "learning_rate": 0.000500951216197853, "loss": 3.3703, "step": 58760 }, { "epoch": 3.992729990487838, "grad_norm": 1.0386981964111328, "learning_rate": 0.0005009087511890203, "loss": 3.7951, "step": 58765 }, { "epoch": 3.9930697105585, "grad_norm": 0.8984938859939575, "learning_rate": 0.0005008662861801876, "loss": 3.4229, "step": 58770 }, { "epoch": 3.9934094306291614, "grad_norm": 1.0134419202804565, "learning_rate": 0.0005008238211713548, "loss": 3.2128, "step": 58775 }, { "epoch": 3.9937491506998235, "grad_norm": 0.914245069026947, "learning_rate": 0.0005007813561625221, "loss": 3.3382, "step": 58780 }, { "epoch": 3.994088870770485, "grad_norm": 0.9447693824768066, "learning_rate": 0.0005007388911536894, "loss": 3.5376, "step": 58785 }, { "epoch": 3.9944285908411468, "grad_norm": 0.9548652768135071, "learning_rate": 0.0005006964261448566, "loss": 3.3727, "step": 58790 }, { "epoch": 3.994768310911809, "grad_norm": 0.924791693687439, "learning_rate": 0.000500653961136024, "loss": 3.5487, "step": 58795 }, { "epoch": 3.9951080309824705, "grad_norm": 1.0919173955917358, "learning_rate": 0.0005006114961271913, "loss": 3.5634, "step": 58800 }, { "epoch": 3.995447751053132, "grad_norm": 1.2216788530349731, "learning_rate": 0.0005005690311183585, "loss": 3.7662, "step": 58805 }, { "epoch": 3.995787471123794, "grad_norm": 0.9165224432945251, "learning_rate": 0.0005005265661095257, "loss": 3.5287, "step": 58810 }, { "epoch": 3.996127191194456, "grad_norm": 0.9591333270072937, "learning_rate": 0.0005004841011006931, "loss": 3.3288, "step": 58815 }, { "epoch": 3.9964669112651174, "grad_norm": 1.2829313278198242, "learning_rate": 0.0005004416360918603, "loss": 3.6745, "step": 58820 }, { "epoch": 3.9968066313357795, "grad_norm": 0.8852560520172119, "learning_rate": 0.0005003991710830275, "loss": 3.5221, "step": 58825 }, { "epoch": 3.997146351406441, "grad_norm": 0.8069028854370117, "learning_rate": 0.000500356706074195, "loss": 3.5751, "step": 58830 }, { "epoch": 3.9974860714771028, "grad_norm": 0.8644586801528931, "learning_rate": 0.0005003142410653622, "loss": 3.2415, "step": 58835 }, { "epoch": 3.997825791547765, "grad_norm": 1.0938270092010498, "learning_rate": 0.0005002717760565294, "loss": 3.4985, "step": 58840 }, { "epoch": 3.9981655116184265, "grad_norm": 0.6521931290626526, "learning_rate": 0.0005002293110476968, "loss": 3.6581, "step": 58845 }, { "epoch": 3.998505231689088, "grad_norm": 0.9686088562011719, "learning_rate": 0.000500186846038864, "loss": 3.3887, "step": 58850 }, { "epoch": 3.99884495175975, "grad_norm": 0.7416316270828247, "learning_rate": 0.0005001443810300312, "loss": 3.5103, "step": 58855 }, { "epoch": 3.999184671830412, "grad_norm": 0.9525768756866455, "learning_rate": 0.0005001019160211985, "loss": 3.688, "step": 58860 }, { "epoch": 3.9995243919010735, "grad_norm": 1.0199936628341675, "learning_rate": 0.0005000594510123659, "loss": 3.7501, "step": 58865 }, { "epoch": 3.9998641119717355, "grad_norm": 1.1737592220306396, "learning_rate": 0.0005000169860035331, "loss": 3.4213, "step": 58870 }, { "epoch": 4.0, "eval_bertscore": { "f1": 0.8385404370669559, "precision": 0.8427524314750152, "recall": 0.8351357977327213 }, "eval_bleu_4": 0.015128049595534286, "eval_exact_match": 0.0002907258455276674, "eval_loss": 3.432779550552368, "eval_meteor": 0.08361186489482236, "eval_rouge": { "rouge1": 0.12040188242092503, "rouge2": 0.017964918767180104, "rougeL": 0.10407237429544142, "rougeLsum": 0.10410244498813562 }, "eval_runtime": 2386.9732, "eval_samples_per_second": 4.323, "eval_steps_per_second": 0.54, "step": 58872 }, { "epoch": 4.000203832042397, "grad_norm": 0.8747281432151794, "learning_rate": 0.0004999745209947003, "loss": 3.7358, "step": 58875 }, { "epoch": 4.000543552113059, "grad_norm": 1.0096979141235352, "learning_rate": 0.0004999320559858677, "loss": 3.4673, "step": 58880 }, { "epoch": 4.000883272183721, "grad_norm": 0.8625751733779907, "learning_rate": 0.000499889590977035, "loss": 3.3461, "step": 58885 }, { "epoch": 4.001222992254382, "grad_norm": 0.7489694952964783, "learning_rate": 0.0004998471259682022, "loss": 3.3326, "step": 58890 }, { "epoch": 4.001562712325044, "grad_norm": 0.9151527285575867, "learning_rate": 0.0004998046609593694, "loss": 3.4834, "step": 58895 }, { "epoch": 4.001902432395706, "grad_norm": 0.9500098824501038, "learning_rate": 0.0004997621959505368, "loss": 3.4627, "step": 58900 }, { "epoch": 4.002242152466367, "grad_norm": 0.8319563865661621, "learning_rate": 0.000499719730941704, "loss": 3.3326, "step": 58905 }, { "epoch": 4.0025818725370295, "grad_norm": 0.6396570801734924, "learning_rate": 0.0004996772659328713, "loss": 3.4005, "step": 58910 }, { "epoch": 4.0029215926076915, "grad_norm": 1.0601556301116943, "learning_rate": 0.0004996348009240387, "loss": 3.3145, "step": 58915 }, { "epoch": 4.003261312678353, "grad_norm": 1.120114803314209, "learning_rate": 0.0004995923359152059, "loss": 3.5709, "step": 58920 }, { "epoch": 4.003601032749015, "grad_norm": 0.8364360928535461, "learning_rate": 0.0004995498709063731, "loss": 3.3213, "step": 58925 }, { "epoch": 4.003940752819677, "grad_norm": 1.0632083415985107, "learning_rate": 0.0004995074058975405, "loss": 3.7191, "step": 58930 }, { "epoch": 4.004280472890338, "grad_norm": 1.0068776607513428, "learning_rate": 0.0004994649408887077, "loss": 3.3446, "step": 58935 }, { "epoch": 4.004620192961, "grad_norm": 0.7390163540840149, "learning_rate": 0.000499422475879875, "loss": 3.5669, "step": 58940 }, { "epoch": 4.004959913031662, "grad_norm": 0.7953309416770935, "learning_rate": 0.0004993800108710422, "loss": 3.4071, "step": 58945 }, { "epoch": 4.005299633102323, "grad_norm": 0.7707774043083191, "learning_rate": 0.0004993375458622096, "loss": 3.4354, "step": 58950 }, { "epoch": 4.0056393531729855, "grad_norm": 0.9267033934593201, "learning_rate": 0.0004992950808533768, "loss": 3.3968, "step": 58955 }, { "epoch": 4.0059790732436475, "grad_norm": 1.0014992952346802, "learning_rate": 0.0004992526158445441, "loss": 3.5783, "step": 58960 }, { "epoch": 4.006318793314309, "grad_norm": 0.8574535250663757, "learning_rate": 0.0004992101508357114, "loss": 3.3148, "step": 58965 }, { "epoch": 4.006658513384971, "grad_norm": 0.8339497447013855, "learning_rate": 0.0004991676858268787, "loss": 3.3311, "step": 58970 }, { "epoch": 4.006998233455633, "grad_norm": 0.8833082318305969, "learning_rate": 0.0004991252208180459, "loss": 3.6522, "step": 58975 }, { "epoch": 4.007337953526294, "grad_norm": 0.866485059261322, "learning_rate": 0.0004990827558092132, "loss": 3.5873, "step": 58980 }, { "epoch": 4.007677673596956, "grad_norm": 0.8681113719940186, "learning_rate": 0.0004990402908003805, "loss": 3.6125, "step": 58985 }, { "epoch": 4.008017393667618, "grad_norm": 0.790141224861145, "learning_rate": 0.0004989978257915478, "loss": 3.4763, "step": 58990 }, { "epoch": 4.008357113738279, "grad_norm": 0.8715270161628723, "learning_rate": 0.000498955360782715, "loss": 3.5145, "step": 58995 }, { "epoch": 4.0086968338089415, "grad_norm": 0.8293136954307556, "learning_rate": 0.0004989128957738823, "loss": 3.6876, "step": 59000 }, { "epoch": 4.009036553879604, "grad_norm": 0.7862138748168945, "learning_rate": 0.0004988704307650496, "loss": 3.4034, "step": 59005 }, { "epoch": 4.009376273950265, "grad_norm": 0.8801670670509338, "learning_rate": 0.0004988279657562168, "loss": 3.426, "step": 59010 }, { "epoch": 4.009715994020927, "grad_norm": 1.132441520690918, "learning_rate": 0.0004987855007473842, "loss": 3.3286, "step": 59015 }, { "epoch": 4.010055714091589, "grad_norm": 0.6591469049453735, "learning_rate": 0.0004987430357385515, "loss": 3.8127, "step": 59020 }, { "epoch": 4.01039543416225, "grad_norm": 0.9934965372085571, "learning_rate": 0.0004987005707297187, "loss": 3.633, "step": 59025 }, { "epoch": 4.010735154232912, "grad_norm": 0.9317359924316406, "learning_rate": 0.000498658105720886, "loss": 3.4313, "step": 59030 }, { "epoch": 4.011074874303574, "grad_norm": 0.8778597712516785, "learning_rate": 0.0004986156407120533, "loss": 3.3994, "step": 59035 }, { "epoch": 4.011414594374235, "grad_norm": 1.0319303274154663, "learning_rate": 0.0004985731757032206, "loss": 3.291, "step": 59040 }, { "epoch": 4.0117543144448975, "grad_norm": 0.7920326590538025, "learning_rate": 0.0004985307106943878, "loss": 3.5725, "step": 59045 }, { "epoch": 4.01209403451556, "grad_norm": 0.9608009457588196, "learning_rate": 0.0004984882456855551, "loss": 3.471, "step": 59050 }, { "epoch": 4.012433754586221, "grad_norm": 1.0730987787246704, "learning_rate": 0.0004984457806767224, "loss": 3.4814, "step": 59055 }, { "epoch": 4.012773474656883, "grad_norm": 0.8771940469741821, "learning_rate": 0.0004984033156678896, "loss": 3.5399, "step": 59060 }, { "epoch": 4.013113194727545, "grad_norm": 0.7945749163627625, "learning_rate": 0.000498360850659057, "loss": 3.6079, "step": 59065 }, { "epoch": 4.013452914798206, "grad_norm": 0.9251720905303955, "learning_rate": 0.0004983183856502243, "loss": 3.5025, "step": 59070 }, { "epoch": 4.013792634868868, "grad_norm": 0.879957914352417, "learning_rate": 0.0004982759206413915, "loss": 3.3794, "step": 59075 }, { "epoch": 4.01413235493953, "grad_norm": 0.8275218605995178, "learning_rate": 0.0004982334556325588, "loss": 3.5161, "step": 59080 }, { "epoch": 4.014472075010191, "grad_norm": 1.0662089586257935, "learning_rate": 0.0004981909906237261, "loss": 3.4729, "step": 59085 }, { "epoch": 4.0148117950808535, "grad_norm": 0.924647331237793, "learning_rate": 0.0004981485256148933, "loss": 3.5644, "step": 59090 }, { "epoch": 4.015151515151516, "grad_norm": 0.692802906036377, "learning_rate": 0.0004981060606060606, "loss": 3.5337, "step": 59095 }, { "epoch": 4.015491235222177, "grad_norm": 0.7831913828849792, "learning_rate": 0.0004980635955972279, "loss": 3.6386, "step": 59100 }, { "epoch": 4.015830955292839, "grad_norm": 0.8935628533363342, "learning_rate": 0.0004980211305883952, "loss": 3.374, "step": 59105 }, { "epoch": 4.016170675363501, "grad_norm": 0.9295881390571594, "learning_rate": 0.0004979786655795624, "loss": 3.4827, "step": 59110 }, { "epoch": 4.016510395434162, "grad_norm": 0.9850232601165771, "learning_rate": 0.0004979362005707298, "loss": 3.2893, "step": 59115 }, { "epoch": 4.016850115504824, "grad_norm": 1.0450438261032104, "learning_rate": 0.000497893735561897, "loss": 3.4132, "step": 59120 }, { "epoch": 4.017189835575485, "grad_norm": 0.9847548007965088, "learning_rate": 0.0004978512705530643, "loss": 3.2274, "step": 59125 }, { "epoch": 4.017529555646147, "grad_norm": 0.6877517700195312, "learning_rate": 0.0004978088055442316, "loss": 3.5862, "step": 59130 }, { "epoch": 4.0178692757168095, "grad_norm": 1.0290706157684326, "learning_rate": 0.0004977663405353988, "loss": 3.6977, "step": 59135 }, { "epoch": 4.018208995787471, "grad_norm": 1.152679204940796, "learning_rate": 0.0004977238755265661, "loss": 3.5757, "step": 59140 }, { "epoch": 4.018548715858133, "grad_norm": 0.7825775146484375, "learning_rate": 0.0004976814105177334, "loss": 3.4211, "step": 59145 }, { "epoch": 4.018888435928795, "grad_norm": 0.7771532535552979, "learning_rate": 0.0004976389455089007, "loss": 3.4468, "step": 59150 }, { "epoch": 4.019228155999456, "grad_norm": 0.6235162615776062, "learning_rate": 0.0004975964805000679, "loss": 3.751, "step": 59155 }, { "epoch": 4.019567876070118, "grad_norm": 0.8276233673095703, "learning_rate": 0.0004975540154912352, "loss": 3.2886, "step": 59160 }, { "epoch": 4.01990759614078, "grad_norm": 0.7455196976661682, "learning_rate": 0.0004975115504824025, "loss": 3.5087, "step": 59165 }, { "epoch": 4.020247316211441, "grad_norm": 0.7343783974647522, "learning_rate": 0.0004974690854735698, "loss": 3.3992, "step": 59170 }, { "epoch": 4.0205870362821035, "grad_norm": 0.8975481390953064, "learning_rate": 0.0004974266204647371, "loss": 3.6677, "step": 59175 }, { "epoch": 4.0209267563527655, "grad_norm": 0.8668843507766724, "learning_rate": 0.0004973841554559044, "loss": 3.352, "step": 59180 }, { "epoch": 4.021266476423427, "grad_norm": 1.0666024684906006, "learning_rate": 0.0004973416904470716, "loss": 3.6495, "step": 59185 }, { "epoch": 4.021606196494089, "grad_norm": 0.83885657787323, "learning_rate": 0.0004972992254382389, "loss": 3.7973, "step": 59190 }, { "epoch": 4.021945916564751, "grad_norm": 0.8596224784851074, "learning_rate": 0.0004972567604294061, "loss": 3.5714, "step": 59195 }, { "epoch": 4.022285636635412, "grad_norm": 0.86994469165802, "learning_rate": 0.0004972142954205735, "loss": 3.4898, "step": 59200 }, { "epoch": 4.022625356706074, "grad_norm": 0.8553299307823181, "learning_rate": 0.0004971718304117407, "loss": 3.4782, "step": 59205 }, { "epoch": 4.022965076776736, "grad_norm": 0.8902169466018677, "learning_rate": 0.000497129365402908, "loss": 3.5854, "step": 59210 }, { "epoch": 4.023304796847397, "grad_norm": 0.8655052781105042, "learning_rate": 0.0004970869003940753, "loss": 3.6774, "step": 59215 }, { "epoch": 4.0236445169180595, "grad_norm": 0.7810933589935303, "learning_rate": 0.0004970444353852426, "loss": 3.4983, "step": 59220 }, { "epoch": 4.0239842369887215, "grad_norm": 0.9515738487243652, "learning_rate": 0.0004970019703764098, "loss": 3.5432, "step": 59225 }, { "epoch": 4.024323957059383, "grad_norm": 0.6742497682571411, "learning_rate": 0.0004969595053675772, "loss": 3.7443, "step": 59230 }, { "epoch": 4.024663677130045, "grad_norm": 0.7862504720687866, "learning_rate": 0.0004969170403587444, "loss": 3.6295, "step": 59235 }, { "epoch": 4.025003397200707, "grad_norm": 0.8000181913375854, "learning_rate": 0.0004968745753499117, "loss": 3.5042, "step": 59240 }, { "epoch": 4.025343117271368, "grad_norm": 0.8202589750289917, "learning_rate": 0.0004968321103410789, "loss": 3.6363, "step": 59245 }, { "epoch": 4.02568283734203, "grad_norm": 0.9175881743431091, "learning_rate": 0.0004967896453322463, "loss": 3.6685, "step": 59250 }, { "epoch": 4.026022557412692, "grad_norm": 0.8016685247421265, "learning_rate": 0.0004967471803234135, "loss": 3.4981, "step": 59255 }, { "epoch": 4.026362277483353, "grad_norm": 0.7623411417007446, "learning_rate": 0.0004967047153145807, "loss": 3.4201, "step": 59260 }, { "epoch": 4.0267019975540155, "grad_norm": 0.8220285773277283, "learning_rate": 0.0004966622503057481, "loss": 3.745, "step": 59265 }, { "epoch": 4.0270417176246776, "grad_norm": 0.9626908302307129, "learning_rate": 0.0004966197852969154, "loss": 3.6727, "step": 59270 }, { "epoch": 4.027381437695339, "grad_norm": 0.963424026966095, "learning_rate": 0.0004965773202880826, "loss": 3.5682, "step": 59275 }, { "epoch": 4.027721157766001, "grad_norm": 0.7740755677223206, "learning_rate": 0.00049653485527925, "loss": 3.3562, "step": 59280 }, { "epoch": 4.028060877836663, "grad_norm": 0.924393355846405, "learning_rate": 0.0004964923902704172, "loss": 3.6037, "step": 59285 }, { "epoch": 4.028400597907324, "grad_norm": 0.7366365790367126, "learning_rate": 0.0004964499252615844, "loss": 3.3526, "step": 59290 }, { "epoch": 4.028740317977986, "grad_norm": 1.064188838005066, "learning_rate": 0.0004964074602527517, "loss": 3.437, "step": 59295 }, { "epoch": 4.029080038048648, "grad_norm": 0.610778272151947, "learning_rate": 0.0004963649952439191, "loss": 3.5023, "step": 59300 }, { "epoch": 4.029419758119309, "grad_norm": 0.906237781047821, "learning_rate": 0.0004963225302350863, "loss": 3.488, "step": 59305 }, { "epoch": 4.0297594781899715, "grad_norm": 0.8823177218437195, "learning_rate": 0.0004962800652262535, "loss": 3.364, "step": 59310 }, { "epoch": 4.030099198260634, "grad_norm": 0.8561570644378662, "learning_rate": 0.0004962376002174209, "loss": 3.4643, "step": 59315 }, { "epoch": 4.030438918331295, "grad_norm": 0.7763888835906982, "learning_rate": 0.0004961951352085881, "loss": 3.534, "step": 59320 }, { "epoch": 4.030778638401957, "grad_norm": 0.8408674597740173, "learning_rate": 0.0004961526701997554, "loss": 3.5215, "step": 59325 }, { "epoch": 4.031118358472619, "grad_norm": 0.7309010624885559, "learning_rate": 0.0004961102051909228, "loss": 3.6271, "step": 59330 }, { "epoch": 4.03145807854328, "grad_norm": 1.031607747077942, "learning_rate": 0.00049606774018209, "loss": 3.5652, "step": 59335 }, { "epoch": 4.031797798613942, "grad_norm": 0.8041316866874695, "learning_rate": 0.0004960252751732572, "loss": 3.4452, "step": 59340 }, { "epoch": 4.032137518684604, "grad_norm": 1.041819453239441, "learning_rate": 0.0004959828101644245, "loss": 3.4553, "step": 59345 }, { "epoch": 4.032477238755265, "grad_norm": 0.8617551922798157, "learning_rate": 0.0004959403451555918, "loss": 3.5529, "step": 59350 }, { "epoch": 4.0328169588259275, "grad_norm": 0.9571493864059448, "learning_rate": 0.0004958978801467591, "loss": 3.56, "step": 59355 }, { "epoch": 4.03315667889659, "grad_norm": 0.964963436126709, "learning_rate": 0.0004958554151379263, "loss": 3.3228, "step": 59360 }, { "epoch": 4.033496398967251, "grad_norm": 0.8677114248275757, "learning_rate": 0.0004958129501290937, "loss": 3.3487, "step": 59365 }, { "epoch": 4.033836119037913, "grad_norm": 0.8580537438392639, "learning_rate": 0.0004957704851202609, "loss": 3.474, "step": 59370 }, { "epoch": 4.034175839108575, "grad_norm": 0.6939281225204468, "learning_rate": 0.0004957280201114282, "loss": 3.397, "step": 59375 }, { "epoch": 4.034515559179236, "grad_norm": 0.7544398307800293, "learning_rate": 0.0004956855551025954, "loss": 3.7162, "step": 59380 }, { "epoch": 4.034855279249898, "grad_norm": 0.8536282777786255, "learning_rate": 0.0004956430900937628, "loss": 3.4894, "step": 59385 }, { "epoch": 4.03519499932056, "grad_norm": 0.8817777037620544, "learning_rate": 0.00049560062508493, "loss": 3.3923, "step": 59390 }, { "epoch": 4.035534719391221, "grad_norm": 0.9778486490249634, "learning_rate": 0.0004955581600760973, "loss": 3.2971, "step": 59395 }, { "epoch": 4.0358744394618835, "grad_norm": 9.085989952087402, "learning_rate": 0.0004955156950672646, "loss": 3.2477, "step": 59400 }, { "epoch": 4.036214159532546, "grad_norm": 0.7704107165336609, "learning_rate": 0.0004954732300584319, "loss": 3.4429, "step": 59405 }, { "epoch": 4.036553879603207, "grad_norm": 0.968893826007843, "learning_rate": 0.0004954307650495991, "loss": 3.3488, "step": 59410 }, { "epoch": 4.036893599673869, "grad_norm": 0.88393634557724, "learning_rate": 0.0004953883000407664, "loss": 3.3584, "step": 59415 }, { "epoch": 4.037233319744531, "grad_norm": 1.0462424755096436, "learning_rate": 0.0004953458350319337, "loss": 3.3883, "step": 59420 }, { "epoch": 4.037573039815192, "grad_norm": 0.9671331644058228, "learning_rate": 0.000495303370023101, "loss": 3.4942, "step": 59425 }, { "epoch": 4.037912759885854, "grad_norm": 1.1834206581115723, "learning_rate": 0.0004952609050142683, "loss": 3.5535, "step": 59430 }, { "epoch": 4.038252479956516, "grad_norm": 0.8951017260551453, "learning_rate": 0.0004952184400054356, "loss": 3.4215, "step": 59435 }, { "epoch": 4.0385922000271774, "grad_norm": 0.9307129383087158, "learning_rate": 0.0004951759749966028, "loss": 3.4521, "step": 59440 }, { "epoch": 4.0389319200978395, "grad_norm": 1.2310844659805298, "learning_rate": 0.00049513350998777, "loss": 3.5149, "step": 59445 }, { "epoch": 4.039271640168501, "grad_norm": 0.9202895760536194, "learning_rate": 0.0004950910449789374, "loss": 3.3708, "step": 59450 }, { "epoch": 4.039611360239163, "grad_norm": 0.9394950866699219, "learning_rate": 0.0004950485799701047, "loss": 3.7742, "step": 59455 }, { "epoch": 4.039951080309825, "grad_norm": 0.7934741377830505, "learning_rate": 0.0004950061149612719, "loss": 3.5666, "step": 59460 }, { "epoch": 4.040290800380486, "grad_norm": 0.9467673897743225, "learning_rate": 0.0004949636499524392, "loss": 3.4321, "step": 59465 }, { "epoch": 4.040630520451148, "grad_norm": 0.7550963163375854, "learning_rate": 0.0004949211849436065, "loss": 3.6178, "step": 59470 }, { "epoch": 4.04097024052181, "grad_norm": 1.1531764268875122, "learning_rate": 0.0004948787199347737, "loss": 3.5039, "step": 59475 }, { "epoch": 4.041309960592471, "grad_norm": 0.912929117679596, "learning_rate": 0.000494836254925941, "loss": 3.5001, "step": 59480 }, { "epoch": 4.0416496806631335, "grad_norm": 0.9658652544021606, "learning_rate": 0.0004947937899171083, "loss": 3.455, "step": 59485 }, { "epoch": 4.0419894007337955, "grad_norm": 1.0641367435455322, "learning_rate": 0.0004947513249082756, "loss": 3.5756, "step": 59490 }, { "epoch": 4.042329120804457, "grad_norm": 1.0192972421646118, "learning_rate": 0.0004947088598994428, "loss": 3.7225, "step": 59495 }, { "epoch": 4.042668840875119, "grad_norm": 0.9425919651985168, "learning_rate": 0.0004946663948906102, "loss": 3.5573, "step": 59500 }, { "epoch": 4.043008560945781, "grad_norm": 0.6880160570144653, "learning_rate": 0.0004946239298817774, "loss": 3.7185, "step": 59505 }, { "epoch": 4.043348281016442, "grad_norm": 0.8692899346351624, "learning_rate": 0.0004945814648729447, "loss": 3.4777, "step": 59510 }, { "epoch": 4.043688001087104, "grad_norm": 0.9909335374832153, "learning_rate": 0.000494538999864112, "loss": 3.4808, "step": 59515 }, { "epoch": 4.044027721157766, "grad_norm": 1.2313508987426758, "learning_rate": 0.0004944965348552792, "loss": 3.3739, "step": 59520 }, { "epoch": 4.044367441228427, "grad_norm": 0.7821127772331238, "learning_rate": 0.0004944540698464465, "loss": 3.4041, "step": 59525 }, { "epoch": 4.0447071612990895, "grad_norm": 0.7277593612670898, "learning_rate": 0.0004944116048376139, "loss": 3.4754, "step": 59530 }, { "epoch": 4.0450468813697515, "grad_norm": 0.9902859926223755, "learning_rate": 0.0004943691398287811, "loss": 3.6015, "step": 59535 }, { "epoch": 4.045386601440413, "grad_norm": 0.75090491771698, "learning_rate": 0.0004943266748199484, "loss": 3.6644, "step": 59540 }, { "epoch": 4.045726321511075, "grad_norm": 0.8302679061889648, "learning_rate": 0.0004942842098111156, "loss": 3.3744, "step": 59545 }, { "epoch": 4.046066041581737, "grad_norm": 0.8659022450447083, "learning_rate": 0.000494241744802283, "loss": 3.4081, "step": 59550 }, { "epoch": 4.046405761652398, "grad_norm": 0.8431478142738342, "learning_rate": 0.0004941992797934502, "loss": 3.2691, "step": 59555 }, { "epoch": 4.04674548172306, "grad_norm": 0.8332762122154236, "learning_rate": 0.0004941568147846175, "loss": 3.7002, "step": 59560 }, { "epoch": 4.047085201793722, "grad_norm": 1.0419656038284302, "learning_rate": 0.0004941143497757848, "loss": 3.5708, "step": 59565 }, { "epoch": 4.047424921864383, "grad_norm": 1.036781907081604, "learning_rate": 0.000494071884766952, "loss": 3.3737, "step": 59570 }, { "epoch": 4.0477646419350455, "grad_norm": 0.8540711998939514, "learning_rate": 0.0004940294197581193, "loss": 3.2013, "step": 59575 }, { "epoch": 4.048104362005708, "grad_norm": 0.8660485148429871, "learning_rate": 0.0004939869547492867, "loss": 3.5735, "step": 59580 }, { "epoch": 4.048444082076369, "grad_norm": 1.5885177850723267, "learning_rate": 0.0004939444897404539, "loss": 3.4739, "step": 59585 }, { "epoch": 4.048783802147031, "grad_norm": 0.8557515144348145, "learning_rate": 0.0004939020247316211, "loss": 3.3974, "step": 59590 }, { "epoch": 4.049123522217693, "grad_norm": 1.22386634349823, "learning_rate": 0.0004938595597227884, "loss": 3.4099, "step": 59595 }, { "epoch": 4.049463242288354, "grad_norm": 0.9990693926811218, "learning_rate": 0.0004938170947139557, "loss": 3.6372, "step": 59600 }, { "epoch": 4.049802962359016, "grad_norm": 1.225592851638794, "learning_rate": 0.000493774629705123, "loss": 3.6008, "step": 59605 }, { "epoch": 4.050142682429678, "grad_norm": 0.8566232323646545, "learning_rate": 0.0004937321646962903, "loss": 3.5786, "step": 59610 }, { "epoch": 4.050482402500339, "grad_norm": 0.8954619765281677, "learning_rate": 0.0004936896996874576, "loss": 3.4853, "step": 59615 }, { "epoch": 4.0508221225710015, "grad_norm": 1.31369149684906, "learning_rate": 0.0004936472346786248, "loss": 3.5876, "step": 59620 }, { "epoch": 4.051161842641664, "grad_norm": 0.9822103381156921, "learning_rate": 0.0004936047696697921, "loss": 3.3509, "step": 59625 }, { "epoch": 4.051501562712325, "grad_norm": 0.8911438584327698, "learning_rate": 0.0004935623046609593, "loss": 3.4431, "step": 59630 }, { "epoch": 4.051841282782987, "grad_norm": 1.1207389831542969, "learning_rate": 0.0004935198396521267, "loss": 3.768, "step": 59635 }, { "epoch": 4.052181002853649, "grad_norm": 0.6248236894607544, "learning_rate": 0.0004934773746432939, "loss": 3.3655, "step": 59640 }, { "epoch": 4.05252072292431, "grad_norm": 0.7271294593811035, "learning_rate": 0.0004934349096344612, "loss": 3.5107, "step": 59645 }, { "epoch": 4.052860442994972, "grad_norm": 1.7869746685028076, "learning_rate": 0.0004933924446256285, "loss": 3.5311, "step": 59650 }, { "epoch": 4.053200163065634, "grad_norm": 0.9492328763008118, "learning_rate": 0.0004933499796167958, "loss": 3.4546, "step": 59655 }, { "epoch": 4.053539883136295, "grad_norm": 0.9061926603317261, "learning_rate": 0.000493307514607963, "loss": 3.3874, "step": 59660 }, { "epoch": 4.0538796032069575, "grad_norm": 1.0815743207931519, "learning_rate": 0.0004932650495991304, "loss": 3.547, "step": 59665 }, { "epoch": 4.05421932327762, "grad_norm": 0.9014431834220886, "learning_rate": 0.0004932225845902976, "loss": 3.6709, "step": 59670 }, { "epoch": 4.054559043348281, "grad_norm": 3.110290765762329, "learning_rate": 0.0004931801195814648, "loss": 3.5499, "step": 59675 }, { "epoch": 4.054898763418943, "grad_norm": 0.8537842631340027, "learning_rate": 0.0004931376545726321, "loss": 3.6232, "step": 59680 }, { "epoch": 4.055238483489605, "grad_norm": 0.8123435974121094, "learning_rate": 0.0004930951895637995, "loss": 3.4592, "step": 59685 }, { "epoch": 4.055578203560266, "grad_norm": 0.9474018812179565, "learning_rate": 0.0004930527245549667, "loss": 3.3542, "step": 59690 }, { "epoch": 4.055917923630928, "grad_norm": 0.994622528553009, "learning_rate": 0.000493010259546134, "loss": 3.4069, "step": 59695 }, { "epoch": 4.05625764370159, "grad_norm": 0.9391903281211853, "learning_rate": 0.0004929677945373013, "loss": 3.3757, "step": 59700 }, { "epoch": 4.056597363772251, "grad_norm": 0.9274879097938538, "learning_rate": 0.0004929253295284685, "loss": 3.4118, "step": 59705 }, { "epoch": 4.0569370838429135, "grad_norm": 0.8870189785957336, "learning_rate": 0.0004928828645196358, "loss": 3.5679, "step": 59710 }, { "epoch": 4.057276803913576, "grad_norm": 0.8023012280464172, "learning_rate": 0.0004928403995108032, "loss": 3.6025, "step": 59715 }, { "epoch": 4.057616523984237, "grad_norm": 0.8325897455215454, "learning_rate": 0.0004927979345019704, "loss": 3.4824, "step": 59720 }, { "epoch": 4.057956244054899, "grad_norm": 0.7491126656532288, "learning_rate": 0.0004927554694931376, "loss": 3.5764, "step": 59725 }, { "epoch": 4.058295964125561, "grad_norm": 1.0194222927093506, "learning_rate": 0.000492713004484305, "loss": 3.5386, "step": 59730 }, { "epoch": 4.058635684196222, "grad_norm": 0.7877004146575928, "learning_rate": 0.0004926705394754723, "loss": 3.442, "step": 59735 }, { "epoch": 4.058975404266884, "grad_norm": 1.1093658208847046, "learning_rate": 0.0004926280744666395, "loss": 3.5874, "step": 59740 }, { "epoch": 4.059315124337546, "grad_norm": 0.7492717504501343, "learning_rate": 0.0004925856094578067, "loss": 3.5593, "step": 59745 }, { "epoch": 4.0596548444082075, "grad_norm": 0.9673604965209961, "learning_rate": 0.0004925431444489741, "loss": 3.5165, "step": 59750 }, { "epoch": 4.0599945644788695, "grad_norm": 0.8375464081764221, "learning_rate": 0.0004925006794401413, "loss": 3.4809, "step": 59755 }, { "epoch": 4.060334284549532, "grad_norm": 0.9903005361557007, "learning_rate": 0.0004924582144313086, "loss": 3.4375, "step": 59760 }, { "epoch": 4.060674004620193, "grad_norm": 1.018079400062561, "learning_rate": 0.000492415749422476, "loss": 3.3943, "step": 59765 }, { "epoch": 4.061013724690855, "grad_norm": 1.0245575904846191, "learning_rate": 0.0004923732844136432, "loss": 3.4658, "step": 59770 }, { "epoch": 4.061353444761517, "grad_norm": 1.0447276830673218, "learning_rate": 0.0004923308194048104, "loss": 3.5528, "step": 59775 }, { "epoch": 4.061693164832178, "grad_norm": 0.8653343915939331, "learning_rate": 0.0004922883543959777, "loss": 3.4713, "step": 59780 }, { "epoch": 4.06203288490284, "grad_norm": 1.0537867546081543, "learning_rate": 0.000492245889387145, "loss": 3.393, "step": 59785 }, { "epoch": 4.062372604973502, "grad_norm": 0.9974700808525085, "learning_rate": 0.0004922034243783123, "loss": 3.541, "step": 59790 }, { "epoch": 4.0627123250441635, "grad_norm": 1.0795929431915283, "learning_rate": 0.0004921609593694795, "loss": 3.745, "step": 59795 }, { "epoch": 4.0630520451148255, "grad_norm": 1.1418275833129883, "learning_rate": 0.0004921184943606469, "loss": 3.5299, "step": 59800 }, { "epoch": 4.063391765185487, "grad_norm": 0.7432627081871033, "learning_rate": 0.0004920760293518141, "loss": 3.5906, "step": 59805 }, { "epoch": 4.063731485256149, "grad_norm": 0.8620191216468811, "learning_rate": 0.0004920335643429814, "loss": 3.5134, "step": 59810 }, { "epoch": 4.064071205326811, "grad_norm": 0.9250773191452026, "learning_rate": 0.0004919910993341487, "loss": 3.3716, "step": 59815 }, { "epoch": 4.064410925397472, "grad_norm": 0.7490436434745789, "learning_rate": 0.000491948634325316, "loss": 3.6238, "step": 59820 }, { "epoch": 4.064750645468134, "grad_norm": 1.1570332050323486, "learning_rate": 0.0004919061693164832, "loss": 3.4601, "step": 59825 }, { "epoch": 4.065090365538796, "grad_norm": 0.9343281388282776, "learning_rate": 0.0004918637043076504, "loss": 3.3059, "step": 59830 }, { "epoch": 4.065430085609457, "grad_norm": 0.8995257616043091, "learning_rate": 0.0004918212392988178, "loss": 3.2564, "step": 59835 }, { "epoch": 4.0657698056801195, "grad_norm": 0.8480408787727356, "learning_rate": 0.0004917787742899851, "loss": 3.7564, "step": 59840 }, { "epoch": 4.0661095257507816, "grad_norm": 0.800126314163208, "learning_rate": 0.0004917363092811523, "loss": 3.6931, "step": 59845 }, { "epoch": 4.066449245821443, "grad_norm": 0.9306234121322632, "learning_rate": 0.0004916938442723196, "loss": 3.4449, "step": 59850 }, { "epoch": 4.066788965892105, "grad_norm": 1.2591867446899414, "learning_rate": 0.0004916513792634869, "loss": 3.1755, "step": 59855 }, { "epoch": 4.067128685962767, "grad_norm": 1.1272797584533691, "learning_rate": 0.0004916089142546541, "loss": 3.3914, "step": 59860 }, { "epoch": 4.067468406033428, "grad_norm": 1.0475786924362183, "learning_rate": 0.0004915664492458215, "loss": 3.4657, "step": 59865 }, { "epoch": 4.06780812610409, "grad_norm": 0.8131334185600281, "learning_rate": 0.0004915239842369888, "loss": 3.6088, "step": 59870 }, { "epoch": 4.068147846174752, "grad_norm": 0.8794819712638855, "learning_rate": 0.000491481519228156, "loss": 3.6126, "step": 59875 }, { "epoch": 4.068487566245413, "grad_norm": 1.0365935564041138, "learning_rate": 0.0004914390542193232, "loss": 3.654, "step": 59880 }, { "epoch": 4.0688272863160755, "grad_norm": 1.0568506717681885, "learning_rate": 0.0004913965892104906, "loss": 3.5577, "step": 59885 }, { "epoch": 4.069167006386738, "grad_norm": 0.87224942445755, "learning_rate": 0.0004913541242016579, "loss": 3.4776, "step": 59890 }, { "epoch": 4.069506726457399, "grad_norm": 1.0197595357894897, "learning_rate": 0.0004913116591928251, "loss": 3.495, "step": 59895 }, { "epoch": 4.069846446528061, "grad_norm": 0.8176692128181458, "learning_rate": 0.0004912691941839924, "loss": 3.6596, "step": 59900 }, { "epoch": 4.070186166598723, "grad_norm": 0.8002888560295105, "learning_rate": 0.0004912267291751597, "loss": 3.7552, "step": 59905 }, { "epoch": 4.070525886669384, "grad_norm": 0.8595325946807861, "learning_rate": 0.0004911842641663269, "loss": 3.3244, "step": 59910 }, { "epoch": 4.070865606740046, "grad_norm": 1.0854674577713013, "learning_rate": 0.0004911417991574943, "loss": 3.6076, "step": 59915 }, { "epoch": 4.071205326810708, "grad_norm": 1.0645173788070679, "learning_rate": 0.0004910993341486616, "loss": 3.5576, "step": 59920 }, { "epoch": 4.071545046881369, "grad_norm": 0.7809297442436218, "learning_rate": 0.0004910568691398288, "loss": 3.5359, "step": 59925 }, { "epoch": 4.0718847669520315, "grad_norm": 0.9088901877403259, "learning_rate": 0.000491014404130996, "loss": 3.6544, "step": 59930 }, { "epoch": 4.072224487022694, "grad_norm": 0.7722979784011841, "learning_rate": 0.0004909719391221634, "loss": 3.441, "step": 59935 }, { "epoch": 4.072564207093355, "grad_norm": 0.8845393657684326, "learning_rate": 0.0004909294741133306, "loss": 3.718, "step": 59940 }, { "epoch": 4.072903927164017, "grad_norm": 0.928385853767395, "learning_rate": 0.0004908870091044979, "loss": 3.8538, "step": 59945 }, { "epoch": 4.073243647234679, "grad_norm": 0.972971498966217, "learning_rate": 0.0004908445440956652, "loss": 3.5335, "step": 59950 }, { "epoch": 4.07358336730534, "grad_norm": 0.8681070804595947, "learning_rate": 0.0004908020790868325, "loss": 3.4464, "step": 59955 }, { "epoch": 4.073923087376002, "grad_norm": 1.008081316947937, "learning_rate": 0.0004907596140779997, "loss": 3.3947, "step": 59960 }, { "epoch": 4.074262807446664, "grad_norm": 0.9712215065956116, "learning_rate": 0.0004907171490691671, "loss": 3.1869, "step": 59965 }, { "epoch": 4.074602527517325, "grad_norm": 0.7963645458221436, "learning_rate": 0.0004906746840603343, "loss": 3.4136, "step": 59970 }, { "epoch": 4.0749422475879875, "grad_norm": 0.9250348210334778, "learning_rate": 0.0004906322190515016, "loss": 3.6041, "step": 59975 }, { "epoch": 4.07528196765865, "grad_norm": 0.8094295263290405, "learning_rate": 0.0004905897540426688, "loss": 3.5135, "step": 59980 }, { "epoch": 4.075621687729311, "grad_norm": 0.7919937372207642, "learning_rate": 0.0004905472890338361, "loss": 3.3237, "step": 59985 }, { "epoch": 4.075961407799973, "grad_norm": 0.862321138381958, "learning_rate": 0.0004905048240250034, "loss": 3.3792, "step": 59990 }, { "epoch": 4.076301127870635, "grad_norm": 1.0237566232681274, "learning_rate": 0.0004904623590161707, "loss": 3.2765, "step": 59995 }, { "epoch": 4.076640847941296, "grad_norm": 0.9160123467445374, "learning_rate": 0.000490419894007338, "loss": 3.5139, "step": 60000 }, { "epoch": 4.076980568011958, "grad_norm": 0.8233296275138855, "learning_rate": 0.0004903774289985052, "loss": 3.4318, "step": 60005 }, { "epoch": 4.07732028808262, "grad_norm": 0.8600610494613647, "learning_rate": 0.0004903349639896725, "loss": 3.4327, "step": 60010 }, { "epoch": 4.0776600081532814, "grad_norm": 1.3831515312194824, "learning_rate": 0.0004902924989808397, "loss": 3.3522, "step": 60015 }, { "epoch": 4.0779997282239435, "grad_norm": 1.198341727256775, "learning_rate": 0.0004902500339720071, "loss": 3.4329, "step": 60020 }, { "epoch": 4.078339448294606, "grad_norm": 0.8583764433860779, "learning_rate": 0.0004902075689631744, "loss": 3.3961, "step": 60025 }, { "epoch": 4.078679168365267, "grad_norm": 0.7868750691413879, "learning_rate": 0.0004901651039543416, "loss": 3.4684, "step": 60030 }, { "epoch": 4.079018888435929, "grad_norm": 0.8584799766540527, "learning_rate": 0.0004901226389455089, "loss": 3.4652, "step": 60035 }, { "epoch": 4.079358608506591, "grad_norm": 1.0132217407226562, "learning_rate": 0.0004900801739366762, "loss": 3.5187, "step": 60040 }, { "epoch": 4.079698328577252, "grad_norm": 1.039062738418579, "learning_rate": 0.0004900377089278434, "loss": 3.4672, "step": 60045 }, { "epoch": 4.080038048647914, "grad_norm": 0.8012052774429321, "learning_rate": 0.0004899952439190108, "loss": 3.4535, "step": 60050 }, { "epoch": 4.080377768718576, "grad_norm": 0.8432538509368896, "learning_rate": 0.000489952778910178, "loss": 3.5628, "step": 60055 }, { "epoch": 4.0807174887892375, "grad_norm": 0.7210903763771057, "learning_rate": 0.0004899103139013453, "loss": 3.5153, "step": 60060 }, { "epoch": 4.0810572088598995, "grad_norm": 0.8559758067131042, "learning_rate": 0.0004898678488925125, "loss": 3.3882, "step": 60065 }, { "epoch": 4.081396928930562, "grad_norm": 1.042021632194519, "learning_rate": 0.0004898253838836799, "loss": 3.3883, "step": 60070 }, { "epoch": 4.081736649001223, "grad_norm": 1.1754270792007446, "learning_rate": 0.0004897829188748471, "loss": 3.4268, "step": 60075 }, { "epoch": 4.082076369071885, "grad_norm": 1.0656787157058716, "learning_rate": 0.0004897404538660144, "loss": 3.4968, "step": 60080 }, { "epoch": 4.082416089142547, "grad_norm": 0.8314180970191956, "learning_rate": 0.0004896979888571817, "loss": 3.5941, "step": 60085 }, { "epoch": 4.082755809213208, "grad_norm": 0.8209211826324463, "learning_rate": 0.000489655523848349, "loss": 3.6653, "step": 60090 }, { "epoch": 4.08309552928387, "grad_norm": 1.1467338800430298, "learning_rate": 0.0004896130588395162, "loss": 3.933, "step": 60095 }, { "epoch": 4.083435249354532, "grad_norm": 0.8042275905609131, "learning_rate": 0.0004895705938306836, "loss": 3.3902, "step": 60100 }, { "epoch": 4.0837749694251935, "grad_norm": 0.8383256793022156, "learning_rate": 0.0004895281288218508, "loss": 3.2632, "step": 60105 }, { "epoch": 4.0841146894958555, "grad_norm": 0.7507006525993347, "learning_rate": 0.000489485663813018, "loss": 3.5874, "step": 60110 }, { "epoch": 4.084454409566518, "grad_norm": 0.8265739679336548, "learning_rate": 0.0004894431988041853, "loss": 3.6833, "step": 60115 }, { "epoch": 4.084794129637179, "grad_norm": 0.7642436027526855, "learning_rate": 0.0004894007337953527, "loss": 3.4768, "step": 60120 }, { "epoch": 4.085133849707841, "grad_norm": 0.9462882876396179, "learning_rate": 0.0004893582687865199, "loss": 3.6035, "step": 60125 }, { "epoch": 4.085473569778502, "grad_norm": 0.9995160102844238, "learning_rate": 0.0004893158037776872, "loss": 3.4231, "step": 60130 }, { "epoch": 4.085813289849164, "grad_norm": 1.0521057844161987, "learning_rate": 0.0004892733387688545, "loss": 3.7853, "step": 60135 }, { "epoch": 4.086153009919826, "grad_norm": 1.0439428091049194, "learning_rate": 0.0004892308737600217, "loss": 3.3278, "step": 60140 }, { "epoch": 4.086492729990487, "grad_norm": 0.8915185928344727, "learning_rate": 0.000489188408751189, "loss": 3.5125, "step": 60145 }, { "epoch": 4.0868324500611495, "grad_norm": 0.7876999974250793, "learning_rate": 0.0004891459437423564, "loss": 3.5396, "step": 60150 }, { "epoch": 4.0871721701318116, "grad_norm": 0.6847761273384094, "learning_rate": 0.0004891034787335236, "loss": 3.7642, "step": 60155 }, { "epoch": 4.087511890202473, "grad_norm": 1.1301612854003906, "learning_rate": 0.0004890610137246908, "loss": 3.5201, "step": 60160 }, { "epoch": 4.087851610273135, "grad_norm": 0.8413057327270508, "learning_rate": 0.0004890185487158582, "loss": 3.5959, "step": 60165 }, { "epoch": 4.088191330343797, "grad_norm": 0.9388220906257629, "learning_rate": 0.0004889760837070254, "loss": 3.3561, "step": 60170 }, { "epoch": 4.088531050414458, "grad_norm": 1.0634440183639526, "learning_rate": 0.0004889336186981927, "loss": 3.6321, "step": 60175 }, { "epoch": 4.08887077048512, "grad_norm": 0.8490501642227173, "learning_rate": 0.00048889115368936, "loss": 3.7104, "step": 60180 }, { "epoch": 4.089210490555782, "grad_norm": 0.8806116580963135, "learning_rate": 0.0004888486886805273, "loss": 3.2592, "step": 60185 }, { "epoch": 4.089550210626443, "grad_norm": 0.9158693552017212, "learning_rate": 0.0004888062236716945, "loss": 3.5069, "step": 60190 }, { "epoch": 4.0898899306971055, "grad_norm": 0.9916989803314209, "learning_rate": 0.0004887637586628618, "loss": 3.7523, "step": 60195 }, { "epoch": 4.090229650767768, "grad_norm": 0.7990747690200806, "learning_rate": 0.0004887212936540291, "loss": 3.4108, "step": 60200 }, { "epoch": 4.090569370838429, "grad_norm": 1.6441184282302856, "learning_rate": 0.0004886788286451964, "loss": 3.686, "step": 60205 }, { "epoch": 4.090909090909091, "grad_norm": 0.824474036693573, "learning_rate": 0.0004886363636363636, "loss": 3.352, "step": 60210 }, { "epoch": 4.091248810979753, "grad_norm": 0.8636743426322937, "learning_rate": 0.000488593898627531, "loss": 3.5884, "step": 60215 }, { "epoch": 4.091588531050414, "grad_norm": 0.9154664874076843, "learning_rate": 0.0004885514336186982, "loss": 3.4228, "step": 60220 }, { "epoch": 4.091928251121076, "grad_norm": 0.9216048121452332, "learning_rate": 0.0004885089686098655, "loss": 3.6307, "step": 60225 }, { "epoch": 4.092267971191738, "grad_norm": 0.7878738641738892, "learning_rate": 0.0004884665036010327, "loss": 3.6046, "step": 60230 }, { "epoch": 4.092607691262399, "grad_norm": 0.7026242613792419, "learning_rate": 0.0004884240385922001, "loss": 3.3199, "step": 60235 }, { "epoch": 4.0929474113330615, "grad_norm": 1.1400413513183594, "learning_rate": 0.0004883815735833673, "loss": 3.262, "step": 60240 }, { "epoch": 4.093287131403724, "grad_norm": 1.0244392156600952, "learning_rate": 0.0004883391085745346, "loss": 3.4037, "step": 60245 }, { "epoch": 4.093626851474385, "grad_norm": 0.9020297527313232, "learning_rate": 0.0004882966435657019, "loss": 3.2456, "step": 60250 }, { "epoch": 4.093966571545047, "grad_norm": 0.8943917751312256, "learning_rate": 0.00048825417855686914, "loss": 3.4998, "step": 60255 }, { "epoch": 4.094306291615709, "grad_norm": 1.0855733156204224, "learning_rate": 0.00048821171354803647, "loss": 3.5796, "step": 60260 }, { "epoch": 4.09464601168637, "grad_norm": 0.8314254283905029, "learning_rate": 0.0004881692485392037, "loss": 3.6525, "step": 60265 }, { "epoch": 4.094985731757032, "grad_norm": 1.0861260890960693, "learning_rate": 0.000488126783530371, "loss": 3.3879, "step": 60270 }, { "epoch": 4.095325451827694, "grad_norm": 0.8492047190666199, "learning_rate": 0.00048808431852153826, "loss": 3.3807, "step": 60275 }, { "epoch": 4.095665171898355, "grad_norm": 0.851253092288971, "learning_rate": 0.00048804185351270554, "loss": 3.4912, "step": 60280 }, { "epoch": 4.0960048919690175, "grad_norm": 0.7417199611663818, "learning_rate": 0.0004879993885038728, "loss": 3.3065, "step": 60285 }, { "epoch": 4.09634461203968, "grad_norm": 0.9336373805999756, "learning_rate": 0.0004879569234950401, "loss": 3.439, "step": 60290 }, { "epoch": 4.096684332110341, "grad_norm": 1.1025924682617188, "learning_rate": 0.0004879144584862074, "loss": 3.4208, "step": 60295 }, { "epoch": 4.097024052181003, "grad_norm": 0.8482956886291504, "learning_rate": 0.00048787199347737466, "loss": 3.5746, "step": 60300 }, { "epoch": 4.097363772251665, "grad_norm": 0.7988093495368958, "learning_rate": 0.00048782952846854194, "loss": 3.6002, "step": 60305 }, { "epoch": 4.097703492322326, "grad_norm": 0.760061502456665, "learning_rate": 0.00048778706345970916, "loss": 3.2685, "step": 60310 }, { "epoch": 4.098043212392988, "grad_norm": 0.8115400075912476, "learning_rate": 0.0004877445984508765, "loss": 3.5794, "step": 60315 }, { "epoch": 4.09838293246365, "grad_norm": 0.8011322617530823, "learning_rate": 0.0004877021334420438, "loss": 3.354, "step": 60320 }, { "epoch": 4.0987226525343115, "grad_norm": 2.6934351921081543, "learning_rate": 0.000487659668433211, "loss": 3.5544, "step": 60325 }, { "epoch": 4.0990623726049735, "grad_norm": 0.8311012983322144, "learning_rate": 0.00048761720342437834, "loss": 3.4974, "step": 60330 }, { "epoch": 4.099402092675636, "grad_norm": 0.9006627798080444, "learning_rate": 0.0004875747384155456, "loss": 3.6025, "step": 60335 }, { "epoch": 4.099741812746297, "grad_norm": 0.9660932421684265, "learning_rate": 0.00048753227340671284, "loss": 3.4796, "step": 60340 }, { "epoch": 4.100081532816959, "grad_norm": 0.825990617275238, "learning_rate": 0.0004874898083978802, "loss": 3.5355, "step": 60345 }, { "epoch": 4.100421252887621, "grad_norm": 0.765449047088623, "learning_rate": 0.00048744734338904746, "loss": 3.6171, "step": 60350 }, { "epoch": 4.100760972958282, "grad_norm": 0.6704328656196594, "learning_rate": 0.0004874048783802147, "loss": 3.544, "step": 60355 }, { "epoch": 4.101100693028944, "grad_norm": 0.9004793167114258, "learning_rate": 0.00048736241337138197, "loss": 3.5444, "step": 60360 }, { "epoch": 4.101440413099606, "grad_norm": 0.9138051271438599, "learning_rate": 0.0004873199483625493, "loss": 3.5035, "step": 60365 }, { "epoch": 4.1017801331702675, "grad_norm": 0.9839326739311218, "learning_rate": 0.0004872774833537165, "loss": 3.4922, "step": 60370 }, { "epoch": 4.1021198532409295, "grad_norm": 0.9757943749427795, "learning_rate": 0.0004872350183448838, "loss": 3.4791, "step": 60375 }, { "epoch": 4.102459573311592, "grad_norm": 0.7705000042915344, "learning_rate": 0.00048719255333605114, "loss": 3.6002, "step": 60380 }, { "epoch": 4.102799293382253, "grad_norm": 0.862695574760437, "learning_rate": 0.00048715008832721837, "loss": 3.4593, "step": 60385 }, { "epoch": 4.103139013452915, "grad_norm": 0.8168323636054993, "learning_rate": 0.00048710762331838565, "loss": 3.413, "step": 60390 }, { "epoch": 4.103478733523577, "grad_norm": 0.7711490988731384, "learning_rate": 0.0004870651583095529, "loss": 3.3285, "step": 60395 }, { "epoch": 4.103818453594238, "grad_norm": 0.8221679329872131, "learning_rate": 0.00048702269330072026, "loss": 3.3133, "step": 60400 }, { "epoch": 4.1041581736649, "grad_norm": 1.0641248226165771, "learning_rate": 0.0004869802282918875, "loss": 3.4725, "step": 60405 }, { "epoch": 4.104497893735562, "grad_norm": 0.8830304145812988, "learning_rate": 0.00048693776328305477, "loss": 3.4793, "step": 60410 }, { "epoch": 4.1048376138062235, "grad_norm": 0.9198535680770874, "learning_rate": 0.0004868952982742221, "loss": 3.4362, "step": 60415 }, { "epoch": 4.1051773338768855, "grad_norm": 1.0214260816574097, "learning_rate": 0.0004868528332653893, "loss": 3.5625, "step": 60420 }, { "epoch": 4.105517053947548, "grad_norm": 0.9782505035400391, "learning_rate": 0.0004868103682565566, "loss": 3.6029, "step": 60425 }, { "epoch": 4.105856774018209, "grad_norm": 0.815485954284668, "learning_rate": 0.0004867679032477239, "loss": 3.4065, "step": 60430 }, { "epoch": 4.106196494088871, "grad_norm": 0.8695691227912903, "learning_rate": 0.00048672543823889117, "loss": 3.1953, "step": 60435 }, { "epoch": 4.106536214159533, "grad_norm": 0.9137519598007202, "learning_rate": 0.00048668297323005845, "loss": 3.5601, "step": 60440 }, { "epoch": 4.106875934230194, "grad_norm": 0.7799518704414368, "learning_rate": 0.0004866405082212257, "loss": 3.5166, "step": 60445 }, { "epoch": 4.107215654300856, "grad_norm": 0.9309407472610474, "learning_rate": 0.000486598043212393, "loss": 3.6408, "step": 60450 }, { "epoch": 4.107555374371518, "grad_norm": 0.9623941779136658, "learning_rate": 0.0004865555782035603, "loss": 3.4567, "step": 60455 }, { "epoch": 4.1078950944421795, "grad_norm": 0.7922086119651794, "learning_rate": 0.00048651311319472757, "loss": 3.4271, "step": 60460 }, { "epoch": 4.108234814512842, "grad_norm": 1.0140390396118164, "learning_rate": 0.0004864706481858948, "loss": 3.3857, "step": 60465 }, { "epoch": 4.108574534583504, "grad_norm": 0.9296822547912598, "learning_rate": 0.0004864281831770621, "loss": 3.4689, "step": 60470 }, { "epoch": 4.108914254654165, "grad_norm": 1.0194514989852905, "learning_rate": 0.0004863857181682294, "loss": 3.6606, "step": 60475 }, { "epoch": 4.109253974724827, "grad_norm": 0.7608421444892883, "learning_rate": 0.00048634325315939663, "loss": 3.5576, "step": 60480 }, { "epoch": 4.109593694795488, "grad_norm": 0.7357890605926514, "learning_rate": 0.00048630078815056397, "loss": 3.4112, "step": 60485 }, { "epoch": 4.10993341486615, "grad_norm": 0.824417233467102, "learning_rate": 0.00048625832314173125, "loss": 3.7389, "step": 60490 }, { "epoch": 4.110273134936812, "grad_norm": 1.1876016855239868, "learning_rate": 0.0004862158581328985, "loss": 3.4617, "step": 60495 }, { "epoch": 4.110612855007473, "grad_norm": 0.923630952835083, "learning_rate": 0.00048617339312406575, "loss": 3.1587, "step": 60500 }, { "epoch": 4.1109525750781355, "grad_norm": 0.8195562362670898, "learning_rate": 0.0004861309281152331, "loss": 3.4922, "step": 60505 }, { "epoch": 4.111292295148798, "grad_norm": 0.9582125544548035, "learning_rate": 0.0004860884631064003, "loss": 3.5156, "step": 60510 }, { "epoch": 4.111632015219459, "grad_norm": 0.7129930257797241, "learning_rate": 0.0004860459980975676, "loss": 3.5218, "step": 60515 }, { "epoch": 4.111971735290121, "grad_norm": 1.013438105583191, "learning_rate": 0.00048600353308873493, "loss": 3.4358, "step": 60520 }, { "epoch": 4.112311455360783, "grad_norm": 1.8352806568145752, "learning_rate": 0.00048596106807990215, "loss": 3.4366, "step": 60525 }, { "epoch": 4.112651175431444, "grad_norm": 1.054357647895813, "learning_rate": 0.00048591860307106943, "loss": 3.3337, "step": 60530 }, { "epoch": 4.112990895502106, "grad_norm": 5.102238655090332, "learning_rate": 0.0004858761380622367, "loss": 3.2135, "step": 60535 }, { "epoch": 4.113330615572768, "grad_norm": 0.9283230900764465, "learning_rate": 0.000485833673053404, "loss": 3.2923, "step": 60540 }, { "epoch": 4.113670335643429, "grad_norm": 0.9232462644577026, "learning_rate": 0.0004857912080445713, "loss": 3.5495, "step": 60545 }, { "epoch": 4.1140100557140915, "grad_norm": 0.851104199886322, "learning_rate": 0.00048574874303573855, "loss": 3.3081, "step": 60550 }, { "epoch": 4.114349775784754, "grad_norm": 0.8237915635108948, "learning_rate": 0.00048570627802690583, "loss": 3.3159, "step": 60555 }, { "epoch": 4.114689495855415, "grad_norm": 1.305794596672058, "learning_rate": 0.0004856638130180731, "loss": 3.3933, "step": 60560 }, { "epoch": 4.115029215926077, "grad_norm": 0.8856663703918457, "learning_rate": 0.0004856213480092404, "loss": 3.4987, "step": 60565 }, { "epoch": 4.115368935996739, "grad_norm": 1.0998294353485107, "learning_rate": 0.0004855788830004077, "loss": 3.4094, "step": 60570 }, { "epoch": 4.1157086560674, "grad_norm": 1.2072299718856812, "learning_rate": 0.00048553641799157495, "loss": 3.1834, "step": 60575 }, { "epoch": 4.116048376138062, "grad_norm": 1.1457629203796387, "learning_rate": 0.00048549395298274223, "loss": 3.7113, "step": 60580 }, { "epoch": 4.116388096208724, "grad_norm": 0.7134578824043274, "learning_rate": 0.0004854514879739095, "loss": 3.2979, "step": 60585 }, { "epoch": 4.116727816279385, "grad_norm": 0.8877643346786499, "learning_rate": 0.0004854090229650768, "loss": 3.4389, "step": 60590 }, { "epoch": 4.1170675363500475, "grad_norm": 1.1142362356185913, "learning_rate": 0.0004853665579562441, "loss": 3.5986, "step": 60595 }, { "epoch": 4.11740725642071, "grad_norm": 0.8856332898139954, "learning_rate": 0.00048532409294741135, "loss": 3.7596, "step": 60600 }, { "epoch": 4.117746976491371, "grad_norm": 0.9383743405342102, "learning_rate": 0.0004852816279385786, "loss": 3.4659, "step": 60605 }, { "epoch": 4.118086696562033, "grad_norm": 1.0099022388458252, "learning_rate": 0.0004852391629297459, "loss": 3.296, "step": 60610 }, { "epoch": 4.118426416632695, "grad_norm": 1.203545093536377, "learning_rate": 0.0004851966979209132, "loss": 3.366, "step": 60615 }, { "epoch": 4.118766136703356, "grad_norm": 0.8963096141815186, "learning_rate": 0.0004851542329120804, "loss": 3.5149, "step": 60620 }, { "epoch": 4.119105856774018, "grad_norm": 0.7907694578170776, "learning_rate": 0.00048511176790324775, "loss": 3.5806, "step": 60625 }, { "epoch": 4.11944557684468, "grad_norm": 0.7245348691940308, "learning_rate": 0.00048506930289441504, "loss": 3.2355, "step": 60630 }, { "epoch": 4.1197852969153415, "grad_norm": 0.8068969249725342, "learning_rate": 0.00048502683788558226, "loss": 3.7529, "step": 60635 }, { "epoch": 4.1201250169860035, "grad_norm": 0.8081486821174622, "learning_rate": 0.0004849843728767496, "loss": 3.3521, "step": 60640 }, { "epoch": 4.120464737056666, "grad_norm": 0.8287180662155151, "learning_rate": 0.0004849419078679169, "loss": 3.5351, "step": 60645 }, { "epoch": 4.120804457127327, "grad_norm": 0.8903965950012207, "learning_rate": 0.0004848994428590841, "loss": 3.6391, "step": 60650 }, { "epoch": 4.121144177197989, "grad_norm": 0.8040282726287842, "learning_rate": 0.0004848569778502514, "loss": 3.569, "step": 60655 }, { "epoch": 4.121483897268651, "grad_norm": 0.6821370720863342, "learning_rate": 0.0004848145128414187, "loss": 3.3349, "step": 60660 }, { "epoch": 4.121823617339312, "grad_norm": 1.0014156103134155, "learning_rate": 0.00048477204783258594, "loss": 3.4021, "step": 60665 }, { "epoch": 4.122163337409974, "grad_norm": 1.094258427619934, "learning_rate": 0.0004847295828237532, "loss": 3.3974, "step": 60670 }, { "epoch": 4.122503057480636, "grad_norm": 1.1385911703109741, "learning_rate": 0.00048468711781492056, "loss": 3.3088, "step": 60675 }, { "epoch": 4.1228427775512975, "grad_norm": 1.1799378395080566, "learning_rate": 0.0004846446528060878, "loss": 3.2837, "step": 60680 }, { "epoch": 4.1231824976219595, "grad_norm": 1.2915782928466797, "learning_rate": 0.00048460218779725506, "loss": 3.4061, "step": 60685 }, { "epoch": 4.123522217692622, "grad_norm": 0.7857838869094849, "learning_rate": 0.00048455972278842234, "loss": 3.9054, "step": 60690 }, { "epoch": 4.123861937763283, "grad_norm": 0.9258236885070801, "learning_rate": 0.0004845172577795896, "loss": 3.1958, "step": 60695 }, { "epoch": 4.124201657833945, "grad_norm": 0.9963165521621704, "learning_rate": 0.0004844747927707569, "loss": 3.1761, "step": 60700 }, { "epoch": 4.124541377904607, "grad_norm": 1.0547771453857422, "learning_rate": 0.0004844323277619242, "loss": 3.441, "step": 60705 }, { "epoch": 4.124881097975268, "grad_norm": 0.9425568580627441, "learning_rate": 0.00048438986275309146, "loss": 3.3796, "step": 60710 }, { "epoch": 4.12522081804593, "grad_norm": 0.84581059217453, "learning_rate": 0.00048434739774425874, "loss": 3.462, "step": 60715 }, { "epoch": 4.125560538116592, "grad_norm": 0.7780881524085999, "learning_rate": 0.000484304932735426, "loss": 3.3941, "step": 60720 }, { "epoch": 4.1259002581872535, "grad_norm": 1.4650455713272095, "learning_rate": 0.00048426246772659325, "loss": 3.1523, "step": 60725 }, { "epoch": 4.1262399782579156, "grad_norm": 1.6117115020751953, "learning_rate": 0.0004842200027177606, "loss": 3.4549, "step": 60730 }, { "epoch": 4.126579698328578, "grad_norm": 0.9062334895133972, "learning_rate": 0.00048417753770892786, "loss": 3.4221, "step": 60735 }, { "epoch": 4.126919418399239, "grad_norm": 0.9595715999603271, "learning_rate": 0.00048413507270009514, "loss": 3.3914, "step": 60740 }, { "epoch": 4.127259138469901, "grad_norm": 1.1753897666931152, "learning_rate": 0.0004840926076912624, "loss": 3.1208, "step": 60745 }, { "epoch": 4.127598858540563, "grad_norm": 0.9783079624176025, "learning_rate": 0.0004840501426824297, "loss": 3.7187, "step": 60750 }, { "epoch": 4.127938578611224, "grad_norm": 0.7220286726951599, "learning_rate": 0.000484007677673597, "loss": 3.5384, "step": 60755 }, { "epoch": 4.128278298681886, "grad_norm": 0.9653136730194092, "learning_rate": 0.0004839652126647642, "loss": 3.5373, "step": 60760 }, { "epoch": 4.128618018752548, "grad_norm": 0.8879910111427307, "learning_rate": 0.00048392274765593154, "loss": 3.3493, "step": 60765 }, { "epoch": 4.1289577388232095, "grad_norm": 1.165257453918457, "learning_rate": 0.0004838802826470988, "loss": 3.5048, "step": 60770 }, { "epoch": 4.129297458893872, "grad_norm": 0.7714213132858276, "learning_rate": 0.00048383781763826605, "loss": 3.3744, "step": 60775 }, { "epoch": 4.129637178964534, "grad_norm": 0.9698021411895752, "learning_rate": 0.0004837953526294334, "loss": 3.506, "step": 60780 }, { "epoch": 4.129976899035195, "grad_norm": 0.6581893563270569, "learning_rate": 0.00048375288762060066, "loss": 3.5473, "step": 60785 }, { "epoch": 4.130316619105857, "grad_norm": 1.1890594959259033, "learning_rate": 0.0004837104226117679, "loss": 3.5606, "step": 60790 }, { "epoch": 4.130656339176519, "grad_norm": 0.8945116996765137, "learning_rate": 0.00048366795760293517, "loss": 3.4167, "step": 60795 }, { "epoch": 4.13099605924718, "grad_norm": 0.9084517359733582, "learning_rate": 0.0004836254925941025, "loss": 3.3764, "step": 60800 }, { "epoch": 4.131335779317842, "grad_norm": 0.8003615736961365, "learning_rate": 0.00048358302758526973, "loss": 3.2852, "step": 60805 }, { "epoch": 4.131675499388503, "grad_norm": 1.0807470083236694, "learning_rate": 0.000483540562576437, "loss": 3.563, "step": 60810 }, { "epoch": 4.1320152194591655, "grad_norm": 1.020410418510437, "learning_rate": 0.00048349809756760434, "loss": 3.3261, "step": 60815 }, { "epoch": 4.132354939529828, "grad_norm": 0.8012077808380127, "learning_rate": 0.00048345563255877157, "loss": 3.5699, "step": 60820 }, { "epoch": 4.132694659600489, "grad_norm": 0.852891743183136, "learning_rate": 0.00048341316754993885, "loss": 3.4496, "step": 60825 }, { "epoch": 4.133034379671151, "grad_norm": 0.9984927773475647, "learning_rate": 0.00048337070254110613, "loss": 3.4363, "step": 60830 }, { "epoch": 4.133374099741813, "grad_norm": 0.8366738557815552, "learning_rate": 0.0004833282375322734, "loss": 3.4055, "step": 60835 }, { "epoch": 4.133713819812474, "grad_norm": 1.1941158771514893, "learning_rate": 0.0004832857725234407, "loss": 3.3566, "step": 60840 }, { "epoch": 4.134053539883136, "grad_norm": 1.2291193008422852, "learning_rate": 0.00048324330751460797, "loss": 3.2872, "step": 60845 }, { "epoch": 4.134393259953798, "grad_norm": 0.8321509957313538, "learning_rate": 0.00048320084250577525, "loss": 3.5308, "step": 60850 }, { "epoch": 4.134732980024459, "grad_norm": 0.8231420516967773, "learning_rate": 0.00048315837749694253, "loss": 3.4741, "step": 60855 }, { "epoch": 4.1350727000951215, "grad_norm": 1.0114644765853882, "learning_rate": 0.0004831159124881098, "loss": 3.5014, "step": 60860 }, { "epoch": 4.135412420165784, "grad_norm": 0.8152646422386169, "learning_rate": 0.00048307344747927704, "loss": 3.5738, "step": 60865 }, { "epoch": 4.135752140236445, "grad_norm": 0.8439964056015015, "learning_rate": 0.00048303098247044437, "loss": 3.4225, "step": 60870 }, { "epoch": 4.136091860307107, "grad_norm": 0.7128999829292297, "learning_rate": 0.00048298851746161165, "loss": 3.457, "step": 60875 }, { "epoch": 4.136431580377769, "grad_norm": 0.8958193063735962, "learning_rate": 0.0004829460524527789, "loss": 3.4807, "step": 60880 }, { "epoch": 4.13677130044843, "grad_norm": 0.9176084399223328, "learning_rate": 0.0004829035874439462, "loss": 3.4527, "step": 60885 }, { "epoch": 4.137111020519092, "grad_norm": 0.9475146532058716, "learning_rate": 0.0004828611224351135, "loss": 3.5877, "step": 60890 }, { "epoch": 4.137450740589754, "grad_norm": 1.0759168863296509, "learning_rate": 0.0004828186574262807, "loss": 3.4587, "step": 60895 }, { "epoch": 4.1377904606604154, "grad_norm": 0.8839792013168335, "learning_rate": 0.000482776192417448, "loss": 3.4785, "step": 60900 }, { "epoch": 4.1381301807310775, "grad_norm": 0.9047330617904663, "learning_rate": 0.00048273372740861533, "loss": 3.4357, "step": 60905 }, { "epoch": 4.13846990080174, "grad_norm": 0.9330630898475647, "learning_rate": 0.0004826912623997826, "loss": 3.5254, "step": 60910 }, { "epoch": 4.138809620872401, "grad_norm": 0.9344311952590942, "learning_rate": 0.00048264879739094984, "loss": 3.6818, "step": 60915 }, { "epoch": 4.139149340943063, "grad_norm": 1.0314078330993652, "learning_rate": 0.00048260633238211717, "loss": 3.39, "step": 60920 }, { "epoch": 4.139489061013725, "grad_norm": 0.8636992573738098, "learning_rate": 0.00048256386737328445, "loss": 3.3487, "step": 60925 }, { "epoch": 4.139828781084386, "grad_norm": 1.0891183614730835, "learning_rate": 0.0004825214023644517, "loss": 3.7929, "step": 60930 }, { "epoch": 4.140168501155048, "grad_norm": 0.9468405246734619, "learning_rate": 0.000482478937355619, "loss": 3.4391, "step": 60935 }, { "epoch": 4.14050822122571, "grad_norm": 0.7584990859031677, "learning_rate": 0.0004824364723467863, "loss": 3.4859, "step": 60940 }, { "epoch": 4.1408479412963715, "grad_norm": 0.8876801133155823, "learning_rate": 0.0004823940073379535, "loss": 3.7726, "step": 60945 }, { "epoch": 4.1411876613670335, "grad_norm": 0.7832413911819458, "learning_rate": 0.0004823515423291208, "loss": 3.6383, "step": 60950 }, { "epoch": 4.141527381437696, "grad_norm": 0.8907681107521057, "learning_rate": 0.00048230907732028813, "loss": 3.1618, "step": 60955 }, { "epoch": 4.141867101508357, "grad_norm": 0.8434287905693054, "learning_rate": 0.00048226661231145536, "loss": 3.3583, "step": 60960 }, { "epoch": 4.142206821579019, "grad_norm": 0.9744192957878113, "learning_rate": 0.00048222414730262264, "loss": 3.429, "step": 60965 }, { "epoch": 4.142546541649681, "grad_norm": 1.0301381349563599, "learning_rate": 0.00048218168229378997, "loss": 3.5584, "step": 60970 }, { "epoch": 4.142886261720342, "grad_norm": 0.767314612865448, "learning_rate": 0.0004821392172849572, "loss": 3.4044, "step": 60975 }, { "epoch": 4.143225981791004, "grad_norm": 0.7550290822982788, "learning_rate": 0.0004820967522761245, "loss": 3.3663, "step": 60980 }, { "epoch": 4.143565701861666, "grad_norm": 0.9915202260017395, "learning_rate": 0.00048205428726729176, "loss": 3.6109, "step": 60985 }, { "epoch": 4.1439054219323275, "grad_norm": 1.026853322982788, "learning_rate": 0.00048201182225845904, "loss": 3.3546, "step": 60990 }, { "epoch": 4.1442451420029895, "grad_norm": 0.8758509159088135, "learning_rate": 0.0004819693572496263, "loss": 3.3082, "step": 60995 }, { "epoch": 4.144584862073652, "grad_norm": 0.918562114238739, "learning_rate": 0.0004819268922407936, "loss": 3.2879, "step": 61000 }, { "epoch": 4.144924582144313, "grad_norm": 0.858489453792572, "learning_rate": 0.0004818844272319609, "loss": 3.3901, "step": 61005 }, { "epoch": 4.145264302214975, "grad_norm": 0.783415675163269, "learning_rate": 0.00048184196222312816, "loss": 3.5607, "step": 61010 }, { "epoch": 4.145604022285637, "grad_norm": 0.8915145993232727, "learning_rate": 0.00048179949721429544, "loss": 3.5051, "step": 61015 }, { "epoch": 4.145943742356298, "grad_norm": 0.8396680951118469, "learning_rate": 0.00048175703220546266, "loss": 3.4385, "step": 61020 }, { "epoch": 4.14628346242696, "grad_norm": 0.9635047912597656, "learning_rate": 0.00048171456719663, "loss": 3.5492, "step": 61025 }, { "epoch": 4.146623182497622, "grad_norm": 0.8289498090744019, "learning_rate": 0.0004816721021877973, "loss": 3.5478, "step": 61030 }, { "epoch": 4.1469629025682835, "grad_norm": 0.8306374549865723, "learning_rate": 0.0004816296371789645, "loss": 3.4731, "step": 61035 }, { "epoch": 4.147302622638946, "grad_norm": 0.7349468469619751, "learning_rate": 0.00048158717217013184, "loss": 3.5544, "step": 61040 }, { "epoch": 4.147642342709608, "grad_norm": 0.9649933576583862, "learning_rate": 0.0004815447071612991, "loss": 3.7283, "step": 61045 }, { "epoch": 4.147982062780269, "grad_norm": 1.2236928939819336, "learning_rate": 0.00048150224215246634, "loss": 3.4891, "step": 61050 }, { "epoch": 4.148321782850931, "grad_norm": 0.7976474165916443, "learning_rate": 0.0004814597771436336, "loss": 3.5651, "step": 61055 }, { "epoch": 4.148661502921593, "grad_norm": 0.7144026756286621, "learning_rate": 0.00048141731213480096, "loss": 3.4696, "step": 61060 }, { "epoch": 4.149001222992254, "grad_norm": 0.8160803914070129, "learning_rate": 0.0004813748471259682, "loss": 3.4091, "step": 61065 }, { "epoch": 4.149340943062916, "grad_norm": 0.8607046604156494, "learning_rate": 0.00048133238211713546, "loss": 3.5294, "step": 61070 }, { "epoch": 4.149680663133578, "grad_norm": 0.9162683486938477, "learning_rate": 0.0004812899171083028, "loss": 3.4826, "step": 61075 }, { "epoch": 4.1500203832042395, "grad_norm": 0.8390758633613586, "learning_rate": 0.0004812474520994701, "loss": 3.4112, "step": 61080 }, { "epoch": 4.150360103274902, "grad_norm": 1.1057442426681519, "learning_rate": 0.0004812049870906373, "loss": 3.4551, "step": 61085 }, { "epoch": 4.150699823345564, "grad_norm": 0.9776700735092163, "learning_rate": 0.0004811625220818046, "loss": 3.4702, "step": 61090 }, { "epoch": 4.151039543416225, "grad_norm": 0.9008141756057739, "learning_rate": 0.0004811200570729719, "loss": 3.6891, "step": 61095 }, { "epoch": 4.151379263486887, "grad_norm": 0.7931249141693115, "learning_rate": 0.00048107759206413915, "loss": 3.5026, "step": 61100 }, { "epoch": 4.151718983557549, "grad_norm": 0.8162325620651245, "learning_rate": 0.0004810351270553064, "loss": 3.4009, "step": 61105 }, { "epoch": 4.15205870362821, "grad_norm": 0.9525900483131409, "learning_rate": 0.00048099266204647376, "loss": 3.6679, "step": 61110 }, { "epoch": 4.152398423698872, "grad_norm": 1.1448752880096436, "learning_rate": 0.000480950197037641, "loss": 3.5221, "step": 61115 }, { "epoch": 4.152738143769534, "grad_norm": 0.7758772373199463, "learning_rate": 0.00048090773202880827, "loss": 3.4811, "step": 61120 }, { "epoch": 4.1530778638401955, "grad_norm": 0.799312174320221, "learning_rate": 0.00048086526701997555, "loss": 3.5557, "step": 61125 }, { "epoch": 4.153417583910858, "grad_norm": 0.8320074081420898, "learning_rate": 0.0004808228020111428, "loss": 3.374, "step": 61130 }, { "epoch": 4.15375730398152, "grad_norm": 0.8839256167411804, "learning_rate": 0.0004807803370023101, "loss": 3.553, "step": 61135 }, { "epoch": 4.154097024052181, "grad_norm": 1.1912697553634644, "learning_rate": 0.0004807378719934774, "loss": 3.5927, "step": 61140 }, { "epoch": 4.154436744122843, "grad_norm": 0.8356027007102966, "learning_rate": 0.00048069540698464467, "loss": 3.4487, "step": 61145 }, { "epoch": 4.154776464193505, "grad_norm": 1.07656991481781, "learning_rate": 0.00048065294197581195, "loss": 3.5596, "step": 61150 }, { "epoch": 4.155116184264166, "grad_norm": 0.7973971962928772, "learning_rate": 0.0004806104769669792, "loss": 3.6639, "step": 61155 }, { "epoch": 4.155455904334828, "grad_norm": 1.0467630624771118, "learning_rate": 0.00048056801195814645, "loss": 3.4463, "step": 61160 }, { "epoch": 4.15579562440549, "grad_norm": 0.9411207437515259, "learning_rate": 0.0004805255469493138, "loss": 3.5244, "step": 61165 }, { "epoch": 4.1561353444761515, "grad_norm": 0.8440948724746704, "learning_rate": 0.00048048308194048107, "loss": 3.713, "step": 61170 }, { "epoch": 4.156475064546814, "grad_norm": 0.8739214539527893, "learning_rate": 0.0004804406169316483, "loss": 3.3699, "step": 61175 }, { "epoch": 4.156814784617475, "grad_norm": 1.0458316802978516, "learning_rate": 0.0004803981519228156, "loss": 3.5153, "step": 61180 }, { "epoch": 4.157154504688137, "grad_norm": 1.1161315441131592, "learning_rate": 0.0004803556869139829, "loss": 3.6033, "step": 61185 }, { "epoch": 4.157494224758799, "grad_norm": 0.8622521758079529, "learning_rate": 0.00048031322190515013, "loss": 3.5284, "step": 61190 }, { "epoch": 4.15783394482946, "grad_norm": 0.7702986598014832, "learning_rate": 0.00048027075689631747, "loss": 3.6047, "step": 61195 }, { "epoch": 4.158173664900122, "grad_norm": 0.9935966730117798, "learning_rate": 0.00048022829188748475, "loss": 3.4371, "step": 61200 }, { "epoch": 4.158513384970784, "grad_norm": 1.020129680633545, "learning_rate": 0.00048018582687865197, "loss": 3.2862, "step": 61205 }, { "epoch": 4.1588531050414455, "grad_norm": 1.269012451171875, "learning_rate": 0.00048014336186981925, "loss": 3.8026, "step": 61210 }, { "epoch": 4.1591928251121075, "grad_norm": 0.7302483916282654, "learning_rate": 0.0004801008968609866, "loss": 3.3462, "step": 61215 }, { "epoch": 4.15953254518277, "grad_norm": 0.8843251466751099, "learning_rate": 0.0004800584318521538, "loss": 3.5796, "step": 61220 }, { "epoch": 4.159872265253431, "grad_norm": 1.0964646339416504, "learning_rate": 0.0004800159668433211, "loss": 3.4576, "step": 61225 }, { "epoch": 4.160211985324093, "grad_norm": 0.8815962076187134, "learning_rate": 0.00047997350183448843, "loss": 3.3914, "step": 61230 }, { "epoch": 4.160551705394755, "grad_norm": 0.9033495187759399, "learning_rate": 0.00047993103682565565, "loss": 3.4816, "step": 61235 }, { "epoch": 4.160891425465416, "grad_norm": 0.9238218069076538, "learning_rate": 0.00047988857181682293, "loss": 3.3832, "step": 61240 }, { "epoch": 4.161231145536078, "grad_norm": 0.8261555433273315, "learning_rate": 0.0004798461068079902, "loss": 3.5648, "step": 61245 }, { "epoch": 4.16157086560674, "grad_norm": 1.2196403741836548, "learning_rate": 0.00047980364179915755, "loss": 3.4318, "step": 61250 }, { "epoch": 4.1619105856774015, "grad_norm": 1.1675587892532349, "learning_rate": 0.0004797611767903248, "loss": 3.5861, "step": 61255 }, { "epoch": 4.1622503057480635, "grad_norm": 0.7549326419830322, "learning_rate": 0.00047971871178149205, "loss": 3.6748, "step": 61260 }, { "epoch": 4.162590025818726, "grad_norm": 1.1471422910690308, "learning_rate": 0.0004796762467726594, "loss": 3.5278, "step": 61265 }, { "epoch": 4.162929745889387, "grad_norm": 1.0092384815216064, "learning_rate": 0.0004796337817638266, "loss": 3.4407, "step": 61270 }, { "epoch": 4.163269465960049, "grad_norm": 0.8209322690963745, "learning_rate": 0.0004795913167549939, "loss": 3.5374, "step": 61275 }, { "epoch": 4.163609186030711, "grad_norm": 0.9471033215522766, "learning_rate": 0.0004795488517461612, "loss": 3.6338, "step": 61280 }, { "epoch": 4.163948906101372, "grad_norm": 0.7848088145256042, "learning_rate": 0.00047950638673732845, "loss": 3.3755, "step": 61285 }, { "epoch": 4.164288626172034, "grad_norm": 0.9488942623138428, "learning_rate": 0.00047946392172849573, "loss": 3.4213, "step": 61290 }, { "epoch": 4.164628346242696, "grad_norm": 0.9325140714645386, "learning_rate": 0.000479421456719663, "loss": 3.4231, "step": 61295 }, { "epoch": 4.1649680663133575, "grad_norm": 1.0085484981536865, "learning_rate": 0.0004793789917108303, "loss": 3.5084, "step": 61300 }, { "epoch": 4.1653077863840196, "grad_norm": 0.809970498085022, "learning_rate": 0.0004793365267019976, "loss": 3.6421, "step": 61305 }, { "epoch": 4.165647506454682, "grad_norm": 0.7918409705162048, "learning_rate": 0.00047929406169316485, "loss": 3.3706, "step": 61310 }, { "epoch": 4.165987226525343, "grad_norm": 1.1134058237075806, "learning_rate": 0.0004792515966843321, "loss": 3.5831, "step": 61315 }, { "epoch": 4.166326946596005, "grad_norm": 0.8777232766151428, "learning_rate": 0.0004792091316754994, "loss": 3.4488, "step": 61320 }, { "epoch": 4.166666666666667, "grad_norm": 0.8292273879051208, "learning_rate": 0.0004791666666666667, "loss": 3.3655, "step": 61325 }, { "epoch": 4.167006386737328, "grad_norm": 0.9717200994491577, "learning_rate": 0.0004791242016578339, "loss": 3.5134, "step": 61330 }, { "epoch": 4.16734610680799, "grad_norm": 0.7630997896194458, "learning_rate": 0.00047908173664900125, "loss": 3.5761, "step": 61335 }, { "epoch": 4.167685826878652, "grad_norm": 0.903893768787384, "learning_rate": 0.00047903927164016853, "loss": 3.7695, "step": 61340 }, { "epoch": 4.1680255469493135, "grad_norm": 0.8937538266181946, "learning_rate": 0.00047899680663133576, "loss": 3.1996, "step": 61345 }, { "epoch": 4.168365267019976, "grad_norm": 1.085886001586914, "learning_rate": 0.00047895434162250304, "loss": 3.3027, "step": 61350 }, { "epoch": 4.168704987090638, "grad_norm": 0.880458414554596, "learning_rate": 0.0004789118766136704, "loss": 3.6383, "step": 61355 }, { "epoch": 4.169044707161299, "grad_norm": 0.7507785558700562, "learning_rate": 0.0004788694116048376, "loss": 3.6534, "step": 61360 }, { "epoch": 4.169384427231961, "grad_norm": 0.9068813920021057, "learning_rate": 0.0004788269465960049, "loss": 3.2842, "step": 61365 }, { "epoch": 4.169724147302623, "grad_norm": 0.9297317266464233, "learning_rate": 0.0004787844815871722, "loss": 3.2945, "step": 61370 }, { "epoch": 4.170063867373284, "grad_norm": 0.8585667610168457, "learning_rate": 0.00047874201657833944, "loss": 3.4027, "step": 61375 }, { "epoch": 4.170403587443946, "grad_norm": 0.9907045364379883, "learning_rate": 0.0004786995515695067, "loss": 3.5582, "step": 61380 }, { "epoch": 4.170743307514608, "grad_norm": 0.9121895432472229, "learning_rate": 0.000478657086560674, "loss": 3.5211, "step": 61385 }, { "epoch": 4.1710830275852695, "grad_norm": 0.8772000670433044, "learning_rate": 0.0004786146215518413, "loss": 3.0921, "step": 61390 }, { "epoch": 4.171422747655932, "grad_norm": 0.9559676647186279, "learning_rate": 0.00047857215654300856, "loss": 3.678, "step": 61395 }, { "epoch": 4.171762467726594, "grad_norm": 1.0583782196044922, "learning_rate": 0.00047852969153417584, "loss": 3.4914, "step": 61400 }, { "epoch": 4.172102187797255, "grad_norm": 0.756763756275177, "learning_rate": 0.0004784872265253431, "loss": 3.4113, "step": 61405 }, { "epoch": 4.172441907867917, "grad_norm": 0.8362118601799011, "learning_rate": 0.0004784447615165104, "loss": 3.6134, "step": 61410 }, { "epoch": 4.172781627938579, "grad_norm": 1.069839596748352, "learning_rate": 0.0004784022965076777, "loss": 3.4441, "step": 61415 }, { "epoch": 4.17312134800924, "grad_norm": 0.9363768696784973, "learning_rate": 0.00047835983149884496, "loss": 3.5099, "step": 61420 }, { "epoch": 4.173461068079902, "grad_norm": 0.9069871306419373, "learning_rate": 0.00047831736649001224, "loss": 3.4737, "step": 61425 }, { "epoch": 4.173800788150564, "grad_norm": 0.8612582087516785, "learning_rate": 0.0004782749014811795, "loss": 3.5325, "step": 61430 }, { "epoch": 4.1741405082212255, "grad_norm": 1.0577627420425415, "learning_rate": 0.0004782324364723468, "loss": 3.6395, "step": 61435 }, { "epoch": 4.174480228291888, "grad_norm": 0.9363875985145569, "learning_rate": 0.0004781899714635141, "loss": 3.6046, "step": 61440 }, { "epoch": 4.17481994836255, "grad_norm": 0.8154097199440002, "learning_rate": 0.00047814750645468136, "loss": 3.5131, "step": 61445 }, { "epoch": 4.175159668433211, "grad_norm": 0.6793762445449829, "learning_rate": 0.00047810504144584864, "loss": 3.6875, "step": 61450 }, { "epoch": 4.175499388503873, "grad_norm": 0.8323537707328796, "learning_rate": 0.00047806257643701587, "loss": 3.5121, "step": 61455 }, { "epoch": 4.175839108574535, "grad_norm": 0.7416667342185974, "learning_rate": 0.0004780201114281832, "loss": 3.335, "step": 61460 }, { "epoch": 4.176178828645196, "grad_norm": 1.047333002090454, "learning_rate": 0.0004779776464193505, "loss": 3.3923, "step": 61465 }, { "epoch": 4.176518548715858, "grad_norm": 1.0007826089859009, "learning_rate": 0.0004779351814105177, "loss": 3.4439, "step": 61470 }, { "epoch": 4.17685826878652, "grad_norm": 0.8062480092048645, "learning_rate": 0.00047789271640168504, "loss": 3.5497, "step": 61475 }, { "epoch": 4.1771979888571815, "grad_norm": 0.7364413738250732, "learning_rate": 0.0004778502513928523, "loss": 3.6515, "step": 61480 }, { "epoch": 4.177537708927844, "grad_norm": 1.056564211845398, "learning_rate": 0.00047780778638401955, "loss": 3.2767, "step": 61485 }, { "epoch": 4.177877428998505, "grad_norm": 0.8792128562927246, "learning_rate": 0.0004777653213751869, "loss": 3.4981, "step": 61490 }, { "epoch": 4.178217149069167, "grad_norm": 0.9576679468154907, "learning_rate": 0.00047772285636635416, "loss": 3.4957, "step": 61495 }, { "epoch": 4.178556869139829, "grad_norm": 0.8643674850463867, "learning_rate": 0.0004776803913575214, "loss": 3.4386, "step": 61500 }, { "epoch": 4.17889658921049, "grad_norm": 1.5490754842758179, "learning_rate": 0.00047763792634868867, "loss": 3.5235, "step": 61505 }, { "epoch": 4.179236309281152, "grad_norm": 1.0392727851867676, "learning_rate": 0.000477595461339856, "loss": 3.4765, "step": 61510 }, { "epoch": 4.179576029351814, "grad_norm": 1.1129608154296875, "learning_rate": 0.00047755299633102323, "loss": 3.2285, "step": 61515 }, { "epoch": 4.1799157494224755, "grad_norm": 0.7832546234130859, "learning_rate": 0.0004775105313221905, "loss": 3.5716, "step": 61520 }, { "epoch": 4.1802554694931375, "grad_norm": 0.9150946140289307, "learning_rate": 0.00047746806631335784, "loss": 3.1979, "step": 61525 }, { "epoch": 4.1805951895638, "grad_norm": 0.64959716796875, "learning_rate": 0.00047742560130452507, "loss": 3.673, "step": 61530 }, { "epoch": 4.180934909634461, "grad_norm": 0.977350652217865, "learning_rate": 0.00047738313629569235, "loss": 3.7289, "step": 61535 }, { "epoch": 4.181274629705123, "grad_norm": 0.9625280499458313, "learning_rate": 0.00047734067128685963, "loss": 3.4824, "step": 61540 }, { "epoch": 4.181614349775785, "grad_norm": 0.9279688000679016, "learning_rate": 0.0004772982062780269, "loss": 3.5479, "step": 61545 }, { "epoch": 4.181954069846446, "grad_norm": 0.9773569107055664, "learning_rate": 0.0004772557412691942, "loss": 3.4389, "step": 61550 }, { "epoch": 4.182293789917108, "grad_norm": 0.8943056464195251, "learning_rate": 0.00047721327626036147, "loss": 3.524, "step": 61555 }, { "epoch": 4.18263350998777, "grad_norm": 0.8047959208488464, "learning_rate": 0.00047717081125152875, "loss": 3.2588, "step": 61560 }, { "epoch": 4.1829732300584315, "grad_norm": 0.8745054006576538, "learning_rate": 0.00047712834624269603, "loss": 3.6413, "step": 61565 }, { "epoch": 4.1833129501290935, "grad_norm": 0.9662274718284607, "learning_rate": 0.0004770858812338633, "loss": 3.4595, "step": 61570 }, { "epoch": 4.183652670199756, "grad_norm": 0.8292756676673889, "learning_rate": 0.00047704341622503054, "loss": 3.6498, "step": 61575 }, { "epoch": 4.183992390270417, "grad_norm": 0.7691963911056519, "learning_rate": 0.00047700095121619787, "loss": 3.6595, "step": 61580 }, { "epoch": 4.184332110341079, "grad_norm": 0.6002717614173889, "learning_rate": 0.00047695848620736515, "loss": 3.5455, "step": 61585 }, { "epoch": 4.184671830411741, "grad_norm": 0.7317185997962952, "learning_rate": 0.00047691602119853243, "loss": 3.5331, "step": 61590 }, { "epoch": 4.185011550482402, "grad_norm": 0.8353995680809021, "learning_rate": 0.0004768735561896997, "loss": 3.5765, "step": 61595 }, { "epoch": 4.185351270553064, "grad_norm": 0.7571285963058472, "learning_rate": 0.000476831091180867, "loss": 3.7165, "step": 61600 }, { "epoch": 4.185690990623726, "grad_norm": 0.8857873678207397, "learning_rate": 0.00047678862617203427, "loss": 3.5826, "step": 61605 }, { "epoch": 4.1860307106943875, "grad_norm": 1.500422716140747, "learning_rate": 0.0004767461611632015, "loss": 3.594, "step": 61610 }, { "epoch": 4.1863704307650496, "grad_norm": 1.162610411643982, "learning_rate": 0.00047670369615436883, "loss": 3.4978, "step": 61615 }, { "epoch": 4.186710150835712, "grad_norm": 1.0689085721969604, "learning_rate": 0.0004766612311455361, "loss": 3.5149, "step": 61620 }, { "epoch": 4.187049870906373, "grad_norm": 0.910282552242279, "learning_rate": 0.00047661876613670334, "loss": 3.4493, "step": 61625 }, { "epoch": 4.187389590977035, "grad_norm": 1.0557277202606201, "learning_rate": 0.00047657630112787067, "loss": 3.6462, "step": 61630 }, { "epoch": 4.187729311047697, "grad_norm": 0.9505425095558167, "learning_rate": 0.00047653383611903795, "loss": 3.3805, "step": 61635 }, { "epoch": 4.188069031118358, "grad_norm": 0.8598436117172241, "learning_rate": 0.0004764913711102052, "loss": 3.6132, "step": 61640 }, { "epoch": 4.18840875118902, "grad_norm": 0.7603653073310852, "learning_rate": 0.00047644890610137246, "loss": 3.3015, "step": 61645 }, { "epoch": 4.188748471259682, "grad_norm": 0.9686315059661865, "learning_rate": 0.0004764064410925398, "loss": 3.3529, "step": 61650 }, { "epoch": 4.1890881913303435, "grad_norm": 0.904583752155304, "learning_rate": 0.000476363976083707, "loss": 3.4892, "step": 61655 }, { "epoch": 4.189427911401006, "grad_norm": 1.062279462814331, "learning_rate": 0.0004763215110748743, "loss": 3.3009, "step": 61660 }, { "epoch": 4.189767631471668, "grad_norm": 0.8175138235092163, "learning_rate": 0.00047627904606604163, "loss": 3.4636, "step": 61665 }, { "epoch": 4.190107351542329, "grad_norm": 0.9394064545631409, "learning_rate": 0.00047623658105720886, "loss": 3.5016, "step": 61670 }, { "epoch": 4.190447071612991, "grad_norm": 1.0974457263946533, "learning_rate": 0.00047619411604837614, "loss": 3.4715, "step": 61675 }, { "epoch": 4.190786791683653, "grad_norm": 0.8635875582695007, "learning_rate": 0.0004761516510395434, "loss": 3.3515, "step": 61680 }, { "epoch": 4.191126511754314, "grad_norm": 0.9464661478996277, "learning_rate": 0.0004761091860307107, "loss": 3.1918, "step": 61685 }, { "epoch": 4.191466231824976, "grad_norm": 1.0221835374832153, "learning_rate": 0.000476066721021878, "loss": 3.5597, "step": 61690 }, { "epoch": 4.191805951895638, "grad_norm": 0.932763934135437, "learning_rate": 0.00047602425601304526, "loss": 3.7033, "step": 61695 }, { "epoch": 4.1921456719662995, "grad_norm": 0.8700383901596069, "learning_rate": 0.00047598179100421254, "loss": 3.6733, "step": 61700 }, { "epoch": 4.192485392036962, "grad_norm": 0.8092880845069885, "learning_rate": 0.0004759393259953798, "loss": 3.2863, "step": 61705 }, { "epoch": 4.192825112107624, "grad_norm": 0.9158156514167786, "learning_rate": 0.0004758968609865471, "loss": 3.3602, "step": 61710 }, { "epoch": 4.193164832178285, "grad_norm": 0.9173544645309448, "learning_rate": 0.0004758543959777143, "loss": 3.3187, "step": 61715 }, { "epoch": 4.193504552248947, "grad_norm": 0.8213764429092407, "learning_rate": 0.00047581193096888166, "loss": 3.3591, "step": 61720 }, { "epoch": 4.193844272319609, "grad_norm": 0.8613501787185669, "learning_rate": 0.00047576946596004894, "loss": 3.3481, "step": 61725 }, { "epoch": 4.19418399239027, "grad_norm": 0.8321738839149475, "learning_rate": 0.00047572700095121616, "loss": 3.599, "step": 61730 }, { "epoch": 4.194523712460932, "grad_norm": 0.9807544946670532, "learning_rate": 0.0004756845359423835, "loss": 3.5017, "step": 61735 }, { "epoch": 4.194863432531594, "grad_norm": 0.8135389685630798, "learning_rate": 0.0004756420709335508, "loss": 3.4571, "step": 61740 }, { "epoch": 4.1952031526022555, "grad_norm": 0.9644518494606018, "learning_rate": 0.000475599605924718, "loss": 3.584, "step": 61745 }, { "epoch": 4.195542872672918, "grad_norm": 0.7378988862037659, "learning_rate": 0.0004755571409158853, "loss": 3.6518, "step": 61750 }, { "epoch": 4.19588259274358, "grad_norm": 0.8387075066566467, "learning_rate": 0.0004755146759070526, "loss": 3.5965, "step": 61755 }, { "epoch": 4.196222312814241, "grad_norm": 0.8610532283782959, "learning_rate": 0.0004754722108982199, "loss": 3.4606, "step": 61760 }, { "epoch": 4.196562032884903, "grad_norm": 0.8247411847114563, "learning_rate": 0.0004754297458893871, "loss": 3.3262, "step": 61765 }, { "epoch": 4.196901752955565, "grad_norm": 0.980965793132782, "learning_rate": 0.00047538728088055446, "loss": 3.6096, "step": 61770 }, { "epoch": 4.197241473026226, "grad_norm": 0.8761126399040222, "learning_rate": 0.00047534481587172174, "loss": 3.5151, "step": 61775 }, { "epoch": 4.197581193096888, "grad_norm": 0.8383309245109558, "learning_rate": 0.00047530235086288896, "loss": 3.5543, "step": 61780 }, { "epoch": 4.19792091316755, "grad_norm": 0.9866697788238525, "learning_rate": 0.0004752598858540563, "loss": 3.404, "step": 61785 }, { "epoch": 4.1982606332382115, "grad_norm": 0.6978961229324341, "learning_rate": 0.0004752174208452236, "loss": 3.5538, "step": 61790 }, { "epoch": 4.198600353308874, "grad_norm": 0.9162205457687378, "learning_rate": 0.0004751749558363908, "loss": 3.4356, "step": 61795 }, { "epoch": 4.198940073379536, "grad_norm": 0.8467340469360352, "learning_rate": 0.0004751324908275581, "loss": 3.7083, "step": 61800 }, { "epoch": 4.199279793450197, "grad_norm": 0.9185104370117188, "learning_rate": 0.0004750900258187254, "loss": 3.538, "step": 61805 }, { "epoch": 4.199619513520859, "grad_norm": 0.9455029368400574, "learning_rate": 0.00047504756080989265, "loss": 3.5196, "step": 61810 }, { "epoch": 4.199959233591521, "grad_norm": 0.8226585388183594, "learning_rate": 0.0004750050958010599, "loss": 3.5964, "step": 61815 }, { "epoch": 4.200298953662182, "grad_norm": 1.0438311100006104, "learning_rate": 0.00047496263079222726, "loss": 3.5756, "step": 61820 }, { "epoch": 4.200638673732844, "grad_norm": 0.9407967329025269, "learning_rate": 0.0004749201657833945, "loss": 3.4944, "step": 61825 }, { "epoch": 4.200978393803506, "grad_norm": 0.8689625263214111, "learning_rate": 0.00047487770077456177, "loss": 3.6475, "step": 61830 }, { "epoch": 4.2013181138741675, "grad_norm": 1.5444892644882202, "learning_rate": 0.00047483523576572905, "loss": 3.507, "step": 61835 }, { "epoch": 4.20165783394483, "grad_norm": 0.8338109254837036, "learning_rate": 0.0004747927707568963, "loss": 3.398, "step": 61840 }, { "epoch": 4.201997554015492, "grad_norm": 7.324791431427002, "learning_rate": 0.0004747503057480636, "loss": 3.4509, "step": 61845 }, { "epoch": 4.202337274086153, "grad_norm": 1.0446778535842896, "learning_rate": 0.0004747078407392309, "loss": 3.3387, "step": 61850 }, { "epoch": 4.202676994156815, "grad_norm": 1.123361349105835, "learning_rate": 0.00047466537573039817, "loss": 3.2169, "step": 61855 }, { "epoch": 4.203016714227476, "grad_norm": 0.8754403591156006, "learning_rate": 0.00047462291072156545, "loss": 3.371, "step": 61860 }, { "epoch": 4.203356434298138, "grad_norm": 0.8180702328681946, "learning_rate": 0.0004745804457127327, "loss": 3.383, "step": 61865 }, { "epoch": 4.2036961543688, "grad_norm": 0.9273421764373779, "learning_rate": 0.00047453798070389995, "loss": 3.4114, "step": 61870 }, { "epoch": 4.2040358744394615, "grad_norm": 1.1498231887817383, "learning_rate": 0.0004744955156950673, "loss": 3.1782, "step": 61875 }, { "epoch": 4.2043755945101235, "grad_norm": 0.745573103427887, "learning_rate": 0.00047445305068623457, "loss": 3.5043, "step": 61880 }, { "epoch": 4.204715314580786, "grad_norm": 0.7588533759117126, "learning_rate": 0.0004744105856774018, "loss": 3.5179, "step": 61885 }, { "epoch": 4.205055034651447, "grad_norm": 0.9656900763511658, "learning_rate": 0.0004743681206685691, "loss": 3.4826, "step": 61890 }, { "epoch": 4.205394754722109, "grad_norm": 0.964968204498291, "learning_rate": 0.0004743256556597364, "loss": 3.5144, "step": 61895 }, { "epoch": 4.205734474792771, "grad_norm": 1.1871525049209595, "learning_rate": 0.00047428319065090363, "loss": 3.3906, "step": 61900 }, { "epoch": 4.206074194863432, "grad_norm": 0.9983317852020264, "learning_rate": 0.0004742407256420709, "loss": 3.3158, "step": 61905 }, { "epoch": 4.206413914934094, "grad_norm": 0.8124498128890991, "learning_rate": 0.00047419826063323825, "loss": 3.5586, "step": 61910 }, { "epoch": 4.206753635004756, "grad_norm": 0.7715133428573608, "learning_rate": 0.00047415579562440547, "loss": 3.6549, "step": 61915 }, { "epoch": 4.2070933550754175, "grad_norm": 1.1109403371810913, "learning_rate": 0.00047411333061557275, "loss": 3.5215, "step": 61920 }, { "epoch": 4.20743307514608, "grad_norm": 0.7509203553199768, "learning_rate": 0.0004740708656067401, "loss": 3.3871, "step": 61925 }, { "epoch": 4.207772795216742, "grad_norm": 0.8395061492919922, "learning_rate": 0.00047402840059790737, "loss": 3.3894, "step": 61930 }, { "epoch": 4.208112515287403, "grad_norm": 0.6614095568656921, "learning_rate": 0.0004739859355890746, "loss": 3.462, "step": 61935 }, { "epoch": 4.208452235358065, "grad_norm": 0.9735466837882996, "learning_rate": 0.0004739434705802419, "loss": 3.5217, "step": 61940 }, { "epoch": 4.208791955428727, "grad_norm": 0.768747866153717, "learning_rate": 0.0004739010055714092, "loss": 3.3749, "step": 61945 }, { "epoch": 4.209131675499388, "grad_norm": 0.8979070782661438, "learning_rate": 0.00047385854056257643, "loss": 3.667, "step": 61950 }, { "epoch": 4.20947139557005, "grad_norm": 0.9836918115615845, "learning_rate": 0.0004738160755537437, "loss": 3.4834, "step": 61955 }, { "epoch": 4.209811115640712, "grad_norm": 0.9414037466049194, "learning_rate": 0.00047377361054491105, "loss": 3.4967, "step": 61960 }, { "epoch": 4.2101508357113735, "grad_norm": 1.077491283416748, "learning_rate": 0.0004737311455360783, "loss": 3.5865, "step": 61965 }, { "epoch": 4.210490555782036, "grad_norm": 0.8763558864593506, "learning_rate": 0.00047368868052724555, "loss": 3.2732, "step": 61970 }, { "epoch": 4.210830275852698, "grad_norm": 0.8472718596458435, "learning_rate": 0.00047364621551841283, "loss": 3.5139, "step": 61975 }, { "epoch": 4.211169995923359, "grad_norm": 0.923512876033783, "learning_rate": 0.0004736037505095801, "loss": 3.3765, "step": 61980 }, { "epoch": 4.211509715994021, "grad_norm": 0.9110320210456848, "learning_rate": 0.0004735612855007474, "loss": 3.4979, "step": 61985 }, { "epoch": 4.211849436064683, "grad_norm": 0.9856606721878052, "learning_rate": 0.0004735188204919147, "loss": 3.3492, "step": 61990 }, { "epoch": 4.212189156135344, "grad_norm": 1.143573522567749, "learning_rate": 0.00047347635548308195, "loss": 3.521, "step": 61995 }, { "epoch": 4.212528876206006, "grad_norm": 0.7409008741378784, "learning_rate": 0.00047343389047424923, "loss": 3.5756, "step": 62000 }, { "epoch": 4.212868596276668, "grad_norm": 1.0568146705627441, "learning_rate": 0.0004733914254654165, "loss": 3.5422, "step": 62005 }, { "epoch": 4.2132083163473295, "grad_norm": 1.1648577451705933, "learning_rate": 0.00047334896045658374, "loss": 3.349, "step": 62010 }, { "epoch": 4.213548036417992, "grad_norm": 0.6332630515098572, "learning_rate": 0.0004733064954477511, "loss": 3.5297, "step": 62015 }, { "epoch": 4.213887756488654, "grad_norm": 0.990820586681366, "learning_rate": 0.00047326403043891835, "loss": 3.5881, "step": 62020 }, { "epoch": 4.214227476559315, "grad_norm": 0.7660771012306213, "learning_rate": 0.0004732215654300856, "loss": 3.6545, "step": 62025 }, { "epoch": 4.214567196629977, "grad_norm": 0.7424066066741943, "learning_rate": 0.0004731791004212529, "loss": 3.4747, "step": 62030 }, { "epoch": 4.214906916700639, "grad_norm": 1.3720229864120483, "learning_rate": 0.0004731366354124202, "loss": 3.5914, "step": 62035 }, { "epoch": 4.2152466367713, "grad_norm": 1.0905529260635376, "learning_rate": 0.0004730941704035874, "loss": 3.178, "step": 62040 }, { "epoch": 4.215586356841962, "grad_norm": 0.8345396518707275, "learning_rate": 0.00047305170539475475, "loss": 3.6747, "step": 62045 }, { "epoch": 4.215926076912624, "grad_norm": 0.7810105681419373, "learning_rate": 0.00047300924038592203, "loss": 3.3026, "step": 62050 }, { "epoch": 4.2162657969832855, "grad_norm": 1.2986418008804321, "learning_rate": 0.00047296677537708926, "loss": 3.2723, "step": 62055 }, { "epoch": 4.216605517053948, "grad_norm": 0.942055881023407, "learning_rate": 0.00047292431036825654, "loss": 3.3877, "step": 62060 }, { "epoch": 4.21694523712461, "grad_norm": 0.7600162029266357, "learning_rate": 0.0004728818453594239, "loss": 3.4083, "step": 62065 }, { "epoch": 4.217284957195271, "grad_norm": 1.151014804840088, "learning_rate": 0.0004728393803505911, "loss": 3.7406, "step": 62070 }, { "epoch": 4.217624677265933, "grad_norm": 0.9654062390327454, "learning_rate": 0.0004727969153417584, "loss": 3.4223, "step": 62075 }, { "epoch": 4.217964397336595, "grad_norm": 0.8574452996253967, "learning_rate": 0.0004727544503329257, "loss": 3.3046, "step": 62080 }, { "epoch": 4.218304117407256, "grad_norm": 0.923534631729126, "learning_rate": 0.00047271198532409294, "loss": 3.5365, "step": 62085 }, { "epoch": 4.218643837477918, "grad_norm": 1.0087449550628662, "learning_rate": 0.0004726695203152602, "loss": 3.521, "step": 62090 }, { "epoch": 4.21898355754858, "grad_norm": 0.8792451620101929, "learning_rate": 0.0004726270553064275, "loss": 3.4545, "step": 62095 }, { "epoch": 4.2193232776192415, "grad_norm": 0.8680570721626282, "learning_rate": 0.00047258459029759484, "loss": 3.5337, "step": 62100 }, { "epoch": 4.219662997689904, "grad_norm": 0.7142009139060974, "learning_rate": 0.00047254212528876206, "loss": 3.6364, "step": 62105 }, { "epoch": 4.220002717760566, "grad_norm": 0.9738248586654663, "learning_rate": 0.00047249966027992934, "loss": 3.1205, "step": 62110 }, { "epoch": 4.220342437831227, "grad_norm": 0.8425074219703674, "learning_rate": 0.0004724571952710967, "loss": 3.4945, "step": 62115 }, { "epoch": 4.220682157901889, "grad_norm": 0.7377592921257019, "learning_rate": 0.0004724147302622639, "loss": 3.5725, "step": 62120 }, { "epoch": 4.221021877972551, "grad_norm": 0.8036885261535645, "learning_rate": 0.0004723722652534312, "loss": 3.4892, "step": 62125 }, { "epoch": 4.221361598043212, "grad_norm": 0.9702445268630981, "learning_rate": 0.00047232980024459846, "loss": 3.5585, "step": 62130 }, { "epoch": 4.221701318113874, "grad_norm": 0.9419636130332947, "learning_rate": 0.00047228733523576574, "loss": 3.3501, "step": 62135 }, { "epoch": 4.222041038184536, "grad_norm": 0.7829832434654236, "learning_rate": 0.000472244870226933, "loss": 3.3922, "step": 62140 }, { "epoch": 4.2223807582551975, "grad_norm": 0.7860432863235474, "learning_rate": 0.0004722024052181003, "loss": 3.5126, "step": 62145 }, { "epoch": 4.22272047832586, "grad_norm": 0.8846808075904846, "learning_rate": 0.0004721599402092676, "loss": 3.6187, "step": 62150 }, { "epoch": 4.223060198396522, "grad_norm": 0.9532198905944824, "learning_rate": 0.00047211747520043486, "loss": 3.6106, "step": 62155 }, { "epoch": 4.223399918467183, "grad_norm": 0.8060060143470764, "learning_rate": 0.00047207501019160214, "loss": 3.3492, "step": 62160 }, { "epoch": 4.223739638537845, "grad_norm": 0.8280306458473206, "learning_rate": 0.00047203254518276937, "loss": 3.3005, "step": 62165 }, { "epoch": 4.224079358608506, "grad_norm": 1.0023374557495117, "learning_rate": 0.0004719900801739367, "loss": 3.2242, "step": 62170 }, { "epoch": 4.224419078679168, "grad_norm": 0.7857472896575928, "learning_rate": 0.000471947615165104, "loss": 3.2931, "step": 62175 }, { "epoch": 4.22475879874983, "grad_norm": 1.0773108005523682, "learning_rate": 0.0004719051501562712, "loss": 3.5876, "step": 62180 }, { "epoch": 4.2250985188204915, "grad_norm": 0.9203125238418579, "learning_rate": 0.00047186268514743854, "loss": 3.3219, "step": 62185 }, { "epoch": 4.2254382388911536, "grad_norm": 1.1758345365524292, "learning_rate": 0.0004718202201386058, "loss": 3.5427, "step": 62190 }, { "epoch": 4.225777958961816, "grad_norm": 0.8343169093132019, "learning_rate": 0.00047177775512977305, "loss": 3.3379, "step": 62195 }, { "epoch": 4.226117679032477, "grad_norm": 0.9148077964782715, "learning_rate": 0.00047173529012094033, "loss": 3.5548, "step": 62200 }, { "epoch": 4.226457399103139, "grad_norm": 0.9804446697235107, "learning_rate": 0.00047169282511210766, "loss": 3.5708, "step": 62205 }, { "epoch": 4.226797119173801, "grad_norm": 0.8714418411254883, "learning_rate": 0.0004716503601032749, "loss": 3.2235, "step": 62210 }, { "epoch": 4.227136839244462, "grad_norm": 1.0064260959625244, "learning_rate": 0.00047160789509444217, "loss": 3.3066, "step": 62215 }, { "epoch": 4.227476559315124, "grad_norm": 0.8281447291374207, "learning_rate": 0.0004715654300856095, "loss": 3.5559, "step": 62220 }, { "epoch": 4.227816279385786, "grad_norm": 0.8623903393745422, "learning_rate": 0.00047152296507677673, "loss": 3.3446, "step": 62225 }, { "epoch": 4.2281559994564475, "grad_norm": 1.0136678218841553, "learning_rate": 0.000471480500067944, "loss": 3.4321, "step": 62230 }, { "epoch": 4.22849571952711, "grad_norm": 1.19075608253479, "learning_rate": 0.0004714380350591113, "loss": 3.4994, "step": 62235 }, { "epoch": 4.228835439597772, "grad_norm": 1.0560334920883179, "learning_rate": 0.00047139557005027857, "loss": 3.5499, "step": 62240 }, { "epoch": 4.229175159668433, "grad_norm": 0.9597827792167664, "learning_rate": 0.00047135310504144585, "loss": 3.5025, "step": 62245 }, { "epoch": 4.229514879739095, "grad_norm": 0.7794488072395325, "learning_rate": 0.00047131064003261313, "loss": 3.3411, "step": 62250 }, { "epoch": 4.229854599809757, "grad_norm": 0.8876743316650391, "learning_rate": 0.0004712681750237804, "loss": 3.4017, "step": 62255 }, { "epoch": 4.230194319880418, "grad_norm": 0.7387467622756958, "learning_rate": 0.0004712257100149477, "loss": 3.7088, "step": 62260 }, { "epoch": 4.23053403995108, "grad_norm": 0.8620684146881104, "learning_rate": 0.00047118324500611497, "loss": 3.4169, "step": 62265 }, { "epoch": 4.230873760021742, "grad_norm": 1.9649810791015625, "learning_rate": 0.00047114077999728225, "loss": 3.3432, "step": 62270 }, { "epoch": 4.2312134800924035, "grad_norm": 0.8399444222450256, "learning_rate": 0.00047109831498844953, "loss": 3.5822, "step": 62275 }, { "epoch": 4.231553200163066, "grad_norm": 0.8617279529571533, "learning_rate": 0.0004710558499796168, "loss": 3.6869, "step": 62280 }, { "epoch": 4.231892920233728, "grad_norm": 0.7887837886810303, "learning_rate": 0.0004710133849707841, "loss": 3.6155, "step": 62285 }, { "epoch": 4.232232640304389, "grad_norm": 0.856587827205658, "learning_rate": 0.00047097091996195137, "loss": 3.5358, "step": 62290 }, { "epoch": 4.232572360375051, "grad_norm": 0.9303193092346191, "learning_rate": 0.00047092845495311865, "loss": 3.4816, "step": 62295 }, { "epoch": 4.232912080445713, "grad_norm": 0.9314841032028198, "learning_rate": 0.00047088598994428593, "loss": 3.47, "step": 62300 }, { "epoch": 4.233251800516374, "grad_norm": 1.1167455911636353, "learning_rate": 0.00047084352493545316, "loss": 3.3843, "step": 62305 }, { "epoch": 4.233591520587036, "grad_norm": 1.0100398063659668, "learning_rate": 0.0004708010599266205, "loss": 3.224, "step": 62310 }, { "epoch": 4.233931240657698, "grad_norm": 0.895997166633606, "learning_rate": 0.00047075859491778777, "loss": 3.5847, "step": 62315 }, { "epoch": 4.2342709607283595, "grad_norm": 0.837062656879425, "learning_rate": 0.000470716129908955, "loss": 3.1669, "step": 62320 }, { "epoch": 4.234610680799022, "grad_norm": 1.0972082614898682, "learning_rate": 0.00047067366490012233, "loss": 3.2607, "step": 62325 }, { "epoch": 4.234950400869684, "grad_norm": 1.03299880027771, "learning_rate": 0.0004706311998912896, "loss": 3.3383, "step": 62330 }, { "epoch": 4.235290120940345, "grad_norm": 0.8328218460083008, "learning_rate": 0.00047058873488245684, "loss": 3.691, "step": 62335 }, { "epoch": 4.235629841011007, "grad_norm": 0.7953068017959595, "learning_rate": 0.00047054626987362417, "loss": 3.5038, "step": 62340 }, { "epoch": 4.235969561081669, "grad_norm": 2.4780311584472656, "learning_rate": 0.00047050380486479145, "loss": 3.3094, "step": 62345 }, { "epoch": 4.23630928115233, "grad_norm": 1.0611635446548462, "learning_rate": 0.0004704613398559587, "loss": 3.4803, "step": 62350 }, { "epoch": 4.236649001222992, "grad_norm": 0.8358001112937927, "learning_rate": 0.00047041887484712596, "loss": 3.4165, "step": 62355 }, { "epoch": 4.236988721293654, "grad_norm": 0.8839282989501953, "learning_rate": 0.0004703764098382933, "loss": 3.3485, "step": 62360 }, { "epoch": 4.2373284413643155, "grad_norm": 0.6768919229507446, "learning_rate": 0.0004703339448294605, "loss": 3.4817, "step": 62365 }, { "epoch": 4.237668161434978, "grad_norm": 0.9148284196853638, "learning_rate": 0.0004702914798206278, "loss": 3.4989, "step": 62370 }, { "epoch": 4.23800788150564, "grad_norm": 0.8957261443138123, "learning_rate": 0.00047024901481179513, "loss": 3.3334, "step": 62375 }, { "epoch": 4.238347601576301, "grad_norm": 0.9470516443252563, "learning_rate": 0.00047020654980296236, "loss": 3.4824, "step": 62380 }, { "epoch": 4.238687321646963, "grad_norm": 0.9323439002037048, "learning_rate": 0.00047016408479412964, "loss": 3.4644, "step": 62385 }, { "epoch": 4.239027041717625, "grad_norm": 0.8894497752189636, "learning_rate": 0.0004701216197852969, "loss": 3.4867, "step": 62390 }, { "epoch": 4.239366761788286, "grad_norm": 1.1439660787582397, "learning_rate": 0.0004700791547764642, "loss": 3.5749, "step": 62395 }, { "epoch": 4.239706481858948, "grad_norm": 0.9108113050460815, "learning_rate": 0.0004700366897676315, "loss": 3.4178, "step": 62400 }, { "epoch": 4.24004620192961, "grad_norm": 1.0065360069274902, "learning_rate": 0.00046999422475879876, "loss": 3.4966, "step": 62405 }, { "epoch": 4.2403859220002715, "grad_norm": 0.9580250978469849, "learning_rate": 0.00046995175974996604, "loss": 3.6634, "step": 62410 }, { "epoch": 4.240725642070934, "grad_norm": 0.7834501266479492, "learning_rate": 0.0004699092947411333, "loss": 3.6127, "step": 62415 }, { "epoch": 4.241065362141596, "grad_norm": 0.7347567081451416, "learning_rate": 0.0004698668297323006, "loss": 3.6298, "step": 62420 }, { "epoch": 4.241405082212257, "grad_norm": 1.1166601181030273, "learning_rate": 0.0004698243647234678, "loss": 3.5458, "step": 62425 }, { "epoch": 4.241744802282919, "grad_norm": 0.8660541772842407, "learning_rate": 0.00046978189971463516, "loss": 3.2887, "step": 62430 }, { "epoch": 4.242084522353581, "grad_norm": 0.8945692181587219, "learning_rate": 0.00046973943470580244, "loss": 3.3532, "step": 62435 }, { "epoch": 4.242424242424242, "grad_norm": 0.7985753417015076, "learning_rate": 0.0004696969696969697, "loss": 3.2211, "step": 62440 }, { "epoch": 4.242763962494904, "grad_norm": 0.8653902411460876, "learning_rate": 0.000469654504688137, "loss": 3.6749, "step": 62445 }, { "epoch": 4.243103682565566, "grad_norm": 0.9973759651184082, "learning_rate": 0.0004696120396793043, "loss": 3.4082, "step": 62450 }, { "epoch": 4.2434434026362275, "grad_norm": 0.8569865822792053, "learning_rate": 0.00046956957467047156, "loss": 3.617, "step": 62455 }, { "epoch": 4.24378312270689, "grad_norm": 0.5923313498497009, "learning_rate": 0.0004695271096616388, "loss": 3.5664, "step": 62460 }, { "epoch": 4.244122842777552, "grad_norm": 0.7659757733345032, "learning_rate": 0.0004694846446528061, "loss": 3.4558, "step": 62465 }, { "epoch": 4.244462562848213, "grad_norm": 0.8422825336456299, "learning_rate": 0.0004694421796439734, "loss": 3.1768, "step": 62470 }, { "epoch": 4.244802282918875, "grad_norm": 3.5348448753356934, "learning_rate": 0.0004693997146351406, "loss": 3.4113, "step": 62475 }, { "epoch": 4.245142002989537, "grad_norm": 0.895234227180481, "learning_rate": 0.00046935724962630796, "loss": 3.262, "step": 62480 }, { "epoch": 4.245481723060198, "grad_norm": 0.9319461584091187, "learning_rate": 0.00046931478461747524, "loss": 3.277, "step": 62485 }, { "epoch": 4.24582144313086, "grad_norm": 1.0715136528015137, "learning_rate": 0.00046927231960864246, "loss": 3.3541, "step": 62490 }, { "epoch": 4.246161163201522, "grad_norm": 0.9865347743034363, "learning_rate": 0.00046922985459980974, "loss": 3.3116, "step": 62495 }, { "epoch": 4.246500883272184, "grad_norm": 1.1252214908599854, "learning_rate": 0.0004691873895909771, "loss": 3.5165, "step": 62500 }, { "epoch": 4.246840603342846, "grad_norm": 0.9303372502326965, "learning_rate": 0.0004691449245821443, "loss": 3.4558, "step": 62505 }, { "epoch": 4.247180323413508, "grad_norm": 0.981105387210846, "learning_rate": 0.0004691024595733116, "loss": 3.6547, "step": 62510 }, { "epoch": 4.247520043484169, "grad_norm": 1.1469906568527222, "learning_rate": 0.0004690599945644789, "loss": 3.5213, "step": 62515 }, { "epoch": 4.247859763554831, "grad_norm": 0.923545241355896, "learning_rate": 0.00046901752955564615, "loss": 3.2604, "step": 62520 }, { "epoch": 4.248199483625493, "grad_norm": 0.8923083543777466, "learning_rate": 0.0004689750645468134, "loss": 3.2821, "step": 62525 }, { "epoch": 4.248539203696154, "grad_norm": 0.7732527256011963, "learning_rate": 0.0004689325995379807, "loss": 3.4976, "step": 62530 }, { "epoch": 4.248878923766816, "grad_norm": 0.8284834027290344, "learning_rate": 0.000468890134529148, "loss": 3.3868, "step": 62535 }, { "epoch": 4.2492186438374775, "grad_norm": 0.9480911493301392, "learning_rate": 0.00046884766952031527, "loss": 3.6136, "step": 62540 }, { "epoch": 4.24955836390814, "grad_norm": 0.9909446239471436, "learning_rate": 0.00046880520451148255, "loss": 3.3003, "step": 62545 }, { "epoch": 4.249898083978802, "grad_norm": 0.9021207094192505, "learning_rate": 0.0004687627395026498, "loss": 3.4878, "step": 62550 }, { "epoch": 4.250237804049463, "grad_norm": 0.749625027179718, "learning_rate": 0.0004687202744938171, "loss": 3.3482, "step": 62555 }, { "epoch": 4.250577524120125, "grad_norm": 0.7491831183433533, "learning_rate": 0.0004686778094849844, "loss": 3.1142, "step": 62560 }, { "epoch": 4.250917244190787, "grad_norm": 4.378658771514893, "learning_rate": 0.0004686353444761516, "loss": 3.5059, "step": 62565 }, { "epoch": 4.251256964261448, "grad_norm": 0.8541637659072876, "learning_rate": 0.00046859287946731895, "loss": 3.5269, "step": 62570 }, { "epoch": 4.25159668433211, "grad_norm": 0.9197339415550232, "learning_rate": 0.0004685504144584862, "loss": 3.5367, "step": 62575 }, { "epoch": 4.251936404402772, "grad_norm": 0.8299925923347473, "learning_rate": 0.00046850794944965345, "loss": 3.2937, "step": 62580 }, { "epoch": 4.2522761244734335, "grad_norm": 0.8622176051139832, "learning_rate": 0.0004684654844408208, "loss": 3.5138, "step": 62585 }, { "epoch": 4.252615844544096, "grad_norm": 1.0262635946273804, "learning_rate": 0.00046842301943198807, "loss": 3.5114, "step": 62590 }, { "epoch": 4.252955564614758, "grad_norm": 0.7360864281654358, "learning_rate": 0.0004683805544231553, "loss": 3.4764, "step": 62595 }, { "epoch": 4.253295284685419, "grad_norm": 0.9645975232124329, "learning_rate": 0.00046833808941432257, "loss": 3.4238, "step": 62600 }, { "epoch": 4.253635004756081, "grad_norm": 0.962471604347229, "learning_rate": 0.0004682956244054899, "loss": 3.1454, "step": 62605 }, { "epoch": 4.253974724826743, "grad_norm": 0.9216309189796448, "learning_rate": 0.0004682531593966572, "loss": 3.4826, "step": 62610 }, { "epoch": 4.254314444897404, "grad_norm": 0.8070670962333679, "learning_rate": 0.0004682106943878244, "loss": 3.4024, "step": 62615 }, { "epoch": 4.254654164968066, "grad_norm": 0.9592640995979309, "learning_rate": 0.00046816822937899175, "loss": 3.5089, "step": 62620 }, { "epoch": 4.254993885038728, "grad_norm": 0.8739386796951294, "learning_rate": 0.000468125764370159, "loss": 3.5299, "step": 62625 }, { "epoch": 4.2553336051093895, "grad_norm": 0.8468846678733826, "learning_rate": 0.00046808329936132625, "loss": 3.7183, "step": 62630 }, { "epoch": 4.255673325180052, "grad_norm": 1.026724100112915, "learning_rate": 0.0004680408343524936, "loss": 3.4499, "step": 62635 }, { "epoch": 4.256013045250714, "grad_norm": 1.1355243921279907, "learning_rate": 0.00046799836934366087, "loss": 3.1432, "step": 62640 }, { "epoch": 4.256352765321375, "grad_norm": 0.8798123598098755, "learning_rate": 0.0004679559043348281, "loss": 3.4388, "step": 62645 }, { "epoch": 4.256692485392037, "grad_norm": 0.8509390354156494, "learning_rate": 0.0004679134393259954, "loss": 3.4567, "step": 62650 }, { "epoch": 4.257032205462699, "grad_norm": 0.8399722576141357, "learning_rate": 0.0004678709743171627, "loss": 3.6202, "step": 62655 }, { "epoch": 4.25737192553336, "grad_norm": 0.7817937731742859, "learning_rate": 0.00046782850930832993, "loss": 3.2841, "step": 62660 }, { "epoch": 4.257711645604022, "grad_norm": 0.723937451839447, "learning_rate": 0.0004677860442994972, "loss": 3.6308, "step": 62665 }, { "epoch": 4.258051365674684, "grad_norm": 0.8056321740150452, "learning_rate": 0.00046774357929066455, "loss": 3.7713, "step": 62670 }, { "epoch": 4.2583910857453455, "grad_norm": 0.863405704498291, "learning_rate": 0.0004677011142818318, "loss": 3.3284, "step": 62675 }, { "epoch": 4.258730805816008, "grad_norm": 1.2852163314819336, "learning_rate": 0.00046765864927299905, "loss": 3.243, "step": 62680 }, { "epoch": 4.25907052588667, "grad_norm": 0.855522096157074, "learning_rate": 0.00046761618426416633, "loss": 3.3942, "step": 62685 }, { "epoch": 4.259410245957331, "grad_norm": 1.0875468254089355, "learning_rate": 0.0004675737192553336, "loss": 3.3366, "step": 62690 }, { "epoch": 4.259749966027993, "grad_norm": 0.8120050430297852, "learning_rate": 0.0004675312542465009, "loss": 3.3214, "step": 62695 }, { "epoch": 4.260089686098655, "grad_norm": 0.7442243099212646, "learning_rate": 0.0004674887892376682, "loss": 3.531, "step": 62700 }, { "epoch": 4.260429406169316, "grad_norm": 1.1789648532867432, "learning_rate": 0.00046744632422883545, "loss": 3.6601, "step": 62705 }, { "epoch": 4.260769126239978, "grad_norm": 0.9964724183082581, "learning_rate": 0.00046740385922000273, "loss": 3.6971, "step": 62710 }, { "epoch": 4.26110884631064, "grad_norm": 0.8174118995666504, "learning_rate": 0.00046736139421117, "loss": 3.6148, "step": 62715 }, { "epoch": 4.2614485663813015, "grad_norm": 1.0132817029953003, "learning_rate": 0.00046731892920233724, "loss": 3.2955, "step": 62720 }, { "epoch": 4.261788286451964, "grad_norm": 1.0247809886932373, "learning_rate": 0.0004672764641935046, "loss": 3.5491, "step": 62725 }, { "epoch": 4.262128006522626, "grad_norm": 1.5601515769958496, "learning_rate": 0.00046723399918467185, "loss": 3.6784, "step": 62730 }, { "epoch": 4.262467726593287, "grad_norm": 0.883522629737854, "learning_rate": 0.0004671915341758391, "loss": 3.5579, "step": 62735 }, { "epoch": 4.262807446663949, "grad_norm": 0.8184067010879517, "learning_rate": 0.0004671490691670064, "loss": 3.453, "step": 62740 }, { "epoch": 4.263147166734611, "grad_norm": 0.9404013752937317, "learning_rate": 0.0004671066041581737, "loss": 3.6703, "step": 62745 }, { "epoch": 4.263486886805272, "grad_norm": 1.2359004020690918, "learning_rate": 0.0004670641391493409, "loss": 3.4121, "step": 62750 }, { "epoch": 4.263826606875934, "grad_norm": 0.992039680480957, "learning_rate": 0.0004670216741405082, "loss": 3.3679, "step": 62755 }, { "epoch": 4.264166326946596, "grad_norm": 0.8321199417114258, "learning_rate": 0.00046697920913167553, "loss": 3.6196, "step": 62760 }, { "epoch": 4.2645060470172576, "grad_norm": 0.8964241147041321, "learning_rate": 0.00046693674412284276, "loss": 3.4838, "step": 62765 }, { "epoch": 4.26484576708792, "grad_norm": 0.7762098908424377, "learning_rate": 0.00046689427911401004, "loss": 3.4609, "step": 62770 }, { "epoch": 4.265185487158582, "grad_norm": 0.849297046661377, "learning_rate": 0.0004668518141051774, "loss": 3.5284, "step": 62775 }, { "epoch": 4.265525207229243, "grad_norm": 1.0686362981796265, "learning_rate": 0.00046680934909634466, "loss": 3.2587, "step": 62780 }, { "epoch": 4.265864927299905, "grad_norm": 1.022510290145874, "learning_rate": 0.0004667668840875119, "loss": 3.4365, "step": 62785 }, { "epoch": 4.266204647370567, "grad_norm": 1.0102448463439941, "learning_rate": 0.00046672441907867916, "loss": 3.4856, "step": 62790 }, { "epoch": 4.266544367441228, "grad_norm": 0.9376881718635559, "learning_rate": 0.0004666819540698465, "loss": 3.4935, "step": 62795 }, { "epoch": 4.26688408751189, "grad_norm": 1.0369386672973633, "learning_rate": 0.0004666394890610137, "loss": 3.4773, "step": 62800 }, { "epoch": 4.267223807582552, "grad_norm": 0.9463576078414917, "learning_rate": 0.000466597024052181, "loss": 3.4082, "step": 62805 }, { "epoch": 4.267563527653214, "grad_norm": 1.0640326738357544, "learning_rate": 0.00046655455904334834, "loss": 3.3137, "step": 62810 }, { "epoch": 4.267903247723876, "grad_norm": 0.9896097779273987, "learning_rate": 0.00046651209403451556, "loss": 3.7832, "step": 62815 }, { "epoch": 4.268242967794538, "grad_norm": 1.0026336908340454, "learning_rate": 0.00046646962902568284, "loss": 3.1124, "step": 62820 }, { "epoch": 4.268582687865199, "grad_norm": 0.9170539975166321, "learning_rate": 0.0004664271640168501, "loss": 3.5988, "step": 62825 }, { "epoch": 4.268922407935861, "grad_norm": 0.8372460007667542, "learning_rate": 0.0004663846990080174, "loss": 3.7902, "step": 62830 }, { "epoch": 4.269262128006522, "grad_norm": 0.8830277919769287, "learning_rate": 0.0004663422339991847, "loss": 3.5153, "step": 62835 }, { "epoch": 4.269601848077184, "grad_norm": 0.7884312868118286, "learning_rate": 0.00046629976899035196, "loss": 3.5536, "step": 62840 }, { "epoch": 4.269941568147846, "grad_norm": 0.9174456596374512, "learning_rate": 0.00046625730398151924, "loss": 3.6974, "step": 62845 }, { "epoch": 4.2702812882185075, "grad_norm": 0.7554647326469421, "learning_rate": 0.0004662148389726865, "loss": 3.5009, "step": 62850 }, { "epoch": 4.27062100828917, "grad_norm": 0.8156687021255493, "learning_rate": 0.0004661723739638538, "loss": 3.6231, "step": 62855 }, { "epoch": 4.270960728359832, "grad_norm": 0.8334752321243286, "learning_rate": 0.00046612990895502103, "loss": 3.4368, "step": 62860 }, { "epoch": 4.271300448430493, "grad_norm": 0.8297866582870483, "learning_rate": 0.00046608744394618836, "loss": 3.6961, "step": 62865 }, { "epoch": 4.271640168501155, "grad_norm": 0.7849826216697693, "learning_rate": 0.00046604497893735564, "loss": 3.4026, "step": 62870 }, { "epoch": 4.271979888571817, "grad_norm": 0.6912524700164795, "learning_rate": 0.00046600251392852287, "loss": 3.5435, "step": 62875 }, { "epoch": 4.272319608642478, "grad_norm": 0.9883927702903748, "learning_rate": 0.0004659600489196902, "loss": 3.1717, "step": 62880 }, { "epoch": 4.27265932871314, "grad_norm": 1.0142556428909302, "learning_rate": 0.0004659175839108575, "loss": 3.4933, "step": 62885 }, { "epoch": 4.272999048783802, "grad_norm": 0.9743466973304749, "learning_rate": 0.0004658751189020247, "loss": 3.6622, "step": 62890 }, { "epoch": 4.2733387688544635, "grad_norm": 1.0876703262329102, "learning_rate": 0.000465832653893192, "loss": 3.4563, "step": 62895 }, { "epoch": 4.273678488925126, "grad_norm": 0.8752255439758301, "learning_rate": 0.0004657901888843593, "loss": 3.2035, "step": 62900 }, { "epoch": 4.274018208995788, "grad_norm": 2.4211843013763428, "learning_rate": 0.00046574772387552655, "loss": 3.4968, "step": 62905 }, { "epoch": 4.274357929066449, "grad_norm": 0.7658143043518066, "learning_rate": 0.00046570525886669383, "loss": 3.3747, "step": 62910 }, { "epoch": 4.274697649137111, "grad_norm": 0.9630666971206665, "learning_rate": 0.00046566279385786116, "loss": 3.3537, "step": 62915 }, { "epoch": 4.275037369207773, "grad_norm": 0.9136813879013062, "learning_rate": 0.0004656203288490284, "loss": 3.6859, "step": 62920 }, { "epoch": 4.275377089278434, "grad_norm": 0.8126678466796875, "learning_rate": 0.00046557786384019567, "loss": 3.5053, "step": 62925 }, { "epoch": 4.275716809349096, "grad_norm": 0.9543166160583496, "learning_rate": 0.000465535398831363, "loss": 3.5323, "step": 62930 }, { "epoch": 4.276056529419758, "grad_norm": 0.8145275712013245, "learning_rate": 0.00046549293382253023, "loss": 3.3303, "step": 62935 }, { "epoch": 4.2763962494904195, "grad_norm": 0.8845632076263428, "learning_rate": 0.0004654504688136975, "loss": 3.5354, "step": 62940 }, { "epoch": 4.276735969561082, "grad_norm": 1.0814285278320312, "learning_rate": 0.0004654080038048648, "loss": 3.3671, "step": 62945 }, { "epoch": 4.277075689631744, "grad_norm": 0.8797786831855774, "learning_rate": 0.0004653655387960321, "loss": 3.4992, "step": 62950 }, { "epoch": 4.277415409702405, "grad_norm": 0.9148866534233093, "learning_rate": 0.00046532307378719935, "loss": 3.5765, "step": 62955 }, { "epoch": 4.277755129773067, "grad_norm": 1.0035520792007446, "learning_rate": 0.00046528060877836663, "loss": 3.3923, "step": 62960 }, { "epoch": 4.278094849843729, "grad_norm": 0.8205432295799255, "learning_rate": 0.00046523814376953396, "loss": 3.6367, "step": 62965 }, { "epoch": 4.27843456991439, "grad_norm": 0.942388117313385, "learning_rate": 0.0004651956787607012, "loss": 3.4002, "step": 62970 }, { "epoch": 4.278774289985052, "grad_norm": 0.8025549054145813, "learning_rate": 0.00046515321375186847, "loss": 3.503, "step": 62975 }, { "epoch": 4.279114010055714, "grad_norm": 0.9052891135215759, "learning_rate": 0.00046511074874303575, "loss": 3.6461, "step": 62980 }, { "epoch": 4.2794537301263755, "grad_norm": 0.7075366973876953, "learning_rate": 0.00046506828373420303, "loss": 3.3896, "step": 62985 }, { "epoch": 4.279793450197038, "grad_norm": 0.7186359167098999, "learning_rate": 0.0004650258187253703, "loss": 3.3471, "step": 62990 }, { "epoch": 4.2801331702677, "grad_norm": 0.8579268455505371, "learning_rate": 0.0004649833537165376, "loss": 3.1928, "step": 62995 }, { "epoch": 4.280472890338361, "grad_norm": 1.119499683380127, "learning_rate": 0.00046494088870770487, "loss": 3.4639, "step": 63000 }, { "epoch": 4.280812610409023, "grad_norm": 0.9440551400184631, "learning_rate": 0.00046489842369887215, "loss": 3.7019, "step": 63005 }, { "epoch": 4.281152330479685, "grad_norm": 1.0752047300338745, "learning_rate": 0.00046485595869003943, "loss": 3.6316, "step": 63010 }, { "epoch": 4.281492050550346, "grad_norm": 0.8332939147949219, "learning_rate": 0.00046481349368120666, "loss": 3.5088, "step": 63015 }, { "epoch": 4.281831770621008, "grad_norm": 1.0204322338104248, "learning_rate": 0.000464771028672374, "loss": 3.6723, "step": 63020 }, { "epoch": 4.28217149069167, "grad_norm": 0.7515679001808167, "learning_rate": 0.00046472856366354127, "loss": 3.5883, "step": 63025 }, { "epoch": 4.2825112107623315, "grad_norm": 1.017456293106079, "learning_rate": 0.0004646860986547085, "loss": 3.5022, "step": 63030 }, { "epoch": 4.282850930832994, "grad_norm": 0.7432921528816223, "learning_rate": 0.00046464363364587583, "loss": 3.6638, "step": 63035 }, { "epoch": 4.283190650903656, "grad_norm": 0.9040016531944275, "learning_rate": 0.0004646011686370431, "loss": 3.5003, "step": 63040 }, { "epoch": 4.283530370974317, "grad_norm": 0.8391094207763672, "learning_rate": 0.00046455870362821034, "loss": 3.5795, "step": 63045 }, { "epoch": 4.283870091044979, "grad_norm": 0.8326451778411865, "learning_rate": 0.0004645162386193776, "loss": 3.4526, "step": 63050 }, { "epoch": 4.284209811115641, "grad_norm": 0.7569173574447632, "learning_rate": 0.00046447377361054495, "loss": 3.6632, "step": 63055 }, { "epoch": 4.284549531186302, "grad_norm": 0.9134597778320312, "learning_rate": 0.0004644313086017122, "loss": 3.6009, "step": 63060 }, { "epoch": 4.284889251256964, "grad_norm": 0.9937013983726501, "learning_rate": 0.00046438884359287946, "loss": 3.667, "step": 63065 }, { "epoch": 4.285228971327626, "grad_norm": 0.8157731294631958, "learning_rate": 0.0004643463785840468, "loss": 3.4999, "step": 63070 }, { "epoch": 4.2855686913982876, "grad_norm": 1.0677434206008911, "learning_rate": 0.000464303913575214, "loss": 3.7102, "step": 63075 }, { "epoch": 4.28590841146895, "grad_norm": 1.0701124668121338, "learning_rate": 0.0004642614485663813, "loss": 3.7098, "step": 63080 }, { "epoch": 4.286248131539612, "grad_norm": 0.8966395258903503, "learning_rate": 0.0004642189835575486, "loss": 3.4293, "step": 63085 }, { "epoch": 4.286587851610273, "grad_norm": 0.8636654019355774, "learning_rate": 0.00046417651854871586, "loss": 3.3805, "step": 63090 }, { "epoch": 4.286927571680935, "grad_norm": 0.9314202070236206, "learning_rate": 0.00046413405353988314, "loss": 2.9326, "step": 63095 }, { "epoch": 4.287267291751597, "grad_norm": 0.7656220197677612, "learning_rate": 0.0004640915885310504, "loss": 3.552, "step": 63100 }, { "epoch": 4.287607011822258, "grad_norm": 0.8057489991188049, "learning_rate": 0.0004640491235222177, "loss": 3.3063, "step": 63105 }, { "epoch": 4.28794673189292, "grad_norm": 1.1799037456512451, "learning_rate": 0.000464006658513385, "loss": 3.486, "step": 63110 }, { "epoch": 4.288286451963582, "grad_norm": 0.7449032068252563, "learning_rate": 0.00046396419350455226, "loss": 3.4772, "step": 63115 }, { "epoch": 4.288626172034244, "grad_norm": 1.057191252708435, "learning_rate": 0.00046392172849571954, "loss": 3.5552, "step": 63120 }, { "epoch": 4.288965892104906, "grad_norm": 0.849864661693573, "learning_rate": 0.0004638792634868868, "loss": 3.8615, "step": 63125 }, { "epoch": 4.289305612175568, "grad_norm": 1.073035478591919, "learning_rate": 0.0004638367984780541, "loss": 3.4355, "step": 63130 }, { "epoch": 4.289645332246229, "grad_norm": 0.9139671325683594, "learning_rate": 0.0004637943334692214, "loss": 3.5101, "step": 63135 }, { "epoch": 4.289985052316891, "grad_norm": 0.9718029499053955, "learning_rate": 0.00046375186846038866, "loss": 3.3728, "step": 63140 }, { "epoch": 4.290324772387553, "grad_norm": 0.7044605016708374, "learning_rate": 0.00046370940345155594, "loss": 3.5521, "step": 63145 }, { "epoch": 4.290664492458214, "grad_norm": 0.9600394368171692, "learning_rate": 0.0004636669384427232, "loss": 3.7049, "step": 63150 }, { "epoch": 4.291004212528876, "grad_norm": 1.0087790489196777, "learning_rate": 0.00046362447343389044, "loss": 3.6528, "step": 63155 }, { "epoch": 4.291343932599538, "grad_norm": 0.8499536514282227, "learning_rate": 0.0004635820084250578, "loss": 3.5781, "step": 63160 }, { "epoch": 4.2916836526702, "grad_norm": 1.048994541168213, "learning_rate": 0.00046353954341622506, "loss": 3.2282, "step": 63165 }, { "epoch": 4.292023372740862, "grad_norm": 1.14274001121521, "learning_rate": 0.0004634970784073923, "loss": 3.4732, "step": 63170 }, { "epoch": 4.292363092811524, "grad_norm": 0.8861408829689026, "learning_rate": 0.0004634546133985596, "loss": 3.4481, "step": 63175 }, { "epoch": 4.292702812882185, "grad_norm": 0.9583660364151001, "learning_rate": 0.0004634121483897269, "loss": 3.4129, "step": 63180 }, { "epoch": 4.293042532952847, "grad_norm": 0.9439297318458557, "learning_rate": 0.0004633696833808941, "loss": 3.501, "step": 63185 }, { "epoch": 4.293382253023509, "grad_norm": 1.1283248662948608, "learning_rate": 0.00046332721837206146, "loss": 3.4876, "step": 63190 }, { "epoch": 4.29372197309417, "grad_norm": 0.702498733997345, "learning_rate": 0.00046328475336322874, "loss": 3.7509, "step": 63195 }, { "epoch": 4.294061693164832, "grad_norm": 1.118389368057251, "learning_rate": 0.00046324228835439596, "loss": 3.285, "step": 63200 }, { "epoch": 4.294401413235494, "grad_norm": 1.0719313621520996, "learning_rate": 0.00046319982334556324, "loss": 3.5133, "step": 63205 }, { "epoch": 4.294741133306156, "grad_norm": 0.9516918063163757, "learning_rate": 0.0004631573583367306, "loss": 3.4966, "step": 63210 }, { "epoch": 4.295080853376818, "grad_norm": 0.9275661110877991, "learning_rate": 0.0004631148933278978, "loss": 3.5664, "step": 63215 }, { "epoch": 4.29542057344748, "grad_norm": 0.935204267501831, "learning_rate": 0.0004630724283190651, "loss": 3.6038, "step": 63220 }, { "epoch": 4.295760293518141, "grad_norm": 1.0143736600875854, "learning_rate": 0.0004630299633102324, "loss": 3.4991, "step": 63225 }, { "epoch": 4.296100013588803, "grad_norm": 0.8882645964622498, "learning_rate": 0.00046298749830139965, "loss": 3.5197, "step": 63230 }, { "epoch": 4.296439733659464, "grad_norm": 1.0124783515930176, "learning_rate": 0.0004629450332925669, "loss": 3.3145, "step": 63235 }, { "epoch": 4.296779453730126, "grad_norm": 1.1305333375930786, "learning_rate": 0.0004629025682837342, "loss": 3.3406, "step": 63240 }, { "epoch": 4.297119173800788, "grad_norm": 0.7439259886741638, "learning_rate": 0.0004628601032749015, "loss": 3.5638, "step": 63245 }, { "epoch": 4.2974588938714495, "grad_norm": 0.757042407989502, "learning_rate": 0.00046281763826606877, "loss": 3.5936, "step": 63250 }, { "epoch": 4.297798613942112, "grad_norm": 0.9157543182373047, "learning_rate": 0.00046277517325723605, "loss": 3.4952, "step": 63255 }, { "epoch": 4.298138334012774, "grad_norm": 1.0470733642578125, "learning_rate": 0.0004627327082484033, "loss": 3.4363, "step": 63260 }, { "epoch": 4.298478054083435, "grad_norm": 1.0580462217330933, "learning_rate": 0.0004626902432395706, "loss": 3.7582, "step": 63265 }, { "epoch": 4.298817774154097, "grad_norm": 1.00447678565979, "learning_rate": 0.0004626477782307379, "loss": 3.3235, "step": 63270 }, { "epoch": 4.299157494224759, "grad_norm": 0.932273268699646, "learning_rate": 0.0004626053132219051, "loss": 3.6634, "step": 63275 }, { "epoch": 4.29949721429542, "grad_norm": 0.8090956807136536, "learning_rate": 0.00046256284821307245, "loss": 3.5485, "step": 63280 }, { "epoch": 4.299836934366082, "grad_norm": 1.0920292139053345, "learning_rate": 0.0004625203832042397, "loss": 3.514, "step": 63285 }, { "epoch": 4.300176654436744, "grad_norm": 0.7680724859237671, "learning_rate": 0.000462477918195407, "loss": 3.6105, "step": 63290 }, { "epoch": 4.3005163745074055, "grad_norm": 0.8498972654342651, "learning_rate": 0.0004624354531865743, "loss": 3.6357, "step": 63295 }, { "epoch": 4.300856094578068, "grad_norm": 1.0505073070526123, "learning_rate": 0.00046239298817774157, "loss": 3.3726, "step": 63300 }, { "epoch": 4.30119581464873, "grad_norm": 0.9010647535324097, "learning_rate": 0.00046235052316890885, "loss": 3.6079, "step": 63305 }, { "epoch": 4.301535534719391, "grad_norm": 1.0109111070632935, "learning_rate": 0.00046230805816007607, "loss": 3.208, "step": 63310 }, { "epoch": 4.301875254790053, "grad_norm": 0.9397577047348022, "learning_rate": 0.0004622655931512434, "loss": 3.4915, "step": 63315 }, { "epoch": 4.302214974860715, "grad_norm": 0.8470300436019897, "learning_rate": 0.0004622231281424107, "loss": 3.6991, "step": 63320 }, { "epoch": 4.302554694931376, "grad_norm": 0.7456369996070862, "learning_rate": 0.0004621806631335779, "loss": 3.4182, "step": 63325 }, { "epoch": 4.302894415002038, "grad_norm": 0.9127781987190247, "learning_rate": 0.00046213819812474525, "loss": 3.5517, "step": 63330 }, { "epoch": 4.3032341350727, "grad_norm": 0.9428461790084839, "learning_rate": 0.0004620957331159125, "loss": 3.3641, "step": 63335 }, { "epoch": 4.3035738551433615, "grad_norm": 1.1578031778335571, "learning_rate": 0.00046205326810707975, "loss": 3.3254, "step": 63340 }, { "epoch": 4.303913575214024, "grad_norm": 0.7605162262916565, "learning_rate": 0.00046201080309824703, "loss": 3.1745, "step": 63345 }, { "epoch": 4.304253295284686, "grad_norm": 0.8030891418457031, "learning_rate": 0.00046196833808941437, "loss": 3.4805, "step": 63350 }, { "epoch": 4.304593015355347, "grad_norm": 0.7844246625900269, "learning_rate": 0.0004619258730805816, "loss": 3.3293, "step": 63355 }, { "epoch": 4.304932735426009, "grad_norm": 0.9471333026885986, "learning_rate": 0.0004618834080717489, "loss": 3.8534, "step": 63360 }, { "epoch": 4.305272455496671, "grad_norm": 1.0193318128585815, "learning_rate": 0.0004618409430629162, "loss": 3.6415, "step": 63365 }, { "epoch": 4.305612175567332, "grad_norm": 0.9798857569694519, "learning_rate": 0.00046179847805408343, "loss": 3.5176, "step": 63370 }, { "epoch": 4.305951895637994, "grad_norm": 0.8460768461227417, "learning_rate": 0.0004617560130452507, "loss": 3.5642, "step": 63375 }, { "epoch": 4.306291615708656, "grad_norm": 0.8960776329040527, "learning_rate": 0.000461713548036418, "loss": 3.223, "step": 63380 }, { "epoch": 4.306631335779318, "grad_norm": 0.8703097105026245, "learning_rate": 0.0004616710830275853, "loss": 3.3396, "step": 63385 }, { "epoch": 4.30697105584998, "grad_norm": 0.9503531455993652, "learning_rate": 0.00046162861801875255, "loss": 3.5174, "step": 63390 }, { "epoch": 4.307310775920642, "grad_norm": 0.8498976230621338, "learning_rate": 0.00046158615300991983, "loss": 3.4601, "step": 63395 }, { "epoch": 4.307650495991303, "grad_norm": 0.8161876201629639, "learning_rate": 0.0004615436880010871, "loss": 3.5758, "step": 63400 }, { "epoch": 4.307990216061965, "grad_norm": 0.7444183826446533, "learning_rate": 0.0004615012229922544, "loss": 3.283, "step": 63405 }, { "epoch": 4.308329936132627, "grad_norm": 0.7839939594268799, "learning_rate": 0.0004614587579834217, "loss": 3.514, "step": 63410 }, { "epoch": 4.308669656203288, "grad_norm": 0.7391408085823059, "learning_rate": 0.0004614162929745889, "loss": 3.6163, "step": 63415 }, { "epoch": 4.30900937627395, "grad_norm": 0.9623317718505859, "learning_rate": 0.00046137382796575623, "loss": 3.5905, "step": 63420 }, { "epoch": 4.309349096344612, "grad_norm": 0.785484790802002, "learning_rate": 0.0004613313629569235, "loss": 3.5826, "step": 63425 }, { "epoch": 4.309688816415274, "grad_norm": 1.0002474784851074, "learning_rate": 0.00046128889794809074, "loss": 3.6054, "step": 63430 }, { "epoch": 4.310028536485936, "grad_norm": 0.732146680355072, "learning_rate": 0.0004612464329392581, "loss": 3.5689, "step": 63435 }, { "epoch": 4.310368256556598, "grad_norm": 1.2116374969482422, "learning_rate": 0.00046120396793042535, "loss": 3.4533, "step": 63440 }, { "epoch": 4.310707976627259, "grad_norm": 0.9260076284408569, "learning_rate": 0.0004611615029215926, "loss": 3.3293, "step": 63445 }, { "epoch": 4.311047696697921, "grad_norm": 0.895195484161377, "learning_rate": 0.00046111903791275986, "loss": 3.5937, "step": 63450 }, { "epoch": 4.311387416768583, "grad_norm": 0.7954394817352295, "learning_rate": 0.0004610765729039272, "loss": 3.7715, "step": 63455 }, { "epoch": 4.311727136839244, "grad_norm": 1.0760308504104614, "learning_rate": 0.0004610341078950945, "loss": 3.6492, "step": 63460 }, { "epoch": 4.312066856909906, "grad_norm": 0.8424020409584045, "learning_rate": 0.0004609916428862617, "loss": 3.3422, "step": 63465 }, { "epoch": 4.312406576980568, "grad_norm": 0.893039345741272, "learning_rate": 0.00046094917787742903, "loss": 3.5619, "step": 63470 }, { "epoch": 4.31274629705123, "grad_norm": 1.2063446044921875, "learning_rate": 0.0004609067128685963, "loss": 3.691, "step": 63475 }, { "epoch": 4.313086017121892, "grad_norm": 0.9133750200271606, "learning_rate": 0.00046086424785976354, "loss": 3.5499, "step": 63480 }, { "epoch": 4.313425737192554, "grad_norm": 6.863533020019531, "learning_rate": 0.0004608217828509309, "loss": 3.459, "step": 63485 }, { "epoch": 4.313765457263215, "grad_norm": 0.8867606520652771, "learning_rate": 0.00046077931784209815, "loss": 3.561, "step": 63490 }, { "epoch": 4.314105177333877, "grad_norm": 0.850654125213623, "learning_rate": 0.0004607368528332654, "loss": 3.4441, "step": 63495 }, { "epoch": 4.314444897404539, "grad_norm": 0.7537965178489685, "learning_rate": 0.00046069438782443266, "loss": 3.6557, "step": 63500 }, { "epoch": 4.3147846174752, "grad_norm": 0.8483400940895081, "learning_rate": 0.0004606519228156, "loss": 3.5814, "step": 63505 }, { "epoch": 4.315124337545862, "grad_norm": 0.8274759650230408, "learning_rate": 0.0004606094578067672, "loss": 3.5797, "step": 63510 }, { "epoch": 4.3154640576165235, "grad_norm": 1.2165584564208984, "learning_rate": 0.0004605669927979345, "loss": 3.751, "step": 63515 }, { "epoch": 4.315803777687186, "grad_norm": 0.9291759133338928, "learning_rate": 0.00046052452778910184, "loss": 3.3624, "step": 63520 }, { "epoch": 4.316143497757848, "grad_norm": 0.7642979025840759, "learning_rate": 0.00046048206278026906, "loss": 3.527, "step": 63525 }, { "epoch": 4.316483217828509, "grad_norm": 0.8614895343780518, "learning_rate": 0.00046043959777143634, "loss": 3.264, "step": 63530 }, { "epoch": 4.316822937899171, "grad_norm": 0.9371413588523865, "learning_rate": 0.0004603971327626036, "loss": 3.5584, "step": 63535 }, { "epoch": 4.317162657969833, "grad_norm": 0.7602739334106445, "learning_rate": 0.0004603546677537709, "loss": 3.4627, "step": 63540 }, { "epoch": 4.317502378040494, "grad_norm": 0.8997742533683777, "learning_rate": 0.0004603122027449382, "loss": 3.5373, "step": 63545 }, { "epoch": 4.317842098111156, "grad_norm": 0.8409783840179443, "learning_rate": 0.00046026973773610546, "loss": 3.4716, "step": 63550 }, { "epoch": 4.318181818181818, "grad_norm": 0.8912867307662964, "learning_rate": 0.00046022727272727274, "loss": 3.6941, "step": 63555 }, { "epoch": 4.3185215382524795, "grad_norm": 0.8730382323265076, "learning_rate": 0.00046018480771844, "loss": 3.5182, "step": 63560 }, { "epoch": 4.318861258323142, "grad_norm": 1.0314522981643677, "learning_rate": 0.0004601423427096073, "loss": 3.5165, "step": 63565 }, { "epoch": 4.319200978393804, "grad_norm": 0.9358851909637451, "learning_rate": 0.00046009987770077453, "loss": 3.5095, "step": 63570 }, { "epoch": 4.319540698464465, "grad_norm": 0.888331949710846, "learning_rate": 0.00046005741269194186, "loss": 3.3407, "step": 63575 }, { "epoch": 4.319880418535127, "grad_norm": 0.7379451990127563, "learning_rate": 0.00046001494768310914, "loss": 3.675, "step": 63580 }, { "epoch": 4.320220138605789, "grad_norm": 0.7439711689949036, "learning_rate": 0.00045997248267427637, "loss": 3.6333, "step": 63585 }, { "epoch": 4.32055985867645, "grad_norm": 1.0048997402191162, "learning_rate": 0.0004599300176654437, "loss": 3.3139, "step": 63590 }, { "epoch": 4.320899578747112, "grad_norm": 1.190597653388977, "learning_rate": 0.000459887552656611, "loss": 3.2786, "step": 63595 }, { "epoch": 4.321239298817774, "grad_norm": 0.9907605051994324, "learning_rate": 0.0004598450876477782, "loss": 3.4597, "step": 63600 }, { "epoch": 4.3215790188884355, "grad_norm": 0.8446999788284302, "learning_rate": 0.0004598026226389455, "loss": 3.5349, "step": 63605 }, { "epoch": 4.321918738959098, "grad_norm": 1.0107758045196533, "learning_rate": 0.0004597601576301128, "loss": 3.4014, "step": 63610 }, { "epoch": 4.32225845902976, "grad_norm": 0.9221784472465515, "learning_rate": 0.00045971769262128005, "loss": 3.5041, "step": 63615 }, { "epoch": 4.322598179100421, "grad_norm": 0.8770763874053955, "learning_rate": 0.00045967522761244733, "loss": 3.4542, "step": 63620 }, { "epoch": 4.322937899171083, "grad_norm": 0.8623422384262085, "learning_rate": 0.00045963276260361466, "loss": 3.4452, "step": 63625 }, { "epoch": 4.323277619241745, "grad_norm": 1.0108615159988403, "learning_rate": 0.00045959029759478194, "loss": 3.6085, "step": 63630 }, { "epoch": 4.323617339312406, "grad_norm": 0.9481383562088013, "learning_rate": 0.00045954783258594917, "loss": 3.2311, "step": 63635 }, { "epoch": 4.323957059383068, "grad_norm": 0.7505761981010437, "learning_rate": 0.00045950536757711645, "loss": 3.5852, "step": 63640 }, { "epoch": 4.32429677945373, "grad_norm": 0.9163798093795776, "learning_rate": 0.0004594629025682838, "loss": 3.5715, "step": 63645 }, { "epoch": 4.3246364995243916, "grad_norm": 0.7255598306655884, "learning_rate": 0.000459420437559451, "loss": 3.5082, "step": 63650 }, { "epoch": 4.324976219595054, "grad_norm": 0.8730499744415283, "learning_rate": 0.0004593779725506183, "loss": 3.3951, "step": 63655 }, { "epoch": 4.325315939665716, "grad_norm": 0.8823094964027405, "learning_rate": 0.0004593355075417856, "loss": 3.2884, "step": 63660 }, { "epoch": 4.325655659736377, "grad_norm": 1.156896710395813, "learning_rate": 0.00045929304253295285, "loss": 3.7539, "step": 63665 }, { "epoch": 4.325995379807039, "grad_norm": 0.8146911859512329, "learning_rate": 0.00045925057752412013, "loss": 3.4958, "step": 63670 }, { "epoch": 4.326335099877701, "grad_norm": 0.7865837216377258, "learning_rate": 0.0004592081125152874, "loss": 3.5582, "step": 63675 }, { "epoch": 4.326674819948362, "grad_norm": 0.9667829871177673, "learning_rate": 0.0004591656475064547, "loss": 3.8062, "step": 63680 }, { "epoch": 4.327014540019024, "grad_norm": 0.726967453956604, "learning_rate": 0.00045912318249762197, "loss": 3.5864, "step": 63685 }, { "epoch": 4.327354260089686, "grad_norm": 0.8275024890899658, "learning_rate": 0.00045908071748878925, "loss": 3.6488, "step": 63690 }, { "epoch": 4.327693980160348, "grad_norm": 0.9242106080055237, "learning_rate": 0.00045903825247995653, "loss": 3.5204, "step": 63695 }, { "epoch": 4.32803370023101, "grad_norm": 0.8766757249832153, "learning_rate": 0.0004589957874711238, "loss": 3.2796, "step": 63700 }, { "epoch": 4.328373420301672, "grad_norm": 0.8839744329452515, "learning_rate": 0.0004589533224622911, "loss": 3.3753, "step": 63705 }, { "epoch": 4.328713140372333, "grad_norm": 0.7990246415138245, "learning_rate": 0.0004589108574534583, "loss": 3.5364, "step": 63710 }, { "epoch": 4.329052860442995, "grad_norm": 0.8658140301704407, "learning_rate": 0.00045886839244462565, "loss": 3.1989, "step": 63715 }, { "epoch": 4.329392580513657, "grad_norm": 0.7308144569396973, "learning_rate": 0.00045882592743579293, "loss": 3.3909, "step": 63720 }, { "epoch": 4.329732300584318, "grad_norm": 0.7782678604125977, "learning_rate": 0.00045878346242696016, "loss": 3.2512, "step": 63725 }, { "epoch": 4.33007202065498, "grad_norm": 0.874247670173645, "learning_rate": 0.0004587409974181275, "loss": 3.3537, "step": 63730 }, { "epoch": 4.330411740725642, "grad_norm": 0.975520670413971, "learning_rate": 0.00045869853240929477, "loss": 3.1277, "step": 63735 }, { "epoch": 4.330751460796304, "grad_norm": 0.8464484810829163, "learning_rate": 0.000458656067400462, "loss": 3.3297, "step": 63740 }, { "epoch": 4.331091180866966, "grad_norm": 0.9867607355117798, "learning_rate": 0.0004586136023916293, "loss": 3.1022, "step": 63745 }, { "epoch": 4.331430900937628, "grad_norm": 0.8536937832832336, "learning_rate": 0.0004585711373827966, "loss": 3.6501, "step": 63750 }, { "epoch": 4.331770621008289, "grad_norm": 0.9176635146141052, "learning_rate": 0.00045852867237396384, "loss": 3.3217, "step": 63755 }, { "epoch": 4.332110341078951, "grad_norm": 0.8491596579551697, "learning_rate": 0.0004584862073651311, "loss": 3.3535, "step": 63760 }, { "epoch": 4.332450061149613, "grad_norm": 1.1154425144195557, "learning_rate": 0.00045844374235629845, "loss": 3.2336, "step": 63765 }, { "epoch": 4.332789781220274, "grad_norm": 0.7484056949615479, "learning_rate": 0.0004584012773474657, "loss": 3.4865, "step": 63770 }, { "epoch": 4.333129501290936, "grad_norm": 1.037522315979004, "learning_rate": 0.00045835881233863296, "loss": 3.3765, "step": 63775 }, { "epoch": 4.333469221361598, "grad_norm": 0.8413194417953491, "learning_rate": 0.0004583163473298003, "loss": 3.2379, "step": 63780 }, { "epoch": 4.33380894143226, "grad_norm": 1.2168850898742676, "learning_rate": 0.0004582738823209675, "loss": 3.9675, "step": 63785 }, { "epoch": 4.334148661502922, "grad_norm": 0.746817946434021, "learning_rate": 0.0004582314173121348, "loss": 3.4105, "step": 63790 }, { "epoch": 4.334488381573584, "grad_norm": 1.2545156478881836, "learning_rate": 0.0004581889523033021, "loss": 3.4533, "step": 63795 }, { "epoch": 4.334828101644245, "grad_norm": 0.9837030172348022, "learning_rate": 0.0004581464872944694, "loss": 3.1944, "step": 63800 }, { "epoch": 4.335167821714907, "grad_norm": 0.6560445427894592, "learning_rate": 0.00045810402228563664, "loss": 3.5484, "step": 63805 }, { "epoch": 4.335507541785569, "grad_norm": 1.5255625247955322, "learning_rate": 0.0004580615572768039, "loss": 3.116, "step": 63810 }, { "epoch": 4.33584726185623, "grad_norm": 0.8826807141304016, "learning_rate": 0.00045801909226797125, "loss": 3.7089, "step": 63815 }, { "epoch": 4.336186981926892, "grad_norm": 0.9641222953796387, "learning_rate": 0.0004579766272591385, "loss": 3.5097, "step": 63820 }, { "epoch": 4.336526701997554, "grad_norm": 0.884658932685852, "learning_rate": 0.00045793416225030576, "loss": 3.7325, "step": 63825 }, { "epoch": 4.336866422068216, "grad_norm": 0.8462684750556946, "learning_rate": 0.00045789169724147304, "loss": 3.622, "step": 63830 }, { "epoch": 4.337206142138878, "grad_norm": 0.8645135164260864, "learning_rate": 0.0004578492322326403, "loss": 3.359, "step": 63835 }, { "epoch": 4.33754586220954, "grad_norm": 0.7918854355812073, "learning_rate": 0.0004578067672238076, "loss": 3.483, "step": 63840 }, { "epoch": 4.337885582280201, "grad_norm": 0.9172031283378601, "learning_rate": 0.0004577643022149749, "loss": 3.5068, "step": 63845 }, { "epoch": 4.338225302350863, "grad_norm": 1.0127224922180176, "learning_rate": 0.00045772183720614216, "loss": 3.0608, "step": 63850 }, { "epoch": 4.338565022421525, "grad_norm": 0.6827316284179688, "learning_rate": 0.00045767937219730944, "loss": 3.4395, "step": 63855 }, { "epoch": 4.338904742492186, "grad_norm": 0.8347532749176025, "learning_rate": 0.0004576369071884767, "loss": 3.4962, "step": 63860 }, { "epoch": 4.339244462562848, "grad_norm": 1.13916015625, "learning_rate": 0.00045759444217964394, "loss": 3.7297, "step": 63865 }, { "epoch": 4.33958418263351, "grad_norm": 0.9371297359466553, "learning_rate": 0.0004575519771708113, "loss": 3.5323, "step": 63870 }, { "epoch": 4.339923902704172, "grad_norm": 0.7954642176628113, "learning_rate": 0.00045750951216197856, "loss": 3.4789, "step": 63875 }, { "epoch": 4.340263622774834, "grad_norm": 0.84361732006073, "learning_rate": 0.0004574670471531458, "loss": 3.7971, "step": 63880 }, { "epoch": 4.340603342845496, "grad_norm": 1.1230800151824951, "learning_rate": 0.0004574245821443131, "loss": 3.6912, "step": 63885 }, { "epoch": 4.340943062916157, "grad_norm": 0.7453582286834717, "learning_rate": 0.0004573821171354804, "loss": 3.5036, "step": 63890 }, { "epoch": 4.341282782986819, "grad_norm": 0.689608097076416, "learning_rate": 0.0004573396521266476, "loss": 3.3715, "step": 63895 }, { "epoch": 4.341622503057481, "grad_norm": 0.9573660492897034, "learning_rate": 0.0004572971871178149, "loss": 3.3909, "step": 63900 }, { "epoch": 4.341962223128142, "grad_norm": 0.8617348074913025, "learning_rate": 0.00045725472210898224, "loss": 3.3257, "step": 63905 }, { "epoch": 4.342301943198804, "grad_norm": 1.2585164308547974, "learning_rate": 0.00045721225710014946, "loss": 3.3754, "step": 63910 }, { "epoch": 4.3426416632694655, "grad_norm": 0.8391521573066711, "learning_rate": 0.00045716979209131674, "loss": 3.5223, "step": 63915 }, { "epoch": 4.342981383340128, "grad_norm": 0.935937762260437, "learning_rate": 0.0004571273270824841, "loss": 3.5075, "step": 63920 }, { "epoch": 4.34332110341079, "grad_norm": 0.7743483185768127, "learning_rate": 0.0004570848620736513, "loss": 3.8233, "step": 63925 }, { "epoch": 4.343660823481451, "grad_norm": 0.9026889801025391, "learning_rate": 0.0004570423970648186, "loss": 3.72, "step": 63930 }, { "epoch": 4.344000543552113, "grad_norm": 0.8418740034103394, "learning_rate": 0.00045699993205598587, "loss": 3.6296, "step": 63935 }, { "epoch": 4.344340263622775, "grad_norm": 0.9789199829101562, "learning_rate": 0.00045695746704715315, "loss": 3.5162, "step": 63940 }, { "epoch": 4.344679983693436, "grad_norm": 1.1322083473205566, "learning_rate": 0.0004569150020383204, "loss": 3.3734, "step": 63945 }, { "epoch": 4.345019703764098, "grad_norm": 0.7849491834640503, "learning_rate": 0.0004568725370294877, "loss": 3.4661, "step": 63950 }, { "epoch": 4.34535942383476, "grad_norm": 0.8749169111251831, "learning_rate": 0.000456830072020655, "loss": 3.3094, "step": 63955 }, { "epoch": 4.345699143905422, "grad_norm": 0.8175095319747925, "learning_rate": 0.00045678760701182227, "loss": 3.3418, "step": 63960 }, { "epoch": 4.346038863976084, "grad_norm": 1.2671945095062256, "learning_rate": 0.00045674514200298955, "loss": 3.4372, "step": 63965 }, { "epoch": 4.346378584046746, "grad_norm": 0.8950656652450562, "learning_rate": 0.0004567026769941568, "loss": 3.0568, "step": 63970 }, { "epoch": 4.346718304117407, "grad_norm": 0.8960034847259521, "learning_rate": 0.0004566602119853241, "loss": 3.6081, "step": 63975 }, { "epoch": 4.347058024188069, "grad_norm": 1.1570065021514893, "learning_rate": 0.0004566177469764914, "loss": 3.3209, "step": 63980 }, { "epoch": 4.347397744258731, "grad_norm": 0.7255001068115234, "learning_rate": 0.00045657528196765867, "loss": 3.5867, "step": 63985 }, { "epoch": 4.347737464329392, "grad_norm": 1.0038740634918213, "learning_rate": 0.00045653281695882595, "loss": 3.521, "step": 63990 }, { "epoch": 4.348077184400054, "grad_norm": 1.0589931011199951, "learning_rate": 0.0004564903519499932, "loss": 3.6237, "step": 63995 }, { "epoch": 4.348416904470716, "grad_norm": 0.7493960857391357, "learning_rate": 0.0004564478869411605, "loss": 3.2497, "step": 64000 }, { "epoch": 4.348756624541378, "grad_norm": 1.1200355291366577, "learning_rate": 0.00045640542193232773, "loss": 3.4345, "step": 64005 }, { "epoch": 4.34909634461204, "grad_norm": 1.0469470024108887, "learning_rate": 0.00045636295692349507, "loss": 3.6444, "step": 64010 }, { "epoch": 4.349436064682702, "grad_norm": 1.3059152364730835, "learning_rate": 0.00045632049191466235, "loss": 3.2828, "step": 64015 }, { "epoch": 4.349775784753363, "grad_norm": 0.796098530292511, "learning_rate": 0.00045627802690582957, "loss": 3.6809, "step": 64020 }, { "epoch": 4.350115504824025, "grad_norm": 1.08735990524292, "learning_rate": 0.0004562355618969969, "loss": 3.3172, "step": 64025 }, { "epoch": 4.350455224894687, "grad_norm": 1.0638996362686157, "learning_rate": 0.0004561930968881642, "loss": 3.7254, "step": 64030 }, { "epoch": 4.350794944965348, "grad_norm": 0.816719114780426, "learning_rate": 0.0004561506318793314, "loss": 3.2767, "step": 64035 }, { "epoch": 4.35113466503601, "grad_norm": 0.975882887840271, "learning_rate": 0.00045610816687049875, "loss": 3.7186, "step": 64040 }, { "epoch": 4.351474385106672, "grad_norm": 0.8562283515930176, "learning_rate": 0.000456065701861666, "loss": 3.4719, "step": 64045 }, { "epoch": 4.351814105177334, "grad_norm": 0.8062695860862732, "learning_rate": 0.00045602323685283325, "loss": 3.1774, "step": 64050 }, { "epoch": 4.352153825247996, "grad_norm": 0.8286985754966736, "learning_rate": 0.00045598077184400053, "loss": 3.303, "step": 64055 }, { "epoch": 4.352493545318658, "grad_norm": 1.1181873083114624, "learning_rate": 0.00045593830683516787, "loss": 3.4947, "step": 64060 }, { "epoch": 4.352833265389319, "grad_norm": 0.8421272039413452, "learning_rate": 0.0004558958418263351, "loss": 3.6738, "step": 64065 }, { "epoch": 4.353172985459981, "grad_norm": 0.8119519948959351, "learning_rate": 0.00045585337681750237, "loss": 3.4687, "step": 64070 }, { "epoch": 4.353512705530643, "grad_norm": 0.9335781335830688, "learning_rate": 0.0004558109118086697, "loss": 3.6479, "step": 64075 }, { "epoch": 4.353852425601304, "grad_norm": 0.7960092425346375, "learning_rate": 0.00045576844679983693, "loss": 3.4977, "step": 64080 }, { "epoch": 4.354192145671966, "grad_norm": 1.1534667015075684, "learning_rate": 0.0004557259817910042, "loss": 3.3004, "step": 64085 }, { "epoch": 4.354531865742628, "grad_norm": 0.805350124835968, "learning_rate": 0.0004556835167821715, "loss": 3.3429, "step": 64090 }, { "epoch": 4.35487158581329, "grad_norm": 2.7468109130859375, "learning_rate": 0.0004556410517733388, "loss": 3.6474, "step": 64095 }, { "epoch": 4.355211305883952, "grad_norm": 0.8282303214073181, "learning_rate": 0.00045559858676450605, "loss": 3.4075, "step": 64100 }, { "epoch": 4.355551025954614, "grad_norm": 0.8197724223136902, "learning_rate": 0.00045555612175567333, "loss": 3.5509, "step": 64105 }, { "epoch": 4.355890746025275, "grad_norm": 0.8302217125892639, "learning_rate": 0.0004555136567468406, "loss": 3.4633, "step": 64110 }, { "epoch": 4.356230466095937, "grad_norm": 0.7723009586334229, "learning_rate": 0.0004554711917380079, "loss": 3.2346, "step": 64115 }, { "epoch": 4.356570186166599, "grad_norm": 0.9268537163734436, "learning_rate": 0.0004554287267291752, "loss": 3.8154, "step": 64120 }, { "epoch": 4.35690990623726, "grad_norm": 0.9327241778373718, "learning_rate": 0.0004553862617203424, "loss": 3.5625, "step": 64125 }, { "epoch": 4.357249626307922, "grad_norm": 0.8181995153427124, "learning_rate": 0.00045534379671150973, "loss": 3.4468, "step": 64130 }, { "epoch": 4.357589346378584, "grad_norm": 0.884998619556427, "learning_rate": 0.000455301331702677, "loss": 3.4595, "step": 64135 }, { "epoch": 4.357929066449246, "grad_norm": 0.8104830980300903, "learning_rate": 0.0004552588666938443, "loss": 3.4045, "step": 64140 }, { "epoch": 4.358268786519908, "grad_norm": 1.0157188177108765, "learning_rate": 0.0004552164016850116, "loss": 3.6781, "step": 64145 }, { "epoch": 4.35860850659057, "grad_norm": 0.808129608631134, "learning_rate": 0.00045517393667617885, "loss": 3.2705, "step": 64150 }, { "epoch": 4.358948226661231, "grad_norm": 0.790598452091217, "learning_rate": 0.00045513147166734613, "loss": 3.638, "step": 64155 }, { "epoch": 4.359287946731893, "grad_norm": 1.295979619026184, "learning_rate": 0.00045508900665851336, "loss": 3.4877, "step": 64160 }, { "epoch": 4.359627666802555, "grad_norm": 0.9578388929367065, "learning_rate": 0.0004550465416496807, "loss": 3.3999, "step": 64165 }, { "epoch": 4.359967386873216, "grad_norm": 1.0575147867202759, "learning_rate": 0.000455004076640848, "loss": 3.4249, "step": 64170 }, { "epoch": 4.360307106943878, "grad_norm": 0.8213820457458496, "learning_rate": 0.0004549616116320152, "loss": 3.5628, "step": 64175 }, { "epoch": 4.36064682701454, "grad_norm": 0.8148659467697144, "learning_rate": 0.00045491914662318253, "loss": 3.3695, "step": 64180 }, { "epoch": 4.360986547085202, "grad_norm": 0.8647225499153137, "learning_rate": 0.0004548766816143498, "loss": 3.707, "step": 64185 }, { "epoch": 4.361326267155864, "grad_norm": 0.8026604056358337, "learning_rate": 0.00045483421660551704, "loss": 3.5337, "step": 64190 }, { "epoch": 4.361665987226525, "grad_norm": 0.9234213829040527, "learning_rate": 0.0004547917515966843, "loss": 3.4892, "step": 64195 }, { "epoch": 4.362005707297187, "grad_norm": 0.9592143297195435, "learning_rate": 0.00045474928658785165, "loss": 3.3486, "step": 64200 }, { "epoch": 4.362345427367849, "grad_norm": 1.0063343048095703, "learning_rate": 0.0004547068215790189, "loss": 3.5446, "step": 64205 }, { "epoch": 4.36268514743851, "grad_norm": 1.1948484182357788, "learning_rate": 0.00045466435657018616, "loss": 3.3303, "step": 64210 }, { "epoch": 4.363024867509172, "grad_norm": 0.8311105370521545, "learning_rate": 0.0004546218915613535, "loss": 3.5713, "step": 64215 }, { "epoch": 4.363364587579834, "grad_norm": 0.8245605826377869, "learning_rate": 0.0004545794265525207, "loss": 3.5606, "step": 64220 }, { "epoch": 4.3637043076504956, "grad_norm": 0.709775984287262, "learning_rate": 0.000454536961543688, "loss": 3.4571, "step": 64225 }, { "epoch": 4.364044027721158, "grad_norm": 0.8208695650100708, "learning_rate": 0.0004544944965348553, "loss": 3.5937, "step": 64230 }, { "epoch": 4.36438374779182, "grad_norm": 1.0970091819763184, "learning_rate": 0.00045445203152602256, "loss": 3.4654, "step": 64235 }, { "epoch": 4.364723467862481, "grad_norm": 0.9484691619873047, "learning_rate": 0.00045440956651718984, "loss": 3.568, "step": 64240 }, { "epoch": 4.365063187933143, "grad_norm": 0.9631114602088928, "learning_rate": 0.0004543671015083571, "loss": 3.6399, "step": 64245 }, { "epoch": 4.365402908003805, "grad_norm": 1.0244508981704712, "learning_rate": 0.0004543246364995244, "loss": 3.4196, "step": 64250 }, { "epoch": 4.365742628074466, "grad_norm": 0.8785644769668579, "learning_rate": 0.0004542821714906917, "loss": 3.2782, "step": 64255 }, { "epoch": 4.366082348145128, "grad_norm": 1.381556510925293, "learning_rate": 0.00045423970648185896, "loss": 3.3896, "step": 64260 }, { "epoch": 4.36642206821579, "grad_norm": 0.8162555694580078, "learning_rate": 0.0004541972414730262, "loss": 3.1273, "step": 64265 }, { "epoch": 4.366761788286452, "grad_norm": 0.760924756526947, "learning_rate": 0.0004541547764641935, "loss": 3.261, "step": 64270 }, { "epoch": 4.367101508357114, "grad_norm": 0.8858211040496826, "learning_rate": 0.0004541123114553608, "loss": 3.6744, "step": 64275 }, { "epoch": 4.367441228427776, "grad_norm": 1.0548710823059082, "learning_rate": 0.00045406984644652803, "loss": 3.4382, "step": 64280 }, { "epoch": 4.367780948498437, "grad_norm": 1.025705099105835, "learning_rate": 0.00045402738143769536, "loss": 3.5001, "step": 64285 }, { "epoch": 4.368120668569099, "grad_norm": 1.5040837526321411, "learning_rate": 0.00045398491642886264, "loss": 3.403, "step": 64290 }, { "epoch": 4.368460388639761, "grad_norm": 0.8470531105995178, "learning_rate": 0.00045394245142002987, "loss": 3.475, "step": 64295 }, { "epoch": 4.368800108710422, "grad_norm": 0.9951391816139221, "learning_rate": 0.00045389998641119715, "loss": 3.6035, "step": 64300 }, { "epoch": 4.369139828781084, "grad_norm": 0.9323769807815552, "learning_rate": 0.0004538575214023645, "loss": 3.8978, "step": 64305 }, { "epoch": 4.369479548851746, "grad_norm": 0.9128039479255676, "learning_rate": 0.00045381505639353176, "loss": 3.4414, "step": 64310 }, { "epoch": 4.369819268922408, "grad_norm": 0.7647458910942078, "learning_rate": 0.000453772591384699, "loss": 3.2307, "step": 64315 }, { "epoch": 4.37015898899307, "grad_norm": 0.9004073739051819, "learning_rate": 0.0004537301263758663, "loss": 3.7186, "step": 64320 }, { "epoch": 4.370498709063732, "grad_norm": 0.788355827331543, "learning_rate": 0.0004536876613670336, "loss": 3.3409, "step": 64325 }, { "epoch": 4.370838429134393, "grad_norm": 0.648705244064331, "learning_rate": 0.00045364519635820083, "loss": 3.777, "step": 64330 }, { "epoch": 4.371178149205055, "grad_norm": 1.0367971658706665, "learning_rate": 0.00045360273134936816, "loss": 3.6295, "step": 64335 }, { "epoch": 4.371517869275717, "grad_norm": 0.9515285491943359, "learning_rate": 0.00045356026634053544, "loss": 3.1983, "step": 64340 }, { "epoch": 4.371857589346378, "grad_norm": 0.8702841997146606, "learning_rate": 0.00045351780133170267, "loss": 3.9471, "step": 64345 }, { "epoch": 4.37219730941704, "grad_norm": 0.8304216861724854, "learning_rate": 0.00045347533632286995, "loss": 3.3932, "step": 64350 }, { "epoch": 4.372537029487702, "grad_norm": 0.8743628263473511, "learning_rate": 0.0004534328713140373, "loss": 3.6531, "step": 64355 }, { "epoch": 4.372876749558364, "grad_norm": 0.9574486017227173, "learning_rate": 0.0004533904063052045, "loss": 3.4912, "step": 64360 }, { "epoch": 4.373216469629026, "grad_norm": 0.936106264591217, "learning_rate": 0.0004533479412963718, "loss": 3.722, "step": 64365 }, { "epoch": 4.373556189699688, "grad_norm": 1.44229257106781, "learning_rate": 0.0004533054762875391, "loss": 3.4364, "step": 64370 }, { "epoch": 4.373895909770349, "grad_norm": 0.8339200615882874, "learning_rate": 0.00045326301127870635, "loss": 3.7202, "step": 64375 }, { "epoch": 4.374235629841011, "grad_norm": 0.9917379021644592, "learning_rate": 0.00045322054626987363, "loss": 3.665, "step": 64380 }, { "epoch": 4.374575349911673, "grad_norm": 0.8637394309043884, "learning_rate": 0.0004531780812610409, "loss": 3.2085, "step": 64385 }, { "epoch": 4.374915069982334, "grad_norm": 0.9819831848144531, "learning_rate": 0.0004531356162522082, "loss": 3.3795, "step": 64390 }, { "epoch": 4.375254790052996, "grad_norm": 0.9710754752159119, "learning_rate": 0.00045309315124337547, "loss": 3.6736, "step": 64395 }, { "epoch": 4.375594510123658, "grad_norm": 1.006867527961731, "learning_rate": 0.00045305068623454275, "loss": 3.3275, "step": 64400 }, { "epoch": 4.37593423019432, "grad_norm": 0.9713447093963623, "learning_rate": 0.00045300822122571003, "loss": 3.5142, "step": 64405 }, { "epoch": 4.376273950264982, "grad_norm": 0.9223304390907288, "learning_rate": 0.0004529657562168773, "loss": 3.354, "step": 64410 }, { "epoch": 4.376613670335644, "grad_norm": 0.7713569402694702, "learning_rate": 0.0004529232912080446, "loss": 3.4836, "step": 64415 }, { "epoch": 4.376953390406305, "grad_norm": 0.8136366009712219, "learning_rate": 0.0004528808261992118, "loss": 3.6488, "step": 64420 }, { "epoch": 4.377293110476967, "grad_norm": 1.0428146123886108, "learning_rate": 0.00045283836119037915, "loss": 3.503, "step": 64425 }, { "epoch": 4.377632830547629, "grad_norm": 0.9239062666893005, "learning_rate": 0.00045279589618154643, "loss": 3.8233, "step": 64430 }, { "epoch": 4.37797255061829, "grad_norm": 0.6955722570419312, "learning_rate": 0.00045275343117271366, "loss": 3.7353, "step": 64435 }, { "epoch": 4.378312270688952, "grad_norm": 0.8523550033569336, "learning_rate": 0.000452710966163881, "loss": 3.3785, "step": 64440 }, { "epoch": 4.378651990759614, "grad_norm": 0.8160797357559204, "learning_rate": 0.00045266850115504827, "loss": 3.3877, "step": 64445 }, { "epoch": 4.378991710830276, "grad_norm": 0.9816880822181702, "learning_rate": 0.0004526260361462155, "loss": 3.804, "step": 64450 }, { "epoch": 4.379331430900938, "grad_norm": 1.4800978899002075, "learning_rate": 0.0004525835711373828, "loss": 3.3352, "step": 64455 }, { "epoch": 4.3796711509716, "grad_norm": 0.706000566482544, "learning_rate": 0.0004525411061285501, "loss": 3.3569, "step": 64460 }, { "epoch": 4.380010871042261, "grad_norm": 0.8496415615081787, "learning_rate": 0.00045249864111971734, "loss": 3.5992, "step": 64465 }, { "epoch": 4.380350591112923, "grad_norm": 0.9088304042816162, "learning_rate": 0.0004524561761108846, "loss": 3.3331, "step": 64470 }, { "epoch": 4.380690311183585, "grad_norm": 0.8301309943199158, "learning_rate": 0.00045241371110205195, "loss": 3.4504, "step": 64475 }, { "epoch": 4.381030031254246, "grad_norm": 1.1841665506362915, "learning_rate": 0.00045237124609321923, "loss": 3.529, "step": 64480 }, { "epoch": 4.381369751324908, "grad_norm": 0.9794401526451111, "learning_rate": 0.00045232878108438646, "loss": 3.5589, "step": 64485 }, { "epoch": 4.38170947139557, "grad_norm": 0.9266629219055176, "learning_rate": 0.00045228631607555374, "loss": 3.5896, "step": 64490 }, { "epoch": 4.382049191466232, "grad_norm": 0.9738286137580872, "learning_rate": 0.00045224385106672107, "loss": 3.5562, "step": 64495 }, { "epoch": 4.382388911536894, "grad_norm": 0.9822114706039429, "learning_rate": 0.0004522013860578883, "loss": 3.2382, "step": 64500 }, { "epoch": 4.382728631607556, "grad_norm": 0.9082299470901489, "learning_rate": 0.0004521589210490556, "loss": 3.6357, "step": 64505 }, { "epoch": 4.383068351678217, "grad_norm": 0.8554168939590454, "learning_rate": 0.0004521164560402229, "loss": 3.4156, "step": 64510 }, { "epoch": 4.383408071748879, "grad_norm": 0.7571812868118286, "learning_rate": 0.00045207399103139014, "loss": 3.5565, "step": 64515 }, { "epoch": 4.383747791819541, "grad_norm": 0.9785128831863403, "learning_rate": 0.0004520315260225574, "loss": 3.5728, "step": 64520 }, { "epoch": 4.384087511890202, "grad_norm": 0.9914416670799255, "learning_rate": 0.0004519890610137247, "loss": 3.8714, "step": 64525 }, { "epoch": 4.384427231960864, "grad_norm": 0.8125157952308655, "learning_rate": 0.000451946596004892, "loss": 3.7296, "step": 64530 }, { "epoch": 4.384766952031526, "grad_norm": 0.8399844765663147, "learning_rate": 0.00045190413099605926, "loss": 3.4348, "step": 64535 }, { "epoch": 4.385106672102188, "grad_norm": 0.9351385831832886, "learning_rate": 0.00045186166598722654, "loss": 3.7293, "step": 64540 }, { "epoch": 4.38544639217285, "grad_norm": 1.010532021522522, "learning_rate": 0.0004518192009783938, "loss": 3.318, "step": 64545 }, { "epoch": 4.385786112243512, "grad_norm": 1.1710469722747803, "learning_rate": 0.0004517767359695611, "loss": 3.6804, "step": 64550 }, { "epoch": 4.386125832314173, "grad_norm": 0.7254777550697327, "learning_rate": 0.0004517342709607284, "loss": 3.4764, "step": 64555 }, { "epoch": 4.386465552384835, "grad_norm": 0.7284232378005981, "learning_rate": 0.0004516918059518956, "loss": 3.442, "step": 64560 }, { "epoch": 4.386805272455497, "grad_norm": 0.9451462030410767, "learning_rate": 0.00045164934094306294, "loss": 3.5935, "step": 64565 }, { "epoch": 4.387144992526158, "grad_norm": 0.9240872263908386, "learning_rate": 0.0004516068759342302, "loss": 3.5202, "step": 64570 }, { "epoch": 4.38748471259682, "grad_norm": 0.8979571461677551, "learning_rate": 0.00045156441092539744, "loss": 3.4783, "step": 64575 }, { "epoch": 4.3878244326674825, "grad_norm": 0.9020591974258423, "learning_rate": 0.0004515219459165648, "loss": 3.6488, "step": 64580 }, { "epoch": 4.388164152738144, "grad_norm": 0.8123366832733154, "learning_rate": 0.00045147948090773206, "loss": 3.5484, "step": 64585 }, { "epoch": 4.388503872808806, "grad_norm": 0.8190121650695801, "learning_rate": 0.0004514370158988993, "loss": 3.4835, "step": 64590 }, { "epoch": 4.388843592879467, "grad_norm": 0.8332828283309937, "learning_rate": 0.00045139455089006656, "loss": 3.628, "step": 64595 }, { "epoch": 4.389183312950129, "grad_norm": 0.7954887747764587, "learning_rate": 0.0004513520858812339, "loss": 3.3835, "step": 64600 }, { "epoch": 4.389523033020791, "grad_norm": 0.8848952651023865, "learning_rate": 0.0004513096208724011, "loss": 3.6436, "step": 64605 }, { "epoch": 4.389862753091452, "grad_norm": 0.8876662850379944, "learning_rate": 0.0004512671558635684, "loss": 3.6554, "step": 64610 }, { "epoch": 4.390202473162114, "grad_norm": 0.8347315788269043, "learning_rate": 0.00045122469085473574, "loss": 3.7396, "step": 64615 }, { "epoch": 4.390542193232776, "grad_norm": 0.809548020362854, "learning_rate": 0.00045118222584590296, "loss": 3.4019, "step": 64620 }, { "epoch": 4.390881913303438, "grad_norm": 1.0197629928588867, "learning_rate": 0.00045113976083707024, "loss": 3.5469, "step": 64625 }, { "epoch": 4.3912216333741, "grad_norm": 0.8092890381813049, "learning_rate": 0.0004510972958282376, "loss": 3.3925, "step": 64630 }, { "epoch": 4.391561353444762, "grad_norm": 1.1317576169967651, "learning_rate": 0.0004510548308194048, "loss": 3.3887, "step": 64635 }, { "epoch": 4.391901073515423, "grad_norm": 0.8381431102752686, "learning_rate": 0.0004510123658105721, "loss": 3.5798, "step": 64640 }, { "epoch": 4.392240793586085, "grad_norm": 0.8388566970825195, "learning_rate": 0.00045096990080173936, "loss": 3.4054, "step": 64645 }, { "epoch": 4.392580513656747, "grad_norm": 0.9554895162582397, "learning_rate": 0.0004509274357929067, "loss": 3.3432, "step": 64650 }, { "epoch": 4.392920233727408, "grad_norm": 0.898276686668396, "learning_rate": 0.0004508849707840739, "loss": 3.4542, "step": 64655 }, { "epoch": 4.39325995379807, "grad_norm": 0.7132056951522827, "learning_rate": 0.0004508425057752412, "loss": 3.643, "step": 64660 }, { "epoch": 4.393599673868732, "grad_norm": 1.1174218654632568, "learning_rate": 0.00045080004076640854, "loss": 3.3068, "step": 64665 }, { "epoch": 4.393939393939394, "grad_norm": 0.8897207379341125, "learning_rate": 0.00045075757575757577, "loss": 3.4755, "step": 64670 }, { "epoch": 4.394279114010056, "grad_norm": 0.8851433396339417, "learning_rate": 0.00045071511074874305, "loss": 3.2876, "step": 64675 }, { "epoch": 4.394618834080718, "grad_norm": 0.7211104035377502, "learning_rate": 0.0004506726457399103, "loss": 3.687, "step": 64680 }, { "epoch": 4.394958554151379, "grad_norm": 0.8784237504005432, "learning_rate": 0.0004506301807310776, "loss": 3.6228, "step": 64685 }, { "epoch": 4.395298274222041, "grad_norm": 0.8926588296890259, "learning_rate": 0.0004505877157222449, "loss": 3.516, "step": 64690 }, { "epoch": 4.395637994292703, "grad_norm": 1.0050911903381348, "learning_rate": 0.00045054525071341217, "loss": 3.3623, "step": 64695 }, { "epoch": 4.395977714363364, "grad_norm": 0.9478135108947754, "learning_rate": 0.00045050278570457945, "loss": 3.2387, "step": 64700 }, { "epoch": 4.396317434434026, "grad_norm": 1.6698096990585327, "learning_rate": 0.0004504603206957467, "loss": 3.485, "step": 64705 }, { "epoch": 4.396657154504688, "grad_norm": 0.9323243498802185, "learning_rate": 0.000450417855686914, "loss": 3.3901, "step": 64710 }, { "epoch": 4.39699687457535, "grad_norm": 0.7532806396484375, "learning_rate": 0.00045037539067808123, "loss": 3.4245, "step": 64715 }, { "epoch": 4.397336594646012, "grad_norm": 0.9737496972084045, "learning_rate": 0.00045033292566924857, "loss": 3.2909, "step": 64720 }, { "epoch": 4.397676314716674, "grad_norm": 0.9262939095497131, "learning_rate": 0.00045029046066041585, "loss": 3.4267, "step": 64725 }, { "epoch": 4.398016034787335, "grad_norm": 0.7953515648841858, "learning_rate": 0.00045024799565158307, "loss": 3.3702, "step": 64730 }, { "epoch": 4.398355754857997, "grad_norm": 0.590951681137085, "learning_rate": 0.0004502055306427504, "loss": 3.5927, "step": 64735 }, { "epoch": 4.398695474928659, "grad_norm": 0.9835566878318787, "learning_rate": 0.0004501630656339177, "loss": 3.2923, "step": 64740 }, { "epoch": 4.39903519499932, "grad_norm": 0.8848749399185181, "learning_rate": 0.0004501206006250849, "loss": 3.468, "step": 64745 }, { "epoch": 4.399374915069982, "grad_norm": 1.1962647438049316, "learning_rate": 0.0004500781356162522, "loss": 3.255, "step": 64750 }, { "epoch": 4.399714635140644, "grad_norm": 0.7615827918052673, "learning_rate": 0.0004500356706074195, "loss": 3.5284, "step": 64755 }, { "epoch": 4.400054355211306, "grad_norm": 1.0787931680679321, "learning_rate": 0.00044999320559858675, "loss": 3.4973, "step": 64760 }, { "epoch": 4.400394075281968, "grad_norm": 0.8010728359222412, "learning_rate": 0.00044995074058975403, "loss": 3.29, "step": 64765 }, { "epoch": 4.40073379535263, "grad_norm": 0.8789026737213135, "learning_rate": 0.00044990827558092137, "loss": 3.3412, "step": 64770 }, { "epoch": 4.401073515423291, "grad_norm": 0.8462004065513611, "learning_rate": 0.0004498658105720886, "loss": 3.4673, "step": 64775 }, { "epoch": 4.401413235493953, "grad_norm": 0.8739003539085388, "learning_rate": 0.00044982334556325587, "loss": 3.3336, "step": 64780 }, { "epoch": 4.401752955564615, "grad_norm": 0.8325495719909668, "learning_rate": 0.00044978088055442315, "loss": 3.2661, "step": 64785 }, { "epoch": 4.402092675635276, "grad_norm": 1.1298160552978516, "learning_rate": 0.00044973841554559043, "loss": 3.5385, "step": 64790 }, { "epoch": 4.402432395705938, "grad_norm": 0.9124968647956848, "learning_rate": 0.0004496959505367577, "loss": 3.4836, "step": 64795 }, { "epoch": 4.4027721157766, "grad_norm": 0.8804593086242676, "learning_rate": 0.000449653485527925, "loss": 3.1837, "step": 64800 }, { "epoch": 4.403111835847262, "grad_norm": 1.1899088621139526, "learning_rate": 0.0004496110205190923, "loss": 3.43, "step": 64805 }, { "epoch": 4.403451555917924, "grad_norm": 1.0679731369018555, "learning_rate": 0.00044956855551025955, "loss": 3.4203, "step": 64810 }, { "epoch": 4.403791275988586, "grad_norm": 0.970270574092865, "learning_rate": 0.00044952609050142683, "loss": 3.4975, "step": 64815 }, { "epoch": 4.404130996059247, "grad_norm": 0.9771348237991333, "learning_rate": 0.0004494836254925941, "loss": 3.3282, "step": 64820 }, { "epoch": 4.404470716129909, "grad_norm": 0.8059468269348145, "learning_rate": 0.0004494411604837614, "loss": 3.3468, "step": 64825 }, { "epoch": 4.404810436200571, "grad_norm": 0.8164841532707214, "learning_rate": 0.0004493986954749287, "loss": 3.8383, "step": 64830 }, { "epoch": 4.405150156271232, "grad_norm": 1.2418636083602905, "learning_rate": 0.00044935623046609595, "loss": 3.7778, "step": 64835 }, { "epoch": 4.405489876341894, "grad_norm": 0.8264691233634949, "learning_rate": 0.00044931376545726323, "loss": 3.4205, "step": 64840 }, { "epoch": 4.4058295964125564, "grad_norm": 0.8884705305099487, "learning_rate": 0.0004492713004484305, "loss": 3.33, "step": 64845 }, { "epoch": 4.406169316483218, "grad_norm": 0.8003307580947876, "learning_rate": 0.0004492288354395978, "loss": 3.4485, "step": 64850 }, { "epoch": 4.40650903655388, "grad_norm": 0.7426823377609253, "learning_rate": 0.000449186370430765, "loss": 3.4116, "step": 64855 }, { "epoch": 4.406848756624542, "grad_norm": 1.2305539846420288, "learning_rate": 0.00044914390542193235, "loss": 3.2162, "step": 64860 }, { "epoch": 4.407188476695203, "grad_norm": 0.8456860780715942, "learning_rate": 0.00044910144041309963, "loss": 3.626, "step": 64865 }, { "epoch": 4.407528196765865, "grad_norm": 0.9313734769821167, "learning_rate": 0.00044905897540426686, "loss": 3.3284, "step": 64870 }, { "epoch": 4.407867916836526, "grad_norm": 1.011651873588562, "learning_rate": 0.0004490165103954342, "loss": 3.5675, "step": 64875 }, { "epoch": 4.408207636907188, "grad_norm": 0.9485716223716736, "learning_rate": 0.0004489740453866015, "loss": 3.7079, "step": 64880 }, { "epoch": 4.40854735697785, "grad_norm": 0.8012006282806396, "learning_rate": 0.0004489315803777687, "loss": 3.331, "step": 64885 }, { "epoch": 4.408887077048512, "grad_norm": 0.9415907263755798, "learning_rate": 0.00044888911536893603, "loss": 3.5031, "step": 64890 }, { "epoch": 4.409226797119174, "grad_norm": 0.7125803828239441, "learning_rate": 0.0004488466503601033, "loss": 3.373, "step": 64895 }, { "epoch": 4.409566517189836, "grad_norm": 0.8787747621536255, "learning_rate": 0.00044880418535127054, "loss": 3.403, "step": 64900 }, { "epoch": 4.409906237260497, "grad_norm": 0.8242262005805969, "learning_rate": 0.0004487617203424378, "loss": 3.4112, "step": 64905 }, { "epoch": 4.410245957331159, "grad_norm": 0.8411560654640198, "learning_rate": 0.00044871925533360515, "loss": 3.5182, "step": 64910 }, { "epoch": 4.410585677401821, "grad_norm": 0.9842845797538757, "learning_rate": 0.0004486767903247724, "loss": 3.6295, "step": 64915 }, { "epoch": 4.410925397472482, "grad_norm": 1.024263620376587, "learning_rate": 0.00044863432531593966, "loss": 3.2088, "step": 64920 }, { "epoch": 4.411265117543144, "grad_norm": 0.79705810546875, "learning_rate": 0.000448591860307107, "loss": 3.2276, "step": 64925 }, { "epoch": 4.411604837613806, "grad_norm": 0.8538332581520081, "learning_rate": 0.0004485493952982742, "loss": 3.3012, "step": 64930 }, { "epoch": 4.411944557684468, "grad_norm": 0.9359583854675293, "learning_rate": 0.0004485069302894415, "loss": 3.6225, "step": 64935 }, { "epoch": 4.41228427775513, "grad_norm": 0.8045932054519653, "learning_rate": 0.0004484644652806088, "loss": 3.516, "step": 64940 }, { "epoch": 4.412623997825792, "grad_norm": 0.780533492565155, "learning_rate": 0.00044842200027177606, "loss": 3.3913, "step": 64945 }, { "epoch": 4.412963717896453, "grad_norm": 1.0491788387298584, "learning_rate": 0.00044837953526294334, "loss": 3.3989, "step": 64950 }, { "epoch": 4.413303437967115, "grad_norm": 1.1852960586547852, "learning_rate": 0.0004483370702541106, "loss": 3.524, "step": 64955 }, { "epoch": 4.413643158037777, "grad_norm": 0.8996584415435791, "learning_rate": 0.0004482946052452779, "loss": 3.7289, "step": 64960 }, { "epoch": 4.413982878108438, "grad_norm": 0.7434871196746826, "learning_rate": 0.0004482521402364452, "loss": 3.3752, "step": 64965 }, { "epoch": 4.4143225981791, "grad_norm": 1.070533275604248, "learning_rate": 0.00044820967522761246, "loss": 3.6421, "step": 64970 }, { "epoch": 4.414662318249762, "grad_norm": 0.7161136269569397, "learning_rate": 0.0004481672102187797, "loss": 3.3524, "step": 64975 }, { "epoch": 4.415002038320424, "grad_norm": 0.8421730399131775, "learning_rate": 0.000448124745209947, "loss": 3.5382, "step": 64980 }, { "epoch": 4.415341758391086, "grad_norm": 0.9722555875778198, "learning_rate": 0.0004480822802011143, "loss": 3.6208, "step": 64985 }, { "epoch": 4.415681478461748, "grad_norm": 0.8298919200897217, "learning_rate": 0.0004480398151922816, "loss": 3.6129, "step": 64990 }, { "epoch": 4.416021198532409, "grad_norm": 0.8144833445549011, "learning_rate": 0.00044799735018344886, "loss": 3.5713, "step": 64995 }, { "epoch": 4.416360918603071, "grad_norm": 1.0004266500473022, "learning_rate": 0.00044795488517461614, "loss": 3.2115, "step": 65000 }, { "epoch": 4.416700638673733, "grad_norm": 0.8938560485839844, "learning_rate": 0.0004479124201657834, "loss": 3.2683, "step": 65005 }, { "epoch": 4.417040358744394, "grad_norm": 0.7159640789031982, "learning_rate": 0.00044786995515695065, "loss": 3.2967, "step": 65010 }, { "epoch": 4.417380078815056, "grad_norm": 0.8809435367584229, "learning_rate": 0.000447827490148118, "loss": 3.5437, "step": 65015 }, { "epoch": 4.417719798885718, "grad_norm": 0.8484206199645996, "learning_rate": 0.00044778502513928526, "loss": 3.4811, "step": 65020 }, { "epoch": 4.41805951895638, "grad_norm": 0.7763258218765259, "learning_rate": 0.0004477425601304525, "loss": 3.3201, "step": 65025 }, { "epoch": 4.418399239027042, "grad_norm": 0.8743746876716614, "learning_rate": 0.0004477000951216198, "loss": 3.5965, "step": 65030 }, { "epoch": 4.418738959097704, "grad_norm": 0.9821796417236328, "learning_rate": 0.0004476576301127871, "loss": 3.4462, "step": 65035 }, { "epoch": 4.419078679168365, "grad_norm": 0.7879642248153687, "learning_rate": 0.00044761516510395433, "loss": 3.3875, "step": 65040 }, { "epoch": 4.419418399239027, "grad_norm": 1.009671688079834, "learning_rate": 0.0004475727000951216, "loss": 3.2279, "step": 65045 }, { "epoch": 4.419758119309689, "grad_norm": 0.9736917018890381, "learning_rate": 0.00044753023508628894, "loss": 3.372, "step": 65050 }, { "epoch": 4.42009783938035, "grad_norm": 0.8007050156593323, "learning_rate": 0.00044748777007745617, "loss": 3.6673, "step": 65055 }, { "epoch": 4.420437559451012, "grad_norm": 0.8634523153305054, "learning_rate": 0.00044744530506862345, "loss": 3.6329, "step": 65060 }, { "epoch": 4.420777279521674, "grad_norm": 1.0416736602783203, "learning_rate": 0.0004474028400597908, "loss": 3.5827, "step": 65065 }, { "epoch": 4.421116999592336, "grad_norm": 0.8211742639541626, "learning_rate": 0.000447360375050958, "loss": 3.3254, "step": 65070 }, { "epoch": 4.421456719662998, "grad_norm": 1.0899970531463623, "learning_rate": 0.0004473179100421253, "loss": 3.4016, "step": 65075 }, { "epoch": 4.42179643973366, "grad_norm": 0.8331630825996399, "learning_rate": 0.00044727544503329257, "loss": 3.4202, "step": 65080 }, { "epoch": 4.422136159804321, "grad_norm": 0.9047204256057739, "learning_rate": 0.00044723298002445985, "loss": 3.6312, "step": 65085 }, { "epoch": 4.422475879874983, "grad_norm": 0.9336113929748535, "learning_rate": 0.00044719051501562713, "loss": 3.1886, "step": 65090 }, { "epoch": 4.422815599945645, "grad_norm": 0.7998154759407043, "learning_rate": 0.0004471480500067944, "loss": 3.5472, "step": 65095 }, { "epoch": 4.423155320016306, "grad_norm": 0.9060941338539124, "learning_rate": 0.0004471055849979617, "loss": 3.6218, "step": 65100 }, { "epoch": 4.423495040086968, "grad_norm": 0.9013347029685974, "learning_rate": 0.00044706311998912897, "loss": 3.5244, "step": 65105 }, { "epoch": 4.42383476015763, "grad_norm": 1.8107386827468872, "learning_rate": 0.00044702065498029625, "loss": 3.5768, "step": 65110 }, { "epoch": 4.424174480228292, "grad_norm": 0.9807305335998535, "learning_rate": 0.0004469781899714635, "loss": 3.5115, "step": 65115 }, { "epoch": 4.424514200298954, "grad_norm": 0.9302863478660583, "learning_rate": 0.0004469357249626308, "loss": 3.3412, "step": 65120 }, { "epoch": 4.424853920369616, "grad_norm": 1.0677871704101562, "learning_rate": 0.0004468932599537981, "loss": 3.3396, "step": 65125 }, { "epoch": 4.425193640440277, "grad_norm": 0.8922421932220459, "learning_rate": 0.0004468507949449653, "loss": 3.2335, "step": 65130 }, { "epoch": 4.425533360510939, "grad_norm": 0.7762733697891235, "learning_rate": 0.00044680832993613265, "loss": 3.3928, "step": 65135 }, { "epoch": 4.425873080581601, "grad_norm": 0.9222429394721985, "learning_rate": 0.00044676586492729993, "loss": 3.4203, "step": 65140 }, { "epoch": 4.426212800652262, "grad_norm": 1.1646645069122314, "learning_rate": 0.00044672339991846716, "loss": 3.5337, "step": 65145 }, { "epoch": 4.426552520722924, "grad_norm": 0.9924411773681641, "learning_rate": 0.00044668093490963444, "loss": 3.4629, "step": 65150 }, { "epoch": 4.4268922407935865, "grad_norm": 0.8357965350151062, "learning_rate": 0.00044663846990080177, "loss": 3.5043, "step": 65155 }, { "epoch": 4.427231960864248, "grad_norm": 0.8970503211021423, "learning_rate": 0.00044659600489196905, "loss": 3.8881, "step": 65160 }, { "epoch": 4.42757168093491, "grad_norm": 0.9612279534339905, "learning_rate": 0.0004465535398831363, "loss": 3.8546, "step": 65165 }, { "epoch": 4.427911401005572, "grad_norm": 0.8405584692955017, "learning_rate": 0.0004465110748743036, "loss": 3.626, "step": 65170 }, { "epoch": 4.428251121076233, "grad_norm": 1.0533685684204102, "learning_rate": 0.0004464686098654709, "loss": 3.2728, "step": 65175 }, { "epoch": 4.428590841146895, "grad_norm": 0.8456205725669861, "learning_rate": 0.0004464261448566381, "loss": 3.6083, "step": 65180 }, { "epoch": 4.428930561217557, "grad_norm": 0.7051971554756165, "learning_rate": 0.00044638367984780545, "loss": 3.5593, "step": 65185 }, { "epoch": 4.429270281288218, "grad_norm": 0.9375216364860535, "learning_rate": 0.00044634121483897273, "loss": 3.6292, "step": 65190 }, { "epoch": 4.42961000135888, "grad_norm": 0.7986471056938171, "learning_rate": 0.00044629874983013996, "loss": 3.331, "step": 65195 }, { "epoch": 4.4299497214295425, "grad_norm": 0.8557507991790771, "learning_rate": 0.00044625628482130724, "loss": 3.712, "step": 65200 }, { "epoch": 4.430289441500204, "grad_norm": 0.9136731624603271, "learning_rate": 0.00044621381981247457, "loss": 3.5324, "step": 65205 }, { "epoch": 4.430629161570866, "grad_norm": 0.8843671083450317, "learning_rate": 0.0004461713548036418, "loss": 3.1793, "step": 65210 }, { "epoch": 4.430968881641528, "grad_norm": 1.2320665121078491, "learning_rate": 0.0004461288897948091, "loss": 3.749, "step": 65215 }, { "epoch": 4.431308601712189, "grad_norm": 1.0043127536773682, "learning_rate": 0.0004460864247859764, "loss": 3.7167, "step": 65220 }, { "epoch": 4.431648321782851, "grad_norm": 0.9097742438316345, "learning_rate": 0.00044604395977714364, "loss": 3.5056, "step": 65225 }, { "epoch": 4.431988041853513, "grad_norm": 1.01972496509552, "learning_rate": 0.0004460014947683109, "loss": 3.5939, "step": 65230 }, { "epoch": 4.432327761924174, "grad_norm": 1.4198657274246216, "learning_rate": 0.0004459590297594782, "loss": 3.7147, "step": 65235 }, { "epoch": 4.432667481994836, "grad_norm": 1.1254534721374512, "learning_rate": 0.0004459165647506455, "loss": 3.4951, "step": 65240 }, { "epoch": 4.4330072020654985, "grad_norm": 0.9748072624206543, "learning_rate": 0.00044587409974181276, "loss": 3.4507, "step": 65245 }, { "epoch": 4.43334692213616, "grad_norm": 0.7694755792617798, "learning_rate": 0.00044583163473298004, "loss": 3.878, "step": 65250 }, { "epoch": 4.433686642206822, "grad_norm": 1.0270973443984985, "learning_rate": 0.0004457891697241473, "loss": 3.4706, "step": 65255 }, { "epoch": 4.434026362277484, "grad_norm": 0.9487940669059753, "learning_rate": 0.0004457467047153146, "loss": 3.3172, "step": 65260 }, { "epoch": 4.434366082348145, "grad_norm": 1.1061460971832275, "learning_rate": 0.0004457042397064819, "loss": 3.6393, "step": 65265 }, { "epoch": 4.434705802418807, "grad_norm": 0.832764208316803, "learning_rate": 0.0004456617746976491, "loss": 3.6784, "step": 65270 }, { "epoch": 4.435045522489468, "grad_norm": 0.9059832692146301, "learning_rate": 0.00044561930968881644, "loss": 3.4462, "step": 65275 }, { "epoch": 4.43538524256013, "grad_norm": 0.9542608261108398, "learning_rate": 0.0004455768446799837, "loss": 3.3771, "step": 65280 }, { "epoch": 4.435724962630792, "grad_norm": 1.0440117120742798, "learning_rate": 0.00044553437967115094, "loss": 3.8525, "step": 65285 }, { "epoch": 4.436064682701454, "grad_norm": 0.954830527305603, "learning_rate": 0.0004454919146623183, "loss": 3.1139, "step": 65290 }, { "epoch": 4.436404402772116, "grad_norm": 1.2768272161483765, "learning_rate": 0.00044544944965348556, "loss": 3.4418, "step": 65295 }, { "epoch": 4.436744122842778, "grad_norm": 0.9730943441390991, "learning_rate": 0.0004454069846446528, "loss": 3.5238, "step": 65300 }, { "epoch": 4.437083842913439, "grad_norm": 0.9132259488105774, "learning_rate": 0.00044536451963582006, "loss": 3.6255, "step": 65305 }, { "epoch": 4.437423562984101, "grad_norm": 0.9365383386611938, "learning_rate": 0.0004453220546269874, "loss": 3.5968, "step": 65310 }, { "epoch": 4.437763283054763, "grad_norm": 0.9567075371742249, "learning_rate": 0.0004452795896181546, "loss": 3.375, "step": 65315 }, { "epoch": 4.438103003125424, "grad_norm": 0.7754851579666138, "learning_rate": 0.0004452371246093219, "loss": 3.6649, "step": 65320 }, { "epoch": 4.438442723196086, "grad_norm": 1.2116923332214355, "learning_rate": 0.00044519465960048924, "loss": 3.3742, "step": 65325 }, { "epoch": 4.438782443266748, "grad_norm": 0.8112424612045288, "learning_rate": 0.0004451521945916565, "loss": 3.3966, "step": 65330 }, { "epoch": 4.43912216333741, "grad_norm": 0.8631371259689331, "learning_rate": 0.00044510972958282374, "loss": 3.7245, "step": 65335 }, { "epoch": 4.439461883408072, "grad_norm": 0.8542788028717041, "learning_rate": 0.000445067264573991, "loss": 3.3919, "step": 65340 }, { "epoch": 4.439801603478734, "grad_norm": 0.8197934031486511, "learning_rate": 0.00044502479956515836, "loss": 3.4249, "step": 65345 }, { "epoch": 4.440141323549395, "grad_norm": 0.7710437178611755, "learning_rate": 0.0004449823345563256, "loss": 3.6363, "step": 65350 }, { "epoch": 4.440481043620057, "grad_norm": 0.7870102524757385, "learning_rate": 0.00044493986954749286, "loss": 3.3879, "step": 65355 }, { "epoch": 4.440820763690719, "grad_norm": 0.9849935173988342, "learning_rate": 0.0004448974045386602, "loss": 3.4856, "step": 65360 }, { "epoch": 4.44116048376138, "grad_norm": 0.8454204797744751, "learning_rate": 0.0004448549395298274, "loss": 3.6144, "step": 65365 }, { "epoch": 4.441500203832042, "grad_norm": 0.8572724461555481, "learning_rate": 0.0004448124745209947, "loss": 3.3884, "step": 65370 }, { "epoch": 4.441839923902704, "grad_norm": 1.002380609512329, "learning_rate": 0.000444770009512162, "loss": 3.3085, "step": 65375 }, { "epoch": 4.442179643973366, "grad_norm": 0.9569190144538879, "learning_rate": 0.00044472754450332927, "loss": 3.5528, "step": 65380 }, { "epoch": 4.442519364044028, "grad_norm": 0.901733934879303, "learning_rate": 0.00044468507949449655, "loss": 3.2565, "step": 65385 }, { "epoch": 4.44285908411469, "grad_norm": 0.8372353315353394, "learning_rate": 0.0004446426144856638, "loss": 3.5403, "step": 65390 }, { "epoch": 4.443198804185351, "grad_norm": 1.1170272827148438, "learning_rate": 0.0004446001494768311, "loss": 3.5647, "step": 65395 }, { "epoch": 4.443538524256013, "grad_norm": 1.0626559257507324, "learning_rate": 0.0004445576844679984, "loss": 3.6172, "step": 65400 }, { "epoch": 4.443878244326675, "grad_norm": 0.9299407601356506, "learning_rate": 0.00044451521945916567, "loss": 3.3716, "step": 65405 }, { "epoch": 4.444217964397336, "grad_norm": 0.8780089020729065, "learning_rate": 0.0004444727544503329, "loss": 3.3707, "step": 65410 }, { "epoch": 4.444557684467998, "grad_norm": 0.9045068621635437, "learning_rate": 0.0004444302894415002, "loss": 3.5019, "step": 65415 }, { "epoch": 4.4448974045386604, "grad_norm": 0.7839063405990601, "learning_rate": 0.0004443878244326675, "loss": 3.5688, "step": 65420 }, { "epoch": 4.445237124609322, "grad_norm": 0.9780973196029663, "learning_rate": 0.00044434535942383473, "loss": 3.6582, "step": 65425 }, { "epoch": 4.445576844679984, "grad_norm": 0.9142229557037354, "learning_rate": 0.00044430289441500207, "loss": 3.5304, "step": 65430 }, { "epoch": 4.445916564750646, "grad_norm": 1.0334123373031616, "learning_rate": 0.00044426042940616935, "loss": 3.4038, "step": 65435 }, { "epoch": 4.446256284821307, "grad_norm": 0.7088323831558228, "learning_rate": 0.00044421796439733657, "loss": 3.5331, "step": 65440 }, { "epoch": 4.446596004891969, "grad_norm": 0.938849151134491, "learning_rate": 0.00044417549938850385, "loss": 3.4942, "step": 65445 }, { "epoch": 4.446935724962631, "grad_norm": 0.8618916869163513, "learning_rate": 0.0004441330343796712, "loss": 3.538, "step": 65450 }, { "epoch": 4.447275445033292, "grad_norm": 1.6311894655227661, "learning_rate": 0.0004440905693708384, "loss": 3.6887, "step": 65455 }, { "epoch": 4.447615165103954, "grad_norm": 0.8689915537834167, "learning_rate": 0.0004440481043620057, "loss": 3.2764, "step": 65460 }, { "epoch": 4.4479548851746165, "grad_norm": 1.1043154001235962, "learning_rate": 0.000444005639353173, "loss": 3.6724, "step": 65465 }, { "epoch": 4.448294605245278, "grad_norm": 0.864713191986084, "learning_rate": 0.00044396317434434025, "loss": 3.4313, "step": 65470 }, { "epoch": 4.44863432531594, "grad_norm": 0.8858205676078796, "learning_rate": 0.00044392070933550753, "loss": 3.381, "step": 65475 }, { "epoch": 4.448974045386602, "grad_norm": 1.025192379951477, "learning_rate": 0.00044387824432667487, "loss": 3.384, "step": 65480 }, { "epoch": 4.449313765457263, "grad_norm": 0.8928725123405457, "learning_rate": 0.0004438357793178421, "loss": 3.2779, "step": 65485 }, { "epoch": 4.449653485527925, "grad_norm": 0.7886186838150024, "learning_rate": 0.00044379331430900937, "loss": 3.5011, "step": 65490 }, { "epoch": 4.449993205598587, "grad_norm": 0.910686731338501, "learning_rate": 0.00044375084930017665, "loss": 3.5708, "step": 65495 }, { "epoch": 4.450332925669248, "grad_norm": 0.6709072589874268, "learning_rate": 0.000443708384291344, "loss": 3.4831, "step": 65500 }, { "epoch": 4.45067264573991, "grad_norm": 0.8429123759269714, "learning_rate": 0.0004436659192825112, "loss": 3.3521, "step": 65505 }, { "epoch": 4.4510123658105725, "grad_norm": 0.8000031113624573, "learning_rate": 0.0004436234542736785, "loss": 3.4365, "step": 65510 }, { "epoch": 4.451352085881234, "grad_norm": 0.9841476082801819, "learning_rate": 0.00044358098926484583, "loss": 3.8182, "step": 65515 }, { "epoch": 4.451691805951896, "grad_norm": 0.8006512522697449, "learning_rate": 0.00044353852425601305, "loss": 3.5815, "step": 65520 }, { "epoch": 4.452031526022558, "grad_norm": 0.9701135158538818, "learning_rate": 0.00044349605924718033, "loss": 3.5722, "step": 65525 }, { "epoch": 4.452371246093219, "grad_norm": 0.6828458309173584, "learning_rate": 0.0004434535942383476, "loss": 3.0316, "step": 65530 }, { "epoch": 4.452710966163881, "grad_norm": 1.0784547328948975, "learning_rate": 0.0004434111292295149, "loss": 3.341, "step": 65535 }, { "epoch": 4.453050686234543, "grad_norm": 0.9231592416763306, "learning_rate": 0.0004433686642206822, "loss": 3.4328, "step": 65540 }, { "epoch": 4.453390406305204, "grad_norm": 0.9386988282203674, "learning_rate": 0.00044332619921184945, "loss": 3.6561, "step": 65545 }, { "epoch": 4.453730126375866, "grad_norm": 0.8226339817047119, "learning_rate": 0.00044328373420301673, "loss": 3.4146, "step": 65550 }, { "epoch": 4.454069846446528, "grad_norm": 0.7354975938796997, "learning_rate": 0.000443241269194184, "loss": 3.5147, "step": 65555 }, { "epoch": 4.45440956651719, "grad_norm": 0.6774933338165283, "learning_rate": 0.0004431988041853513, "loss": 3.45, "step": 65560 }, { "epoch": 4.454749286587852, "grad_norm": 0.9289747476577759, "learning_rate": 0.0004431563391765185, "loss": 3.386, "step": 65565 }, { "epoch": 4.455089006658513, "grad_norm": 1.1111987829208374, "learning_rate": 0.00044311387416768585, "loss": 3.6929, "step": 65570 }, { "epoch": 4.455428726729175, "grad_norm": 0.8658860921859741, "learning_rate": 0.00044307140915885313, "loss": 3.6657, "step": 65575 }, { "epoch": 4.455768446799837, "grad_norm": 0.7922098636627197, "learning_rate": 0.00044302894415002036, "loss": 3.3009, "step": 65580 }, { "epoch": 4.456108166870498, "grad_norm": 0.9770646095275879, "learning_rate": 0.0004429864791411877, "loss": 3.2895, "step": 65585 }, { "epoch": 4.45644788694116, "grad_norm": 1.0743848085403442, "learning_rate": 0.000442944014132355, "loss": 3.4552, "step": 65590 }, { "epoch": 4.456787607011822, "grad_norm": 0.8378328680992126, "learning_rate": 0.0004429015491235222, "loss": 3.5557, "step": 65595 }, { "epoch": 4.457127327082484, "grad_norm": 0.9926184415817261, "learning_rate": 0.0004428590841146895, "loss": 3.5326, "step": 65600 }, { "epoch": 4.457467047153146, "grad_norm": 0.8915340304374695, "learning_rate": 0.0004428166191058568, "loss": 3.5334, "step": 65605 }, { "epoch": 4.457806767223808, "grad_norm": 1.261305570602417, "learning_rate": 0.00044277415409702404, "loss": 3.5451, "step": 65610 }, { "epoch": 4.458146487294469, "grad_norm": 0.9417118430137634, "learning_rate": 0.0004427316890881913, "loss": 3.5452, "step": 65615 }, { "epoch": 4.458486207365131, "grad_norm": 0.7899617552757263, "learning_rate": 0.00044268922407935865, "loss": 3.7262, "step": 65620 }, { "epoch": 4.458825927435793, "grad_norm": 0.8470172882080078, "learning_rate": 0.0004426467590705259, "loss": 3.2161, "step": 65625 }, { "epoch": 4.459165647506454, "grad_norm": 1.2755868434906006, "learning_rate": 0.00044260429406169316, "loss": 3.5261, "step": 65630 }, { "epoch": 4.459505367577116, "grad_norm": 0.9039474725723267, "learning_rate": 0.00044256182905286044, "loss": 3.4218, "step": 65635 }, { "epoch": 4.459845087647778, "grad_norm": 0.720127284526825, "learning_rate": 0.0004425193640440277, "loss": 3.7125, "step": 65640 }, { "epoch": 4.46018480771844, "grad_norm": 1.0623605251312256, "learning_rate": 0.000442476899035195, "loss": 3.4404, "step": 65645 }, { "epoch": 4.460524527789102, "grad_norm": 0.8914993405342102, "learning_rate": 0.0004424344340263623, "loss": 3.3855, "step": 65650 }, { "epoch": 4.460864247859764, "grad_norm": 0.9546502828598022, "learning_rate": 0.00044239196901752956, "loss": 3.4651, "step": 65655 }, { "epoch": 4.461203967930425, "grad_norm": 1.0451639890670776, "learning_rate": 0.00044234950400869684, "loss": 3.2596, "step": 65660 }, { "epoch": 4.461543688001087, "grad_norm": 0.698577880859375, "learning_rate": 0.0004423070389998641, "loss": 3.4828, "step": 65665 }, { "epoch": 4.461883408071749, "grad_norm": 0.9439581632614136, "learning_rate": 0.0004422645739910314, "loss": 3.5105, "step": 65670 }, { "epoch": 4.46222312814241, "grad_norm": 0.9097123146057129, "learning_rate": 0.0004422221089821987, "loss": 3.6666, "step": 65675 }, { "epoch": 4.462562848213072, "grad_norm": 0.6626315712928772, "learning_rate": 0.00044217964397336596, "loss": 3.5438, "step": 65680 }, { "epoch": 4.462902568283734, "grad_norm": 0.9837448000907898, "learning_rate": 0.00044213717896453324, "loss": 3.5487, "step": 65685 }, { "epoch": 4.463242288354396, "grad_norm": 0.9373995661735535, "learning_rate": 0.0004420947139557005, "loss": 3.5843, "step": 65690 }, { "epoch": 4.463582008425058, "grad_norm": 1.002805233001709, "learning_rate": 0.0004420522489468678, "loss": 3.234, "step": 65695 }, { "epoch": 4.46392172849572, "grad_norm": 0.9372333884239197, "learning_rate": 0.0004420097839380351, "loss": 3.4055, "step": 65700 }, { "epoch": 4.464261448566381, "grad_norm": 0.7636845111846924, "learning_rate": 0.0004419673189292023, "loss": 3.4936, "step": 65705 }, { "epoch": 4.464601168637043, "grad_norm": 0.7814468145370483, "learning_rate": 0.00044192485392036964, "loss": 3.4019, "step": 65710 }, { "epoch": 4.464940888707705, "grad_norm": 0.9036640524864197, "learning_rate": 0.0004418823889115369, "loss": 3.4396, "step": 65715 }, { "epoch": 4.465280608778366, "grad_norm": 1.068583369255066, "learning_rate": 0.00044183992390270415, "loss": 3.5664, "step": 65720 }, { "epoch": 4.465620328849028, "grad_norm": 1.1410255432128906, "learning_rate": 0.0004417974588938715, "loss": 3.4444, "step": 65725 }, { "epoch": 4.4659600489196905, "grad_norm": 0.8179012537002563, "learning_rate": 0.00044175499388503876, "loss": 3.3289, "step": 65730 }, { "epoch": 4.466299768990352, "grad_norm": 0.9046792387962341, "learning_rate": 0.000441712528876206, "loss": 3.4473, "step": 65735 }, { "epoch": 4.466639489061014, "grad_norm": 0.9371708035469055, "learning_rate": 0.00044167006386737327, "loss": 3.1384, "step": 65740 }, { "epoch": 4.466979209131676, "grad_norm": 1.0257292985916138, "learning_rate": 0.0004416275988585406, "loss": 3.3667, "step": 65745 }, { "epoch": 4.467318929202337, "grad_norm": 0.6881449222564697, "learning_rate": 0.00044158513384970783, "loss": 3.4212, "step": 65750 }, { "epoch": 4.467658649272999, "grad_norm": 0.888635516166687, "learning_rate": 0.0004415426688408751, "loss": 3.4424, "step": 65755 }, { "epoch": 4.467998369343661, "grad_norm": 0.7769893407821655, "learning_rate": 0.00044150020383204244, "loss": 3.7971, "step": 65760 }, { "epoch": 4.468338089414322, "grad_norm": 0.8105292320251465, "learning_rate": 0.00044145773882320967, "loss": 3.4989, "step": 65765 }, { "epoch": 4.468677809484984, "grad_norm": 0.9504432082176208, "learning_rate": 0.00044141527381437695, "loss": 3.6537, "step": 65770 }, { "epoch": 4.4690175295556465, "grad_norm": 0.7363313436508179, "learning_rate": 0.0004413728088055443, "loss": 3.5313, "step": 65775 }, { "epoch": 4.469357249626308, "grad_norm": 0.8558781743049622, "learning_rate": 0.0004413303437967115, "loss": 3.2675, "step": 65780 }, { "epoch": 4.46969696969697, "grad_norm": 0.810850977897644, "learning_rate": 0.0004412878787878788, "loss": 3.3422, "step": 65785 }, { "epoch": 4.470036689767632, "grad_norm": 0.812824547290802, "learning_rate": 0.00044124541377904607, "loss": 3.4168, "step": 65790 }, { "epoch": 4.470376409838293, "grad_norm": 0.9039174914360046, "learning_rate": 0.00044120294877021335, "loss": 3.6817, "step": 65795 }, { "epoch": 4.470716129908955, "grad_norm": 1.151842474937439, "learning_rate": 0.00044116048376138063, "loss": 3.4555, "step": 65800 }, { "epoch": 4.471055849979617, "grad_norm": 1.0198984146118164, "learning_rate": 0.0004411180187525479, "loss": 3.5422, "step": 65805 }, { "epoch": 4.471395570050278, "grad_norm": 0.9967764019966125, "learning_rate": 0.0004410755537437152, "loss": 3.6507, "step": 65810 }, { "epoch": 4.47173529012094, "grad_norm": 0.7329361438751221, "learning_rate": 0.00044103308873488247, "loss": 3.3682, "step": 65815 }, { "epoch": 4.4720750101916025, "grad_norm": 0.9602144956588745, "learning_rate": 0.00044099062372604975, "loss": 3.4216, "step": 65820 }, { "epoch": 4.472414730262264, "grad_norm": 1.0068944692611694, "learning_rate": 0.000440948158717217, "loss": 3.3587, "step": 65825 }, { "epoch": 4.472754450332926, "grad_norm": 0.9294483065605164, "learning_rate": 0.0004409056937083843, "loss": 3.4904, "step": 65830 }, { "epoch": 4.473094170403588, "grad_norm": 0.978385865688324, "learning_rate": 0.0004408632286995516, "loss": 3.2934, "step": 65835 }, { "epoch": 4.473433890474249, "grad_norm": 0.9369053244590759, "learning_rate": 0.00044082076369071887, "loss": 3.5831, "step": 65840 }, { "epoch": 4.473773610544911, "grad_norm": 1.0030407905578613, "learning_rate": 0.00044077829868188615, "loss": 3.2767, "step": 65845 }, { "epoch": 4.474113330615573, "grad_norm": 0.9315404891967773, "learning_rate": 0.00044073583367305343, "loss": 3.2655, "step": 65850 }, { "epoch": 4.474453050686234, "grad_norm": 1.962358832359314, "learning_rate": 0.0004406933686642207, "loss": 3.2, "step": 65855 }, { "epoch": 4.474792770756896, "grad_norm": 1.1417176723480225, "learning_rate": 0.00044065090365538794, "loss": 3.3218, "step": 65860 }, { "epoch": 4.4751324908275585, "grad_norm": 0.8394288420677185, "learning_rate": 0.00044060843864655527, "loss": 3.4783, "step": 65865 }, { "epoch": 4.47547221089822, "grad_norm": 0.7305606603622437, "learning_rate": 0.00044056597363772255, "loss": 3.418, "step": 65870 }, { "epoch": 4.475811930968882, "grad_norm": 1.2269339561462402, "learning_rate": 0.0004405235086288898, "loss": 3.5639, "step": 65875 }, { "epoch": 4.476151651039544, "grad_norm": 1.099797248840332, "learning_rate": 0.0004404810436200571, "loss": 3.4951, "step": 65880 }, { "epoch": 4.476491371110205, "grad_norm": 1.0426537990570068, "learning_rate": 0.0004404385786112244, "loss": 3.3501, "step": 65885 }, { "epoch": 4.476831091180867, "grad_norm": 5.750442028045654, "learning_rate": 0.0004403961136023916, "loss": 3.7314, "step": 65890 }, { "epoch": 4.477170811251529, "grad_norm": 0.8145560026168823, "learning_rate": 0.0004403536485935589, "loss": 3.5004, "step": 65895 }, { "epoch": 4.47751053132219, "grad_norm": 1.0038005113601685, "learning_rate": 0.00044031118358472623, "loss": 3.3481, "step": 65900 }, { "epoch": 4.477850251392852, "grad_norm": 1.0480880737304688, "learning_rate": 0.00044026871857589346, "loss": 3.427, "step": 65905 }, { "epoch": 4.4781899714635145, "grad_norm": 0.7815715074539185, "learning_rate": 0.00044022625356706074, "loss": 3.2897, "step": 65910 }, { "epoch": 4.478529691534176, "grad_norm": 0.7847720384597778, "learning_rate": 0.00044018378855822807, "loss": 3.6785, "step": 65915 }, { "epoch": 4.478869411604838, "grad_norm": 1.0112708806991577, "learning_rate": 0.0004401413235493953, "loss": 3.2978, "step": 65920 }, { "epoch": 4.4792091316755, "grad_norm": 0.8791046142578125, "learning_rate": 0.0004400988585405626, "loss": 3.5416, "step": 65925 }, { "epoch": 4.479548851746161, "grad_norm": 0.8541334271430969, "learning_rate": 0.00044005639353172986, "loss": 3.4144, "step": 65930 }, { "epoch": 4.479888571816823, "grad_norm": 1.0964266061782837, "learning_rate": 0.00044001392852289714, "loss": 3.4693, "step": 65935 }, { "epoch": 4.480228291887485, "grad_norm": 0.7227388620376587, "learning_rate": 0.0004399714635140644, "loss": 3.4547, "step": 65940 }, { "epoch": 4.480568011958146, "grad_norm": 1.1640129089355469, "learning_rate": 0.0004399289985052317, "loss": 3.3635, "step": 65945 }, { "epoch": 4.480907732028808, "grad_norm": 0.9079115390777588, "learning_rate": 0.000439886533496399, "loss": 3.5078, "step": 65950 }, { "epoch": 4.4812474520994705, "grad_norm": 0.9792580008506775, "learning_rate": 0.00043984406848756626, "loss": 3.5263, "step": 65955 }, { "epoch": 4.481587172170132, "grad_norm": 0.9809687733650208, "learning_rate": 0.00043980160347873354, "loss": 3.5562, "step": 65960 }, { "epoch": 4.481926892240794, "grad_norm": 0.9644753932952881, "learning_rate": 0.00043975913846990076, "loss": 3.4382, "step": 65965 }, { "epoch": 4.482266612311455, "grad_norm": 0.9283496737480164, "learning_rate": 0.0004397166734610681, "loss": 3.3248, "step": 65970 }, { "epoch": 4.482606332382117, "grad_norm": 0.9568507671356201, "learning_rate": 0.0004396742084522354, "loss": 3.487, "step": 65975 }, { "epoch": 4.482946052452779, "grad_norm": 0.8961830139160156, "learning_rate": 0.0004396317434434026, "loss": 3.4706, "step": 65980 }, { "epoch": 4.48328577252344, "grad_norm": 1.1774773597717285, "learning_rate": 0.00043958927843456994, "loss": 3.6005, "step": 65985 }, { "epoch": 4.483625492594102, "grad_norm": 0.7419503331184387, "learning_rate": 0.0004395468134257372, "loss": 3.4128, "step": 65990 }, { "epoch": 4.483965212664764, "grad_norm": 0.8950122594833374, "learning_rate": 0.00043950434841690444, "loss": 3.4079, "step": 65995 }, { "epoch": 4.484304932735426, "grad_norm": 1.3669229745864868, "learning_rate": 0.0004394618834080717, "loss": 3.3542, "step": 66000 }, { "epoch": 4.484644652806088, "grad_norm": 1.194487452507019, "learning_rate": 0.00043941941839923906, "loss": 3.6456, "step": 66005 }, { "epoch": 4.48498437287675, "grad_norm": 0.976218044757843, "learning_rate": 0.00043937695339040634, "loss": 3.3337, "step": 66010 }, { "epoch": 4.485324092947411, "grad_norm": 0.8585435748100281, "learning_rate": 0.00043933448838157356, "loss": 3.5737, "step": 66015 }, { "epoch": 4.485663813018073, "grad_norm": 0.8201165795326233, "learning_rate": 0.0004392920233727409, "loss": 3.4264, "step": 66020 }, { "epoch": 4.486003533088735, "grad_norm": 0.8898891806602478, "learning_rate": 0.0004392495583639082, "loss": 3.3647, "step": 66025 }, { "epoch": 4.486343253159396, "grad_norm": 1.0027565956115723, "learning_rate": 0.0004392070933550754, "loss": 3.403, "step": 66030 }, { "epoch": 4.486682973230058, "grad_norm": 1.0746222734451294, "learning_rate": 0.00043916462834624274, "loss": 3.371, "step": 66035 }, { "epoch": 4.4870226933007205, "grad_norm": 1.0924683809280396, "learning_rate": 0.00043912216333741, "loss": 3.6391, "step": 66040 }, { "epoch": 4.487362413371382, "grad_norm": 1.2874822616577148, "learning_rate": 0.00043907969832857724, "loss": 3.5773, "step": 66045 }, { "epoch": 4.487702133442044, "grad_norm": 0.8159123659133911, "learning_rate": 0.0004390372333197445, "loss": 3.5776, "step": 66050 }, { "epoch": 4.488041853512706, "grad_norm": 0.8107771873474121, "learning_rate": 0.00043899476831091186, "loss": 3.2825, "step": 66055 }, { "epoch": 4.488381573583367, "grad_norm": 0.8560663461685181, "learning_rate": 0.0004389523033020791, "loss": 3.4484, "step": 66060 }, { "epoch": 4.488721293654029, "grad_norm": 0.7566354274749756, "learning_rate": 0.00043890983829324636, "loss": 3.5723, "step": 66065 }, { "epoch": 4.489061013724691, "grad_norm": 1.0398342609405518, "learning_rate": 0.0004388673732844137, "loss": 3.638, "step": 66070 }, { "epoch": 4.489400733795352, "grad_norm": 1.0090651512145996, "learning_rate": 0.0004388249082755809, "loss": 3.4901, "step": 66075 }, { "epoch": 4.489740453866014, "grad_norm": 0.8337435126304626, "learning_rate": 0.0004387824432667482, "loss": 3.348, "step": 66080 }, { "epoch": 4.4900801739366765, "grad_norm": 0.9805718660354614, "learning_rate": 0.0004387399782579155, "loss": 3.508, "step": 66085 }, { "epoch": 4.490419894007338, "grad_norm": 0.9576119184494019, "learning_rate": 0.00043869751324908277, "loss": 3.5723, "step": 66090 }, { "epoch": 4.490759614078, "grad_norm": 0.9095420837402344, "learning_rate": 0.00043865504824025005, "loss": 3.2335, "step": 66095 }, { "epoch": 4.491099334148662, "grad_norm": 0.9509983658790588, "learning_rate": 0.0004386125832314173, "loss": 3.3261, "step": 66100 }, { "epoch": 4.491439054219323, "grad_norm": 0.728829562664032, "learning_rate": 0.0004385701182225846, "loss": 3.4469, "step": 66105 }, { "epoch": 4.491778774289985, "grad_norm": 1.111512303352356, "learning_rate": 0.0004385276532137519, "loss": 3.3965, "step": 66110 }, { "epoch": 4.492118494360647, "grad_norm": 0.8686105608940125, "learning_rate": 0.00043848518820491917, "loss": 3.4867, "step": 66115 }, { "epoch": 4.492458214431308, "grad_norm": 0.8222286701202393, "learning_rate": 0.0004384427231960864, "loss": 3.7651, "step": 66120 }, { "epoch": 4.49279793450197, "grad_norm": 0.6755975484848022, "learning_rate": 0.0004384002581872537, "loss": 3.4555, "step": 66125 }, { "epoch": 4.4931376545726325, "grad_norm": 1.147624135017395, "learning_rate": 0.000438357793178421, "loss": 3.4567, "step": 66130 }, { "epoch": 4.493477374643294, "grad_norm": 0.9702037572860718, "learning_rate": 0.00043831532816958823, "loss": 3.4348, "step": 66135 }, { "epoch": 4.493817094713956, "grad_norm": 0.7835829257965088, "learning_rate": 0.00043827286316075557, "loss": 3.5303, "step": 66140 }, { "epoch": 4.494156814784618, "grad_norm": 0.8331335186958313, "learning_rate": 0.00043823039815192285, "loss": 3.4823, "step": 66145 }, { "epoch": 4.494496534855279, "grad_norm": 0.8219634294509888, "learning_rate": 0.00043818793314309007, "loss": 3.5153, "step": 66150 }, { "epoch": 4.494836254925941, "grad_norm": 0.7200407981872559, "learning_rate": 0.00043814546813425735, "loss": 3.4107, "step": 66155 }, { "epoch": 4.495175974996603, "grad_norm": 0.9197948575019836, "learning_rate": 0.0004381030031254247, "loss": 3.6076, "step": 66160 }, { "epoch": 4.495515695067264, "grad_norm": 0.9910964965820312, "learning_rate": 0.0004380605381165919, "loss": 3.4201, "step": 66165 }, { "epoch": 4.495855415137926, "grad_norm": 1.0844299793243408, "learning_rate": 0.0004380180731077592, "loss": 3.3236, "step": 66170 }, { "epoch": 4.4961951352085885, "grad_norm": 1.0974396467208862, "learning_rate": 0.0004379756080989265, "loss": 3.5541, "step": 66175 }, { "epoch": 4.49653485527925, "grad_norm": 0.8741354942321777, "learning_rate": 0.0004379331430900938, "loss": 3.4041, "step": 66180 }, { "epoch": 4.496874575349912, "grad_norm": 1.0638731718063354, "learning_rate": 0.00043789067808126103, "loss": 3.3409, "step": 66185 }, { "epoch": 4.497214295420574, "grad_norm": 0.8131060004234314, "learning_rate": 0.0004378482130724283, "loss": 3.7395, "step": 66190 }, { "epoch": 4.497554015491235, "grad_norm": 0.8135803937911987, "learning_rate": 0.00043780574806359565, "loss": 3.6257, "step": 66195 }, { "epoch": 4.497893735561897, "grad_norm": 1.1259260177612305, "learning_rate": 0.00043776328305476287, "loss": 3.5372, "step": 66200 }, { "epoch": 4.498233455632559, "grad_norm": 1.0854525566101074, "learning_rate": 0.00043772081804593015, "loss": 3.3179, "step": 66205 }, { "epoch": 4.49857317570322, "grad_norm": 0.8077121376991272, "learning_rate": 0.0004376783530370975, "loss": 3.669, "step": 66210 }, { "epoch": 4.498912895773882, "grad_norm": 0.8429264426231384, "learning_rate": 0.0004376358880282647, "loss": 3.3968, "step": 66215 }, { "epoch": 4.4992526158445445, "grad_norm": 0.7988156080245972, "learning_rate": 0.000437593423019432, "loss": 3.4344, "step": 66220 }, { "epoch": 4.499592335915206, "grad_norm": 1.184632658958435, "learning_rate": 0.0004375509580105993, "loss": 3.6033, "step": 66225 }, { "epoch": 4.499932055985868, "grad_norm": 0.771209716796875, "learning_rate": 0.00043750849300176655, "loss": 3.5254, "step": 66230 }, { "epoch": 4.500271776056529, "grad_norm": 0.8359200358390808, "learning_rate": 0.00043746602799293383, "loss": 3.3722, "step": 66235 }, { "epoch": 4.500611496127191, "grad_norm": 1.0414351224899292, "learning_rate": 0.0004374235629841011, "loss": 3.2538, "step": 66240 }, { "epoch": 4.500951216197853, "grad_norm": 1.0110838413238525, "learning_rate": 0.0004373810979752684, "loss": 3.5275, "step": 66245 }, { "epoch": 4.501290936268514, "grad_norm": 0.9511992335319519, "learning_rate": 0.0004373386329664357, "loss": 3.4831, "step": 66250 }, { "epoch": 4.501630656339176, "grad_norm": 0.8751468062400818, "learning_rate": 0.00043729616795760295, "loss": 3.1557, "step": 66255 }, { "epoch": 4.501970376409838, "grad_norm": 0.8245137333869934, "learning_rate": 0.0004372537029487702, "loss": 3.4178, "step": 66260 }, { "epoch": 4.5023100964805, "grad_norm": 0.807417094707489, "learning_rate": 0.0004372112379399375, "loss": 3.3439, "step": 66265 }, { "epoch": 4.502649816551162, "grad_norm": 0.8834036588668823, "learning_rate": 0.0004371687729311048, "loss": 3.3613, "step": 66270 }, { "epoch": 4.502989536621824, "grad_norm": 0.8848956823348999, "learning_rate": 0.000437126307922272, "loss": 3.4967, "step": 66275 }, { "epoch": 4.503329256692485, "grad_norm": 1.1724224090576172, "learning_rate": 0.00043708384291343935, "loss": 3.4971, "step": 66280 }, { "epoch": 4.503668976763147, "grad_norm": 0.9957519173622131, "learning_rate": 0.00043704137790460663, "loss": 3.5982, "step": 66285 }, { "epoch": 4.504008696833809, "grad_norm": 0.9878401160240173, "learning_rate": 0.00043699891289577386, "loss": 3.6778, "step": 66290 }, { "epoch": 4.50434841690447, "grad_norm": 0.8867027163505554, "learning_rate": 0.00043695644788694114, "loss": 3.4274, "step": 66295 }, { "epoch": 4.504688136975132, "grad_norm": 0.8326494097709656, "learning_rate": 0.0004369139828781085, "loss": 3.5251, "step": 66300 }, { "epoch": 4.5050278570457944, "grad_norm": 0.7408300042152405, "learning_rate": 0.0004368715178692757, "loss": 3.5677, "step": 66305 }, { "epoch": 4.505367577116456, "grad_norm": 0.8756265044212341, "learning_rate": 0.000436829052860443, "loss": 3.2307, "step": 66310 }, { "epoch": 4.505707297187118, "grad_norm": 0.9377771615982056, "learning_rate": 0.0004367865878516103, "loss": 3.4558, "step": 66315 }, { "epoch": 4.50604701725778, "grad_norm": 0.9203988313674927, "learning_rate": 0.00043674412284277754, "loss": 3.5058, "step": 66320 }, { "epoch": 4.506386737328441, "grad_norm": 0.850809633731842, "learning_rate": 0.0004367016578339448, "loss": 3.6603, "step": 66325 }, { "epoch": 4.506726457399103, "grad_norm": 0.8915470242500305, "learning_rate": 0.00043665919282511215, "loss": 3.44, "step": 66330 }, { "epoch": 4.507066177469765, "grad_norm": 0.7421795725822449, "learning_rate": 0.0004366167278162794, "loss": 3.5953, "step": 66335 }, { "epoch": 4.507405897540426, "grad_norm": 1.2483971118927002, "learning_rate": 0.00043657426280744666, "loss": 3.0663, "step": 66340 }, { "epoch": 4.507745617611088, "grad_norm": 0.9260044097900391, "learning_rate": 0.00043653179779861394, "loss": 3.7313, "step": 66345 }, { "epoch": 4.5080853376817505, "grad_norm": 0.8949269652366638, "learning_rate": 0.0004364893327897812, "loss": 3.4812, "step": 66350 }, { "epoch": 4.508425057752412, "grad_norm": 1.0003306865692139, "learning_rate": 0.0004364468677809485, "loss": 3.5219, "step": 66355 }, { "epoch": 4.508764777823074, "grad_norm": 0.6728094816207886, "learning_rate": 0.0004364044027721158, "loss": 3.6028, "step": 66360 }, { "epoch": 4.509104497893736, "grad_norm": 0.8707903027534485, "learning_rate": 0.0004363619377632831, "loss": 3.7663, "step": 66365 }, { "epoch": 4.509444217964397, "grad_norm": 1.1475162506103516, "learning_rate": 0.00043631947275445034, "loss": 3.5086, "step": 66370 }, { "epoch": 4.509783938035059, "grad_norm": 0.9056214094161987, "learning_rate": 0.0004362770077456176, "loss": 3.3424, "step": 66375 }, { "epoch": 4.510123658105721, "grad_norm": 0.8202046751976013, "learning_rate": 0.0004362345427367849, "loss": 3.5289, "step": 66380 }, { "epoch": 4.510463378176382, "grad_norm": 0.9594817161560059, "learning_rate": 0.0004361920777279522, "loss": 3.6661, "step": 66385 }, { "epoch": 4.510803098247044, "grad_norm": 1.0026971101760864, "learning_rate": 0.00043614961271911946, "loss": 3.5113, "step": 66390 }, { "epoch": 4.5111428183177065, "grad_norm": 0.7385213971138, "learning_rate": 0.00043610714771028674, "loss": 3.3318, "step": 66395 }, { "epoch": 4.511482538388368, "grad_norm": 1.0562478303909302, "learning_rate": 0.000436064682701454, "loss": 3.2828, "step": 66400 }, { "epoch": 4.51182225845903, "grad_norm": 1.2695852518081665, "learning_rate": 0.0004360222176926213, "loss": 3.553, "step": 66405 }, { "epoch": 4.512161978529692, "grad_norm": 1.3713313341140747, "learning_rate": 0.0004359797526837886, "loss": 3.4712, "step": 66410 }, { "epoch": 4.512501698600353, "grad_norm": 0.8170470595359802, "learning_rate": 0.0004359372876749558, "loss": 3.4437, "step": 66415 }, { "epoch": 4.512841418671015, "grad_norm": 1.041155457496643, "learning_rate": 0.00043589482266612314, "loss": 3.4742, "step": 66420 }, { "epoch": 4.513181138741677, "grad_norm": 0.8515216708183289, "learning_rate": 0.0004358523576572904, "loss": 3.3389, "step": 66425 }, { "epoch": 4.513520858812338, "grad_norm": 0.9591215252876282, "learning_rate": 0.00043580989264845765, "loss": 3.302, "step": 66430 }, { "epoch": 4.513860578883, "grad_norm": 0.8770819902420044, "learning_rate": 0.000435767427639625, "loss": 3.6477, "step": 66435 }, { "epoch": 4.5142002989536625, "grad_norm": 0.9640154838562012, "learning_rate": 0.00043572496263079226, "loss": 3.6178, "step": 66440 }, { "epoch": 4.514540019024324, "grad_norm": 0.9260295629501343, "learning_rate": 0.0004356824976219595, "loss": 3.443, "step": 66445 }, { "epoch": 4.514879739094986, "grad_norm": 0.9172662496566772, "learning_rate": 0.00043564003261312677, "loss": 3.5585, "step": 66450 }, { "epoch": 4.515219459165648, "grad_norm": 0.8893214464187622, "learning_rate": 0.0004355975676042941, "loss": 3.3658, "step": 66455 }, { "epoch": 4.515559179236309, "grad_norm": 0.7086695432662964, "learning_rate": 0.00043555510259546133, "loss": 3.4416, "step": 66460 }, { "epoch": 4.515898899306971, "grad_norm": 0.7693719267845154, "learning_rate": 0.0004355126375866286, "loss": 3.5764, "step": 66465 }, { "epoch": 4.516238619377633, "grad_norm": 0.8776002526283264, "learning_rate": 0.00043547017257779594, "loss": 3.5417, "step": 66470 }, { "epoch": 4.516578339448294, "grad_norm": 1.1325730085372925, "learning_rate": 0.00043542770756896317, "loss": 3.2041, "step": 66475 }, { "epoch": 4.516918059518956, "grad_norm": 0.8800426125526428, "learning_rate": 0.00043538524256013045, "loss": 3.2383, "step": 66480 }, { "epoch": 4.5172577795896185, "grad_norm": 0.7938007712364197, "learning_rate": 0.00043534277755129773, "loss": 3.4293, "step": 66485 }, { "epoch": 4.51759749966028, "grad_norm": 0.8930371403694153, "learning_rate": 0.000435300312542465, "loss": 3.5067, "step": 66490 }, { "epoch": 4.517937219730942, "grad_norm": 0.9529256224632263, "learning_rate": 0.0004352578475336323, "loss": 3.4535, "step": 66495 }, { "epoch": 4.518276939801604, "grad_norm": 0.9461964964866638, "learning_rate": 0.00043521538252479957, "loss": 3.5666, "step": 66500 }, { "epoch": 4.518616659872265, "grad_norm": 0.722118079662323, "learning_rate": 0.00043517291751596685, "loss": 3.3239, "step": 66505 }, { "epoch": 4.518956379942927, "grad_norm": 1.0842740535736084, "learning_rate": 0.00043513045250713413, "loss": 3.5684, "step": 66510 }, { "epoch": 4.519296100013589, "grad_norm": 0.9255064725875854, "learning_rate": 0.0004350879874983014, "loss": 3.4622, "step": 66515 }, { "epoch": 4.51963582008425, "grad_norm": 0.8189423084259033, "learning_rate": 0.00043504552248946863, "loss": 3.5544, "step": 66520 }, { "epoch": 4.519975540154912, "grad_norm": 0.8148610591888428, "learning_rate": 0.00043500305748063597, "loss": 3.5503, "step": 66525 }, { "epoch": 4.5203152602255745, "grad_norm": 0.7528272271156311, "learning_rate": 0.00043496059247180325, "loss": 3.5459, "step": 66530 }, { "epoch": 4.520654980296236, "grad_norm": 0.757683515548706, "learning_rate": 0.00043491812746297053, "loss": 3.3432, "step": 66535 }, { "epoch": 4.520994700366898, "grad_norm": 0.9672046899795532, "learning_rate": 0.0004348756624541378, "loss": 3.6097, "step": 66540 }, { "epoch": 4.52133442043756, "grad_norm": 0.8043573498725891, "learning_rate": 0.0004348331974453051, "loss": 3.4675, "step": 66545 }, { "epoch": 4.521674140508221, "grad_norm": 0.9672068357467651, "learning_rate": 0.00043479073243647237, "loss": 3.5262, "step": 66550 }, { "epoch": 4.522013860578883, "grad_norm": 0.8367587327957153, "learning_rate": 0.0004347482674276396, "loss": 3.4286, "step": 66555 }, { "epoch": 4.522353580649545, "grad_norm": 0.8188043236732483, "learning_rate": 0.00043470580241880693, "loss": 3.3617, "step": 66560 }, { "epoch": 4.522693300720206, "grad_norm": 0.843798041343689, "learning_rate": 0.0004346633374099742, "loss": 3.481, "step": 66565 }, { "epoch": 4.523033020790868, "grad_norm": 0.7933496236801147, "learning_rate": 0.00043462087240114144, "loss": 3.7504, "step": 66570 }, { "epoch": 4.5233727408615305, "grad_norm": 0.9600605368614197, "learning_rate": 0.00043457840739230877, "loss": 3.2795, "step": 66575 }, { "epoch": 4.523712460932192, "grad_norm": 0.732108473777771, "learning_rate": 0.00043453594238347605, "loss": 3.4566, "step": 66580 }, { "epoch": 4.524052181002854, "grad_norm": 0.9429569840431213, "learning_rate": 0.0004344934773746433, "loss": 3.4503, "step": 66585 }, { "epoch": 4.524391901073516, "grad_norm": 0.7102686762809753, "learning_rate": 0.00043445101236581056, "loss": 3.6884, "step": 66590 }, { "epoch": 4.524731621144177, "grad_norm": 0.6802005171775818, "learning_rate": 0.0004344085473569779, "loss": 3.3665, "step": 66595 }, { "epoch": 4.525071341214839, "grad_norm": 0.925494372844696, "learning_rate": 0.0004343660823481451, "loss": 3.6666, "step": 66600 }, { "epoch": 4.525411061285501, "grad_norm": 1.1452659368515015, "learning_rate": 0.0004343236173393124, "loss": 3.6077, "step": 66605 }, { "epoch": 4.525750781356162, "grad_norm": 0.9466968178749084, "learning_rate": 0.00043428115233047973, "loss": 3.4309, "step": 66610 }, { "epoch": 4.5260905014268245, "grad_norm": 1.0045777559280396, "learning_rate": 0.00043423868732164696, "loss": 3.4496, "step": 66615 }, { "epoch": 4.5264302214974865, "grad_norm": 0.9907852411270142, "learning_rate": 0.00043419622231281424, "loss": 3.3222, "step": 66620 }, { "epoch": 4.526769941568148, "grad_norm": 0.658758819103241, "learning_rate": 0.00043415375730398157, "loss": 3.2718, "step": 66625 }, { "epoch": 4.52710966163881, "grad_norm": 1.2165133953094482, "learning_rate": 0.0004341112922951488, "loss": 3.4049, "step": 66630 }, { "epoch": 4.527449381709472, "grad_norm": 0.8876256942749023, "learning_rate": 0.0004340688272863161, "loss": 3.434, "step": 66635 }, { "epoch": 4.527789101780133, "grad_norm": 0.7738040089607239, "learning_rate": 0.00043402636227748336, "loss": 3.3506, "step": 66640 }, { "epoch": 4.528128821850795, "grad_norm": 0.9086374640464783, "learning_rate": 0.00043398389726865064, "loss": 3.5073, "step": 66645 }, { "epoch": 4.528468541921457, "grad_norm": 0.760643720626831, "learning_rate": 0.0004339414322598179, "loss": 3.4905, "step": 66650 }, { "epoch": 4.528808261992118, "grad_norm": 0.7796370983123779, "learning_rate": 0.0004338989672509852, "loss": 3.5582, "step": 66655 }, { "epoch": 4.5291479820627805, "grad_norm": 0.7682840824127197, "learning_rate": 0.0004338565022421525, "loss": 3.3667, "step": 66660 }, { "epoch": 4.5294877021334425, "grad_norm": 0.9640697240829468, "learning_rate": 0.00043381403723331976, "loss": 3.4957, "step": 66665 }, { "epoch": 4.529827422204104, "grad_norm": 0.9150825142860413, "learning_rate": 0.00043377157222448704, "loss": 3.6277, "step": 66670 }, { "epoch": 4.530167142274766, "grad_norm": 0.9334050416946411, "learning_rate": 0.00043372910721565426, "loss": 3.6875, "step": 66675 }, { "epoch": 4.530506862345427, "grad_norm": 0.9090102314949036, "learning_rate": 0.0004336866422068216, "loss": 3.6752, "step": 66680 }, { "epoch": 4.530846582416089, "grad_norm": 0.8971827030181885, "learning_rate": 0.0004336441771979889, "loss": 3.5559, "step": 66685 }, { "epoch": 4.531186302486751, "grad_norm": 0.8988327383995056, "learning_rate": 0.0004336017121891561, "loss": 3.432, "step": 66690 }, { "epoch": 4.531526022557412, "grad_norm": 0.8552560806274414, "learning_rate": 0.00043355924718032344, "loss": 3.4808, "step": 66695 }, { "epoch": 4.531865742628074, "grad_norm": 0.708320677280426, "learning_rate": 0.0004335167821714907, "loss": 3.4108, "step": 66700 }, { "epoch": 4.5322054626987365, "grad_norm": 0.980912983417511, "learning_rate": 0.000433474317162658, "loss": 3.5147, "step": 66705 }, { "epoch": 4.532545182769398, "grad_norm": 0.9449055790901184, "learning_rate": 0.0004334318521538252, "loss": 3.5526, "step": 66710 }, { "epoch": 4.53288490284006, "grad_norm": 1.0668842792510986, "learning_rate": 0.00043338938714499256, "loss": 3.3522, "step": 66715 }, { "epoch": 4.533224622910722, "grad_norm": 1.159994125366211, "learning_rate": 0.00043334692213615984, "loss": 3.685, "step": 66720 }, { "epoch": 4.533564342981383, "grad_norm": 0.9329102635383606, "learning_rate": 0.00043330445712732706, "loss": 3.5058, "step": 66725 }, { "epoch": 4.533904063052045, "grad_norm": 0.9496855139732361, "learning_rate": 0.0004332619921184944, "loss": 3.4115, "step": 66730 }, { "epoch": 4.534243783122707, "grad_norm": 0.7862743735313416, "learning_rate": 0.0004332195271096617, "loss": 3.4623, "step": 66735 }, { "epoch": 4.534583503193368, "grad_norm": 0.7804041504859924, "learning_rate": 0.0004331770621008289, "loss": 3.5089, "step": 66740 }, { "epoch": 4.53492322326403, "grad_norm": 0.8277208805084229, "learning_rate": 0.0004331345970919962, "loss": 3.6347, "step": 66745 }, { "epoch": 4.5352629433346925, "grad_norm": 1.4659042358398438, "learning_rate": 0.0004330921320831635, "loss": 3.5089, "step": 66750 }, { "epoch": 4.535602663405354, "grad_norm": 0.9748002886772156, "learning_rate": 0.00043304966707433074, "loss": 3.3099, "step": 66755 }, { "epoch": 4.535942383476016, "grad_norm": 0.7658646702766418, "learning_rate": 0.000433007202065498, "loss": 3.5651, "step": 66760 }, { "epoch": 4.536282103546678, "grad_norm": 0.7752154469490051, "learning_rate": 0.00043296473705666536, "loss": 3.2891, "step": 66765 }, { "epoch": 4.536621823617339, "grad_norm": 0.8406447768211365, "learning_rate": 0.0004329222720478326, "loss": 3.3332, "step": 66770 }, { "epoch": 4.536961543688001, "grad_norm": 0.931098461151123, "learning_rate": 0.00043287980703899986, "loss": 3.5949, "step": 66775 }, { "epoch": 4.537301263758663, "grad_norm": 0.8848597407341003, "learning_rate": 0.00043283734203016714, "loss": 3.2596, "step": 66780 }, { "epoch": 4.537640983829324, "grad_norm": 1.1774585247039795, "learning_rate": 0.0004327948770213344, "loss": 3.6514, "step": 66785 }, { "epoch": 4.537980703899986, "grad_norm": 1.0215868949890137, "learning_rate": 0.0004327524120125017, "loss": 3.5618, "step": 66790 }, { "epoch": 4.5383204239706485, "grad_norm": 0.9636752009391785, "learning_rate": 0.000432709947003669, "loss": 3.6018, "step": 66795 }, { "epoch": 4.53866014404131, "grad_norm": 0.982258141040802, "learning_rate": 0.00043266748199483627, "loss": 3.5931, "step": 66800 }, { "epoch": 4.538999864111972, "grad_norm": 0.9146232604980469, "learning_rate": 0.00043262501698600355, "loss": 3.6283, "step": 66805 }, { "epoch": 4.539339584182634, "grad_norm": 0.8624289035797119, "learning_rate": 0.0004325825519771708, "loss": 3.9439, "step": 66810 }, { "epoch": 4.539679304253295, "grad_norm": 1.0013322830200195, "learning_rate": 0.00043254008696833805, "loss": 3.6557, "step": 66815 }, { "epoch": 4.540019024323957, "grad_norm": 0.8071323037147522, "learning_rate": 0.0004324976219595054, "loss": 3.5187, "step": 66820 }, { "epoch": 4.540358744394619, "grad_norm": 0.7959896326065063, "learning_rate": 0.00043245515695067267, "loss": 3.5998, "step": 66825 }, { "epoch": 4.54069846446528, "grad_norm": 0.8444793224334717, "learning_rate": 0.0004324126919418399, "loss": 3.4634, "step": 66830 }, { "epoch": 4.541038184535942, "grad_norm": 1.0232434272766113, "learning_rate": 0.0004323702269330072, "loss": 3.5174, "step": 66835 }, { "epoch": 4.5413779046066045, "grad_norm": 0.8612986207008362, "learning_rate": 0.0004323277619241745, "loss": 3.4453, "step": 66840 }, { "epoch": 4.541717624677266, "grad_norm": 1.2115777730941772, "learning_rate": 0.00043228529691534173, "loss": 3.3922, "step": 66845 }, { "epoch": 4.542057344747928, "grad_norm": 0.9160543084144592, "learning_rate": 0.000432242831906509, "loss": 3.6223, "step": 66850 }, { "epoch": 4.54239706481859, "grad_norm": 0.8424810767173767, "learning_rate": 0.00043220036689767635, "loss": 3.4271, "step": 66855 }, { "epoch": 4.542736784889251, "grad_norm": 0.7096657156944275, "learning_rate": 0.00043215790188884357, "loss": 3.4862, "step": 66860 }, { "epoch": 4.543076504959913, "grad_norm": 0.9247938990592957, "learning_rate": 0.00043211543688001085, "loss": 3.5417, "step": 66865 }, { "epoch": 4.543416225030575, "grad_norm": 1.4372491836547852, "learning_rate": 0.0004320729718711782, "loss": 3.4718, "step": 66870 }, { "epoch": 4.543755945101236, "grad_norm": 0.7821809649467468, "learning_rate": 0.00043203050686234547, "loss": 3.5486, "step": 66875 }, { "epoch": 4.5440956651718984, "grad_norm": 0.8051093816757202, "learning_rate": 0.0004319880418535127, "loss": 3.631, "step": 66880 }, { "epoch": 4.54443538524256, "grad_norm": 0.9351738095283508, "learning_rate": 0.00043194557684468, "loss": 3.45, "step": 66885 }, { "epoch": 4.544775105313222, "grad_norm": 1.1868656873703003, "learning_rate": 0.0004319031118358473, "loss": 3.3131, "step": 66890 }, { "epoch": 4.545114825383884, "grad_norm": 0.7916035652160645, "learning_rate": 0.00043186064682701453, "loss": 3.3709, "step": 66895 }, { "epoch": 4.545454545454545, "grad_norm": 0.9459665417671204, "learning_rate": 0.0004318181818181818, "loss": 3.3845, "step": 66900 }, { "epoch": 4.545794265525207, "grad_norm": 0.9013563990592957, "learning_rate": 0.00043177571680934915, "loss": 3.428, "step": 66905 }, { "epoch": 4.546133985595869, "grad_norm": 0.9774910807609558, "learning_rate": 0.00043173325180051637, "loss": 3.6417, "step": 66910 }, { "epoch": 4.54647370566653, "grad_norm": 1.0090702772140503, "learning_rate": 0.00043169078679168365, "loss": 3.4969, "step": 66915 }, { "epoch": 4.546813425737192, "grad_norm": 0.8805792331695557, "learning_rate": 0.000431648321782851, "loss": 3.4828, "step": 66920 }, { "epoch": 4.5471531458078545, "grad_norm": 0.7668918967247009, "learning_rate": 0.0004316058567740182, "loss": 3.651, "step": 66925 }, { "epoch": 4.547492865878516, "grad_norm": 0.9279001951217651, "learning_rate": 0.0004315633917651855, "loss": 3.6542, "step": 66930 }, { "epoch": 4.547832585949178, "grad_norm": 0.7266154289245605, "learning_rate": 0.0004315209267563528, "loss": 3.3583, "step": 66935 }, { "epoch": 4.54817230601984, "grad_norm": 1.2068816423416138, "learning_rate": 0.00043147846174752005, "loss": 3.3139, "step": 66940 }, { "epoch": 4.548512026090501, "grad_norm": 0.8246268033981323, "learning_rate": 0.00043143599673868733, "loss": 3.5793, "step": 66945 }, { "epoch": 4.548851746161163, "grad_norm": 1.0142943859100342, "learning_rate": 0.0004313935317298546, "loss": 3.6196, "step": 66950 }, { "epoch": 4.549191466231825, "grad_norm": 1.0291627645492554, "learning_rate": 0.0004313510667210219, "loss": 3.5292, "step": 66955 }, { "epoch": 4.549531186302486, "grad_norm": 0.8368141651153564, "learning_rate": 0.0004313086017121892, "loss": 3.5346, "step": 66960 }, { "epoch": 4.549870906373148, "grad_norm": 0.968543291091919, "learning_rate": 0.00043126613670335645, "loss": 3.4048, "step": 66965 }, { "epoch": 4.5502106264438105, "grad_norm": 0.8095539808273315, "learning_rate": 0.0004312236716945237, "loss": 3.4625, "step": 66970 }, { "epoch": 4.550550346514472, "grad_norm": 0.9477177262306213, "learning_rate": 0.000431181206685691, "loss": 3.3573, "step": 66975 }, { "epoch": 4.550890066585134, "grad_norm": 0.8332407474517822, "learning_rate": 0.0004311387416768583, "loss": 3.4069, "step": 66980 }, { "epoch": 4.551229786655796, "grad_norm": 0.8267947435379028, "learning_rate": 0.0004310962766680255, "loss": 3.5259, "step": 66985 }, { "epoch": 4.551569506726457, "grad_norm": 1.01543128490448, "learning_rate": 0.00043105381165919285, "loss": 3.4005, "step": 66990 }, { "epoch": 4.551909226797119, "grad_norm": 0.911358118057251, "learning_rate": 0.00043101134665036013, "loss": 3.525, "step": 66995 }, { "epoch": 4.552248946867781, "grad_norm": 0.9308170676231384, "learning_rate": 0.00043096888164152736, "loss": 3.2679, "step": 67000 }, { "epoch": 4.552588666938442, "grad_norm": 0.7572134137153625, "learning_rate": 0.00043092641663269464, "loss": 3.4205, "step": 67005 }, { "epoch": 4.552928387009104, "grad_norm": 0.6687334775924683, "learning_rate": 0.000430883951623862, "loss": 3.4311, "step": 67010 }, { "epoch": 4.5532681070797665, "grad_norm": 0.9449138641357422, "learning_rate": 0.0004308414866150292, "loss": 3.4864, "step": 67015 }, { "epoch": 4.553607827150428, "grad_norm": 0.9813666939735413, "learning_rate": 0.0004307990216061965, "loss": 3.3891, "step": 67020 }, { "epoch": 4.55394754722109, "grad_norm": 1.0032155513763428, "learning_rate": 0.0004307565565973638, "loss": 3.5482, "step": 67025 }, { "epoch": 4.554287267291752, "grad_norm": 1.0856163501739502, "learning_rate": 0.00043071409158853104, "loss": 3.5185, "step": 67030 }, { "epoch": 4.554626987362413, "grad_norm": 1.0040206909179688, "learning_rate": 0.0004306716265796983, "loss": 3.3138, "step": 67035 }, { "epoch": 4.554966707433075, "grad_norm": 1.2751375436782837, "learning_rate": 0.0004306291615708656, "loss": 3.6216, "step": 67040 }, { "epoch": 4.555306427503737, "grad_norm": 0.9331932067871094, "learning_rate": 0.00043058669656203293, "loss": 3.4206, "step": 67045 }, { "epoch": 4.555646147574398, "grad_norm": 0.879595935344696, "learning_rate": 0.00043054423155320016, "loss": 3.3224, "step": 67050 }, { "epoch": 4.55598586764506, "grad_norm": 0.9175193309783936, "learning_rate": 0.00043050176654436744, "loss": 3.5689, "step": 67055 }, { "epoch": 4.5563255877157225, "grad_norm": 0.9314781427383423, "learning_rate": 0.0004304593015355348, "loss": 3.5726, "step": 67060 }, { "epoch": 4.556665307786384, "grad_norm": 1.1162173748016357, "learning_rate": 0.000430416836526702, "loss": 3.6819, "step": 67065 }, { "epoch": 4.557005027857046, "grad_norm": 1.0832526683807373, "learning_rate": 0.0004303743715178693, "loss": 3.4827, "step": 67070 }, { "epoch": 4.557344747927708, "grad_norm": 0.9020607471466064, "learning_rate": 0.00043033190650903656, "loss": 3.8058, "step": 67075 }, { "epoch": 4.557684467998369, "grad_norm": 0.854320764541626, "learning_rate": 0.00043028944150020384, "loss": 3.4311, "step": 67080 }, { "epoch": 4.558024188069031, "grad_norm": 0.9069700837135315, "learning_rate": 0.0004302469764913711, "loss": 3.6863, "step": 67085 }, { "epoch": 4.558363908139693, "grad_norm": 0.6865811347961426, "learning_rate": 0.0004302045114825384, "loss": 3.4372, "step": 67090 }, { "epoch": 4.558703628210354, "grad_norm": 0.7132288813591003, "learning_rate": 0.0004301620464737057, "loss": 3.5355, "step": 67095 }, { "epoch": 4.559043348281016, "grad_norm": 1.061149001121521, "learning_rate": 0.00043011958146487296, "loss": 3.2397, "step": 67100 }, { "epoch": 4.5593830683516785, "grad_norm": 1.0076769590377808, "learning_rate": 0.00043007711645604024, "loss": 3.6232, "step": 67105 }, { "epoch": 4.55972278842234, "grad_norm": 0.8603944778442383, "learning_rate": 0.00043003465144720747, "loss": 3.34, "step": 67110 }, { "epoch": 4.560062508493002, "grad_norm": 0.7744854092597961, "learning_rate": 0.0004299921864383748, "loss": 3.4745, "step": 67115 }, { "epoch": 4.560402228563664, "grad_norm": 1.0107568502426147, "learning_rate": 0.0004299497214295421, "loss": 3.6018, "step": 67120 }, { "epoch": 4.560741948634325, "grad_norm": 0.8886268138885498, "learning_rate": 0.0004299072564207093, "loss": 3.2792, "step": 67125 }, { "epoch": 4.561081668704987, "grad_norm": 0.8083389401435852, "learning_rate": 0.00042986479141187664, "loss": 3.4611, "step": 67130 }, { "epoch": 4.561421388775649, "grad_norm": 1.005603551864624, "learning_rate": 0.0004298223264030439, "loss": 3.2947, "step": 67135 }, { "epoch": 4.56176110884631, "grad_norm": 0.7241636514663696, "learning_rate": 0.00042977986139421115, "loss": 3.5465, "step": 67140 }, { "epoch": 4.562100828916972, "grad_norm": 0.905881404876709, "learning_rate": 0.00042973739638537843, "loss": 3.5161, "step": 67145 }, { "epoch": 4.5624405489876345, "grad_norm": 1.0129166841506958, "learning_rate": 0.00042969493137654576, "loss": 3.6072, "step": 67150 }, { "epoch": 4.562780269058296, "grad_norm": 0.838306188583374, "learning_rate": 0.000429652466367713, "loss": 3.5378, "step": 67155 }, { "epoch": 4.563119989128958, "grad_norm": 1.0791294574737549, "learning_rate": 0.00042961000135888027, "loss": 3.5124, "step": 67160 }, { "epoch": 4.56345970919962, "grad_norm": 0.9212916493415833, "learning_rate": 0.0004295675363500476, "loss": 3.5759, "step": 67165 }, { "epoch": 4.563799429270281, "grad_norm": 0.8432183265686035, "learning_rate": 0.00042952507134121483, "loss": 3.6253, "step": 67170 }, { "epoch": 4.564139149340943, "grad_norm": 0.8497002720832825, "learning_rate": 0.0004294826063323821, "loss": 3.5195, "step": 67175 }, { "epoch": 4.564478869411605, "grad_norm": 1.0626378059387207, "learning_rate": 0.00042944014132354944, "loss": 3.5169, "step": 67180 }, { "epoch": 4.564818589482266, "grad_norm": 1.237396001815796, "learning_rate": 0.00042939767631471667, "loss": 3.1062, "step": 67185 }, { "epoch": 4.5651583095529285, "grad_norm": 1.0051499605178833, "learning_rate": 0.00042935521130588395, "loss": 3.5414, "step": 67190 }, { "epoch": 4.5654980296235905, "grad_norm": 0.8467531800270081, "learning_rate": 0.00042931274629705123, "loss": 3.3157, "step": 67195 }, { "epoch": 4.565837749694252, "grad_norm": 0.9624218940734863, "learning_rate": 0.0004292702812882185, "loss": 3.5213, "step": 67200 }, { "epoch": 4.566177469764914, "grad_norm": 1.1289418935775757, "learning_rate": 0.0004292278162793858, "loss": 3.5188, "step": 67205 }, { "epoch": 4.566517189835576, "grad_norm": 0.9390990734100342, "learning_rate": 0.00042918535127055307, "loss": 3.609, "step": 67210 }, { "epoch": 4.566856909906237, "grad_norm": 0.7995396852493286, "learning_rate": 0.0004291428862617204, "loss": 3.1985, "step": 67215 }, { "epoch": 4.567196629976899, "grad_norm": 0.7994973659515381, "learning_rate": 0.00042910042125288763, "loss": 3.2853, "step": 67220 }, { "epoch": 4.567536350047561, "grad_norm": 0.9507366418838501, "learning_rate": 0.0004290579562440549, "loss": 3.305, "step": 67225 }, { "epoch": 4.567876070118222, "grad_norm": 0.879896879196167, "learning_rate": 0.0004290154912352222, "loss": 3.3514, "step": 67230 }, { "epoch": 4.5682157901888845, "grad_norm": 0.7326902747154236, "learning_rate": 0.00042897302622638947, "loss": 3.7267, "step": 67235 }, { "epoch": 4.5685555102595465, "grad_norm": 0.928202211856842, "learning_rate": 0.00042893056121755675, "loss": 3.4733, "step": 67240 }, { "epoch": 4.568895230330208, "grad_norm": 0.6807061433792114, "learning_rate": 0.00042888809620872403, "loss": 3.5264, "step": 67245 }, { "epoch": 4.56923495040087, "grad_norm": 1.1278740167617798, "learning_rate": 0.0004288456311998913, "loss": 3.7306, "step": 67250 }, { "epoch": 4.569574670471532, "grad_norm": 0.8895540237426758, "learning_rate": 0.0004288031661910586, "loss": 3.4121, "step": 67255 }, { "epoch": 4.569914390542193, "grad_norm": 1.134082555770874, "learning_rate": 0.00042876070118222587, "loss": 3.4065, "step": 67260 }, { "epoch": 4.570254110612855, "grad_norm": 1.0165737867355347, "learning_rate": 0.0004287182361733931, "loss": 3.452, "step": 67265 }, { "epoch": 4.570593830683517, "grad_norm": 0.8008056879043579, "learning_rate": 0.00042867577116456043, "loss": 3.3975, "step": 67270 }, { "epoch": 4.570933550754178, "grad_norm": 0.8788750171661377, "learning_rate": 0.0004286333061557277, "loss": 3.4382, "step": 67275 }, { "epoch": 4.5712732708248405, "grad_norm": 0.7224317193031311, "learning_rate": 0.00042859084114689494, "loss": 3.1885, "step": 67280 }, { "epoch": 4.5716129908955025, "grad_norm": 0.9009097814559937, "learning_rate": 0.00042854837613806227, "loss": 3.4035, "step": 67285 }, { "epoch": 4.571952710966164, "grad_norm": 1.8645020723342896, "learning_rate": 0.00042850591112922955, "loss": 3.6646, "step": 67290 }, { "epoch": 4.572292431036826, "grad_norm": 0.880300760269165, "learning_rate": 0.0004284634461203968, "loss": 3.2987, "step": 67295 }, { "epoch": 4.572632151107488, "grad_norm": 1.12764573097229, "learning_rate": 0.00042842098111156406, "loss": 3.6162, "step": 67300 }, { "epoch": 4.572971871178149, "grad_norm": 0.8858887553215027, "learning_rate": 0.0004283785161027314, "loss": 3.346, "step": 67305 }, { "epoch": 4.573311591248811, "grad_norm": 0.7338926196098328, "learning_rate": 0.0004283360510938986, "loss": 3.7431, "step": 67310 }, { "epoch": 4.573651311319473, "grad_norm": 0.9242263436317444, "learning_rate": 0.0004282935860850659, "loss": 3.5025, "step": 67315 }, { "epoch": 4.573991031390134, "grad_norm": 0.8383481502532959, "learning_rate": 0.00042825112107623323, "loss": 3.3255, "step": 67320 }, { "epoch": 4.5743307514607965, "grad_norm": 1.196877121925354, "learning_rate": 0.00042820865606740046, "loss": 3.4673, "step": 67325 }, { "epoch": 4.574670471531459, "grad_norm": 0.7028875350952148, "learning_rate": 0.00042816619105856774, "loss": 3.5659, "step": 67330 }, { "epoch": 4.57501019160212, "grad_norm": 0.8862816691398621, "learning_rate": 0.000428123726049735, "loss": 3.5513, "step": 67335 }, { "epoch": 4.575349911672782, "grad_norm": 0.661256730556488, "learning_rate": 0.0004280812610409023, "loss": 3.0223, "step": 67340 }, { "epoch": 4.575689631743444, "grad_norm": 0.8158573508262634, "learning_rate": 0.0004280387960320696, "loss": 3.5244, "step": 67345 }, { "epoch": 4.576029351814105, "grad_norm": 0.9288393259048462, "learning_rate": 0.00042799633102323686, "loss": 3.5275, "step": 67350 }, { "epoch": 4.576369071884767, "grad_norm": 2.9138591289520264, "learning_rate": 0.00042795386601440414, "loss": 3.5622, "step": 67355 }, { "epoch": 4.576708791955428, "grad_norm": 1.1179980039596558, "learning_rate": 0.0004279114010055714, "loss": 3.5335, "step": 67360 }, { "epoch": 4.57704851202609, "grad_norm": 0.9151772856712341, "learning_rate": 0.0004278689359967387, "loss": 3.3553, "step": 67365 }, { "epoch": 4.5773882320967525, "grad_norm": 1.1000335216522217, "learning_rate": 0.0004278264709879059, "loss": 3.2875, "step": 67370 }, { "epoch": 4.577727952167414, "grad_norm": 0.8894139528274536, "learning_rate": 0.00042778400597907326, "loss": 3.3601, "step": 67375 }, { "epoch": 4.578067672238076, "grad_norm": 0.9011496901512146, "learning_rate": 0.00042774154097024054, "loss": 3.6231, "step": 67380 }, { "epoch": 4.578407392308738, "grad_norm": 0.9043906331062317, "learning_rate": 0.0004276990759614078, "loss": 3.6621, "step": 67385 }, { "epoch": 4.578747112379399, "grad_norm": 0.9889025688171387, "learning_rate": 0.0004276566109525751, "loss": 3.4285, "step": 67390 }, { "epoch": 4.579086832450061, "grad_norm": 0.7129039764404297, "learning_rate": 0.0004276141459437424, "loss": 3.3303, "step": 67395 }, { "epoch": 4.579426552520723, "grad_norm": 1.5924601554870605, "learning_rate": 0.00042757168093490966, "loss": 3.4262, "step": 67400 }, { "epoch": 4.579766272591384, "grad_norm": 0.8330124020576477, "learning_rate": 0.0004275292159260769, "loss": 3.6432, "step": 67405 }, { "epoch": 4.580105992662046, "grad_norm": 0.9130191802978516, "learning_rate": 0.0004274867509172442, "loss": 3.6996, "step": 67410 }, { "epoch": 4.5804457127327085, "grad_norm": 0.885295569896698, "learning_rate": 0.0004274442859084115, "loss": 3.4926, "step": 67415 }, { "epoch": 4.58078543280337, "grad_norm": 0.9824835658073425, "learning_rate": 0.0004274018208995787, "loss": 3.3504, "step": 67420 }, { "epoch": 4.581125152874032, "grad_norm": 0.7631675601005554, "learning_rate": 0.00042735935589074606, "loss": 3.388, "step": 67425 }, { "epoch": 4.581464872944694, "grad_norm": 0.8885135650634766, "learning_rate": 0.00042731689088191334, "loss": 3.3984, "step": 67430 }, { "epoch": 4.581804593015355, "grad_norm": 1.0681157112121582, "learning_rate": 0.00042727442587308056, "loss": 3.6506, "step": 67435 }, { "epoch": 4.582144313086017, "grad_norm": 1.1004154682159424, "learning_rate": 0.00042723196086424784, "loss": 3.2761, "step": 67440 }, { "epoch": 4.582484033156679, "grad_norm": 0.8411985039710999, "learning_rate": 0.0004271894958554152, "loss": 3.4207, "step": 67445 }, { "epoch": 4.58282375322734, "grad_norm": 1.0547019243240356, "learning_rate": 0.0004271470308465824, "loss": 3.3656, "step": 67450 }, { "epoch": 4.583163473298002, "grad_norm": 0.7411400079727173, "learning_rate": 0.0004271045658377497, "loss": 3.3009, "step": 67455 }, { "epoch": 4.5835031933686645, "grad_norm": 1.0234463214874268, "learning_rate": 0.000427062100828917, "loss": 3.6862, "step": 67460 }, { "epoch": 4.583842913439326, "grad_norm": 0.8767651319503784, "learning_rate": 0.00042701963582008424, "loss": 3.3956, "step": 67465 }, { "epoch": 4.584182633509988, "grad_norm": 0.827991783618927, "learning_rate": 0.0004269771708112515, "loss": 3.5, "step": 67470 }, { "epoch": 4.58452235358065, "grad_norm": 1.0578416585922241, "learning_rate": 0.00042693470580241886, "loss": 3.541, "step": 67475 }, { "epoch": 4.584862073651311, "grad_norm": 1.132582187652588, "learning_rate": 0.0004268922407935861, "loss": 3.2494, "step": 67480 }, { "epoch": 4.585201793721973, "grad_norm": 0.7543942332267761, "learning_rate": 0.00042684977578475336, "loss": 3.242, "step": 67485 }, { "epoch": 4.585541513792635, "grad_norm": 1.258089303970337, "learning_rate": 0.00042680731077592064, "loss": 3.6408, "step": 67490 }, { "epoch": 4.585881233863296, "grad_norm": 1.0118377208709717, "learning_rate": 0.0004267648457670879, "loss": 3.4334, "step": 67495 }, { "epoch": 4.5862209539339585, "grad_norm": 0.8488015532493591, "learning_rate": 0.0004267223807582552, "loss": 3.639, "step": 67500 }, { "epoch": 4.5865606740046205, "grad_norm": 0.8111043572425842, "learning_rate": 0.0004266799157494225, "loss": 3.4394, "step": 67505 }, { "epoch": 4.586900394075282, "grad_norm": 0.8887056112289429, "learning_rate": 0.00042663745074058976, "loss": 3.4625, "step": 67510 }, { "epoch": 4.587240114145944, "grad_norm": 0.7388931512832642, "learning_rate": 0.00042659498573175705, "loss": 3.5483, "step": 67515 }, { "epoch": 4.587579834216606, "grad_norm": 0.9170929789543152, "learning_rate": 0.0004265525207229243, "loss": 3.6021, "step": 67520 }, { "epoch": 4.587919554287267, "grad_norm": 0.8375212550163269, "learning_rate": 0.00042651005571409155, "loss": 3.6647, "step": 67525 }, { "epoch": 4.588259274357929, "grad_norm": 0.9961341023445129, "learning_rate": 0.0004264675907052589, "loss": 3.5866, "step": 67530 }, { "epoch": 4.588598994428591, "grad_norm": 0.861946702003479, "learning_rate": 0.00042642512569642617, "loss": 3.407, "step": 67535 }, { "epoch": 4.588938714499252, "grad_norm": 1.1403319835662842, "learning_rate": 0.0004263826606875934, "loss": 3.5012, "step": 67540 }, { "epoch": 4.5892784345699145, "grad_norm": 0.8945421576499939, "learning_rate": 0.0004263401956787607, "loss": 3.6363, "step": 67545 }, { "epoch": 4.5896181546405765, "grad_norm": 1.1353381872177124, "learning_rate": 0.000426297730669928, "loss": 3.3721, "step": 67550 }, { "epoch": 4.589957874711238, "grad_norm": 0.9434921145439148, "learning_rate": 0.0004262552656610953, "loss": 3.4556, "step": 67555 }, { "epoch": 4.5902975947819, "grad_norm": 0.8050035238265991, "learning_rate": 0.0004262128006522625, "loss": 3.6264, "step": 67560 }, { "epoch": 4.590637314852561, "grad_norm": 0.9937885999679565, "learning_rate": 0.00042617033564342985, "loss": 3.1886, "step": 67565 }, { "epoch": 4.590977034923223, "grad_norm": 0.8570193648338318, "learning_rate": 0.0004261278706345971, "loss": 3.372, "step": 67570 }, { "epoch": 4.591316754993885, "grad_norm": 0.7374115586280823, "learning_rate": 0.00042608540562576435, "loss": 3.5548, "step": 67575 }, { "epoch": 4.591656475064546, "grad_norm": 0.9214220643043518, "learning_rate": 0.0004260429406169317, "loss": 3.4317, "step": 67580 }, { "epoch": 4.591996195135208, "grad_norm": 0.7519167065620422, "learning_rate": 0.00042600047560809897, "loss": 3.5474, "step": 67585 }, { "epoch": 4.5923359152058705, "grad_norm": 0.7849289178848267, "learning_rate": 0.0004259580105992662, "loss": 3.4064, "step": 67590 }, { "epoch": 4.592675635276532, "grad_norm": 0.879451334476471, "learning_rate": 0.00042591554559043347, "loss": 3.2379, "step": 67595 }, { "epoch": 4.593015355347194, "grad_norm": 0.8637433052062988, "learning_rate": 0.0004258730805816008, "loss": 3.3319, "step": 67600 }, { "epoch": 4.593355075417856, "grad_norm": 0.9727707505226135, "learning_rate": 0.00042583061557276803, "loss": 3.5231, "step": 67605 }, { "epoch": 4.593694795488517, "grad_norm": 0.8368589878082275, "learning_rate": 0.0004257881505639353, "loss": 3.4176, "step": 67610 }, { "epoch": 4.594034515559179, "grad_norm": 1.0144131183624268, "learning_rate": 0.00042574568555510265, "loss": 3.428, "step": 67615 }, { "epoch": 4.594374235629841, "grad_norm": 0.9642901420593262, "learning_rate": 0.00042570322054626987, "loss": 3.4172, "step": 67620 }, { "epoch": 4.594713955700502, "grad_norm": 1.0930784940719604, "learning_rate": 0.00042566075553743715, "loss": 3.349, "step": 67625 }, { "epoch": 4.595053675771164, "grad_norm": 0.7711846828460693, "learning_rate": 0.00042561829052860443, "loss": 3.5884, "step": 67630 }, { "epoch": 4.5953933958418265, "grad_norm": 0.7511374950408936, "learning_rate": 0.0004255758255197717, "loss": 3.33, "step": 67635 }, { "epoch": 4.595733115912488, "grad_norm": 0.968352198600769, "learning_rate": 0.000425533360510939, "loss": 3.1873, "step": 67640 }, { "epoch": 4.59607283598315, "grad_norm": 1.0428951978683472, "learning_rate": 0.00042549089550210627, "loss": 3.7763, "step": 67645 }, { "epoch": 4.596412556053812, "grad_norm": 0.9782671928405762, "learning_rate": 0.00042544843049327355, "loss": 3.5743, "step": 67650 }, { "epoch": 4.596752276124473, "grad_norm": 0.940528929233551, "learning_rate": 0.00042540596548444083, "loss": 3.7496, "step": 67655 }, { "epoch": 4.597091996195135, "grad_norm": 1.0020475387573242, "learning_rate": 0.0004253635004756081, "loss": 3.2844, "step": 67660 }, { "epoch": 4.597431716265797, "grad_norm": 0.8932842016220093, "learning_rate": 0.00042532103546677534, "loss": 3.4531, "step": 67665 }, { "epoch": 4.597771436336458, "grad_norm": 0.7052213549613953, "learning_rate": 0.0004252785704579427, "loss": 3.6569, "step": 67670 }, { "epoch": 4.59811115640712, "grad_norm": 0.9543061256408691, "learning_rate": 0.00042523610544910995, "loss": 3.4588, "step": 67675 }, { "epoch": 4.5984508764777825, "grad_norm": 0.8197562098503113, "learning_rate": 0.0004251936404402772, "loss": 3.3838, "step": 67680 }, { "epoch": 4.598790596548444, "grad_norm": 1.1790635585784912, "learning_rate": 0.0004251511754314445, "loss": 3.3035, "step": 67685 }, { "epoch": 4.599130316619106, "grad_norm": 0.8613609671592712, "learning_rate": 0.0004251087104226118, "loss": 3.4634, "step": 67690 }, { "epoch": 4.599470036689768, "grad_norm": 0.8218375444412231, "learning_rate": 0.000425066245413779, "loss": 3.2852, "step": 67695 }, { "epoch": 4.599809756760429, "grad_norm": 0.8376889228820801, "learning_rate": 0.0004250237804049463, "loss": 3.6469, "step": 67700 }, { "epoch": 4.600149476831091, "grad_norm": 0.9668734073638916, "learning_rate": 0.00042498131539611363, "loss": 3.6957, "step": 67705 }, { "epoch": 4.600489196901753, "grad_norm": 1.1410876512527466, "learning_rate": 0.00042493885038728086, "loss": 3.2336, "step": 67710 }, { "epoch": 4.600828916972414, "grad_norm": 0.9791898131370544, "learning_rate": 0.00042489638537844814, "loss": 3.1929, "step": 67715 }, { "epoch": 4.601168637043076, "grad_norm": 0.9735149145126343, "learning_rate": 0.0004248539203696155, "loss": 3.4115, "step": 67720 }, { "epoch": 4.6015083571137385, "grad_norm": 0.8024521470069885, "learning_rate": 0.00042481145536078275, "loss": 3.4118, "step": 67725 }, { "epoch": 4.6018480771844, "grad_norm": 0.9405158758163452, "learning_rate": 0.00042476899035195, "loss": 3.4588, "step": 67730 }, { "epoch": 4.602187797255062, "grad_norm": 0.823639452457428, "learning_rate": 0.00042472652534311726, "loss": 3.3947, "step": 67735 }, { "epoch": 4.602527517325724, "grad_norm": 0.9724550843238831, "learning_rate": 0.0004246840603342846, "loss": 3.635, "step": 67740 }, { "epoch": 4.602867237396385, "grad_norm": 0.7851670980453491, "learning_rate": 0.0004246415953254518, "loss": 3.4265, "step": 67745 }, { "epoch": 4.603206957467047, "grad_norm": 0.9942404627799988, "learning_rate": 0.0004245991303166191, "loss": 3.5624, "step": 67750 }, { "epoch": 4.603546677537709, "grad_norm": 1.0582170486450195, "learning_rate": 0.00042455666530778643, "loss": 3.3409, "step": 67755 }, { "epoch": 4.60388639760837, "grad_norm": 1.032801866531372, "learning_rate": 0.00042451420029895366, "loss": 3.5892, "step": 67760 }, { "epoch": 4.6042261176790324, "grad_norm": 0.9317339658737183, "learning_rate": 0.00042447173529012094, "loss": 3.4826, "step": 67765 }, { "epoch": 4.6045658377496945, "grad_norm": 1.2864630222320557, "learning_rate": 0.0004244292702812883, "loss": 3.6659, "step": 67770 }, { "epoch": 4.604905557820356, "grad_norm": 0.9617891311645508, "learning_rate": 0.0004243868052724555, "loss": 3.5835, "step": 67775 }, { "epoch": 4.605245277891018, "grad_norm": 0.868038535118103, "learning_rate": 0.0004243443402636228, "loss": 3.6181, "step": 67780 }, { "epoch": 4.60558499796168, "grad_norm": 0.9905685186386108, "learning_rate": 0.00042430187525479006, "loss": 3.6172, "step": 67785 }, { "epoch": 4.605924718032341, "grad_norm": 0.9184408187866211, "learning_rate": 0.00042425941024595734, "loss": 3.5462, "step": 67790 }, { "epoch": 4.606264438103003, "grad_norm": 0.5900066494941711, "learning_rate": 0.0004242169452371246, "loss": 3.4703, "step": 67795 }, { "epoch": 4.606604158173665, "grad_norm": 0.916496217250824, "learning_rate": 0.0004241744802282919, "loss": 3.3533, "step": 67800 }, { "epoch": 4.606943878244326, "grad_norm": 0.8620443344116211, "learning_rate": 0.0004241320152194592, "loss": 3.464, "step": 67805 }, { "epoch": 4.6072835983149885, "grad_norm": 0.8090815544128418, "learning_rate": 0.00042408955021062646, "loss": 3.2885, "step": 67810 }, { "epoch": 4.6076233183856505, "grad_norm": 0.7371649742126465, "learning_rate": 0.00042404708520179374, "loss": 3.4942, "step": 67815 }, { "epoch": 4.607963038456312, "grad_norm": 0.7098739147186279, "learning_rate": 0.00042400462019296097, "loss": 3.0696, "step": 67820 }, { "epoch": 4.608302758526974, "grad_norm": 0.909998893737793, "learning_rate": 0.0004239621551841283, "loss": 3.6255, "step": 67825 }, { "epoch": 4.608642478597636, "grad_norm": 0.8828195929527283, "learning_rate": 0.0004239196901752956, "loss": 3.8395, "step": 67830 }, { "epoch": 4.608982198668297, "grad_norm": 0.8771922588348389, "learning_rate": 0.0004238772251664628, "loss": 3.4647, "step": 67835 }, { "epoch": 4.609321918738959, "grad_norm": 0.8357383012771606, "learning_rate": 0.00042383476015763014, "loss": 3.354, "step": 67840 }, { "epoch": 4.609661638809621, "grad_norm": 1.0097839832305908, "learning_rate": 0.0004237922951487974, "loss": 3.4152, "step": 67845 }, { "epoch": 4.610001358880282, "grad_norm": 0.9248858094215393, "learning_rate": 0.00042374983013996465, "loss": 3.5807, "step": 67850 }, { "epoch": 4.6103410789509445, "grad_norm": 0.9135956764221191, "learning_rate": 0.00042370736513113193, "loss": 3.4877, "step": 67855 }, { "epoch": 4.6106807990216065, "grad_norm": 0.9012488722801208, "learning_rate": 0.00042366490012229926, "loss": 3.4189, "step": 67860 }, { "epoch": 4.611020519092268, "grad_norm": 0.9534264206886292, "learning_rate": 0.0004236224351134665, "loss": 3.3999, "step": 67865 }, { "epoch": 4.61136023916293, "grad_norm": 0.9386141300201416, "learning_rate": 0.00042357997010463377, "loss": 3.1926, "step": 67870 }, { "epoch": 4.611699959233592, "grad_norm": 0.9828197956085205, "learning_rate": 0.0004235375050958011, "loss": 3.4444, "step": 67875 }, { "epoch": 4.612039679304253, "grad_norm": 0.7708816528320312, "learning_rate": 0.00042349504008696833, "loss": 3.6131, "step": 67880 }, { "epoch": 4.612379399374915, "grad_norm": 0.679082989692688, "learning_rate": 0.0004234525750781356, "loss": 3.4385, "step": 67885 }, { "epoch": 4.612719119445577, "grad_norm": 0.9784545302391052, "learning_rate": 0.0004234101100693029, "loss": 3.6415, "step": 67890 }, { "epoch": 4.613058839516238, "grad_norm": 1.0610295534133911, "learning_rate": 0.0004233676450604702, "loss": 3.2338, "step": 67895 }, { "epoch": 4.6133985595869005, "grad_norm": 0.9436649084091187, "learning_rate": 0.00042332518005163745, "loss": 3.6425, "step": 67900 }, { "epoch": 4.613738279657563, "grad_norm": 0.6794824600219727, "learning_rate": 0.00042328271504280473, "loss": 3.5371, "step": 67905 }, { "epoch": 4.614077999728224, "grad_norm": 1.107001781463623, "learning_rate": 0.00042324025003397206, "loss": 3.2345, "step": 67910 }, { "epoch": 4.614417719798886, "grad_norm": 0.9971826672554016, "learning_rate": 0.0004231977850251393, "loss": 3.4918, "step": 67915 }, { "epoch": 4.614757439869548, "grad_norm": 0.8758735656738281, "learning_rate": 0.00042315532001630657, "loss": 3.4424, "step": 67920 }, { "epoch": 4.615097159940209, "grad_norm": 0.8255982995033264, "learning_rate": 0.00042311285500747385, "loss": 3.7576, "step": 67925 }, { "epoch": 4.615436880010871, "grad_norm": 0.7605299353599548, "learning_rate": 0.00042307038999864113, "loss": 3.3631, "step": 67930 }, { "epoch": 4.615776600081533, "grad_norm": 0.8301802277565002, "learning_rate": 0.0004230279249898084, "loss": 3.4068, "step": 67935 }, { "epoch": 4.616116320152194, "grad_norm": 0.7834235429763794, "learning_rate": 0.0004229854599809757, "loss": 3.4111, "step": 67940 }, { "epoch": 4.6164560402228565, "grad_norm": 0.893020749092102, "learning_rate": 0.00042294299497214297, "loss": 3.6409, "step": 67945 }, { "epoch": 4.616795760293519, "grad_norm": 0.8067091703414917, "learning_rate": 0.00042290052996331025, "loss": 3.5938, "step": 67950 }, { "epoch": 4.61713548036418, "grad_norm": 0.8507208824157715, "learning_rate": 0.00042285806495447753, "loss": 3.6714, "step": 67955 }, { "epoch": 4.617475200434842, "grad_norm": 1.389599323272705, "learning_rate": 0.00042281559994564476, "loss": 3.3246, "step": 67960 }, { "epoch": 4.617814920505504, "grad_norm": 0.8027352690696716, "learning_rate": 0.0004227731349368121, "loss": 3.4091, "step": 67965 }, { "epoch": 4.618154640576165, "grad_norm": 1.2079293727874756, "learning_rate": 0.00042273066992797937, "loss": 3.4204, "step": 67970 }, { "epoch": 4.618494360646827, "grad_norm": 1.0536094903945923, "learning_rate": 0.0004226882049191466, "loss": 3.5493, "step": 67975 }, { "epoch": 4.618834080717489, "grad_norm": 0.8972793817520142, "learning_rate": 0.00042264573991031393, "loss": 3.284, "step": 67980 }, { "epoch": 4.61917380078815, "grad_norm": 0.879063069820404, "learning_rate": 0.0004226032749014812, "loss": 3.4105, "step": 67985 }, { "epoch": 4.6195135208588125, "grad_norm": 0.8161985278129578, "learning_rate": 0.00042256080989264844, "loss": 3.5361, "step": 67990 }, { "epoch": 4.619853240929475, "grad_norm": 0.9861201643943787, "learning_rate": 0.0004225183448838157, "loss": 3.5481, "step": 67995 }, { "epoch": 4.620192961000136, "grad_norm": 0.964358925819397, "learning_rate": 0.00042247587987498305, "loss": 3.2772, "step": 68000 }, { "epoch": 4.620532681070798, "grad_norm": 0.9929901361465454, "learning_rate": 0.0004224334148661503, "loss": 3.6082, "step": 68005 }, { "epoch": 4.62087240114146, "grad_norm": 0.805381715297699, "learning_rate": 0.00042239094985731756, "loss": 3.4793, "step": 68010 }, { "epoch": 4.621212121212121, "grad_norm": 0.7458207011222839, "learning_rate": 0.0004223484848484849, "loss": 3.3525, "step": 68015 }, { "epoch": 4.621551841282783, "grad_norm": 0.8662701845169067, "learning_rate": 0.0004223060198396521, "loss": 3.3581, "step": 68020 }, { "epoch": 4.621891561353445, "grad_norm": 0.8811438083648682, "learning_rate": 0.0004222635548308194, "loss": 3.3201, "step": 68025 }, { "epoch": 4.622231281424106, "grad_norm": 3.1602609157562256, "learning_rate": 0.00042222108982198673, "loss": 3.4015, "step": 68030 }, { "epoch": 4.6225710014947685, "grad_norm": 0.9522108435630798, "learning_rate": 0.00042217862481315396, "loss": 3.4041, "step": 68035 }, { "epoch": 4.62291072156543, "grad_norm": 1.0106728076934814, "learning_rate": 0.00042213615980432124, "loss": 3.5148, "step": 68040 }, { "epoch": 4.623250441636092, "grad_norm": 0.8571694493293762, "learning_rate": 0.0004220936947954885, "loss": 3.2579, "step": 68045 }, { "epoch": 4.623590161706754, "grad_norm": 0.9172520637512207, "learning_rate": 0.0004220512297866558, "loss": 3.617, "step": 68050 }, { "epoch": 4.623929881777415, "grad_norm": 0.7596650123596191, "learning_rate": 0.0004220087647778231, "loss": 3.2947, "step": 68055 }, { "epoch": 4.624269601848077, "grad_norm": 1.0978385210037231, "learning_rate": 0.00042196629976899036, "loss": 3.6102, "step": 68060 }, { "epoch": 4.624609321918739, "grad_norm": 0.9235647916793823, "learning_rate": 0.0004219238347601577, "loss": 3.7215, "step": 68065 }, { "epoch": 4.6249490419894, "grad_norm": 1.0100572109222412, "learning_rate": 0.0004218813697513249, "loss": 3.1625, "step": 68070 }, { "epoch": 4.6252887620600625, "grad_norm": 0.7820902466773987, "learning_rate": 0.0004218389047424922, "loss": 3.5212, "step": 68075 }, { "epoch": 4.6256284821307245, "grad_norm": 1.044643521308899, "learning_rate": 0.0004217964397336595, "loss": 3.4479, "step": 68080 }, { "epoch": 4.625968202201386, "grad_norm": 0.8095884919166565, "learning_rate": 0.00042175397472482676, "loss": 3.4578, "step": 68085 }, { "epoch": 4.626307922272048, "grad_norm": 0.9433499574661255, "learning_rate": 0.00042171150971599404, "loss": 3.3904, "step": 68090 }, { "epoch": 4.62664764234271, "grad_norm": 0.721687912940979, "learning_rate": 0.0004216690447071613, "loss": 3.5964, "step": 68095 }, { "epoch": 4.626987362413371, "grad_norm": 1.1639763116836548, "learning_rate": 0.0004216265796983286, "loss": 3.6244, "step": 68100 }, { "epoch": 4.627327082484033, "grad_norm": 0.9073703289031982, "learning_rate": 0.0004215841146894959, "loss": 3.3582, "step": 68105 }, { "epoch": 4.627666802554695, "grad_norm": 0.7800865173339844, "learning_rate": 0.00042154164968066316, "loss": 3.3294, "step": 68110 }, { "epoch": 4.628006522625356, "grad_norm": 1.0434372425079346, "learning_rate": 0.0004214991846718304, "loss": 3.6298, "step": 68115 }, { "epoch": 4.6283462426960185, "grad_norm": 0.9065384268760681, "learning_rate": 0.0004214567196629977, "loss": 3.474, "step": 68120 }, { "epoch": 4.6286859627666805, "grad_norm": 1.1309349536895752, "learning_rate": 0.000421414254654165, "loss": 3.6029, "step": 68125 }, { "epoch": 4.629025682837342, "grad_norm": 0.9185808300971985, "learning_rate": 0.0004213717896453322, "loss": 3.6367, "step": 68130 }, { "epoch": 4.629365402908004, "grad_norm": 0.7079995274543762, "learning_rate": 0.00042132932463649956, "loss": 3.4511, "step": 68135 }, { "epoch": 4.629705122978666, "grad_norm": 0.8110623955726624, "learning_rate": 0.00042128685962766684, "loss": 3.4244, "step": 68140 }, { "epoch": 4.630044843049327, "grad_norm": 0.7839182615280151, "learning_rate": 0.00042124439461883406, "loss": 3.4715, "step": 68145 }, { "epoch": 4.630384563119989, "grad_norm": 1.0865899324417114, "learning_rate": 0.00042120192961000134, "loss": 3.4708, "step": 68150 }, { "epoch": 4.630724283190651, "grad_norm": 0.999410092830658, "learning_rate": 0.0004211594646011687, "loss": 3.3514, "step": 68155 }, { "epoch": 4.631064003261312, "grad_norm": 0.8788954019546509, "learning_rate": 0.0004211169995923359, "loss": 3.5602, "step": 68160 }, { "epoch": 4.6314037233319745, "grad_norm": 0.9947049617767334, "learning_rate": 0.0004210745345835032, "loss": 3.4767, "step": 68165 }, { "epoch": 4.6317434434026366, "grad_norm": 0.8769662380218506, "learning_rate": 0.0004210320695746705, "loss": 3.5446, "step": 68170 }, { "epoch": 4.632083163473298, "grad_norm": 0.9333018660545349, "learning_rate": 0.00042098960456583774, "loss": 3.4571, "step": 68175 }, { "epoch": 4.63242288354396, "grad_norm": 0.7598336338996887, "learning_rate": 0.000420947139557005, "loss": 3.4049, "step": 68180 }, { "epoch": 4.632762603614622, "grad_norm": 1.095141887664795, "learning_rate": 0.0004209046745481723, "loss": 3.4888, "step": 68185 }, { "epoch": 4.633102323685283, "grad_norm": 0.8638190627098083, "learning_rate": 0.0004208622095393396, "loss": 3.4684, "step": 68190 }, { "epoch": 4.633442043755945, "grad_norm": 0.7676572799682617, "learning_rate": 0.00042081974453050686, "loss": 3.7041, "step": 68195 }, { "epoch": 4.633781763826607, "grad_norm": 0.8033756613731384, "learning_rate": 0.00042077727952167414, "loss": 3.386, "step": 68200 }, { "epoch": 4.634121483897268, "grad_norm": 0.8592642545700073, "learning_rate": 0.0004207348145128414, "loss": 3.4979, "step": 68205 }, { "epoch": 4.6344612039679305, "grad_norm": 0.9765332341194153, "learning_rate": 0.0004206923495040087, "loss": 3.195, "step": 68210 }, { "epoch": 4.634800924038593, "grad_norm": 0.8654045462608337, "learning_rate": 0.000420649884495176, "loss": 3.8721, "step": 68215 }, { "epoch": 4.635140644109254, "grad_norm": 0.8636555671691895, "learning_rate": 0.0004206074194863432, "loss": 3.4109, "step": 68220 }, { "epoch": 4.635480364179916, "grad_norm": 0.786760687828064, "learning_rate": 0.00042056495447751054, "loss": 3.6563, "step": 68225 }, { "epoch": 4.635820084250578, "grad_norm": 0.853145956993103, "learning_rate": 0.0004205224894686778, "loss": 3.3037, "step": 68230 }, { "epoch": 4.636159804321239, "grad_norm": 1.0670366287231445, "learning_rate": 0.0004204800244598451, "loss": 3.4527, "step": 68235 }, { "epoch": 4.636499524391901, "grad_norm": 0.8612810373306274, "learning_rate": 0.0004204375594510124, "loss": 3.5309, "step": 68240 }, { "epoch": 4.636839244462563, "grad_norm": 0.8726534247398376, "learning_rate": 0.00042039509444217967, "loss": 3.5563, "step": 68245 }, { "epoch": 4.637178964533224, "grad_norm": 1.0701817274093628, "learning_rate": 0.00042035262943334695, "loss": 3.4879, "step": 68250 }, { "epoch": 4.6375186846038865, "grad_norm": 0.7773467898368835, "learning_rate": 0.00042031016442451417, "loss": 3.4171, "step": 68255 }, { "epoch": 4.637858404674548, "grad_norm": 0.8920998573303223, "learning_rate": 0.0004202676994156815, "loss": 3.5887, "step": 68260 }, { "epoch": 4.63819812474521, "grad_norm": 0.7680952548980713, "learning_rate": 0.0004202252344068488, "loss": 3.3715, "step": 68265 }, { "epoch": 4.638537844815872, "grad_norm": 0.8944039344787598, "learning_rate": 0.000420182769398016, "loss": 3.2417, "step": 68270 }, { "epoch": 4.638877564886533, "grad_norm": 0.7180835008621216, "learning_rate": 0.00042014030438918335, "loss": 3.611, "step": 68275 }, { "epoch": 4.639217284957195, "grad_norm": 0.9352626204490662, "learning_rate": 0.0004200978393803506, "loss": 3.7235, "step": 68280 }, { "epoch": 4.639557005027857, "grad_norm": 1.074934720993042, "learning_rate": 0.00042005537437151785, "loss": 3.2216, "step": 68285 }, { "epoch": 4.639896725098518, "grad_norm": 0.7853044867515564, "learning_rate": 0.00042001290936268513, "loss": 3.3503, "step": 68290 }, { "epoch": 4.64023644516918, "grad_norm": 1.0257841348648071, "learning_rate": 0.00041997044435385247, "loss": 3.6241, "step": 68295 }, { "epoch": 4.6405761652398425, "grad_norm": 0.9955359697341919, "learning_rate": 0.0004199279793450197, "loss": 3.2571, "step": 68300 }, { "epoch": 4.640915885310504, "grad_norm": 0.8383559584617615, "learning_rate": 0.00041988551433618697, "loss": 3.1925, "step": 68305 }, { "epoch": 4.641255605381166, "grad_norm": 0.9184194803237915, "learning_rate": 0.0004198430493273543, "loss": 3.3999, "step": 68310 }, { "epoch": 4.641595325451828, "grad_norm": 1.046006202697754, "learning_rate": 0.00041980058431852153, "loss": 3.5223, "step": 68315 }, { "epoch": 4.641935045522489, "grad_norm": 0.8355832099914551, "learning_rate": 0.0004197581193096888, "loss": 3.4952, "step": 68320 }, { "epoch": 4.642274765593151, "grad_norm": 0.8756354451179504, "learning_rate": 0.00041971565430085615, "loss": 3.4124, "step": 68325 }, { "epoch": 4.642614485663813, "grad_norm": 1.0545001029968262, "learning_rate": 0.00041967318929202337, "loss": 3.5571, "step": 68330 }, { "epoch": 4.642954205734474, "grad_norm": 0.7567934989929199, "learning_rate": 0.00041963072428319065, "loss": 3.4895, "step": 68335 }, { "epoch": 4.6432939258051364, "grad_norm": 0.9333906173706055, "learning_rate": 0.00041958825927435793, "loss": 3.5048, "step": 68340 }, { "epoch": 4.6436336458757985, "grad_norm": 0.955683171749115, "learning_rate": 0.0004195457942655252, "loss": 3.1004, "step": 68345 }, { "epoch": 4.64397336594646, "grad_norm": 0.9125000834465027, "learning_rate": 0.0004195033292566925, "loss": 3.4811, "step": 68350 }, { "epoch": 4.644313086017122, "grad_norm": 0.8338185548782349, "learning_rate": 0.00041946086424785977, "loss": 3.5393, "step": 68355 }, { "epoch": 4.644652806087784, "grad_norm": 0.8546797633171082, "learning_rate": 0.00041941839923902705, "loss": 3.426, "step": 68360 }, { "epoch": 4.644992526158445, "grad_norm": 1.1002360582351685, "learning_rate": 0.00041937593423019433, "loss": 3.2052, "step": 68365 }, { "epoch": 4.645332246229107, "grad_norm": 0.9460350871086121, "learning_rate": 0.0004193334692213616, "loss": 3.6432, "step": 68370 }, { "epoch": 4.645671966299769, "grad_norm": 0.9006628394126892, "learning_rate": 0.00041929100421252884, "loss": 3.6207, "step": 68375 }, { "epoch": 4.64601168637043, "grad_norm": 0.8346257209777832, "learning_rate": 0.0004192485392036962, "loss": 3.3907, "step": 68380 }, { "epoch": 4.6463514064410925, "grad_norm": 0.7549441456794739, "learning_rate": 0.00041920607419486345, "loss": 3.4689, "step": 68385 }, { "epoch": 4.6466911265117545, "grad_norm": 0.8115181922912598, "learning_rate": 0.0004191636091860307, "loss": 3.3721, "step": 68390 }, { "epoch": 4.647030846582416, "grad_norm": 1.1192134618759155, "learning_rate": 0.000419121144177198, "loss": 3.5807, "step": 68395 }, { "epoch": 4.647370566653078, "grad_norm": 1.872918725013733, "learning_rate": 0.0004190786791683653, "loss": 3.6555, "step": 68400 }, { "epoch": 4.64771028672374, "grad_norm": 1.0189216136932373, "learning_rate": 0.0004190362141595326, "loss": 3.5057, "step": 68405 }, { "epoch": 4.648050006794401, "grad_norm": 0.9271153807640076, "learning_rate": 0.0004189937491506998, "loss": 3.4399, "step": 68410 }, { "epoch": 4.648389726865063, "grad_norm": 1.0508484840393066, "learning_rate": 0.00041895128414186713, "loss": 3.4049, "step": 68415 }, { "epoch": 4.648729446935725, "grad_norm": 0.8426085710525513, "learning_rate": 0.0004189088191330344, "loss": 3.3728, "step": 68420 }, { "epoch": 4.649069167006386, "grad_norm": 0.8193681836128235, "learning_rate": 0.00041886635412420164, "loss": 3.3729, "step": 68425 }, { "epoch": 4.6494088870770485, "grad_norm": 0.9380782842636108, "learning_rate": 0.000418823889115369, "loss": 3.4082, "step": 68430 }, { "epoch": 4.6497486071477105, "grad_norm": 0.9658963084220886, "learning_rate": 0.00041878142410653625, "loss": 3.5177, "step": 68435 }, { "epoch": 4.650088327218372, "grad_norm": 0.7768871784210205, "learning_rate": 0.0004187389590977035, "loss": 3.8404, "step": 68440 }, { "epoch": 4.650428047289034, "grad_norm": 0.8578163385391235, "learning_rate": 0.00041869649408887076, "loss": 3.5243, "step": 68445 }, { "epoch": 4.650767767359696, "grad_norm": 1.1034132242202759, "learning_rate": 0.0004186540290800381, "loss": 3.5499, "step": 68450 }, { "epoch": 4.651107487430357, "grad_norm": 0.7648698687553406, "learning_rate": 0.0004186115640712053, "loss": 3.4263, "step": 68455 }, { "epoch": 4.651447207501019, "grad_norm": 0.7452331185340881, "learning_rate": 0.0004185690990623726, "loss": 3.8387, "step": 68460 }, { "epoch": 4.651786927571681, "grad_norm": 0.7394859790802002, "learning_rate": 0.00041852663405353993, "loss": 3.4112, "step": 68465 }, { "epoch": 4.652126647642342, "grad_norm": 0.8813775777816772, "learning_rate": 0.00041848416904470716, "loss": 3.5976, "step": 68470 }, { "epoch": 4.6524663677130045, "grad_norm": 1.0817413330078125, "learning_rate": 0.00041844170403587444, "loss": 3.323, "step": 68475 }, { "epoch": 4.6528060877836666, "grad_norm": 0.7993396520614624, "learning_rate": 0.0004183992390270417, "loss": 3.1931, "step": 68480 }, { "epoch": 4.653145807854328, "grad_norm": 1.0633779764175415, "learning_rate": 0.000418356774018209, "loss": 3.5304, "step": 68485 }, { "epoch": 4.65348552792499, "grad_norm": 0.8796688318252563, "learning_rate": 0.0004183143090093763, "loss": 3.5452, "step": 68490 }, { "epoch": 4.653825247995652, "grad_norm": 0.9149452447891235, "learning_rate": 0.00041827184400054356, "loss": 3.3748, "step": 68495 }, { "epoch": 4.654164968066313, "grad_norm": 0.8446894884109497, "learning_rate": 0.00041822937899171084, "loss": 3.5857, "step": 68500 }, { "epoch": 4.654504688136975, "grad_norm": 0.8676488995552063, "learning_rate": 0.0004181869139828781, "loss": 3.543, "step": 68505 }, { "epoch": 4.654844408207637, "grad_norm": 0.8865768313407898, "learning_rate": 0.0004181444489740454, "loss": 3.5333, "step": 68510 }, { "epoch": 4.655184128278298, "grad_norm": 0.7793726921081543, "learning_rate": 0.0004181019839652126, "loss": 3.3695, "step": 68515 }, { "epoch": 4.6555238483489605, "grad_norm": 0.8741711378097534, "learning_rate": 0.00041805951895637996, "loss": 3.4442, "step": 68520 }, { "epoch": 4.655863568419623, "grad_norm": 0.9149139523506165, "learning_rate": 0.00041801705394754724, "loss": 3.378, "step": 68525 }, { "epoch": 4.656203288490284, "grad_norm": 1.035473346710205, "learning_rate": 0.00041797458893871447, "loss": 3.4939, "step": 68530 }, { "epoch": 4.656543008560946, "grad_norm": 0.9867655634880066, "learning_rate": 0.0004179321239298818, "loss": 3.5702, "step": 68535 }, { "epoch": 4.656882728631608, "grad_norm": 0.9957058429718018, "learning_rate": 0.0004178896589210491, "loss": 3.5761, "step": 68540 }, { "epoch": 4.657222448702269, "grad_norm": 0.9165830612182617, "learning_rate": 0.0004178471939122163, "loss": 3.54, "step": 68545 }, { "epoch": 4.657562168772931, "grad_norm": 0.8947004675865173, "learning_rate": 0.0004178047289033836, "loss": 3.642, "step": 68550 }, { "epoch": 4.657901888843593, "grad_norm": 0.8200177550315857, "learning_rate": 0.0004177622638945509, "loss": 3.2511, "step": 68555 }, { "epoch": 4.658241608914254, "grad_norm": 0.9102053642272949, "learning_rate": 0.00041771979888571815, "loss": 3.523, "step": 68560 }, { "epoch": 4.6585813289849165, "grad_norm": 0.8251795172691345, "learning_rate": 0.00041767733387688543, "loss": 3.6133, "step": 68565 }, { "epoch": 4.658921049055579, "grad_norm": 1.107084035873413, "learning_rate": 0.00041763486886805276, "loss": 3.7238, "step": 68570 }, { "epoch": 4.65926076912624, "grad_norm": 0.8720716238021851, "learning_rate": 0.00041759240385922004, "loss": 3.3384, "step": 68575 }, { "epoch": 4.659600489196902, "grad_norm": 0.8229120969772339, "learning_rate": 0.00041754993885038727, "loss": 3.6829, "step": 68580 }, { "epoch": 4.659940209267564, "grad_norm": 0.9354588985443115, "learning_rate": 0.00041750747384155455, "loss": 3.6664, "step": 68585 }, { "epoch": 4.660279929338225, "grad_norm": 0.9079734086990356, "learning_rate": 0.0004174650088327219, "loss": 3.4931, "step": 68590 }, { "epoch": 4.660619649408887, "grad_norm": 0.8546583652496338, "learning_rate": 0.0004174225438238891, "loss": 3.4534, "step": 68595 }, { "epoch": 4.660959369479549, "grad_norm": 0.7542752027511597, "learning_rate": 0.0004173800788150564, "loss": 3.0967, "step": 68600 }, { "epoch": 4.66129908955021, "grad_norm": 0.7804221510887146, "learning_rate": 0.0004173376138062237, "loss": 3.4448, "step": 68605 }, { "epoch": 4.6616388096208725, "grad_norm": 0.9289933443069458, "learning_rate": 0.00041729514879739095, "loss": 3.5533, "step": 68610 }, { "epoch": 4.661978529691535, "grad_norm": 0.9845079779624939, "learning_rate": 0.00041725268378855823, "loss": 3.4043, "step": 68615 }, { "epoch": 4.662318249762196, "grad_norm": 1.1165680885314941, "learning_rate": 0.00041721021877972556, "loss": 3.3884, "step": 68620 }, { "epoch": 4.662657969832858, "grad_norm": 0.7916510105133057, "learning_rate": 0.0004171677537708928, "loss": 3.4287, "step": 68625 }, { "epoch": 4.66299768990352, "grad_norm": 0.8921187520027161, "learning_rate": 0.00041712528876206007, "loss": 3.659, "step": 68630 }, { "epoch": 4.663337409974181, "grad_norm": 1.0192402601242065, "learning_rate": 0.00041708282375322735, "loss": 3.4915, "step": 68635 }, { "epoch": 4.663677130044843, "grad_norm": 1.1442757844924927, "learning_rate": 0.00041704035874439463, "loss": 3.3551, "step": 68640 }, { "epoch": 4.664016850115505, "grad_norm": 0.7416129112243652, "learning_rate": 0.0004169978937355619, "loss": 3.474, "step": 68645 }, { "epoch": 4.6643565701861665, "grad_norm": 1.1482139825820923, "learning_rate": 0.0004169554287267292, "loss": 3.3856, "step": 68650 }, { "epoch": 4.6646962902568285, "grad_norm": 0.7678807377815247, "learning_rate": 0.00041691296371789647, "loss": 3.3709, "step": 68655 }, { "epoch": 4.665036010327491, "grad_norm": 0.7061179280281067, "learning_rate": 0.00041687049870906375, "loss": 3.6115, "step": 68660 }, { "epoch": 4.665375730398152, "grad_norm": 0.8391000032424927, "learning_rate": 0.00041682803370023103, "loss": 3.537, "step": 68665 }, { "epoch": 4.665715450468814, "grad_norm": 0.8942126631736755, "learning_rate": 0.00041678556869139825, "loss": 3.5336, "step": 68670 }, { "epoch": 4.666055170539476, "grad_norm": 0.8027516603469849, "learning_rate": 0.0004167431036825656, "loss": 3.6208, "step": 68675 }, { "epoch": 4.666394890610137, "grad_norm": 0.838283360004425, "learning_rate": 0.00041670063867373287, "loss": 3.2632, "step": 68680 }, { "epoch": 4.666734610680799, "grad_norm": 1.0803709030151367, "learning_rate": 0.0004166581736649001, "loss": 3.6309, "step": 68685 }, { "epoch": 4.667074330751461, "grad_norm": 1.142464518547058, "learning_rate": 0.00041661570865606743, "loss": 3.5949, "step": 68690 }, { "epoch": 4.6674140508221225, "grad_norm": 0.8500204086303711, "learning_rate": 0.0004165732436472347, "loss": 3.3203, "step": 68695 }, { "epoch": 4.6677537708927845, "grad_norm": 0.9047169089317322, "learning_rate": 0.00041653077863840194, "loss": 3.5843, "step": 68700 }, { "epoch": 4.668093490963447, "grad_norm": 0.8354958891868591, "learning_rate": 0.0004164883136295692, "loss": 3.4368, "step": 68705 }, { "epoch": 4.668433211034108, "grad_norm": 1.0198941230773926, "learning_rate": 0.00041644584862073655, "loss": 3.3163, "step": 68710 }, { "epoch": 4.66877293110477, "grad_norm": 0.9637662768363953, "learning_rate": 0.0004164033836119038, "loss": 3.4993, "step": 68715 }, { "epoch": 4.669112651175431, "grad_norm": 1.1615840196609497, "learning_rate": 0.00041636091860307106, "loss": 3.5355, "step": 68720 }, { "epoch": 4.669452371246093, "grad_norm": 0.97249436378479, "learning_rate": 0.0004163184535942384, "loss": 3.6188, "step": 68725 }, { "epoch": 4.669792091316755, "grad_norm": 1.0933294296264648, "learning_rate": 0.0004162759885854056, "loss": 3.5181, "step": 68730 }, { "epoch": 4.670131811387416, "grad_norm": 1.008276343345642, "learning_rate": 0.0004162335235765729, "loss": 3.4522, "step": 68735 }, { "epoch": 4.6704715314580785, "grad_norm": 1.266398549079895, "learning_rate": 0.0004161910585677402, "loss": 3.4191, "step": 68740 }, { "epoch": 4.6708112515287405, "grad_norm": 0.9592137932777405, "learning_rate": 0.0004161485935589075, "loss": 3.306, "step": 68745 }, { "epoch": 4.671150971599402, "grad_norm": 1.3021209239959717, "learning_rate": 0.00041610612855007474, "loss": 3.5541, "step": 68750 }, { "epoch": 4.671490691670064, "grad_norm": 1.0275015830993652, "learning_rate": 0.000416063663541242, "loss": 3.4993, "step": 68755 }, { "epoch": 4.671830411740726, "grad_norm": 1.0702340602874756, "learning_rate": 0.00041602119853240935, "loss": 3.5444, "step": 68760 }, { "epoch": 4.672170131811387, "grad_norm": 0.7555716037750244, "learning_rate": 0.0004159787335235766, "loss": 3.6743, "step": 68765 }, { "epoch": 4.672509851882049, "grad_norm": 1.057653784751892, "learning_rate": 0.00041593626851474386, "loss": 3.2856, "step": 68770 }, { "epoch": 4.672849571952711, "grad_norm": 0.6843161582946777, "learning_rate": 0.00041589380350591114, "loss": 3.7136, "step": 68775 }, { "epoch": 4.673189292023372, "grad_norm": 0.8282147645950317, "learning_rate": 0.0004158513384970784, "loss": 3.6058, "step": 68780 }, { "epoch": 4.6735290120940345, "grad_norm": 0.7954680323600769, "learning_rate": 0.0004158088734882457, "loss": 3.5762, "step": 68785 }, { "epoch": 4.673868732164697, "grad_norm": 0.7610088586807251, "learning_rate": 0.000415766408479413, "loss": 3.4351, "step": 68790 }, { "epoch": 4.674208452235358, "grad_norm": 0.8840131759643555, "learning_rate": 0.00041572394347058026, "loss": 3.3526, "step": 68795 }, { "epoch": 4.67454817230602, "grad_norm": 0.8599383234977722, "learning_rate": 0.00041568147846174754, "loss": 3.434, "step": 68800 }, { "epoch": 4.674887892376682, "grad_norm": 0.9170916676521301, "learning_rate": 0.0004156390134529148, "loss": 3.2948, "step": 68805 }, { "epoch": 4.675227612447343, "grad_norm": 0.9280619025230408, "learning_rate": 0.00041559654844408204, "loss": 3.6941, "step": 68810 }, { "epoch": 4.675567332518005, "grad_norm": 0.7576864957809448, "learning_rate": 0.0004155540834352494, "loss": 3.491, "step": 68815 }, { "epoch": 4.675907052588667, "grad_norm": 0.8543628454208374, "learning_rate": 0.00041551161842641666, "loss": 3.5155, "step": 68820 }, { "epoch": 4.676246772659328, "grad_norm": 0.8939573764801025, "learning_rate": 0.0004154691534175839, "loss": 3.3863, "step": 68825 }, { "epoch": 4.6765864927299905, "grad_norm": 0.8860607147216797, "learning_rate": 0.0004154266884087512, "loss": 3.3293, "step": 68830 }, { "epoch": 4.676926212800653, "grad_norm": 1.1328390836715698, "learning_rate": 0.0004153842233999185, "loss": 3.5256, "step": 68835 }, { "epoch": 4.677265932871314, "grad_norm": 0.9975154995918274, "learning_rate": 0.0004153417583910857, "loss": 3.2659, "step": 68840 }, { "epoch": 4.677605652941976, "grad_norm": 0.9693921208381653, "learning_rate": 0.000415299293382253, "loss": 3.4272, "step": 68845 }, { "epoch": 4.677945373012638, "grad_norm": 0.8597924709320068, "learning_rate": 0.00041525682837342034, "loss": 3.3507, "step": 68850 }, { "epoch": 4.678285093083299, "grad_norm": 0.770686686038971, "learning_rate": 0.00041521436336458756, "loss": 3.4368, "step": 68855 }, { "epoch": 4.678624813153961, "grad_norm": 1.0872145891189575, "learning_rate": 0.00041517189835575484, "loss": 3.4364, "step": 68860 }, { "epoch": 4.678964533224623, "grad_norm": 1.0175124406814575, "learning_rate": 0.0004151294333469222, "loss": 3.3223, "step": 68865 }, { "epoch": 4.679304253295284, "grad_norm": 0.9084970951080322, "learning_rate": 0.0004150869683380894, "loss": 3.256, "step": 68870 }, { "epoch": 4.6796439733659465, "grad_norm": 0.9755767583847046, "learning_rate": 0.0004150445033292567, "loss": 3.5321, "step": 68875 }, { "epoch": 4.679983693436609, "grad_norm": 0.8634095788002014, "learning_rate": 0.000415002038320424, "loss": 3.3742, "step": 68880 }, { "epoch": 4.68032341350727, "grad_norm": 0.8646619915962219, "learning_rate": 0.00041495957331159124, "loss": 3.4129, "step": 68885 }, { "epoch": 4.680663133577932, "grad_norm": 1.0422086715698242, "learning_rate": 0.0004149171083027585, "loss": 3.561, "step": 68890 }, { "epoch": 4.681002853648594, "grad_norm": 0.9213076829910278, "learning_rate": 0.0004148746432939258, "loss": 3.4369, "step": 68895 }, { "epoch": 4.681342573719255, "grad_norm": 1.0218091011047363, "learning_rate": 0.0004148321782850931, "loss": 3.4145, "step": 68900 }, { "epoch": 4.681682293789917, "grad_norm": 1.0470527410507202, "learning_rate": 0.00041478971327626036, "loss": 3.5418, "step": 68905 }, { "epoch": 4.682022013860579, "grad_norm": 0.8341613411903381, "learning_rate": 0.00041474724826742764, "loss": 3.3583, "step": 68910 }, { "epoch": 4.68236173393124, "grad_norm": 1.0093225240707397, "learning_rate": 0.000414704783258595, "loss": 3.4729, "step": 68915 }, { "epoch": 4.6827014540019025, "grad_norm": 0.936660647392273, "learning_rate": 0.0004146623182497622, "loss": 3.7194, "step": 68920 }, { "epoch": 4.683041174072565, "grad_norm": 0.9471144080162048, "learning_rate": 0.0004146198532409295, "loss": 3.5766, "step": 68925 }, { "epoch": 4.683380894143226, "grad_norm": 0.8413189649581909, "learning_rate": 0.00041457738823209676, "loss": 3.4148, "step": 68930 }, { "epoch": 4.683720614213888, "grad_norm": 0.8023260235786438, "learning_rate": 0.00041453492322326404, "loss": 3.1295, "step": 68935 }, { "epoch": 4.684060334284549, "grad_norm": 0.769117534160614, "learning_rate": 0.0004144924582144313, "loss": 3.6845, "step": 68940 }, { "epoch": 4.684400054355211, "grad_norm": 0.7573029398918152, "learning_rate": 0.0004144499932055986, "loss": 3.3628, "step": 68945 }, { "epoch": 4.684739774425873, "grad_norm": 0.9057371616363525, "learning_rate": 0.0004144075281967659, "loss": 3.4598, "step": 68950 }, { "epoch": 4.685079494496534, "grad_norm": 0.9903303384780884, "learning_rate": 0.00041436506318793317, "loss": 3.4652, "step": 68955 }, { "epoch": 4.6854192145671965, "grad_norm": 1.0082008838653564, "learning_rate": 0.00041432259817910045, "loss": 3.4539, "step": 68960 }, { "epoch": 4.6857589346378585, "grad_norm": 0.8353513479232788, "learning_rate": 0.00041428013317026767, "loss": 3.4434, "step": 68965 }, { "epoch": 4.68609865470852, "grad_norm": 0.8890240788459778, "learning_rate": 0.000414237668161435, "loss": 3.2423, "step": 68970 }, { "epoch": 4.686438374779182, "grad_norm": 0.9063505530357361, "learning_rate": 0.0004141952031526023, "loss": 3.5485, "step": 68975 }, { "epoch": 4.686778094849844, "grad_norm": 0.7631552815437317, "learning_rate": 0.0004141527381437695, "loss": 3.5129, "step": 68980 }, { "epoch": 4.687117814920505, "grad_norm": 0.9729147553443909, "learning_rate": 0.00041411027313493685, "loss": 3.1509, "step": 68985 }, { "epoch": 4.687457534991167, "grad_norm": 0.8315043449401855, "learning_rate": 0.0004140678081261041, "loss": 3.5632, "step": 68990 }, { "epoch": 4.687797255061829, "grad_norm": 1.5017859935760498, "learning_rate": 0.00041402534311727135, "loss": 3.6263, "step": 68995 }, { "epoch": 4.68813697513249, "grad_norm": 1.2866005897521973, "learning_rate": 0.00041398287810843863, "loss": 3.3679, "step": 69000 }, { "epoch": 4.6884766952031525, "grad_norm": 0.9483434557914734, "learning_rate": 0.00041394041309960597, "loss": 3.684, "step": 69005 }, { "epoch": 4.6888164152738145, "grad_norm": 0.8910866379737854, "learning_rate": 0.0004138979480907732, "loss": 3.6466, "step": 69010 }, { "epoch": 4.689156135344476, "grad_norm": 0.8734315633773804, "learning_rate": 0.00041385548308194047, "loss": 3.7488, "step": 69015 }, { "epoch": 4.689495855415138, "grad_norm": 1.0456229448318481, "learning_rate": 0.0004138130180731078, "loss": 3.541, "step": 69020 }, { "epoch": 4.6898355754858, "grad_norm": 1.072800636291504, "learning_rate": 0.00041377055306427503, "loss": 3.5719, "step": 69025 }, { "epoch": 4.690175295556461, "grad_norm": 0.8045454621315002, "learning_rate": 0.0004137280880554423, "loss": 3.8004, "step": 69030 }, { "epoch": 4.690515015627123, "grad_norm": 0.7314339876174927, "learning_rate": 0.0004136856230466096, "loss": 3.3762, "step": 69035 }, { "epoch": 4.690854735697785, "grad_norm": 0.8361109495162964, "learning_rate": 0.00041364315803777687, "loss": 3.3964, "step": 69040 }, { "epoch": 4.691194455768446, "grad_norm": 0.9029577970504761, "learning_rate": 0.00041360069302894415, "loss": 3.5059, "step": 69045 }, { "epoch": 4.6915341758391085, "grad_norm": 1.2076871395111084, "learning_rate": 0.00041355822802011143, "loss": 3.5094, "step": 69050 }, { "epoch": 4.6918738959097706, "grad_norm": 0.9464370012283325, "learning_rate": 0.0004135157630112787, "loss": 3.6401, "step": 69055 }, { "epoch": 4.692213615980432, "grad_norm": 1.0849424600601196, "learning_rate": 0.000413473298002446, "loss": 3.6149, "step": 69060 }, { "epoch": 4.692553336051094, "grad_norm": 0.7472956776618958, "learning_rate": 0.00041343083299361327, "loss": 3.5002, "step": 69065 }, { "epoch": 4.692893056121756, "grad_norm": 0.9377622008323669, "learning_rate": 0.0004133883679847805, "loss": 3.6878, "step": 69070 }, { "epoch": 4.693232776192417, "grad_norm": 0.9892808198928833, "learning_rate": 0.00041334590297594783, "loss": 3.8907, "step": 69075 }, { "epoch": 4.693572496263079, "grad_norm": 0.7633818984031677, "learning_rate": 0.0004133034379671151, "loss": 3.5556, "step": 69080 }, { "epoch": 4.693912216333741, "grad_norm": 0.7865645885467529, "learning_rate": 0.0004132609729582824, "loss": 3.7338, "step": 69085 }, { "epoch": 4.694251936404402, "grad_norm": 0.9033628702163696, "learning_rate": 0.0004132185079494497, "loss": 3.1633, "step": 69090 }, { "epoch": 4.6945916564750645, "grad_norm": 0.793864369392395, "learning_rate": 0.00041317604294061695, "loss": 3.6217, "step": 69095 }, { "epoch": 4.694931376545727, "grad_norm": 0.9220806360244751, "learning_rate": 0.00041313357793178423, "loss": 3.5634, "step": 69100 }, { "epoch": 4.695271096616388, "grad_norm": 0.9680860638618469, "learning_rate": 0.00041309111292295146, "loss": 3.6296, "step": 69105 }, { "epoch": 4.69561081668705, "grad_norm": 0.8768517971038818, "learning_rate": 0.0004130486479141188, "loss": 3.4488, "step": 69110 }, { "epoch": 4.695950536757712, "grad_norm": 0.9426429271697998, "learning_rate": 0.0004130061829052861, "loss": 3.5465, "step": 69115 }, { "epoch": 4.696290256828373, "grad_norm": 0.7944425344467163, "learning_rate": 0.0004129637178964533, "loss": 3.6723, "step": 69120 }, { "epoch": 4.696629976899035, "grad_norm": 2.1346771717071533, "learning_rate": 0.00041292125288762063, "loss": 3.4964, "step": 69125 }, { "epoch": 4.696969696969697, "grad_norm": 1.229452133178711, "learning_rate": 0.0004128787878787879, "loss": 3.2191, "step": 69130 }, { "epoch": 4.697309417040358, "grad_norm": 0.7481483221054077, "learning_rate": 0.00041283632286995514, "loss": 3.5667, "step": 69135 }, { "epoch": 4.6976491371110205, "grad_norm": 0.8556455373764038, "learning_rate": 0.0004127938578611224, "loss": 3.4788, "step": 69140 }, { "epoch": 4.697988857181683, "grad_norm": 0.8856696486473083, "learning_rate": 0.00041275139285228975, "loss": 3.4867, "step": 69145 }, { "epoch": 4.698328577252344, "grad_norm": 0.8202235102653503, "learning_rate": 0.000412708927843457, "loss": 3.6148, "step": 69150 }, { "epoch": 4.698668297323006, "grad_norm": 1.042313575744629, "learning_rate": 0.00041266646283462426, "loss": 3.5665, "step": 69155 }, { "epoch": 4.699008017393668, "grad_norm": 0.7703211307525635, "learning_rate": 0.0004126239978257916, "loss": 3.6274, "step": 69160 }, { "epoch": 4.699347737464329, "grad_norm": 0.9743807315826416, "learning_rate": 0.0004125815328169588, "loss": 3.3516, "step": 69165 }, { "epoch": 4.699687457534991, "grad_norm": 0.8901051878929138, "learning_rate": 0.0004125390678081261, "loss": 3.5179, "step": 69170 }, { "epoch": 4.700027177605653, "grad_norm": 0.9366716146469116, "learning_rate": 0.00041249660279929343, "loss": 3.3963, "step": 69175 }, { "epoch": 4.700366897676314, "grad_norm": 0.8757122755050659, "learning_rate": 0.00041245413779046066, "loss": 3.348, "step": 69180 }, { "epoch": 4.7007066177469765, "grad_norm": 0.8713012337684631, "learning_rate": 0.00041241167278162794, "loss": 3.4491, "step": 69185 }, { "epoch": 4.701046337817639, "grad_norm": 1.0504965782165527, "learning_rate": 0.0004123692077727952, "loss": 3.1821, "step": 69190 }, { "epoch": 4.7013860578883, "grad_norm": 0.7504411935806274, "learning_rate": 0.0004123267427639625, "loss": 3.7234, "step": 69195 }, { "epoch": 4.701725777958962, "grad_norm": 0.9374606609344482, "learning_rate": 0.0004122842777551298, "loss": 3.5147, "step": 69200 }, { "epoch": 4.702065498029624, "grad_norm": 0.9495920538902283, "learning_rate": 0.00041224181274629706, "loss": 3.5453, "step": 69205 }, { "epoch": 4.702405218100285, "grad_norm": 0.8455723524093628, "learning_rate": 0.00041219934773746434, "loss": 3.5291, "step": 69210 }, { "epoch": 4.702744938170947, "grad_norm": 1.476053237915039, "learning_rate": 0.0004121568827286316, "loss": 3.5161, "step": 69215 }, { "epoch": 4.703084658241609, "grad_norm": 1.0459927320480347, "learning_rate": 0.0004121144177197989, "loss": 3.5756, "step": 69220 }, { "epoch": 4.7034243783122704, "grad_norm": 0.9662806391716003, "learning_rate": 0.0004120719527109661, "loss": 3.5127, "step": 69225 }, { "epoch": 4.7037640983829325, "grad_norm": 0.782564640045166, "learning_rate": 0.00041202948770213346, "loss": 3.6749, "step": 69230 }, { "epoch": 4.704103818453595, "grad_norm": 0.853685736656189, "learning_rate": 0.00041198702269330074, "loss": 3.4926, "step": 69235 }, { "epoch": 4.704443538524256, "grad_norm": 0.8494096994400024, "learning_rate": 0.00041194455768446797, "loss": 3.5054, "step": 69240 }, { "epoch": 4.704783258594918, "grad_norm": 0.866094172000885, "learning_rate": 0.0004119020926756353, "loss": 3.5804, "step": 69245 }, { "epoch": 4.70512297866558, "grad_norm": 0.9784623980522156, "learning_rate": 0.0004118596276668026, "loss": 3.5316, "step": 69250 }, { "epoch": 4.705462698736241, "grad_norm": 0.9873091578483582, "learning_rate": 0.00041181716265796986, "loss": 3.6019, "step": 69255 }, { "epoch": 4.705802418806903, "grad_norm": 1.0187195539474487, "learning_rate": 0.0004117746976491371, "loss": 3.5431, "step": 69260 }, { "epoch": 4.706142138877565, "grad_norm": 0.8958463668823242, "learning_rate": 0.0004117322326403044, "loss": 3.358, "step": 69265 }, { "epoch": 4.7064818589482265, "grad_norm": 0.6892831325531006, "learning_rate": 0.0004116897676314717, "loss": 3.4991, "step": 69270 }, { "epoch": 4.7068215790188885, "grad_norm": 0.8882495760917664, "learning_rate": 0.00041164730262263893, "loss": 3.3297, "step": 69275 }, { "epoch": 4.707161299089551, "grad_norm": 0.8000712394714355, "learning_rate": 0.00041160483761380626, "loss": 3.3443, "step": 69280 }, { "epoch": 4.707501019160212, "grad_norm": 0.91518634557724, "learning_rate": 0.00041156237260497354, "loss": 3.3768, "step": 69285 }, { "epoch": 4.707840739230874, "grad_norm": 0.8943261504173279, "learning_rate": 0.00041151990759614077, "loss": 3.3386, "step": 69290 }, { "epoch": 4.708180459301536, "grad_norm": 0.9731014966964722, "learning_rate": 0.00041147744258730805, "loss": 3.4249, "step": 69295 }, { "epoch": 4.708520179372197, "grad_norm": 0.9764625430107117, "learning_rate": 0.0004114349775784754, "loss": 3.6803, "step": 69300 }, { "epoch": 4.708859899442859, "grad_norm": 0.8385946154594421, "learning_rate": 0.0004113925125696426, "loss": 3.6429, "step": 69305 }, { "epoch": 4.709199619513521, "grad_norm": 0.9026013612747192, "learning_rate": 0.0004113500475608099, "loss": 3.4236, "step": 69310 }, { "epoch": 4.7095393395841825, "grad_norm": 0.6916206479072571, "learning_rate": 0.0004113075825519772, "loss": 3.5538, "step": 69315 }, { "epoch": 4.7098790596548445, "grad_norm": 0.9716611504554749, "learning_rate": 0.00041126511754314445, "loss": 3.5718, "step": 69320 }, { "epoch": 4.710218779725507, "grad_norm": 0.7719985246658325, "learning_rate": 0.00041122265253431173, "loss": 3.4827, "step": 69325 }, { "epoch": 4.710558499796168, "grad_norm": 0.9900364279747009, "learning_rate": 0.000411180187525479, "loss": 3.4345, "step": 69330 }, { "epoch": 4.71089821986683, "grad_norm": 0.8607082366943359, "learning_rate": 0.0004111377225166463, "loss": 3.3395, "step": 69335 }, { "epoch": 4.711237939937492, "grad_norm": 0.8687829375267029, "learning_rate": 0.00041109525750781357, "loss": 3.8701, "step": 69340 }, { "epoch": 4.711577660008153, "grad_norm": 0.9851251840591431, "learning_rate": 0.00041105279249898085, "loss": 3.5265, "step": 69345 }, { "epoch": 4.711917380078815, "grad_norm": 0.9813066124916077, "learning_rate": 0.00041101032749014813, "loss": 3.2969, "step": 69350 }, { "epoch": 4.712257100149477, "grad_norm": 0.9464268684387207, "learning_rate": 0.0004109678624813154, "loss": 3.3318, "step": 69355 }, { "epoch": 4.7125968202201385, "grad_norm": 0.7016441822052002, "learning_rate": 0.0004109253974724827, "loss": 3.46, "step": 69360 }, { "epoch": 4.712936540290801, "grad_norm": 0.6918023228645325, "learning_rate": 0.0004108829324636499, "loss": 3.6066, "step": 69365 }, { "epoch": 4.713276260361463, "grad_norm": 0.949932873249054, "learning_rate": 0.00041084046745481725, "loss": 3.4233, "step": 69370 }, { "epoch": 4.713615980432124, "grad_norm": 1.0643188953399658, "learning_rate": 0.00041079800244598453, "loss": 3.4068, "step": 69375 }, { "epoch": 4.713955700502786, "grad_norm": 0.7567729353904724, "learning_rate": 0.00041075553743715175, "loss": 3.4116, "step": 69380 }, { "epoch": 4.714295420573448, "grad_norm": 0.823922872543335, "learning_rate": 0.0004107130724283191, "loss": 3.3064, "step": 69385 }, { "epoch": 4.714635140644109, "grad_norm": 0.8823032975196838, "learning_rate": 0.00041067060741948637, "loss": 3.2631, "step": 69390 }, { "epoch": 4.714974860714771, "grad_norm": 0.8701813817024231, "learning_rate": 0.0004106281424106536, "loss": 3.6263, "step": 69395 }, { "epoch": 4.715314580785432, "grad_norm": 0.9479604363441467, "learning_rate": 0.0004105856774018209, "loss": 3.5492, "step": 69400 }, { "epoch": 4.7156543008560945, "grad_norm": 1.1740418672561646, "learning_rate": 0.0004105432123929882, "loss": 3.6143, "step": 69405 }, { "epoch": 4.715994020926757, "grad_norm": 0.9979771971702576, "learning_rate": 0.00041050074738415544, "loss": 3.5649, "step": 69410 }, { "epoch": 4.716333740997418, "grad_norm": 0.970445990562439, "learning_rate": 0.0004104582823753227, "loss": 3.2176, "step": 69415 }, { "epoch": 4.71667346106808, "grad_norm": 1.3118610382080078, "learning_rate": 0.00041041581736649005, "loss": 3.31, "step": 69420 }, { "epoch": 4.717013181138742, "grad_norm": 0.784374475479126, "learning_rate": 0.00041037335235765733, "loss": 3.6023, "step": 69425 }, { "epoch": 4.717352901209403, "grad_norm": 0.7657243609428406, "learning_rate": 0.00041033088734882456, "loss": 3.48, "step": 69430 }, { "epoch": 4.717692621280065, "grad_norm": 0.8319544196128845, "learning_rate": 0.00041028842233999184, "loss": 3.467, "step": 69435 }, { "epoch": 4.718032341350727, "grad_norm": 0.7758208513259888, "learning_rate": 0.00041024595733115917, "loss": 3.2968, "step": 69440 }, { "epoch": 4.718372061421388, "grad_norm": 0.9755234122276306, "learning_rate": 0.0004102034923223264, "loss": 3.5437, "step": 69445 }, { "epoch": 4.7187117814920505, "grad_norm": 0.8124076128005981, "learning_rate": 0.0004101610273134937, "loss": 3.5811, "step": 69450 }, { "epoch": 4.719051501562713, "grad_norm": 1.1775474548339844, "learning_rate": 0.000410118562304661, "loss": 3.1302, "step": 69455 }, { "epoch": 4.719391221633374, "grad_norm": 6.396855354309082, "learning_rate": 0.00041007609729582824, "loss": 3.4236, "step": 69460 }, { "epoch": 4.719730941704036, "grad_norm": 0.9627898931503296, "learning_rate": 0.0004100336322869955, "loss": 3.5744, "step": 69465 }, { "epoch": 4.720070661774698, "grad_norm": 0.9507832527160645, "learning_rate": 0.00040999116727816285, "loss": 3.532, "step": 69470 }, { "epoch": 4.720410381845359, "grad_norm": 1.0489801168441772, "learning_rate": 0.0004099487022693301, "loss": 3.3575, "step": 69475 }, { "epoch": 4.720750101916021, "grad_norm": 0.8134302496910095, "learning_rate": 0.00040990623726049736, "loss": 3.5469, "step": 69480 }, { "epoch": 4.721089821986683, "grad_norm": 1.0447328090667725, "learning_rate": 0.00040986377225166464, "loss": 3.2181, "step": 69485 }, { "epoch": 4.721429542057344, "grad_norm": 0.761895477771759, "learning_rate": 0.0004098213072428319, "loss": 3.46, "step": 69490 }, { "epoch": 4.7217692621280065, "grad_norm": 1.0900856256484985, "learning_rate": 0.0004097788422339992, "loss": 3.6524, "step": 69495 }, { "epoch": 4.722108982198669, "grad_norm": 0.8806496262550354, "learning_rate": 0.0004097363772251665, "loss": 3.6766, "step": 69500 }, { "epoch": 4.72244870226933, "grad_norm": 0.8109956383705139, "learning_rate": 0.00040969391221633376, "loss": 3.565, "step": 69505 }, { "epoch": 4.722788422339992, "grad_norm": 0.9513434171676636, "learning_rate": 0.00040965144720750104, "loss": 3.5646, "step": 69510 }, { "epoch": 4.723128142410654, "grad_norm": 1.059083342552185, "learning_rate": 0.0004096089821986683, "loss": 3.4498, "step": 69515 }, { "epoch": 4.723467862481315, "grad_norm": 0.9925141334533691, "learning_rate": 0.00040956651718983554, "loss": 3.4791, "step": 69520 }, { "epoch": 4.723807582551977, "grad_norm": 0.7216336727142334, "learning_rate": 0.0004095240521810029, "loss": 3.429, "step": 69525 }, { "epoch": 4.724147302622639, "grad_norm": 1.161681890487671, "learning_rate": 0.00040948158717217016, "loss": 3.5496, "step": 69530 }, { "epoch": 4.7244870226933005, "grad_norm": 0.9172937870025635, "learning_rate": 0.0004094391221633374, "loss": 3.2494, "step": 69535 }, { "epoch": 4.7248267427639625, "grad_norm": 0.8286041021347046, "learning_rate": 0.0004093966571545047, "loss": 3.4217, "step": 69540 }, { "epoch": 4.725166462834625, "grad_norm": 0.9892429113388062, "learning_rate": 0.000409354192145672, "loss": 3.6047, "step": 69545 }, { "epoch": 4.725506182905286, "grad_norm": 0.6669461131095886, "learning_rate": 0.0004093117271368392, "loss": 3.9375, "step": 69550 }, { "epoch": 4.725845902975948, "grad_norm": 0.7384819388389587, "learning_rate": 0.0004092692621280065, "loss": 3.2732, "step": 69555 }, { "epoch": 4.72618562304661, "grad_norm": 0.9237121939659119, "learning_rate": 0.00040922679711917384, "loss": 3.1799, "step": 69560 }, { "epoch": 4.726525343117271, "grad_norm": 1.162797451019287, "learning_rate": 0.00040918433211034106, "loss": 3.5168, "step": 69565 }, { "epoch": 4.726865063187933, "grad_norm": 0.8678475618362427, "learning_rate": 0.00040914186710150834, "loss": 3.5217, "step": 69570 }, { "epoch": 4.727204783258595, "grad_norm": 0.8531060814857483, "learning_rate": 0.0004090994020926757, "loss": 3.3051, "step": 69575 }, { "epoch": 4.7275445033292565, "grad_norm": 0.8902525901794434, "learning_rate": 0.0004090569370838429, "loss": 3.5042, "step": 69580 }, { "epoch": 4.7278842233999185, "grad_norm": 1.0029677152633667, "learning_rate": 0.0004090144720750102, "loss": 3.3889, "step": 69585 }, { "epoch": 4.728223943470581, "grad_norm": 0.9958492517471313, "learning_rate": 0.00040897200706617746, "loss": 3.5852, "step": 69590 }, { "epoch": 4.728563663541242, "grad_norm": 0.9596347212791443, "learning_rate": 0.0004089295420573448, "loss": 3.4956, "step": 69595 }, { "epoch": 4.728903383611904, "grad_norm": 0.8545126914978027, "learning_rate": 0.000408887077048512, "loss": 3.4664, "step": 69600 }, { "epoch": 4.729243103682566, "grad_norm": 0.8597524762153625, "learning_rate": 0.0004088446120396793, "loss": 3.4044, "step": 69605 }, { "epoch": 4.729582823753227, "grad_norm": 0.7985163331031799, "learning_rate": 0.00040880214703084664, "loss": 3.6948, "step": 69610 }, { "epoch": 4.729922543823889, "grad_norm": 1.2284164428710938, "learning_rate": 0.00040875968202201386, "loss": 3.2078, "step": 69615 }, { "epoch": 4.73026226389455, "grad_norm": 0.7609068751335144, "learning_rate": 0.00040871721701318114, "loss": 3.4171, "step": 69620 }, { "epoch": 4.7306019839652125, "grad_norm": 0.9071590900421143, "learning_rate": 0.0004086747520043484, "loss": 3.5444, "step": 69625 }, { "epoch": 4.7309417040358746, "grad_norm": 0.8441505432128906, "learning_rate": 0.0004086322869955157, "loss": 3.5638, "step": 69630 }, { "epoch": 4.731281424106536, "grad_norm": 0.9613469243049622, "learning_rate": 0.000408589821986683, "loss": 3.3233, "step": 69635 }, { "epoch": 4.731621144177198, "grad_norm": 0.9090893268585205, "learning_rate": 0.00040854735697785026, "loss": 3.589, "step": 69640 }, { "epoch": 4.73196086424786, "grad_norm": 0.8683876395225525, "learning_rate": 0.00040850489196901754, "loss": 3.4098, "step": 69645 }, { "epoch": 4.732300584318521, "grad_norm": 0.9005845785140991, "learning_rate": 0.0004084624269601848, "loss": 3.3478, "step": 69650 }, { "epoch": 4.732640304389183, "grad_norm": 0.945499062538147, "learning_rate": 0.0004084199619513521, "loss": 3.6038, "step": 69655 }, { "epoch": 4.732980024459845, "grad_norm": 0.812657356262207, "learning_rate": 0.00040837749694251933, "loss": 3.478, "step": 69660 }, { "epoch": 4.733319744530506, "grad_norm": 0.846960723400116, "learning_rate": 0.00040833503193368667, "loss": 3.1262, "step": 69665 }, { "epoch": 4.7336594646011685, "grad_norm": 0.7913996577262878, "learning_rate": 0.00040829256692485395, "loss": 3.6322, "step": 69670 }, { "epoch": 4.733999184671831, "grad_norm": 0.9658031463623047, "learning_rate": 0.00040825010191602117, "loss": 3.5695, "step": 69675 }, { "epoch": 4.734338904742492, "grad_norm": 0.8523339033126831, "learning_rate": 0.0004082076369071885, "loss": 3.5488, "step": 69680 }, { "epoch": 4.734678624813154, "grad_norm": 0.7953420877456665, "learning_rate": 0.0004081651718983558, "loss": 3.4607, "step": 69685 }, { "epoch": 4.735018344883816, "grad_norm": 0.860140323638916, "learning_rate": 0.000408122706889523, "loss": 3.7551, "step": 69690 }, { "epoch": 4.735358064954477, "grad_norm": 0.8767080903053284, "learning_rate": 0.0004080802418806903, "loss": 3.3991, "step": 69695 }, { "epoch": 4.735697785025139, "grad_norm": 0.8981719613075256, "learning_rate": 0.0004080377768718576, "loss": 3.2561, "step": 69700 }, { "epoch": 4.736037505095801, "grad_norm": 0.954849898815155, "learning_rate": 0.00040799531186302485, "loss": 3.3468, "step": 69705 }, { "epoch": 4.736377225166462, "grad_norm": 1.090592622756958, "learning_rate": 0.00040795284685419213, "loss": 3.5751, "step": 69710 }, { "epoch": 4.7367169452371245, "grad_norm": 0.9520303606987, "learning_rate": 0.00040791038184535947, "loss": 3.5867, "step": 69715 }, { "epoch": 4.737056665307787, "grad_norm": 0.9403046369552612, "learning_rate": 0.0004078679168365267, "loss": 3.0102, "step": 69720 }, { "epoch": 4.737396385378448, "grad_norm": 0.7786111235618591, "learning_rate": 0.00040782545182769397, "loss": 3.8316, "step": 69725 }, { "epoch": 4.73773610544911, "grad_norm": 1.0550023317337036, "learning_rate": 0.0004077829868188613, "loss": 3.4379, "step": 69730 }, { "epoch": 4.738075825519772, "grad_norm": 0.8314580917358398, "learning_rate": 0.00040774052181002853, "loss": 3.3578, "step": 69735 }, { "epoch": 4.738415545590433, "grad_norm": 0.8088896870613098, "learning_rate": 0.0004076980568011958, "loss": 3.6589, "step": 69740 }, { "epoch": 4.738755265661095, "grad_norm": 0.9897230863571167, "learning_rate": 0.0004076555917923631, "loss": 3.6297, "step": 69745 }, { "epoch": 4.739094985731757, "grad_norm": 0.817943811416626, "learning_rate": 0.00040761312678353037, "loss": 3.6192, "step": 69750 }, { "epoch": 4.739434705802418, "grad_norm": 0.9540905952453613, "learning_rate": 0.00040757066177469765, "loss": 3.6626, "step": 69755 }, { "epoch": 4.7397744258730805, "grad_norm": 1.6650946140289307, "learning_rate": 0.00040752819676586493, "loss": 3.399, "step": 69760 }, { "epoch": 4.740114145943743, "grad_norm": 0.8696034550666809, "learning_rate": 0.00040748573175703227, "loss": 3.6477, "step": 69765 }, { "epoch": 4.740453866014404, "grad_norm": 1.1270350217819214, "learning_rate": 0.0004074432667481995, "loss": 3.4435, "step": 69770 }, { "epoch": 4.740793586085066, "grad_norm": 0.9530674815177917, "learning_rate": 0.00040740080173936677, "loss": 3.3867, "step": 69775 }, { "epoch": 4.741133306155728, "grad_norm": 1.123913288116455, "learning_rate": 0.00040735833673053405, "loss": 3.4357, "step": 69780 }, { "epoch": 4.741473026226389, "grad_norm": 0.9536225199699402, "learning_rate": 0.00040731587172170133, "loss": 3.4923, "step": 69785 }, { "epoch": 4.741812746297051, "grad_norm": 1.0449191331863403, "learning_rate": 0.0004072734067128686, "loss": 3.5092, "step": 69790 }, { "epoch": 4.742152466367713, "grad_norm": 1.021342158317566, "learning_rate": 0.0004072309417040359, "loss": 3.7529, "step": 69795 }, { "epoch": 4.7424921864383744, "grad_norm": 0.954954206943512, "learning_rate": 0.0004071884766952032, "loss": 3.3732, "step": 69800 }, { "epoch": 4.7428319065090365, "grad_norm": 0.8487341403961182, "learning_rate": 0.00040714601168637045, "loss": 3.3324, "step": 69805 }, { "epoch": 4.743171626579699, "grad_norm": 1.2022784948349, "learning_rate": 0.00040710354667753773, "loss": 3.4762, "step": 69810 }, { "epoch": 4.74351134665036, "grad_norm": 0.9926097989082336, "learning_rate": 0.00040706108166870496, "loss": 2.9816, "step": 69815 }, { "epoch": 4.743851066721022, "grad_norm": 0.9748802781105042, "learning_rate": 0.0004070186166598723, "loss": 3.503, "step": 69820 }, { "epoch": 4.744190786791684, "grad_norm": 0.8919101357460022, "learning_rate": 0.0004069761516510396, "loss": 3.5095, "step": 69825 }, { "epoch": 4.744530506862345, "grad_norm": 0.8129463195800781, "learning_rate": 0.0004069336866422068, "loss": 3.4917, "step": 69830 }, { "epoch": 4.744870226933007, "grad_norm": 1.0738404989242554, "learning_rate": 0.00040689122163337413, "loss": 3.6459, "step": 69835 }, { "epoch": 4.745209947003669, "grad_norm": 1.0326906442642212, "learning_rate": 0.0004068487566245414, "loss": 3.2607, "step": 69840 }, { "epoch": 4.7455496670743305, "grad_norm": 0.9654978513717651, "learning_rate": 0.00040680629161570864, "loss": 3.5042, "step": 69845 }, { "epoch": 4.7458893871449925, "grad_norm": 0.9114751219749451, "learning_rate": 0.0004067638266068759, "loss": 3.653, "step": 69850 }, { "epoch": 4.746229107215655, "grad_norm": 0.970568835735321, "learning_rate": 0.00040672136159804325, "loss": 3.7439, "step": 69855 }, { "epoch": 4.746568827286316, "grad_norm": 0.9871333837509155, "learning_rate": 0.0004066788965892105, "loss": 3.1905, "step": 69860 }, { "epoch": 4.746908547356978, "grad_norm": 0.7907553911209106, "learning_rate": 0.00040663643158037776, "loss": 3.6405, "step": 69865 }, { "epoch": 4.74724826742764, "grad_norm": 0.9636352062225342, "learning_rate": 0.0004065939665715451, "loss": 3.2468, "step": 69870 }, { "epoch": 4.747587987498301, "grad_norm": 1.0626206398010254, "learning_rate": 0.0004065515015627123, "loss": 3.4494, "step": 69875 }, { "epoch": 4.747927707568963, "grad_norm": 0.8582151532173157, "learning_rate": 0.0004065090365538796, "loss": 3.6137, "step": 69880 }, { "epoch": 4.748267427639625, "grad_norm": 0.8358616828918457, "learning_rate": 0.0004064665715450469, "loss": 3.5628, "step": 69885 }, { "epoch": 4.7486071477102865, "grad_norm": 0.869587779045105, "learning_rate": 0.00040642410653621416, "loss": 3.7545, "step": 69890 }, { "epoch": 4.7489468677809485, "grad_norm": 1.04090416431427, "learning_rate": 0.00040638164152738144, "loss": 3.5374, "step": 69895 }, { "epoch": 4.749286587851611, "grad_norm": 1.006638765335083, "learning_rate": 0.0004063391765185487, "loss": 3.4869, "step": 69900 }, { "epoch": 4.749626307922272, "grad_norm": 0.9677208065986633, "learning_rate": 0.000406296711509716, "loss": 3.3301, "step": 69905 }, { "epoch": 4.749966027992934, "grad_norm": 0.9345969557762146, "learning_rate": 0.0004062542465008833, "loss": 3.5362, "step": 69910 }, { "epoch": 4.750305748063596, "grad_norm": 0.7895476222038269, "learning_rate": 0.00040621178149205056, "loss": 3.6504, "step": 69915 }, { "epoch": 4.750645468134257, "grad_norm": 0.8894950151443481, "learning_rate": 0.0004061693164832178, "loss": 3.4556, "step": 69920 }, { "epoch": 4.750985188204919, "grad_norm": 0.9063246846199036, "learning_rate": 0.0004061268514743851, "loss": 3.3695, "step": 69925 }, { "epoch": 4.751324908275581, "grad_norm": 0.8349266648292542, "learning_rate": 0.0004060843864655524, "loss": 3.336, "step": 69930 }, { "epoch": 4.7516646283462425, "grad_norm": 1.0347188711166382, "learning_rate": 0.0004060419214567197, "loss": 3.1784, "step": 69935 }, { "epoch": 4.7520043484169046, "grad_norm": 0.7952750325202942, "learning_rate": 0.00040599945644788696, "loss": 3.5084, "step": 69940 }, { "epoch": 4.752344068487567, "grad_norm": 1.0403187274932861, "learning_rate": 0.00040595699143905424, "loss": 3.7953, "step": 69945 }, { "epoch": 4.752683788558228, "grad_norm": 0.8723675608634949, "learning_rate": 0.0004059145264302215, "loss": 3.2913, "step": 69950 }, { "epoch": 4.75302350862889, "grad_norm": 0.7564780116081238, "learning_rate": 0.00040587206142138875, "loss": 3.4079, "step": 69955 }, { "epoch": 4.753363228699552, "grad_norm": 0.9965308308601379, "learning_rate": 0.0004058295964125561, "loss": 3.5497, "step": 69960 }, { "epoch": 4.753702948770213, "grad_norm": 0.8512517213821411, "learning_rate": 0.00040578713140372336, "loss": 3.6836, "step": 69965 }, { "epoch": 4.754042668840875, "grad_norm": 0.9106812477111816, "learning_rate": 0.0004057446663948906, "loss": 3.5245, "step": 69970 }, { "epoch": 4.754382388911537, "grad_norm": 1.1285918951034546, "learning_rate": 0.0004057022013860579, "loss": 3.1913, "step": 69975 }, { "epoch": 4.7547221089821985, "grad_norm": 0.9773391485214233, "learning_rate": 0.0004056597363772252, "loss": 3.5257, "step": 69980 }, { "epoch": 4.755061829052861, "grad_norm": 0.9700713753700256, "learning_rate": 0.00040561727136839243, "loss": 3.4215, "step": 69985 }, { "epoch": 4.755401549123523, "grad_norm": 0.8964201211929321, "learning_rate": 0.0004055748063595597, "loss": 3.4269, "step": 69990 }, { "epoch": 4.755741269194184, "grad_norm": 1.4229077100753784, "learning_rate": 0.00040553234135072704, "loss": 3.3095, "step": 69995 }, { "epoch": 4.756080989264846, "grad_norm": 1.2170642614364624, "learning_rate": 0.00040548987634189427, "loss": 3.5155, "step": 70000 }, { "epoch": 4.756420709335508, "grad_norm": 1.007277011871338, "learning_rate": 0.00040544741133306155, "loss": 3.8152, "step": 70005 }, { "epoch": 4.756760429406169, "grad_norm": 0.7862555384635925, "learning_rate": 0.0004054049463242289, "loss": 3.5159, "step": 70010 }, { "epoch": 4.757100149476831, "grad_norm": 0.8778907656669617, "learning_rate": 0.0004053624813153961, "loss": 3.4717, "step": 70015 }, { "epoch": 4.757439869547493, "grad_norm": 0.8126395344734192, "learning_rate": 0.0004053200163065634, "loss": 3.4318, "step": 70020 }, { "epoch": 4.7577795896181545, "grad_norm": 1.1871123313903809, "learning_rate": 0.0004052775512977307, "loss": 3.4304, "step": 70025 }, { "epoch": 4.758119309688817, "grad_norm": 1.045343279838562, "learning_rate": 0.00040523508628889795, "loss": 3.5022, "step": 70030 }, { "epoch": 4.758459029759479, "grad_norm": 0.8970746994018555, "learning_rate": 0.00040519262128006523, "loss": 3.6819, "step": 70035 }, { "epoch": 4.75879874983014, "grad_norm": 0.8949733972549438, "learning_rate": 0.0004051501562712325, "loss": 3.5433, "step": 70040 }, { "epoch": 4.759138469900802, "grad_norm": 0.8140722513198853, "learning_rate": 0.0004051076912623998, "loss": 3.6664, "step": 70045 }, { "epoch": 4.759478189971464, "grad_norm": 0.7248872518539429, "learning_rate": 0.00040506522625356707, "loss": 3.075, "step": 70050 }, { "epoch": 4.759817910042125, "grad_norm": 0.7907172441482544, "learning_rate": 0.00040502276124473435, "loss": 3.6868, "step": 70055 }, { "epoch": 4.760157630112787, "grad_norm": 0.7529835104942322, "learning_rate": 0.00040498029623590163, "loss": 3.2642, "step": 70060 }, { "epoch": 4.760497350183449, "grad_norm": 1.0483533143997192, "learning_rate": 0.0004049378312270689, "loss": 3.3425, "step": 70065 }, { "epoch": 4.7608370702541105, "grad_norm": 1.021796464920044, "learning_rate": 0.0004048953662182362, "loss": 3.7086, "step": 70070 }, { "epoch": 4.761176790324773, "grad_norm": 1.0853748321533203, "learning_rate": 0.0004048529012094034, "loss": 3.3763, "step": 70075 }, { "epoch": 4.761516510395434, "grad_norm": 0.8721354603767395, "learning_rate": 0.00040481043620057075, "loss": 3.593, "step": 70080 }, { "epoch": 4.761856230466096, "grad_norm": 0.8370148539543152, "learning_rate": 0.00040476797119173803, "loss": 3.4034, "step": 70085 }, { "epoch": 4.762195950536758, "grad_norm": 0.7664744853973389, "learning_rate": 0.00040472550618290525, "loss": 3.0773, "step": 70090 }, { "epoch": 4.762535670607419, "grad_norm": 1.2743449211120605, "learning_rate": 0.0004046830411740726, "loss": 3.3399, "step": 70095 }, { "epoch": 4.762875390678081, "grad_norm": 0.8060517311096191, "learning_rate": 0.00040464057616523987, "loss": 3.6062, "step": 70100 }, { "epoch": 4.763215110748743, "grad_norm": 0.8377586603164673, "learning_rate": 0.00040459811115640715, "loss": 3.4684, "step": 70105 }, { "epoch": 4.7635548308194045, "grad_norm": 0.8594706654548645, "learning_rate": 0.0004045556461475744, "loss": 3.0675, "step": 70110 }, { "epoch": 4.7638945508900665, "grad_norm": 0.9973412156105042, "learning_rate": 0.0004045131811387417, "loss": 3.3142, "step": 70115 }, { "epoch": 4.764234270960729, "grad_norm": 1.1501307487487793, "learning_rate": 0.000404470716129909, "loss": 3.4585, "step": 70120 }, { "epoch": 4.76457399103139, "grad_norm": 0.7482250332832336, "learning_rate": 0.0004044282511210762, "loss": 3.5328, "step": 70125 }, { "epoch": 4.764913711102052, "grad_norm": 0.9305121898651123, "learning_rate": 0.00040438578611224355, "loss": 3.3082, "step": 70130 }, { "epoch": 4.765253431172714, "grad_norm": 0.8204454779624939, "learning_rate": 0.00040434332110341083, "loss": 3.6905, "step": 70135 }, { "epoch": 4.765593151243375, "grad_norm": 0.9567694664001465, "learning_rate": 0.00040430085609457806, "loss": 3.5189, "step": 70140 }, { "epoch": 4.765932871314037, "grad_norm": 0.7904633283615112, "learning_rate": 0.00040425839108574534, "loss": 3.4518, "step": 70145 }, { "epoch": 4.766272591384699, "grad_norm": 0.8971450328826904, "learning_rate": 0.00040421592607691267, "loss": 3.3259, "step": 70150 }, { "epoch": 4.7666123114553605, "grad_norm": 0.7756615281105042, "learning_rate": 0.0004041734610680799, "loss": 3.3068, "step": 70155 }, { "epoch": 4.7669520315260225, "grad_norm": 1.0709162950515747, "learning_rate": 0.0004041309960592472, "loss": 3.6885, "step": 70160 }, { "epoch": 4.767291751596685, "grad_norm": 0.8551867008209229, "learning_rate": 0.0004040885310504145, "loss": 3.6002, "step": 70165 }, { "epoch": 4.767631471667346, "grad_norm": 1.1767950057983398, "learning_rate": 0.00040404606604158174, "loss": 3.5945, "step": 70170 }, { "epoch": 4.767971191738008, "grad_norm": 0.7802050113677979, "learning_rate": 0.000404003601032749, "loss": 3.423, "step": 70175 }, { "epoch": 4.76831091180867, "grad_norm": 0.8028398156166077, "learning_rate": 0.0004039611360239163, "loss": 3.7693, "step": 70180 }, { "epoch": 4.768650631879331, "grad_norm": 0.7945271730422974, "learning_rate": 0.0004039186710150836, "loss": 3.5705, "step": 70185 }, { "epoch": 4.768990351949993, "grad_norm": 0.845060408115387, "learning_rate": 0.00040387620600625086, "loss": 3.6422, "step": 70190 }, { "epoch": 4.769330072020655, "grad_norm": 0.7944519519805908, "learning_rate": 0.00040383374099741814, "loss": 3.4642, "step": 70195 }, { "epoch": 4.7696697920913165, "grad_norm": 0.8013132810592651, "learning_rate": 0.0004037912759885854, "loss": 3.4119, "step": 70200 }, { "epoch": 4.7700095121619785, "grad_norm": 0.776978611946106, "learning_rate": 0.0004037488109797527, "loss": 3.5294, "step": 70205 }, { "epoch": 4.770349232232641, "grad_norm": 0.7108356952667236, "learning_rate": 0.00040370634597092, "loss": 3.4783, "step": 70210 }, { "epoch": 4.770688952303302, "grad_norm": 0.8817977905273438, "learning_rate": 0.0004036638809620872, "loss": 3.4817, "step": 70215 }, { "epoch": 4.771028672373964, "grad_norm": 0.8841917514801025, "learning_rate": 0.00040362141595325454, "loss": 3.5253, "step": 70220 }, { "epoch": 4.771368392444626, "grad_norm": 0.8310331106185913, "learning_rate": 0.0004035789509444218, "loss": 3.4334, "step": 70225 }, { "epoch": 4.771708112515287, "grad_norm": 0.8968549370765686, "learning_rate": 0.00040353648593558904, "loss": 3.6191, "step": 70230 }, { "epoch": 4.772047832585949, "grad_norm": 0.9409182071685791, "learning_rate": 0.0004034940209267564, "loss": 3.3376, "step": 70235 }, { "epoch": 4.772387552656611, "grad_norm": 0.8187596201896667, "learning_rate": 0.00040345155591792366, "loss": 3.2615, "step": 70240 }, { "epoch": 4.7727272727272725, "grad_norm": 0.8205277919769287, "learning_rate": 0.0004034090909090909, "loss": 3.5039, "step": 70245 }, { "epoch": 4.773066992797935, "grad_norm": 0.8254560828208923, "learning_rate": 0.00040336662590025816, "loss": 3.4936, "step": 70250 }, { "epoch": 4.773406712868597, "grad_norm": 0.8043600916862488, "learning_rate": 0.0004033241608914255, "loss": 3.5621, "step": 70255 }, { "epoch": 4.773746432939258, "grad_norm": 0.9181062579154968, "learning_rate": 0.0004032816958825927, "loss": 3.5296, "step": 70260 }, { "epoch": 4.77408615300992, "grad_norm": 0.9614311456680298, "learning_rate": 0.00040323923087376, "loss": 3.4175, "step": 70265 }, { "epoch": 4.774425873080582, "grad_norm": 1.0629569292068481, "learning_rate": 0.00040319676586492734, "loss": 3.4831, "step": 70270 }, { "epoch": 4.774765593151243, "grad_norm": 0.7729849815368652, "learning_rate": 0.0004031543008560946, "loss": 3.6984, "step": 70275 }, { "epoch": 4.775105313221905, "grad_norm": 0.9532772302627563, "learning_rate": 0.00040311183584726184, "loss": 3.3816, "step": 70280 }, { "epoch": 4.775445033292567, "grad_norm": 0.7529994249343872, "learning_rate": 0.0004030693708384291, "loss": 3.7698, "step": 70285 }, { "epoch": 4.7757847533632285, "grad_norm": 0.8031511306762695, "learning_rate": 0.00040302690582959646, "loss": 3.5054, "step": 70290 }, { "epoch": 4.776124473433891, "grad_norm": 0.9469435214996338, "learning_rate": 0.0004029844408207637, "loss": 3.626, "step": 70295 }, { "epoch": 4.776464193504552, "grad_norm": 1.2148302793502808, "learning_rate": 0.00040294197581193096, "loss": 3.6636, "step": 70300 }, { "epoch": 4.776803913575214, "grad_norm": 1.1191601753234863, "learning_rate": 0.0004028995108030983, "loss": 3.7747, "step": 70305 }, { "epoch": 4.777143633645876, "grad_norm": 1.1783802509307861, "learning_rate": 0.0004028570457942655, "loss": 3.3707, "step": 70310 }, { "epoch": 4.777483353716537, "grad_norm": 0.8672983646392822, "learning_rate": 0.0004028145807854328, "loss": 3.4294, "step": 70315 }, { "epoch": 4.777823073787199, "grad_norm": 0.7727931141853333, "learning_rate": 0.00040277211577660014, "loss": 3.5373, "step": 70320 }, { "epoch": 4.778162793857861, "grad_norm": 0.8452978134155273, "learning_rate": 0.00040272965076776736, "loss": 3.426, "step": 70325 }, { "epoch": 4.778502513928522, "grad_norm": 0.9340426325798035, "learning_rate": 0.00040268718575893464, "loss": 3.3597, "step": 70330 }, { "epoch": 4.7788422339991845, "grad_norm": 0.8888667821884155, "learning_rate": 0.0004026447207501019, "loss": 3.473, "step": 70335 }, { "epoch": 4.779181954069847, "grad_norm": 0.8960254788398743, "learning_rate": 0.0004026022557412692, "loss": 3.3074, "step": 70340 }, { "epoch": 4.779521674140508, "grad_norm": 0.8607752919197083, "learning_rate": 0.0004025597907324365, "loss": 3.4672, "step": 70345 }, { "epoch": 4.77986139421117, "grad_norm": 0.9392121434211731, "learning_rate": 0.00040251732572360376, "loss": 3.2868, "step": 70350 }, { "epoch": 4.780201114281832, "grad_norm": 0.8517521619796753, "learning_rate": 0.00040247486071477104, "loss": 3.7123, "step": 70355 }, { "epoch": 4.780540834352493, "grad_norm": 0.6652432084083557, "learning_rate": 0.0004024323957059383, "loss": 3.5565, "step": 70360 }, { "epoch": 4.780880554423155, "grad_norm": 1.2006573677062988, "learning_rate": 0.0004023899306971056, "loss": 3.2623, "step": 70365 }, { "epoch": 4.781220274493817, "grad_norm": 0.7220340967178345, "learning_rate": 0.00040234746568827283, "loss": 3.6234, "step": 70370 }, { "epoch": 4.7815599945644784, "grad_norm": 0.881195604801178, "learning_rate": 0.00040230500067944016, "loss": 3.57, "step": 70375 }, { "epoch": 4.7818997146351405, "grad_norm": 1.1372443437576294, "learning_rate": 0.00040226253567060745, "loss": 3.4509, "step": 70380 }, { "epoch": 4.782239434705803, "grad_norm": 0.8250408172607422, "learning_rate": 0.00040222007066177467, "loss": 3.671, "step": 70385 }, { "epoch": 4.782579154776464, "grad_norm": 0.8903779983520508, "learning_rate": 0.000402177605652942, "loss": 3.4131, "step": 70390 }, { "epoch": 4.782918874847126, "grad_norm": 0.9633232355117798, "learning_rate": 0.0004021351406441093, "loss": 3.4399, "step": 70395 }, { "epoch": 4.783258594917788, "grad_norm": 0.8965213298797607, "learning_rate": 0.0004020926756352765, "loss": 3.2851, "step": 70400 }, { "epoch": 4.783598314988449, "grad_norm": 0.9689022898674011, "learning_rate": 0.0004020502106264438, "loss": 3.6069, "step": 70405 }, { "epoch": 4.783938035059111, "grad_norm": 0.8349127173423767, "learning_rate": 0.0004020077456176111, "loss": 3.6365, "step": 70410 }, { "epoch": 4.784277755129773, "grad_norm": 0.909548282623291, "learning_rate": 0.00040196528060877835, "loss": 3.7422, "step": 70415 }, { "epoch": 4.7846174752004345, "grad_norm": 0.9999193549156189, "learning_rate": 0.00040192281559994563, "loss": 3.4312, "step": 70420 }, { "epoch": 4.7849571952710965, "grad_norm": 0.8660966157913208, "learning_rate": 0.00040188035059111297, "loss": 3.2884, "step": 70425 }, { "epoch": 4.785296915341759, "grad_norm": 0.7895473837852478, "learning_rate": 0.0004018378855822802, "loss": 3.6481, "step": 70430 }, { "epoch": 4.78563663541242, "grad_norm": 0.7956475615501404, "learning_rate": 0.00040179542057344747, "loss": 3.4492, "step": 70435 }, { "epoch": 4.785976355483082, "grad_norm": 1.088138461112976, "learning_rate": 0.00040175295556461475, "loss": 3.3027, "step": 70440 }, { "epoch": 4.786316075553744, "grad_norm": 0.9060842394828796, "learning_rate": 0.0004017104905557821, "loss": 3.4391, "step": 70445 }, { "epoch": 4.786655795624405, "grad_norm": 0.8917579054832458, "learning_rate": 0.0004016680255469493, "loss": 3.5534, "step": 70450 }, { "epoch": 4.786995515695067, "grad_norm": 0.8706673979759216, "learning_rate": 0.0004016255605381166, "loss": 3.244, "step": 70455 }, { "epoch": 4.787335235765729, "grad_norm": 0.8494575023651123, "learning_rate": 0.0004015830955292839, "loss": 3.4548, "step": 70460 }, { "epoch": 4.7876749558363905, "grad_norm": 0.882722020149231, "learning_rate": 0.00040154063052045115, "loss": 3.4462, "step": 70465 }, { "epoch": 4.7880146759070525, "grad_norm": 1.0654890537261963, "learning_rate": 0.00040149816551161843, "loss": 3.6834, "step": 70470 }, { "epoch": 4.788354395977715, "grad_norm": 0.9525165557861328, "learning_rate": 0.0004014557005027857, "loss": 3.5394, "step": 70475 }, { "epoch": 4.788694116048376, "grad_norm": 0.8042345643043518, "learning_rate": 0.000401413235493953, "loss": 3.3865, "step": 70480 }, { "epoch": 4.789033836119038, "grad_norm": 1.1183325052261353, "learning_rate": 0.00040137077048512027, "loss": 3.434, "step": 70485 }, { "epoch": 4.7893735561897, "grad_norm": 0.7519111633300781, "learning_rate": 0.00040132830547628755, "loss": 3.3815, "step": 70490 }, { "epoch": 4.789713276260361, "grad_norm": 0.8972079753875732, "learning_rate": 0.00040128584046745483, "loss": 3.4321, "step": 70495 }, { "epoch": 4.790052996331023, "grad_norm": 1.0112940073013306, "learning_rate": 0.0004012433754586221, "loss": 3.8096, "step": 70500 }, { "epoch": 4.790392716401685, "grad_norm": 0.7080385684967041, "learning_rate": 0.0004012009104497894, "loss": 3.3596, "step": 70505 }, { "epoch": 4.7907324364723465, "grad_norm": 0.8304780125617981, "learning_rate": 0.0004011584454409566, "loss": 3.4605, "step": 70510 }, { "epoch": 4.7910721565430086, "grad_norm": 0.8705866932868958, "learning_rate": 0.00040111598043212395, "loss": 3.3854, "step": 70515 }, { "epoch": 4.791411876613671, "grad_norm": 1.3136351108551025, "learning_rate": 0.00040107351542329123, "loss": 3.7872, "step": 70520 }, { "epoch": 4.791751596684332, "grad_norm": 0.761170506477356, "learning_rate": 0.00040103105041445846, "loss": 3.4968, "step": 70525 }, { "epoch": 4.792091316754994, "grad_norm": 1.1485087871551514, "learning_rate": 0.0004009885854056258, "loss": 3.3972, "step": 70530 }, { "epoch": 4.792431036825656, "grad_norm": 0.9570496082305908, "learning_rate": 0.0004009461203967931, "loss": 3.582, "step": 70535 }, { "epoch": 4.792770756896317, "grad_norm": 0.9269013404846191, "learning_rate": 0.0004009036553879603, "loss": 3.3016, "step": 70540 }, { "epoch": 4.793110476966979, "grad_norm": 1.1033341884613037, "learning_rate": 0.0004008611903791276, "loss": 3.3826, "step": 70545 }, { "epoch": 4.793450197037641, "grad_norm": 1.1749680042266846, "learning_rate": 0.0004008187253702949, "loss": 3.5565, "step": 70550 }, { "epoch": 4.7937899171083025, "grad_norm": 0.8548731207847595, "learning_rate": 0.00040077626036146214, "loss": 3.6999, "step": 70555 }, { "epoch": 4.794129637178965, "grad_norm": 0.7880840301513672, "learning_rate": 0.0004007337953526294, "loss": 3.7293, "step": 70560 }, { "epoch": 4.794469357249627, "grad_norm": 1.099001169204712, "learning_rate": 0.00040069133034379675, "loss": 3.5774, "step": 70565 }, { "epoch": 4.794809077320288, "grad_norm": 1.1060079336166382, "learning_rate": 0.000400648865334964, "loss": 3.7722, "step": 70570 }, { "epoch": 4.79514879739095, "grad_norm": 1.2185269594192505, "learning_rate": 0.00040060640032613126, "loss": 3.3087, "step": 70575 }, { "epoch": 4.795488517461612, "grad_norm": 0.9283305406570435, "learning_rate": 0.00040056393531729854, "loss": 3.403, "step": 70580 }, { "epoch": 4.795828237532273, "grad_norm": 0.7855352759361267, "learning_rate": 0.0004005214703084658, "loss": 3.3949, "step": 70585 }, { "epoch": 4.796167957602935, "grad_norm": 0.8654841184616089, "learning_rate": 0.0004004790052996331, "loss": 3.5531, "step": 70590 }, { "epoch": 4.796507677673597, "grad_norm": 1.021836519241333, "learning_rate": 0.0004004365402908004, "loss": 3.3865, "step": 70595 }, { "epoch": 4.7968473977442585, "grad_norm": 0.8132767081260681, "learning_rate": 0.00040039407528196766, "loss": 3.4587, "step": 70600 }, { "epoch": 4.797187117814921, "grad_norm": 0.8168739080429077, "learning_rate": 0.00040035161027313494, "loss": 3.57, "step": 70605 }, { "epoch": 4.797526837885583, "grad_norm": 0.7915118336677551, "learning_rate": 0.0004003091452643022, "loss": 3.5561, "step": 70610 }, { "epoch": 4.797866557956244, "grad_norm": 0.8955094218254089, "learning_rate": 0.00040026668025546955, "loss": 3.3737, "step": 70615 }, { "epoch": 4.798206278026906, "grad_norm": 0.7943336963653564, "learning_rate": 0.0004002242152466368, "loss": 3.6874, "step": 70620 }, { "epoch": 4.798545998097568, "grad_norm": 1.0302733182907104, "learning_rate": 0.00040018175023780406, "loss": 3.5091, "step": 70625 }, { "epoch": 4.798885718168229, "grad_norm": 0.8550623655319214, "learning_rate": 0.00040013928522897134, "loss": 3.2809, "step": 70630 }, { "epoch": 4.799225438238891, "grad_norm": 0.8888876438140869, "learning_rate": 0.0004000968202201386, "loss": 3.4225, "step": 70635 }, { "epoch": 4.799565158309553, "grad_norm": 0.9619299173355103, "learning_rate": 0.0004000543552113059, "loss": 3.4654, "step": 70640 }, { "epoch": 4.7999048783802145, "grad_norm": 0.9343862533569336, "learning_rate": 0.0004000118902024732, "loss": 3.2595, "step": 70645 }, { "epoch": 4.800244598450877, "grad_norm": 0.8767639398574829, "learning_rate": 0.00039996942519364046, "loss": 3.4761, "step": 70650 }, { "epoch": 4.800584318521539, "grad_norm": 1.057349681854248, "learning_rate": 0.00039992696018480774, "loss": 3.5013, "step": 70655 }, { "epoch": 4.8009240385922, "grad_norm": 0.8815264701843262, "learning_rate": 0.000399884495175975, "loss": 3.6289, "step": 70660 }, { "epoch": 4.801263758662862, "grad_norm": 0.7199167013168335, "learning_rate": 0.00039984203016714225, "loss": 3.6424, "step": 70665 }, { "epoch": 4.801603478733524, "grad_norm": 0.9723081588745117, "learning_rate": 0.0003997995651583096, "loss": 3.2603, "step": 70670 }, { "epoch": 4.801943198804185, "grad_norm": 0.8908737301826477, "learning_rate": 0.00039975710014947686, "loss": 3.8391, "step": 70675 }, { "epoch": 4.802282918874847, "grad_norm": 0.818849503993988, "learning_rate": 0.0003997146351406441, "loss": 3.497, "step": 70680 }, { "epoch": 4.802622638945509, "grad_norm": 0.8584309816360474, "learning_rate": 0.0003996721701318114, "loss": 3.4156, "step": 70685 }, { "epoch": 4.8029623590161705, "grad_norm": 1.2230474948883057, "learning_rate": 0.0003996297051229787, "loss": 3.5136, "step": 70690 }, { "epoch": 4.803302079086833, "grad_norm": 1.0068387985229492, "learning_rate": 0.00039958724011414593, "loss": 3.6272, "step": 70695 }, { "epoch": 4.803641799157495, "grad_norm": 0.9747718572616577, "learning_rate": 0.0003995447751053132, "loss": 3.3668, "step": 70700 }, { "epoch": 4.803981519228156, "grad_norm": 1.0465805530548096, "learning_rate": 0.00039950231009648054, "loss": 3.439, "step": 70705 }, { "epoch": 4.804321239298818, "grad_norm": 0.8880668878555298, "learning_rate": 0.00039945984508764777, "loss": 3.5246, "step": 70710 }, { "epoch": 4.80466095936948, "grad_norm": 0.903103768825531, "learning_rate": 0.00039941738007881505, "loss": 3.4039, "step": 70715 }, { "epoch": 4.805000679440141, "grad_norm": 0.6945569515228271, "learning_rate": 0.0003993749150699824, "loss": 3.3442, "step": 70720 }, { "epoch": 4.805340399510803, "grad_norm": 0.8089251518249512, "learning_rate": 0.0003993324500611496, "loss": 3.2967, "step": 70725 }, { "epoch": 4.805680119581465, "grad_norm": 0.8446237444877625, "learning_rate": 0.0003992899850523169, "loss": 3.5811, "step": 70730 }, { "epoch": 4.8060198396521265, "grad_norm": 0.9648834466934204, "learning_rate": 0.00039924752004348417, "loss": 3.5093, "step": 70735 }, { "epoch": 4.806359559722789, "grad_norm": 0.8614630103111267, "learning_rate": 0.00039920505503465145, "loss": 3.4752, "step": 70740 }, { "epoch": 4.806699279793451, "grad_norm": 1.0510776042938232, "learning_rate": 0.00039916259002581873, "loss": 3.587, "step": 70745 }, { "epoch": 4.807038999864112, "grad_norm": 0.958268404006958, "learning_rate": 0.000399120125016986, "loss": 3.6617, "step": 70750 }, { "epoch": 4.807378719934774, "grad_norm": 0.7463881373405457, "learning_rate": 0.0003990776600081533, "loss": 3.523, "step": 70755 }, { "epoch": 4.807718440005435, "grad_norm": 0.9886858463287354, "learning_rate": 0.00039903519499932057, "loss": 3.3387, "step": 70760 }, { "epoch": 4.808058160076097, "grad_norm": 0.7606908082962036, "learning_rate": 0.00039899272999048785, "loss": 3.6394, "step": 70765 }, { "epoch": 4.808397880146759, "grad_norm": 1.026638388633728, "learning_rate": 0.0003989502649816551, "loss": 3.5217, "step": 70770 }, { "epoch": 4.8087376002174205, "grad_norm": 0.836662769317627, "learning_rate": 0.0003989077999728224, "loss": 3.5468, "step": 70775 }, { "epoch": 4.8090773202880825, "grad_norm": 0.7752577662467957, "learning_rate": 0.0003988653349639897, "loss": 3.2342, "step": 70780 }, { "epoch": 4.809417040358745, "grad_norm": 0.6935116052627563, "learning_rate": 0.00039882286995515697, "loss": 3.4789, "step": 70785 }, { "epoch": 4.809756760429406, "grad_norm": 0.9013590216636658, "learning_rate": 0.00039878040494632425, "loss": 3.3288, "step": 70790 }, { "epoch": 4.810096480500068, "grad_norm": 0.9464393854141235, "learning_rate": 0.00039873793993749153, "loss": 3.3385, "step": 70795 }, { "epoch": 4.81043620057073, "grad_norm": 0.879513680934906, "learning_rate": 0.0003986954749286588, "loss": 3.4001, "step": 70800 }, { "epoch": 4.810775920641391, "grad_norm": 0.8569079041481018, "learning_rate": 0.00039865300991982603, "loss": 3.4249, "step": 70805 }, { "epoch": 4.811115640712053, "grad_norm": 0.841816782951355, "learning_rate": 0.00039861054491099337, "loss": 3.6598, "step": 70810 }, { "epoch": 4.811455360782715, "grad_norm": 0.9392424821853638, "learning_rate": 0.00039856807990216065, "loss": 3.7141, "step": 70815 }, { "epoch": 4.8117950808533765, "grad_norm": 1.0502564907073975, "learning_rate": 0.0003985256148933279, "loss": 3.435, "step": 70820 }, { "epoch": 4.812134800924039, "grad_norm": 0.8750007152557373, "learning_rate": 0.0003984831498844952, "loss": 3.572, "step": 70825 }, { "epoch": 4.812474520994701, "grad_norm": 0.9074890613555908, "learning_rate": 0.0003984406848756625, "loss": 3.5931, "step": 70830 }, { "epoch": 4.812814241065362, "grad_norm": 1.1666885614395142, "learning_rate": 0.0003983982198668297, "loss": 3.5679, "step": 70835 }, { "epoch": 4.813153961136024, "grad_norm": 1.1100159883499146, "learning_rate": 0.000398355754857997, "loss": 3.5664, "step": 70840 }, { "epoch": 4.813493681206686, "grad_norm": 1.0145869255065918, "learning_rate": 0.00039831328984916433, "loss": 4.0115, "step": 70845 }, { "epoch": 4.813833401277347, "grad_norm": 0.7869913578033447, "learning_rate": 0.00039827082484033156, "loss": 3.4886, "step": 70850 }, { "epoch": 4.814173121348009, "grad_norm": 0.8371909856796265, "learning_rate": 0.00039822835983149884, "loss": 3.7582, "step": 70855 }, { "epoch": 4.814512841418671, "grad_norm": 0.6600121259689331, "learning_rate": 0.00039818589482266617, "loss": 3.3642, "step": 70860 }, { "epoch": 4.8148525614893325, "grad_norm": 0.9040317535400391, "learning_rate": 0.0003981434298138334, "loss": 3.4999, "step": 70865 }, { "epoch": 4.815192281559995, "grad_norm": 0.8087959289550781, "learning_rate": 0.0003981009648050007, "loss": 3.3932, "step": 70870 }, { "epoch": 4.815532001630657, "grad_norm": 0.823777437210083, "learning_rate": 0.000398058499796168, "loss": 3.375, "step": 70875 }, { "epoch": 4.815871721701318, "grad_norm": 1.1131092309951782, "learning_rate": 0.00039801603478733524, "loss": 3.7234, "step": 70880 }, { "epoch": 4.81621144177198, "grad_norm": 0.7496495246887207, "learning_rate": 0.0003979735697785025, "loss": 3.4625, "step": 70885 }, { "epoch": 4.816551161842642, "grad_norm": 1.0396727323532104, "learning_rate": 0.0003979311047696698, "loss": 3.5946, "step": 70890 }, { "epoch": 4.816890881913303, "grad_norm": 0.7298014760017395, "learning_rate": 0.0003978886397608371, "loss": 3.6737, "step": 70895 }, { "epoch": 4.817230601983965, "grad_norm": 0.9425898790359497, "learning_rate": 0.00039784617475200436, "loss": 3.6772, "step": 70900 }, { "epoch": 4.817570322054627, "grad_norm": 0.9819394946098328, "learning_rate": 0.00039780370974317164, "loss": 3.7238, "step": 70905 }, { "epoch": 4.8179100421252885, "grad_norm": 0.9634224772453308, "learning_rate": 0.0003977612447343389, "loss": 3.7637, "step": 70910 }, { "epoch": 4.818249762195951, "grad_norm": 1.0291748046875, "learning_rate": 0.0003977187797255062, "loss": 3.4317, "step": 70915 }, { "epoch": 4.818589482266613, "grad_norm": 0.7748332619667053, "learning_rate": 0.0003976763147166735, "loss": 3.2661, "step": 70920 }, { "epoch": 4.818929202337274, "grad_norm": 0.9932260513305664, "learning_rate": 0.0003976338497078407, "loss": 3.4519, "step": 70925 }, { "epoch": 4.819268922407936, "grad_norm": 0.7910274267196655, "learning_rate": 0.00039759138469900804, "loss": 3.4691, "step": 70930 }, { "epoch": 4.819608642478598, "grad_norm": 0.9375197887420654, "learning_rate": 0.0003975489196901753, "loss": 3.4805, "step": 70935 }, { "epoch": 4.819948362549259, "grad_norm": 0.7566654086112976, "learning_rate": 0.00039750645468134254, "loss": 3.6637, "step": 70940 }, { "epoch": 4.820288082619921, "grad_norm": 0.8344612121582031, "learning_rate": 0.0003974639896725099, "loss": 3.7541, "step": 70945 }, { "epoch": 4.820627802690583, "grad_norm": 0.73778235912323, "learning_rate": 0.00039742152466367716, "loss": 3.4652, "step": 70950 }, { "epoch": 4.8209675227612445, "grad_norm": 0.7157381176948547, "learning_rate": 0.00039737905965484444, "loss": 3.3576, "step": 70955 }, { "epoch": 4.821307242831907, "grad_norm": 0.7827205061912537, "learning_rate": 0.00039733659464601166, "loss": 3.5221, "step": 70960 }, { "epoch": 4.821646962902569, "grad_norm": 0.8035179376602173, "learning_rate": 0.000397294129637179, "loss": 3.6959, "step": 70965 }, { "epoch": 4.82198668297323, "grad_norm": 0.833808183670044, "learning_rate": 0.0003972516646283463, "loss": 3.6502, "step": 70970 }, { "epoch": 4.822326403043892, "grad_norm": 0.764222264289856, "learning_rate": 0.0003972091996195135, "loss": 3.3818, "step": 70975 }, { "epoch": 4.822666123114553, "grad_norm": 0.7710103392601013, "learning_rate": 0.00039716673461068084, "loss": 3.2253, "step": 70980 }, { "epoch": 4.823005843185215, "grad_norm": 0.9523244500160217, "learning_rate": 0.0003971242696018481, "loss": 3.513, "step": 70985 }, { "epoch": 4.823345563255877, "grad_norm": 0.9166908860206604, "learning_rate": 0.00039708180459301534, "loss": 3.4102, "step": 70990 }, { "epoch": 4.8236852833265385, "grad_norm": 1.000309705734253, "learning_rate": 0.0003970393395841826, "loss": 3.5204, "step": 70995 }, { "epoch": 4.8240250033972005, "grad_norm": 1.0685782432556152, "learning_rate": 0.00039699687457534996, "loss": 3.5553, "step": 71000 }, { "epoch": 4.824364723467863, "grad_norm": 0.7866259217262268, "learning_rate": 0.0003969544095665172, "loss": 3.4962, "step": 71005 }, { "epoch": 4.824704443538524, "grad_norm": 1.2047789096832275, "learning_rate": 0.00039691194455768446, "loss": 3.5054, "step": 71010 }, { "epoch": 4.825044163609186, "grad_norm": 0.8946297764778137, "learning_rate": 0.0003968694795488518, "loss": 2.9987, "step": 71015 }, { "epoch": 4.825383883679848, "grad_norm": 0.7953545451164246, "learning_rate": 0.000396827014540019, "loss": 3.3606, "step": 71020 }, { "epoch": 4.825723603750509, "grad_norm": 1.0809776782989502, "learning_rate": 0.0003967845495311863, "loss": 3.5998, "step": 71025 }, { "epoch": 4.826063323821171, "grad_norm": 0.9757300019264221, "learning_rate": 0.0003967420845223536, "loss": 3.6223, "step": 71030 }, { "epoch": 4.826403043891833, "grad_norm": 0.9193054437637329, "learning_rate": 0.00039669961951352086, "loss": 3.574, "step": 71035 }, { "epoch": 4.8267427639624945, "grad_norm": 1.0651136636734009, "learning_rate": 0.00039665715450468814, "loss": 3.3917, "step": 71040 }, { "epoch": 4.8270824840331565, "grad_norm": 0.9391628503799438, "learning_rate": 0.0003966146894958554, "loss": 3.3141, "step": 71045 }, { "epoch": 4.827422204103819, "grad_norm": 0.8234797120094299, "learning_rate": 0.0003965722244870227, "loss": 3.6276, "step": 71050 }, { "epoch": 4.82776192417448, "grad_norm": 0.9170812368392944, "learning_rate": 0.00039652975947819, "loss": 3.4826, "step": 71055 }, { "epoch": 4.828101644245142, "grad_norm": 0.9189901947975159, "learning_rate": 0.00039648729446935726, "loss": 3.5287, "step": 71060 }, { "epoch": 4.828441364315804, "grad_norm": 0.9926113486289978, "learning_rate": 0.0003964448294605245, "loss": 3.6459, "step": 71065 }, { "epoch": 4.828781084386465, "grad_norm": 0.9156104922294617, "learning_rate": 0.0003964023644516918, "loss": 3.4621, "step": 71070 }, { "epoch": 4.829120804457127, "grad_norm": 0.9316727519035339, "learning_rate": 0.0003963598994428591, "loss": 3.4291, "step": 71075 }, { "epoch": 4.829460524527789, "grad_norm": 0.8657076954841614, "learning_rate": 0.00039631743443402633, "loss": 3.4678, "step": 71080 }, { "epoch": 4.8298002445984505, "grad_norm": 0.802844226360321, "learning_rate": 0.00039627496942519366, "loss": 3.6113, "step": 71085 }, { "epoch": 4.8301399646691126, "grad_norm": 0.7883496880531311, "learning_rate": 0.00039623250441636094, "loss": 3.6489, "step": 71090 }, { "epoch": 4.830479684739775, "grad_norm": 1.0074280500411987, "learning_rate": 0.00039619003940752817, "loss": 3.4418, "step": 71095 }, { "epoch": 4.830819404810436, "grad_norm": 0.822624921798706, "learning_rate": 0.00039614757439869545, "loss": 3.5269, "step": 71100 }, { "epoch": 4.831159124881098, "grad_norm": 1.05500328540802, "learning_rate": 0.0003961051093898628, "loss": 3.449, "step": 71105 }, { "epoch": 4.83149884495176, "grad_norm": 0.9108061790466309, "learning_rate": 0.00039606264438103, "loss": 3.5857, "step": 71110 }, { "epoch": 4.831838565022421, "grad_norm": 0.928350031375885, "learning_rate": 0.0003960201793721973, "loss": 3.5903, "step": 71115 }, { "epoch": 4.832178285093083, "grad_norm": 0.7900652289390564, "learning_rate": 0.0003959777143633646, "loss": 3.327, "step": 71120 }, { "epoch": 4.832518005163745, "grad_norm": 0.932620644569397, "learning_rate": 0.0003959352493545319, "loss": 3.5904, "step": 71125 }, { "epoch": 4.8328577252344065, "grad_norm": 0.8778575658798218, "learning_rate": 0.00039589278434569913, "loss": 3.3157, "step": 71130 }, { "epoch": 4.833197445305069, "grad_norm": 0.755192756652832, "learning_rate": 0.0003958503193368664, "loss": 3.6799, "step": 71135 }, { "epoch": 4.833537165375731, "grad_norm": 0.8667948842048645, "learning_rate": 0.00039580785432803375, "loss": 3.2194, "step": 71140 }, { "epoch": 4.833876885446392, "grad_norm": 2.013303518295288, "learning_rate": 0.00039576538931920097, "loss": 3.4203, "step": 71145 }, { "epoch": 4.834216605517054, "grad_norm": 0.7738672494888306, "learning_rate": 0.00039572292431036825, "loss": 3.5796, "step": 71150 }, { "epoch": 4.834556325587716, "grad_norm": 1.3464653491973877, "learning_rate": 0.0003956804593015356, "loss": 3.4588, "step": 71155 }, { "epoch": 4.834896045658377, "grad_norm": 0.9314467906951904, "learning_rate": 0.0003956379942927028, "loss": 3.3887, "step": 71160 }, { "epoch": 4.835235765729039, "grad_norm": 0.963745653629303, "learning_rate": 0.0003955955292838701, "loss": 3.6891, "step": 71165 }, { "epoch": 4.835575485799701, "grad_norm": 0.7535337209701538, "learning_rate": 0.0003955530642750374, "loss": 3.5377, "step": 71170 }, { "epoch": 4.8359152058703625, "grad_norm": 1.0974199771881104, "learning_rate": 0.00039551059926620465, "loss": 3.3874, "step": 71175 }, { "epoch": 4.836254925941025, "grad_norm": 0.6512888669967651, "learning_rate": 0.00039546813425737193, "loss": 3.4582, "step": 71180 }, { "epoch": 4.836594646011687, "grad_norm": 1.0083938837051392, "learning_rate": 0.0003954256692485392, "loss": 3.5647, "step": 71185 }, { "epoch": 4.836934366082348, "grad_norm": 0.8498887419700623, "learning_rate": 0.0003953832042397065, "loss": 3.3454, "step": 71190 }, { "epoch": 4.83727408615301, "grad_norm": 0.9310884475708008, "learning_rate": 0.00039534073923087377, "loss": 3.4524, "step": 71195 }, { "epoch": 4.837613806223672, "grad_norm": 0.9496724009513855, "learning_rate": 0.00039529827422204105, "loss": 3.4226, "step": 71200 }, { "epoch": 4.837953526294333, "grad_norm": 1.0194867849349976, "learning_rate": 0.00039525580921320833, "loss": 3.5058, "step": 71205 }, { "epoch": 4.838293246364995, "grad_norm": 1.0465986728668213, "learning_rate": 0.0003952133442043756, "loss": 3.5605, "step": 71210 }, { "epoch": 4.838632966435657, "grad_norm": 0.9374005794525146, "learning_rate": 0.0003951708791955429, "loss": 3.5367, "step": 71215 }, { "epoch": 4.8389726865063185, "grad_norm": 0.9078034162521362, "learning_rate": 0.0003951284141867101, "loss": 3.4267, "step": 71220 }, { "epoch": 4.839312406576981, "grad_norm": 0.750032365322113, "learning_rate": 0.00039508594917787745, "loss": 3.3284, "step": 71225 }, { "epoch": 4.839652126647643, "grad_norm": 0.771081805229187, "learning_rate": 0.00039504348416904473, "loss": 3.1585, "step": 71230 }, { "epoch": 4.839991846718304, "grad_norm": 0.9651888012886047, "learning_rate": 0.00039500101916021196, "loss": 3.4434, "step": 71235 }, { "epoch": 4.840331566788966, "grad_norm": 0.8739444613456726, "learning_rate": 0.0003949585541513793, "loss": 3.6145, "step": 71240 }, { "epoch": 4.840671286859628, "grad_norm": 1.0616283416748047, "learning_rate": 0.0003949160891425466, "loss": 3.3929, "step": 71245 }, { "epoch": 4.841011006930289, "grad_norm": 0.7762076258659363, "learning_rate": 0.0003948736241337138, "loss": 3.5844, "step": 71250 }, { "epoch": 4.841350727000951, "grad_norm": 0.8426217436790466, "learning_rate": 0.0003948311591248811, "loss": 3.5423, "step": 71255 }, { "epoch": 4.841690447071613, "grad_norm": 0.8437306880950928, "learning_rate": 0.0003947886941160484, "loss": 3.4296, "step": 71260 }, { "epoch": 4.8420301671422745, "grad_norm": 0.9964501857757568, "learning_rate": 0.00039474622910721564, "loss": 3.5089, "step": 71265 }, { "epoch": 4.842369887212937, "grad_norm": 0.7669007182121277, "learning_rate": 0.0003947037640983829, "loss": 3.3466, "step": 71270 }, { "epoch": 4.842709607283599, "grad_norm": 0.8554624915122986, "learning_rate": 0.00039466129908955025, "loss": 3.5947, "step": 71275 }, { "epoch": 4.84304932735426, "grad_norm": 0.8738998770713806, "learning_rate": 0.0003946188340807175, "loss": 3.5015, "step": 71280 }, { "epoch": 4.843389047424922, "grad_norm": 0.9077485203742981, "learning_rate": 0.00039457636907188476, "loss": 3.4556, "step": 71285 }, { "epoch": 4.843728767495584, "grad_norm": 0.9887559413909912, "learning_rate": 0.00039453390406305204, "loss": 3.4521, "step": 71290 }, { "epoch": 4.844068487566245, "grad_norm": 0.8477239608764648, "learning_rate": 0.0003944914390542194, "loss": 3.4471, "step": 71295 }, { "epoch": 4.844408207636907, "grad_norm": 0.7347681522369385, "learning_rate": 0.0003944489740453866, "loss": 3.2089, "step": 71300 }, { "epoch": 4.844747927707569, "grad_norm": 0.6829935908317566, "learning_rate": 0.0003944065090365539, "loss": 3.637, "step": 71305 }, { "epoch": 4.8450876477782305, "grad_norm": 0.9322537779808044, "learning_rate": 0.0003943640440277212, "loss": 3.3431, "step": 71310 }, { "epoch": 4.845427367848893, "grad_norm": 1.0419809818267822, "learning_rate": 0.00039432157901888844, "loss": 3.5384, "step": 71315 }, { "epoch": 4.845767087919555, "grad_norm": 0.8937583565711975, "learning_rate": 0.0003942791140100557, "loss": 3.7709, "step": 71320 }, { "epoch": 4.846106807990216, "grad_norm": 0.9244757294654846, "learning_rate": 0.000394236649001223, "loss": 3.5872, "step": 71325 }, { "epoch": 4.846446528060878, "grad_norm": 1.5295947790145874, "learning_rate": 0.0003941941839923903, "loss": 3.6434, "step": 71330 }, { "epoch": 4.84678624813154, "grad_norm": 1.030714750289917, "learning_rate": 0.00039415171898355756, "loss": 3.6061, "step": 71335 }, { "epoch": 4.847125968202201, "grad_norm": 1.011336088180542, "learning_rate": 0.00039410925397472484, "loss": 3.3618, "step": 71340 }, { "epoch": 4.847465688272863, "grad_norm": 0.8177810311317444, "learning_rate": 0.0003940667889658921, "loss": 3.6246, "step": 71345 }, { "epoch": 4.847805408343525, "grad_norm": 0.8188793659210205, "learning_rate": 0.0003940243239570594, "loss": 3.5037, "step": 71350 }, { "epoch": 4.8481451284141865, "grad_norm": 1.1574666500091553, "learning_rate": 0.0003939818589482267, "loss": 3.8081, "step": 71355 }, { "epoch": 4.848484848484849, "grad_norm": 0.730268657207489, "learning_rate": 0.0003939393939393939, "loss": 3.3167, "step": 71360 }, { "epoch": 4.848824568555511, "grad_norm": 0.7519198060035706, "learning_rate": 0.00039389692893056124, "loss": 3.6142, "step": 71365 }, { "epoch": 4.849164288626172, "grad_norm": 0.8022430539131165, "learning_rate": 0.0003938544639217285, "loss": 3.2648, "step": 71370 }, { "epoch": 4.849504008696834, "grad_norm": 0.81453537940979, "learning_rate": 0.00039381199891289575, "loss": 3.4568, "step": 71375 }, { "epoch": 4.849843728767496, "grad_norm": 0.9617143869400024, "learning_rate": 0.0003937695339040631, "loss": 3.4988, "step": 71380 }, { "epoch": 4.850183448838157, "grad_norm": 1.1162461042404175, "learning_rate": 0.00039372706889523036, "loss": 3.3558, "step": 71385 }, { "epoch": 4.850523168908819, "grad_norm": 0.7714512348175049, "learning_rate": 0.0003936846038863976, "loss": 3.4987, "step": 71390 }, { "epoch": 4.850862888979481, "grad_norm": 0.9156549572944641, "learning_rate": 0.00039364213887756487, "loss": 3.2639, "step": 71395 }, { "epoch": 4.8512026090501426, "grad_norm": 0.7589934468269348, "learning_rate": 0.0003935996738687322, "loss": 3.4715, "step": 71400 }, { "epoch": 4.851542329120805, "grad_norm": 1.130082607269287, "learning_rate": 0.00039355720885989943, "loss": 3.4675, "step": 71405 }, { "epoch": 4.851882049191467, "grad_norm": 0.96710205078125, "learning_rate": 0.0003935147438510667, "loss": 3.3725, "step": 71410 }, { "epoch": 4.852221769262128, "grad_norm": 0.8954606652259827, "learning_rate": 0.00039347227884223404, "loss": 3.5633, "step": 71415 }, { "epoch": 4.85256148933279, "grad_norm": 0.8415889143943787, "learning_rate": 0.00039342981383340127, "loss": 3.3142, "step": 71420 }, { "epoch": 4.852901209403452, "grad_norm": 0.7687771320343018, "learning_rate": 0.00039338734882456855, "loss": 3.3821, "step": 71425 }, { "epoch": 4.853240929474113, "grad_norm": 0.8235529661178589, "learning_rate": 0.00039334488381573583, "loss": 3.7031, "step": 71430 }, { "epoch": 4.853580649544775, "grad_norm": 0.8341265320777893, "learning_rate": 0.0003933024188069031, "loss": 3.5259, "step": 71435 }, { "epoch": 4.8539203696154365, "grad_norm": 0.8923922777175903, "learning_rate": 0.0003932599537980704, "loss": 3.7096, "step": 71440 }, { "epoch": 4.854260089686099, "grad_norm": 0.9525147676467896, "learning_rate": 0.00039321748878923767, "loss": 3.298, "step": 71445 }, { "epoch": 4.854599809756761, "grad_norm": 0.756129264831543, "learning_rate": 0.00039317502378040495, "loss": 3.4222, "step": 71450 }, { "epoch": 4.854939529827422, "grad_norm": 0.9516236782073975, "learning_rate": 0.00039313255877157223, "loss": 3.4279, "step": 71455 }, { "epoch": 4.855279249898084, "grad_norm": 0.9401823282241821, "learning_rate": 0.0003930900937627395, "loss": 3.4066, "step": 71460 }, { "epoch": 4.855618969968746, "grad_norm": 0.9439297318458557, "learning_rate": 0.00039304762875390684, "loss": 3.3759, "step": 71465 }, { "epoch": 4.855958690039407, "grad_norm": 0.9848126173019409, "learning_rate": 0.00039300516374507407, "loss": 3.4462, "step": 71470 }, { "epoch": 4.856298410110069, "grad_norm": 1.0566965341567993, "learning_rate": 0.00039296269873624135, "loss": 3.69, "step": 71475 }, { "epoch": 4.856638130180731, "grad_norm": 0.7110351920127869, "learning_rate": 0.00039292023372740863, "loss": 3.2803, "step": 71480 }, { "epoch": 4.8569778502513925, "grad_norm": 0.8948861956596375, "learning_rate": 0.0003928777687185759, "loss": 3.7736, "step": 71485 }, { "epoch": 4.857317570322055, "grad_norm": 0.9920291304588318, "learning_rate": 0.0003928353037097432, "loss": 3.6373, "step": 71490 }, { "epoch": 4.857657290392717, "grad_norm": 0.8694120645523071, "learning_rate": 0.00039279283870091047, "loss": 3.3105, "step": 71495 }, { "epoch": 4.857997010463378, "grad_norm": 0.7927242517471313, "learning_rate": 0.00039275037369207775, "loss": 3.7072, "step": 71500 }, { "epoch": 4.85833673053404, "grad_norm": 1.2033480405807495, "learning_rate": 0.00039270790868324503, "loss": 3.2108, "step": 71505 }, { "epoch": 4.858676450604702, "grad_norm": 0.8016459941864014, "learning_rate": 0.0003926654436744123, "loss": 3.2821, "step": 71510 }, { "epoch": 4.859016170675363, "grad_norm": 1.0020811557769775, "learning_rate": 0.00039262297866557953, "loss": 3.3145, "step": 71515 }, { "epoch": 4.859355890746025, "grad_norm": 0.7208513021469116, "learning_rate": 0.00039258051365674687, "loss": 3.3792, "step": 71520 }, { "epoch": 4.859695610816687, "grad_norm": 0.96617591381073, "learning_rate": 0.00039253804864791415, "loss": 3.4933, "step": 71525 }, { "epoch": 4.8600353308873485, "grad_norm": 0.9417550563812256, "learning_rate": 0.0003924955836390814, "loss": 3.4073, "step": 71530 }, { "epoch": 4.860375050958011, "grad_norm": 1.1125959157943726, "learning_rate": 0.0003924531186302487, "loss": 3.4119, "step": 71535 }, { "epoch": 4.860714771028673, "grad_norm": 0.9877198338508606, "learning_rate": 0.000392410653621416, "loss": 3.54, "step": 71540 }, { "epoch": 4.861054491099334, "grad_norm": 0.8026974201202393, "learning_rate": 0.0003923681886125832, "loss": 3.7745, "step": 71545 }, { "epoch": 4.861394211169996, "grad_norm": 0.7902126312255859, "learning_rate": 0.0003923257236037505, "loss": 3.4443, "step": 71550 }, { "epoch": 4.861733931240658, "grad_norm": 1.0389958620071411, "learning_rate": 0.00039228325859491783, "loss": 3.6793, "step": 71555 }, { "epoch": 4.862073651311319, "grad_norm": 0.9055476784706116, "learning_rate": 0.00039224079358608506, "loss": 3.262, "step": 71560 }, { "epoch": 4.862413371381981, "grad_norm": 1.3160901069641113, "learning_rate": 0.00039219832857725234, "loss": 3.3541, "step": 71565 }, { "epoch": 4.862753091452643, "grad_norm": 1.0848639011383057, "learning_rate": 0.00039215586356841967, "loss": 3.4138, "step": 71570 }, { "epoch": 4.8630928115233045, "grad_norm": 0.8789510130882263, "learning_rate": 0.0003921133985595869, "loss": 3.5598, "step": 71575 }, { "epoch": 4.863432531593967, "grad_norm": 0.9375132322311401, "learning_rate": 0.0003920709335507542, "loss": 3.535, "step": 71580 }, { "epoch": 4.863772251664629, "grad_norm": 0.8606034517288208, "learning_rate": 0.00039202846854192146, "loss": 3.726, "step": 71585 }, { "epoch": 4.86411197173529, "grad_norm": 0.8120411038398743, "learning_rate": 0.00039198600353308874, "loss": 3.2808, "step": 71590 }, { "epoch": 4.864451691805952, "grad_norm": 0.7670430541038513, "learning_rate": 0.000391943538524256, "loss": 3.5733, "step": 71595 }, { "epoch": 4.864791411876614, "grad_norm": 0.6934714913368225, "learning_rate": 0.0003919010735154233, "loss": 3.3441, "step": 71600 }, { "epoch": 4.865131131947275, "grad_norm": 0.7447838187217712, "learning_rate": 0.0003918586085065906, "loss": 3.6007, "step": 71605 }, { "epoch": 4.865470852017937, "grad_norm": 0.9286085367202759, "learning_rate": 0.00039181614349775786, "loss": 3.3004, "step": 71610 }, { "epoch": 4.865810572088599, "grad_norm": 0.9078155755996704, "learning_rate": 0.00039177367848892514, "loss": 3.3515, "step": 71615 }, { "epoch": 4.8661502921592605, "grad_norm": 1.0792973041534424, "learning_rate": 0.00039173121348009236, "loss": 3.4091, "step": 71620 }, { "epoch": 4.866490012229923, "grad_norm": 0.8293854594230652, "learning_rate": 0.0003916887484712597, "loss": 3.6954, "step": 71625 }, { "epoch": 4.866829732300585, "grad_norm": 0.9288709759712219, "learning_rate": 0.000391646283462427, "loss": 3.3769, "step": 71630 }, { "epoch": 4.867169452371246, "grad_norm": 0.9325634241104126, "learning_rate": 0.00039160381845359426, "loss": 3.4428, "step": 71635 }, { "epoch": 4.867509172441908, "grad_norm": 0.7453815937042236, "learning_rate": 0.00039156135344476154, "loss": 3.5358, "step": 71640 }, { "epoch": 4.86784889251257, "grad_norm": 0.9887683391571045, "learning_rate": 0.0003915188884359288, "loss": 3.727, "step": 71645 }, { "epoch": 4.868188612583231, "grad_norm": 0.709135115146637, "learning_rate": 0.0003914764234270961, "loss": 3.4177, "step": 71650 }, { "epoch": 4.868528332653893, "grad_norm": 0.8720343112945557, "learning_rate": 0.0003914339584182633, "loss": 3.5084, "step": 71655 }, { "epoch": 4.8688680527245545, "grad_norm": 1.0377472639083862, "learning_rate": 0.00039139149340943066, "loss": 3.3586, "step": 71660 }, { "epoch": 4.8692077727952165, "grad_norm": 0.7515466213226318, "learning_rate": 0.00039134902840059794, "loss": 3.5479, "step": 71665 }, { "epoch": 4.869547492865879, "grad_norm": 0.7997063994407654, "learning_rate": 0.00039130656339176516, "loss": 3.8232, "step": 71670 }, { "epoch": 4.86988721293654, "grad_norm": 0.7731438279151917, "learning_rate": 0.0003912640983829325, "loss": 3.5762, "step": 71675 }, { "epoch": 4.870226933007202, "grad_norm": 0.8113502860069275, "learning_rate": 0.0003912216333740998, "loss": 3.5236, "step": 71680 }, { "epoch": 4.870566653077864, "grad_norm": 1.0951547622680664, "learning_rate": 0.000391179168365267, "loss": 3.1938, "step": 71685 }, { "epoch": 4.870906373148525, "grad_norm": 0.8797723054885864, "learning_rate": 0.0003911367033564343, "loss": 3.4559, "step": 71690 }, { "epoch": 4.871246093219187, "grad_norm": 0.9059113264083862, "learning_rate": 0.0003910942383476016, "loss": 3.4115, "step": 71695 }, { "epoch": 4.871585813289849, "grad_norm": 0.8071296215057373, "learning_rate": 0.00039105177333876884, "loss": 3.5085, "step": 71700 }, { "epoch": 4.8719255333605105, "grad_norm": 0.8111851811408997, "learning_rate": 0.0003910093083299361, "loss": 3.2816, "step": 71705 }, { "epoch": 4.872265253431173, "grad_norm": 0.8736326098442078, "learning_rate": 0.00039096684332110346, "loss": 3.0608, "step": 71710 }, { "epoch": 4.872604973501835, "grad_norm": 0.9330747723579407, "learning_rate": 0.0003909243783122707, "loss": 3.2157, "step": 71715 }, { "epoch": 4.872944693572496, "grad_norm": 0.8230901956558228, "learning_rate": 0.00039088191330343796, "loss": 3.707, "step": 71720 }, { "epoch": 4.873284413643158, "grad_norm": 1.0135942697525024, "learning_rate": 0.0003908394482946053, "loss": 3.5181, "step": 71725 }, { "epoch": 4.87362413371382, "grad_norm": 0.7113049030303955, "learning_rate": 0.0003907969832857725, "loss": 3.6278, "step": 71730 }, { "epoch": 4.873963853784481, "grad_norm": 0.9025107026100159, "learning_rate": 0.0003907545182769398, "loss": 3.5791, "step": 71735 }, { "epoch": 4.874303573855143, "grad_norm": 0.7336267232894897, "learning_rate": 0.0003907120532681071, "loss": 3.481, "step": 71740 }, { "epoch": 4.874643293925805, "grad_norm": 0.9404609203338623, "learning_rate": 0.00039066958825927436, "loss": 3.6503, "step": 71745 }, { "epoch": 4.8749830139964665, "grad_norm": 0.7735083103179932, "learning_rate": 0.00039062712325044164, "loss": 3.4503, "step": 71750 }, { "epoch": 4.875322734067129, "grad_norm": 0.8202487230300903, "learning_rate": 0.0003905846582416089, "loss": 3.3182, "step": 71755 }, { "epoch": 4.875662454137791, "grad_norm": 0.7175875902175903, "learning_rate": 0.0003905421932327762, "loss": 3.4519, "step": 71760 }, { "epoch": 4.876002174208452, "grad_norm": 1.2140263319015503, "learning_rate": 0.0003904997282239435, "loss": 3.729, "step": 71765 }, { "epoch": 4.876341894279114, "grad_norm": 1.1485114097595215, "learning_rate": 0.00039045726321511076, "loss": 3.3844, "step": 71770 }, { "epoch": 4.876681614349776, "grad_norm": 0.8044224381446838, "learning_rate": 0.000390414798206278, "loss": 3.592, "step": 71775 }, { "epoch": 4.877021334420437, "grad_norm": 1.0518856048583984, "learning_rate": 0.0003903723331974453, "loss": 3.6643, "step": 71780 }, { "epoch": 4.877361054491099, "grad_norm": 0.7046890258789062, "learning_rate": 0.0003903298681886126, "loss": 3.4309, "step": 71785 }, { "epoch": 4.877700774561761, "grad_norm": 0.9900513887405396, "learning_rate": 0.00039028740317977983, "loss": 3.5369, "step": 71790 }, { "epoch": 4.8780404946324225, "grad_norm": 0.8486366868019104, "learning_rate": 0.00039024493817094716, "loss": 3.2129, "step": 71795 }, { "epoch": 4.878380214703085, "grad_norm": 0.7431617379188538, "learning_rate": 0.00039020247316211444, "loss": 3.5017, "step": 71800 }, { "epoch": 4.878719934773747, "grad_norm": 1.0684456825256348, "learning_rate": 0.0003901600081532817, "loss": 3.3193, "step": 71805 }, { "epoch": 4.879059654844408, "grad_norm": 1.158445119857788, "learning_rate": 0.00039011754314444895, "loss": 3.6273, "step": 71810 }, { "epoch": 4.87939937491507, "grad_norm": 0.953860342502594, "learning_rate": 0.0003900750781356163, "loss": 3.4245, "step": 71815 }, { "epoch": 4.879739094985732, "grad_norm": 1.0015419721603394, "learning_rate": 0.00039003261312678357, "loss": 3.4759, "step": 71820 }, { "epoch": 4.880078815056393, "grad_norm": 0.7116113305091858, "learning_rate": 0.0003899901481179508, "loss": 3.5564, "step": 71825 }, { "epoch": 4.880418535127055, "grad_norm": 0.909771740436554, "learning_rate": 0.0003899476831091181, "loss": 3.586, "step": 71830 }, { "epoch": 4.880758255197717, "grad_norm": 0.8496811985969543, "learning_rate": 0.0003899052181002854, "loss": 3.6646, "step": 71835 }, { "epoch": 4.8810979752683785, "grad_norm": 0.8220270872116089, "learning_rate": 0.00038986275309145263, "loss": 3.4962, "step": 71840 }, { "epoch": 4.881437695339041, "grad_norm": 1.0029504299163818, "learning_rate": 0.0003898202880826199, "loss": 3.4967, "step": 71845 }, { "epoch": 4.881777415409703, "grad_norm": 1.0897222757339478, "learning_rate": 0.00038977782307378725, "loss": 3.4652, "step": 71850 }, { "epoch": 4.882117135480364, "grad_norm": 0.7791353464126587, "learning_rate": 0.00038973535806495447, "loss": 3.451, "step": 71855 }, { "epoch": 4.882456855551026, "grad_norm": 0.8146374225616455, "learning_rate": 0.00038969289305612175, "loss": 3.6017, "step": 71860 }, { "epoch": 4.882796575621688, "grad_norm": 0.8084748387336731, "learning_rate": 0.0003896504280472891, "loss": 3.7175, "step": 71865 }, { "epoch": 4.883136295692349, "grad_norm": 0.7945273518562317, "learning_rate": 0.0003896079630384563, "loss": 3.6651, "step": 71870 }, { "epoch": 4.883476015763011, "grad_norm": 1.1460814476013184, "learning_rate": 0.0003895654980296236, "loss": 3.4716, "step": 71875 }, { "epoch": 4.883815735833673, "grad_norm": 0.8777429461479187, "learning_rate": 0.00038952303302079087, "loss": 3.6556, "step": 71880 }, { "epoch": 4.8841554559043345, "grad_norm": 1.0097410678863525, "learning_rate": 0.00038948056801195815, "loss": 3.4465, "step": 71885 }, { "epoch": 4.884495175974997, "grad_norm": 0.9611325263977051, "learning_rate": 0.00038943810300312543, "loss": 3.4304, "step": 71890 }, { "epoch": 4.884834896045659, "grad_norm": 0.8928402066230774, "learning_rate": 0.0003893956379942927, "loss": 3.4371, "step": 71895 }, { "epoch": 4.88517461611632, "grad_norm": 1.0716354846954346, "learning_rate": 0.00038935317298546, "loss": 3.3305, "step": 71900 }, { "epoch": 4.885514336186982, "grad_norm": 0.7230565547943115, "learning_rate": 0.00038931070797662727, "loss": 3.5492, "step": 71905 }, { "epoch": 4.885854056257644, "grad_norm": 1.0003303289413452, "learning_rate": 0.00038926824296779455, "loss": 3.3361, "step": 71910 }, { "epoch": 4.886193776328305, "grad_norm": 0.7246459722518921, "learning_rate": 0.0003892257779589618, "loss": 3.3967, "step": 71915 }, { "epoch": 4.886533496398967, "grad_norm": 0.8626651763916016, "learning_rate": 0.0003891833129501291, "loss": 3.4974, "step": 71920 }, { "epoch": 4.886873216469629, "grad_norm": 0.966983437538147, "learning_rate": 0.0003891408479412964, "loss": 3.3502, "step": 71925 }, { "epoch": 4.8872129365402905, "grad_norm": 0.8597303032875061, "learning_rate": 0.0003890983829324636, "loss": 3.5668, "step": 71930 }, { "epoch": 4.887552656610953, "grad_norm": 0.9356186985969543, "learning_rate": 0.00038905591792363095, "loss": 3.4582, "step": 71935 }, { "epoch": 4.887892376681615, "grad_norm": 0.8280949592590332, "learning_rate": 0.00038901345291479823, "loss": 3.7715, "step": 71940 }, { "epoch": 4.888232096752276, "grad_norm": 0.8792530298233032, "learning_rate": 0.00038897098790596546, "loss": 3.5101, "step": 71945 }, { "epoch": 4.888571816822938, "grad_norm": 0.8395430445671082, "learning_rate": 0.00038892852289713274, "loss": 3.5229, "step": 71950 }, { "epoch": 4.8889115368936, "grad_norm": 0.8534421324729919, "learning_rate": 0.0003888860578883001, "loss": 3.7282, "step": 71955 }, { "epoch": 4.889251256964261, "grad_norm": 0.8459038734436035, "learning_rate": 0.0003888435928794673, "loss": 3.1269, "step": 71960 }, { "epoch": 4.889590977034923, "grad_norm": 0.9300627112388611, "learning_rate": 0.0003888011278706346, "loss": 3.3672, "step": 71965 }, { "epoch": 4.889930697105585, "grad_norm": 0.8684754371643066, "learning_rate": 0.0003887586628618019, "loss": 3.3354, "step": 71970 }, { "epoch": 4.8902704171762466, "grad_norm": 0.8709257245063782, "learning_rate": 0.0003887161978529692, "loss": 3.6272, "step": 71975 }, { "epoch": 4.890610137246909, "grad_norm": 1.0498430728912354, "learning_rate": 0.0003886737328441364, "loss": 3.4873, "step": 71980 }, { "epoch": 4.890949857317571, "grad_norm": 0.8447078466415405, "learning_rate": 0.0003886312678353037, "loss": 3.6963, "step": 71985 }, { "epoch": 4.891289577388232, "grad_norm": 0.9700011014938354, "learning_rate": 0.00038858880282647103, "loss": 3.6354, "step": 71990 }, { "epoch": 4.891629297458894, "grad_norm": 0.958055853843689, "learning_rate": 0.00038854633781763826, "loss": 3.7072, "step": 71995 }, { "epoch": 4.891969017529556, "grad_norm": 0.7666856646537781, "learning_rate": 0.00038850387280880554, "loss": 3.533, "step": 72000 }, { "epoch": 4.892308737600217, "grad_norm": 0.8994258046150208, "learning_rate": 0.0003884614077999729, "loss": 3.5281, "step": 72005 }, { "epoch": 4.892648457670879, "grad_norm": 1.167443037033081, "learning_rate": 0.0003884189427911401, "loss": 3.2858, "step": 72010 }, { "epoch": 4.892988177741541, "grad_norm": 1.1294502019882202, "learning_rate": 0.0003883764777823074, "loss": 3.686, "step": 72015 }, { "epoch": 4.893327897812203, "grad_norm": 1.0230016708374023, "learning_rate": 0.0003883340127734747, "loss": 3.6137, "step": 72020 }, { "epoch": 4.893667617882865, "grad_norm": 0.9003035426139832, "learning_rate": 0.00038829154776464194, "loss": 3.4534, "step": 72025 }, { "epoch": 4.894007337953527, "grad_norm": 0.9287586212158203, "learning_rate": 0.0003882490827558092, "loss": 3.7582, "step": 72030 }, { "epoch": 4.894347058024188, "grad_norm": 0.6925762891769409, "learning_rate": 0.0003882066177469765, "loss": 3.1505, "step": 72035 }, { "epoch": 4.89468677809485, "grad_norm": 1.0912725925445557, "learning_rate": 0.0003881641527381438, "loss": 3.7986, "step": 72040 }, { "epoch": 4.895026498165512, "grad_norm": 1.1536036729812622, "learning_rate": 0.00038812168772931106, "loss": 3.5491, "step": 72045 }, { "epoch": 4.895366218236173, "grad_norm": 0.9433629512786865, "learning_rate": 0.00038807922272047834, "loss": 3.5042, "step": 72050 }, { "epoch": 4.895705938306835, "grad_norm": 0.8793943524360657, "learning_rate": 0.0003880367577116456, "loss": 3.5939, "step": 72055 }, { "epoch": 4.896045658377497, "grad_norm": 1.0590320825576782, "learning_rate": 0.0003879942927028129, "loss": 3.5035, "step": 72060 }, { "epoch": 4.896385378448159, "grad_norm": 0.8599629402160645, "learning_rate": 0.0003879518276939802, "loss": 3.5696, "step": 72065 }, { "epoch": 4.896725098518821, "grad_norm": 1.0057350397109985, "learning_rate": 0.0003879093626851474, "loss": 3.5252, "step": 72070 }, { "epoch": 4.897064818589483, "grad_norm": 0.7453026175498962, "learning_rate": 0.00038786689767631474, "loss": 3.5433, "step": 72075 }, { "epoch": 4.897404538660144, "grad_norm": 1.050764799118042, "learning_rate": 0.000387824432667482, "loss": 3.4845, "step": 72080 }, { "epoch": 4.897744258730806, "grad_norm": 0.7781047224998474, "learning_rate": 0.00038778196765864925, "loss": 3.6845, "step": 72085 }, { "epoch": 4.898083978801468, "grad_norm": 0.9952058792114258, "learning_rate": 0.0003877395026498166, "loss": 3.6216, "step": 72090 }, { "epoch": 4.898423698872129, "grad_norm": 1.109413743019104, "learning_rate": 0.00038769703764098386, "loss": 3.3974, "step": 72095 }, { "epoch": 4.898763418942791, "grad_norm": 0.8139770030975342, "learning_rate": 0.0003876545726321511, "loss": 3.455, "step": 72100 }, { "epoch": 4.899103139013453, "grad_norm": 0.9477381706237793, "learning_rate": 0.00038761210762331837, "loss": 3.66, "step": 72105 }, { "epoch": 4.899442859084115, "grad_norm": 1.145743489265442, "learning_rate": 0.0003875696426144857, "loss": 3.6234, "step": 72110 }, { "epoch": 4.899782579154777, "grad_norm": 1.0026410818099976, "learning_rate": 0.00038752717760565293, "loss": 3.1466, "step": 72115 }, { "epoch": 4.900122299225439, "grad_norm": 0.885558009147644, "learning_rate": 0.0003874847125968202, "loss": 3.4774, "step": 72120 }, { "epoch": 4.9004620192961, "grad_norm": 0.7886387705802917, "learning_rate": 0.00038744224758798754, "loss": 3.3185, "step": 72125 }, { "epoch": 4.900801739366762, "grad_norm": 0.9096794724464417, "learning_rate": 0.00038739978257915477, "loss": 3.6874, "step": 72130 }, { "epoch": 4.901141459437423, "grad_norm": 0.7408117055892944, "learning_rate": 0.00038735731757032205, "loss": 3.4992, "step": 72135 }, { "epoch": 4.901481179508085, "grad_norm": 1.1832871437072754, "learning_rate": 0.00038731485256148933, "loss": 3.3447, "step": 72140 }, { "epoch": 4.901820899578747, "grad_norm": 0.925692081451416, "learning_rate": 0.00038727238755265666, "loss": 3.4921, "step": 72145 }, { "epoch": 4.9021606196494085, "grad_norm": 0.7824732661247253, "learning_rate": 0.0003872299225438239, "loss": 3.6578, "step": 72150 }, { "epoch": 4.902500339720071, "grad_norm": 0.9270050525665283, "learning_rate": 0.00038718745753499117, "loss": 3.3486, "step": 72155 }, { "epoch": 4.902840059790733, "grad_norm": 1.3210622072219849, "learning_rate": 0.0003871449925261585, "loss": 3.4937, "step": 72160 }, { "epoch": 4.903179779861394, "grad_norm": 0.738833487033844, "learning_rate": 0.00038710252751732573, "loss": 3.3761, "step": 72165 }, { "epoch": 4.903519499932056, "grad_norm": 0.9803553819656372, "learning_rate": 0.000387060062508493, "loss": 3.4879, "step": 72170 }, { "epoch": 4.903859220002718, "grad_norm": 0.8227695822715759, "learning_rate": 0.0003870175974996603, "loss": 3.5295, "step": 72175 }, { "epoch": 4.904198940073379, "grad_norm": 0.7833067178726196, "learning_rate": 0.00038697513249082757, "loss": 3.5768, "step": 72180 }, { "epoch": 4.904538660144041, "grad_norm": 0.9094823002815247, "learning_rate": 0.00038693266748199485, "loss": 3.6476, "step": 72185 }, { "epoch": 4.904878380214703, "grad_norm": 0.694635808467865, "learning_rate": 0.00038689020247316213, "loss": 3.7602, "step": 72190 }, { "epoch": 4.9052181002853645, "grad_norm": 1.3866500854492188, "learning_rate": 0.0003868477374643294, "loss": 3.4395, "step": 72195 }, { "epoch": 4.905557820356027, "grad_norm": 0.9556769728660583, "learning_rate": 0.0003868052724554967, "loss": 3.5459, "step": 72200 }, { "epoch": 4.905897540426689, "grad_norm": 0.8479629158973694, "learning_rate": 0.00038676280744666397, "loss": 3.4713, "step": 72205 }, { "epoch": 4.90623726049735, "grad_norm": 0.8694672584533691, "learning_rate": 0.0003867203424378312, "loss": 3.5988, "step": 72210 }, { "epoch": 4.906576980568012, "grad_norm": 0.7101976871490479, "learning_rate": 0.00038667787742899853, "loss": 3.4705, "step": 72215 }, { "epoch": 4.906916700638674, "grad_norm": 1.0196577310562134, "learning_rate": 0.0003866354124201658, "loss": 3.3522, "step": 72220 }, { "epoch": 4.907256420709335, "grad_norm": 0.8992969393730164, "learning_rate": 0.00038659294741133303, "loss": 3.4922, "step": 72225 }, { "epoch": 4.907596140779997, "grad_norm": 1.0028029680252075, "learning_rate": 0.00038655048240250037, "loss": 3.2312, "step": 72230 }, { "epoch": 4.907935860850659, "grad_norm": 0.8917765021324158, "learning_rate": 0.00038650801739366765, "loss": 3.31, "step": 72235 }, { "epoch": 4.9082755809213205, "grad_norm": 0.8796781897544861, "learning_rate": 0.0003864655523848349, "loss": 3.5904, "step": 72240 }, { "epoch": 4.908615300991983, "grad_norm": 0.8835317492485046, "learning_rate": 0.00038642308737600215, "loss": 3.6431, "step": 72245 }, { "epoch": 4.908955021062645, "grad_norm": 0.9377812147140503, "learning_rate": 0.0003863806223671695, "loss": 3.3825, "step": 72250 }, { "epoch": 4.909294741133306, "grad_norm": 1.1719474792480469, "learning_rate": 0.0003863381573583367, "loss": 3.4916, "step": 72255 }, { "epoch": 4.909634461203968, "grad_norm": 0.9674480557441711, "learning_rate": 0.000386295692349504, "loss": 3.6128, "step": 72260 }, { "epoch": 4.90997418127463, "grad_norm": 0.8809230923652649, "learning_rate": 0.00038625322734067133, "loss": 3.5105, "step": 72265 }, { "epoch": 4.910313901345291, "grad_norm": 1.0579838752746582, "learning_rate": 0.00038621076233183856, "loss": 3.3242, "step": 72270 }, { "epoch": 4.910653621415953, "grad_norm": 1.203029990196228, "learning_rate": 0.00038616829732300584, "loss": 3.4563, "step": 72275 }, { "epoch": 4.910993341486615, "grad_norm": 0.7604086399078369, "learning_rate": 0.0003861258323141731, "loss": 3.38, "step": 72280 }, { "epoch": 4.911333061557277, "grad_norm": 0.9430040121078491, "learning_rate": 0.0003860833673053404, "loss": 3.4707, "step": 72285 }, { "epoch": 4.911672781627939, "grad_norm": 0.7291462421417236, "learning_rate": 0.0003860409022965077, "loss": 3.1049, "step": 72290 }, { "epoch": 4.912012501698601, "grad_norm": 0.8246901631355286, "learning_rate": 0.00038599843728767496, "loss": 3.2076, "step": 72295 }, { "epoch": 4.912352221769262, "grad_norm": 0.7850285172462463, "learning_rate": 0.00038595597227884224, "loss": 3.3194, "step": 72300 }, { "epoch": 4.912691941839924, "grad_norm": 0.8531397581100464, "learning_rate": 0.0003859135072700095, "loss": 3.6342, "step": 72305 }, { "epoch": 4.913031661910586, "grad_norm": 1.2020094394683838, "learning_rate": 0.0003858710422611768, "loss": 3.488, "step": 72310 }, { "epoch": 4.913371381981247, "grad_norm": 1.1814930438995361, "learning_rate": 0.00038582857725234413, "loss": 3.4106, "step": 72315 }, { "epoch": 4.913711102051909, "grad_norm": 0.8131977319717407, "learning_rate": 0.00038578611224351136, "loss": 3.4921, "step": 72320 }, { "epoch": 4.914050822122571, "grad_norm": 0.9900912046432495, "learning_rate": 0.00038574364723467864, "loss": 3.5224, "step": 72325 }, { "epoch": 4.914390542193233, "grad_norm": 0.8348836898803711, "learning_rate": 0.0003857011822258459, "loss": 3.2632, "step": 72330 }, { "epoch": 4.914730262263895, "grad_norm": 1.109505534172058, "learning_rate": 0.0003856587172170132, "loss": 3.4626, "step": 72335 }, { "epoch": 4.915069982334556, "grad_norm": 0.9630661010742188, "learning_rate": 0.0003856162522081805, "loss": 3.556, "step": 72340 }, { "epoch": 4.915409702405218, "grad_norm": 1.0526721477508545, "learning_rate": 0.00038557378719934776, "loss": 3.5273, "step": 72345 }, { "epoch": 4.91574942247588, "grad_norm": 1.1559056043624878, "learning_rate": 0.00038553132219051504, "loss": 3.4502, "step": 72350 }, { "epoch": 4.916089142546541, "grad_norm": 0.7480113506317139, "learning_rate": 0.0003854888571816823, "loss": 3.5422, "step": 72355 }, { "epoch": 4.916428862617203, "grad_norm": 0.7670375108718872, "learning_rate": 0.0003854463921728496, "loss": 3.4582, "step": 72360 }, { "epoch": 4.916768582687865, "grad_norm": 0.9430740475654602, "learning_rate": 0.0003854039271640168, "loss": 3.5059, "step": 72365 }, { "epoch": 4.9171083027585265, "grad_norm": 0.9023682475090027, "learning_rate": 0.00038536146215518416, "loss": 3.4228, "step": 72370 }, { "epoch": 4.917448022829189, "grad_norm": 0.7877408862113953, "learning_rate": 0.00038531899714635144, "loss": 3.5894, "step": 72375 }, { "epoch": 4.917787742899851, "grad_norm": 0.9517087340354919, "learning_rate": 0.00038527653213751866, "loss": 3.4393, "step": 72380 }, { "epoch": 4.918127462970512, "grad_norm": 1.4943435192108154, "learning_rate": 0.000385234067128686, "loss": 3.4386, "step": 72385 }, { "epoch": 4.918467183041174, "grad_norm": 0.8636531233787537, "learning_rate": 0.0003851916021198533, "loss": 3.5766, "step": 72390 }, { "epoch": 4.918806903111836, "grad_norm": 1.0968360900878906, "learning_rate": 0.0003851491371110205, "loss": 3.5288, "step": 72395 }, { "epoch": 4.919146623182497, "grad_norm": 0.7035086750984192, "learning_rate": 0.0003851066721021878, "loss": 3.623, "step": 72400 }, { "epoch": 4.919486343253159, "grad_norm": 0.7638811469078064, "learning_rate": 0.0003850642070933551, "loss": 3.3787, "step": 72405 }, { "epoch": 4.919826063323821, "grad_norm": 1.0079468488693237, "learning_rate": 0.00038502174208452234, "loss": 3.5519, "step": 72410 }, { "epoch": 4.9201657833944825, "grad_norm": 2.7593867778778076, "learning_rate": 0.0003849792770756896, "loss": 3.5095, "step": 72415 }, { "epoch": 4.920505503465145, "grad_norm": 0.7488455772399902, "learning_rate": 0.00038493681206685696, "loss": 3.4229, "step": 72420 }, { "epoch": 4.920845223535807, "grad_norm": 1.2424204349517822, "learning_rate": 0.0003848943470580242, "loss": 3.2775, "step": 72425 }, { "epoch": 4.921184943606468, "grad_norm": 1.3415158987045288, "learning_rate": 0.00038485188204919146, "loss": 3.3861, "step": 72430 }, { "epoch": 4.92152466367713, "grad_norm": 0.7716087698936462, "learning_rate": 0.00038480941704035874, "loss": 3.2917, "step": 72435 }, { "epoch": 4.921864383747792, "grad_norm": 0.8157476186752319, "learning_rate": 0.000384766952031526, "loss": 3.4595, "step": 72440 }, { "epoch": 4.922204103818453, "grad_norm": 1.0020833015441895, "learning_rate": 0.0003847244870226933, "loss": 3.605, "step": 72445 }, { "epoch": 4.922543823889115, "grad_norm": 1.0010167360305786, "learning_rate": 0.0003846820220138606, "loss": 3.2341, "step": 72450 }, { "epoch": 4.922883543959777, "grad_norm": 0.844659149646759, "learning_rate": 0.00038463955700502786, "loss": 3.455, "step": 72455 }, { "epoch": 4.9232232640304385, "grad_norm": 0.6769673228263855, "learning_rate": 0.00038459709199619514, "loss": 3.524, "step": 72460 }, { "epoch": 4.923562984101101, "grad_norm": 0.8233169317245483, "learning_rate": 0.0003845546269873624, "loss": 3.5392, "step": 72465 }, { "epoch": 4.923902704171763, "grad_norm": 0.6875226497650146, "learning_rate": 0.00038451216197852965, "loss": 3.6913, "step": 72470 }, { "epoch": 4.924242424242424, "grad_norm": 0.8477721810340881, "learning_rate": 0.000384469696969697, "loss": 3.6157, "step": 72475 }, { "epoch": 4.924582144313086, "grad_norm": 0.7096152305603027, "learning_rate": 0.00038442723196086426, "loss": 3.315, "step": 72480 }, { "epoch": 4.924921864383748, "grad_norm": 1.0758352279663086, "learning_rate": 0.00038438476695203154, "loss": 3.4009, "step": 72485 }, { "epoch": 4.925261584454409, "grad_norm": 0.9256690740585327, "learning_rate": 0.0003843423019431988, "loss": 3.3645, "step": 72490 }, { "epoch": 4.925601304525071, "grad_norm": 1.2663222551345825, "learning_rate": 0.0003842998369343661, "loss": 3.3535, "step": 72495 }, { "epoch": 4.925941024595733, "grad_norm": 1.4317413568496704, "learning_rate": 0.0003842573719255334, "loss": 3.4024, "step": 72500 }, { "epoch": 4.9262807446663945, "grad_norm": 1.0781514644622803, "learning_rate": 0.0003842149069167006, "loss": 3.3881, "step": 72505 }, { "epoch": 4.926620464737057, "grad_norm": 1.000072956085205, "learning_rate": 0.00038417244190786794, "loss": 3.6698, "step": 72510 }, { "epoch": 4.926960184807719, "grad_norm": 0.9730064868927002, "learning_rate": 0.0003841299768990352, "loss": 3.7727, "step": 72515 }, { "epoch": 4.92729990487838, "grad_norm": 0.8389480710029602, "learning_rate": 0.00038408751189020245, "loss": 3.5537, "step": 72520 }, { "epoch": 4.927639624949042, "grad_norm": 0.9581026434898376, "learning_rate": 0.0003840450468813698, "loss": 3.4466, "step": 72525 }, { "epoch": 4.927979345019704, "grad_norm": 0.8313193321228027, "learning_rate": 0.00038400258187253707, "loss": 3.2836, "step": 72530 }, { "epoch": 4.928319065090365, "grad_norm": 0.7633522748947144, "learning_rate": 0.0003839601168637043, "loss": 3.3872, "step": 72535 }, { "epoch": 4.928658785161027, "grad_norm": 0.7295286059379578, "learning_rate": 0.00038391765185487157, "loss": 3.7002, "step": 72540 }, { "epoch": 4.928998505231689, "grad_norm": 0.9855348467826843, "learning_rate": 0.0003838751868460389, "loss": 3.3514, "step": 72545 }, { "epoch": 4.9293382253023506, "grad_norm": 0.8892372250556946, "learning_rate": 0.00038383272183720613, "loss": 3.5097, "step": 72550 }, { "epoch": 4.929677945373013, "grad_norm": 0.7191735506057739, "learning_rate": 0.0003837902568283734, "loss": 3.5299, "step": 72555 }, { "epoch": 4.930017665443675, "grad_norm": 0.812643826007843, "learning_rate": 0.00038374779181954075, "loss": 3.4148, "step": 72560 }, { "epoch": 4.930357385514336, "grad_norm": 1.0945944786071777, "learning_rate": 0.00038370532681070797, "loss": 3.5501, "step": 72565 }, { "epoch": 4.930697105584998, "grad_norm": 0.8771920204162598, "learning_rate": 0.00038366286180187525, "loss": 3.4961, "step": 72570 }, { "epoch": 4.93103682565566, "grad_norm": 1.0528018474578857, "learning_rate": 0.00038362039679304253, "loss": 3.7025, "step": 72575 }, { "epoch": 4.931376545726321, "grad_norm": 0.9493814706802368, "learning_rate": 0.0003835779317842098, "loss": 3.5905, "step": 72580 }, { "epoch": 4.931716265796983, "grad_norm": 0.8297226428985596, "learning_rate": 0.0003835354667753771, "loss": 3.5571, "step": 72585 }, { "epoch": 4.932055985867645, "grad_norm": 0.9901010990142822, "learning_rate": 0.00038349300176654437, "loss": 3.4256, "step": 72590 }, { "epoch": 4.932395705938307, "grad_norm": 0.8764402270317078, "learning_rate": 0.00038345053675771165, "loss": 3.638, "step": 72595 }, { "epoch": 4.932735426008969, "grad_norm": 0.9738648533821106, "learning_rate": 0.00038340807174887893, "loss": 3.5583, "step": 72600 }, { "epoch": 4.933075146079631, "grad_norm": 0.7603336572647095, "learning_rate": 0.0003833656067400462, "loss": 3.7526, "step": 72605 }, { "epoch": 4.933414866150292, "grad_norm": 0.8929620385169983, "learning_rate": 0.00038332314173121344, "loss": 3.5815, "step": 72610 }, { "epoch": 4.933754586220954, "grad_norm": 0.8852488398551941, "learning_rate": 0.00038328067672238077, "loss": 3.4233, "step": 72615 }, { "epoch": 4.934094306291616, "grad_norm": 0.791915774345398, "learning_rate": 0.00038323821171354805, "loss": 3.6983, "step": 72620 }, { "epoch": 4.934434026362277, "grad_norm": 0.9024913311004639, "learning_rate": 0.0003831957467047153, "loss": 3.1075, "step": 72625 }, { "epoch": 4.934773746432939, "grad_norm": 0.9006919860839844, "learning_rate": 0.0003831532816958826, "loss": 3.5865, "step": 72630 }, { "epoch": 4.935113466503601, "grad_norm": 0.7446363568305969, "learning_rate": 0.0003831108166870499, "loss": 3.1251, "step": 72635 }, { "epoch": 4.935453186574263, "grad_norm": 0.8133948445320129, "learning_rate": 0.0003830683516782171, "loss": 3.4633, "step": 72640 }, { "epoch": 4.935792906644925, "grad_norm": 0.9396703243255615, "learning_rate": 0.00038302588666938445, "loss": 3.4689, "step": 72645 }, { "epoch": 4.936132626715587, "grad_norm": 0.9660793542861938, "learning_rate": 0.00038298342166055173, "loss": 3.4466, "step": 72650 }, { "epoch": 4.936472346786248, "grad_norm": 0.7830461263656616, "learning_rate": 0.000382940956651719, "loss": 3.2608, "step": 72655 }, { "epoch": 4.93681206685691, "grad_norm": 0.8772631883621216, "learning_rate": 0.00038289849164288624, "loss": 3.6248, "step": 72660 }, { "epoch": 4.937151786927572, "grad_norm": 0.9882521629333496, "learning_rate": 0.0003828560266340536, "loss": 3.5253, "step": 72665 }, { "epoch": 4.937491506998233, "grad_norm": 0.7614195942878723, "learning_rate": 0.00038281356162522085, "loss": 3.4193, "step": 72670 }, { "epoch": 4.937831227068895, "grad_norm": 0.75351881980896, "learning_rate": 0.0003827710966163881, "loss": 3.4155, "step": 72675 }, { "epoch": 4.938170947139557, "grad_norm": 0.6771587133407593, "learning_rate": 0.0003827286316075554, "loss": 3.3386, "step": 72680 }, { "epoch": 4.938510667210219, "grad_norm": 0.767123281955719, "learning_rate": 0.0003826861665987227, "loss": 3.2334, "step": 72685 }, { "epoch": 4.938850387280881, "grad_norm": 0.8292311429977417, "learning_rate": 0.0003826437015898899, "loss": 3.4624, "step": 72690 }, { "epoch": 4.939190107351543, "grad_norm": 1.039386510848999, "learning_rate": 0.0003826012365810572, "loss": 3.1914, "step": 72695 }, { "epoch": 4.939529827422204, "grad_norm": 0.9476038217544556, "learning_rate": 0.00038255877157222453, "loss": 3.5195, "step": 72700 }, { "epoch": 4.939869547492866, "grad_norm": 0.8351473212242126, "learning_rate": 0.00038251630656339176, "loss": 3.4681, "step": 72705 }, { "epoch": 4.940209267563528, "grad_norm": 0.9407406449317932, "learning_rate": 0.00038247384155455904, "loss": 3.3212, "step": 72710 }, { "epoch": 4.940548987634189, "grad_norm": 0.7730169892311096, "learning_rate": 0.0003824313765457264, "loss": 3.5276, "step": 72715 }, { "epoch": 4.940888707704851, "grad_norm": 0.8480351567268372, "learning_rate": 0.0003823889115368936, "loss": 3.5714, "step": 72720 }, { "epoch": 4.941228427775513, "grad_norm": 0.7709962725639343, "learning_rate": 0.0003823464465280609, "loss": 3.6067, "step": 72725 }, { "epoch": 4.941568147846175, "grad_norm": 0.9260118007659912, "learning_rate": 0.00038230398151922816, "loss": 3.2889, "step": 72730 }, { "epoch": 4.941907867916837, "grad_norm": 0.8809016942977905, "learning_rate": 0.00038226151651039544, "loss": 3.4433, "step": 72735 }, { "epoch": 4.942247587987499, "grad_norm": 0.9832111597061157, "learning_rate": 0.0003822190515015627, "loss": 3.6212, "step": 72740 }, { "epoch": 4.94258730805816, "grad_norm": 1.0022366046905518, "learning_rate": 0.00038217658649273, "loss": 3.4021, "step": 72745 }, { "epoch": 4.942927028128822, "grad_norm": 0.6964163780212402, "learning_rate": 0.0003821341214838973, "loss": 3.3166, "step": 72750 }, { "epoch": 4.943266748199484, "grad_norm": 1.1478503942489624, "learning_rate": 0.00038209165647506456, "loss": 3.5629, "step": 72755 }, { "epoch": 4.943606468270145, "grad_norm": 1.2431979179382324, "learning_rate": 0.00038204919146623184, "loss": 3.4476, "step": 72760 }, { "epoch": 4.943946188340807, "grad_norm": 0.8297187089920044, "learning_rate": 0.00038200672645739907, "loss": 3.6483, "step": 72765 }, { "epoch": 4.944285908411469, "grad_norm": 2.0198402404785156, "learning_rate": 0.0003819642614485664, "loss": 3.3752, "step": 72770 }, { "epoch": 4.944625628482131, "grad_norm": 0.8844216465950012, "learning_rate": 0.0003819217964397337, "loss": 3.6008, "step": 72775 }, { "epoch": 4.944965348552793, "grad_norm": 0.908076822757721, "learning_rate": 0.0003818793314309009, "loss": 3.7295, "step": 72780 }, { "epoch": 4.945305068623455, "grad_norm": 0.8818939328193665, "learning_rate": 0.00038183686642206824, "loss": 3.2834, "step": 72785 }, { "epoch": 4.945644788694116, "grad_norm": 1.0867094993591309, "learning_rate": 0.0003817944014132355, "loss": 3.6106, "step": 72790 }, { "epoch": 4.945984508764778, "grad_norm": 0.6961829662322998, "learning_rate": 0.00038175193640440275, "loss": 3.4984, "step": 72795 }, { "epoch": 4.94632422883544, "grad_norm": 0.9492461085319519, "learning_rate": 0.00038170947139557, "loss": 3.7323, "step": 72800 }, { "epoch": 4.946663948906101, "grad_norm": 0.9092548489570618, "learning_rate": 0.00038166700638673736, "loss": 3.3685, "step": 72805 }, { "epoch": 4.947003668976763, "grad_norm": 1.0423119068145752, "learning_rate": 0.0003816245413779046, "loss": 3.3472, "step": 72810 }, { "epoch": 4.9473433890474245, "grad_norm": 0.8639079928398132, "learning_rate": 0.00038158207636907187, "loss": 3.6149, "step": 72815 }, { "epoch": 4.947683109118087, "grad_norm": 0.7837599515914917, "learning_rate": 0.0003815396113602392, "loss": 3.1586, "step": 72820 }, { "epoch": 4.948022829188749, "grad_norm": 0.9410359263420105, "learning_rate": 0.0003814971463514065, "loss": 3.3679, "step": 72825 }, { "epoch": 4.94836254925941, "grad_norm": 1.3209228515625, "learning_rate": 0.0003814546813425737, "loss": 3.3612, "step": 72830 }, { "epoch": 4.948702269330072, "grad_norm": 1.0441685914993286, "learning_rate": 0.000381412216333741, "loss": 3.6098, "step": 72835 }, { "epoch": 4.949041989400734, "grad_norm": 0.8973045349121094, "learning_rate": 0.0003813697513249083, "loss": 3.5738, "step": 72840 }, { "epoch": 4.949381709471395, "grad_norm": 1.0114058256149292, "learning_rate": 0.00038132728631607555, "loss": 3.5167, "step": 72845 }, { "epoch": 4.949721429542057, "grad_norm": 0.891346275806427, "learning_rate": 0.00038128482130724283, "loss": 3.5584, "step": 72850 }, { "epoch": 4.950061149612719, "grad_norm": 0.8078318238258362, "learning_rate": 0.00038124235629841016, "loss": 3.0296, "step": 72855 }, { "epoch": 4.9504008696833806, "grad_norm": 0.8377205729484558, "learning_rate": 0.0003811998912895774, "loss": 3.5673, "step": 72860 }, { "epoch": 4.950740589754043, "grad_norm": 0.931049108505249, "learning_rate": 0.00038115742628074467, "loss": 3.6596, "step": 72865 }, { "epoch": 4.951080309824705, "grad_norm": 0.8170061707496643, "learning_rate": 0.000381114961271912, "loss": 3.6792, "step": 72870 }, { "epoch": 4.951420029895366, "grad_norm": 0.9256570935249329, "learning_rate": 0.00038107249626307923, "loss": 3.5823, "step": 72875 }, { "epoch": 4.951759749966028, "grad_norm": 0.7770896553993225, "learning_rate": 0.0003810300312542465, "loss": 3.446, "step": 72880 }, { "epoch": 4.95209947003669, "grad_norm": 0.8677752017974854, "learning_rate": 0.0003809875662454138, "loss": 3.5394, "step": 72885 }, { "epoch": 4.952439190107351, "grad_norm": 0.8032394051551819, "learning_rate": 0.00038094510123658107, "loss": 3.4439, "step": 72890 }, { "epoch": 4.952778910178013, "grad_norm": 0.8636519908905029, "learning_rate": 0.00038090263622774835, "loss": 3.6256, "step": 72895 }, { "epoch": 4.953118630248675, "grad_norm": 0.9593454003334045, "learning_rate": 0.00038086017121891563, "loss": 3.2868, "step": 72900 }, { "epoch": 4.953458350319337, "grad_norm": 0.8624973297119141, "learning_rate": 0.0003808177062100829, "loss": 3.3436, "step": 72905 }, { "epoch": 4.953798070389999, "grad_norm": 0.789435625076294, "learning_rate": 0.0003807752412012502, "loss": 3.2679, "step": 72910 }, { "epoch": 4.954137790460661, "grad_norm": 0.7102778553962708, "learning_rate": 0.00038073277619241747, "loss": 3.667, "step": 72915 }, { "epoch": 4.954477510531322, "grad_norm": 1.2156705856323242, "learning_rate": 0.0003806903111835847, "loss": 3.5418, "step": 72920 }, { "epoch": 4.954817230601984, "grad_norm": 0.9636473059654236, "learning_rate": 0.00038064784617475203, "loss": 3.6461, "step": 72925 }, { "epoch": 4.955156950672646, "grad_norm": 0.8010683059692383, "learning_rate": 0.0003806053811659193, "loss": 3.5698, "step": 72930 }, { "epoch": 4.955496670743307, "grad_norm": 0.9754721522331238, "learning_rate": 0.00038056291615708653, "loss": 3.3762, "step": 72935 }, { "epoch": 4.955836390813969, "grad_norm": 1.1058012247085571, "learning_rate": 0.00038052045114825387, "loss": 3.7296, "step": 72940 }, { "epoch": 4.956176110884631, "grad_norm": 1.0266730785369873, "learning_rate": 0.00038047798613942115, "loss": 3.3175, "step": 72945 }, { "epoch": 4.956515830955293, "grad_norm": 1.173547387123108, "learning_rate": 0.0003804355211305884, "loss": 3.5551, "step": 72950 }, { "epoch": 4.956855551025955, "grad_norm": 0.8839654326438904, "learning_rate": 0.00038039305612175565, "loss": 3.3509, "step": 72955 }, { "epoch": 4.957195271096617, "grad_norm": 0.7036253809928894, "learning_rate": 0.000380350591112923, "loss": 3.5851, "step": 72960 }, { "epoch": 4.957534991167278, "grad_norm": 2.0955846309661865, "learning_rate": 0.0003803081261040902, "loss": 3.6668, "step": 72965 }, { "epoch": 4.95787471123794, "grad_norm": 0.8587763905525208, "learning_rate": 0.0003802656610952575, "loss": 3.6165, "step": 72970 }, { "epoch": 4.958214431308602, "grad_norm": 0.7819929122924805, "learning_rate": 0.00038022319608642483, "loss": 3.6493, "step": 72975 }, { "epoch": 4.958554151379263, "grad_norm": 0.8358395099639893, "learning_rate": 0.00038018073107759206, "loss": 3.5317, "step": 72980 }, { "epoch": 4.958893871449925, "grad_norm": 0.923384964466095, "learning_rate": 0.00038013826606875934, "loss": 3.5399, "step": 72985 }, { "epoch": 4.959233591520587, "grad_norm": 0.9469045400619507, "learning_rate": 0.0003800958010599266, "loss": 3.3604, "step": 72990 }, { "epoch": 4.959573311591249, "grad_norm": 0.9319455027580261, "learning_rate": 0.00038005333605109395, "loss": 3.4129, "step": 72995 }, { "epoch": 4.959913031661911, "grad_norm": 0.9811966419219971, "learning_rate": 0.0003800108710422612, "loss": 3.4527, "step": 73000 }, { "epoch": 4.960252751732573, "grad_norm": 0.7282987236976624, "learning_rate": 0.00037996840603342846, "loss": 3.3695, "step": 73005 }, { "epoch": 4.960592471803234, "grad_norm": 0.8049964904785156, "learning_rate": 0.0003799259410245958, "loss": 3.6708, "step": 73010 }, { "epoch": 4.960932191873896, "grad_norm": 0.7169439792633057, "learning_rate": 0.000379883476015763, "loss": 3.2985, "step": 73015 }, { "epoch": 4.961271911944557, "grad_norm": 0.9029344916343689, "learning_rate": 0.0003798410110069303, "loss": 3.7529, "step": 73020 }, { "epoch": 4.961611632015219, "grad_norm": 1.1080904006958008, "learning_rate": 0.0003797985459980976, "loss": 3.4338, "step": 73025 }, { "epoch": 4.961951352085881, "grad_norm": 0.8612424731254578, "learning_rate": 0.00037975608098926486, "loss": 3.4819, "step": 73030 }, { "epoch": 4.9622910721565425, "grad_norm": 1.0260770320892334, "learning_rate": 0.00037971361598043214, "loss": 3.6983, "step": 73035 }, { "epoch": 4.962630792227205, "grad_norm": 0.7672381401062012, "learning_rate": 0.0003796711509715994, "loss": 3.6114, "step": 73040 }, { "epoch": 4.962970512297867, "grad_norm": 0.881515383720398, "learning_rate": 0.0003796286859627667, "loss": 3.6383, "step": 73045 }, { "epoch": 4.963310232368528, "grad_norm": 0.932043731212616, "learning_rate": 0.000379586220953934, "loss": 3.4375, "step": 73050 }, { "epoch": 4.96364995243919, "grad_norm": 0.7924324870109558, "learning_rate": 0.00037954375594510126, "loss": 3.2162, "step": 73055 }, { "epoch": 4.963989672509852, "grad_norm": 0.9654383659362793, "learning_rate": 0.0003795012909362685, "loss": 3.4231, "step": 73060 }, { "epoch": 4.964329392580513, "grad_norm": 1.075207233428955, "learning_rate": 0.0003794588259274358, "loss": 3.3183, "step": 73065 }, { "epoch": 4.964669112651175, "grad_norm": 1.3674366474151611, "learning_rate": 0.0003794163609186031, "loss": 3.6306, "step": 73070 }, { "epoch": 4.965008832721837, "grad_norm": 0.8143147826194763, "learning_rate": 0.0003793738959097703, "loss": 3.4534, "step": 73075 }, { "epoch": 4.9653485527924985, "grad_norm": 0.8441619873046875, "learning_rate": 0.00037933143090093766, "loss": 3.5078, "step": 73080 }, { "epoch": 4.965688272863161, "grad_norm": 0.8493859767913818, "learning_rate": 0.00037928896589210494, "loss": 3.6795, "step": 73085 }, { "epoch": 4.966027992933823, "grad_norm": 0.7269042134284973, "learning_rate": 0.00037924650088327216, "loss": 3.5263, "step": 73090 }, { "epoch": 4.966367713004484, "grad_norm": 0.9164868593215942, "learning_rate": 0.00037920403587443944, "loss": 3.3496, "step": 73095 }, { "epoch": 4.966707433075146, "grad_norm": 0.8955481648445129, "learning_rate": 0.0003791615708656068, "loss": 3.3098, "step": 73100 }, { "epoch": 4.967047153145808, "grad_norm": 0.6655365228652954, "learning_rate": 0.000379119105856774, "loss": 3.5347, "step": 73105 }, { "epoch": 4.967386873216469, "grad_norm": 0.787166178226471, "learning_rate": 0.0003790766408479413, "loss": 3.3866, "step": 73110 }, { "epoch": 4.967726593287131, "grad_norm": 0.8208472728729248, "learning_rate": 0.0003790341758391086, "loss": 3.4175, "step": 73115 }, { "epoch": 4.968066313357793, "grad_norm": 0.7683771252632141, "learning_rate": 0.00037899171083027584, "loss": 3.7887, "step": 73120 }, { "epoch": 4.9684060334284545, "grad_norm": 1.0410525798797607, "learning_rate": 0.0003789492458214431, "loss": 3.6291, "step": 73125 }, { "epoch": 4.968745753499117, "grad_norm": 0.9626865386962891, "learning_rate": 0.0003789067808126104, "loss": 3.4851, "step": 73130 }, { "epoch": 4.969085473569779, "grad_norm": 0.7364782691001892, "learning_rate": 0.0003788643158037777, "loss": 3.2666, "step": 73135 }, { "epoch": 4.96942519364044, "grad_norm": 0.8087607622146606, "learning_rate": 0.00037882185079494496, "loss": 3.5486, "step": 73140 }, { "epoch": 4.969764913711102, "grad_norm": 0.8406544923782349, "learning_rate": 0.00037877938578611224, "loss": 3.259, "step": 73145 }, { "epoch": 4.970104633781764, "grad_norm": 0.8677491545677185, "learning_rate": 0.0003787369207772795, "loss": 3.7233, "step": 73150 }, { "epoch": 4.970444353852425, "grad_norm": 0.8326682448387146, "learning_rate": 0.0003786944557684468, "loss": 3.4337, "step": 73155 }, { "epoch": 4.970784073923087, "grad_norm": 0.7446854710578918, "learning_rate": 0.0003786519907596141, "loss": 3.6641, "step": 73160 }, { "epoch": 4.971123793993749, "grad_norm": 1.0132875442504883, "learning_rate": 0.0003786095257507814, "loss": 3.667, "step": 73165 }, { "epoch": 4.971463514064411, "grad_norm": 0.8101994395256042, "learning_rate": 0.00037856706074194864, "loss": 3.4631, "step": 73170 }, { "epoch": 4.971803234135073, "grad_norm": 1.0126091241836548, "learning_rate": 0.0003785245957331159, "loss": 3.6341, "step": 73175 }, { "epoch": 4.972142954205735, "grad_norm": 0.8275250196456909, "learning_rate": 0.0003784821307242832, "loss": 3.3744, "step": 73180 }, { "epoch": 4.972482674276396, "grad_norm": 0.8241539001464844, "learning_rate": 0.0003784396657154505, "loss": 3.3362, "step": 73185 }, { "epoch": 4.972822394347058, "grad_norm": 0.8202075958251953, "learning_rate": 0.00037839720070661776, "loss": 3.629, "step": 73190 }, { "epoch": 4.97316211441772, "grad_norm": 0.9991371631622314, "learning_rate": 0.00037835473569778504, "loss": 3.6351, "step": 73195 }, { "epoch": 4.973501834488381, "grad_norm": 0.8576650619506836, "learning_rate": 0.0003783122706889523, "loss": 3.5665, "step": 73200 }, { "epoch": 4.973841554559043, "grad_norm": 0.8946138620376587, "learning_rate": 0.0003782698056801196, "loss": 3.5967, "step": 73205 }, { "epoch": 4.974181274629705, "grad_norm": 0.7914770245552063, "learning_rate": 0.0003782273406712869, "loss": 3.5534, "step": 73210 }, { "epoch": 4.974520994700367, "grad_norm": 0.993091881275177, "learning_rate": 0.0003781848756624541, "loss": 3.4491, "step": 73215 }, { "epoch": 4.974860714771029, "grad_norm": 0.8123006820678711, "learning_rate": 0.00037814241065362144, "loss": 3.4633, "step": 73220 }, { "epoch": 4.975200434841691, "grad_norm": 0.9387556910514832, "learning_rate": 0.0003780999456447887, "loss": 3.4497, "step": 73225 }, { "epoch": 4.975540154912352, "grad_norm": 0.9280674457550049, "learning_rate": 0.00037805748063595595, "loss": 3.1282, "step": 73230 }, { "epoch": 4.975879874983014, "grad_norm": 0.7899430394172668, "learning_rate": 0.0003780150156271233, "loss": 3.405, "step": 73235 }, { "epoch": 4.976219595053676, "grad_norm": 0.9494094848632812, "learning_rate": 0.00037797255061829057, "loss": 3.2476, "step": 73240 }, { "epoch": 4.976559315124337, "grad_norm": 1.1741997003555298, "learning_rate": 0.0003779300856094578, "loss": 3.6463, "step": 73245 }, { "epoch": 4.976899035194999, "grad_norm": 0.7904856204986572, "learning_rate": 0.00037788762060062507, "loss": 3.769, "step": 73250 }, { "epoch": 4.977238755265661, "grad_norm": 0.7196319103240967, "learning_rate": 0.0003778451555917924, "loss": 3.5531, "step": 73255 }, { "epoch": 4.977578475336323, "grad_norm": 0.8789392709732056, "learning_rate": 0.00037780269058295963, "loss": 3.5042, "step": 73260 }, { "epoch": 4.977918195406985, "grad_norm": 0.7937629222869873, "learning_rate": 0.0003777602255741269, "loss": 3.6949, "step": 73265 }, { "epoch": 4.978257915477647, "grad_norm": 0.932063102722168, "learning_rate": 0.00037771776056529425, "loss": 3.4947, "step": 73270 }, { "epoch": 4.978597635548308, "grad_norm": 1.0593739748001099, "learning_rate": 0.00037767529555646147, "loss": 3.6267, "step": 73275 }, { "epoch": 4.97893735561897, "grad_norm": 0.9431940913200378, "learning_rate": 0.00037763283054762875, "loss": 3.3194, "step": 73280 }, { "epoch": 4.979277075689632, "grad_norm": 0.8862378597259521, "learning_rate": 0.00037759036553879603, "loss": 3.3446, "step": 73285 }, { "epoch": 4.979616795760293, "grad_norm": 0.7941207885742188, "learning_rate": 0.0003775479005299633, "loss": 3.1871, "step": 73290 }, { "epoch": 4.979956515830955, "grad_norm": 1.252215027809143, "learning_rate": 0.0003775054355211306, "loss": 3.4554, "step": 73295 }, { "epoch": 4.980296235901617, "grad_norm": 0.8796611428260803, "learning_rate": 0.00037746297051229787, "loss": 3.2651, "step": 73300 }, { "epoch": 4.980635955972279, "grad_norm": 0.8147224187850952, "learning_rate": 0.00037742050550346515, "loss": 3.5774, "step": 73305 }, { "epoch": 4.980975676042941, "grad_norm": 0.7524916529655457, "learning_rate": 0.00037737804049463243, "loss": 3.559, "step": 73310 }, { "epoch": 4.981315396113603, "grad_norm": 0.9657232761383057, "learning_rate": 0.0003773355754857997, "loss": 3.53, "step": 73315 }, { "epoch": 4.981655116184264, "grad_norm": 1.0182019472122192, "learning_rate": 0.00037729311047696694, "loss": 3.4873, "step": 73320 }, { "epoch": 4.981994836254926, "grad_norm": 0.8427720665931702, "learning_rate": 0.00037725064546813427, "loss": 3.4541, "step": 73325 }, { "epoch": 4.982334556325588, "grad_norm": 0.841276228427887, "learning_rate": 0.00037720818045930155, "loss": 3.2288, "step": 73330 }, { "epoch": 4.982674276396249, "grad_norm": 0.9103279113769531, "learning_rate": 0.00037716571545046883, "loss": 3.6237, "step": 73335 }, { "epoch": 4.983013996466911, "grad_norm": 0.8511722683906555, "learning_rate": 0.0003771232504416361, "loss": 3.5972, "step": 73340 }, { "epoch": 4.983353716537573, "grad_norm": 1.1891944408416748, "learning_rate": 0.0003770807854328034, "loss": 3.2608, "step": 73345 }, { "epoch": 4.983693436608235, "grad_norm": 0.8335211873054504, "learning_rate": 0.00037703832042397067, "loss": 3.5282, "step": 73350 }, { "epoch": 4.984033156678897, "grad_norm": 0.7981144189834595, "learning_rate": 0.0003769958554151379, "loss": 3.4086, "step": 73355 }, { "epoch": 4.984372876749559, "grad_norm": 0.9157492518424988, "learning_rate": 0.00037695339040630523, "loss": 3.3645, "step": 73360 }, { "epoch": 4.98471259682022, "grad_norm": 0.8134139180183411, "learning_rate": 0.0003769109253974725, "loss": 3.6794, "step": 73365 }, { "epoch": 4.985052316890882, "grad_norm": 0.7623462677001953, "learning_rate": 0.00037686846038863974, "loss": 3.5264, "step": 73370 }, { "epoch": 4.985392036961544, "grad_norm": 0.8374656438827515, "learning_rate": 0.0003768259953798071, "loss": 3.5954, "step": 73375 }, { "epoch": 4.985731757032205, "grad_norm": 1.047549843788147, "learning_rate": 0.00037678353037097435, "loss": 3.4, "step": 73380 }, { "epoch": 4.986071477102867, "grad_norm": 0.9485633969306946, "learning_rate": 0.0003767410653621416, "loss": 3.2877, "step": 73385 }, { "epoch": 4.986411197173529, "grad_norm": 0.8473796844482422, "learning_rate": 0.00037669860035330886, "loss": 3.4551, "step": 73390 }, { "epoch": 4.986750917244191, "grad_norm": 0.8003768920898438, "learning_rate": 0.0003766561353444762, "loss": 3.2913, "step": 73395 }, { "epoch": 4.987090637314853, "grad_norm": 0.8610634207725525, "learning_rate": 0.0003766136703356434, "loss": 3.4413, "step": 73400 }, { "epoch": 4.987430357385515, "grad_norm": 0.8350740671157837, "learning_rate": 0.0003765712053268107, "loss": 3.5257, "step": 73405 }, { "epoch": 4.987770077456176, "grad_norm": 0.9380072355270386, "learning_rate": 0.00037652874031797803, "loss": 3.5897, "step": 73410 }, { "epoch": 4.988109797526838, "grad_norm": 1.0014680624008179, "learning_rate": 0.00037648627530914526, "loss": 3.3416, "step": 73415 }, { "epoch": 4.9884495175975, "grad_norm": 0.8285419344902039, "learning_rate": 0.00037644381030031254, "loss": 3.5234, "step": 73420 }, { "epoch": 4.988789237668161, "grad_norm": 0.9550110101699829, "learning_rate": 0.0003764013452914798, "loss": 3.6373, "step": 73425 }, { "epoch": 4.989128957738823, "grad_norm": 1.15923011302948, "learning_rate": 0.0003763588802826471, "loss": 3.5653, "step": 73430 }, { "epoch": 4.989468677809485, "grad_norm": 0.9602484107017517, "learning_rate": 0.0003763164152738144, "loss": 3.6752, "step": 73435 }, { "epoch": 4.989808397880147, "grad_norm": 0.7451989650726318, "learning_rate": 0.00037627395026498166, "loss": 3.548, "step": 73440 }, { "epoch": 4.990148117950809, "grad_norm": 0.907376766204834, "learning_rate": 0.00037623148525614894, "loss": 3.6844, "step": 73445 }, { "epoch": 4.990487838021471, "grad_norm": 0.7486577033996582, "learning_rate": 0.0003761890202473162, "loss": 3.2842, "step": 73450 }, { "epoch": 4.990827558092132, "grad_norm": 1.1977856159210205, "learning_rate": 0.0003761465552384835, "loss": 3.3526, "step": 73455 }, { "epoch": 4.991167278162794, "grad_norm": 0.8403291702270508, "learning_rate": 0.0003761040902296507, "loss": 3.5536, "step": 73460 }, { "epoch": 4.991506998233456, "grad_norm": 1.088760495185852, "learning_rate": 0.00037606162522081806, "loss": 3.2718, "step": 73465 }, { "epoch": 4.991846718304117, "grad_norm": 0.9433627128601074, "learning_rate": 0.00037601916021198534, "loss": 3.4703, "step": 73470 }, { "epoch": 4.992186438374779, "grad_norm": 0.8565873503684998, "learning_rate": 0.00037597669520315257, "loss": 3.547, "step": 73475 }, { "epoch": 4.9925261584454415, "grad_norm": 0.7623013257980347, "learning_rate": 0.0003759342301943199, "loss": 3.4805, "step": 73480 }, { "epoch": 4.992865878516103, "grad_norm": 0.7753994464874268, "learning_rate": 0.0003758917651854872, "loss": 3.6177, "step": 73485 }, { "epoch": 4.993205598586765, "grad_norm": 0.9224596619606018, "learning_rate": 0.0003758493001766544, "loss": 3.5296, "step": 73490 }, { "epoch": 4.993545318657426, "grad_norm": 0.7430421710014343, "learning_rate": 0.00037580683516782174, "loss": 3.6136, "step": 73495 }, { "epoch": 4.993885038728088, "grad_norm": 0.8180851936340332, "learning_rate": 0.000375764370158989, "loss": 3.3634, "step": 73500 }, { "epoch": 4.99422475879875, "grad_norm": 0.9090211987495422, "learning_rate": 0.0003757219051501563, "loss": 3.3945, "step": 73505 }, { "epoch": 4.994564478869411, "grad_norm": 0.8929524421691895, "learning_rate": 0.0003756794401413235, "loss": 3.6991, "step": 73510 }, { "epoch": 4.994904198940073, "grad_norm": 0.9579527378082275, "learning_rate": 0.00037563697513249086, "loss": 3.5372, "step": 73515 }, { "epoch": 4.995243919010735, "grad_norm": 1.0293318033218384, "learning_rate": 0.00037559451012365814, "loss": 3.4771, "step": 73520 }, { "epoch": 4.995583639081397, "grad_norm": 0.9263602495193481, "learning_rate": 0.00037555204511482537, "loss": 3.3371, "step": 73525 }, { "epoch": 4.995923359152059, "grad_norm": 0.9338143467903137, "learning_rate": 0.0003755095801059927, "loss": 3.4752, "step": 73530 }, { "epoch": 4.996263079222721, "grad_norm": 0.8178796768188477, "learning_rate": 0.00037546711509716, "loss": 3.364, "step": 73535 }, { "epoch": 4.996602799293382, "grad_norm": 0.8574473857879639, "learning_rate": 0.0003754246500883272, "loss": 3.5046, "step": 73540 }, { "epoch": 4.996942519364044, "grad_norm": 0.8701178431510925, "learning_rate": 0.0003753821850794945, "loss": 3.6173, "step": 73545 }, { "epoch": 4.997282239434706, "grad_norm": 1.0161793231964111, "learning_rate": 0.0003753397200706618, "loss": 3.5068, "step": 73550 }, { "epoch": 4.997621959505367, "grad_norm": 0.870212972164154, "learning_rate": 0.00037529725506182905, "loss": 3.556, "step": 73555 }, { "epoch": 4.997961679576029, "grad_norm": 0.8367553949356079, "learning_rate": 0.00037525479005299633, "loss": 3.6131, "step": 73560 }, { "epoch": 4.998301399646691, "grad_norm": 1.0269298553466797, "learning_rate": 0.00037521232504416366, "loss": 3.2674, "step": 73565 }, { "epoch": 4.998641119717353, "grad_norm": 0.7627138495445251, "learning_rate": 0.0003751698600353309, "loss": 3.4733, "step": 73570 }, { "epoch": 4.998980839788015, "grad_norm": 0.862865149974823, "learning_rate": 0.00037512739502649817, "loss": 3.3547, "step": 73575 }, { "epoch": 4.999320559858677, "grad_norm": 0.7803202867507935, "learning_rate": 0.00037508493001766545, "loss": 3.4797, "step": 73580 }, { "epoch": 4.999660279929338, "grad_norm": 1.0289307832717896, "learning_rate": 0.00037504246500883273, "loss": 3.6836, "step": 73585 }, { "epoch": 5.0, "grad_norm": 2.0020124912261963, "learning_rate": 0.000375, "loss": 3.4413, "step": 73590 }, { "epoch": 5.0, "eval_bertscore": { "f1": 0.8397619288343597, "precision": 0.8431473464822108, "recall": 0.8371518092719676 }, "eval_bleu_4": 0.016062640135109264, "eval_exact_match": 0.00038763446070355656, "eval_loss": 3.414961099624634, "eval_meteor": 0.08555663894503344, "eval_rouge": { "rouge1": 0.12268302599839292, "rouge2": 0.018551820188759085, "rougeL": 0.10623085847093988, "rougeLsum": 0.10624618490200036 }, "eval_runtime": 2350.1573, "eval_samples_per_second": 4.391, "eval_steps_per_second": 0.549, "step": 73590 }, { "epoch": 5.000339720070662, "grad_norm": 0.9625300765037537, "learning_rate": 0.0003749575349911673, "loss": 3.4398, "step": 73595 }, { "epoch": 5.000679440141323, "grad_norm": 0.8478984832763672, "learning_rate": 0.00037491506998233457, "loss": 3.3439, "step": 73600 }, { "epoch": 5.001019160211985, "grad_norm": 0.8582783937454224, "learning_rate": 0.00037487260497350185, "loss": 3.598, "step": 73605 }, { "epoch": 5.001358880282647, "grad_norm": 1.2371875047683716, "learning_rate": 0.00037483013996466913, "loss": 3.6129, "step": 73610 }, { "epoch": 5.001698600353309, "grad_norm": 0.9397746920585632, "learning_rate": 0.00037478767495583635, "loss": 3.4562, "step": 73615 }, { "epoch": 5.002038320423971, "grad_norm": 0.9677167534828186, "learning_rate": 0.0003747452099470037, "loss": 3.4077, "step": 73620 }, { "epoch": 5.002378040494633, "grad_norm": 0.8125355243682861, "learning_rate": 0.00037470274493817097, "loss": 3.5458, "step": 73625 }, { "epoch": 5.002717760565294, "grad_norm": 0.7614694237709045, "learning_rate": 0.0003746602799293382, "loss": 3.2286, "step": 73630 }, { "epoch": 5.003057480635956, "grad_norm": 0.7842497825622559, "learning_rate": 0.00037461781492050553, "loss": 3.5837, "step": 73635 }, { "epoch": 5.003397200706618, "grad_norm": 0.9299313426017761, "learning_rate": 0.0003745753499116728, "loss": 3.2729, "step": 73640 }, { "epoch": 5.003736920777279, "grad_norm": 0.8807242512702942, "learning_rate": 0.00037453288490284003, "loss": 3.3807, "step": 73645 }, { "epoch": 5.004076640847941, "grad_norm": 1.0075852870941162, "learning_rate": 0.0003744904198940073, "loss": 3.4866, "step": 73650 }, { "epoch": 5.004416360918603, "grad_norm": 0.8257735967636108, "learning_rate": 0.00037444795488517465, "loss": 3.5268, "step": 73655 }, { "epoch": 5.004756080989265, "grad_norm": 0.9911938309669495, "learning_rate": 0.0003744054898763419, "loss": 3.52, "step": 73660 }, { "epoch": 5.005095801059927, "grad_norm": 1.0119799375534058, "learning_rate": 0.00037436302486750915, "loss": 3.51, "step": 73665 }, { "epoch": 5.005435521130589, "grad_norm": 0.9239062070846558, "learning_rate": 0.0003743205598586765, "loss": 3.2797, "step": 73670 }, { "epoch": 5.00577524120125, "grad_norm": 0.657340943813324, "learning_rate": 0.0003742780948498437, "loss": 3.5172, "step": 73675 }, { "epoch": 5.006114961271912, "grad_norm": 0.7681381702423096, "learning_rate": 0.000374235629841011, "loss": 3.4016, "step": 73680 }, { "epoch": 5.006454681342574, "grad_norm": 0.8912808299064636, "learning_rate": 0.0003741931648321783, "loss": 3.5468, "step": 73685 }, { "epoch": 5.006794401413235, "grad_norm": 1.073547124862671, "learning_rate": 0.0003741506998233456, "loss": 3.3653, "step": 73690 }, { "epoch": 5.007134121483897, "grad_norm": 1.0875983238220215, "learning_rate": 0.00037410823481451284, "loss": 3.4569, "step": 73695 }, { "epoch": 5.007473841554559, "grad_norm": 0.8415685296058655, "learning_rate": 0.0003740657698056801, "loss": 3.3325, "step": 73700 }, { "epoch": 5.007813561625221, "grad_norm": 0.8602301478385925, "learning_rate": 0.00037402330479684745, "loss": 3.5463, "step": 73705 }, { "epoch": 5.008153281695883, "grad_norm": 1.1192108392715454, "learning_rate": 0.0003739808397880147, "loss": 3.4879, "step": 73710 }, { "epoch": 5.008493001766545, "grad_norm": 0.9308758974075317, "learning_rate": 0.00037393837477918196, "loss": 3.5845, "step": 73715 }, { "epoch": 5.008832721837206, "grad_norm": 0.8004947304725647, "learning_rate": 0.0003738959097703493, "loss": 3.4369, "step": 73720 }, { "epoch": 5.009172441907868, "grad_norm": 0.7972015142440796, "learning_rate": 0.0003738534447615165, "loss": 3.2662, "step": 73725 }, { "epoch": 5.00951216197853, "grad_norm": 1.042711853981018, "learning_rate": 0.0003738109797526838, "loss": 3.2658, "step": 73730 }, { "epoch": 5.009851882049191, "grad_norm": 0.7924750447273254, "learning_rate": 0.0003737685147438511, "loss": 3.4404, "step": 73735 }, { "epoch": 5.010191602119853, "grad_norm": 0.7492609024047852, "learning_rate": 0.00037372604973501836, "loss": 3.4125, "step": 73740 }, { "epoch": 5.0105313221905154, "grad_norm": 0.7353882789611816, "learning_rate": 0.00037368358472618564, "loss": 3.2018, "step": 73745 }, { "epoch": 5.010871042261177, "grad_norm": 0.8370790481567383, "learning_rate": 0.0003736411197173529, "loss": 3.1775, "step": 73750 }, { "epoch": 5.011210762331839, "grad_norm": 0.9480472803115845, "learning_rate": 0.0003735986547085202, "loss": 3.3791, "step": 73755 }, { "epoch": 5.0115504824025, "grad_norm": 0.864469051361084, "learning_rate": 0.0003735561896996875, "loss": 3.4058, "step": 73760 }, { "epoch": 5.011890202473162, "grad_norm": 1.7910126447677612, "learning_rate": 0.00037351372469085476, "loss": 3.4723, "step": 73765 }, { "epoch": 5.012229922543824, "grad_norm": 1.1197491884231567, "learning_rate": 0.000373471259682022, "loss": 3.1865, "step": 73770 }, { "epoch": 5.012569642614485, "grad_norm": 0.8969390392303467, "learning_rate": 0.0003734287946731893, "loss": 3.3448, "step": 73775 }, { "epoch": 5.012909362685147, "grad_norm": 0.9955067038536072, "learning_rate": 0.0003733863296643566, "loss": 3.4922, "step": 73780 }, { "epoch": 5.013249082755809, "grad_norm": 0.7771676182746887, "learning_rate": 0.0003733438646555238, "loss": 3.5055, "step": 73785 }, { "epoch": 5.013588802826471, "grad_norm": 0.799666702747345, "learning_rate": 0.00037330139964669116, "loss": 3.3613, "step": 73790 }, { "epoch": 5.013928522897133, "grad_norm": 0.9385126829147339, "learning_rate": 0.00037325893463785844, "loss": 3.516, "step": 73795 }, { "epoch": 5.014268242967795, "grad_norm": 0.9211181402206421, "learning_rate": 0.00037321646962902566, "loss": 3.4719, "step": 73800 }, { "epoch": 5.014607963038456, "grad_norm": 0.9400629997253418, "learning_rate": 0.00037317400462019294, "loss": 3.1372, "step": 73805 }, { "epoch": 5.014947683109118, "grad_norm": 0.9275919198989868, "learning_rate": 0.0003731315396113603, "loss": 3.5, "step": 73810 }, { "epoch": 5.01528740317978, "grad_norm": 1.0918574333190918, "learning_rate": 0.0003730890746025275, "loss": 3.3463, "step": 73815 }, { "epoch": 5.015627123250441, "grad_norm": 1.045360803604126, "learning_rate": 0.0003730466095936948, "loss": 3.509, "step": 73820 }, { "epoch": 5.015966843321103, "grad_norm": 0.8133660554885864, "learning_rate": 0.0003730041445848621, "loss": 3.2984, "step": 73825 }, { "epoch": 5.016306563391765, "grad_norm": 0.8697634339332581, "learning_rate": 0.00037296167957602934, "loss": 3.4741, "step": 73830 }, { "epoch": 5.016646283462427, "grad_norm": 0.8753909468650818, "learning_rate": 0.0003729192145671966, "loss": 3.4926, "step": 73835 }, { "epoch": 5.016986003533089, "grad_norm": 0.8569296002388, "learning_rate": 0.0003728767495583639, "loss": 3.2955, "step": 73840 }, { "epoch": 5.017325723603751, "grad_norm": 0.8232488036155701, "learning_rate": 0.0003728342845495312, "loss": 3.203, "step": 73845 }, { "epoch": 5.017665443674412, "grad_norm": 1.2155956029891968, "learning_rate": 0.00037279181954069846, "loss": 3.3313, "step": 73850 }, { "epoch": 5.018005163745074, "grad_norm": 1.01833176612854, "learning_rate": 0.00037274935453186574, "loss": 3.4159, "step": 73855 }, { "epoch": 5.018344883815736, "grad_norm": 0.7775917053222656, "learning_rate": 0.0003727068895230331, "loss": 3.3012, "step": 73860 }, { "epoch": 5.018684603886397, "grad_norm": 0.9397168755531311, "learning_rate": 0.0003726644245142003, "loss": 3.3867, "step": 73865 }, { "epoch": 5.019024323957059, "grad_norm": 0.8575489521026611, "learning_rate": 0.0003726219595053676, "loss": 3.4924, "step": 73870 }, { "epoch": 5.019364044027721, "grad_norm": 0.8601073622703552, "learning_rate": 0.00037257949449653486, "loss": 3.5496, "step": 73875 }, { "epoch": 5.019703764098383, "grad_norm": 0.8570936918258667, "learning_rate": 0.00037253702948770214, "loss": 3.3207, "step": 73880 }, { "epoch": 5.020043484169045, "grad_norm": 1.0787328481674194, "learning_rate": 0.0003724945644788694, "loss": 3.3904, "step": 73885 }, { "epoch": 5.020383204239707, "grad_norm": 1.147847294807434, "learning_rate": 0.0003724520994700367, "loss": 3.6, "step": 73890 }, { "epoch": 5.020722924310368, "grad_norm": 0.9208913445472717, "learning_rate": 0.000372409634461204, "loss": 3.2863, "step": 73895 }, { "epoch": 5.02106264438103, "grad_norm": 1.4002976417541504, "learning_rate": 0.00037236716945237126, "loss": 3.5237, "step": 73900 }, { "epoch": 5.021402364451692, "grad_norm": 0.8220402002334595, "learning_rate": 0.00037232470444353854, "loss": 3.5198, "step": 73905 }, { "epoch": 5.021742084522353, "grad_norm": 1.1295868158340454, "learning_rate": 0.00037228223943470577, "loss": 3.4473, "step": 73910 }, { "epoch": 5.022081804593015, "grad_norm": 1.1026285886764526, "learning_rate": 0.0003722397744258731, "loss": 3.4684, "step": 73915 }, { "epoch": 5.022421524663677, "grad_norm": 0.9584364891052246, "learning_rate": 0.0003721973094170404, "loss": 3.4253, "step": 73920 }, { "epoch": 5.022761244734339, "grad_norm": 1.0277055501937866, "learning_rate": 0.0003721548444082076, "loss": 3.3799, "step": 73925 }, { "epoch": 5.023100964805001, "grad_norm": 0.6846737861633301, "learning_rate": 0.00037211237939937494, "loss": 3.3739, "step": 73930 }, { "epoch": 5.023440684875663, "grad_norm": 0.9156801700592041, "learning_rate": 0.0003720699143905422, "loss": 3.7134, "step": 73935 }, { "epoch": 5.023780404946324, "grad_norm": 0.8014858961105347, "learning_rate": 0.00037202744938170945, "loss": 3.2951, "step": 73940 }, { "epoch": 5.024120125016986, "grad_norm": 0.7329127788543701, "learning_rate": 0.00037198498437287673, "loss": 3.236, "step": 73945 }, { "epoch": 5.024459845087648, "grad_norm": 0.7832569479942322, "learning_rate": 0.00037194251936404406, "loss": 3.21, "step": 73950 }, { "epoch": 5.024799565158309, "grad_norm": 1.0011848211288452, "learning_rate": 0.0003719000543552113, "loss": 3.3147, "step": 73955 }, { "epoch": 5.025139285228971, "grad_norm": 0.8342908620834351, "learning_rate": 0.00037185758934637857, "loss": 3.482, "step": 73960 }, { "epoch": 5.025479005299633, "grad_norm": 0.8564026355743408, "learning_rate": 0.0003718151243375459, "loss": 3.674, "step": 73965 }, { "epoch": 5.025818725370295, "grad_norm": 0.9077079892158508, "learning_rate": 0.00037177265932871313, "loss": 3.5887, "step": 73970 }, { "epoch": 5.026158445440957, "grad_norm": 0.7185171842575073, "learning_rate": 0.0003717301943198804, "loss": 3.4358, "step": 73975 }, { "epoch": 5.026498165511619, "grad_norm": 0.9403607845306396, "learning_rate": 0.0003716877293110477, "loss": 3.4459, "step": 73980 }, { "epoch": 5.02683788558228, "grad_norm": 0.8399714231491089, "learning_rate": 0.00037164526430221497, "loss": 3.344, "step": 73985 }, { "epoch": 5.027177605652942, "grad_norm": 0.8505078554153442, "learning_rate": 0.00037160279929338225, "loss": 3.3943, "step": 73990 }, { "epoch": 5.027517325723604, "grad_norm": 0.6879952549934387, "learning_rate": 0.00037156033428454953, "loss": 3.3964, "step": 73995 }, { "epoch": 5.027857045794265, "grad_norm": 0.8246248960494995, "learning_rate": 0.0003715178692757168, "loss": 3.2592, "step": 74000 }, { "epoch": 5.028196765864927, "grad_norm": 0.9673071503639221, "learning_rate": 0.0003714754042668841, "loss": 3.6342, "step": 74005 }, { "epoch": 5.028536485935589, "grad_norm": 1.0281087160110474, "learning_rate": 0.00037143293925805137, "loss": 3.4217, "step": 74010 }, { "epoch": 5.028876206006251, "grad_norm": 1.1327872276306152, "learning_rate": 0.0003713904742492186, "loss": 3.4083, "step": 74015 }, { "epoch": 5.029215926076913, "grad_norm": 1.0981696844100952, "learning_rate": 0.00037134800924038593, "loss": 3.2709, "step": 74020 }, { "epoch": 5.029555646147575, "grad_norm": 0.9994499683380127, "learning_rate": 0.0003713055442315532, "loss": 3.6781, "step": 74025 }, { "epoch": 5.029895366218236, "grad_norm": 0.8725029230117798, "learning_rate": 0.0003712630792227205, "loss": 3.4174, "step": 74030 }, { "epoch": 5.030235086288898, "grad_norm": 0.9097709059715271, "learning_rate": 0.00037122061421388777, "loss": 3.4403, "step": 74035 }, { "epoch": 5.03057480635956, "grad_norm": 0.9016314148902893, "learning_rate": 0.00037117814920505505, "loss": 3.3919, "step": 74040 }, { "epoch": 5.030914526430221, "grad_norm": 1.0539602041244507, "learning_rate": 0.00037113568419622233, "loss": 3.2056, "step": 74045 }, { "epoch": 5.031254246500883, "grad_norm": 1.0248746871948242, "learning_rate": 0.0003710932191873896, "loss": 3.7167, "step": 74050 }, { "epoch": 5.0315939665715455, "grad_norm": 1.0809632539749146, "learning_rate": 0.0003710507541785569, "loss": 3.0941, "step": 74055 }, { "epoch": 5.031933686642207, "grad_norm": 1.271547555923462, "learning_rate": 0.00037100828916972417, "loss": 3.168, "step": 74060 }, { "epoch": 5.032273406712869, "grad_norm": 0.8147267699241638, "learning_rate": 0.0003709658241608914, "loss": 3.606, "step": 74065 }, { "epoch": 5.032613126783531, "grad_norm": 1.0186887979507446, "learning_rate": 0.00037092335915205873, "loss": 3.2369, "step": 74070 }, { "epoch": 5.032952846854192, "grad_norm": 1.0792646408081055, "learning_rate": 0.000370880894143226, "loss": 3.3055, "step": 74075 }, { "epoch": 5.033292566924854, "grad_norm": 1.4125255346298218, "learning_rate": 0.00037083842913439324, "loss": 3.1663, "step": 74080 }, { "epoch": 5.033632286995516, "grad_norm": 0.9496142864227295, "learning_rate": 0.00037079596412556057, "loss": 3.4917, "step": 74085 }, { "epoch": 5.033972007066177, "grad_norm": 0.9657579660415649, "learning_rate": 0.00037075349911672785, "loss": 3.3606, "step": 74090 }, { "epoch": 5.034311727136839, "grad_norm": 0.7909177541732788, "learning_rate": 0.0003707110341078951, "loss": 3.7262, "step": 74095 }, { "epoch": 5.0346514472075015, "grad_norm": 0.8715478181838989, "learning_rate": 0.00037066856909906236, "loss": 3.5546, "step": 74100 }, { "epoch": 5.034991167278163, "grad_norm": 0.9149139523506165, "learning_rate": 0.0003706261040902297, "loss": 3.4745, "step": 74105 }, { "epoch": 5.035330887348825, "grad_norm": 0.9541917443275452, "learning_rate": 0.0003705836390813969, "loss": 3.4438, "step": 74110 }, { "epoch": 5.035670607419486, "grad_norm": 0.804902970790863, "learning_rate": 0.0003705411740725642, "loss": 3.8125, "step": 74115 }, { "epoch": 5.036010327490148, "grad_norm": 0.6748700141906738, "learning_rate": 0.00037049870906373153, "loss": 3.1888, "step": 74120 }, { "epoch": 5.03635004756081, "grad_norm": 0.9283285737037659, "learning_rate": 0.00037045624405489876, "loss": 3.5064, "step": 74125 }, { "epoch": 5.036689767631471, "grad_norm": 0.902118444442749, "learning_rate": 0.00037041377904606604, "loss": 3.6009, "step": 74130 }, { "epoch": 5.037029487702133, "grad_norm": 1.2101463079452515, "learning_rate": 0.0003703713140372333, "loss": 3.317, "step": 74135 }, { "epoch": 5.037369207772795, "grad_norm": 0.8732280731201172, "learning_rate": 0.0003703288490284006, "loss": 3.5872, "step": 74140 }, { "epoch": 5.037708927843457, "grad_norm": 0.9206206202507019, "learning_rate": 0.0003702863840195679, "loss": 3.5645, "step": 74145 }, { "epoch": 5.038048647914119, "grad_norm": 0.7951322197914124, "learning_rate": 0.00037024391901073516, "loss": 3.4903, "step": 74150 }, { "epoch": 5.038388367984781, "grad_norm": 0.7873854041099548, "learning_rate": 0.00037020145400190244, "loss": 3.5114, "step": 74155 }, { "epoch": 5.038728088055442, "grad_norm": 0.9790129661560059, "learning_rate": 0.0003701589889930697, "loss": 3.2833, "step": 74160 }, { "epoch": 5.039067808126104, "grad_norm": 0.9435854554176331, "learning_rate": 0.000370116523984237, "loss": 3.8741, "step": 74165 }, { "epoch": 5.039407528196766, "grad_norm": 0.8869902491569519, "learning_rate": 0.0003700740589754042, "loss": 3.4746, "step": 74170 }, { "epoch": 5.039747248267427, "grad_norm": 1.0121889114379883, "learning_rate": 0.00037003159396657156, "loss": 3.7717, "step": 74175 }, { "epoch": 5.040086968338089, "grad_norm": 0.8299087285995483, "learning_rate": 0.00036998912895773884, "loss": 3.4276, "step": 74180 }, { "epoch": 5.040426688408751, "grad_norm": 0.7216662168502808, "learning_rate": 0.00036994666394890607, "loss": 3.3851, "step": 74185 }, { "epoch": 5.040766408479413, "grad_norm": 0.7863749265670776, "learning_rate": 0.0003699041989400734, "loss": 3.5774, "step": 74190 }, { "epoch": 5.041106128550075, "grad_norm": 0.8873997926712036, "learning_rate": 0.0003698617339312407, "loss": 3.4276, "step": 74195 }, { "epoch": 5.041445848620737, "grad_norm": 0.9233851432800293, "learning_rate": 0.00036981926892240796, "loss": 3.6503, "step": 74200 }, { "epoch": 5.041785568691398, "grad_norm": 0.7669897079467773, "learning_rate": 0.0003697768039135752, "loss": 3.3367, "step": 74205 }, { "epoch": 5.04212528876206, "grad_norm": 1.0702768564224243, "learning_rate": 0.0003697343389047425, "loss": 3.7899, "step": 74210 }, { "epoch": 5.042465008832722, "grad_norm": 0.8733883500099182, "learning_rate": 0.0003696918738959098, "loss": 3.4937, "step": 74215 }, { "epoch": 5.042804728903383, "grad_norm": 0.8090338706970215, "learning_rate": 0.000369649408887077, "loss": 3.6192, "step": 74220 }, { "epoch": 5.043144448974045, "grad_norm": 1.8956526517868042, "learning_rate": 0.00036960694387824436, "loss": 3.365, "step": 74225 }, { "epoch": 5.043484169044707, "grad_norm": 1.0194029808044434, "learning_rate": 0.00036956447886941164, "loss": 3.6157, "step": 74230 }, { "epoch": 5.043823889115369, "grad_norm": 0.8100787997245789, "learning_rate": 0.00036952201386057887, "loss": 3.2819, "step": 74235 }, { "epoch": 5.044163609186031, "grad_norm": 1.025274395942688, "learning_rate": 0.00036947954885174615, "loss": 3.1214, "step": 74240 }, { "epoch": 5.044503329256693, "grad_norm": 1.0486228466033936, "learning_rate": 0.0003694370838429135, "loss": 3.3469, "step": 74245 }, { "epoch": 5.044843049327354, "grad_norm": 0.9265202879905701, "learning_rate": 0.0003693946188340807, "loss": 3.3911, "step": 74250 }, { "epoch": 5.045182769398016, "grad_norm": 0.8730506300926208, "learning_rate": 0.000369352153825248, "loss": 3.645, "step": 74255 }, { "epoch": 5.045522489468678, "grad_norm": 0.9687416553497314, "learning_rate": 0.0003693096888164153, "loss": 3.4244, "step": 74260 }, { "epoch": 5.045862209539339, "grad_norm": 0.9287399649620056, "learning_rate": 0.00036926722380758255, "loss": 3.6303, "step": 74265 }, { "epoch": 5.046201929610001, "grad_norm": 0.856453537940979, "learning_rate": 0.00036922475879874983, "loss": 3.4933, "step": 74270 }, { "epoch": 5.046541649680663, "grad_norm": 1.32974374294281, "learning_rate": 0.0003691822937899171, "loss": 3.4778, "step": 74275 }, { "epoch": 5.046881369751325, "grad_norm": 0.93451327085495, "learning_rate": 0.0003691398287810844, "loss": 3.1467, "step": 74280 }, { "epoch": 5.047221089821987, "grad_norm": 0.8263443112373352, "learning_rate": 0.00036909736377225167, "loss": 3.2399, "step": 74285 }, { "epoch": 5.047560809892649, "grad_norm": 0.7969293594360352, "learning_rate": 0.00036905489876341895, "loss": 3.489, "step": 74290 }, { "epoch": 5.04790052996331, "grad_norm": 0.8431703448295593, "learning_rate": 0.00036901243375458623, "loss": 3.3196, "step": 74295 }, { "epoch": 5.048240250033972, "grad_norm": 0.7845717072486877, "learning_rate": 0.0003689699687457535, "loss": 3.7004, "step": 74300 }, { "epoch": 5.048579970104634, "grad_norm": 1.1778630018234253, "learning_rate": 0.0003689275037369208, "loss": 3.5474, "step": 74305 }, { "epoch": 5.048919690175295, "grad_norm": 0.8924747705459595, "learning_rate": 0.000368885038728088, "loss": 3.8841, "step": 74310 }, { "epoch": 5.049259410245957, "grad_norm": 0.984935462474823, "learning_rate": 0.00036884257371925535, "loss": 3.5027, "step": 74315 }, { "epoch": 5.0495991303166194, "grad_norm": 0.8996263146400452, "learning_rate": 0.00036880010871042263, "loss": 3.6692, "step": 74320 }, { "epoch": 5.049938850387281, "grad_norm": 0.6358514428138733, "learning_rate": 0.00036875764370158985, "loss": 3.4147, "step": 74325 }, { "epoch": 5.050278570457943, "grad_norm": 0.8601886034011841, "learning_rate": 0.0003687151786927572, "loss": 3.1693, "step": 74330 }, { "epoch": 5.050618290528605, "grad_norm": 0.9155727028846741, "learning_rate": 0.00036867271368392447, "loss": 3.5716, "step": 74335 }, { "epoch": 5.050958010599266, "grad_norm": 0.9294515252113342, "learning_rate": 0.0003686302486750917, "loss": 3.2805, "step": 74340 }, { "epoch": 5.051297730669928, "grad_norm": 0.9784578680992126, "learning_rate": 0.00036858778366625903, "loss": 3.1581, "step": 74345 }, { "epoch": 5.05163745074059, "grad_norm": 0.7737109065055847, "learning_rate": 0.0003685453186574263, "loss": 3.4391, "step": 74350 }, { "epoch": 5.051977170811251, "grad_norm": 0.7958144545555115, "learning_rate": 0.00036850285364859353, "loss": 3.5141, "step": 74355 }, { "epoch": 5.052316890881913, "grad_norm": 1.7831401824951172, "learning_rate": 0.0003684603886397608, "loss": 3.3923, "step": 74360 }, { "epoch": 5.0526566109525755, "grad_norm": 0.8024105429649353, "learning_rate": 0.00036841792363092815, "loss": 3.3207, "step": 74365 }, { "epoch": 5.052996331023237, "grad_norm": 0.9502870440483093, "learning_rate": 0.00036837545862209543, "loss": 3.4673, "step": 74370 }, { "epoch": 5.053336051093899, "grad_norm": 0.9811963438987732, "learning_rate": 0.00036833299361326265, "loss": 3.3953, "step": 74375 }, { "epoch": 5.053675771164561, "grad_norm": 0.7844878435134888, "learning_rate": 0.00036829052860443, "loss": 3.1985, "step": 74380 }, { "epoch": 5.054015491235222, "grad_norm": 1.2959247827529907, "learning_rate": 0.00036824806359559727, "loss": 3.302, "step": 74385 }, { "epoch": 5.054355211305884, "grad_norm": 1.0022271871566772, "learning_rate": 0.0003682055985867645, "loss": 3.5885, "step": 74390 }, { "epoch": 5.054694931376546, "grad_norm": 0.7763844132423401, "learning_rate": 0.0003681631335779318, "loss": 3.3302, "step": 74395 }, { "epoch": 5.055034651447207, "grad_norm": 0.8863420486450195, "learning_rate": 0.0003681206685690991, "loss": 3.1322, "step": 74400 }, { "epoch": 5.055374371517869, "grad_norm": 0.9745915532112122, "learning_rate": 0.00036807820356026634, "loss": 3.4016, "step": 74405 }, { "epoch": 5.0557140915885315, "grad_norm": 0.8081738948822021, "learning_rate": 0.0003680357385514336, "loss": 3.3853, "step": 74410 }, { "epoch": 5.056053811659193, "grad_norm": 0.8080282211303711, "learning_rate": 0.00036799327354260095, "loss": 3.462, "step": 74415 }, { "epoch": 5.056393531729855, "grad_norm": 0.9290738701820374, "learning_rate": 0.0003679508085337682, "loss": 3.5414, "step": 74420 }, { "epoch": 5.056733251800517, "grad_norm": 0.8939818143844604, "learning_rate": 0.00036790834352493546, "loss": 3.7895, "step": 74425 }, { "epoch": 5.057072971871178, "grad_norm": 0.8926640152931213, "learning_rate": 0.00036786587851610274, "loss": 3.5479, "step": 74430 }, { "epoch": 5.05741269194184, "grad_norm": 0.9758835434913635, "learning_rate": 0.00036782341350727, "loss": 3.3993, "step": 74435 }, { "epoch": 5.057752412012501, "grad_norm": 1.341876745223999, "learning_rate": 0.0003677809484984373, "loss": 3.4464, "step": 74440 }, { "epoch": 5.058092132083163, "grad_norm": 0.8545658588409424, "learning_rate": 0.0003677384834896046, "loss": 3.4534, "step": 74445 }, { "epoch": 5.058431852153825, "grad_norm": 1.0669986009597778, "learning_rate": 0.00036769601848077186, "loss": 3.1932, "step": 74450 }, { "epoch": 5.058771572224487, "grad_norm": 0.8410337567329407, "learning_rate": 0.00036765355347193914, "loss": 3.431, "step": 74455 }, { "epoch": 5.059111292295149, "grad_norm": 1.0390547513961792, "learning_rate": 0.0003676110884631064, "loss": 3.4483, "step": 74460 }, { "epoch": 5.059451012365811, "grad_norm": 1.0414466857910156, "learning_rate": 0.00036756862345427364, "loss": 3.4586, "step": 74465 }, { "epoch": 5.059790732436472, "grad_norm": 0.8129729628562927, "learning_rate": 0.000367526158445441, "loss": 3.2754, "step": 74470 }, { "epoch": 5.060130452507134, "grad_norm": 0.880726158618927, "learning_rate": 0.00036748369343660826, "loss": 3.502, "step": 74475 }, { "epoch": 5.060470172577796, "grad_norm": 1.1044102907180786, "learning_rate": 0.0003674412284277755, "loss": 3.6778, "step": 74480 }, { "epoch": 5.060809892648457, "grad_norm": 0.8495376110076904, "learning_rate": 0.0003673987634189428, "loss": 3.6414, "step": 74485 }, { "epoch": 5.061149612719119, "grad_norm": 1.061542272567749, "learning_rate": 0.0003673562984101101, "loss": 3.3403, "step": 74490 }, { "epoch": 5.061489332789781, "grad_norm": 1.0390080213546753, "learning_rate": 0.0003673138334012773, "loss": 3.4795, "step": 74495 }, { "epoch": 5.061829052860443, "grad_norm": 1.0464240312576294, "learning_rate": 0.0003672713683924446, "loss": 3.6371, "step": 74500 }, { "epoch": 5.062168772931105, "grad_norm": 0.9165144562721252, "learning_rate": 0.00036722890338361194, "loss": 3.2411, "step": 74505 }, { "epoch": 5.062508493001767, "grad_norm": 0.8787360787391663, "learning_rate": 0.00036718643837477916, "loss": 3.4691, "step": 74510 }, { "epoch": 5.062848213072428, "grad_norm": 0.9694712162017822, "learning_rate": 0.00036714397336594644, "loss": 3.4866, "step": 74515 }, { "epoch": 5.06318793314309, "grad_norm": 1.0332369804382324, "learning_rate": 0.0003671015083571138, "loss": 3.5234, "step": 74520 }, { "epoch": 5.063527653213752, "grad_norm": 0.8263710737228394, "learning_rate": 0.000367059043348281, "loss": 3.1496, "step": 74525 }, { "epoch": 5.063867373284413, "grad_norm": 1.0203291177749634, "learning_rate": 0.0003670165783394483, "loss": 3.6649, "step": 74530 }, { "epoch": 5.064207093355075, "grad_norm": 0.9047434329986572, "learning_rate": 0.00036697411333061556, "loss": 3.446, "step": 74535 }, { "epoch": 5.064546813425737, "grad_norm": 1.1405476331710815, "learning_rate": 0.0003669316483217829, "loss": 3.3901, "step": 74540 }, { "epoch": 5.064886533496399, "grad_norm": 0.9893170595169067, "learning_rate": 0.0003668891833129501, "loss": 3.4564, "step": 74545 }, { "epoch": 5.065226253567061, "grad_norm": 0.8183785080909729, "learning_rate": 0.0003668467183041174, "loss": 3.359, "step": 74550 }, { "epoch": 5.065565973637723, "grad_norm": 0.9511227011680603, "learning_rate": 0.00036680425329528474, "loss": 3.5132, "step": 74555 }, { "epoch": 5.065905693708384, "grad_norm": 0.8187382221221924, "learning_rate": 0.00036676178828645196, "loss": 3.2912, "step": 74560 }, { "epoch": 5.066245413779046, "grad_norm": 0.9733322858810425, "learning_rate": 0.00036671932327761924, "loss": 3.5882, "step": 74565 }, { "epoch": 5.066585133849708, "grad_norm": 1.0464316606521606, "learning_rate": 0.0003666768582687866, "loss": 3.4884, "step": 74570 }, { "epoch": 5.066924853920369, "grad_norm": 1.106223225593567, "learning_rate": 0.0003666343932599538, "loss": 3.6416, "step": 74575 }, { "epoch": 5.067264573991031, "grad_norm": 0.8453660607337952, "learning_rate": 0.0003665919282511211, "loss": 3.177, "step": 74580 }, { "epoch": 5.067604294061693, "grad_norm": 1.0638985633850098, "learning_rate": 0.00036654946324228836, "loss": 3.6729, "step": 74585 }, { "epoch": 5.067944014132355, "grad_norm": 0.89426589012146, "learning_rate": 0.00036650699823345564, "loss": 3.4266, "step": 74590 }, { "epoch": 5.068283734203017, "grad_norm": 1.0200386047363281, "learning_rate": 0.0003664645332246229, "loss": 3.4966, "step": 74595 }, { "epoch": 5.068623454273679, "grad_norm": 1.131991982460022, "learning_rate": 0.0003664220682157902, "loss": 3.5852, "step": 74600 }, { "epoch": 5.06896317434434, "grad_norm": 0.8159005641937256, "learning_rate": 0.0003663796032069575, "loss": 3.4803, "step": 74605 }, { "epoch": 5.069302894415002, "grad_norm": 1.0808874368667603, "learning_rate": 0.00036633713819812476, "loss": 3.7532, "step": 74610 }, { "epoch": 5.069642614485664, "grad_norm": 0.7710488438606262, "learning_rate": 0.00036629467318929204, "loss": 3.5908, "step": 74615 }, { "epoch": 5.069982334556325, "grad_norm": 0.7490910887718201, "learning_rate": 0.00036625220818045927, "loss": 3.5817, "step": 74620 }, { "epoch": 5.070322054626987, "grad_norm": 1.0362935066223145, "learning_rate": 0.0003662097431716266, "loss": 3.187, "step": 74625 }, { "epoch": 5.0706617746976494, "grad_norm": 1.1559901237487793, "learning_rate": 0.0003661672781627939, "loss": 3.3659, "step": 74630 }, { "epoch": 5.071001494768311, "grad_norm": 0.9697914123535156, "learning_rate": 0.0003661248131539611, "loss": 3.5432, "step": 74635 }, { "epoch": 5.071341214838973, "grad_norm": 0.8743848204612732, "learning_rate": 0.00036608234814512844, "loss": 3.5865, "step": 74640 }, { "epoch": 5.071680934909635, "grad_norm": 1.7949402332305908, "learning_rate": 0.0003660398831362957, "loss": 3.3712, "step": 74645 }, { "epoch": 5.072020654980296, "grad_norm": 0.798200249671936, "learning_rate": 0.00036599741812746295, "loss": 3.3011, "step": 74650 }, { "epoch": 5.072360375050958, "grad_norm": 0.7571073770523071, "learning_rate": 0.00036595495311863023, "loss": 3.4232, "step": 74655 }, { "epoch": 5.07270009512162, "grad_norm": 1.1301369667053223, "learning_rate": 0.00036591248810979756, "loss": 3.4011, "step": 74660 }, { "epoch": 5.073039815192281, "grad_norm": 0.7377216815948486, "learning_rate": 0.0003658700231009648, "loss": 3.6746, "step": 74665 }, { "epoch": 5.073379535262943, "grad_norm": 0.8606254458427429, "learning_rate": 0.00036582755809213207, "loss": 3.9043, "step": 74670 }, { "epoch": 5.0737192553336055, "grad_norm": 1.015848159790039, "learning_rate": 0.0003657850930832994, "loss": 3.2423, "step": 74675 }, { "epoch": 5.074058975404267, "grad_norm": 0.9080487489700317, "learning_rate": 0.00036574262807446663, "loss": 3.344, "step": 74680 }, { "epoch": 5.074398695474929, "grad_norm": 0.8189088106155396, "learning_rate": 0.0003657001630656339, "loss": 3.542, "step": 74685 }, { "epoch": 5.074738415545591, "grad_norm": 0.8650960326194763, "learning_rate": 0.0003656576980568012, "loss": 3.2139, "step": 74690 }, { "epoch": 5.075078135616252, "grad_norm": 0.9276291728019714, "learning_rate": 0.00036561523304796847, "loss": 3.1652, "step": 74695 }, { "epoch": 5.075417855686914, "grad_norm": 0.7726927995681763, "learning_rate": 0.00036557276803913575, "loss": 3.2818, "step": 74700 }, { "epoch": 5.075757575757576, "grad_norm": 1.0042513608932495, "learning_rate": 0.00036553030303030303, "loss": 3.3603, "step": 74705 }, { "epoch": 5.076097295828237, "grad_norm": 0.9540241360664368, "learning_rate": 0.00036548783802147037, "loss": 3.4962, "step": 74710 }, { "epoch": 5.076437015898899, "grad_norm": 0.9946152567863464, "learning_rate": 0.0003654453730126376, "loss": 3.5943, "step": 74715 }, { "epoch": 5.0767767359695615, "grad_norm": 0.9931941628456116, "learning_rate": 0.00036540290800380487, "loss": 3.543, "step": 74720 }, { "epoch": 5.077116456040223, "grad_norm": 0.8514108061790466, "learning_rate": 0.00036536044299497215, "loss": 3.5234, "step": 74725 }, { "epoch": 5.077456176110885, "grad_norm": 0.9903560280799866, "learning_rate": 0.00036531797798613943, "loss": 3.6574, "step": 74730 }, { "epoch": 5.077795896181547, "grad_norm": 0.8982710838317871, "learning_rate": 0.0003652755129773067, "loss": 3.302, "step": 74735 }, { "epoch": 5.078135616252208, "grad_norm": 1.347965121269226, "learning_rate": 0.000365233047968474, "loss": 3.4945, "step": 74740 }, { "epoch": 5.07847533632287, "grad_norm": 0.9903069734573364, "learning_rate": 0.00036519058295964127, "loss": 3.3341, "step": 74745 }, { "epoch": 5.078815056393532, "grad_norm": 0.9816784262657166, "learning_rate": 0.00036514811795080855, "loss": 3.3578, "step": 74750 }, { "epoch": 5.079154776464193, "grad_norm": 0.8616023063659668, "learning_rate": 0.00036510565294197583, "loss": 3.662, "step": 74755 }, { "epoch": 5.079494496534855, "grad_norm": 1.0039408206939697, "learning_rate": 0.00036506318793314306, "loss": 3.5201, "step": 74760 }, { "epoch": 5.0798342166055175, "grad_norm": 1.0445092916488647, "learning_rate": 0.0003650207229243104, "loss": 3.4898, "step": 74765 }, { "epoch": 5.080173936676179, "grad_norm": 0.9555175304412842, "learning_rate": 0.00036497825791547767, "loss": 3.5357, "step": 74770 }, { "epoch": 5.080513656746841, "grad_norm": 1.346887230873108, "learning_rate": 0.0003649357929066449, "loss": 3.6041, "step": 74775 }, { "epoch": 5.080853376817503, "grad_norm": 1.1599987745285034, "learning_rate": 0.00036489332789781223, "loss": 3.4223, "step": 74780 }, { "epoch": 5.081193096888164, "grad_norm": 1.493691086769104, "learning_rate": 0.0003648508628889795, "loss": 3.4207, "step": 74785 }, { "epoch": 5.081532816958826, "grad_norm": 0.9721444845199585, "learning_rate": 0.00036480839788014674, "loss": 3.371, "step": 74790 }, { "epoch": 5.081872537029487, "grad_norm": 0.9574981331825256, "learning_rate": 0.000364765932871314, "loss": 3.4279, "step": 74795 }, { "epoch": 5.082212257100149, "grad_norm": 0.7686623334884644, "learning_rate": 0.00036472346786248135, "loss": 3.386, "step": 74800 }, { "epoch": 5.082551977170811, "grad_norm": 0.7745791673660278, "learning_rate": 0.0003646810028536486, "loss": 3.4058, "step": 74805 }, { "epoch": 5.082891697241473, "grad_norm": 0.983389675617218, "learning_rate": 0.00036463853784481586, "loss": 3.4161, "step": 74810 }, { "epoch": 5.083231417312135, "grad_norm": 1.2050780057907104, "learning_rate": 0.0003645960728359832, "loss": 3.6003, "step": 74815 }, { "epoch": 5.083571137382797, "grad_norm": 1.0247797966003418, "learning_rate": 0.0003645536078271504, "loss": 3.4164, "step": 74820 }, { "epoch": 5.083910857453458, "grad_norm": 0.9998893737792969, "learning_rate": 0.0003645111428183177, "loss": 3.6561, "step": 74825 }, { "epoch": 5.08425057752412, "grad_norm": 0.978882908821106, "learning_rate": 0.000364468677809485, "loss": 3.5452, "step": 74830 }, { "epoch": 5.084590297594782, "grad_norm": 0.8294420838356018, "learning_rate": 0.00036442621280065226, "loss": 3.4461, "step": 74835 }, { "epoch": 5.084930017665443, "grad_norm": 1.2561691999435425, "learning_rate": 0.00036438374779181954, "loss": 3.3592, "step": 74840 }, { "epoch": 5.085269737736105, "grad_norm": 1.1057792901992798, "learning_rate": 0.0003643412827829868, "loss": 3.751, "step": 74845 }, { "epoch": 5.085609457806767, "grad_norm": 0.7867390513420105, "learning_rate": 0.0003642988177741541, "loss": 3.5661, "step": 74850 }, { "epoch": 5.085949177877429, "grad_norm": 0.9753893613815308, "learning_rate": 0.0003642563527653214, "loss": 3.272, "step": 74855 }, { "epoch": 5.086288897948091, "grad_norm": 0.69923996925354, "learning_rate": 0.00036421388775648866, "loss": 3.4372, "step": 74860 }, { "epoch": 5.086628618018753, "grad_norm": 0.9283983111381531, "learning_rate": 0.0003641714227476559, "loss": 3.4698, "step": 74865 }, { "epoch": 5.086968338089414, "grad_norm": 0.9221273064613342, "learning_rate": 0.0003641289577388232, "loss": 3.2585, "step": 74870 }, { "epoch": 5.087308058160076, "grad_norm": 1.0572346448898315, "learning_rate": 0.0003640864927299905, "loss": 3.4023, "step": 74875 }, { "epoch": 5.087647778230738, "grad_norm": 0.8362987041473389, "learning_rate": 0.0003640440277211578, "loss": 3.6335, "step": 74880 }, { "epoch": 5.087987498301399, "grad_norm": 0.8806021809577942, "learning_rate": 0.00036400156271232506, "loss": 3.318, "step": 74885 }, { "epoch": 5.088327218372061, "grad_norm": 1.0497980117797852, "learning_rate": 0.00036395909770349234, "loss": 3.3717, "step": 74890 }, { "epoch": 5.088666938442723, "grad_norm": 0.8034532070159912, "learning_rate": 0.0003639166326946596, "loss": 3.1862, "step": 74895 }, { "epoch": 5.089006658513385, "grad_norm": 1.1501727104187012, "learning_rate": 0.0003638741676858269, "loss": 3.5848, "step": 74900 }, { "epoch": 5.089346378584047, "grad_norm": 0.8831846117973328, "learning_rate": 0.0003638317026769942, "loss": 3.3995, "step": 74905 }, { "epoch": 5.089686098654709, "grad_norm": 0.9833759069442749, "learning_rate": 0.00036378923766816146, "loss": 3.2713, "step": 74910 }, { "epoch": 5.09002581872537, "grad_norm": 0.8408897519111633, "learning_rate": 0.0003637467726593287, "loss": 3.4248, "step": 74915 }, { "epoch": 5.090365538796032, "grad_norm": 0.7591267228126526, "learning_rate": 0.000363704307650496, "loss": 3.4615, "step": 74920 }, { "epoch": 5.090705258866694, "grad_norm": 0.8523525595664978, "learning_rate": 0.0003636618426416633, "loss": 3.3067, "step": 74925 }, { "epoch": 5.091044978937355, "grad_norm": 0.8751924633979797, "learning_rate": 0.0003636193776328305, "loss": 3.3235, "step": 74930 }, { "epoch": 5.091384699008017, "grad_norm": 0.8093482255935669, "learning_rate": 0.00036357691262399786, "loss": 3.3978, "step": 74935 }, { "epoch": 5.0917244190786795, "grad_norm": 0.8375375866889954, "learning_rate": 0.00036353444761516514, "loss": 3.6556, "step": 74940 }, { "epoch": 5.092064139149341, "grad_norm": 0.920310378074646, "learning_rate": 0.00036349198260633237, "loss": 3.338, "step": 74945 }, { "epoch": 5.092403859220003, "grad_norm": 0.8462808728218079, "learning_rate": 0.00036344951759749965, "loss": 3.2296, "step": 74950 }, { "epoch": 5.092743579290665, "grad_norm": 0.9120371341705322, "learning_rate": 0.000363407052588667, "loss": 3.5621, "step": 74955 }, { "epoch": 5.093083299361326, "grad_norm": 0.817236065864563, "learning_rate": 0.0003633645875798342, "loss": 3.4538, "step": 74960 }, { "epoch": 5.093423019431988, "grad_norm": 0.7250436544418335, "learning_rate": 0.0003633221225710015, "loss": 3.5455, "step": 74965 }, { "epoch": 5.09376273950265, "grad_norm": 1.3266916275024414, "learning_rate": 0.0003632796575621688, "loss": 3.659, "step": 74970 }, { "epoch": 5.094102459573311, "grad_norm": 1.0124485492706299, "learning_rate": 0.00036323719255333605, "loss": 3.502, "step": 74975 }, { "epoch": 5.094442179643973, "grad_norm": 0.9973366856575012, "learning_rate": 0.00036319472754450333, "loss": 3.2221, "step": 74980 }, { "epoch": 5.0947818997146355, "grad_norm": 0.9812241196632385, "learning_rate": 0.0003631522625356706, "loss": 3.5178, "step": 74985 }, { "epoch": 5.095121619785297, "grad_norm": 1.0427446365356445, "learning_rate": 0.0003631097975268379, "loss": 3.6051, "step": 74990 }, { "epoch": 5.095461339855959, "grad_norm": 0.7722885608673096, "learning_rate": 0.00036306733251800517, "loss": 3.7283, "step": 74995 }, { "epoch": 5.095801059926621, "grad_norm": 0.9766445755958557, "learning_rate": 0.00036302486750917245, "loss": 3.6169, "step": 75000 }, { "epoch": 5.096140779997282, "grad_norm": 0.8959020376205444, "learning_rate": 0.00036298240250033973, "loss": 3.3327, "step": 75005 }, { "epoch": 5.096480500067944, "grad_norm": 0.886807382106781, "learning_rate": 0.000362939937491507, "loss": 3.6109, "step": 75010 }, { "epoch": 5.096820220138606, "grad_norm": 0.7945123910903931, "learning_rate": 0.0003628974724826743, "loss": 3.7052, "step": 75015 }, { "epoch": 5.097159940209267, "grad_norm": 1.22524893283844, "learning_rate": 0.0003628550074738415, "loss": 3.4699, "step": 75020 }, { "epoch": 5.097499660279929, "grad_norm": 0.7815542221069336, "learning_rate": 0.00036281254246500885, "loss": 3.4951, "step": 75025 }, { "epoch": 5.0978393803505915, "grad_norm": 0.951709508895874, "learning_rate": 0.00036277007745617613, "loss": 3.3323, "step": 75030 }, { "epoch": 5.098179100421253, "grad_norm": 0.9922013282775879, "learning_rate": 0.00036272761244734335, "loss": 3.2862, "step": 75035 }, { "epoch": 5.098518820491915, "grad_norm": 0.8157073855400085, "learning_rate": 0.0003626851474385107, "loss": 3.3791, "step": 75040 }, { "epoch": 5.098858540562577, "grad_norm": 0.7490096688270569, "learning_rate": 0.00036264268242967797, "loss": 3.606, "step": 75045 }, { "epoch": 5.099198260633238, "grad_norm": 1.0346990823745728, "learning_rate": 0.00036260021742084525, "loss": 3.1534, "step": 75050 }, { "epoch": 5.0995379807039, "grad_norm": 0.9536871910095215, "learning_rate": 0.0003625577524120125, "loss": 3.233, "step": 75055 }, { "epoch": 5.099877700774562, "grad_norm": 0.8778948783874512, "learning_rate": 0.0003625152874031798, "loss": 3.5092, "step": 75060 }, { "epoch": 5.100217420845223, "grad_norm": 0.7315804958343506, "learning_rate": 0.0003624728223943471, "loss": 3.3183, "step": 75065 }, { "epoch": 5.100557140915885, "grad_norm": 0.891664445400238, "learning_rate": 0.0003624303573855143, "loss": 3.5301, "step": 75070 }, { "epoch": 5.1008968609865475, "grad_norm": 1.0686360597610474, "learning_rate": 0.00036238789237668165, "loss": 3.5231, "step": 75075 }, { "epoch": 5.101236581057209, "grad_norm": 0.8331325054168701, "learning_rate": 0.00036234542736784893, "loss": 3.5402, "step": 75080 }, { "epoch": 5.101576301127871, "grad_norm": 1.1884500980377197, "learning_rate": 0.00036230296235901615, "loss": 3.4176, "step": 75085 }, { "epoch": 5.101916021198533, "grad_norm": 0.9484277963638306, "learning_rate": 0.00036226049735018343, "loss": 3.6443, "step": 75090 }, { "epoch": 5.102255741269194, "grad_norm": 0.903917670249939, "learning_rate": 0.00036221803234135077, "loss": 3.3796, "step": 75095 }, { "epoch": 5.102595461339856, "grad_norm": 0.9545102715492249, "learning_rate": 0.000362175567332518, "loss": 3.4833, "step": 75100 }, { "epoch": 5.102935181410518, "grad_norm": 0.9166658520698547, "learning_rate": 0.0003621331023236853, "loss": 3.5251, "step": 75105 }, { "epoch": 5.103274901481179, "grad_norm": 0.8696542382240295, "learning_rate": 0.0003620906373148526, "loss": 3.3384, "step": 75110 }, { "epoch": 5.103614621551841, "grad_norm": 0.8936814069747925, "learning_rate": 0.00036204817230601984, "loss": 3.449, "step": 75115 }, { "epoch": 5.103954341622503, "grad_norm": 0.7980839610099792, "learning_rate": 0.0003620057072971871, "loss": 3.3708, "step": 75120 }, { "epoch": 5.104294061693165, "grad_norm": 0.9942446947097778, "learning_rate": 0.0003619632422883544, "loss": 3.393, "step": 75125 }, { "epoch": 5.104633781763827, "grad_norm": 0.9312781691551208, "learning_rate": 0.0003619207772795217, "loss": 3.4643, "step": 75130 }, { "epoch": 5.104973501834488, "grad_norm": 1.1152287721633911, "learning_rate": 0.00036187831227068896, "loss": 3.3477, "step": 75135 }, { "epoch": 5.10531322190515, "grad_norm": 1.013667345046997, "learning_rate": 0.00036183584726185624, "loss": 3.1521, "step": 75140 }, { "epoch": 5.105652941975812, "grad_norm": 0.8019955158233643, "learning_rate": 0.0003617933822530235, "loss": 3.2059, "step": 75145 }, { "epoch": 5.105992662046473, "grad_norm": 0.9575315117835999, "learning_rate": 0.0003617509172441908, "loss": 3.544, "step": 75150 }, { "epoch": 5.106332382117135, "grad_norm": 0.8219946622848511, "learning_rate": 0.0003617084522353581, "loss": 3.4609, "step": 75155 }, { "epoch": 5.106672102187797, "grad_norm": 1.164125680923462, "learning_rate": 0.0003616659872265253, "loss": 3.2738, "step": 75160 }, { "epoch": 5.107011822258459, "grad_norm": 1.005372166633606, "learning_rate": 0.00036162352221769264, "loss": 3.7366, "step": 75165 }, { "epoch": 5.107351542329121, "grad_norm": 0.7238408327102661, "learning_rate": 0.0003615810572088599, "loss": 3.0939, "step": 75170 }, { "epoch": 5.107691262399783, "grad_norm": 0.798172116279602, "learning_rate": 0.00036153859220002714, "loss": 3.5253, "step": 75175 }, { "epoch": 5.108030982470444, "grad_norm": 0.9550918936729431, "learning_rate": 0.0003614961271911945, "loss": 3.2822, "step": 75180 }, { "epoch": 5.108370702541106, "grad_norm": 0.995512068271637, "learning_rate": 0.00036145366218236176, "loss": 3.3756, "step": 75185 }, { "epoch": 5.108710422611768, "grad_norm": 0.9895334243774414, "learning_rate": 0.000361411197173529, "loss": 3.308, "step": 75190 }, { "epoch": 5.109050142682429, "grad_norm": 1.1365963220596313, "learning_rate": 0.0003613687321646963, "loss": 3.4375, "step": 75195 }, { "epoch": 5.109389862753091, "grad_norm": 1.0738314390182495, "learning_rate": 0.0003613262671558636, "loss": 3.4143, "step": 75200 }, { "epoch": 5.1097295828237534, "grad_norm": 0.7811956405639648, "learning_rate": 0.0003612838021470308, "loss": 3.4044, "step": 75205 }, { "epoch": 5.110069302894415, "grad_norm": 0.804663896560669, "learning_rate": 0.0003612413371381981, "loss": 3.5527, "step": 75210 }, { "epoch": 5.110409022965077, "grad_norm": 0.9579253196716309, "learning_rate": 0.00036119887212936544, "loss": 3.3637, "step": 75215 }, { "epoch": 5.110748743035739, "grad_norm": 1.0096447467803955, "learning_rate": 0.0003611564071205327, "loss": 3.4818, "step": 75220 }, { "epoch": 5.1110884631064, "grad_norm": 0.7714332938194275, "learning_rate": 0.00036111394211169994, "loss": 3.5344, "step": 75225 }, { "epoch": 5.111428183177062, "grad_norm": 0.9728423357009888, "learning_rate": 0.0003610714771028673, "loss": 3.6027, "step": 75230 }, { "epoch": 5.111767903247724, "grad_norm": 0.830195426940918, "learning_rate": 0.00036102901209403456, "loss": 3.4362, "step": 75235 }, { "epoch": 5.112107623318385, "grad_norm": 0.8785569071769714, "learning_rate": 0.0003609865470852018, "loss": 3.39, "step": 75240 }, { "epoch": 5.112447343389047, "grad_norm": 1.0836104154586792, "learning_rate": 0.00036094408207636906, "loss": 3.2883, "step": 75245 }, { "epoch": 5.1127870634597095, "grad_norm": 1.0558947324752808, "learning_rate": 0.0003609016170675364, "loss": 3.6536, "step": 75250 }, { "epoch": 5.113126783530371, "grad_norm": 0.9367012977600098, "learning_rate": 0.0003608591520587036, "loss": 3.1756, "step": 75255 }, { "epoch": 5.113466503601033, "grad_norm": 0.8422859907150269, "learning_rate": 0.0003608166870498709, "loss": 3.3402, "step": 75260 }, { "epoch": 5.113806223671695, "grad_norm": 0.9152809977531433, "learning_rate": 0.00036077422204103824, "loss": 3.5203, "step": 75265 }, { "epoch": 5.114145943742356, "grad_norm": 0.876521646976471, "learning_rate": 0.00036073175703220546, "loss": 3.5446, "step": 75270 }, { "epoch": 5.114485663813018, "grad_norm": 0.8896933794021606, "learning_rate": 0.00036068929202337274, "loss": 3.409, "step": 75275 }, { "epoch": 5.11482538388368, "grad_norm": 0.7991227507591248, "learning_rate": 0.00036064682701454, "loss": 3.5709, "step": 75280 }, { "epoch": 5.115165103954341, "grad_norm": 1.0065653324127197, "learning_rate": 0.0003606043620057073, "loss": 3.5866, "step": 75285 }, { "epoch": 5.115504824025003, "grad_norm": 1.005728840827942, "learning_rate": 0.0003605618969968746, "loss": 3.3197, "step": 75290 }, { "epoch": 5.1158445440956655, "grad_norm": 1.068459391593933, "learning_rate": 0.00036051943198804186, "loss": 3.505, "step": 75295 }, { "epoch": 5.116184264166327, "grad_norm": 1.0139633417129517, "learning_rate": 0.00036047696697920914, "loss": 3.5115, "step": 75300 }, { "epoch": 5.116523984236989, "grad_norm": 1.1516929864883423, "learning_rate": 0.0003604345019703764, "loss": 3.1883, "step": 75305 }, { "epoch": 5.116863704307651, "grad_norm": 0.9546001553535461, "learning_rate": 0.0003603920369615437, "loss": 3.6388, "step": 75310 }, { "epoch": 5.117203424378312, "grad_norm": 1.0090304613113403, "learning_rate": 0.00036034957195271093, "loss": 3.3868, "step": 75315 }, { "epoch": 5.117543144448974, "grad_norm": 0.7460254430770874, "learning_rate": 0.00036030710694387826, "loss": 3.4881, "step": 75320 }, { "epoch": 5.117882864519636, "grad_norm": 0.8624724745750427, "learning_rate": 0.00036026464193504554, "loss": 3.5399, "step": 75325 }, { "epoch": 5.118222584590297, "grad_norm": 0.9487879872322083, "learning_rate": 0.00036022217692621277, "loss": 3.4304, "step": 75330 }, { "epoch": 5.118562304660959, "grad_norm": 1.0515177249908447, "learning_rate": 0.0003601797119173801, "loss": 3.4221, "step": 75335 }, { "epoch": 5.1189020247316215, "grad_norm": 0.9962283372879028, "learning_rate": 0.0003601372469085474, "loss": 3.477, "step": 75340 }, { "epoch": 5.119241744802283, "grad_norm": 1.152889609336853, "learning_rate": 0.0003600947818997146, "loss": 3.3088, "step": 75345 }, { "epoch": 5.119581464872945, "grad_norm": 0.8251771330833435, "learning_rate": 0.0003600523168908819, "loss": 3.748, "step": 75350 }, { "epoch": 5.119921184943607, "grad_norm": 0.8877094984054565, "learning_rate": 0.0003600098518820492, "loss": 3.377, "step": 75355 }, { "epoch": 5.120260905014268, "grad_norm": 0.7578794360160828, "learning_rate": 0.00035996738687321645, "loss": 3.2087, "step": 75360 }, { "epoch": 5.12060062508493, "grad_norm": 0.7884721159934998, "learning_rate": 0.00035992492186438373, "loss": 3.5021, "step": 75365 }, { "epoch": 5.120940345155592, "grad_norm": 1.4379349946975708, "learning_rate": 0.00035988245685555106, "loss": 3.5711, "step": 75370 }, { "epoch": 5.121280065226253, "grad_norm": 1.1530681848526, "learning_rate": 0.0003598399918467183, "loss": 3.4801, "step": 75375 }, { "epoch": 5.121619785296915, "grad_norm": 0.9859174489974976, "learning_rate": 0.00035979752683788557, "loss": 3.3444, "step": 75380 }, { "epoch": 5.1219595053675775, "grad_norm": 0.9901753664016724, "learning_rate": 0.00035975506182905285, "loss": 3.1273, "step": 75385 }, { "epoch": 5.122299225438239, "grad_norm": 0.7243141531944275, "learning_rate": 0.0003597125968202202, "loss": 3.1836, "step": 75390 }, { "epoch": 5.122638945508901, "grad_norm": 0.9931227564811707, "learning_rate": 0.0003596701318113874, "loss": 3.3004, "step": 75395 }, { "epoch": 5.122978665579563, "grad_norm": 0.8207472562789917, "learning_rate": 0.0003596276668025547, "loss": 3.503, "step": 75400 }, { "epoch": 5.123318385650224, "grad_norm": 0.8760255575180054, "learning_rate": 0.000359585201793722, "loss": 3.5104, "step": 75405 }, { "epoch": 5.123658105720886, "grad_norm": 0.9737474918365479, "learning_rate": 0.00035954273678488925, "loss": 3.6006, "step": 75410 }, { "epoch": 5.123997825791548, "grad_norm": 1.2021145820617676, "learning_rate": 0.00035950027177605653, "loss": 3.3088, "step": 75415 }, { "epoch": 5.124337545862209, "grad_norm": 0.815832793712616, "learning_rate": 0.0003594578067672238, "loss": 3.4062, "step": 75420 }, { "epoch": 5.124677265932871, "grad_norm": 0.776910126209259, "learning_rate": 0.0003594153417583911, "loss": 3.2368, "step": 75425 }, { "epoch": 5.1250169860035335, "grad_norm": 0.8547676205635071, "learning_rate": 0.00035937287674955837, "loss": 3.5613, "step": 75430 }, { "epoch": 5.125356706074195, "grad_norm": 0.974787175655365, "learning_rate": 0.00035933041174072565, "loss": 3.4011, "step": 75435 }, { "epoch": 5.125696426144857, "grad_norm": 1.0379118919372559, "learning_rate": 0.00035928794673189293, "loss": 3.5029, "step": 75440 }, { "epoch": 5.126036146215519, "grad_norm": 0.9388253688812256, "learning_rate": 0.0003592454817230602, "loss": 3.6932, "step": 75445 }, { "epoch": 5.12637586628618, "grad_norm": 0.9530711770057678, "learning_rate": 0.0003592030167142275, "loss": 3.4744, "step": 75450 }, { "epoch": 5.126715586356842, "grad_norm": 0.973609983921051, "learning_rate": 0.0003591605517053947, "loss": 3.6192, "step": 75455 }, { "epoch": 5.127055306427504, "grad_norm": 1.034703016281128, "learning_rate": 0.00035911808669656205, "loss": 3.2962, "step": 75460 }, { "epoch": 5.127395026498165, "grad_norm": 1.0627164840698242, "learning_rate": 0.00035907562168772933, "loss": 3.3745, "step": 75465 }, { "epoch": 5.127734746568827, "grad_norm": 0.9506585597991943, "learning_rate": 0.00035903315667889656, "loss": 3.2617, "step": 75470 }, { "epoch": 5.1280744666394895, "grad_norm": 0.986581563949585, "learning_rate": 0.0003589906916700639, "loss": 2.9607, "step": 75475 }, { "epoch": 5.128414186710151, "grad_norm": 0.9458901882171631, "learning_rate": 0.00035894822666123117, "loss": 3.3489, "step": 75480 }, { "epoch": 5.128753906780813, "grad_norm": 1.1331102848052979, "learning_rate": 0.0003589057616523984, "loss": 3.3257, "step": 75485 }, { "epoch": 5.129093626851474, "grad_norm": 0.7538685202598572, "learning_rate": 0.00035886329664356573, "loss": 3.3868, "step": 75490 }, { "epoch": 5.129433346922136, "grad_norm": 0.8945348858833313, "learning_rate": 0.000358820831634733, "loss": 3.5551, "step": 75495 }, { "epoch": 5.129773066992798, "grad_norm": 0.7266561985015869, "learning_rate": 0.00035877836662590024, "loss": 3.3456, "step": 75500 }, { "epoch": 5.130112787063459, "grad_norm": 0.8124348521232605, "learning_rate": 0.0003587359016170675, "loss": 3.4371, "step": 75505 }, { "epoch": 5.130452507134121, "grad_norm": 0.9724119901657104, "learning_rate": 0.00035869343660823485, "loss": 3.4692, "step": 75510 }, { "epoch": 5.1307922272047835, "grad_norm": 0.9436006546020508, "learning_rate": 0.0003586509715994021, "loss": 3.619, "step": 75515 }, { "epoch": 5.131131947275445, "grad_norm": 0.9247618913650513, "learning_rate": 0.00035860850659056936, "loss": 3.4033, "step": 75520 }, { "epoch": 5.131471667346107, "grad_norm": 0.7660719752311707, "learning_rate": 0.0003585660415817367, "loss": 3.3354, "step": 75525 }, { "epoch": 5.131811387416769, "grad_norm": 0.8121278285980225, "learning_rate": 0.0003585235765729039, "loss": 3.3272, "step": 75530 }, { "epoch": 5.13215110748743, "grad_norm": 0.896870493888855, "learning_rate": 0.0003584811115640712, "loss": 3.3307, "step": 75535 }, { "epoch": 5.132490827558092, "grad_norm": 0.8842107057571411, "learning_rate": 0.0003584386465552385, "loss": 3.4925, "step": 75540 }, { "epoch": 5.132830547628754, "grad_norm": 0.6370797753334045, "learning_rate": 0.00035839618154640576, "loss": 3.8845, "step": 75545 }, { "epoch": 5.133170267699415, "grad_norm": 0.9411664009094238, "learning_rate": 0.00035835371653757304, "loss": 3.249, "step": 75550 }, { "epoch": 5.133509987770077, "grad_norm": 0.7810190320014954, "learning_rate": 0.0003583112515287403, "loss": 3.448, "step": 75555 }, { "epoch": 5.1338497078407395, "grad_norm": 0.9644551277160645, "learning_rate": 0.00035826878651990765, "loss": 3.6004, "step": 75560 }, { "epoch": 5.134189427911401, "grad_norm": 0.7850888967514038, "learning_rate": 0.0003582263215110749, "loss": 3.5523, "step": 75565 }, { "epoch": 5.134529147982063, "grad_norm": 0.8360772132873535, "learning_rate": 0.00035818385650224216, "loss": 3.3816, "step": 75570 }, { "epoch": 5.134868868052725, "grad_norm": 0.8692635893821716, "learning_rate": 0.00035814139149340944, "loss": 3.4182, "step": 75575 }, { "epoch": 5.135208588123386, "grad_norm": 1.0855294466018677, "learning_rate": 0.0003580989264845767, "loss": 3.4477, "step": 75580 }, { "epoch": 5.135548308194048, "grad_norm": 0.691785991191864, "learning_rate": 0.000358056461475744, "loss": 3.6313, "step": 75585 }, { "epoch": 5.13588802826471, "grad_norm": 1.1764471530914307, "learning_rate": 0.0003580139964669113, "loss": 3.1752, "step": 75590 }, { "epoch": 5.136227748335371, "grad_norm": 0.8215623497962952, "learning_rate": 0.00035797153145807856, "loss": 3.523, "step": 75595 }, { "epoch": 5.136567468406033, "grad_norm": 0.9227715134620667, "learning_rate": 0.00035792906644924584, "loss": 3.4223, "step": 75600 }, { "epoch": 5.1369071884766955, "grad_norm": 1.0373984575271606, "learning_rate": 0.0003578866014404131, "loss": 3.6489, "step": 75605 }, { "epoch": 5.137246908547357, "grad_norm": 0.7685438394546509, "learning_rate": 0.00035784413643158035, "loss": 3.5668, "step": 75610 }, { "epoch": 5.137586628618019, "grad_norm": 0.8674196600914001, "learning_rate": 0.0003578016714227477, "loss": 3.5386, "step": 75615 }, { "epoch": 5.137926348688681, "grad_norm": 1.0068711042404175, "learning_rate": 0.00035775920641391496, "loss": 3.8871, "step": 75620 }, { "epoch": 5.138266068759342, "grad_norm": 0.9329433441162109, "learning_rate": 0.0003577167414050822, "loss": 3.6836, "step": 75625 }, { "epoch": 5.138605788830004, "grad_norm": 1.1959995031356812, "learning_rate": 0.0003576742763962495, "loss": 3.4469, "step": 75630 }, { "epoch": 5.138945508900666, "grad_norm": 0.8033577799797058, "learning_rate": 0.0003576318113874168, "loss": 3.5078, "step": 75635 }, { "epoch": 5.139285228971327, "grad_norm": 0.9147692918777466, "learning_rate": 0.000357589346378584, "loss": 3.2276, "step": 75640 }, { "epoch": 5.139624949041989, "grad_norm": 0.8372762203216553, "learning_rate": 0.0003575468813697513, "loss": 3.3919, "step": 75645 }, { "epoch": 5.1399646691126515, "grad_norm": 0.9196427464485168, "learning_rate": 0.00035750441636091864, "loss": 3.5758, "step": 75650 }, { "epoch": 5.140304389183313, "grad_norm": 0.8652243614196777, "learning_rate": 0.00035746195135208587, "loss": 3.5547, "step": 75655 }, { "epoch": 5.140644109253975, "grad_norm": 0.7755324244499207, "learning_rate": 0.00035741948634325315, "loss": 3.4065, "step": 75660 }, { "epoch": 5.140983829324637, "grad_norm": 0.7884494066238403, "learning_rate": 0.0003573770213344205, "loss": 3.5399, "step": 75665 }, { "epoch": 5.141323549395298, "grad_norm": 0.862424373626709, "learning_rate": 0.0003573345563255877, "loss": 3.4619, "step": 75670 }, { "epoch": 5.14166326946596, "grad_norm": 0.9353483319282532, "learning_rate": 0.000357292091316755, "loss": 3.7566, "step": 75675 }, { "epoch": 5.142002989536622, "grad_norm": 0.7188577055931091, "learning_rate": 0.00035724962630792227, "loss": 3.4008, "step": 75680 }, { "epoch": 5.142342709607283, "grad_norm": 0.7510799765586853, "learning_rate": 0.00035720716129908955, "loss": 3.4323, "step": 75685 }, { "epoch": 5.142682429677945, "grad_norm": 0.8518757224082947, "learning_rate": 0.0003571646962902568, "loss": 3.4956, "step": 75690 }, { "epoch": 5.1430221497486075, "grad_norm": 0.9330177307128906, "learning_rate": 0.0003571222312814241, "loss": 3.4332, "step": 75695 }, { "epoch": 5.143361869819269, "grad_norm": 0.863366961479187, "learning_rate": 0.0003570797662725914, "loss": 3.5285, "step": 75700 }, { "epoch": 5.143701589889931, "grad_norm": 0.7815716862678528, "learning_rate": 0.00035703730126375867, "loss": 3.6571, "step": 75705 }, { "epoch": 5.144041309960593, "grad_norm": 0.9466231465339661, "learning_rate": 0.00035699483625492595, "loss": 3.5089, "step": 75710 }, { "epoch": 5.144381030031254, "grad_norm": 0.8485906720161438, "learning_rate": 0.0003569523712460932, "loss": 3.5112, "step": 75715 }, { "epoch": 5.144720750101916, "grad_norm": 0.8381757140159607, "learning_rate": 0.0003569099062372605, "loss": 3.5854, "step": 75720 }, { "epoch": 5.145060470172578, "grad_norm": 1.1675273180007935, "learning_rate": 0.0003568674412284278, "loss": 3.664, "step": 75725 }, { "epoch": 5.145400190243239, "grad_norm": 1.169937252998352, "learning_rate": 0.00035682497621959507, "loss": 3.5185, "step": 75730 }, { "epoch": 5.145739910313901, "grad_norm": 0.9915021657943726, "learning_rate": 0.00035678251121076235, "loss": 3.3516, "step": 75735 }, { "epoch": 5.1460796303845635, "grad_norm": 0.7466602921485901, "learning_rate": 0.00035674004620192963, "loss": 3.3274, "step": 75740 }, { "epoch": 5.146419350455225, "grad_norm": 1.2071409225463867, "learning_rate": 0.0003566975811930969, "loss": 3.4037, "step": 75745 }, { "epoch": 5.146759070525887, "grad_norm": 0.9449678659439087, "learning_rate": 0.0003566551161842642, "loss": 3.6486, "step": 75750 }, { "epoch": 5.147098790596549, "grad_norm": 0.8937907218933105, "learning_rate": 0.00035661265117543147, "loss": 3.3399, "step": 75755 }, { "epoch": 5.14743851066721, "grad_norm": 1.1577504873275757, "learning_rate": 0.00035657018616659875, "loss": 3.3409, "step": 75760 }, { "epoch": 5.147778230737872, "grad_norm": 0.8591466546058655, "learning_rate": 0.000356527721157766, "loss": 3.8231, "step": 75765 }, { "epoch": 5.148117950808534, "grad_norm": 1.0236706733703613, "learning_rate": 0.0003564852561489333, "loss": 3.4876, "step": 75770 }, { "epoch": 5.148457670879195, "grad_norm": 1.023123025894165, "learning_rate": 0.0003564427911401006, "loss": 3.4156, "step": 75775 }, { "epoch": 5.1487973909498574, "grad_norm": 0.8643351793289185, "learning_rate": 0.0003564003261312678, "loss": 3.337, "step": 75780 }, { "epoch": 5.1491371110205195, "grad_norm": 1.390157699584961, "learning_rate": 0.00035635786112243515, "loss": 3.3757, "step": 75785 }, { "epoch": 5.149476831091181, "grad_norm": 0.8398975729942322, "learning_rate": 0.00035631539611360243, "loss": 3.4433, "step": 75790 }, { "epoch": 5.149816551161843, "grad_norm": 0.9058846831321716, "learning_rate": 0.00035627293110476965, "loss": 3.3929, "step": 75795 }, { "epoch": 5.150156271232504, "grad_norm": 0.7347093820571899, "learning_rate": 0.00035623046609593693, "loss": 3.4943, "step": 75800 }, { "epoch": 5.150495991303166, "grad_norm": 0.9572609066963196, "learning_rate": 0.00035618800108710427, "loss": 3.2051, "step": 75805 }, { "epoch": 5.150835711373828, "grad_norm": 0.859819233417511, "learning_rate": 0.0003561455360782715, "loss": 3.6448, "step": 75810 }, { "epoch": 5.151175431444489, "grad_norm": 0.9241170883178711, "learning_rate": 0.0003561030710694388, "loss": 3.2694, "step": 75815 }, { "epoch": 5.151515151515151, "grad_norm": 1.0995243787765503, "learning_rate": 0.0003560606060606061, "loss": 3.53, "step": 75820 }, { "epoch": 5.1518548715858135, "grad_norm": 1.004812240600586, "learning_rate": 0.00035601814105177333, "loss": 3.4095, "step": 75825 }, { "epoch": 5.152194591656475, "grad_norm": 0.8107056021690369, "learning_rate": 0.0003559756760429406, "loss": 3.6253, "step": 75830 }, { "epoch": 5.152534311727137, "grad_norm": 0.842532753944397, "learning_rate": 0.0003559332110341079, "loss": 3.6321, "step": 75835 }, { "epoch": 5.152874031797799, "grad_norm": 1.0183223485946655, "learning_rate": 0.0003558907460252752, "loss": 3.5184, "step": 75840 }, { "epoch": 5.15321375186846, "grad_norm": 0.8119643330574036, "learning_rate": 0.00035584828101644246, "loss": 3.2714, "step": 75845 }, { "epoch": 5.153553471939122, "grad_norm": 0.9546394944190979, "learning_rate": 0.00035580581600760974, "loss": 3.488, "step": 75850 }, { "epoch": 5.153893192009784, "grad_norm": 0.9778090715408325, "learning_rate": 0.000355763350998777, "loss": 3.4579, "step": 75855 }, { "epoch": 5.154232912080445, "grad_norm": 0.8450209498405457, "learning_rate": 0.0003557208859899443, "loss": 3.5419, "step": 75860 }, { "epoch": 5.154572632151107, "grad_norm": 0.9734489321708679, "learning_rate": 0.0003556784209811116, "loss": 3.4745, "step": 75865 }, { "epoch": 5.1549123522217695, "grad_norm": 0.895376443862915, "learning_rate": 0.0003556359559722788, "loss": 3.257, "step": 75870 }, { "epoch": 5.155252072292431, "grad_norm": 1.0125356912612915, "learning_rate": 0.00035559349096344614, "loss": 3.5082, "step": 75875 }, { "epoch": 5.155591792363093, "grad_norm": 1.5127090215682983, "learning_rate": 0.0003555510259546134, "loss": 3.3798, "step": 75880 }, { "epoch": 5.155931512433755, "grad_norm": 1.0071715116500854, "learning_rate": 0.00035550856094578064, "loss": 3.4963, "step": 75885 }, { "epoch": 5.156271232504416, "grad_norm": 0.9824620485305786, "learning_rate": 0.000355466095936948, "loss": 3.4602, "step": 75890 }, { "epoch": 5.156610952575078, "grad_norm": 0.9819023609161377, "learning_rate": 0.00035542363092811526, "loss": 3.2881, "step": 75895 }, { "epoch": 5.15695067264574, "grad_norm": 0.7317568063735962, "learning_rate": 0.00035538116591928254, "loss": 3.3952, "step": 75900 }, { "epoch": 5.157290392716401, "grad_norm": 0.8449393510818481, "learning_rate": 0.00035533870091044976, "loss": 3.3514, "step": 75905 }, { "epoch": 5.157630112787063, "grad_norm": 0.873768150806427, "learning_rate": 0.0003552962359016171, "loss": 3.4807, "step": 75910 }, { "epoch": 5.1579698328577255, "grad_norm": 0.8254892230033875, "learning_rate": 0.0003552537708927844, "loss": 2.9235, "step": 75915 }, { "epoch": 5.158309552928387, "grad_norm": 0.9388298988342285, "learning_rate": 0.0003552113058839516, "loss": 3.451, "step": 75920 }, { "epoch": 5.158649272999049, "grad_norm": 0.8015468120574951, "learning_rate": 0.00035516884087511894, "loss": 3.5578, "step": 75925 }, { "epoch": 5.158988993069711, "grad_norm": 0.827369749546051, "learning_rate": 0.0003551263758662862, "loss": 3.3029, "step": 75930 }, { "epoch": 5.159328713140372, "grad_norm": 1.0710492134094238, "learning_rate": 0.00035508391085745344, "loss": 3.3515, "step": 75935 }, { "epoch": 5.159668433211034, "grad_norm": 1.1144198179244995, "learning_rate": 0.0003550414458486207, "loss": 3.5147, "step": 75940 }, { "epoch": 5.160008153281696, "grad_norm": 0.9546234607696533, "learning_rate": 0.00035499898083978806, "loss": 3.4145, "step": 75945 }, { "epoch": 5.160347873352357, "grad_norm": 0.7645173668861389, "learning_rate": 0.0003549565158309553, "loss": 3.3884, "step": 75950 }, { "epoch": 5.160687593423019, "grad_norm": 0.8998569250106812, "learning_rate": 0.00035491405082212256, "loss": 3.2952, "step": 75955 }, { "epoch": 5.1610273134936815, "grad_norm": 1.0389199256896973, "learning_rate": 0.0003548715858132899, "loss": 3.5468, "step": 75960 }, { "epoch": 5.161367033564343, "grad_norm": 1.5395762920379639, "learning_rate": 0.0003548291208044571, "loss": 3.6489, "step": 75965 }, { "epoch": 5.161706753635005, "grad_norm": 0.9465786814689636, "learning_rate": 0.0003547866557956244, "loss": 3.3647, "step": 75970 }, { "epoch": 5.162046473705667, "grad_norm": 0.9340637922286987, "learning_rate": 0.0003547441907867917, "loss": 3.3644, "step": 75975 }, { "epoch": 5.162386193776328, "grad_norm": 0.7783132195472717, "learning_rate": 0.00035470172577795896, "loss": 3.6665, "step": 75980 }, { "epoch": 5.16272591384699, "grad_norm": 0.7712247967720032, "learning_rate": 0.00035465926076912624, "loss": 3.2758, "step": 75985 }, { "epoch": 5.163065633917652, "grad_norm": 0.8787266612052917, "learning_rate": 0.0003546167957602935, "loss": 3.4424, "step": 75990 }, { "epoch": 5.163405353988313, "grad_norm": 0.9124729633331299, "learning_rate": 0.0003545743307514608, "loss": 3.5243, "step": 75995 }, { "epoch": 5.163745074058975, "grad_norm": 0.6655544638633728, "learning_rate": 0.0003545318657426281, "loss": 3.67, "step": 76000 }, { "epoch": 5.1640847941296375, "grad_norm": 0.7486337423324585, "learning_rate": 0.00035448940073379536, "loss": 3.5914, "step": 76005 }, { "epoch": 5.164424514200299, "grad_norm": 1.1658858060836792, "learning_rate": 0.0003544469357249626, "loss": 3.3833, "step": 76010 }, { "epoch": 5.164764234270961, "grad_norm": 0.7355794310569763, "learning_rate": 0.0003544044707161299, "loss": 3.5292, "step": 76015 }, { "epoch": 5.165103954341623, "grad_norm": 1.1136115789413452, "learning_rate": 0.0003543620057072972, "loss": 3.6442, "step": 76020 }, { "epoch": 5.165443674412284, "grad_norm": 1.0001121759414673, "learning_rate": 0.00035431954069846443, "loss": 3.2544, "step": 76025 }, { "epoch": 5.165783394482946, "grad_norm": 1.1436100006103516, "learning_rate": 0.00035427707568963176, "loss": 3.2595, "step": 76030 }, { "epoch": 5.166123114553608, "grad_norm": 1.583150029182434, "learning_rate": 0.00035423461068079904, "loss": 3.5627, "step": 76035 }, { "epoch": 5.166462834624269, "grad_norm": 1.078120231628418, "learning_rate": 0.00035419214567196627, "loss": 3.4065, "step": 76040 }, { "epoch": 5.166802554694931, "grad_norm": 0.90113365650177, "learning_rate": 0.0003541496806631336, "loss": 3.3448, "step": 76045 }, { "epoch": 5.1671422747655935, "grad_norm": 0.9087955355644226, "learning_rate": 0.0003541072156543009, "loss": 3.5512, "step": 76050 }, { "epoch": 5.167481994836255, "grad_norm": 1.0898863077163696, "learning_rate": 0.0003540647506454681, "loss": 3.4231, "step": 76055 }, { "epoch": 5.167821714906917, "grad_norm": 0.9989370107650757, "learning_rate": 0.0003540222856366354, "loss": 3.2217, "step": 76060 }, { "epoch": 5.168161434977579, "grad_norm": 0.7573986053466797, "learning_rate": 0.0003539798206278027, "loss": 3.5368, "step": 76065 }, { "epoch": 5.16850115504824, "grad_norm": 0.8380848169326782, "learning_rate": 0.00035393735561897, "loss": 3.7962, "step": 76070 }, { "epoch": 5.168840875118902, "grad_norm": 1.0448063611984253, "learning_rate": 0.00035389489061013723, "loss": 3.3586, "step": 76075 }, { "epoch": 5.169180595189564, "grad_norm": 1.0887398719787598, "learning_rate": 0.00035385242560130456, "loss": 3.4509, "step": 76080 }, { "epoch": 5.169520315260225, "grad_norm": 1.0384142398834229, "learning_rate": 0.00035380996059247184, "loss": 3.608, "step": 76085 }, { "epoch": 5.1698600353308874, "grad_norm": 1.0547727346420288, "learning_rate": 0.00035376749558363907, "loss": 3.6586, "step": 76090 }, { "epoch": 5.1701997554015495, "grad_norm": 0.9284372329711914, "learning_rate": 0.00035372503057480635, "loss": 3.5019, "step": 76095 }, { "epoch": 5.170539475472211, "grad_norm": 0.981079638004303, "learning_rate": 0.0003536825655659737, "loss": 3.4747, "step": 76100 }, { "epoch": 5.170879195542873, "grad_norm": 0.8659055829048157, "learning_rate": 0.0003536401005571409, "loss": 3.4038, "step": 76105 }, { "epoch": 5.171218915613535, "grad_norm": 0.903406023979187, "learning_rate": 0.0003535976355483082, "loss": 3.3725, "step": 76110 }, { "epoch": 5.171558635684196, "grad_norm": 1.0536607503890991, "learning_rate": 0.0003535551705394755, "loss": 3.6094, "step": 76115 }, { "epoch": 5.171898355754858, "grad_norm": 0.8169236183166504, "learning_rate": 0.00035351270553064275, "loss": 3.6252, "step": 76120 }, { "epoch": 5.17223807582552, "grad_norm": 0.917647659778595, "learning_rate": 0.00035347024052181003, "loss": 3.1956, "step": 76125 }, { "epoch": 5.172577795896181, "grad_norm": 0.8692314624786377, "learning_rate": 0.0003534277755129773, "loss": 3.4552, "step": 76130 }, { "epoch": 5.1729175159668435, "grad_norm": 1.124666452407837, "learning_rate": 0.0003533853105041446, "loss": 3.5335, "step": 76135 }, { "epoch": 5.1732572360375055, "grad_norm": 0.7767394781112671, "learning_rate": 0.00035334284549531187, "loss": 3.2862, "step": 76140 }, { "epoch": 5.173596956108167, "grad_norm": 0.9920660853385925, "learning_rate": 0.00035330038048647915, "loss": 3.2546, "step": 76145 }, { "epoch": 5.173936676178829, "grad_norm": 0.7940046787261963, "learning_rate": 0.00035325791547764643, "loss": 3.3597, "step": 76150 }, { "epoch": 5.174276396249491, "grad_norm": 0.9845365285873413, "learning_rate": 0.0003532154504688137, "loss": 3.4329, "step": 76155 }, { "epoch": 5.174616116320152, "grad_norm": 0.8418992757797241, "learning_rate": 0.000353172985459981, "loss": 3.6057, "step": 76160 }, { "epoch": 5.174955836390814, "grad_norm": 0.9770155549049377, "learning_rate": 0.0003531305204511482, "loss": 3.474, "step": 76165 }, { "epoch": 5.175295556461475, "grad_norm": 0.8373923897743225, "learning_rate": 0.00035308805544231555, "loss": 3.2185, "step": 76170 }, { "epoch": 5.175635276532137, "grad_norm": 0.9353631734848022, "learning_rate": 0.00035304559043348283, "loss": 3.5639, "step": 76175 }, { "epoch": 5.1759749966027995, "grad_norm": 0.9114130735397339, "learning_rate": 0.00035300312542465006, "loss": 3.5495, "step": 76180 }, { "epoch": 5.176314716673461, "grad_norm": 0.7746999263763428, "learning_rate": 0.0003529606604158174, "loss": 3.6279, "step": 76185 }, { "epoch": 5.176654436744123, "grad_norm": 1.0255323648452759, "learning_rate": 0.00035291819540698467, "loss": 3.283, "step": 76190 }, { "epoch": 5.176994156814785, "grad_norm": 0.9495078325271606, "learning_rate": 0.0003528757303981519, "loss": 3.5158, "step": 76195 }, { "epoch": 5.177333876885446, "grad_norm": 0.9692737460136414, "learning_rate": 0.0003528332653893192, "loss": 3.7453, "step": 76200 }, { "epoch": 5.177673596956108, "grad_norm": 1.1697654724121094, "learning_rate": 0.0003527908003804865, "loss": 3.3254, "step": 76205 }, { "epoch": 5.17801331702677, "grad_norm": 0.97832852602005, "learning_rate": 0.00035274833537165374, "loss": 3.548, "step": 76210 }, { "epoch": 5.178353037097431, "grad_norm": 1.0987861156463623, "learning_rate": 0.000352705870362821, "loss": 3.5084, "step": 76215 }, { "epoch": 5.178692757168093, "grad_norm": 0.7683600187301636, "learning_rate": 0.00035266340535398835, "loss": 3.8778, "step": 76220 }, { "epoch": 5.1790324772387555, "grad_norm": 0.8573612570762634, "learning_rate": 0.0003526209403451556, "loss": 3.7402, "step": 76225 }, { "epoch": 5.179372197309417, "grad_norm": 0.8639705181121826, "learning_rate": 0.00035257847533632286, "loss": 3.3174, "step": 76230 }, { "epoch": 5.179711917380079, "grad_norm": 0.909771203994751, "learning_rate": 0.00035253601032749014, "loss": 3.1867, "step": 76235 }, { "epoch": 5.180051637450741, "grad_norm": 0.912753701210022, "learning_rate": 0.0003524935453186575, "loss": 3.5072, "step": 76240 }, { "epoch": 5.180391357521402, "grad_norm": 0.7627174854278564, "learning_rate": 0.0003524510803098247, "loss": 3.4902, "step": 76245 }, { "epoch": 5.180731077592064, "grad_norm": 0.8061987161636353, "learning_rate": 0.000352408615300992, "loss": 3.5993, "step": 76250 }, { "epoch": 5.181070797662726, "grad_norm": 0.9024948477745056, "learning_rate": 0.0003523661502921593, "loss": 3.2906, "step": 76255 }, { "epoch": 5.181410517733387, "grad_norm": 0.9783357977867126, "learning_rate": 0.00035232368528332654, "loss": 3.4517, "step": 76260 }, { "epoch": 5.181750237804049, "grad_norm": 1.037613868713379, "learning_rate": 0.0003522812202744938, "loss": 3.5339, "step": 76265 }, { "epoch": 5.1820899578747115, "grad_norm": 0.9578565955162048, "learning_rate": 0.0003522387552656611, "loss": 3.413, "step": 76270 }, { "epoch": 5.182429677945373, "grad_norm": 0.7837210297584534, "learning_rate": 0.0003521962902568284, "loss": 3.5118, "step": 76275 }, { "epoch": 5.182769398016035, "grad_norm": 0.910256028175354, "learning_rate": 0.00035215382524799566, "loss": 3.4385, "step": 76280 }, { "epoch": 5.183109118086697, "grad_norm": 1.1504288911819458, "learning_rate": 0.00035211136023916294, "loss": 3.3793, "step": 76285 }, { "epoch": 5.183448838157358, "grad_norm": 0.7109076976776123, "learning_rate": 0.0003520688952303302, "loss": 3.6799, "step": 76290 }, { "epoch": 5.18378855822802, "grad_norm": 0.9681477546691895, "learning_rate": 0.0003520264302214975, "loss": 3.0246, "step": 76295 }, { "epoch": 5.184128278298682, "grad_norm": 1.149387001991272, "learning_rate": 0.0003519839652126648, "loss": 3.4356, "step": 76300 }, { "epoch": 5.184467998369343, "grad_norm": 0.9286701083183289, "learning_rate": 0.000351941500203832, "loss": 3.759, "step": 76305 }, { "epoch": 5.184807718440005, "grad_norm": 0.9569326639175415, "learning_rate": 0.00035189903519499934, "loss": 3.4523, "step": 76310 }, { "epoch": 5.1851474385106675, "grad_norm": 0.8626444935798645, "learning_rate": 0.0003518565701861666, "loss": 3.4732, "step": 76315 }, { "epoch": 5.185487158581329, "grad_norm": 0.914947509765625, "learning_rate": 0.00035181410517733385, "loss": 3.4052, "step": 76320 }, { "epoch": 5.185826878651991, "grad_norm": 1.0069433450698853, "learning_rate": 0.0003517716401685012, "loss": 3.3944, "step": 76325 }, { "epoch": 5.186166598722653, "grad_norm": 1.0876588821411133, "learning_rate": 0.00035172917515966846, "loss": 3.4808, "step": 76330 }, { "epoch": 5.186506318793314, "grad_norm": 0.8869509100914001, "learning_rate": 0.0003516867101508357, "loss": 3.4608, "step": 76335 }, { "epoch": 5.186846038863976, "grad_norm": 1.1144438982009888, "learning_rate": 0.000351644245142003, "loss": 3.374, "step": 76340 }, { "epoch": 5.187185758934638, "grad_norm": 0.8701178431510925, "learning_rate": 0.0003516017801331703, "loss": 3.5223, "step": 76345 }, { "epoch": 5.187525479005299, "grad_norm": 0.9228264689445496, "learning_rate": 0.0003515593151243375, "loss": 3.4482, "step": 76350 }, { "epoch": 5.187865199075961, "grad_norm": 0.9200606942176819, "learning_rate": 0.0003515168501155048, "loss": 3.5946, "step": 76355 }, { "epoch": 5.1882049191466235, "grad_norm": 0.8377779722213745, "learning_rate": 0.00035147438510667214, "loss": 3.316, "step": 76360 }, { "epoch": 5.188544639217285, "grad_norm": 0.7952613234519958, "learning_rate": 0.00035143192009783937, "loss": 3.3757, "step": 76365 }, { "epoch": 5.188884359287947, "grad_norm": 0.7033195495605469, "learning_rate": 0.00035138945508900665, "loss": 3.6887, "step": 76370 }, { "epoch": 5.189224079358609, "grad_norm": 0.8251524567604065, "learning_rate": 0.000351346990080174, "loss": 3.3597, "step": 76375 }, { "epoch": 5.18956379942927, "grad_norm": 0.8204817175865173, "learning_rate": 0.0003513045250713412, "loss": 3.2293, "step": 76380 }, { "epoch": 5.189903519499932, "grad_norm": 0.9454706311225891, "learning_rate": 0.0003512620600625085, "loss": 3.5732, "step": 76385 }, { "epoch": 5.190243239570594, "grad_norm": 0.9697344303131104, "learning_rate": 0.00035121959505367577, "loss": 3.5723, "step": 76390 }, { "epoch": 5.190582959641255, "grad_norm": 0.9776793718338013, "learning_rate": 0.00035117713004484305, "loss": 3.7623, "step": 76395 }, { "epoch": 5.1909226797119175, "grad_norm": 0.8809166550636292, "learning_rate": 0.0003511346650360103, "loss": 3.3736, "step": 76400 }, { "epoch": 5.1912623997825795, "grad_norm": 0.8176227807998657, "learning_rate": 0.0003510922000271776, "loss": 3.3903, "step": 76405 }, { "epoch": 5.191602119853241, "grad_norm": 0.901653528213501, "learning_rate": 0.00035104973501834494, "loss": 3.582, "step": 76410 }, { "epoch": 5.191941839923903, "grad_norm": 0.8055301308631897, "learning_rate": 0.00035100727000951217, "loss": 3.2316, "step": 76415 }, { "epoch": 5.192281559994565, "grad_norm": 0.7280771136283875, "learning_rate": 0.00035096480500067945, "loss": 3.1486, "step": 76420 }, { "epoch": 5.192621280065226, "grad_norm": 0.73358154296875, "learning_rate": 0.00035092233999184673, "loss": 3.7185, "step": 76425 }, { "epoch": 5.192961000135888, "grad_norm": 0.9224925637245178, "learning_rate": 0.000350879874983014, "loss": 3.3484, "step": 76430 }, { "epoch": 5.19330072020655, "grad_norm": 0.7639920711517334, "learning_rate": 0.0003508374099741813, "loss": 3.387, "step": 76435 }, { "epoch": 5.193640440277211, "grad_norm": 0.8350674510002136, "learning_rate": 0.00035079494496534857, "loss": 3.4073, "step": 76440 }, { "epoch": 5.1939801603478735, "grad_norm": 0.9176893830299377, "learning_rate": 0.00035075247995651585, "loss": 3.7313, "step": 76445 }, { "epoch": 5.1943198804185355, "grad_norm": 0.7075564861297607, "learning_rate": 0.00035071001494768313, "loss": 3.6113, "step": 76450 }, { "epoch": 5.194659600489197, "grad_norm": 0.9309481978416443, "learning_rate": 0.0003506675499388504, "loss": 3.5021, "step": 76455 }, { "epoch": 5.194999320559859, "grad_norm": 0.7149673104286194, "learning_rate": 0.00035062508493001763, "loss": 3.6009, "step": 76460 }, { "epoch": 5.195339040630521, "grad_norm": 0.8282140493392944, "learning_rate": 0.00035058261992118497, "loss": 3.5054, "step": 76465 }, { "epoch": 5.195678760701182, "grad_norm": 0.8848045468330383, "learning_rate": 0.00035054015491235225, "loss": 3.4156, "step": 76470 }, { "epoch": 5.196018480771844, "grad_norm": 0.9471142292022705, "learning_rate": 0.0003504976899035195, "loss": 3.5037, "step": 76475 }, { "epoch": 5.196358200842505, "grad_norm": 0.7341698408126831, "learning_rate": 0.0003504552248946868, "loss": 3.539, "step": 76480 }, { "epoch": 5.196697920913167, "grad_norm": 0.9290366768836975, "learning_rate": 0.0003504127598858541, "loss": 3.52, "step": 76485 }, { "epoch": 5.1970376409838295, "grad_norm": 0.8360311388969421, "learning_rate": 0.0003503702948770213, "loss": 3.5625, "step": 76490 }, { "epoch": 5.197377361054491, "grad_norm": 1.1073541641235352, "learning_rate": 0.0003503278298681886, "loss": 3.3813, "step": 76495 }, { "epoch": 5.197717081125153, "grad_norm": 1.1822370290756226, "learning_rate": 0.00035028536485935593, "loss": 3.4282, "step": 76500 }, { "epoch": 5.198056801195815, "grad_norm": 0.9698997139930725, "learning_rate": 0.00035024289985052315, "loss": 3.3957, "step": 76505 }, { "epoch": 5.198396521266476, "grad_norm": 0.9957197904586792, "learning_rate": 0.00035020043484169043, "loss": 3.449, "step": 76510 }, { "epoch": 5.198736241337138, "grad_norm": 0.7667668461799622, "learning_rate": 0.00035015796983285777, "loss": 3.769, "step": 76515 }, { "epoch": 5.1990759614078, "grad_norm": 0.6687655448913574, "learning_rate": 0.000350115504824025, "loss": 3.3098, "step": 76520 }, { "epoch": 5.199415681478461, "grad_norm": 1.1261318922042847, "learning_rate": 0.0003500730398151923, "loss": 3.458, "step": 76525 }, { "epoch": 5.199755401549123, "grad_norm": 0.70208340883255, "learning_rate": 0.00035003057480635955, "loss": 3.4758, "step": 76530 }, { "epoch": 5.2000951216197855, "grad_norm": 0.9253193736076355, "learning_rate": 0.00034998810979752683, "loss": 3.7037, "step": 76535 }, { "epoch": 5.200434841690447, "grad_norm": 1.0528656244277954, "learning_rate": 0.0003499456447886941, "loss": 3.5488, "step": 76540 }, { "epoch": 5.200774561761109, "grad_norm": 0.9178515672683716, "learning_rate": 0.0003499031797798614, "loss": 3.4896, "step": 76545 }, { "epoch": 5.201114281831771, "grad_norm": 1.137354850769043, "learning_rate": 0.0003498607147710287, "loss": 3.4848, "step": 76550 }, { "epoch": 5.201454001902432, "grad_norm": 0.8029659390449524, "learning_rate": 0.00034981824976219596, "loss": 3.2579, "step": 76555 }, { "epoch": 5.201793721973094, "grad_norm": 0.9434231519699097, "learning_rate": 0.00034977578475336324, "loss": 3.431, "step": 76560 }, { "epoch": 5.202133442043756, "grad_norm": 0.945254385471344, "learning_rate": 0.00034973331974453046, "loss": 3.465, "step": 76565 }, { "epoch": 5.202473162114417, "grad_norm": 1.1073884963989258, "learning_rate": 0.0003496908547356978, "loss": 3.3489, "step": 76570 }, { "epoch": 5.202812882185079, "grad_norm": 0.787460446357727, "learning_rate": 0.0003496483897268651, "loss": 3.579, "step": 76575 }, { "epoch": 5.2031526022557415, "grad_norm": 0.9890434741973877, "learning_rate": 0.00034960592471803236, "loss": 3.6738, "step": 76580 }, { "epoch": 5.203492322326403, "grad_norm": 0.7592213749885559, "learning_rate": 0.00034956345970919964, "loss": 3.24, "step": 76585 }, { "epoch": 5.203832042397065, "grad_norm": 0.9535008668899536, "learning_rate": 0.0003495209947003669, "loss": 3.4982, "step": 76590 }, { "epoch": 5.204171762467727, "grad_norm": 0.7897887825965881, "learning_rate": 0.0003494785296915342, "loss": 3.3998, "step": 76595 }, { "epoch": 5.204511482538388, "grad_norm": 0.931279182434082, "learning_rate": 0.0003494360646827015, "loss": 3.5603, "step": 76600 }, { "epoch": 5.20485120260905, "grad_norm": 0.8753998875617981, "learning_rate": 0.00034939359967386876, "loss": 3.2714, "step": 76605 }, { "epoch": 5.205190922679712, "grad_norm": 0.8961510062217712, "learning_rate": 0.00034935113466503604, "loss": 3.3529, "step": 76610 }, { "epoch": 5.205530642750373, "grad_norm": 0.928054690361023, "learning_rate": 0.00034930866965620326, "loss": 3.5166, "step": 76615 }, { "epoch": 5.205870362821035, "grad_norm": 0.8334048986434937, "learning_rate": 0.0003492662046473706, "loss": 3.5691, "step": 76620 }, { "epoch": 5.2062100828916975, "grad_norm": 1.1574712991714478, "learning_rate": 0.0003492237396385379, "loss": 3.6802, "step": 76625 }, { "epoch": 5.206549802962359, "grad_norm": 0.8638137578964233, "learning_rate": 0.0003491812746297051, "loss": 3.4555, "step": 76630 }, { "epoch": 5.206889523033021, "grad_norm": 0.9776730537414551, "learning_rate": 0.00034913880962087244, "loss": 3.642, "step": 76635 }, { "epoch": 5.207229243103683, "grad_norm": 0.8237279653549194, "learning_rate": 0.0003490963446120397, "loss": 3.4523, "step": 76640 }, { "epoch": 5.207568963174344, "grad_norm": 0.908808171749115, "learning_rate": 0.00034905387960320694, "loss": 3.2078, "step": 76645 }, { "epoch": 5.207908683245006, "grad_norm": 0.9231775999069214, "learning_rate": 0.0003490114145943742, "loss": 3.3212, "step": 76650 }, { "epoch": 5.208248403315668, "grad_norm": 0.8304939866065979, "learning_rate": 0.00034896894958554156, "loss": 3.6634, "step": 76655 }, { "epoch": 5.208588123386329, "grad_norm": 0.8265734910964966, "learning_rate": 0.0003489264845767088, "loss": 3.2778, "step": 76660 }, { "epoch": 5.2089278434569914, "grad_norm": 0.8625829219818115, "learning_rate": 0.00034888401956787606, "loss": 3.5005, "step": 76665 }, { "epoch": 5.2092675635276535, "grad_norm": 0.9527860283851624, "learning_rate": 0.0003488415545590434, "loss": 3.2424, "step": 76670 }, { "epoch": 5.209607283598315, "grad_norm": 0.8635834455490112, "learning_rate": 0.0003487990895502106, "loss": 3.4766, "step": 76675 }, { "epoch": 5.209947003668977, "grad_norm": 0.8672298789024353, "learning_rate": 0.0003487566245413779, "loss": 3.4566, "step": 76680 }, { "epoch": 5.210286723739639, "grad_norm": 0.9842963814735413, "learning_rate": 0.0003487141595325452, "loss": 3.3418, "step": 76685 }, { "epoch": 5.2106264438103, "grad_norm": 0.9216187000274658, "learning_rate": 0.00034867169452371246, "loss": 3.2503, "step": 76690 }, { "epoch": 5.210966163880962, "grad_norm": 0.8554538488388062, "learning_rate": 0.00034862922951487974, "loss": 3.4797, "step": 76695 }, { "epoch": 5.211305883951624, "grad_norm": 0.7324966192245483, "learning_rate": 0.000348586764506047, "loss": 3.5288, "step": 76700 }, { "epoch": 5.211645604022285, "grad_norm": 0.9857914447784424, "learning_rate": 0.0003485442994972143, "loss": 3.3858, "step": 76705 }, { "epoch": 5.2119853240929475, "grad_norm": 0.9953253269195557, "learning_rate": 0.0003485018344883816, "loss": 3.4752, "step": 76710 }, { "epoch": 5.2123250441636095, "grad_norm": 1.0670654773712158, "learning_rate": 0.00034845936947954886, "loss": 3.3798, "step": 76715 }, { "epoch": 5.212664764234271, "grad_norm": 0.9089499115943909, "learning_rate": 0.0003484169044707161, "loss": 3.6627, "step": 76720 }, { "epoch": 5.213004484304933, "grad_norm": 0.7362671494483948, "learning_rate": 0.0003483744394618834, "loss": 3.55, "step": 76725 }, { "epoch": 5.213344204375595, "grad_norm": 1.0961148738861084, "learning_rate": 0.0003483319744530507, "loss": 3.4246, "step": 76730 }, { "epoch": 5.213683924446256, "grad_norm": 0.8753455281257629, "learning_rate": 0.00034828950944421793, "loss": 3.5948, "step": 76735 }, { "epoch": 5.214023644516918, "grad_norm": 1.0000290870666504, "learning_rate": 0.00034824704443538526, "loss": 3.4863, "step": 76740 }, { "epoch": 5.21436336458758, "grad_norm": 0.9124593734741211, "learning_rate": 0.00034820457942655254, "loss": 3.4278, "step": 76745 }, { "epoch": 5.214703084658241, "grad_norm": 1.09718656539917, "learning_rate": 0.0003481621144177198, "loss": 3.2415, "step": 76750 }, { "epoch": 5.2150428047289035, "grad_norm": 1.1463552713394165, "learning_rate": 0.00034811964940888705, "loss": 3.2756, "step": 76755 }, { "epoch": 5.2153825247995655, "grad_norm": 0.8576882481575012, "learning_rate": 0.0003480771844000544, "loss": 3.4802, "step": 76760 }, { "epoch": 5.215722244870227, "grad_norm": 1.0838369131088257, "learning_rate": 0.00034803471939122166, "loss": 3.4458, "step": 76765 }, { "epoch": 5.216061964940889, "grad_norm": 1.0962196588516235, "learning_rate": 0.0003479922543823889, "loss": 3.3023, "step": 76770 }, { "epoch": 5.216401685011551, "grad_norm": 0.8995252251625061, "learning_rate": 0.0003479497893735562, "loss": 3.346, "step": 76775 }, { "epoch": 5.216741405082212, "grad_norm": 0.9401818513870239, "learning_rate": 0.0003479073243647235, "loss": 3.3527, "step": 76780 }, { "epoch": 5.217081125152874, "grad_norm": 1.015880823135376, "learning_rate": 0.00034786485935589073, "loss": 3.5075, "step": 76785 }, { "epoch": 5.217420845223536, "grad_norm": 1.1867128610610962, "learning_rate": 0.000347822394347058, "loss": 3.7586, "step": 76790 }, { "epoch": 5.217760565294197, "grad_norm": 1.0693485736846924, "learning_rate": 0.00034777992933822534, "loss": 3.2909, "step": 76795 }, { "epoch": 5.2181002853648595, "grad_norm": 0.9281983375549316, "learning_rate": 0.00034773746432939257, "loss": 3.3984, "step": 76800 }, { "epoch": 5.2184400054355216, "grad_norm": 0.6397440433502197, "learning_rate": 0.00034769499932055985, "loss": 3.6465, "step": 76805 }, { "epoch": 5.218779725506183, "grad_norm": 0.805755615234375, "learning_rate": 0.0003476525343117272, "loss": 3.3819, "step": 76810 }, { "epoch": 5.219119445576845, "grad_norm": 0.9991214275360107, "learning_rate": 0.0003476100693028944, "loss": 3.4524, "step": 76815 }, { "epoch": 5.219459165647507, "grad_norm": 0.8811479210853577, "learning_rate": 0.0003475676042940617, "loss": 3.3575, "step": 76820 }, { "epoch": 5.219798885718168, "grad_norm": 0.809653639793396, "learning_rate": 0.00034752513928522897, "loss": 3.5167, "step": 76825 }, { "epoch": 5.22013860578883, "grad_norm": 0.8513479828834534, "learning_rate": 0.00034748267427639625, "loss": 3.3427, "step": 76830 }, { "epoch": 5.220478325859492, "grad_norm": 0.847898006439209, "learning_rate": 0.00034744020926756353, "loss": 3.2258, "step": 76835 }, { "epoch": 5.220818045930153, "grad_norm": 1.0589715242385864, "learning_rate": 0.0003473977442587308, "loss": 3.4146, "step": 76840 }, { "epoch": 5.2211577660008155, "grad_norm": 0.682145357131958, "learning_rate": 0.0003473552792498981, "loss": 3.3282, "step": 76845 }, { "epoch": 5.221497486071477, "grad_norm": 0.9379629492759705, "learning_rate": 0.00034731281424106537, "loss": 3.5568, "step": 76850 }, { "epoch": 5.221837206142139, "grad_norm": 0.8612335324287415, "learning_rate": 0.00034727034923223265, "loss": 3.3878, "step": 76855 }, { "epoch": 5.222176926212801, "grad_norm": 1.0778228044509888, "learning_rate": 0.0003472278842233999, "loss": 3.6026, "step": 76860 }, { "epoch": 5.222516646283462, "grad_norm": 1.1476625204086304, "learning_rate": 0.0003471854192145672, "loss": 3.5481, "step": 76865 }, { "epoch": 5.222856366354124, "grad_norm": 0.8820012211799622, "learning_rate": 0.0003471429542057345, "loss": 3.5452, "step": 76870 }, { "epoch": 5.223196086424786, "grad_norm": 0.9331788420677185, "learning_rate": 0.0003471004891969017, "loss": 3.4671, "step": 76875 }, { "epoch": 5.223535806495447, "grad_norm": 1.0273224115371704, "learning_rate": 0.00034705802418806905, "loss": 3.2284, "step": 76880 }, { "epoch": 5.223875526566109, "grad_norm": 0.8899880647659302, "learning_rate": 0.00034701555917923633, "loss": 3.3736, "step": 76885 }, { "epoch": 5.2242152466367715, "grad_norm": 0.7657882571220398, "learning_rate": 0.00034697309417040356, "loss": 3.6028, "step": 76890 }, { "epoch": 5.224554966707433, "grad_norm": 0.9197964668273926, "learning_rate": 0.0003469306291615709, "loss": 3.248, "step": 76895 }, { "epoch": 5.224894686778095, "grad_norm": 1.0294588804244995, "learning_rate": 0.00034688816415273817, "loss": 3.4955, "step": 76900 }, { "epoch": 5.225234406848757, "grad_norm": 0.8837618231773376, "learning_rate": 0.0003468456991439054, "loss": 3.6551, "step": 76905 }, { "epoch": 5.225574126919418, "grad_norm": 0.8975844383239746, "learning_rate": 0.0003468032341350727, "loss": 3.4292, "step": 76910 }, { "epoch": 5.22591384699008, "grad_norm": 0.9473903775215149, "learning_rate": 0.00034676076912624, "loss": 3.4393, "step": 76915 }, { "epoch": 5.226253567060742, "grad_norm": 1.0238534212112427, "learning_rate": 0.0003467183041174073, "loss": 3.5072, "step": 76920 }, { "epoch": 5.226593287131403, "grad_norm": 0.8331368565559387, "learning_rate": 0.0003466758391085745, "loss": 3.5172, "step": 76925 }, { "epoch": 5.226933007202065, "grad_norm": 1.4924378395080566, "learning_rate": 0.00034663337409974185, "loss": 3.6588, "step": 76930 }, { "epoch": 5.2272727272727275, "grad_norm": 0.8971142768859863, "learning_rate": 0.00034659090909090913, "loss": 3.2659, "step": 76935 }, { "epoch": 5.227612447343389, "grad_norm": 0.7911790609359741, "learning_rate": 0.00034654844408207636, "loss": 3.3704, "step": 76940 }, { "epoch": 5.227952167414051, "grad_norm": 0.837675154209137, "learning_rate": 0.00034650597907324364, "loss": 3.5078, "step": 76945 }, { "epoch": 5.228291887484713, "grad_norm": 0.8000457882881165, "learning_rate": 0.00034646351406441097, "loss": 3.4418, "step": 76950 }, { "epoch": 5.228631607555374, "grad_norm": 0.750470757484436, "learning_rate": 0.0003464210490555782, "loss": 3.5679, "step": 76955 }, { "epoch": 5.228971327626036, "grad_norm": 0.9276978373527527, "learning_rate": 0.0003463785840467455, "loss": 3.4533, "step": 76960 }, { "epoch": 5.229311047696698, "grad_norm": 0.8411605358123779, "learning_rate": 0.0003463361190379128, "loss": 3.4244, "step": 76965 }, { "epoch": 5.229650767767359, "grad_norm": 0.9118165969848633, "learning_rate": 0.00034629365402908004, "loss": 3.5258, "step": 76970 }, { "epoch": 5.2299904878380215, "grad_norm": 1.0030111074447632, "learning_rate": 0.0003462511890202473, "loss": 3.6466, "step": 76975 }, { "epoch": 5.2303302079086835, "grad_norm": 0.8474104404449463, "learning_rate": 0.0003462087240114146, "loss": 3.5637, "step": 76980 }, { "epoch": 5.230669927979345, "grad_norm": 0.94627445936203, "learning_rate": 0.0003461662590025819, "loss": 3.372, "step": 76985 }, { "epoch": 5.231009648050007, "grad_norm": 1.0711040496826172, "learning_rate": 0.00034612379399374916, "loss": 3.3669, "step": 76990 }, { "epoch": 5.231349368120669, "grad_norm": 0.7858566045761108, "learning_rate": 0.00034608132898491644, "loss": 3.5099, "step": 76995 }, { "epoch": 5.23168908819133, "grad_norm": 0.8284093141555786, "learning_rate": 0.0003460388639760837, "loss": 3.4334, "step": 77000 }, { "epoch": 5.232028808261992, "grad_norm": 0.8450635671615601, "learning_rate": 0.000345996398967251, "loss": 3.4288, "step": 77005 }, { "epoch": 5.232368528332654, "grad_norm": 0.9665061831474304, "learning_rate": 0.0003459539339584183, "loss": 3.2788, "step": 77010 }, { "epoch": 5.232708248403315, "grad_norm": 0.8939473032951355, "learning_rate": 0.0003459114689495855, "loss": 3.3627, "step": 77015 }, { "epoch": 5.2330479684739775, "grad_norm": 1.0376514196395874, "learning_rate": 0.00034586900394075284, "loss": 3.403, "step": 77020 }, { "epoch": 5.2333876885446395, "grad_norm": 0.913378894329071, "learning_rate": 0.0003458265389319201, "loss": 3.5094, "step": 77025 }, { "epoch": 5.233727408615301, "grad_norm": 0.8235164284706116, "learning_rate": 0.00034578407392308735, "loss": 3.5257, "step": 77030 }, { "epoch": 5.234067128685963, "grad_norm": 1.1945749521255493, "learning_rate": 0.0003457416089142547, "loss": 3.5233, "step": 77035 }, { "epoch": 5.234406848756625, "grad_norm": 0.8422659039497375, "learning_rate": 0.00034569914390542196, "loss": 3.5116, "step": 77040 }, { "epoch": 5.234746568827286, "grad_norm": 0.9381842017173767, "learning_rate": 0.0003456566788965892, "loss": 3.4176, "step": 77045 }, { "epoch": 5.235086288897948, "grad_norm": 0.8261088728904724, "learning_rate": 0.00034561421388775647, "loss": 3.5836, "step": 77050 }, { "epoch": 5.23542600896861, "grad_norm": 0.8078585863113403, "learning_rate": 0.0003455717488789238, "loss": 3.409, "step": 77055 }, { "epoch": 5.235765729039271, "grad_norm": 0.9065710306167603, "learning_rate": 0.000345529283870091, "loss": 3.3325, "step": 77060 }, { "epoch": 5.2361054491099335, "grad_norm": 1.1779924631118774, "learning_rate": 0.0003454868188612583, "loss": 3.5113, "step": 77065 }, { "epoch": 5.2364451691805955, "grad_norm": 0.871133029460907, "learning_rate": 0.00034544435385242564, "loss": 3.5837, "step": 77070 }, { "epoch": 5.236784889251257, "grad_norm": 0.830227255821228, "learning_rate": 0.00034540188884359287, "loss": 3.4585, "step": 77075 }, { "epoch": 5.237124609321919, "grad_norm": 0.8452599048614502, "learning_rate": 0.00034535942383476015, "loss": 3.4631, "step": 77080 }, { "epoch": 5.237464329392581, "grad_norm": 1.071105718612671, "learning_rate": 0.0003453169588259274, "loss": 3.6018, "step": 77085 }, { "epoch": 5.237804049463242, "grad_norm": 1.0948405265808105, "learning_rate": 0.00034527449381709476, "loss": 3.444, "step": 77090 }, { "epoch": 5.238143769533904, "grad_norm": 0.9461437463760376, "learning_rate": 0.000345232028808262, "loss": 3.6905, "step": 77095 }, { "epoch": 5.238483489604566, "grad_norm": 0.7600189447402954, "learning_rate": 0.00034518956379942927, "loss": 3.543, "step": 77100 }, { "epoch": 5.238823209675227, "grad_norm": 0.882175087928772, "learning_rate": 0.0003451470987905966, "loss": 3.3845, "step": 77105 }, { "epoch": 5.2391629297458895, "grad_norm": 1.045780897140503, "learning_rate": 0.0003451046337817638, "loss": 3.4564, "step": 77110 }, { "epoch": 5.239502649816552, "grad_norm": 0.9692131876945496, "learning_rate": 0.0003450621687729311, "loss": 3.3799, "step": 77115 }, { "epoch": 5.239842369887213, "grad_norm": 0.8713414669036865, "learning_rate": 0.0003450197037640984, "loss": 3.5173, "step": 77120 }, { "epoch": 5.240182089957875, "grad_norm": 0.8626394867897034, "learning_rate": 0.00034497723875526567, "loss": 3.3149, "step": 77125 }, { "epoch": 5.240521810028537, "grad_norm": 0.9501742124557495, "learning_rate": 0.00034493477374643295, "loss": 3.5311, "step": 77130 }, { "epoch": 5.240861530099198, "grad_norm": 1.1224428415298462, "learning_rate": 0.00034489230873760023, "loss": 3.4602, "step": 77135 }, { "epoch": 5.24120125016986, "grad_norm": 0.9698891043663025, "learning_rate": 0.0003448498437287675, "loss": 3.6282, "step": 77140 }, { "epoch": 5.241540970240522, "grad_norm": 0.8749942779541016, "learning_rate": 0.0003448073787199348, "loss": 3.4227, "step": 77145 }, { "epoch": 5.241880690311183, "grad_norm": 0.8608420491218567, "learning_rate": 0.00034476491371110207, "loss": 3.6252, "step": 77150 }, { "epoch": 5.2422204103818455, "grad_norm": 0.7576118111610413, "learning_rate": 0.0003447224487022693, "loss": 3.3753, "step": 77155 }, { "epoch": 5.242560130452507, "grad_norm": 0.7377164363861084, "learning_rate": 0.00034467998369343663, "loss": 3.3654, "step": 77160 }, { "epoch": 5.242899850523169, "grad_norm": 0.7708435654640198, "learning_rate": 0.0003446375186846039, "loss": 3.5324, "step": 77165 }, { "epoch": 5.243239570593831, "grad_norm": 0.8772801160812378, "learning_rate": 0.00034459505367577113, "loss": 3.5919, "step": 77170 }, { "epoch": 5.243579290664492, "grad_norm": 1.2384616136550903, "learning_rate": 0.00034455258866693847, "loss": 3.3026, "step": 77175 }, { "epoch": 5.243919010735154, "grad_norm": 1.0649309158325195, "learning_rate": 0.00034451012365810575, "loss": 3.5133, "step": 77180 }, { "epoch": 5.244258730805816, "grad_norm": 0.8944038152694702, "learning_rate": 0.000344467658649273, "loss": 3.4353, "step": 77185 }, { "epoch": 5.244598450876477, "grad_norm": 0.8924391865730286, "learning_rate": 0.0003444251936404403, "loss": 3.1274, "step": 77190 }, { "epoch": 5.244938170947139, "grad_norm": 0.8794419765472412, "learning_rate": 0.0003443827286316076, "loss": 3.6789, "step": 77195 }, { "epoch": 5.2452778910178015, "grad_norm": 0.7768568992614746, "learning_rate": 0.0003443402636227748, "loss": 3.4674, "step": 77200 }, { "epoch": 5.245617611088463, "grad_norm": 0.850680947303772, "learning_rate": 0.0003442977986139421, "loss": 3.5749, "step": 77205 }, { "epoch": 5.245957331159125, "grad_norm": 0.9409108757972717, "learning_rate": 0.00034425533360510943, "loss": 3.2891, "step": 77210 }, { "epoch": 5.246297051229787, "grad_norm": 0.8805908560752869, "learning_rate": 0.00034421286859627665, "loss": 3.3258, "step": 77215 }, { "epoch": 5.246636771300448, "grad_norm": 0.9379807710647583, "learning_rate": 0.00034417040358744393, "loss": 3.585, "step": 77220 }, { "epoch": 5.24697649137111, "grad_norm": 0.8435922861099243, "learning_rate": 0.00034412793857861127, "loss": 3.298, "step": 77225 }, { "epoch": 5.247316211441772, "grad_norm": 1.1961607933044434, "learning_rate": 0.0003440854735697785, "loss": 3.5345, "step": 77230 }, { "epoch": 5.247655931512433, "grad_norm": 0.7640591263771057, "learning_rate": 0.0003440430085609458, "loss": 3.3496, "step": 77235 }, { "epoch": 5.2479956515830954, "grad_norm": 1.0892095565795898, "learning_rate": 0.00034400054355211305, "loss": 3.5472, "step": 77240 }, { "epoch": 5.2483353716537575, "grad_norm": 0.9508739709854126, "learning_rate": 0.00034395807854328033, "loss": 3.5762, "step": 77245 }, { "epoch": 5.248675091724419, "grad_norm": 0.9460146427154541, "learning_rate": 0.0003439156135344476, "loss": 3.4755, "step": 77250 }, { "epoch": 5.249014811795081, "grad_norm": 0.9860759973526001, "learning_rate": 0.0003438731485256149, "loss": 3.2882, "step": 77255 }, { "epoch": 5.249354531865743, "grad_norm": 0.7265598177909851, "learning_rate": 0.00034383068351678223, "loss": 3.4948, "step": 77260 }, { "epoch": 5.249694251936404, "grad_norm": 1.2614529132843018, "learning_rate": 0.00034378821850794946, "loss": 3.5251, "step": 77265 }, { "epoch": 5.250033972007066, "grad_norm": 0.9185158610343933, "learning_rate": 0.00034374575349911674, "loss": 3.39, "step": 77270 }, { "epoch": 5.250373692077728, "grad_norm": 0.9545618891716003, "learning_rate": 0.000343703288490284, "loss": 3.4426, "step": 77275 }, { "epoch": 5.250713412148389, "grad_norm": 0.8243758678436279, "learning_rate": 0.0003436608234814513, "loss": 3.5945, "step": 77280 }, { "epoch": 5.2510531322190515, "grad_norm": 1.0781502723693848, "learning_rate": 0.0003436183584726186, "loss": 3.4631, "step": 77285 }, { "epoch": 5.2513928522897135, "grad_norm": 0.7528315782546997, "learning_rate": 0.00034357589346378586, "loss": 3.4565, "step": 77290 }, { "epoch": 5.251732572360375, "grad_norm": 0.7889813184738159, "learning_rate": 0.00034353342845495314, "loss": 3.3936, "step": 77295 }, { "epoch": 5.252072292431037, "grad_norm": 1.0313286781311035, "learning_rate": 0.0003434909634461204, "loss": 3.1429, "step": 77300 }, { "epoch": 5.252412012501699, "grad_norm": 0.7810640931129456, "learning_rate": 0.0003434484984372877, "loss": 3.4239, "step": 77305 }, { "epoch": 5.25275173257236, "grad_norm": 0.9785544276237488, "learning_rate": 0.0003434060334284549, "loss": 3.5322, "step": 77310 }, { "epoch": 5.253091452643022, "grad_norm": 0.747261106967926, "learning_rate": 0.00034336356841962226, "loss": 3.5368, "step": 77315 }, { "epoch": 5.253431172713684, "grad_norm": 0.8015008568763733, "learning_rate": 0.00034332110341078954, "loss": 3.3648, "step": 77320 }, { "epoch": 5.253770892784345, "grad_norm": 0.8469849824905396, "learning_rate": 0.00034327863840195676, "loss": 3.6406, "step": 77325 }, { "epoch": 5.2541106128550075, "grad_norm": 1.0149059295654297, "learning_rate": 0.0003432361733931241, "loss": 3.4733, "step": 77330 }, { "epoch": 5.2544503329256695, "grad_norm": 0.8491706252098083, "learning_rate": 0.0003431937083842914, "loss": 3.4712, "step": 77335 }, { "epoch": 5.254790052996331, "grad_norm": 0.6522251963615417, "learning_rate": 0.0003431512433754586, "loss": 3.5576, "step": 77340 }, { "epoch": 5.255129773066993, "grad_norm": 0.8650041818618774, "learning_rate": 0.0003431087783666259, "loss": 3.617, "step": 77345 }, { "epoch": 5.255469493137655, "grad_norm": 1.4981704950332642, "learning_rate": 0.0003430663133577932, "loss": 3.5542, "step": 77350 }, { "epoch": 5.255809213208316, "grad_norm": 0.9435403347015381, "learning_rate": 0.00034302384834896044, "loss": 3.3994, "step": 77355 }, { "epoch": 5.256148933278978, "grad_norm": 0.8579195141792297, "learning_rate": 0.0003429813833401277, "loss": 3.4151, "step": 77360 }, { "epoch": 5.25648865334964, "grad_norm": 0.8084255456924438, "learning_rate": 0.00034293891833129506, "loss": 3.5837, "step": 77365 }, { "epoch": 5.256828373420301, "grad_norm": 0.7241343855857849, "learning_rate": 0.0003428964533224623, "loss": 3.6721, "step": 77370 }, { "epoch": 5.2571680934909635, "grad_norm": 0.8361693024635315, "learning_rate": 0.00034285398831362956, "loss": 3.4814, "step": 77375 }, { "epoch": 5.2575078135616256, "grad_norm": 1.0515402555465698, "learning_rate": 0.00034281152330479684, "loss": 3.5293, "step": 77380 }, { "epoch": 5.257847533632287, "grad_norm": 0.9794366955757141, "learning_rate": 0.0003427690582959641, "loss": 3.3919, "step": 77385 }, { "epoch": 5.258187253702949, "grad_norm": 1.2499045133590698, "learning_rate": 0.0003427265932871314, "loss": 3.5296, "step": 77390 }, { "epoch": 5.258526973773611, "grad_norm": 0.9121119379997253, "learning_rate": 0.0003426841282782987, "loss": 3.5849, "step": 77395 }, { "epoch": 5.258866693844272, "grad_norm": 0.8153026700019836, "learning_rate": 0.00034264166326946596, "loss": 3.285, "step": 77400 }, { "epoch": 5.259206413914934, "grad_norm": 0.9129648208618164, "learning_rate": 0.00034259919826063324, "loss": 3.527, "step": 77405 }, { "epoch": 5.259546133985596, "grad_norm": 0.8096426129341125, "learning_rate": 0.0003425567332518005, "loss": 3.5453, "step": 77410 }, { "epoch": 5.259885854056257, "grad_norm": 0.9756879806518555, "learning_rate": 0.00034251426824296775, "loss": 3.4736, "step": 77415 }, { "epoch": 5.2602255741269195, "grad_norm": 1.1648398637771606, "learning_rate": 0.0003424718032341351, "loss": 3.711, "step": 77420 }, { "epoch": 5.260565294197582, "grad_norm": 1.3503143787384033, "learning_rate": 0.00034242933822530236, "loss": 3.2874, "step": 77425 }, { "epoch": 5.260905014268243, "grad_norm": 1.1657476425170898, "learning_rate": 0.00034238687321646964, "loss": 3.5511, "step": 77430 }, { "epoch": 5.261244734338905, "grad_norm": 0.9445148706436157, "learning_rate": 0.0003423444082076369, "loss": 3.5345, "step": 77435 }, { "epoch": 5.261584454409567, "grad_norm": 0.855963945388794, "learning_rate": 0.0003423019431988042, "loss": 3.5521, "step": 77440 }, { "epoch": 5.261924174480228, "grad_norm": 0.9673268795013428, "learning_rate": 0.0003422594781899715, "loss": 3.3943, "step": 77445 }, { "epoch": 5.26226389455089, "grad_norm": 1.0579267740249634, "learning_rate": 0.0003422170131811387, "loss": 3.4273, "step": 77450 }, { "epoch": 5.262603614621552, "grad_norm": 1.1987358331680298, "learning_rate": 0.00034217454817230604, "loss": 3.5302, "step": 77455 }, { "epoch": 5.262943334692213, "grad_norm": 0.9026594758033752, "learning_rate": 0.0003421320831634733, "loss": 3.5334, "step": 77460 }, { "epoch": 5.2632830547628755, "grad_norm": 0.7517818808555603, "learning_rate": 0.00034208961815464055, "loss": 3.6289, "step": 77465 }, { "epoch": 5.263622774833538, "grad_norm": 0.956368625164032, "learning_rate": 0.0003420471531458079, "loss": 3.3108, "step": 77470 }, { "epoch": 5.263962494904199, "grad_norm": 1.05348801612854, "learning_rate": 0.00034200468813697516, "loss": 3.7044, "step": 77475 }, { "epoch": 5.264302214974861, "grad_norm": 1.2108911275863647, "learning_rate": 0.0003419622231281424, "loss": 3.5544, "step": 77480 }, { "epoch": 5.264641935045523, "grad_norm": 0.7418484091758728, "learning_rate": 0.0003419197581193097, "loss": 3.3635, "step": 77485 }, { "epoch": 5.264981655116184, "grad_norm": 0.8016573190689087, "learning_rate": 0.000341877293110477, "loss": 3.3417, "step": 77490 }, { "epoch": 5.265321375186846, "grad_norm": 1.0444839000701904, "learning_rate": 0.00034183482810164423, "loss": 3.398, "step": 77495 }, { "epoch": 5.265661095257508, "grad_norm": 0.8531420826911926, "learning_rate": 0.0003417923630928115, "loss": 3.5893, "step": 77500 }, { "epoch": 5.266000815328169, "grad_norm": 0.9030084609985352, "learning_rate": 0.00034174989808397884, "loss": 3.4608, "step": 77505 }, { "epoch": 5.2663405353988315, "grad_norm": 0.8647962808609009, "learning_rate": 0.00034170743307514607, "loss": 3.4272, "step": 77510 }, { "epoch": 5.266680255469494, "grad_norm": 0.7373958826065063, "learning_rate": 0.00034166496806631335, "loss": 3.5256, "step": 77515 }, { "epoch": 5.267019975540155, "grad_norm": 1.1547341346740723, "learning_rate": 0.0003416225030574807, "loss": 3.5097, "step": 77520 }, { "epoch": 5.267359695610817, "grad_norm": 0.7432810664176941, "learning_rate": 0.0003415800380486479, "loss": 3.4815, "step": 77525 }, { "epoch": 5.267699415681479, "grad_norm": 0.9177041053771973, "learning_rate": 0.0003415375730398152, "loss": 3.3127, "step": 77530 }, { "epoch": 5.26803913575214, "grad_norm": 1.1121187210083008, "learning_rate": 0.00034149510803098247, "loss": 3.431, "step": 77535 }, { "epoch": 5.268378855822802, "grad_norm": 0.9663049578666687, "learning_rate": 0.00034145264302214975, "loss": 3.4525, "step": 77540 }, { "epoch": 5.268718575893463, "grad_norm": 0.7763304710388184, "learning_rate": 0.00034141017801331703, "loss": 3.5386, "step": 77545 }, { "epoch": 5.2690582959641254, "grad_norm": 0.8114166855812073, "learning_rate": 0.0003413677130044843, "loss": 3.4439, "step": 77550 }, { "epoch": 5.2693980160347875, "grad_norm": 0.8475888967514038, "learning_rate": 0.0003413252479956516, "loss": 3.3476, "step": 77555 }, { "epoch": 5.269737736105449, "grad_norm": 0.9251770377159119, "learning_rate": 0.00034128278298681887, "loss": 3.4258, "step": 77560 }, { "epoch": 5.270077456176111, "grad_norm": 0.9561187028884888, "learning_rate": 0.00034124031797798615, "loss": 3.6615, "step": 77565 }, { "epoch": 5.270417176246773, "grad_norm": 0.8605806231498718, "learning_rate": 0.0003411978529691534, "loss": 3.5582, "step": 77570 }, { "epoch": 5.270756896317434, "grad_norm": 0.8247023820877075, "learning_rate": 0.0003411553879603207, "loss": 3.7738, "step": 77575 }, { "epoch": 5.271096616388096, "grad_norm": 1.1633988618850708, "learning_rate": 0.000341112922951488, "loss": 3.4195, "step": 77580 }, { "epoch": 5.271436336458758, "grad_norm": 0.9595105648040771, "learning_rate": 0.0003410704579426552, "loss": 3.3964, "step": 77585 }, { "epoch": 5.271776056529419, "grad_norm": 0.8360251188278198, "learning_rate": 0.00034102799293382255, "loss": 3.2223, "step": 77590 }, { "epoch": 5.2721157766000815, "grad_norm": 0.8594487309455872, "learning_rate": 0.00034098552792498983, "loss": 3.5877, "step": 77595 }, { "epoch": 5.2724554966707435, "grad_norm": 1.0351351499557495, "learning_rate": 0.0003409430629161571, "loss": 3.5165, "step": 77600 }, { "epoch": 5.272795216741405, "grad_norm": 0.8016113638877869, "learning_rate": 0.00034090059790732434, "loss": 3.3266, "step": 77605 }, { "epoch": 5.273134936812067, "grad_norm": 1.0015082359313965, "learning_rate": 0.00034085813289849167, "loss": 3.3806, "step": 77610 }, { "epoch": 5.273474656882729, "grad_norm": 0.9104819893836975, "learning_rate": 0.00034081566788965895, "loss": 3.4976, "step": 77615 }, { "epoch": 5.27381437695339, "grad_norm": 0.9334174990653992, "learning_rate": 0.0003407732028808262, "loss": 3.5475, "step": 77620 }, { "epoch": 5.274154097024052, "grad_norm": 1.066830039024353, "learning_rate": 0.0003407307378719935, "loss": 3.3643, "step": 77625 }, { "epoch": 5.274493817094714, "grad_norm": 0.885061502456665, "learning_rate": 0.0003406882728631608, "loss": 3.5078, "step": 77630 }, { "epoch": 5.274833537165375, "grad_norm": 0.8743835091590881, "learning_rate": 0.000340645807854328, "loss": 3.6292, "step": 77635 }, { "epoch": 5.2751732572360375, "grad_norm": 0.8150795698165894, "learning_rate": 0.0003406033428454953, "loss": 3.5228, "step": 77640 }, { "epoch": 5.2755129773066995, "grad_norm": 0.865960955619812, "learning_rate": 0.00034056087783666263, "loss": 3.3299, "step": 77645 }, { "epoch": 5.275852697377361, "grad_norm": 1.1821253299713135, "learning_rate": 0.00034051841282782986, "loss": 3.3974, "step": 77650 }, { "epoch": 5.276192417448023, "grad_norm": 0.8339661955833435, "learning_rate": 0.00034047594781899714, "loss": 3.4177, "step": 77655 }, { "epoch": 5.276532137518685, "grad_norm": 0.9023543000221252, "learning_rate": 0.00034043348281016447, "loss": 3.5395, "step": 77660 }, { "epoch": 5.276871857589346, "grad_norm": 0.9916113615036011, "learning_rate": 0.0003403910178013317, "loss": 3.3426, "step": 77665 }, { "epoch": 5.277211577660008, "grad_norm": 0.8041478395462036, "learning_rate": 0.000340348552792499, "loss": 3.579, "step": 77670 }, { "epoch": 5.27755129773067, "grad_norm": 0.9110507965087891, "learning_rate": 0.00034030608778366626, "loss": 3.2674, "step": 77675 }, { "epoch": 5.277891017801331, "grad_norm": 1.0961023569107056, "learning_rate": 0.00034026362277483354, "loss": 3.1566, "step": 77680 }, { "epoch": 5.2782307378719935, "grad_norm": 0.9122074246406555, "learning_rate": 0.0003402211577660008, "loss": 3.5073, "step": 77685 }, { "epoch": 5.278570457942656, "grad_norm": 0.9889321327209473, "learning_rate": 0.0003401786927571681, "loss": 3.3913, "step": 77690 }, { "epoch": 5.278910178013317, "grad_norm": 1.0775107145309448, "learning_rate": 0.0003401362277483354, "loss": 3.6616, "step": 77695 }, { "epoch": 5.279249898083979, "grad_norm": 0.680305004119873, "learning_rate": 0.00034009376273950266, "loss": 3.2457, "step": 77700 }, { "epoch": 5.279589618154641, "grad_norm": 0.8493651151657104, "learning_rate": 0.00034005129773066994, "loss": 3.73, "step": 77705 }, { "epoch": 5.279929338225302, "grad_norm": 0.8255748748779297, "learning_rate": 0.00034000883272183717, "loss": 3.2006, "step": 77710 }, { "epoch": 5.280269058295964, "grad_norm": 0.8718971014022827, "learning_rate": 0.0003399663677130045, "loss": 3.3868, "step": 77715 }, { "epoch": 5.280608778366626, "grad_norm": 0.9038297533988953, "learning_rate": 0.0003399239027041718, "loss": 3.4532, "step": 77720 }, { "epoch": 5.280948498437287, "grad_norm": 1.0276360511779785, "learning_rate": 0.000339881437695339, "loss": 3.5573, "step": 77725 }, { "epoch": 5.2812882185079495, "grad_norm": 0.9996660351753235, "learning_rate": 0.00033983897268650634, "loss": 3.3857, "step": 77730 }, { "epoch": 5.281627938578612, "grad_norm": 0.9069861173629761, "learning_rate": 0.0003397965076776736, "loss": 3.5874, "step": 77735 }, { "epoch": 5.281967658649273, "grad_norm": 0.9187392592430115, "learning_rate": 0.00033975404266884085, "loss": 3.3536, "step": 77740 }, { "epoch": 5.282307378719935, "grad_norm": 0.8645111322402954, "learning_rate": 0.0003397115776600082, "loss": 3.4406, "step": 77745 }, { "epoch": 5.282647098790597, "grad_norm": 0.8234793543815613, "learning_rate": 0.00033966911265117546, "loss": 3.2381, "step": 77750 }, { "epoch": 5.282986818861258, "grad_norm": 0.8133503794670105, "learning_rate": 0.0003396266476423427, "loss": 3.5607, "step": 77755 }, { "epoch": 5.28332653893192, "grad_norm": 1.179540753364563, "learning_rate": 0.00033958418263350997, "loss": 3.5443, "step": 77760 }, { "epoch": 5.283666259002582, "grad_norm": 1.2166885137557983, "learning_rate": 0.0003395417176246773, "loss": 3.045, "step": 77765 }, { "epoch": 5.284005979073243, "grad_norm": 0.8534504175186157, "learning_rate": 0.0003394992526158446, "loss": 3.4228, "step": 77770 }, { "epoch": 5.2843456991439055, "grad_norm": 0.8876750469207764, "learning_rate": 0.0003394567876070118, "loss": 3.5048, "step": 77775 }, { "epoch": 5.284685419214568, "grad_norm": 0.9431858062744141, "learning_rate": 0.00033941432259817914, "loss": 3.2948, "step": 77780 }, { "epoch": 5.285025139285229, "grad_norm": 0.880521833896637, "learning_rate": 0.0003393718575893464, "loss": 3.4363, "step": 77785 }, { "epoch": 5.285364859355891, "grad_norm": 1.0114389657974243, "learning_rate": 0.00033932939258051365, "loss": 3.5478, "step": 77790 }, { "epoch": 5.285704579426553, "grad_norm": 1.0734858512878418, "learning_rate": 0.0003392869275716809, "loss": 3.3463, "step": 77795 }, { "epoch": 5.286044299497214, "grad_norm": 1.027349829673767, "learning_rate": 0.00033924446256284826, "loss": 3.3553, "step": 77800 }, { "epoch": 5.286384019567876, "grad_norm": 0.8302164077758789, "learning_rate": 0.0003392019975540155, "loss": 3.5204, "step": 77805 }, { "epoch": 5.286723739638538, "grad_norm": 0.8433977365493774, "learning_rate": 0.00033915953254518277, "loss": 3.5084, "step": 77810 }, { "epoch": 5.287063459709199, "grad_norm": 1.0863436460494995, "learning_rate": 0.0003391170675363501, "loss": 3.3816, "step": 77815 }, { "epoch": 5.2874031797798615, "grad_norm": 0.9788549542427063, "learning_rate": 0.0003390746025275173, "loss": 3.4923, "step": 77820 }, { "epoch": 5.287742899850523, "grad_norm": 0.8325121402740479, "learning_rate": 0.0003390321375186846, "loss": 3.5172, "step": 77825 }, { "epoch": 5.288082619921185, "grad_norm": 0.7759940028190613, "learning_rate": 0.0003389896725098519, "loss": 3.2122, "step": 77830 }, { "epoch": 5.288422339991847, "grad_norm": 0.9254586696624756, "learning_rate": 0.00033894720750101917, "loss": 3.504, "step": 77835 }, { "epoch": 5.288762060062508, "grad_norm": 0.800954282283783, "learning_rate": 0.00033890474249218645, "loss": 3.7182, "step": 77840 }, { "epoch": 5.28910178013317, "grad_norm": 1.0384767055511475, "learning_rate": 0.00033886227748335373, "loss": 3.4101, "step": 77845 }, { "epoch": 5.289441500203832, "grad_norm": 0.8862746357917786, "learning_rate": 0.000338819812474521, "loss": 3.3924, "step": 77850 }, { "epoch": 5.289781220274493, "grad_norm": 1.1366990804672241, "learning_rate": 0.0003387773474656883, "loss": 3.5512, "step": 77855 }, { "epoch": 5.2901209403451555, "grad_norm": 0.8754532337188721, "learning_rate": 0.00033873488245685557, "loss": 3.5402, "step": 77860 }, { "epoch": 5.2904606604158175, "grad_norm": 0.9401914477348328, "learning_rate": 0.0003386924174480228, "loss": 3.2838, "step": 77865 }, { "epoch": 5.290800380486479, "grad_norm": 0.6615973711013794, "learning_rate": 0.00033864995243919013, "loss": 3.3303, "step": 77870 }, { "epoch": 5.291140100557141, "grad_norm": 0.9023000001907349, "learning_rate": 0.0003386074874303574, "loss": 3.6885, "step": 77875 }, { "epoch": 5.291479820627803, "grad_norm": 1.2444915771484375, "learning_rate": 0.00033856502242152463, "loss": 3.2479, "step": 77880 }, { "epoch": 5.291819540698464, "grad_norm": 0.8478114604949951, "learning_rate": 0.00033852255741269197, "loss": 3.3692, "step": 77885 }, { "epoch": 5.292159260769126, "grad_norm": 0.8026512861251831, "learning_rate": 0.00033848009240385925, "loss": 3.5581, "step": 77890 }, { "epoch": 5.292498980839788, "grad_norm": 1.0923928022384644, "learning_rate": 0.0003384376273950265, "loss": 3.3897, "step": 77895 }, { "epoch": 5.292838700910449, "grad_norm": 0.9207879900932312, "learning_rate": 0.00033839516238619375, "loss": 3.4057, "step": 77900 }, { "epoch": 5.2931784209811115, "grad_norm": 1.3061646223068237, "learning_rate": 0.0003383526973773611, "loss": 3.3043, "step": 77905 }, { "epoch": 5.2935181410517735, "grad_norm": 1.0283002853393555, "learning_rate": 0.0003383102323685283, "loss": 3.4014, "step": 77910 }, { "epoch": 5.293857861122435, "grad_norm": 0.8849559426307678, "learning_rate": 0.0003382677673596956, "loss": 3.4991, "step": 77915 }, { "epoch": 5.294197581193097, "grad_norm": 1.2612664699554443, "learning_rate": 0.00033822530235086293, "loss": 3.3864, "step": 77920 }, { "epoch": 5.294537301263759, "grad_norm": 0.8992102742195129, "learning_rate": 0.00033818283734203015, "loss": 3.2981, "step": 77925 }, { "epoch": 5.29487702133442, "grad_norm": 0.9072105884552002, "learning_rate": 0.00033814037233319743, "loss": 3.4569, "step": 77930 }, { "epoch": 5.295216741405082, "grad_norm": 0.7484358549118042, "learning_rate": 0.0003380979073243647, "loss": 3.6891, "step": 77935 }, { "epoch": 5.295556461475744, "grad_norm": 1.1159650087356567, "learning_rate": 0.00033805544231553205, "loss": 3.3747, "step": 77940 }, { "epoch": 5.295896181546405, "grad_norm": 1.0430089235305786, "learning_rate": 0.0003380129773066993, "loss": 3.5658, "step": 77945 }, { "epoch": 5.2962359016170675, "grad_norm": 0.810356855392456, "learning_rate": 0.00033797051229786655, "loss": 3.6344, "step": 77950 }, { "epoch": 5.2965756216877296, "grad_norm": 0.8487858176231384, "learning_rate": 0.0003379280472890339, "loss": 3.5575, "step": 77955 }, { "epoch": 5.296915341758391, "grad_norm": 0.8651509881019592, "learning_rate": 0.0003378855822802011, "loss": 3.3303, "step": 77960 }, { "epoch": 5.297255061829053, "grad_norm": 1.00436270236969, "learning_rate": 0.0003378431172713684, "loss": 3.44, "step": 77965 }, { "epoch": 5.297594781899715, "grad_norm": 0.833680272102356, "learning_rate": 0.0003378006522625357, "loss": 3.4714, "step": 77970 }, { "epoch": 5.297934501970376, "grad_norm": 1.0223634243011475, "learning_rate": 0.00033775818725370295, "loss": 3.3929, "step": 77975 }, { "epoch": 5.298274222041038, "grad_norm": 0.8521704077720642, "learning_rate": 0.00033771572224487024, "loss": 3.736, "step": 77980 }, { "epoch": 5.2986139421117, "grad_norm": 0.8357382416725159, "learning_rate": 0.0003376732572360375, "loss": 3.4238, "step": 77985 }, { "epoch": 5.298953662182361, "grad_norm": 0.8170688152313232, "learning_rate": 0.0003376307922272048, "loss": 3.484, "step": 77990 }, { "epoch": 5.2992933822530235, "grad_norm": 1.4321448802947998, "learning_rate": 0.0003375883272183721, "loss": 3.325, "step": 77995 }, { "epoch": 5.299633102323686, "grad_norm": 0.771216094493866, "learning_rate": 0.00033754586220953936, "loss": 3.3302, "step": 78000 }, { "epoch": 5.299972822394347, "grad_norm": 1.1680614948272705, "learning_rate": 0.0003375033972007066, "loss": 3.4355, "step": 78005 }, { "epoch": 5.300312542465009, "grad_norm": 0.9365093111991882, "learning_rate": 0.0003374609321918739, "loss": 3.1969, "step": 78010 }, { "epoch": 5.300652262535671, "grad_norm": 0.9794655442237854, "learning_rate": 0.0003374184671830412, "loss": 3.4951, "step": 78015 }, { "epoch": 5.300991982606332, "grad_norm": 0.8563889861106873, "learning_rate": 0.0003373760021742084, "loss": 3.3351, "step": 78020 }, { "epoch": 5.301331702676994, "grad_norm": 1.0258111953735352, "learning_rate": 0.00033733353716537576, "loss": 3.4315, "step": 78025 }, { "epoch": 5.301671422747656, "grad_norm": 0.9076101183891296, "learning_rate": 0.00033729107215654304, "loss": 3.3646, "step": 78030 }, { "epoch": 5.302011142818317, "grad_norm": 1.192469835281372, "learning_rate": 0.00033724860714771026, "loss": 3.5457, "step": 78035 }, { "epoch": 5.3023508628889795, "grad_norm": 0.8477970957756042, "learning_rate": 0.0003372061421388776, "loss": 3.4808, "step": 78040 }, { "epoch": 5.302690582959642, "grad_norm": 0.7046147584915161, "learning_rate": 0.0003371636771300449, "loss": 3.5354, "step": 78045 }, { "epoch": 5.303030303030303, "grad_norm": 1.4049218893051147, "learning_rate": 0.0003371212121212121, "loss": 3.2365, "step": 78050 }, { "epoch": 5.303370023100965, "grad_norm": 1.1192388534545898, "learning_rate": 0.0003370787471123794, "loss": 3.5744, "step": 78055 }, { "epoch": 5.303709743171627, "grad_norm": 0.947353720664978, "learning_rate": 0.0003370362821035467, "loss": 3.4303, "step": 78060 }, { "epoch": 5.304049463242288, "grad_norm": 0.9471931457519531, "learning_rate": 0.00033699381709471394, "loss": 3.6548, "step": 78065 }, { "epoch": 5.30438918331295, "grad_norm": 0.9532833099365234, "learning_rate": 0.0003369513520858812, "loss": 3.3835, "step": 78070 }, { "epoch": 5.304728903383612, "grad_norm": 1.1301982402801514, "learning_rate": 0.00033690888707704856, "loss": 3.2814, "step": 78075 }, { "epoch": 5.305068623454273, "grad_norm": 0.9535732865333557, "learning_rate": 0.0003368664220682158, "loss": 3.5504, "step": 78080 }, { "epoch": 5.3054083435249355, "grad_norm": 0.9016448259353638, "learning_rate": 0.00033682395705938306, "loss": 3.3218, "step": 78085 }, { "epoch": 5.305748063595598, "grad_norm": 1.0480573177337646, "learning_rate": 0.00033678149205055034, "loss": 3.457, "step": 78090 }, { "epoch": 5.306087783666259, "grad_norm": 0.8733727335929871, "learning_rate": 0.0003367390270417176, "loss": 3.5374, "step": 78095 }, { "epoch": 5.306427503736921, "grad_norm": 0.954719603061676, "learning_rate": 0.0003366965620328849, "loss": 3.2684, "step": 78100 }, { "epoch": 5.306767223807583, "grad_norm": 0.7705663442611694, "learning_rate": 0.0003366540970240522, "loss": 3.6258, "step": 78105 }, { "epoch": 5.307106943878244, "grad_norm": 1.1256740093231201, "learning_rate": 0.0003366116320152195, "loss": 3.8243, "step": 78110 }, { "epoch": 5.307446663948906, "grad_norm": 0.725265622138977, "learning_rate": 0.00033656916700638674, "loss": 3.6238, "step": 78115 }, { "epoch": 5.307786384019568, "grad_norm": 0.683428168296814, "learning_rate": 0.000336526701997554, "loss": 3.3564, "step": 78120 }, { "epoch": 5.3081261040902294, "grad_norm": 0.743855357170105, "learning_rate": 0.0003364842369887213, "loss": 3.2717, "step": 78125 }, { "epoch": 5.3084658241608915, "grad_norm": 0.9205508232116699, "learning_rate": 0.0003364417719798886, "loss": 3.6135, "step": 78130 }, { "epoch": 5.308805544231554, "grad_norm": 0.709912896156311, "learning_rate": 0.00033639930697105586, "loss": 3.243, "step": 78135 }, { "epoch": 5.309145264302215, "grad_norm": 0.7392258048057556, "learning_rate": 0.00033635684196222314, "loss": 3.3344, "step": 78140 }, { "epoch": 5.309484984372877, "grad_norm": 1.0169382095336914, "learning_rate": 0.0003363143769533904, "loss": 3.2144, "step": 78145 }, { "epoch": 5.309824704443539, "grad_norm": 0.8694301247596741, "learning_rate": 0.0003362719119445577, "loss": 3.5338, "step": 78150 }, { "epoch": 5.3101644245142, "grad_norm": 0.8137326240539551, "learning_rate": 0.000336229446935725, "loss": 3.5901, "step": 78155 }, { "epoch": 5.310504144584862, "grad_norm": 0.8788939118385315, "learning_rate": 0.0003361869819268922, "loss": 3.5348, "step": 78160 }, { "epoch": 5.310843864655524, "grad_norm": 0.7957729697227478, "learning_rate": 0.00033614451691805954, "loss": 3.8183, "step": 78165 }, { "epoch": 5.3111835847261855, "grad_norm": 0.787786066532135, "learning_rate": 0.0003361020519092268, "loss": 3.4331, "step": 78170 }, { "epoch": 5.3115233047968475, "grad_norm": 0.986140251159668, "learning_rate": 0.00033605958690039405, "loss": 3.65, "step": 78175 }, { "epoch": 5.31186302486751, "grad_norm": 1.3899834156036377, "learning_rate": 0.0003360171218915614, "loss": 3.8465, "step": 78180 }, { "epoch": 5.312202744938171, "grad_norm": 0.7742891907691956, "learning_rate": 0.00033597465688272866, "loss": 3.0846, "step": 78185 }, { "epoch": 5.312542465008833, "grad_norm": 0.8650239706039429, "learning_rate": 0.0003359321918738959, "loss": 3.2248, "step": 78190 }, { "epoch": 5.312882185079495, "grad_norm": 0.858525812625885, "learning_rate": 0.00033588972686506317, "loss": 3.4152, "step": 78195 }, { "epoch": 5.313221905150156, "grad_norm": 0.8648572564125061, "learning_rate": 0.0003358472618562305, "loss": 3.5071, "step": 78200 }, { "epoch": 5.313561625220818, "grad_norm": 0.9774354696273804, "learning_rate": 0.00033580479684739773, "loss": 3.7684, "step": 78205 }, { "epoch": 5.31390134529148, "grad_norm": 0.8625220656394958, "learning_rate": 0.000335762331838565, "loss": 3.5771, "step": 78210 }, { "epoch": 5.3142410653621415, "grad_norm": 1.0874325037002563, "learning_rate": 0.00033571986682973234, "loss": 3.1582, "step": 78215 }, { "epoch": 5.3145807854328035, "grad_norm": 0.7907547354698181, "learning_rate": 0.00033567740182089957, "loss": 3.4979, "step": 78220 }, { "epoch": 5.314920505503465, "grad_norm": 0.8776171803474426, "learning_rate": 0.00033563493681206685, "loss": 3.6613, "step": 78225 }, { "epoch": 5.315260225574127, "grad_norm": 1.592774748802185, "learning_rate": 0.00033559247180323413, "loss": 3.426, "step": 78230 }, { "epoch": 5.315599945644789, "grad_norm": 0.7880361080169678, "learning_rate": 0.0003355500067944014, "loss": 3.2163, "step": 78235 }, { "epoch": 5.31593966571545, "grad_norm": 0.7951199412345886, "learning_rate": 0.0003355075417855687, "loss": 3.5935, "step": 78240 }, { "epoch": 5.316279385786112, "grad_norm": 0.9114543795585632, "learning_rate": 0.00033546507677673597, "loss": 3.4764, "step": 78245 }, { "epoch": 5.316619105856774, "grad_norm": 1.035859227180481, "learning_rate": 0.00033542261176790325, "loss": 3.3884, "step": 78250 }, { "epoch": 5.316958825927435, "grad_norm": 1.095029354095459, "learning_rate": 0.00033538014675907053, "loss": 3.6163, "step": 78255 }, { "epoch": 5.3172985459980975, "grad_norm": 0.7649465203285217, "learning_rate": 0.0003353376817502378, "loss": 3.5173, "step": 78260 }, { "epoch": 5.31763826606876, "grad_norm": 0.8430017232894897, "learning_rate": 0.00033529521674140504, "loss": 3.2459, "step": 78265 }, { "epoch": 5.317977986139421, "grad_norm": 0.8822588920593262, "learning_rate": 0.00033525275173257237, "loss": 3.5022, "step": 78270 }, { "epoch": 5.318317706210083, "grad_norm": 0.9492565989494324, "learning_rate": 0.00033521028672373965, "loss": 3.4351, "step": 78275 }, { "epoch": 5.318657426280745, "grad_norm": 0.870532751083374, "learning_rate": 0.00033516782171490693, "loss": 3.1444, "step": 78280 }, { "epoch": 5.318997146351406, "grad_norm": 1.094956636428833, "learning_rate": 0.0003351253567060742, "loss": 3.379, "step": 78285 }, { "epoch": 5.319336866422068, "grad_norm": 0.7777120471000671, "learning_rate": 0.0003350828916972415, "loss": 3.4448, "step": 78290 }, { "epoch": 5.31967658649273, "grad_norm": 1.013400912284851, "learning_rate": 0.00033504042668840877, "loss": 3.495, "step": 78295 }, { "epoch": 5.320016306563391, "grad_norm": 0.839445173740387, "learning_rate": 0.000334997961679576, "loss": 3.2495, "step": 78300 }, { "epoch": 5.3203560266340535, "grad_norm": 0.8748689889907837, "learning_rate": 0.00033495549667074333, "loss": 3.4332, "step": 78305 }, { "epoch": 5.320695746704716, "grad_norm": 1.0204155445098877, "learning_rate": 0.0003349130316619106, "loss": 3.3113, "step": 78310 }, { "epoch": 5.321035466775377, "grad_norm": 0.9042012691497803, "learning_rate": 0.00033487056665307784, "loss": 3.6321, "step": 78315 }, { "epoch": 5.321375186846039, "grad_norm": 0.9822362065315247, "learning_rate": 0.00033482810164424517, "loss": 3.301, "step": 78320 }, { "epoch": 5.321714906916701, "grad_norm": 0.8440858125686646, "learning_rate": 0.00033478563663541245, "loss": 3.3994, "step": 78325 }, { "epoch": 5.322054626987362, "grad_norm": 0.8402474522590637, "learning_rate": 0.0003347431716265797, "loss": 3.2968, "step": 78330 }, { "epoch": 5.322394347058024, "grad_norm": 0.8311792016029358, "learning_rate": 0.000334700706617747, "loss": 3.4533, "step": 78335 }, { "epoch": 5.322734067128686, "grad_norm": 0.8462395071983337, "learning_rate": 0.0003346582416089143, "loss": 3.5706, "step": 78340 }, { "epoch": 5.323073787199347, "grad_norm": 0.9269742965698242, "learning_rate": 0.0003346157766000815, "loss": 3.5576, "step": 78345 }, { "epoch": 5.3234135072700095, "grad_norm": 0.7809523344039917, "learning_rate": 0.0003345733115912488, "loss": 3.3557, "step": 78350 }, { "epoch": 5.323753227340672, "grad_norm": 1.105766773223877, "learning_rate": 0.00033453084658241613, "loss": 3.5614, "step": 78355 }, { "epoch": 5.324092947411333, "grad_norm": 0.8884000182151794, "learning_rate": 0.00033448838157358336, "loss": 3.5279, "step": 78360 }, { "epoch": 5.324432667481995, "grad_norm": 0.9326366186141968, "learning_rate": 0.00033444591656475064, "loss": 3.3706, "step": 78365 }, { "epoch": 5.324772387552657, "grad_norm": 0.8913698792457581, "learning_rate": 0.00033440345155591797, "loss": 3.4211, "step": 78370 }, { "epoch": 5.325112107623318, "grad_norm": 0.9085662961006165, "learning_rate": 0.0003343609865470852, "loss": 3.3774, "step": 78375 }, { "epoch": 5.32545182769398, "grad_norm": 0.8610137701034546, "learning_rate": 0.0003343185215382525, "loss": 3.6278, "step": 78380 }, { "epoch": 5.325791547764642, "grad_norm": 0.9544657468795776, "learning_rate": 0.00033427605652941976, "loss": 3.6404, "step": 78385 }, { "epoch": 5.326131267835303, "grad_norm": 0.8266671299934387, "learning_rate": 0.00033423359152058704, "loss": 3.3385, "step": 78390 }, { "epoch": 5.3264709879059655, "grad_norm": 1.0041829347610474, "learning_rate": 0.0003341911265117543, "loss": 3.483, "step": 78395 }, { "epoch": 5.326810707976628, "grad_norm": 1.0303107500076294, "learning_rate": 0.0003341486615029216, "loss": 3.3032, "step": 78400 }, { "epoch": 5.327150428047289, "grad_norm": 0.8947954177856445, "learning_rate": 0.0003341061964940889, "loss": 3.4383, "step": 78405 }, { "epoch": 5.327490148117951, "grad_norm": 0.7742104530334473, "learning_rate": 0.00033406373148525616, "loss": 3.4288, "step": 78410 }, { "epoch": 5.327829868188613, "grad_norm": 0.9854509830474854, "learning_rate": 0.00033402126647642344, "loss": 3.4261, "step": 78415 }, { "epoch": 5.328169588259274, "grad_norm": 1.046756625175476, "learning_rate": 0.00033397880146759067, "loss": 3.1523, "step": 78420 }, { "epoch": 5.328509308329936, "grad_norm": 0.8694136738777161, "learning_rate": 0.000333936336458758, "loss": 3.3615, "step": 78425 }, { "epoch": 5.328849028400598, "grad_norm": 0.7377488613128662, "learning_rate": 0.0003338938714499253, "loss": 3.3582, "step": 78430 }, { "epoch": 5.3291887484712595, "grad_norm": 0.9118674993515015, "learning_rate": 0.0003338514064410925, "loss": 3.3059, "step": 78435 }, { "epoch": 5.3295284685419215, "grad_norm": 1.1664576530456543, "learning_rate": 0.00033380894143225984, "loss": 3.617, "step": 78440 }, { "epoch": 5.329868188612584, "grad_norm": 0.8813289403915405, "learning_rate": 0.0003337664764234271, "loss": 3.3742, "step": 78445 }, { "epoch": 5.330207908683245, "grad_norm": 1.0263441801071167, "learning_rate": 0.0003337240114145944, "loss": 3.5435, "step": 78450 }, { "epoch": 5.330547628753907, "grad_norm": 0.9200456738471985, "learning_rate": 0.0003336815464057616, "loss": 3.5949, "step": 78455 }, { "epoch": 5.330887348824569, "grad_norm": 0.8677195906639099, "learning_rate": 0.00033363908139692896, "loss": 3.6441, "step": 78460 }, { "epoch": 5.33122706889523, "grad_norm": 1.16849946975708, "learning_rate": 0.00033359661638809624, "loss": 3.5849, "step": 78465 }, { "epoch": 5.331566788965892, "grad_norm": 0.9480420351028442, "learning_rate": 0.00033355415137926347, "loss": 3.258, "step": 78470 }, { "epoch": 5.331906509036554, "grad_norm": 1.0793460607528687, "learning_rate": 0.0003335116863704308, "loss": 3.4073, "step": 78475 }, { "epoch": 5.3322462291072155, "grad_norm": 0.8402575254440308, "learning_rate": 0.0003334692213615981, "loss": 3.6542, "step": 78480 }, { "epoch": 5.3325859491778775, "grad_norm": 1.0523964166641235, "learning_rate": 0.0003334267563527653, "loss": 3.4659, "step": 78485 }, { "epoch": 5.33292566924854, "grad_norm": 1.1226567029953003, "learning_rate": 0.0003333842913439326, "loss": 3.3645, "step": 78490 }, { "epoch": 5.333265389319201, "grad_norm": 0.9237992167472839, "learning_rate": 0.0003333418263350999, "loss": 3.2472, "step": 78495 }, { "epoch": 5.333605109389863, "grad_norm": 0.7687498927116394, "learning_rate": 0.00033329936132626715, "loss": 3.3268, "step": 78500 }, { "epoch": 5.333944829460524, "grad_norm": 0.8323100209236145, "learning_rate": 0.0003332568963174344, "loss": 3.6368, "step": 78505 }, { "epoch": 5.334284549531186, "grad_norm": 0.9502173066139221, "learning_rate": 0.00033321443130860176, "loss": 3.2524, "step": 78510 }, { "epoch": 5.334624269601848, "grad_norm": 1.1399176120758057, "learning_rate": 0.000333171966299769, "loss": 3.2917, "step": 78515 }, { "epoch": 5.334963989672509, "grad_norm": 0.9796766638755798, "learning_rate": 0.00033312950129093627, "loss": 3.5698, "step": 78520 }, { "epoch": 5.3353037097431715, "grad_norm": 0.7864508032798767, "learning_rate": 0.00033308703628210355, "loss": 3.7325, "step": 78525 }, { "epoch": 5.3356434298138335, "grad_norm": 0.8081654906272888, "learning_rate": 0.0003330445712732708, "loss": 3.5918, "step": 78530 }, { "epoch": 5.335983149884495, "grad_norm": 0.7844996452331543, "learning_rate": 0.0003330021062644381, "loss": 3.5729, "step": 78535 }, { "epoch": 5.336322869955157, "grad_norm": 0.9379047155380249, "learning_rate": 0.0003329596412556054, "loss": 3.1392, "step": 78540 }, { "epoch": 5.336662590025819, "grad_norm": 0.8122665882110596, "learning_rate": 0.00033291717624677267, "loss": 3.3793, "step": 78545 }, { "epoch": 5.33700231009648, "grad_norm": 0.8361498713493347, "learning_rate": 0.00033287471123793995, "loss": 3.2066, "step": 78550 }, { "epoch": 5.337342030167142, "grad_norm": 0.7282031178474426, "learning_rate": 0.00033283224622910723, "loss": 3.3412, "step": 78555 }, { "epoch": 5.337681750237804, "grad_norm": 0.9100168943405151, "learning_rate": 0.00033278978122027445, "loss": 3.7036, "step": 78560 }, { "epoch": 5.338021470308465, "grad_norm": 1.0003844499588013, "learning_rate": 0.0003327473162114418, "loss": 3.2551, "step": 78565 }, { "epoch": 5.3383611903791275, "grad_norm": 1.0355535745620728, "learning_rate": 0.00033270485120260907, "loss": 3.1545, "step": 78570 }, { "epoch": 5.33870091044979, "grad_norm": 0.8881214261054993, "learning_rate": 0.0003326623861937763, "loss": 3.1973, "step": 78575 }, { "epoch": 5.339040630520451, "grad_norm": 0.9087323546409607, "learning_rate": 0.00033261992118494363, "loss": 3.4466, "step": 78580 }, { "epoch": 5.339380350591113, "grad_norm": 1.2324131727218628, "learning_rate": 0.0003325774561761109, "loss": 3.1033, "step": 78585 }, { "epoch": 5.339720070661775, "grad_norm": 1.1290315389633179, "learning_rate": 0.00033253499116727813, "loss": 3.3005, "step": 78590 }, { "epoch": 5.340059790732436, "grad_norm": 0.8952938914299011, "learning_rate": 0.00033249252615844547, "loss": 3.482, "step": 78595 }, { "epoch": 5.340399510803098, "grad_norm": 1.058024525642395, "learning_rate": 0.00033245006114961275, "loss": 3.5366, "step": 78600 }, { "epoch": 5.34073923087376, "grad_norm": 1.1427899599075317, "learning_rate": 0.00033240759614078, "loss": 3.4054, "step": 78605 }, { "epoch": 5.341078950944421, "grad_norm": 0.9927167296409607, "learning_rate": 0.00033236513113194725, "loss": 3.3624, "step": 78610 }, { "epoch": 5.3414186710150835, "grad_norm": 0.9428116083145142, "learning_rate": 0.0003323226661231146, "loss": 3.4031, "step": 78615 }, { "epoch": 5.341758391085746, "grad_norm": 1.1242990493774414, "learning_rate": 0.00033228020111428187, "loss": 3.5939, "step": 78620 }, { "epoch": 5.342098111156407, "grad_norm": 1.199422836303711, "learning_rate": 0.0003322377361054491, "loss": 3.357, "step": 78625 }, { "epoch": 5.342437831227069, "grad_norm": 0.8705763220787048, "learning_rate": 0.00033219527109661643, "loss": 3.7788, "step": 78630 }, { "epoch": 5.342777551297731, "grad_norm": 0.9926672577857971, "learning_rate": 0.0003321528060877837, "loss": 3.4802, "step": 78635 }, { "epoch": 5.343117271368392, "grad_norm": 0.7579404711723328, "learning_rate": 0.00033211034107895093, "loss": 3.1995, "step": 78640 }, { "epoch": 5.343456991439054, "grad_norm": 1.2075010538101196, "learning_rate": 0.0003320678760701182, "loss": 3.5443, "step": 78645 }, { "epoch": 5.343796711509716, "grad_norm": 1.172472596168518, "learning_rate": 0.00033202541106128555, "loss": 3.2991, "step": 78650 }, { "epoch": 5.344136431580377, "grad_norm": 0.8851138949394226, "learning_rate": 0.0003319829460524528, "loss": 3.3717, "step": 78655 }, { "epoch": 5.3444761516510395, "grad_norm": 0.939117968082428, "learning_rate": 0.00033194048104362005, "loss": 3.6789, "step": 78660 }, { "epoch": 5.344815871721702, "grad_norm": 1.0617202520370483, "learning_rate": 0.0003318980160347874, "loss": 3.4704, "step": 78665 }, { "epoch": 5.345155591792363, "grad_norm": 0.9368365406990051, "learning_rate": 0.0003318555510259546, "loss": 3.5109, "step": 78670 }, { "epoch": 5.345495311863025, "grad_norm": 0.8902553915977478, "learning_rate": 0.0003318130860171219, "loss": 3.3894, "step": 78675 }, { "epoch": 5.345835031933687, "grad_norm": 1.0431864261627197, "learning_rate": 0.0003317706210082892, "loss": 3.4123, "step": 78680 }, { "epoch": 5.346174752004348, "grad_norm": 0.8060005903244019, "learning_rate": 0.00033172815599945645, "loss": 3.4044, "step": 78685 }, { "epoch": 5.34651447207501, "grad_norm": 0.7717661261558533, "learning_rate": 0.00033168569099062373, "loss": 3.5782, "step": 78690 }, { "epoch": 5.346854192145672, "grad_norm": 0.8121126890182495, "learning_rate": 0.000331643225981791, "loss": 3.6172, "step": 78695 }, { "epoch": 5.3471939122163334, "grad_norm": 0.8378850221633911, "learning_rate": 0.0003316007609729583, "loss": 3.5973, "step": 78700 }, { "epoch": 5.3475336322869955, "grad_norm": 0.8326839208602905, "learning_rate": 0.0003315582959641256, "loss": 3.5054, "step": 78705 }, { "epoch": 5.347873352357658, "grad_norm": 0.7965877056121826, "learning_rate": 0.00033151583095529286, "loss": 3.0914, "step": 78710 }, { "epoch": 5.348213072428319, "grad_norm": 0.7931357622146606, "learning_rate": 0.0003314733659464601, "loss": 3.3131, "step": 78715 }, { "epoch": 5.348552792498981, "grad_norm": 0.9721649289131165, "learning_rate": 0.0003314309009376274, "loss": 3.3039, "step": 78720 }, { "epoch": 5.348892512569643, "grad_norm": 0.8588797450065613, "learning_rate": 0.0003313884359287947, "loss": 3.6082, "step": 78725 }, { "epoch": 5.349232232640304, "grad_norm": 0.9456138014793396, "learning_rate": 0.0003313459709199619, "loss": 3.5917, "step": 78730 }, { "epoch": 5.349571952710966, "grad_norm": 0.818286120891571, "learning_rate": 0.00033130350591112926, "loss": 3.407, "step": 78735 }, { "epoch": 5.349911672781628, "grad_norm": 0.8564689755439758, "learning_rate": 0.00033126104090229654, "loss": 3.4993, "step": 78740 }, { "epoch": 5.3502513928522895, "grad_norm": 1.0860605239868164, "learning_rate": 0.00033121857589346376, "loss": 3.3591, "step": 78745 }, { "epoch": 5.3505911129229515, "grad_norm": 0.9525011777877808, "learning_rate": 0.00033117611088463104, "loss": 3.504, "step": 78750 }, { "epoch": 5.350930832993614, "grad_norm": 0.8574268817901611, "learning_rate": 0.0003311336458757984, "loss": 3.5251, "step": 78755 }, { "epoch": 5.351270553064275, "grad_norm": 0.843839168548584, "learning_rate": 0.0003310911808669656, "loss": 3.4859, "step": 78760 }, { "epoch": 5.351610273134937, "grad_norm": 0.902587354183197, "learning_rate": 0.0003310487158581329, "loss": 3.6356, "step": 78765 }, { "epoch": 5.351949993205599, "grad_norm": 0.8871144652366638, "learning_rate": 0.0003310062508493002, "loss": 3.3233, "step": 78770 }, { "epoch": 5.35228971327626, "grad_norm": 0.8606464266777039, "learning_rate": 0.00033096378584046744, "loss": 3.324, "step": 78775 }, { "epoch": 5.352629433346922, "grad_norm": 0.8877620100975037, "learning_rate": 0.0003309213208316347, "loss": 3.4972, "step": 78780 }, { "epoch": 5.352969153417584, "grad_norm": 1.071241855621338, "learning_rate": 0.000330878855822802, "loss": 3.2461, "step": 78785 }, { "epoch": 5.3533088734882455, "grad_norm": 0.7963054180145264, "learning_rate": 0.00033083639081396934, "loss": 3.5382, "step": 78790 }, { "epoch": 5.3536485935589075, "grad_norm": 0.8161731958389282, "learning_rate": 0.00033079392580513656, "loss": 3.2946, "step": 78795 }, { "epoch": 5.35398831362957, "grad_norm": 0.8352603316307068, "learning_rate": 0.00033075146079630384, "loss": 3.1941, "step": 78800 }, { "epoch": 5.354328033700231, "grad_norm": 0.9309561848640442, "learning_rate": 0.0003307089957874712, "loss": 3.4028, "step": 78805 }, { "epoch": 5.354667753770893, "grad_norm": 1.0791726112365723, "learning_rate": 0.0003306665307786384, "loss": 3.4282, "step": 78810 }, { "epoch": 5.355007473841555, "grad_norm": 0.7636476159095764, "learning_rate": 0.0003306240657698057, "loss": 3.3885, "step": 78815 }, { "epoch": 5.355347193912216, "grad_norm": 0.8618056774139404, "learning_rate": 0.00033058160076097296, "loss": 3.5726, "step": 78820 }, { "epoch": 5.355686913982878, "grad_norm": 0.6847010254859924, "learning_rate": 0.00033053913575214024, "loss": 3.4468, "step": 78825 }, { "epoch": 5.35602663405354, "grad_norm": 0.8719287514686584, "learning_rate": 0.0003304966707433075, "loss": 3.3847, "step": 78830 }, { "epoch": 5.3563663541242015, "grad_norm": 1.0832884311676025, "learning_rate": 0.0003304542057344748, "loss": 3.4289, "step": 78835 }, { "epoch": 5.3567060741948636, "grad_norm": 0.9755337834358215, "learning_rate": 0.0003304117407256421, "loss": 3.4823, "step": 78840 }, { "epoch": 5.357045794265526, "grad_norm": 0.8966276049613953, "learning_rate": 0.00033036927571680936, "loss": 3.4288, "step": 78845 }, { "epoch": 5.357385514336187, "grad_norm": 0.7190046906471252, "learning_rate": 0.00033032681070797664, "loss": 3.4137, "step": 78850 }, { "epoch": 5.357725234406849, "grad_norm": 0.9158503413200378, "learning_rate": 0.00033028434569914387, "loss": 3.527, "step": 78855 }, { "epoch": 5.358064954477511, "grad_norm": 1.1003084182739258, "learning_rate": 0.0003302418806903112, "loss": 3.3097, "step": 78860 }, { "epoch": 5.358404674548172, "grad_norm": 0.8053983449935913, "learning_rate": 0.0003301994156814785, "loss": 3.4382, "step": 78865 }, { "epoch": 5.358744394618834, "grad_norm": 0.738785445690155, "learning_rate": 0.0003301569506726457, "loss": 3.3709, "step": 78870 }, { "epoch": 5.359084114689496, "grad_norm": 0.8625181913375854, "learning_rate": 0.00033011448566381304, "loss": 3.507, "step": 78875 }, { "epoch": 5.3594238347601575, "grad_norm": 0.7818925976753235, "learning_rate": 0.0003300720206549803, "loss": 3.5248, "step": 78880 }, { "epoch": 5.35976355483082, "grad_norm": 0.7681019306182861, "learning_rate": 0.00033002955564614755, "loss": 3.4928, "step": 78885 }, { "epoch": 5.360103274901482, "grad_norm": 0.8309361338615417, "learning_rate": 0.0003299870906373149, "loss": 3.6648, "step": 78890 }, { "epoch": 5.360442994972143, "grad_norm": 0.8129064440727234, "learning_rate": 0.00032994462562848216, "loss": 3.4279, "step": 78895 }, { "epoch": 5.360782715042805, "grad_norm": 0.9137693047523499, "learning_rate": 0.0003299021606196494, "loss": 3.3189, "step": 78900 }, { "epoch": 5.361122435113466, "grad_norm": 0.86076420545578, "learning_rate": 0.00032985969561081667, "loss": 3.4705, "step": 78905 }, { "epoch": 5.361462155184128, "grad_norm": 0.8551079630851746, "learning_rate": 0.000329817230601984, "loss": 3.2848, "step": 78910 }, { "epoch": 5.36180187525479, "grad_norm": 1.1348166465759277, "learning_rate": 0.00032977476559315123, "loss": 3.3761, "step": 78915 }, { "epoch": 5.362141595325451, "grad_norm": 0.9199367761611938, "learning_rate": 0.0003297323005843185, "loss": 3.6059, "step": 78920 }, { "epoch": 5.3624813153961135, "grad_norm": 1.0361356735229492, "learning_rate": 0.00032968983557548584, "loss": 3.4922, "step": 78925 }, { "epoch": 5.362821035466776, "grad_norm": 0.7577531933784485, "learning_rate": 0.00032964737056665307, "loss": 3.4155, "step": 78930 }, { "epoch": 5.363160755537437, "grad_norm": 0.9130941033363342, "learning_rate": 0.00032960490555782035, "loss": 3.5112, "step": 78935 }, { "epoch": 5.363500475608099, "grad_norm": 1.0524455308914185, "learning_rate": 0.00032956244054898763, "loss": 3.4349, "step": 78940 }, { "epoch": 5.363840195678761, "grad_norm": 1.1161905527114868, "learning_rate": 0.0003295199755401549, "loss": 3.4375, "step": 78945 }, { "epoch": 5.364179915749422, "grad_norm": 0.8131364583969116, "learning_rate": 0.0003294775105313222, "loss": 3.5253, "step": 78950 }, { "epoch": 5.364519635820084, "grad_norm": 1.1910043954849243, "learning_rate": 0.00032943504552248947, "loss": 3.5102, "step": 78955 }, { "epoch": 5.364859355890746, "grad_norm": 0.8790878057479858, "learning_rate": 0.0003293925805136568, "loss": 3.2448, "step": 78960 }, { "epoch": 5.365199075961407, "grad_norm": 0.9880867600440979, "learning_rate": 0.00032935011550482403, "loss": 3.3322, "step": 78965 }, { "epoch": 5.3655387960320695, "grad_norm": 0.8204182982444763, "learning_rate": 0.0003293076504959913, "loss": 3.4402, "step": 78970 }, { "epoch": 5.365878516102732, "grad_norm": 0.737518846988678, "learning_rate": 0.0003292651854871586, "loss": 3.7246, "step": 78975 }, { "epoch": 5.366218236173393, "grad_norm": 0.8896304368972778, "learning_rate": 0.00032922272047832587, "loss": 3.3018, "step": 78980 }, { "epoch": 5.366557956244055, "grad_norm": 0.8662015199661255, "learning_rate": 0.00032918025546949315, "loss": 3.388, "step": 78985 }, { "epoch": 5.366897676314717, "grad_norm": 0.885512113571167, "learning_rate": 0.00032913779046066043, "loss": 3.2371, "step": 78990 }, { "epoch": 5.367237396385378, "grad_norm": 0.8255910873413086, "learning_rate": 0.0003290953254518277, "loss": 3.4129, "step": 78995 }, { "epoch": 5.36757711645604, "grad_norm": 0.9838322997093201, "learning_rate": 0.000329052860442995, "loss": 3.3574, "step": 79000 }, { "epoch": 5.367916836526702, "grad_norm": 1.1750190258026123, "learning_rate": 0.00032901039543416227, "loss": 3.4185, "step": 79005 }, { "epoch": 5.3682565565973634, "grad_norm": 0.8219663500785828, "learning_rate": 0.0003289679304253295, "loss": 3.6836, "step": 79010 }, { "epoch": 5.3685962766680255, "grad_norm": 1.0848491191864014, "learning_rate": 0.00032892546541649683, "loss": 3.393, "step": 79015 }, { "epoch": 5.368935996738688, "grad_norm": 1.1454834938049316, "learning_rate": 0.0003288830004076641, "loss": 3.5149, "step": 79020 }, { "epoch": 5.369275716809349, "grad_norm": 0.9534928202629089, "learning_rate": 0.00032884053539883134, "loss": 3.33, "step": 79025 }, { "epoch": 5.369615436880011, "grad_norm": 0.9042555689811707, "learning_rate": 0.00032879807038999867, "loss": 3.549, "step": 79030 }, { "epoch": 5.369955156950673, "grad_norm": 0.785645067691803, "learning_rate": 0.00032875560538116595, "loss": 3.5386, "step": 79035 }, { "epoch": 5.370294877021334, "grad_norm": 0.9614004492759705, "learning_rate": 0.0003287131403723332, "loss": 3.5598, "step": 79040 }, { "epoch": 5.370634597091996, "grad_norm": 0.9631426334381104, "learning_rate": 0.00032867067536350046, "loss": 3.3806, "step": 79045 }, { "epoch": 5.370974317162658, "grad_norm": 1.1190179586410522, "learning_rate": 0.0003286282103546678, "loss": 3.3421, "step": 79050 }, { "epoch": 5.3713140372333195, "grad_norm": 1.0577495098114014, "learning_rate": 0.000328585745345835, "loss": 3.4473, "step": 79055 }, { "epoch": 5.3716537573039815, "grad_norm": 0.8098303079605103, "learning_rate": 0.0003285432803370023, "loss": 3.4139, "step": 79060 }, { "epoch": 5.371993477374644, "grad_norm": 0.9690068364143372, "learning_rate": 0.00032850081532816963, "loss": 3.5535, "step": 79065 }, { "epoch": 5.372333197445305, "grad_norm": 0.9729496836662292, "learning_rate": 0.00032845835031933686, "loss": 3.3113, "step": 79070 }, { "epoch": 5.372672917515967, "grad_norm": 0.6891639828681946, "learning_rate": 0.00032841588531050414, "loss": 3.3658, "step": 79075 }, { "epoch": 5.373012637586629, "grad_norm": 0.7098758220672607, "learning_rate": 0.0003283734203016714, "loss": 3.3595, "step": 79080 }, { "epoch": 5.37335235765729, "grad_norm": 0.8433969020843506, "learning_rate": 0.0003283309552928387, "loss": 3.3943, "step": 79085 }, { "epoch": 5.373692077727952, "grad_norm": 1.0230627059936523, "learning_rate": 0.000328288490284006, "loss": 3.3344, "step": 79090 }, { "epoch": 5.374031797798614, "grad_norm": 1.0803172588348389, "learning_rate": 0.00032824602527517326, "loss": 3.2611, "step": 79095 }, { "epoch": 5.3743715178692755, "grad_norm": 0.8001290559768677, "learning_rate": 0.00032820356026634054, "loss": 3.2865, "step": 79100 }, { "epoch": 5.3747112379399375, "grad_norm": 0.928577184677124, "learning_rate": 0.0003281610952575078, "loss": 3.4242, "step": 79105 }, { "epoch": 5.3750509580106, "grad_norm": 0.8327693343162537, "learning_rate": 0.0003281186302486751, "loss": 3.3438, "step": 79110 }, { "epoch": 5.375390678081261, "grad_norm": 0.9437814950942993, "learning_rate": 0.0003280761652398423, "loss": 3.2908, "step": 79115 }, { "epoch": 5.375730398151923, "grad_norm": 1.0406646728515625, "learning_rate": 0.00032803370023100966, "loss": 3.4995, "step": 79120 }, { "epoch": 5.376070118222585, "grad_norm": 0.8902111053466797, "learning_rate": 0.00032799123522217694, "loss": 3.364, "step": 79125 }, { "epoch": 5.376409838293246, "grad_norm": 0.9598656296730042, "learning_rate": 0.0003279487702133442, "loss": 3.5307, "step": 79130 }, { "epoch": 5.376749558363908, "grad_norm": 1.0133708715438843, "learning_rate": 0.0003279063052045115, "loss": 3.3971, "step": 79135 }, { "epoch": 5.37708927843457, "grad_norm": 0.9632912874221802, "learning_rate": 0.0003278638401956788, "loss": 3.4648, "step": 79140 }, { "epoch": 5.3774289985052315, "grad_norm": 1.2473727464675903, "learning_rate": 0.00032782137518684606, "loss": 3.6466, "step": 79145 }, { "epoch": 5.377768718575894, "grad_norm": 0.8775381445884705, "learning_rate": 0.0003277789101780133, "loss": 3.4368, "step": 79150 }, { "epoch": 5.378108438646556, "grad_norm": 0.9144920706748962, "learning_rate": 0.0003277364451691806, "loss": 3.3794, "step": 79155 }, { "epoch": 5.378448158717217, "grad_norm": 0.75979083776474, "learning_rate": 0.0003276939801603479, "loss": 3.4923, "step": 79160 }, { "epoch": 5.378787878787879, "grad_norm": 1.0801957845687866, "learning_rate": 0.0003276515151515151, "loss": 3.3828, "step": 79165 }, { "epoch": 5.379127598858541, "grad_norm": 0.7748429775238037, "learning_rate": 0.00032760905014268246, "loss": 3.6666, "step": 79170 }, { "epoch": 5.379467318929202, "grad_norm": 1.0417946577072144, "learning_rate": 0.00032756658513384974, "loss": 3.257, "step": 79175 }, { "epoch": 5.379807038999864, "grad_norm": 0.9298911690711975, "learning_rate": 0.00032752412012501697, "loss": 3.6153, "step": 79180 }, { "epoch": 5.380146759070525, "grad_norm": 0.8416727185249329, "learning_rate": 0.0003274816551161843, "loss": 3.4407, "step": 79185 }, { "epoch": 5.3804864791411875, "grad_norm": 0.7464411854743958, "learning_rate": 0.0003274391901073516, "loss": 3.5058, "step": 79190 }, { "epoch": 5.38082619921185, "grad_norm": 0.7843184471130371, "learning_rate": 0.0003273967250985188, "loss": 3.5201, "step": 79195 }, { "epoch": 5.381165919282511, "grad_norm": 1.0229194164276123, "learning_rate": 0.0003273542600896861, "loss": 3.5614, "step": 79200 }, { "epoch": 5.381505639353173, "grad_norm": 0.7081983685493469, "learning_rate": 0.0003273117950808534, "loss": 3.3381, "step": 79205 }, { "epoch": 5.381845359423835, "grad_norm": 0.890373170375824, "learning_rate": 0.00032726933007202065, "loss": 3.4309, "step": 79210 }, { "epoch": 5.382185079494496, "grad_norm": 1.044782280921936, "learning_rate": 0.0003272268650631879, "loss": 3.3992, "step": 79215 }, { "epoch": 5.382524799565158, "grad_norm": 0.7827876806259155, "learning_rate": 0.00032718440005435526, "loss": 3.4095, "step": 79220 }, { "epoch": 5.38286451963582, "grad_norm": 0.838117778301239, "learning_rate": 0.0003271419350455225, "loss": 3.3811, "step": 79225 }, { "epoch": 5.383204239706481, "grad_norm": 1.0035732984542847, "learning_rate": 0.00032709947003668977, "loss": 3.631, "step": 79230 }, { "epoch": 5.3835439597771435, "grad_norm": 1.2847179174423218, "learning_rate": 0.00032705700502785705, "loss": 3.6543, "step": 79235 }, { "epoch": 5.383883679847806, "grad_norm": 0.85878986120224, "learning_rate": 0.0003270145400190243, "loss": 3.6099, "step": 79240 }, { "epoch": 5.384223399918467, "grad_norm": 0.8680986762046814, "learning_rate": 0.0003269720750101916, "loss": 3.3341, "step": 79245 }, { "epoch": 5.384563119989129, "grad_norm": 0.9104351997375488, "learning_rate": 0.0003269296100013589, "loss": 3.2486, "step": 79250 }, { "epoch": 5.384902840059791, "grad_norm": 1.06619131565094, "learning_rate": 0.00032688714499252617, "loss": 3.2033, "step": 79255 }, { "epoch": 5.385242560130452, "grad_norm": 0.8084930777549744, "learning_rate": 0.00032684467998369345, "loss": 3.4819, "step": 79260 }, { "epoch": 5.385582280201114, "grad_norm": 1.017953872680664, "learning_rate": 0.0003268022149748607, "loss": 3.4906, "step": 79265 }, { "epoch": 5.385922000271776, "grad_norm": 0.8811764717102051, "learning_rate": 0.00032675974996602795, "loss": 3.3632, "step": 79270 }, { "epoch": 5.386261720342437, "grad_norm": 0.8606991767883301, "learning_rate": 0.0003267172849571953, "loss": 3.4615, "step": 79275 }, { "epoch": 5.3866014404130995, "grad_norm": 0.7611544132232666, "learning_rate": 0.00032667481994836257, "loss": 3.4547, "step": 79280 }, { "epoch": 5.386941160483762, "grad_norm": 0.7933346033096313, "learning_rate": 0.0003266323549395298, "loss": 3.2346, "step": 79285 }, { "epoch": 5.387280880554423, "grad_norm": 0.7401696443557739, "learning_rate": 0.00032658988993069713, "loss": 3.3983, "step": 79290 }, { "epoch": 5.387620600625085, "grad_norm": 0.7758470177650452, "learning_rate": 0.0003265474249218644, "loss": 3.4201, "step": 79295 }, { "epoch": 5.387960320695747, "grad_norm": 0.8723129630088806, "learning_rate": 0.0003265049599130317, "loss": 3.225, "step": 79300 }, { "epoch": 5.388300040766408, "grad_norm": 1.0921236276626587, "learning_rate": 0.0003264624949041989, "loss": 3.3489, "step": 79305 }, { "epoch": 5.38863976083707, "grad_norm": 0.6928749084472656, "learning_rate": 0.00032642002989536625, "loss": 3.4743, "step": 79310 }, { "epoch": 5.388979480907732, "grad_norm": 0.935758113861084, "learning_rate": 0.00032637756488653353, "loss": 3.3816, "step": 79315 }, { "epoch": 5.3893192009783935, "grad_norm": 0.9053452014923096, "learning_rate": 0.00032633509987770075, "loss": 3.4927, "step": 79320 }, { "epoch": 5.3896589210490555, "grad_norm": 0.9204387068748474, "learning_rate": 0.0003262926348688681, "loss": 3.6408, "step": 79325 }, { "epoch": 5.389998641119718, "grad_norm": 0.8386547565460205, "learning_rate": 0.00032625016986003537, "loss": 3.544, "step": 79330 }, { "epoch": 5.390338361190379, "grad_norm": 1.1313090324401855, "learning_rate": 0.0003262077048512026, "loss": 3.3465, "step": 79335 }, { "epoch": 5.390678081261041, "grad_norm": 0.6973360180854797, "learning_rate": 0.0003261652398423699, "loss": 3.5456, "step": 79340 }, { "epoch": 5.391017801331703, "grad_norm": 1.0095964670181274, "learning_rate": 0.0003261227748335372, "loss": 3.3621, "step": 79345 }, { "epoch": 5.391357521402364, "grad_norm": 1.1098952293395996, "learning_rate": 0.00032608030982470443, "loss": 3.5496, "step": 79350 }, { "epoch": 5.391697241473026, "grad_norm": 0.9603663086891174, "learning_rate": 0.0003260378448158717, "loss": 3.3416, "step": 79355 }, { "epoch": 5.392036961543688, "grad_norm": 0.8548449873924255, "learning_rate": 0.00032599537980703905, "loss": 3.3143, "step": 79360 }, { "epoch": 5.3923766816143495, "grad_norm": 0.987759530544281, "learning_rate": 0.0003259529147982063, "loss": 3.3978, "step": 79365 }, { "epoch": 5.3927164016850115, "grad_norm": 0.9195972084999084, "learning_rate": 0.00032591044978937355, "loss": 3.1941, "step": 79370 }, { "epoch": 5.393056121755674, "grad_norm": 0.8617194294929504, "learning_rate": 0.00032586798478054083, "loss": 3.438, "step": 79375 }, { "epoch": 5.393395841826335, "grad_norm": 0.8858255743980408, "learning_rate": 0.0003258255197717081, "loss": 3.177, "step": 79380 }, { "epoch": 5.393735561896997, "grad_norm": 0.987254798412323, "learning_rate": 0.0003257830547628754, "loss": 3.4465, "step": 79385 }, { "epoch": 5.394075281967659, "grad_norm": 0.929149329662323, "learning_rate": 0.0003257405897540427, "loss": 3.574, "step": 79390 }, { "epoch": 5.39441500203832, "grad_norm": 1.1347044706344604, "learning_rate": 0.00032569812474520995, "loss": 3.3371, "step": 79395 }, { "epoch": 5.394754722108982, "grad_norm": 1.0154848098754883, "learning_rate": 0.00032565565973637723, "loss": 3.3966, "step": 79400 }, { "epoch": 5.395094442179644, "grad_norm": 0.9472141265869141, "learning_rate": 0.0003256131947275445, "loss": 3.2876, "step": 79405 }, { "epoch": 5.3954341622503055, "grad_norm": 1.385036587715149, "learning_rate": 0.00032557072971871174, "loss": 3.6713, "step": 79410 }, { "epoch": 5.3957738823209676, "grad_norm": 1.0803108215332031, "learning_rate": 0.0003255282647098791, "loss": 3.5299, "step": 79415 }, { "epoch": 5.39611360239163, "grad_norm": 0.6747039556503296, "learning_rate": 0.00032548579970104636, "loss": 3.4854, "step": 79420 }, { "epoch": 5.396453322462291, "grad_norm": 0.8044858574867249, "learning_rate": 0.0003254433346922136, "loss": 3.4284, "step": 79425 }, { "epoch": 5.396793042532953, "grad_norm": 0.927371084690094, "learning_rate": 0.0003254008696833809, "loss": 3.4305, "step": 79430 }, { "epoch": 5.397132762603615, "grad_norm": 1.0387723445892334, "learning_rate": 0.0003253584046745482, "loss": 3.4859, "step": 79435 }, { "epoch": 5.397472482674276, "grad_norm": 0.6959279775619507, "learning_rate": 0.0003253159396657154, "loss": 3.7497, "step": 79440 }, { "epoch": 5.397812202744938, "grad_norm": 0.8004673719406128, "learning_rate": 0.00032527347465688276, "loss": 3.4356, "step": 79445 }, { "epoch": 5.3981519228156, "grad_norm": 0.9559466242790222, "learning_rate": 0.00032523100964805004, "loss": 3.2928, "step": 79450 }, { "epoch": 5.3984916428862615, "grad_norm": 1.3047997951507568, "learning_rate": 0.00032518854463921726, "loss": 3.2568, "step": 79455 }, { "epoch": 5.398831362956924, "grad_norm": 0.8160944581031799, "learning_rate": 0.00032514607963038454, "loss": 3.6309, "step": 79460 }, { "epoch": 5.399171083027586, "grad_norm": 1.0529931783676147, "learning_rate": 0.0003251036146215519, "loss": 3.0671, "step": 79465 }, { "epoch": 5.399510803098247, "grad_norm": 0.8836321234703064, "learning_rate": 0.00032506114961271916, "loss": 3.439, "step": 79470 }, { "epoch": 5.399850523168909, "grad_norm": 1.0307550430297852, "learning_rate": 0.0003250186846038864, "loss": 3.5278, "step": 79475 }, { "epoch": 5.400190243239571, "grad_norm": 0.7754269242286682, "learning_rate": 0.0003249762195950537, "loss": 3.4648, "step": 79480 }, { "epoch": 5.400529963310232, "grad_norm": 0.8441033959388733, "learning_rate": 0.000324933754586221, "loss": 3.1308, "step": 79485 }, { "epoch": 5.400869683380894, "grad_norm": 1.055978775024414, "learning_rate": 0.0003248912895773882, "loss": 3.582, "step": 79490 }, { "epoch": 5.401209403451556, "grad_norm": 0.8787696957588196, "learning_rate": 0.0003248488245685555, "loss": 3.55, "step": 79495 }, { "epoch": 5.4015491235222175, "grad_norm": 0.9810678958892822, "learning_rate": 0.00032480635955972284, "loss": 3.4666, "step": 79500 }, { "epoch": 5.40188884359288, "grad_norm": 1.0133280754089355, "learning_rate": 0.00032476389455089006, "loss": 3.3455, "step": 79505 }, { "epoch": 5.402228563663542, "grad_norm": 0.8044341206550598, "learning_rate": 0.00032472142954205734, "loss": 3.2825, "step": 79510 }, { "epoch": 5.402568283734203, "grad_norm": 1.0735983848571777, "learning_rate": 0.0003246789645332247, "loss": 3.4489, "step": 79515 }, { "epoch": 5.402908003804865, "grad_norm": 0.9021583199501038, "learning_rate": 0.0003246364995243919, "loss": 3.4252, "step": 79520 }, { "epoch": 5.403247723875527, "grad_norm": 0.9457162022590637, "learning_rate": 0.0003245940345155592, "loss": 3.5134, "step": 79525 }, { "epoch": 5.403587443946188, "grad_norm": 1.0457149744033813, "learning_rate": 0.00032455156950672646, "loss": 3.5163, "step": 79530 }, { "epoch": 5.40392716401685, "grad_norm": 0.9417846202850342, "learning_rate": 0.00032450910449789374, "loss": 3.2179, "step": 79535 }, { "epoch": 5.404266884087512, "grad_norm": 0.9906789660453796, "learning_rate": 0.000324466639489061, "loss": 3.4197, "step": 79540 }, { "epoch": 5.4046066041581735, "grad_norm": 0.8001586198806763, "learning_rate": 0.0003244241744802283, "loss": 3.4652, "step": 79545 }, { "epoch": 5.404946324228836, "grad_norm": 0.8556780219078064, "learning_rate": 0.0003243817094713956, "loss": 3.5436, "step": 79550 }, { "epoch": 5.405286044299498, "grad_norm": 0.8944543600082397, "learning_rate": 0.00032433924446256286, "loss": 3.6504, "step": 79555 }, { "epoch": 5.405625764370159, "grad_norm": 0.8708217740058899, "learning_rate": 0.00032429677945373014, "loss": 3.5562, "step": 79560 }, { "epoch": 5.405965484440821, "grad_norm": 0.9657374620437622, "learning_rate": 0.00032425431444489737, "loss": 3.3559, "step": 79565 }, { "epoch": 5.406305204511483, "grad_norm": 0.8089213371276855, "learning_rate": 0.0003242118494360647, "loss": 3.5497, "step": 79570 }, { "epoch": 5.406644924582144, "grad_norm": 1.0684189796447754, "learning_rate": 0.000324169384427232, "loss": 3.313, "step": 79575 }, { "epoch": 5.406984644652806, "grad_norm": 1.1513299942016602, "learning_rate": 0.0003241269194183992, "loss": 3.5311, "step": 79580 }, { "epoch": 5.4073243647234674, "grad_norm": 0.8903722763061523, "learning_rate": 0.00032408445440956654, "loss": 3.2516, "step": 79585 }, { "epoch": 5.4076640847941295, "grad_norm": 0.8179778456687927, "learning_rate": 0.0003240419894007338, "loss": 3.3555, "step": 79590 }, { "epoch": 5.408003804864792, "grad_norm": 1.0604883432388306, "learning_rate": 0.00032399952439190105, "loss": 3.1671, "step": 79595 }, { "epoch": 5.408343524935453, "grad_norm": 0.7640420794487, "learning_rate": 0.00032395705938306833, "loss": 3.8101, "step": 79600 }, { "epoch": 5.408683245006115, "grad_norm": 0.7050489783287048, "learning_rate": 0.00032391459437423566, "loss": 3.3212, "step": 79605 }, { "epoch": 5.409022965076777, "grad_norm": 0.916081964969635, "learning_rate": 0.0003238721293654029, "loss": 3.4027, "step": 79610 }, { "epoch": 5.409362685147438, "grad_norm": 0.8492266535758972, "learning_rate": 0.00032382966435657017, "loss": 3.1091, "step": 79615 }, { "epoch": 5.4097024052181, "grad_norm": 0.8164465427398682, "learning_rate": 0.0003237871993477375, "loss": 3.2132, "step": 79620 }, { "epoch": 5.410042125288762, "grad_norm": 0.8457860946655273, "learning_rate": 0.00032374473433890473, "loss": 3.6395, "step": 79625 }, { "epoch": 5.4103818453594235, "grad_norm": 1.1181838512420654, "learning_rate": 0.000323702269330072, "loss": 3.3983, "step": 79630 }, { "epoch": 5.4107215654300855, "grad_norm": 0.8906224370002747, "learning_rate": 0.0003236598043212393, "loss": 3.5768, "step": 79635 }, { "epoch": 5.411061285500748, "grad_norm": 0.8479645848274231, "learning_rate": 0.0003236173393124066, "loss": 3.2742, "step": 79640 }, { "epoch": 5.411401005571409, "grad_norm": 0.8457931876182556, "learning_rate": 0.00032357487430357385, "loss": 3.3738, "step": 79645 }, { "epoch": 5.411740725642071, "grad_norm": 0.8787240386009216, "learning_rate": 0.00032353240929474113, "loss": 3.2301, "step": 79650 }, { "epoch": 5.412080445712733, "grad_norm": 0.7399342656135559, "learning_rate": 0.00032348994428590846, "loss": 3.5646, "step": 79655 }, { "epoch": 5.412420165783394, "grad_norm": 0.9568170309066772, "learning_rate": 0.0003234474792770757, "loss": 3.3103, "step": 79660 }, { "epoch": 5.412759885854056, "grad_norm": 1.0208185911178589, "learning_rate": 0.00032340501426824297, "loss": 3.5883, "step": 79665 }, { "epoch": 5.413099605924718, "grad_norm": 1.0199062824249268, "learning_rate": 0.00032336254925941025, "loss": 3.4654, "step": 79670 }, { "epoch": 5.4134393259953795, "grad_norm": 0.8192639350891113, "learning_rate": 0.00032332008425057753, "loss": 3.5133, "step": 79675 }, { "epoch": 5.4137790460660415, "grad_norm": 1.002755880355835, "learning_rate": 0.0003232776192417448, "loss": 3.3442, "step": 79680 }, { "epoch": 5.414118766136704, "grad_norm": 0.8229334354400635, "learning_rate": 0.0003232351542329121, "loss": 3.3488, "step": 79685 }, { "epoch": 5.414458486207365, "grad_norm": 0.9102748036384583, "learning_rate": 0.00032319268922407937, "loss": 3.4106, "step": 79690 }, { "epoch": 5.414798206278027, "grad_norm": 0.8709344267845154, "learning_rate": 0.00032315022421524665, "loss": 3.4867, "step": 79695 }, { "epoch": 5.415137926348689, "grad_norm": 1.1115503311157227, "learning_rate": 0.00032310775920641393, "loss": 3.3963, "step": 79700 }, { "epoch": 5.41547764641935, "grad_norm": 0.6895166039466858, "learning_rate": 0.00032306529419758116, "loss": 3.4755, "step": 79705 }, { "epoch": 5.415817366490012, "grad_norm": 1.021589994430542, "learning_rate": 0.0003230228291887485, "loss": 3.4243, "step": 79710 }, { "epoch": 5.416157086560674, "grad_norm": 1.0303107500076294, "learning_rate": 0.00032298036417991577, "loss": 3.427, "step": 79715 }, { "epoch": 5.4164968066313355, "grad_norm": 0.9345182776451111, "learning_rate": 0.000322937899171083, "loss": 3.433, "step": 79720 }, { "epoch": 5.416836526701998, "grad_norm": 0.9277019500732422, "learning_rate": 0.00032289543416225033, "loss": 3.4604, "step": 79725 }, { "epoch": 5.41717624677266, "grad_norm": 0.8564796447753906, "learning_rate": 0.0003228529691534176, "loss": 3.6353, "step": 79730 }, { "epoch": 5.417515966843321, "grad_norm": 0.8231950998306274, "learning_rate": 0.00032281050414458484, "loss": 3.3832, "step": 79735 }, { "epoch": 5.417855686913983, "grad_norm": 1.0379101037979126, "learning_rate": 0.00032276803913575217, "loss": 3.3708, "step": 79740 }, { "epoch": 5.418195406984645, "grad_norm": 1.0698096752166748, "learning_rate": 0.00032272557412691945, "loss": 3.3811, "step": 79745 }, { "epoch": 5.418535127055306, "grad_norm": 1.1276267766952515, "learning_rate": 0.0003226831091180867, "loss": 3.4829, "step": 79750 }, { "epoch": 5.418874847125968, "grad_norm": 1.2868626117706299, "learning_rate": 0.00032264064410925396, "loss": 3.641, "step": 79755 }, { "epoch": 5.41921456719663, "grad_norm": 1.095262050628662, "learning_rate": 0.0003225981791004213, "loss": 3.1516, "step": 79760 }, { "epoch": 5.4195542872672915, "grad_norm": 0.9112256169319153, "learning_rate": 0.0003225557140915885, "loss": 3.3517, "step": 79765 }, { "epoch": 5.419894007337954, "grad_norm": 0.9246969223022461, "learning_rate": 0.0003225132490827558, "loss": 3.3469, "step": 79770 }, { "epoch": 5.420233727408616, "grad_norm": 0.829845130443573, "learning_rate": 0.00032247078407392313, "loss": 3.5544, "step": 79775 }, { "epoch": 5.420573447479277, "grad_norm": 0.8716671466827393, "learning_rate": 0.00032242831906509036, "loss": 3.6626, "step": 79780 }, { "epoch": 5.420913167549939, "grad_norm": 0.8169354796409607, "learning_rate": 0.00032238585405625764, "loss": 3.5782, "step": 79785 }, { "epoch": 5.421252887620601, "grad_norm": 1.022579312324524, "learning_rate": 0.0003223433890474249, "loss": 3.4576, "step": 79790 }, { "epoch": 5.421592607691262, "grad_norm": 0.9283916354179382, "learning_rate": 0.0003223009240385922, "loss": 3.2583, "step": 79795 }, { "epoch": 5.421932327761924, "grad_norm": 0.9453358054161072, "learning_rate": 0.0003222584590297595, "loss": 3.2979, "step": 79800 }, { "epoch": 5.422272047832586, "grad_norm": 0.8371495604515076, "learning_rate": 0.00032221599402092676, "loss": 3.2128, "step": 79805 }, { "epoch": 5.4226117679032475, "grad_norm": 1.037532091140747, "learning_rate": 0.0003221735290120941, "loss": 3.3836, "step": 79810 }, { "epoch": 5.42295148797391, "grad_norm": 1.3357410430908203, "learning_rate": 0.0003221310640032613, "loss": 3.6313, "step": 79815 }, { "epoch": 5.423291208044572, "grad_norm": 0.8828228116035461, "learning_rate": 0.0003220885989944286, "loss": 3.4883, "step": 79820 }, { "epoch": 5.423630928115233, "grad_norm": 0.9556611776351929, "learning_rate": 0.0003220461339855959, "loss": 3.5646, "step": 79825 }, { "epoch": 5.423970648185895, "grad_norm": 0.9585297703742981, "learning_rate": 0.00032200366897676316, "loss": 3.5546, "step": 79830 }, { "epoch": 5.424310368256557, "grad_norm": 0.809064507484436, "learning_rate": 0.00032196120396793044, "loss": 3.6639, "step": 79835 }, { "epoch": 5.424650088327218, "grad_norm": 0.916098415851593, "learning_rate": 0.0003219187389590977, "loss": 3.5239, "step": 79840 }, { "epoch": 5.42498980839788, "grad_norm": 0.8454807996749878, "learning_rate": 0.000321876273950265, "loss": 3.5873, "step": 79845 }, { "epoch": 5.425329528468542, "grad_norm": 0.7013819217681885, "learning_rate": 0.0003218338089414323, "loss": 3.4357, "step": 79850 }, { "epoch": 5.4256692485392035, "grad_norm": 0.8472039699554443, "learning_rate": 0.00032179134393259956, "loss": 3.5815, "step": 79855 }, { "epoch": 5.426008968609866, "grad_norm": 1.1921658515930176, "learning_rate": 0.0003217488789237668, "loss": 3.3115, "step": 79860 }, { "epoch": 5.426348688680527, "grad_norm": 0.9890744090080261, "learning_rate": 0.0003217064139149341, "loss": 3.5892, "step": 79865 }, { "epoch": 5.426688408751189, "grad_norm": 0.8726601600646973, "learning_rate": 0.0003216639489061014, "loss": 3.1432, "step": 79870 }, { "epoch": 5.427028128821851, "grad_norm": 1.2211672067642212, "learning_rate": 0.0003216214838972686, "loss": 3.3863, "step": 79875 }, { "epoch": 5.427367848892512, "grad_norm": 0.6898414492607117, "learning_rate": 0.00032157901888843596, "loss": 3.2701, "step": 79880 }, { "epoch": 5.427707568963174, "grad_norm": 0.7372936010360718, "learning_rate": 0.00032153655387960324, "loss": 3.1812, "step": 79885 }, { "epoch": 5.428047289033836, "grad_norm": 0.896615743637085, "learning_rate": 0.00032149408887077047, "loss": 3.5661, "step": 79890 }, { "epoch": 5.4283870091044975, "grad_norm": 0.754237711429596, "learning_rate": 0.00032145162386193775, "loss": 3.6133, "step": 79895 }, { "epoch": 5.4287267291751595, "grad_norm": 1.1617298126220703, "learning_rate": 0.0003214091588531051, "loss": 3.5067, "step": 79900 }, { "epoch": 5.429066449245822, "grad_norm": 0.8100389242172241, "learning_rate": 0.0003213666938442723, "loss": 3.4381, "step": 79905 }, { "epoch": 5.429406169316483, "grad_norm": 0.9084051251411438, "learning_rate": 0.0003213242288354396, "loss": 3.586, "step": 79910 }, { "epoch": 5.429745889387145, "grad_norm": 0.9597190022468567, "learning_rate": 0.0003212817638266069, "loss": 3.5202, "step": 79915 }, { "epoch": 5.430085609457807, "grad_norm": 1.2846983671188354, "learning_rate": 0.00032123929881777415, "loss": 2.9802, "step": 79920 }, { "epoch": 5.430425329528468, "grad_norm": 0.6910883784294128, "learning_rate": 0.0003211968338089414, "loss": 3.4769, "step": 79925 }, { "epoch": 5.43076504959913, "grad_norm": 0.876245379447937, "learning_rate": 0.0003211543688001087, "loss": 3.2965, "step": 79930 }, { "epoch": 5.431104769669792, "grad_norm": 0.9904869794845581, "learning_rate": 0.000321111903791276, "loss": 3.0782, "step": 79935 }, { "epoch": 5.4314444897404535, "grad_norm": 0.8907330632209778, "learning_rate": 0.00032106943878244327, "loss": 3.4998, "step": 79940 }, { "epoch": 5.4317842098111155, "grad_norm": 0.8442257642745972, "learning_rate": 0.00032102697377361055, "loss": 3.506, "step": 79945 }, { "epoch": 5.432123929881778, "grad_norm": 0.759250819683075, "learning_rate": 0.0003209845087647778, "loss": 3.2729, "step": 79950 }, { "epoch": 5.432463649952439, "grad_norm": 0.9117350578308105, "learning_rate": 0.0003209420437559451, "loss": 3.3955, "step": 79955 }, { "epoch": 5.432803370023101, "grad_norm": 1.2261059284210205, "learning_rate": 0.0003208995787471124, "loss": 3.4951, "step": 79960 }, { "epoch": 5.433143090093763, "grad_norm": 1.1697813272476196, "learning_rate": 0.0003208571137382796, "loss": 3.489, "step": 79965 }, { "epoch": 5.433482810164424, "grad_norm": 0.8588851690292358, "learning_rate": 0.00032081464872944695, "loss": 3.5286, "step": 79970 }, { "epoch": 5.433822530235086, "grad_norm": 0.8793575167655945, "learning_rate": 0.0003207721837206142, "loss": 3.339, "step": 79975 }, { "epoch": 5.434162250305748, "grad_norm": 0.9356627464294434, "learning_rate": 0.0003207297187117815, "loss": 3.2649, "step": 79980 }, { "epoch": 5.4345019703764095, "grad_norm": 0.9652442336082458, "learning_rate": 0.0003206872537029488, "loss": 3.3139, "step": 79985 }, { "epoch": 5.4348416904470715, "grad_norm": 0.8223676681518555, "learning_rate": 0.00032064478869411607, "loss": 3.6272, "step": 79990 }, { "epoch": 5.435181410517734, "grad_norm": 0.7432926893234253, "learning_rate": 0.00032060232368528335, "loss": 3.303, "step": 79995 }, { "epoch": 5.435521130588395, "grad_norm": 0.8935067057609558, "learning_rate": 0.0003205598586764506, "loss": 3.4762, "step": 80000 }, { "epoch": 5.435860850659057, "grad_norm": 0.8305067420005798, "learning_rate": 0.0003205173936676179, "loss": 3.5124, "step": 80005 }, { "epoch": 5.436200570729719, "grad_norm": 0.9069252610206604, "learning_rate": 0.0003204749286587852, "loss": 3.4791, "step": 80010 }, { "epoch": 5.43654029080038, "grad_norm": 0.8048125505447388, "learning_rate": 0.0003204324636499524, "loss": 3.4871, "step": 80015 }, { "epoch": 5.436880010871042, "grad_norm": 0.9554992914199829, "learning_rate": 0.00032038999864111975, "loss": 3.6489, "step": 80020 }, { "epoch": 5.437219730941704, "grad_norm": 0.8812788128852844, "learning_rate": 0.00032034753363228703, "loss": 3.2659, "step": 80025 }, { "epoch": 5.4375594510123655, "grad_norm": 1.128759503364563, "learning_rate": 0.00032030506862345425, "loss": 3.7024, "step": 80030 }, { "epoch": 5.437899171083028, "grad_norm": 1.1227883100509644, "learning_rate": 0.0003202626036146216, "loss": 3.557, "step": 80035 }, { "epoch": 5.43823889115369, "grad_norm": 0.7424842715263367, "learning_rate": 0.00032022013860578887, "loss": 3.4154, "step": 80040 }, { "epoch": 5.438578611224351, "grad_norm": 0.7760342359542847, "learning_rate": 0.0003201776735969561, "loss": 3.3055, "step": 80045 }, { "epoch": 5.438918331295013, "grad_norm": 0.9621506333351135, "learning_rate": 0.0003201352085881234, "loss": 3.5439, "step": 80050 }, { "epoch": 5.439258051365675, "grad_norm": 0.9321199655532837, "learning_rate": 0.0003200927435792907, "loss": 3.5214, "step": 80055 }, { "epoch": 5.439597771436336, "grad_norm": 0.7454168796539307, "learning_rate": 0.00032005027857045793, "loss": 3.3582, "step": 80060 }, { "epoch": 5.439937491506998, "grad_norm": 1.0646920204162598, "learning_rate": 0.0003200078135616252, "loss": 3.2409, "step": 80065 }, { "epoch": 5.44027721157766, "grad_norm": 0.9061108231544495, "learning_rate": 0.00031996534855279255, "loss": 3.2801, "step": 80070 }, { "epoch": 5.4406169316483215, "grad_norm": 0.8825212121009827, "learning_rate": 0.0003199228835439598, "loss": 3.2948, "step": 80075 }, { "epoch": 5.440956651718984, "grad_norm": 0.9839466214179993, "learning_rate": 0.00031988041853512705, "loss": 3.5077, "step": 80080 }, { "epoch": 5.441296371789646, "grad_norm": 0.7808550000190735, "learning_rate": 0.00031983795352629433, "loss": 3.6127, "step": 80085 }, { "epoch": 5.441636091860307, "grad_norm": 0.8348985910415649, "learning_rate": 0.0003197954885174616, "loss": 3.1995, "step": 80090 }, { "epoch": 5.441975811930969, "grad_norm": 1.035544514656067, "learning_rate": 0.0003197530235086289, "loss": 3.4584, "step": 80095 }, { "epoch": 5.442315532001631, "grad_norm": 0.9968258142471313, "learning_rate": 0.0003197105584997962, "loss": 3.4169, "step": 80100 }, { "epoch": 5.442655252072292, "grad_norm": 0.7317793965339661, "learning_rate": 0.00031966809349096345, "loss": 3.415, "step": 80105 }, { "epoch": 5.442994972142954, "grad_norm": 1.0015166997909546, "learning_rate": 0.00031962562848213073, "loss": 3.4404, "step": 80110 }, { "epoch": 5.443334692213616, "grad_norm": 0.7679945230484009, "learning_rate": 0.000319583163473298, "loss": 3.5983, "step": 80115 }, { "epoch": 5.4436744122842775, "grad_norm": 0.966122031211853, "learning_rate": 0.00031954069846446524, "loss": 3.4565, "step": 80120 }, { "epoch": 5.44401413235494, "grad_norm": 0.8979265689849854, "learning_rate": 0.0003194982334556326, "loss": 3.2175, "step": 80125 }, { "epoch": 5.444353852425602, "grad_norm": 0.7221465110778809, "learning_rate": 0.00031945576844679986, "loss": 3.6694, "step": 80130 }, { "epoch": 5.444693572496263, "grad_norm": 0.9527706503868103, "learning_rate": 0.0003194133034379671, "loss": 3.4806, "step": 80135 }, { "epoch": 5.445033292566925, "grad_norm": 1.1826707124710083, "learning_rate": 0.0003193708384291344, "loss": 3.7939, "step": 80140 }, { "epoch": 5.445373012637587, "grad_norm": 1.6172720193862915, "learning_rate": 0.0003193283734203017, "loss": 3.6342, "step": 80145 }, { "epoch": 5.445712732708248, "grad_norm": 0.8139119744300842, "learning_rate": 0.000319285908411469, "loss": 3.4507, "step": 80150 }, { "epoch": 5.44605245277891, "grad_norm": 0.8033959269523621, "learning_rate": 0.0003192434434026362, "loss": 3.3415, "step": 80155 }, { "epoch": 5.446392172849572, "grad_norm": 0.8399551510810852, "learning_rate": 0.00031920097839380354, "loss": 3.3689, "step": 80160 }, { "epoch": 5.4467318929202335, "grad_norm": 0.6771608591079712, "learning_rate": 0.0003191585133849708, "loss": 3.648, "step": 80165 }, { "epoch": 5.447071612990896, "grad_norm": 0.9032678008079529, "learning_rate": 0.00031911604837613804, "loss": 3.5641, "step": 80170 }, { "epoch": 5.447411333061558, "grad_norm": 1.138333797454834, "learning_rate": 0.0003190735833673054, "loss": 3.5364, "step": 80175 }, { "epoch": 5.447751053132219, "grad_norm": 1.1148674488067627, "learning_rate": 0.00031903111835847266, "loss": 3.3039, "step": 80180 }, { "epoch": 5.448090773202881, "grad_norm": 0.8793702721595764, "learning_rate": 0.0003189886533496399, "loss": 3.566, "step": 80185 }, { "epoch": 5.448430493273543, "grad_norm": 0.7411450743675232, "learning_rate": 0.00031894618834080716, "loss": 3.5402, "step": 80190 }, { "epoch": 5.448770213344204, "grad_norm": 0.9839465618133545, "learning_rate": 0.0003189037233319745, "loss": 3.4557, "step": 80195 }, { "epoch": 5.449109933414866, "grad_norm": 0.9993599653244019, "learning_rate": 0.0003188612583231417, "loss": 3.4291, "step": 80200 }, { "epoch": 5.449449653485528, "grad_norm": 0.860223114490509, "learning_rate": 0.000318818793314309, "loss": 3.413, "step": 80205 }, { "epoch": 5.4497893735561895, "grad_norm": 1.0983389616012573, "learning_rate": 0.00031877632830547634, "loss": 3.2946, "step": 80210 }, { "epoch": 5.450129093626852, "grad_norm": 0.9472988247871399, "learning_rate": 0.00031873386329664356, "loss": 3.7485, "step": 80215 }, { "epoch": 5.450468813697514, "grad_norm": 0.8879375457763672, "learning_rate": 0.00031869139828781084, "loss": 3.3158, "step": 80220 }, { "epoch": 5.450808533768175, "grad_norm": 1.0611835718154907, "learning_rate": 0.0003186489332789781, "loss": 3.3014, "step": 80225 }, { "epoch": 5.451148253838837, "grad_norm": 1.0114688873291016, "learning_rate": 0.0003186064682701454, "loss": 3.4739, "step": 80230 }, { "epoch": 5.451487973909499, "grad_norm": 0.8104035258293152, "learning_rate": 0.0003185640032613127, "loss": 3.5576, "step": 80235 }, { "epoch": 5.45182769398016, "grad_norm": 0.7817025780677795, "learning_rate": 0.00031852153825247996, "loss": 3.3327, "step": 80240 }, { "epoch": 5.452167414050822, "grad_norm": 0.8456599116325378, "learning_rate": 0.00031847907324364724, "loss": 3.3876, "step": 80245 }, { "epoch": 5.452507134121484, "grad_norm": 0.9094346761703491, "learning_rate": 0.0003184366082348145, "loss": 3.2931, "step": 80250 }, { "epoch": 5.4528468541921455, "grad_norm": 0.7786800861358643, "learning_rate": 0.0003183941432259818, "loss": 3.7141, "step": 80255 }, { "epoch": 5.453186574262808, "grad_norm": 1.046402096748352, "learning_rate": 0.00031835167821714903, "loss": 3.4595, "step": 80260 }, { "epoch": 5.45352629433347, "grad_norm": 1.0273611545562744, "learning_rate": 0.00031830921320831636, "loss": 3.4208, "step": 80265 }, { "epoch": 5.453866014404131, "grad_norm": 0.9352371692657471, "learning_rate": 0.00031826674819948364, "loss": 3.4424, "step": 80270 }, { "epoch": 5.454205734474793, "grad_norm": 0.9333164691925049, "learning_rate": 0.00031822428319065087, "loss": 3.3087, "step": 80275 }, { "epoch": 5.454545454545454, "grad_norm": 1.1025381088256836, "learning_rate": 0.0003181818181818182, "loss": 3.3989, "step": 80280 }, { "epoch": 5.454885174616116, "grad_norm": 1.0340526103973389, "learning_rate": 0.0003181393531729855, "loss": 3.4063, "step": 80285 }, { "epoch": 5.455224894686778, "grad_norm": 1.2037047147750854, "learning_rate": 0.0003180968881641527, "loss": 3.2613, "step": 80290 }, { "epoch": 5.4555646147574395, "grad_norm": 0.8071075081825256, "learning_rate": 0.00031805442315532, "loss": 3.6881, "step": 80295 }, { "epoch": 5.4559043348281016, "grad_norm": 0.8655932545661926, "learning_rate": 0.0003180119581464873, "loss": 3.6935, "step": 80300 }, { "epoch": 5.456244054898764, "grad_norm": 1.0303689241409302, "learning_rate": 0.00031796949313765455, "loss": 3.463, "step": 80305 }, { "epoch": 5.456583774969425, "grad_norm": 0.8138473033905029, "learning_rate": 0.00031792702812882183, "loss": 3.4379, "step": 80310 }, { "epoch": 5.456923495040087, "grad_norm": 0.9940857291221619, "learning_rate": 0.00031788456311998916, "loss": 3.3135, "step": 80315 }, { "epoch": 5.457263215110749, "grad_norm": 0.9272728562355042, "learning_rate": 0.00031784209811115644, "loss": 3.2598, "step": 80320 }, { "epoch": 5.45760293518141, "grad_norm": 0.9566553831100464, "learning_rate": 0.00031779963310232367, "loss": 3.4085, "step": 80325 }, { "epoch": 5.457942655252072, "grad_norm": 0.8881889581680298, "learning_rate": 0.000317757168093491, "loss": 3.7319, "step": 80330 }, { "epoch": 5.458282375322734, "grad_norm": 0.7367042899131775, "learning_rate": 0.0003177147030846583, "loss": 3.5754, "step": 80335 }, { "epoch": 5.4586220953933955, "grad_norm": 0.9288102984428406, "learning_rate": 0.0003176722380758255, "loss": 3.3549, "step": 80340 }, { "epoch": 5.458961815464058, "grad_norm": 1.0950753688812256, "learning_rate": 0.0003176297730669928, "loss": 3.5794, "step": 80345 }, { "epoch": 5.45930153553472, "grad_norm": 0.8329585790634155, "learning_rate": 0.0003175873080581601, "loss": 3.3791, "step": 80350 }, { "epoch": 5.459641255605381, "grad_norm": 0.9043397307395935, "learning_rate": 0.00031754484304932735, "loss": 3.5061, "step": 80355 }, { "epoch": 5.459980975676043, "grad_norm": 1.1477543115615845, "learning_rate": 0.00031750237804049463, "loss": 3.204, "step": 80360 }, { "epoch": 5.460320695746705, "grad_norm": 0.9725548028945923, "learning_rate": 0.00031745991303166196, "loss": 3.5959, "step": 80365 }, { "epoch": 5.460660415817366, "grad_norm": 1.1579054594039917, "learning_rate": 0.0003174174480228292, "loss": 3.3758, "step": 80370 }, { "epoch": 5.461000135888028, "grad_norm": 0.8023281097412109, "learning_rate": 0.00031737498301399647, "loss": 3.5489, "step": 80375 }, { "epoch": 5.46133985595869, "grad_norm": 0.9975463151931763, "learning_rate": 0.00031733251800516375, "loss": 3.5338, "step": 80380 }, { "epoch": 5.4616795760293515, "grad_norm": 0.7677999138832092, "learning_rate": 0.00031729005299633103, "loss": 3.4013, "step": 80385 }, { "epoch": 5.462019296100014, "grad_norm": 0.9919224381446838, "learning_rate": 0.0003172475879874983, "loss": 3.6213, "step": 80390 }, { "epoch": 5.462359016170676, "grad_norm": 0.8394951820373535, "learning_rate": 0.0003172051229786656, "loss": 3.5269, "step": 80395 }, { "epoch": 5.462698736241337, "grad_norm": 0.8577165603637695, "learning_rate": 0.00031716265796983287, "loss": 3.43, "step": 80400 }, { "epoch": 5.463038456311999, "grad_norm": 0.9205718040466309, "learning_rate": 0.00031712019296100015, "loss": 3.44, "step": 80405 }, { "epoch": 5.463378176382661, "grad_norm": 0.8181172609329224, "learning_rate": 0.00031707772795216743, "loss": 3.5706, "step": 80410 }, { "epoch": 5.463717896453322, "grad_norm": 0.9352506399154663, "learning_rate": 0.00031703526294333466, "loss": 3.644, "step": 80415 }, { "epoch": 5.464057616523984, "grad_norm": 0.897623598575592, "learning_rate": 0.000316992797934502, "loss": 3.4525, "step": 80420 }, { "epoch": 5.464397336594646, "grad_norm": 0.7987215518951416, "learning_rate": 0.00031695033292566927, "loss": 3.0445, "step": 80425 }, { "epoch": 5.4647370566653075, "grad_norm": 1.0621827840805054, "learning_rate": 0.0003169078679168365, "loss": 3.5239, "step": 80430 }, { "epoch": 5.46507677673597, "grad_norm": 0.7452366948127747, "learning_rate": 0.00031686540290800383, "loss": 3.5867, "step": 80435 }, { "epoch": 5.465416496806632, "grad_norm": 0.8148128390312195, "learning_rate": 0.0003168229378991711, "loss": 3.5795, "step": 80440 }, { "epoch": 5.465756216877293, "grad_norm": 0.9951159358024597, "learning_rate": 0.00031678047289033834, "loss": 3.7857, "step": 80445 }, { "epoch": 5.466095936947955, "grad_norm": 0.9115222692489624, "learning_rate": 0.0003167380078815056, "loss": 3.7717, "step": 80450 }, { "epoch": 5.466435657018617, "grad_norm": 0.7671780586242676, "learning_rate": 0.00031669554287267295, "loss": 3.4688, "step": 80455 }, { "epoch": 5.466775377089278, "grad_norm": 0.8651946187019348, "learning_rate": 0.0003166530778638402, "loss": 3.5941, "step": 80460 }, { "epoch": 5.46711509715994, "grad_norm": 1.0602905750274658, "learning_rate": 0.00031661061285500746, "loss": 3.7414, "step": 80465 }, { "epoch": 5.467454817230602, "grad_norm": 1.0174369812011719, "learning_rate": 0.0003165681478461748, "loss": 3.3238, "step": 80470 }, { "epoch": 5.4677945373012635, "grad_norm": 1.053489327430725, "learning_rate": 0.000316525682837342, "loss": 3.3682, "step": 80475 }, { "epoch": 5.468134257371926, "grad_norm": 0.9659205675125122, "learning_rate": 0.0003164832178285093, "loss": 3.5042, "step": 80480 }, { "epoch": 5.468473977442588, "grad_norm": 0.7990891337394714, "learning_rate": 0.0003164407528196766, "loss": 3.2517, "step": 80485 }, { "epoch": 5.468813697513249, "grad_norm": 1.1702691316604614, "learning_rate": 0.0003163982878108439, "loss": 3.4422, "step": 80490 }, { "epoch": 5.469153417583911, "grad_norm": 1.1447176933288574, "learning_rate": 0.00031635582280201114, "loss": 3.3085, "step": 80495 }, { "epoch": 5.469493137654573, "grad_norm": 1.006929636001587, "learning_rate": 0.0003163133577931784, "loss": 3.2478, "step": 80500 }, { "epoch": 5.469832857725234, "grad_norm": 0.8605318665504456, "learning_rate": 0.00031627089278434575, "loss": 3.5052, "step": 80505 }, { "epoch": 5.470172577795896, "grad_norm": 1.0493879318237305, "learning_rate": 0.000316228427775513, "loss": 3.4422, "step": 80510 }, { "epoch": 5.470512297866558, "grad_norm": 1.1290351152420044, "learning_rate": 0.00031618596276668026, "loss": 3.5213, "step": 80515 }, { "epoch": 5.4708520179372195, "grad_norm": 1.5247526168823242, "learning_rate": 0.00031614349775784754, "loss": 3.2364, "step": 80520 }, { "epoch": 5.471191738007882, "grad_norm": 1.0497881174087524, "learning_rate": 0.0003161010327490148, "loss": 3.3401, "step": 80525 }, { "epoch": 5.471531458078544, "grad_norm": 0.7145054936408997, "learning_rate": 0.0003160585677401821, "loss": 3.4485, "step": 80530 }, { "epoch": 5.471871178149205, "grad_norm": 0.7623755931854248, "learning_rate": 0.0003160161027313494, "loss": 3.2687, "step": 80535 }, { "epoch": 5.472210898219867, "grad_norm": 1.2699828147888184, "learning_rate": 0.00031597363772251666, "loss": 3.4635, "step": 80540 }, { "epoch": 5.472550618290528, "grad_norm": 1.2134913206100464, "learning_rate": 0.00031593117271368394, "loss": 3.5687, "step": 80545 }, { "epoch": 5.47289033836119, "grad_norm": 0.8634642958641052, "learning_rate": 0.0003158887077048512, "loss": 3.4872, "step": 80550 }, { "epoch": 5.473230058431852, "grad_norm": 0.7784424424171448, "learning_rate": 0.00031584624269601844, "loss": 3.4812, "step": 80555 }, { "epoch": 5.4735697785025135, "grad_norm": 0.9676445722579956, "learning_rate": 0.0003158037776871858, "loss": 3.4027, "step": 80560 }, { "epoch": 5.4739094985731755, "grad_norm": 1.1016380786895752, "learning_rate": 0.00031576131267835306, "loss": 3.3296, "step": 80565 }, { "epoch": 5.474249218643838, "grad_norm": 0.7825928926467896, "learning_rate": 0.0003157188476695203, "loss": 3.5501, "step": 80570 }, { "epoch": 5.474588938714499, "grad_norm": 0.8966153264045715, "learning_rate": 0.0003156763826606876, "loss": 3.4232, "step": 80575 }, { "epoch": 5.474928658785161, "grad_norm": 0.9040058851242065, "learning_rate": 0.0003156339176518549, "loss": 3.6934, "step": 80580 }, { "epoch": 5.475268378855823, "grad_norm": 0.9499273896217346, "learning_rate": 0.0003155914526430221, "loss": 3.6648, "step": 80585 }, { "epoch": 5.475608098926484, "grad_norm": 0.8520113825798035, "learning_rate": 0.00031554898763418946, "loss": 3.6436, "step": 80590 }, { "epoch": 5.475947818997146, "grad_norm": 1.1939960718154907, "learning_rate": 0.00031550652262535674, "loss": 3.5838, "step": 80595 }, { "epoch": 5.476287539067808, "grad_norm": 0.7153595089912415, "learning_rate": 0.00031546405761652397, "loss": 3.6443, "step": 80600 }, { "epoch": 5.4766272591384695, "grad_norm": 0.9678404331207275, "learning_rate": 0.00031542159260769125, "loss": 3.2944, "step": 80605 }, { "epoch": 5.476966979209132, "grad_norm": 0.8010450601577759, "learning_rate": 0.0003153791275988586, "loss": 3.3811, "step": 80610 }, { "epoch": 5.477306699279794, "grad_norm": 1.4086612462997437, "learning_rate": 0.0003153366625900258, "loss": 3.6514, "step": 80615 }, { "epoch": 5.477646419350455, "grad_norm": 0.762890636920929, "learning_rate": 0.0003152941975811931, "loss": 3.715, "step": 80620 }, { "epoch": 5.477986139421117, "grad_norm": 0.8555885553359985, "learning_rate": 0.0003152517325723604, "loss": 3.7097, "step": 80625 }, { "epoch": 5.478325859491779, "grad_norm": 0.8632957935333252, "learning_rate": 0.00031520926756352765, "loss": 3.3016, "step": 80630 }, { "epoch": 5.47866557956244, "grad_norm": 0.8068735003471375, "learning_rate": 0.0003151668025546949, "loss": 3.3411, "step": 80635 }, { "epoch": 5.479005299633102, "grad_norm": 0.9254390001296997, "learning_rate": 0.0003151243375458622, "loss": 3.6879, "step": 80640 }, { "epoch": 5.479345019703764, "grad_norm": 0.9061561226844788, "learning_rate": 0.0003150818725370295, "loss": 3.4157, "step": 80645 }, { "epoch": 5.4796847397744255, "grad_norm": 0.7267497777938843, "learning_rate": 0.00031503940752819677, "loss": 3.5233, "step": 80650 }, { "epoch": 5.480024459845088, "grad_norm": 1.007405161857605, "learning_rate": 0.00031499694251936405, "loss": 3.547, "step": 80655 }, { "epoch": 5.48036417991575, "grad_norm": 0.83275306224823, "learning_rate": 0.0003149544775105314, "loss": 3.3576, "step": 80660 }, { "epoch": 5.480703899986411, "grad_norm": 1.0754060745239258, "learning_rate": 0.0003149120125016986, "loss": 3.3933, "step": 80665 }, { "epoch": 5.481043620057073, "grad_norm": 0.7492450475692749, "learning_rate": 0.0003148695474928659, "loss": 3.4898, "step": 80670 }, { "epoch": 5.481383340127735, "grad_norm": 1.040816307067871, "learning_rate": 0.00031482708248403317, "loss": 3.6259, "step": 80675 }, { "epoch": 5.481723060198396, "grad_norm": 0.8718187808990479, "learning_rate": 0.00031478461747520045, "loss": 3.4567, "step": 80680 }, { "epoch": 5.482062780269058, "grad_norm": 0.962226390838623, "learning_rate": 0.0003147421524663677, "loss": 3.5857, "step": 80685 }, { "epoch": 5.48240250033972, "grad_norm": 0.827300488948822, "learning_rate": 0.000314699687457535, "loss": 3.266, "step": 80690 }, { "epoch": 5.4827422204103815, "grad_norm": 1.0786817073822021, "learning_rate": 0.0003146572224487023, "loss": 3.6099, "step": 80695 }, { "epoch": 5.483081940481044, "grad_norm": 0.7140036821365356, "learning_rate": 0.00031461475743986957, "loss": 3.5164, "step": 80700 }, { "epoch": 5.483421660551706, "grad_norm": 0.6709192395210266, "learning_rate": 0.00031457229243103685, "loss": 3.2747, "step": 80705 }, { "epoch": 5.483761380622367, "grad_norm": 0.7061362266540527, "learning_rate": 0.0003145298274222041, "loss": 3.5543, "step": 80710 }, { "epoch": 5.484101100693029, "grad_norm": 0.8526470065116882, "learning_rate": 0.0003144873624133714, "loss": 3.4602, "step": 80715 }, { "epoch": 5.484440820763691, "grad_norm": 0.9458170533180237, "learning_rate": 0.0003144448974045387, "loss": 3.6625, "step": 80720 }, { "epoch": 5.484780540834352, "grad_norm": 0.7395976781845093, "learning_rate": 0.0003144024323957059, "loss": 3.4018, "step": 80725 }, { "epoch": 5.485120260905014, "grad_norm": 0.7012442350387573, "learning_rate": 0.00031435996738687325, "loss": 3.7142, "step": 80730 }, { "epoch": 5.485459980975676, "grad_norm": 1.1292365789413452, "learning_rate": 0.00031431750237804053, "loss": 3.5992, "step": 80735 }, { "epoch": 5.4857997010463375, "grad_norm": 0.8322694301605225, "learning_rate": 0.00031427503736920775, "loss": 3.3813, "step": 80740 }, { "epoch": 5.486139421117, "grad_norm": 0.7894576191902161, "learning_rate": 0.00031423257236037503, "loss": 3.2208, "step": 80745 }, { "epoch": 5.486479141187662, "grad_norm": 0.8858613967895508, "learning_rate": 0.00031419010735154237, "loss": 3.6104, "step": 80750 }, { "epoch": 5.486818861258323, "grad_norm": 0.8454750776290894, "learning_rate": 0.0003141476423427096, "loss": 3.4152, "step": 80755 }, { "epoch": 5.487158581328985, "grad_norm": 0.7485643029212952, "learning_rate": 0.0003141051773338769, "loss": 3.2746, "step": 80760 }, { "epoch": 5.487498301399647, "grad_norm": 0.9465708136558533, "learning_rate": 0.0003140627123250442, "loss": 3.3082, "step": 80765 }, { "epoch": 5.487838021470308, "grad_norm": 0.8312684893608093, "learning_rate": 0.00031402024731621143, "loss": 3.6827, "step": 80770 }, { "epoch": 5.48817774154097, "grad_norm": 0.825872540473938, "learning_rate": 0.0003139777823073787, "loss": 3.0648, "step": 80775 }, { "epoch": 5.488517461611632, "grad_norm": 1.0275200605392456, "learning_rate": 0.000313935317298546, "loss": 3.3599, "step": 80780 }, { "epoch": 5.4888571816822935, "grad_norm": 0.9383062124252319, "learning_rate": 0.0003138928522897133, "loss": 3.3585, "step": 80785 }, { "epoch": 5.489196901752956, "grad_norm": 0.8302780985832214, "learning_rate": 0.00031385038728088055, "loss": 3.4288, "step": 80790 }, { "epoch": 5.489536621823618, "grad_norm": 0.7149053812026978, "learning_rate": 0.00031380792227204783, "loss": 3.509, "step": 80795 }, { "epoch": 5.489876341894279, "grad_norm": 0.7897120118141174, "learning_rate": 0.0003137654572632151, "loss": 3.2316, "step": 80800 }, { "epoch": 5.490216061964941, "grad_norm": 0.8637712001800537, "learning_rate": 0.0003137229922543824, "loss": 3.5196, "step": 80805 }, { "epoch": 5.490555782035603, "grad_norm": 1.3040629625320435, "learning_rate": 0.0003136805272455497, "loss": 3.6338, "step": 80810 }, { "epoch": 5.490895502106264, "grad_norm": 0.8398987650871277, "learning_rate": 0.0003136380622367169, "loss": 3.5479, "step": 80815 }, { "epoch": 5.491235222176926, "grad_norm": 0.9866546988487244, "learning_rate": 0.00031359559722788423, "loss": 3.2831, "step": 80820 }, { "epoch": 5.491574942247588, "grad_norm": 0.9165909290313721, "learning_rate": 0.0003135531322190515, "loss": 3.4444, "step": 80825 }, { "epoch": 5.4919146623182495, "grad_norm": 0.9387881755828857, "learning_rate": 0.0003135106672102188, "loss": 3.4661, "step": 80830 }, { "epoch": 5.492254382388912, "grad_norm": 0.8058584332466125, "learning_rate": 0.0003134682022013861, "loss": 3.4278, "step": 80835 }, { "epoch": 5.492594102459574, "grad_norm": 0.7975155115127563, "learning_rate": 0.00031342573719255336, "loss": 3.6128, "step": 80840 }, { "epoch": 5.492933822530235, "grad_norm": 1.0221580266952515, "learning_rate": 0.00031338327218372064, "loss": 3.177, "step": 80845 }, { "epoch": 5.493273542600897, "grad_norm": 0.8448030352592468, "learning_rate": 0.00031334080717488786, "loss": 3.3803, "step": 80850 }, { "epoch": 5.493613262671559, "grad_norm": 0.9093098044395447, "learning_rate": 0.0003132983421660552, "loss": 3.3047, "step": 80855 }, { "epoch": 5.49395298274222, "grad_norm": 0.7240036129951477, "learning_rate": 0.0003132558771572225, "loss": 3.7104, "step": 80860 }, { "epoch": 5.494292702812882, "grad_norm": 0.8219504356384277, "learning_rate": 0.0003132134121483897, "loss": 3.3929, "step": 80865 }, { "epoch": 5.494632422883544, "grad_norm": 0.91567462682724, "learning_rate": 0.00031317094713955704, "loss": 3.7203, "step": 80870 }, { "epoch": 5.4949721429542056, "grad_norm": 0.9021643996238708, "learning_rate": 0.0003131284821307243, "loss": 3.5359, "step": 80875 }, { "epoch": 5.495311863024868, "grad_norm": 0.9679797291755676, "learning_rate": 0.00031308601712189154, "loss": 3.6355, "step": 80880 }, { "epoch": 5.49565158309553, "grad_norm": 0.8407672047615051, "learning_rate": 0.0003130435521130589, "loss": 3.2346, "step": 80885 }, { "epoch": 5.495991303166191, "grad_norm": 0.8310339450836182, "learning_rate": 0.00031300108710422616, "loss": 3.4323, "step": 80890 }, { "epoch": 5.496331023236853, "grad_norm": 0.8400173783302307, "learning_rate": 0.0003129586220953934, "loss": 3.4867, "step": 80895 }, { "epoch": 5.496670743307515, "grad_norm": 0.8919368386268616, "learning_rate": 0.00031291615708656066, "loss": 3.1451, "step": 80900 }, { "epoch": 5.497010463378176, "grad_norm": 0.9530535936355591, "learning_rate": 0.000312873692077728, "loss": 3.7148, "step": 80905 }, { "epoch": 5.497350183448838, "grad_norm": 1.0884747505187988, "learning_rate": 0.0003128312270688952, "loss": 3.3274, "step": 80910 }, { "epoch": 5.4976899035195, "grad_norm": 0.8325161933898926, "learning_rate": 0.0003127887620600625, "loss": 3.8002, "step": 80915 }, { "epoch": 5.498029623590162, "grad_norm": 0.7367621064186096, "learning_rate": 0.00031274629705122984, "loss": 3.4238, "step": 80920 }, { "epoch": 5.498369343660824, "grad_norm": 1.0020225048065186, "learning_rate": 0.00031270383204239706, "loss": 3.464, "step": 80925 }, { "epoch": 5.498709063731486, "grad_norm": 1.4268810749053955, "learning_rate": 0.00031266136703356434, "loss": 3.7458, "step": 80930 }, { "epoch": 5.499048783802147, "grad_norm": 0.941967248916626, "learning_rate": 0.0003126189020247316, "loss": 3.544, "step": 80935 }, { "epoch": 5.499388503872809, "grad_norm": 0.6870529055595398, "learning_rate": 0.0003125764370158989, "loss": 3.3328, "step": 80940 }, { "epoch": 5.499728223943471, "grad_norm": 0.7225164175033569, "learning_rate": 0.0003125339720070662, "loss": 3.5093, "step": 80945 }, { "epoch": 5.500067944014132, "grad_norm": 0.8607456088066101, "learning_rate": 0.00031249150699823346, "loss": 3.4201, "step": 80950 }, { "epoch": 5.500407664084794, "grad_norm": 0.8955379128456116, "learning_rate": 0.00031244904198940074, "loss": 3.501, "step": 80955 }, { "epoch": 5.500747384155456, "grad_norm": 1.0551795959472656, "learning_rate": 0.000312406576980568, "loss": 3.5283, "step": 80960 }, { "epoch": 5.501087104226118, "grad_norm": 0.8208785057067871, "learning_rate": 0.0003123641119717353, "loss": 3.1868, "step": 80965 }, { "epoch": 5.50142682429678, "grad_norm": 0.8330332636833191, "learning_rate": 0.00031232164696290253, "loss": 3.5737, "step": 80970 }, { "epoch": 5.501766544367442, "grad_norm": 0.8054198026657104, "learning_rate": 0.00031227918195406986, "loss": 3.3362, "step": 80975 }, { "epoch": 5.502106264438103, "grad_norm": 0.8531404137611389, "learning_rate": 0.00031223671694523714, "loss": 3.6338, "step": 80980 }, { "epoch": 5.502445984508765, "grad_norm": 0.8784241676330566, "learning_rate": 0.00031219425193640437, "loss": 3.4301, "step": 80985 }, { "epoch": 5.502785704579426, "grad_norm": 1.2337952852249146, "learning_rate": 0.0003121517869275717, "loss": 3.2355, "step": 80990 }, { "epoch": 5.503125424650088, "grad_norm": 0.9148406982421875, "learning_rate": 0.000312109321918739, "loss": 3.0834, "step": 80995 }, { "epoch": 5.50346514472075, "grad_norm": 0.8355448842048645, "learning_rate": 0.0003120668569099062, "loss": 3.5353, "step": 81000 }, { "epoch": 5.5038048647914115, "grad_norm": 0.8001610040664673, "learning_rate": 0.0003120243919010735, "loss": 3.5185, "step": 81005 }, { "epoch": 5.504144584862074, "grad_norm": 0.7142758965492249, "learning_rate": 0.0003119819268922408, "loss": 3.2596, "step": 81010 }, { "epoch": 5.504484304932736, "grad_norm": 1.0178089141845703, "learning_rate": 0.0003119394618834081, "loss": 3.3429, "step": 81015 }, { "epoch": 5.504824025003397, "grad_norm": 0.7752333879470825, "learning_rate": 0.00031189699687457533, "loss": 3.5237, "step": 81020 }, { "epoch": 5.505163745074059, "grad_norm": 0.9457226991653442, "learning_rate": 0.00031185453186574266, "loss": 3.3177, "step": 81025 }, { "epoch": 5.505503465144721, "grad_norm": 0.7255674004554749, "learning_rate": 0.00031181206685690994, "loss": 3.3541, "step": 81030 }, { "epoch": 5.505843185215382, "grad_norm": 0.8334184885025024, "learning_rate": 0.00031176960184807717, "loss": 3.4337, "step": 81035 }, { "epoch": 5.506182905286044, "grad_norm": 0.9134833812713623, "learning_rate": 0.00031172713683924445, "loss": 3.4832, "step": 81040 }, { "epoch": 5.506522625356706, "grad_norm": 1.888919472694397, "learning_rate": 0.0003116846718304118, "loss": 3.5251, "step": 81045 }, { "epoch": 5.5068623454273675, "grad_norm": 0.7706668972969055, "learning_rate": 0.000311642206821579, "loss": 3.4754, "step": 81050 }, { "epoch": 5.50720206549803, "grad_norm": 0.8986238837242126, "learning_rate": 0.0003115997418127463, "loss": 3.5841, "step": 81055 }, { "epoch": 5.507541785568692, "grad_norm": 1.1827826499938965, "learning_rate": 0.0003115572768039136, "loss": 3.3321, "step": 81060 }, { "epoch": 5.507881505639353, "grad_norm": 0.8335099816322327, "learning_rate": 0.00031151481179508085, "loss": 3.4912, "step": 81065 }, { "epoch": 5.508221225710015, "grad_norm": 0.7566081285476685, "learning_rate": 0.00031147234678624813, "loss": 3.5137, "step": 81070 }, { "epoch": 5.508560945780677, "grad_norm": 1.066643476486206, "learning_rate": 0.0003114298817774154, "loss": 3.4599, "step": 81075 }, { "epoch": 5.508900665851338, "grad_norm": 0.9735884666442871, "learning_rate": 0.0003113874167685827, "loss": 3.1983, "step": 81080 }, { "epoch": 5.509240385922, "grad_norm": 0.9810009598731995, "learning_rate": 0.00031134495175974997, "loss": 3.419, "step": 81085 }, { "epoch": 5.509580105992662, "grad_norm": 0.8970182538032532, "learning_rate": 0.00031130248675091725, "loss": 3.3858, "step": 81090 }, { "epoch": 5.5099198260633235, "grad_norm": 1.007788896560669, "learning_rate": 0.00031126002174208453, "loss": 3.5593, "step": 81095 }, { "epoch": 5.510259546133986, "grad_norm": 0.8802146911621094, "learning_rate": 0.0003112175567332518, "loss": 3.5622, "step": 81100 }, { "epoch": 5.510599266204648, "grad_norm": 1.0850130319595337, "learning_rate": 0.0003111750917244191, "loss": 3.5951, "step": 81105 }, { "epoch": 5.510938986275309, "grad_norm": 0.9907547831535339, "learning_rate": 0.0003111326267155863, "loss": 3.5906, "step": 81110 }, { "epoch": 5.511278706345971, "grad_norm": 0.7389823794364929, "learning_rate": 0.00031109016170675365, "loss": 3.3717, "step": 81115 }, { "epoch": 5.511618426416633, "grad_norm": 0.6855267286300659, "learning_rate": 0.00031104769669792093, "loss": 3.4491, "step": 81120 }, { "epoch": 5.511958146487294, "grad_norm": 0.8731095194816589, "learning_rate": 0.00031100523168908816, "loss": 3.2034, "step": 81125 }, { "epoch": 5.512297866557956, "grad_norm": 1.0679022073745728, "learning_rate": 0.0003109627666802555, "loss": 3.5769, "step": 81130 }, { "epoch": 5.512637586628618, "grad_norm": 0.9910141229629517, "learning_rate": 0.00031092030167142277, "loss": 3.4737, "step": 81135 }, { "epoch": 5.5129773066992795, "grad_norm": 0.8359428644180298, "learning_rate": 0.00031087783666259, "loss": 3.3912, "step": 81140 }, { "epoch": 5.513317026769942, "grad_norm": 1.4860799312591553, "learning_rate": 0.0003108353716537573, "loss": 3.5919, "step": 81145 }, { "epoch": 5.513656746840604, "grad_norm": 0.8569153547286987, "learning_rate": 0.0003107929066449246, "loss": 3.5638, "step": 81150 }, { "epoch": 5.513996466911265, "grad_norm": 0.6971043348312378, "learning_rate": 0.00031075044163609184, "loss": 3.363, "step": 81155 }, { "epoch": 5.514336186981927, "grad_norm": 1.0334975719451904, "learning_rate": 0.0003107079766272591, "loss": 3.7928, "step": 81160 }, { "epoch": 5.514675907052589, "grad_norm": 0.9720942974090576, "learning_rate": 0.00031066551161842645, "loss": 3.652, "step": 81165 }, { "epoch": 5.51501562712325, "grad_norm": 1.0383622646331787, "learning_rate": 0.0003106230466095937, "loss": 3.5169, "step": 81170 }, { "epoch": 5.515355347193912, "grad_norm": 1.071237564086914, "learning_rate": 0.00031058058160076096, "loss": 3.3401, "step": 81175 }, { "epoch": 5.515695067264574, "grad_norm": 0.8561421632766724, "learning_rate": 0.0003105381165919283, "loss": 3.528, "step": 81180 }, { "epoch": 5.516034787335236, "grad_norm": 1.0647273063659668, "learning_rate": 0.00031049565158309557, "loss": 3.5147, "step": 81185 }, { "epoch": 5.516374507405898, "grad_norm": 0.8341363072395325, "learning_rate": 0.0003104531865742628, "loss": 3.5733, "step": 81190 }, { "epoch": 5.516714227476559, "grad_norm": 0.9674589037895203, "learning_rate": 0.0003104107215654301, "loss": 3.4614, "step": 81195 }, { "epoch": 5.517053947547221, "grad_norm": 0.7738350033760071, "learning_rate": 0.0003103682565565974, "loss": 3.5352, "step": 81200 }, { "epoch": 5.517393667617883, "grad_norm": 0.9460663199424744, "learning_rate": 0.00031032579154776464, "loss": 3.3693, "step": 81205 }, { "epoch": 5.517733387688544, "grad_norm": 0.9426915049552917, "learning_rate": 0.0003102833265389319, "loss": 3.6383, "step": 81210 }, { "epoch": 5.518073107759206, "grad_norm": 0.7740843296051025, "learning_rate": 0.00031024086153009925, "loss": 3.5264, "step": 81215 }, { "epoch": 5.518412827829868, "grad_norm": 0.763244092464447, "learning_rate": 0.0003101983965212665, "loss": 3.5959, "step": 81220 }, { "epoch": 5.5187525479005295, "grad_norm": 0.8581451177597046, "learning_rate": 0.00031015593151243376, "loss": 3.7106, "step": 81225 }, { "epoch": 5.519092267971192, "grad_norm": 0.9373214840888977, "learning_rate": 0.00031011346650360104, "loss": 3.3907, "step": 81230 }, { "epoch": 5.519431988041854, "grad_norm": 0.784247636795044, "learning_rate": 0.0003100710014947683, "loss": 3.6591, "step": 81235 }, { "epoch": 5.519771708112515, "grad_norm": 0.7847091555595398, "learning_rate": 0.0003100285364859356, "loss": 3.7057, "step": 81240 }, { "epoch": 5.520111428183177, "grad_norm": 0.7519651055335999, "learning_rate": 0.0003099860714771029, "loss": 3.736, "step": 81245 }, { "epoch": 5.520451148253839, "grad_norm": 0.8794171810150146, "learning_rate": 0.00030994360646827016, "loss": 3.7029, "step": 81250 }, { "epoch": 5.5207908683245, "grad_norm": 0.8762409687042236, "learning_rate": 0.00030990114145943744, "loss": 3.4056, "step": 81255 }, { "epoch": 5.521130588395162, "grad_norm": 1.152618169784546, "learning_rate": 0.0003098586764506047, "loss": 3.7391, "step": 81260 }, { "epoch": 5.521470308465824, "grad_norm": 1.0222599506378174, "learning_rate": 0.00030981621144177194, "loss": 3.383, "step": 81265 }, { "epoch": 5.5218100285364855, "grad_norm": 0.9656743407249451, "learning_rate": 0.0003097737464329393, "loss": 3.3963, "step": 81270 }, { "epoch": 5.522149748607148, "grad_norm": 0.9820765852928162, "learning_rate": 0.00030973128142410656, "loss": 3.3651, "step": 81275 }, { "epoch": 5.52248946867781, "grad_norm": 0.9623928666114807, "learning_rate": 0.0003096888164152738, "loss": 3.6043, "step": 81280 }, { "epoch": 5.522829188748471, "grad_norm": 0.8027721047401428, "learning_rate": 0.0003096463514064411, "loss": 3.6281, "step": 81285 }, { "epoch": 5.523168908819133, "grad_norm": 1.0896185636520386, "learning_rate": 0.0003096038863976084, "loss": 3.704, "step": 81290 }, { "epoch": 5.523508628889795, "grad_norm": 1.062348484992981, "learning_rate": 0.0003095614213887756, "loss": 3.4156, "step": 81295 }, { "epoch": 5.523848348960456, "grad_norm": 0.8389953970909119, "learning_rate": 0.0003095189563799429, "loss": 3.4855, "step": 81300 }, { "epoch": 5.524188069031118, "grad_norm": 0.7962811589241028, "learning_rate": 0.00030947649137111024, "loss": 3.5014, "step": 81305 }, { "epoch": 5.52452778910178, "grad_norm": 1.152745008468628, "learning_rate": 0.00030943402636227747, "loss": 3.6012, "step": 81310 }, { "epoch": 5.5248675091724415, "grad_norm": 0.880626380443573, "learning_rate": 0.00030939156135344475, "loss": 3.3709, "step": 81315 }, { "epoch": 5.525207229243104, "grad_norm": 1.0217185020446777, "learning_rate": 0.0003093490963446121, "loss": 3.2734, "step": 81320 }, { "epoch": 5.525546949313766, "grad_norm": 0.7716484665870667, "learning_rate": 0.0003093066313357793, "loss": 3.3719, "step": 81325 }, { "epoch": 5.525886669384427, "grad_norm": 0.7668516039848328, "learning_rate": 0.0003092641663269466, "loss": 3.4088, "step": 81330 }, { "epoch": 5.526226389455089, "grad_norm": 0.8431546688079834, "learning_rate": 0.00030922170131811387, "loss": 3.7341, "step": 81335 }, { "epoch": 5.526566109525751, "grad_norm": 0.7565270066261292, "learning_rate": 0.00030917923630928115, "loss": 3.5849, "step": 81340 }, { "epoch": 5.526905829596412, "grad_norm": 1.013279914855957, "learning_rate": 0.0003091367713004484, "loss": 3.4597, "step": 81345 }, { "epoch": 5.527245549667074, "grad_norm": 0.8725472092628479, "learning_rate": 0.0003090943062916157, "loss": 3.6743, "step": 81350 }, { "epoch": 5.527585269737736, "grad_norm": 0.9197516441345215, "learning_rate": 0.00030905184128278304, "loss": 3.5573, "step": 81355 }, { "epoch": 5.5279249898083975, "grad_norm": 0.8649563789367676, "learning_rate": 0.00030900937627395027, "loss": 3.2833, "step": 81360 }, { "epoch": 5.52826470987906, "grad_norm": 0.9522911906242371, "learning_rate": 0.00030896691126511755, "loss": 3.4647, "step": 81365 }, { "epoch": 5.528604429949722, "grad_norm": 0.971309244632721, "learning_rate": 0.0003089244462562848, "loss": 3.098, "step": 81370 }, { "epoch": 5.528944150020383, "grad_norm": 0.8210400342941284, "learning_rate": 0.0003088819812474521, "loss": 3.4412, "step": 81375 }, { "epoch": 5.529283870091045, "grad_norm": 0.922982931137085, "learning_rate": 0.0003088395162386194, "loss": 3.4404, "step": 81380 }, { "epoch": 5.529623590161707, "grad_norm": 0.864836573600769, "learning_rate": 0.00030879705122978667, "loss": 3.6085, "step": 81385 }, { "epoch": 5.529963310232368, "grad_norm": 0.7084240913391113, "learning_rate": 0.00030875458622095395, "loss": 3.46, "step": 81390 }, { "epoch": 5.53030303030303, "grad_norm": 0.9100731611251831, "learning_rate": 0.0003087121212121212, "loss": 3.3204, "step": 81395 }, { "epoch": 5.530642750373692, "grad_norm": 1.1598457098007202, "learning_rate": 0.0003086696562032885, "loss": 3.3535, "step": 81400 }, { "epoch": 5.5309824704443535, "grad_norm": 0.759475827217102, "learning_rate": 0.00030862719119445573, "loss": 3.5396, "step": 81405 }, { "epoch": 5.531322190515016, "grad_norm": 0.9689304828643799, "learning_rate": 0.00030858472618562307, "loss": 3.4808, "step": 81410 }, { "epoch": 5.531661910585678, "grad_norm": 0.9662784934043884, "learning_rate": 0.00030854226117679035, "loss": 3.638, "step": 81415 }, { "epoch": 5.532001630656339, "grad_norm": 0.7969033718109131, "learning_rate": 0.0003084997961679576, "loss": 3.6212, "step": 81420 }, { "epoch": 5.532341350727001, "grad_norm": 0.8373355865478516, "learning_rate": 0.0003084573311591249, "loss": 3.2992, "step": 81425 }, { "epoch": 5.532681070797663, "grad_norm": 0.75406414270401, "learning_rate": 0.0003084148661502922, "loss": 3.3365, "step": 81430 }, { "epoch": 5.533020790868324, "grad_norm": 0.8164325952529907, "learning_rate": 0.0003083724011414594, "loss": 3.5745, "step": 81435 }, { "epoch": 5.533360510938986, "grad_norm": 0.8856513500213623, "learning_rate": 0.00030832993613262675, "loss": 3.5332, "step": 81440 }, { "epoch": 5.533700231009648, "grad_norm": 0.92999666929245, "learning_rate": 0.00030828747112379403, "loss": 3.5749, "step": 81445 }, { "epoch": 5.5340399510803095, "grad_norm": 1.1994752883911133, "learning_rate": 0.00030824500611496125, "loss": 3.2454, "step": 81450 }, { "epoch": 5.534379671150972, "grad_norm": 0.7879796624183655, "learning_rate": 0.00030820254110612853, "loss": 3.4838, "step": 81455 }, { "epoch": 5.534719391221634, "grad_norm": 0.9186739921569824, "learning_rate": 0.00030816007609729587, "loss": 3.4186, "step": 81460 }, { "epoch": 5.535059111292295, "grad_norm": 0.9426395297050476, "learning_rate": 0.0003081176110884631, "loss": 3.5337, "step": 81465 }, { "epoch": 5.535398831362957, "grad_norm": 0.9048250317573547, "learning_rate": 0.0003080751460796304, "loss": 3.3633, "step": 81470 }, { "epoch": 5.535738551433619, "grad_norm": 0.8763076663017273, "learning_rate": 0.0003080326810707977, "loss": 3.6328, "step": 81475 }, { "epoch": 5.53607827150428, "grad_norm": 0.9969365000724792, "learning_rate": 0.00030799021606196493, "loss": 3.4891, "step": 81480 }, { "epoch": 5.536417991574942, "grad_norm": 1.0034382343292236, "learning_rate": 0.0003079477510531322, "loss": 3.4807, "step": 81485 }, { "epoch": 5.536757711645604, "grad_norm": 0.7597475051879883, "learning_rate": 0.0003079052860442995, "loss": 3.4156, "step": 81490 }, { "epoch": 5.537097431716266, "grad_norm": 0.9130927324295044, "learning_rate": 0.0003078628210354668, "loss": 3.6135, "step": 81495 }, { "epoch": 5.537437151786928, "grad_norm": 0.8239119052886963, "learning_rate": 0.00030782035602663405, "loss": 3.3812, "step": 81500 }, { "epoch": 5.53777687185759, "grad_norm": 0.8413383364677429, "learning_rate": 0.00030777789101780133, "loss": 3.8013, "step": 81505 }, { "epoch": 5.538116591928251, "grad_norm": 1.042863130569458, "learning_rate": 0.0003077354260089686, "loss": 3.2555, "step": 81510 }, { "epoch": 5.538456311998913, "grad_norm": 1.0980210304260254, "learning_rate": 0.0003076929610001359, "loss": 3.4268, "step": 81515 }, { "epoch": 5.538796032069575, "grad_norm": 0.9523441195487976, "learning_rate": 0.0003076504959913032, "loss": 3.4618, "step": 81520 }, { "epoch": 5.539135752140236, "grad_norm": 0.7750239968299866, "learning_rate": 0.00030760803098247045, "loss": 3.6177, "step": 81525 }, { "epoch": 5.539475472210898, "grad_norm": 0.8557862043380737, "learning_rate": 0.00030756556597363773, "loss": 3.6136, "step": 81530 }, { "epoch": 5.53981519228156, "grad_norm": 0.9203059673309326, "learning_rate": 0.000307523100964805, "loss": 3.611, "step": 81535 }, { "epoch": 5.540154912352222, "grad_norm": 0.9897331595420837, "learning_rate": 0.0003074806359559723, "loss": 3.5575, "step": 81540 }, { "epoch": 5.540494632422884, "grad_norm": 0.725673496723175, "learning_rate": 0.0003074381709471396, "loss": 3.503, "step": 81545 }, { "epoch": 5.540834352493546, "grad_norm": 0.845152735710144, "learning_rate": 0.00030739570593830685, "loss": 3.5146, "step": 81550 }, { "epoch": 5.541174072564207, "grad_norm": 0.92283034324646, "learning_rate": 0.00030735324092947414, "loss": 3.5243, "step": 81555 }, { "epoch": 5.541513792634869, "grad_norm": 0.7134180068969727, "learning_rate": 0.00030731077592064136, "loss": 3.6435, "step": 81560 }, { "epoch": 5.541853512705531, "grad_norm": 0.815041720867157, "learning_rate": 0.0003072683109118087, "loss": 3.232, "step": 81565 }, { "epoch": 5.542193232776192, "grad_norm": 0.95195072889328, "learning_rate": 0.000307225845902976, "loss": 3.4998, "step": 81570 }, { "epoch": 5.542532952846854, "grad_norm": 1.0015026330947876, "learning_rate": 0.0003071833808941432, "loss": 3.2192, "step": 81575 }, { "epoch": 5.542872672917516, "grad_norm": 0.7975209355354309, "learning_rate": 0.00030714091588531054, "loss": 3.5386, "step": 81580 }, { "epoch": 5.543212392988178, "grad_norm": 1.0146900415420532, "learning_rate": 0.0003070984508764778, "loss": 3.5969, "step": 81585 }, { "epoch": 5.54355211305884, "grad_norm": 1.1870993375778198, "learning_rate": 0.00030705598586764504, "loss": 3.4088, "step": 81590 }, { "epoch": 5.543891833129502, "grad_norm": 0.7946213483810425, "learning_rate": 0.0003070135208588123, "loss": 3.3111, "step": 81595 }, { "epoch": 5.544231553200163, "grad_norm": 0.741422712802887, "learning_rate": 0.00030697105584997966, "loss": 3.4492, "step": 81600 }, { "epoch": 5.544571273270825, "grad_norm": 0.8405625820159912, "learning_rate": 0.0003069285908411469, "loss": 3.5581, "step": 81605 }, { "epoch": 5.544910993341487, "grad_norm": 0.791593611240387, "learning_rate": 0.00030688612583231416, "loss": 3.5571, "step": 81610 }, { "epoch": 5.545250713412148, "grad_norm": 0.9306030869483948, "learning_rate": 0.0003068436608234815, "loss": 3.2705, "step": 81615 }, { "epoch": 5.54559043348281, "grad_norm": 1.1521319150924683, "learning_rate": 0.0003068011958146487, "loss": 3.246, "step": 81620 }, { "epoch": 5.545930153553472, "grad_norm": 0.9561100602149963, "learning_rate": 0.000306758730805816, "loss": 3.5496, "step": 81625 }, { "epoch": 5.546269873624134, "grad_norm": 1.022749900817871, "learning_rate": 0.0003067162657969833, "loss": 3.4377, "step": 81630 }, { "epoch": 5.546609593694796, "grad_norm": 0.7863710522651672, "learning_rate": 0.00030667380078815056, "loss": 3.256, "step": 81635 }, { "epoch": 5.546949313765458, "grad_norm": 1.0092450380325317, "learning_rate": 0.00030663133577931784, "loss": 3.0453, "step": 81640 }, { "epoch": 5.547289033836119, "grad_norm": 0.7912927865982056, "learning_rate": 0.0003065888707704851, "loss": 3.3555, "step": 81645 }, { "epoch": 5.547628753906781, "grad_norm": 0.6961928606033325, "learning_rate": 0.0003065464057616524, "loss": 3.734, "step": 81650 }, { "epoch": 5.547968473977443, "grad_norm": 0.9818047881126404, "learning_rate": 0.0003065039407528197, "loss": 3.1864, "step": 81655 }, { "epoch": 5.548308194048104, "grad_norm": 0.6745341420173645, "learning_rate": 0.00030646147574398696, "loss": 3.5434, "step": 81660 }, { "epoch": 5.548647914118766, "grad_norm": 0.8936575055122375, "learning_rate": 0.0003064190107351542, "loss": 3.5217, "step": 81665 }, { "epoch": 5.5489876341894275, "grad_norm": 1.0344130992889404, "learning_rate": 0.0003063765457263215, "loss": 3.5695, "step": 81670 }, { "epoch": 5.54932735426009, "grad_norm": 0.7889003753662109, "learning_rate": 0.0003063340807174888, "loss": 3.1696, "step": 81675 }, { "epoch": 5.549667074330752, "grad_norm": 1.233446717262268, "learning_rate": 0.00030629161570865603, "loss": 3.4801, "step": 81680 }, { "epoch": 5.550006794401413, "grad_norm": 0.7653019428253174, "learning_rate": 0.00030624915069982336, "loss": 3.4087, "step": 81685 }, { "epoch": 5.550346514472075, "grad_norm": 0.867329478263855, "learning_rate": 0.00030620668569099064, "loss": 3.3949, "step": 81690 }, { "epoch": 5.550686234542737, "grad_norm": 0.950331449508667, "learning_rate": 0.0003061642206821579, "loss": 3.4376, "step": 81695 }, { "epoch": 5.551025954613398, "grad_norm": 1.0427284240722656, "learning_rate": 0.00030612175567332515, "loss": 3.5348, "step": 81700 }, { "epoch": 5.55136567468406, "grad_norm": 0.9118134379386902, "learning_rate": 0.0003060792906644925, "loss": 3.4512, "step": 81705 }, { "epoch": 5.551705394754722, "grad_norm": 0.9592965841293335, "learning_rate": 0.00030603682565565976, "loss": 3.157, "step": 81710 }, { "epoch": 5.5520451148253835, "grad_norm": 0.9162136316299438, "learning_rate": 0.000305994360646827, "loss": 3.4959, "step": 81715 }, { "epoch": 5.552384834896046, "grad_norm": 1.0603339672088623, "learning_rate": 0.0003059518956379943, "loss": 3.4895, "step": 81720 }, { "epoch": 5.552724554966708, "grad_norm": 1.4533050060272217, "learning_rate": 0.0003059094306291616, "loss": 3.6457, "step": 81725 }, { "epoch": 5.553064275037369, "grad_norm": 0.9384618997573853, "learning_rate": 0.00030586696562032883, "loss": 3.33, "step": 81730 }, { "epoch": 5.553403995108031, "grad_norm": 2.9775397777557373, "learning_rate": 0.00030582450061149616, "loss": 3.5713, "step": 81735 }, { "epoch": 5.553743715178693, "grad_norm": 0.8903524875640869, "learning_rate": 0.00030578203560266344, "loss": 3.7137, "step": 81740 }, { "epoch": 5.554083435249354, "grad_norm": 0.730891764163971, "learning_rate": 0.00030573957059383067, "loss": 3.5549, "step": 81745 }, { "epoch": 5.554423155320016, "grad_norm": 1.0943650007247925, "learning_rate": 0.00030569710558499795, "loss": 3.2535, "step": 81750 }, { "epoch": 5.554762875390678, "grad_norm": 0.7178515195846558, "learning_rate": 0.0003056546405761653, "loss": 3.3708, "step": 81755 }, { "epoch": 5.5551025954613396, "grad_norm": 0.8439033031463623, "learning_rate": 0.0003056121755673325, "loss": 3.4483, "step": 81760 }, { "epoch": 5.555442315532002, "grad_norm": 1.1084353923797607, "learning_rate": 0.0003055697105584998, "loss": 3.3225, "step": 81765 }, { "epoch": 5.555782035602664, "grad_norm": 0.864029049873352, "learning_rate": 0.0003055272455496671, "loss": 3.3363, "step": 81770 }, { "epoch": 5.556121755673325, "grad_norm": 1.131706714630127, "learning_rate": 0.00030548478054083435, "loss": 3.5936, "step": 81775 }, { "epoch": 5.556461475743987, "grad_norm": 0.9713962078094482, "learning_rate": 0.00030544231553200163, "loss": 3.4988, "step": 81780 }, { "epoch": 5.556801195814649, "grad_norm": 0.815556526184082, "learning_rate": 0.0003053998505231689, "loss": 3.704, "step": 81785 }, { "epoch": 5.55714091588531, "grad_norm": 0.9397998452186584, "learning_rate": 0.0003053573855143362, "loss": 3.3946, "step": 81790 }, { "epoch": 5.557480635955972, "grad_norm": 0.7259123921394348, "learning_rate": 0.00030531492050550347, "loss": 3.2194, "step": 81795 }, { "epoch": 5.557820356026634, "grad_norm": 0.9877852201461792, "learning_rate": 0.00030527245549667075, "loss": 3.4022, "step": 81800 }, { "epoch": 5.558160076097296, "grad_norm": 0.8011809587478638, "learning_rate": 0.00030522999048783803, "loss": 3.5469, "step": 81805 }, { "epoch": 5.558499796167958, "grad_norm": 1.0196787118911743, "learning_rate": 0.0003051875254790053, "loss": 3.3883, "step": 81810 }, { "epoch": 5.55883951623862, "grad_norm": 0.8918800950050354, "learning_rate": 0.0003051450604701726, "loss": 3.5412, "step": 81815 }, { "epoch": 5.559179236309281, "grad_norm": 0.9680745005607605, "learning_rate": 0.0003051025954613398, "loss": 3.5482, "step": 81820 }, { "epoch": 5.559518956379943, "grad_norm": 1.0865769386291504, "learning_rate": 0.00030506013045250715, "loss": 3.4213, "step": 81825 }, { "epoch": 5.559858676450605, "grad_norm": 0.8647723197937012, "learning_rate": 0.00030501766544367443, "loss": 3.4375, "step": 81830 }, { "epoch": 5.560198396521266, "grad_norm": 0.9392686486244202, "learning_rate": 0.00030497520043484166, "loss": 3.5077, "step": 81835 }, { "epoch": 5.560538116591928, "grad_norm": 0.91786789894104, "learning_rate": 0.000304932735426009, "loss": 3.5334, "step": 81840 }, { "epoch": 5.56087783666259, "grad_norm": 0.9021332859992981, "learning_rate": 0.00030489027041717627, "loss": 3.527, "step": 81845 }, { "epoch": 5.561217556733252, "grad_norm": 0.8272998929023743, "learning_rate": 0.0003048478054083435, "loss": 3.4024, "step": 81850 }, { "epoch": 5.561557276803914, "grad_norm": 0.9198755621910095, "learning_rate": 0.0003048053403995108, "loss": 3.657, "step": 81855 }, { "epoch": 5.561896996874576, "grad_norm": 0.832099437713623, "learning_rate": 0.0003047628753906781, "loss": 3.4243, "step": 81860 }, { "epoch": 5.562236716945237, "grad_norm": 0.887787401676178, "learning_rate": 0.0003047204103818454, "loss": 3.043, "step": 81865 }, { "epoch": 5.562576437015899, "grad_norm": 0.9349852800369263, "learning_rate": 0.0003046779453730126, "loss": 3.541, "step": 81870 }, { "epoch": 5.56291615708656, "grad_norm": 0.7738796472549438, "learning_rate": 0.00030463548036417995, "loss": 3.2429, "step": 81875 }, { "epoch": 5.563255877157222, "grad_norm": 0.9025421142578125, "learning_rate": 0.00030459301535534723, "loss": 3.1446, "step": 81880 }, { "epoch": 5.563595597227884, "grad_norm": 0.9583965539932251, "learning_rate": 0.00030455055034651446, "loss": 3.2242, "step": 81885 }, { "epoch": 5.5639353172985455, "grad_norm": 0.8384267091751099, "learning_rate": 0.00030450808533768174, "loss": 3.4183, "step": 81890 }, { "epoch": 5.564275037369208, "grad_norm": 0.885161817073822, "learning_rate": 0.00030446562032884907, "loss": 3.2714, "step": 81895 }, { "epoch": 5.56461475743987, "grad_norm": 0.8946053981781006, "learning_rate": 0.0003044231553200163, "loss": 3.3755, "step": 81900 }, { "epoch": 5.564954477510531, "grad_norm": 0.9393993616104126, "learning_rate": 0.0003043806903111836, "loss": 3.504, "step": 81905 }, { "epoch": 5.565294197581193, "grad_norm": 0.7906018495559692, "learning_rate": 0.0003043382253023509, "loss": 3.5454, "step": 81910 }, { "epoch": 5.565633917651855, "grad_norm": 0.8550724387168884, "learning_rate": 0.00030429576029351814, "loss": 3.4956, "step": 81915 }, { "epoch": 5.565973637722516, "grad_norm": 1.1295901536941528, "learning_rate": 0.0003042532952846854, "loss": 3.0116, "step": 81920 }, { "epoch": 5.566313357793178, "grad_norm": 0.987320065498352, "learning_rate": 0.0003042108302758527, "loss": 3.4867, "step": 81925 }, { "epoch": 5.56665307786384, "grad_norm": 0.8098143339157104, "learning_rate": 0.00030416836526702, "loss": 3.2854, "step": 81930 }, { "epoch": 5.5669927979345015, "grad_norm": 1.0826642513275146, "learning_rate": 0.00030412590025818726, "loss": 3.4618, "step": 81935 }, { "epoch": 5.567332518005164, "grad_norm": 0.7229319214820862, "learning_rate": 0.00030408343524935454, "loss": 3.7403, "step": 81940 }, { "epoch": 5.567672238075826, "grad_norm": 1.0227519273757935, "learning_rate": 0.0003040409702405218, "loss": 3.3575, "step": 81945 }, { "epoch": 5.568011958146487, "grad_norm": 0.928403913974762, "learning_rate": 0.0003039985052316891, "loss": 3.6202, "step": 81950 }, { "epoch": 5.568351678217149, "grad_norm": 1.0340694189071655, "learning_rate": 0.0003039560402228564, "loss": 3.2517, "step": 81955 }, { "epoch": 5.568691398287811, "grad_norm": 0.906521201133728, "learning_rate": 0.0003039135752140236, "loss": 3.4598, "step": 81960 }, { "epoch": 5.569031118358472, "grad_norm": 0.8268351554870605, "learning_rate": 0.00030387111020519094, "loss": 3.433, "step": 81965 }, { "epoch": 5.569370838429134, "grad_norm": 0.8038462400436401, "learning_rate": 0.0003038286451963582, "loss": 3.5709, "step": 81970 }, { "epoch": 5.569710558499796, "grad_norm": 1.0649347305297852, "learning_rate": 0.00030378618018752544, "loss": 3.5176, "step": 81975 }, { "epoch": 5.5700502785704575, "grad_norm": 0.9329836368560791, "learning_rate": 0.0003037437151786928, "loss": 3.4801, "step": 81980 }, { "epoch": 5.57038999864112, "grad_norm": 0.8207629323005676, "learning_rate": 0.00030370125016986006, "loss": 3.5338, "step": 81985 }, { "epoch": 5.570729718711782, "grad_norm": 0.9361269474029541, "learning_rate": 0.0003036587851610273, "loss": 3.2225, "step": 81990 }, { "epoch": 5.571069438782443, "grad_norm": 0.8225507140159607, "learning_rate": 0.00030361632015219456, "loss": 3.4942, "step": 81995 }, { "epoch": 5.571409158853105, "grad_norm": 0.9554885029792786, "learning_rate": 0.0003035738551433619, "loss": 3.4684, "step": 82000 }, { "epoch": 5.571748878923767, "grad_norm": 0.8908816576004028, "learning_rate": 0.0003035313901345291, "loss": 3.3062, "step": 82005 }, { "epoch": 5.572088598994428, "grad_norm": 1.008134365081787, "learning_rate": 0.0003034889251256964, "loss": 3.4236, "step": 82010 }, { "epoch": 5.57242831906509, "grad_norm": 1.0869617462158203, "learning_rate": 0.00030344646011686374, "loss": 3.5957, "step": 82015 }, { "epoch": 5.572768039135752, "grad_norm": 0.9642946124076843, "learning_rate": 0.00030340399510803097, "loss": 3.3498, "step": 82020 }, { "epoch": 5.5731077592064135, "grad_norm": 0.719622015953064, "learning_rate": 0.00030336153009919825, "loss": 3.6525, "step": 82025 }, { "epoch": 5.573447479277076, "grad_norm": 0.7496836185455322, "learning_rate": 0.0003033190650903656, "loss": 3.6613, "step": 82030 }, { "epoch": 5.573787199347738, "grad_norm": 0.9266257286071777, "learning_rate": 0.00030327660008153286, "loss": 3.8335, "step": 82035 }, { "epoch": 5.574126919418399, "grad_norm": 0.861636757850647, "learning_rate": 0.0003032341350727001, "loss": 3.6411, "step": 82040 }, { "epoch": 5.574466639489061, "grad_norm": 0.8968741297721863, "learning_rate": 0.00030319167006386737, "loss": 3.4372, "step": 82045 }, { "epoch": 5.574806359559723, "grad_norm": 0.8568574786186218, "learning_rate": 0.0003031492050550347, "loss": 3.5561, "step": 82050 }, { "epoch": 5.575146079630384, "grad_norm": 0.817346453666687, "learning_rate": 0.0003031067400462019, "loss": 3.3599, "step": 82055 }, { "epoch": 5.575485799701046, "grad_norm": 0.9702475070953369, "learning_rate": 0.0003030642750373692, "loss": 3.429, "step": 82060 }, { "epoch": 5.575825519771708, "grad_norm": 0.8469544053077698, "learning_rate": 0.00030302181002853654, "loss": 3.2703, "step": 82065 }, { "epoch": 5.57616523984237, "grad_norm": 0.7062801718711853, "learning_rate": 0.00030297934501970377, "loss": 3.4251, "step": 82070 }, { "epoch": 5.576504959913032, "grad_norm": 0.8736145496368408, "learning_rate": 0.00030293688001087105, "loss": 3.3132, "step": 82075 }, { "epoch": 5.576844679983694, "grad_norm": 0.8844289183616638, "learning_rate": 0.0003028944150020383, "loss": 3.5241, "step": 82080 }, { "epoch": 5.577184400054355, "grad_norm": 1.1152395009994507, "learning_rate": 0.0003028519499932056, "loss": 3.6191, "step": 82085 }, { "epoch": 5.577524120125017, "grad_norm": 0.8079606890678406, "learning_rate": 0.0003028094849843729, "loss": 3.6467, "step": 82090 }, { "epoch": 5.577863840195679, "grad_norm": 0.951145589351654, "learning_rate": 0.00030276701997554017, "loss": 3.3736, "step": 82095 }, { "epoch": 5.57820356026634, "grad_norm": 1.0148407220840454, "learning_rate": 0.00030272455496670745, "loss": 3.6894, "step": 82100 }, { "epoch": 5.578543280337002, "grad_norm": 0.8269964456558228, "learning_rate": 0.0003026820899578747, "loss": 3.4921, "step": 82105 }, { "epoch": 5.578883000407664, "grad_norm": 0.8297431468963623, "learning_rate": 0.000302639624949042, "loss": 3.4525, "step": 82110 }, { "epoch": 5.579222720478326, "grad_norm": 1.009473204612732, "learning_rate": 0.00030259715994020923, "loss": 3.1861, "step": 82115 }, { "epoch": 5.579562440548988, "grad_norm": 0.9926433563232422, "learning_rate": 0.00030255469493137657, "loss": 3.5976, "step": 82120 }, { "epoch": 5.57990216061965, "grad_norm": 0.9074004888534546, "learning_rate": 0.00030251222992254385, "loss": 3.4014, "step": 82125 }, { "epoch": 5.580241880690311, "grad_norm": 1.0215978622436523, "learning_rate": 0.00030246976491371107, "loss": 3.3353, "step": 82130 }, { "epoch": 5.580581600760973, "grad_norm": 1.1065857410430908, "learning_rate": 0.0003024272999048784, "loss": 3.4733, "step": 82135 }, { "epoch": 5.580921320831635, "grad_norm": 0.9878951907157898, "learning_rate": 0.0003023848348960457, "loss": 3.4551, "step": 82140 }, { "epoch": 5.581261040902296, "grad_norm": 0.8208097219467163, "learning_rate": 0.0003023423698872129, "loss": 3.7223, "step": 82145 }, { "epoch": 5.581600760972958, "grad_norm": 1.0541378259658813, "learning_rate": 0.0003022999048783802, "loss": 3.6887, "step": 82150 }, { "epoch": 5.58194048104362, "grad_norm": 0.8775576949119568, "learning_rate": 0.00030225743986954753, "loss": 3.5467, "step": 82155 }, { "epoch": 5.582280201114282, "grad_norm": 0.9530203938484192, "learning_rate": 0.00030221497486071475, "loss": 3.7089, "step": 82160 }, { "epoch": 5.582619921184944, "grad_norm": 0.822160005569458, "learning_rate": 0.00030217250985188203, "loss": 3.4028, "step": 82165 }, { "epoch": 5.582959641255606, "grad_norm": 1.0128660202026367, "learning_rate": 0.00030213004484304937, "loss": 3.5106, "step": 82170 }, { "epoch": 5.583299361326267, "grad_norm": 0.8754316568374634, "learning_rate": 0.0003020875798342166, "loss": 3.6489, "step": 82175 }, { "epoch": 5.583639081396929, "grad_norm": 0.8674492835998535, "learning_rate": 0.0003020451148253839, "loss": 3.1899, "step": 82180 }, { "epoch": 5.583978801467591, "grad_norm": 0.9375045299530029, "learning_rate": 0.00030200264981655115, "loss": 3.3223, "step": 82185 }, { "epoch": 5.584318521538252, "grad_norm": 0.963342547416687, "learning_rate": 0.00030196018480771843, "loss": 3.4384, "step": 82190 }, { "epoch": 5.584658241608914, "grad_norm": 0.9302758574485779, "learning_rate": 0.0003019177197988857, "loss": 3.2992, "step": 82195 }, { "epoch": 5.584997961679576, "grad_norm": 0.7429613471031189, "learning_rate": 0.000301875254790053, "loss": 3.7038, "step": 82200 }, { "epoch": 5.585337681750238, "grad_norm": 0.8120657205581665, "learning_rate": 0.00030183278978122033, "loss": 3.4356, "step": 82205 }, { "epoch": 5.5856774018209, "grad_norm": 0.8632986545562744, "learning_rate": 0.00030179032477238755, "loss": 3.1139, "step": 82210 }, { "epoch": 5.586017121891562, "grad_norm": 0.9031504392623901, "learning_rate": 0.00030174785976355483, "loss": 3.5911, "step": 82215 }, { "epoch": 5.586356841962223, "grad_norm": 0.8817985653877258, "learning_rate": 0.0003017053947547221, "loss": 3.3429, "step": 82220 }, { "epoch": 5.586696562032885, "grad_norm": 1.2318918704986572, "learning_rate": 0.0003016629297458894, "loss": 3.5567, "step": 82225 }, { "epoch": 5.587036282103547, "grad_norm": 0.9268613457679749, "learning_rate": 0.0003016204647370567, "loss": 3.4792, "step": 82230 }, { "epoch": 5.587376002174208, "grad_norm": 0.9057188034057617, "learning_rate": 0.00030157799972822395, "loss": 3.6509, "step": 82235 }, { "epoch": 5.58771572224487, "grad_norm": 1.0919981002807617, "learning_rate": 0.00030153553471939123, "loss": 3.2178, "step": 82240 }, { "epoch": 5.588055442315532, "grad_norm": 0.8234882950782776, "learning_rate": 0.0003014930697105585, "loss": 3.5765, "step": 82245 }, { "epoch": 5.588395162386194, "grad_norm": 0.8547834157943726, "learning_rate": 0.0003014506047017258, "loss": 3.434, "step": 82250 }, { "epoch": 5.588734882456856, "grad_norm": 0.7923328876495361, "learning_rate": 0.000301408139692893, "loss": 3.6263, "step": 82255 }, { "epoch": 5.589074602527518, "grad_norm": 0.7645610570907593, "learning_rate": 0.00030136567468406035, "loss": 3.7071, "step": 82260 }, { "epoch": 5.589414322598179, "grad_norm": 0.8536590933799744, "learning_rate": 0.00030132320967522763, "loss": 3.3274, "step": 82265 }, { "epoch": 5.589754042668841, "grad_norm": 1.1325973272323608, "learning_rate": 0.00030128074466639486, "loss": 3.5502, "step": 82270 }, { "epoch": 5.590093762739503, "grad_norm": 0.9416400790214539, "learning_rate": 0.0003012382796575622, "loss": 3.3037, "step": 82275 }, { "epoch": 5.590433482810164, "grad_norm": 1.1977800130844116, "learning_rate": 0.0003011958146487295, "loss": 3.3514, "step": 82280 }, { "epoch": 5.590773202880826, "grad_norm": 0.8603570461273193, "learning_rate": 0.0003011533496398967, "loss": 3.1126, "step": 82285 }, { "epoch": 5.591112922951488, "grad_norm": 1.0111265182495117, "learning_rate": 0.000301110884631064, "loss": 3.4435, "step": 82290 }, { "epoch": 5.59145264302215, "grad_norm": 1.0420277118682861, "learning_rate": 0.0003010684196222313, "loss": 3.2959, "step": 82295 }, { "epoch": 5.591792363092812, "grad_norm": 0.8204990029335022, "learning_rate": 0.00030102595461339854, "loss": 3.5823, "step": 82300 }, { "epoch": 5.592132083163474, "grad_norm": 0.8032492995262146, "learning_rate": 0.0003009834896045658, "loss": 3.4152, "step": 82305 }, { "epoch": 5.592471803234135, "grad_norm": 0.8204959630966187, "learning_rate": 0.00030094102459573316, "loss": 3.4062, "step": 82310 }, { "epoch": 5.592811523304797, "grad_norm": 1.252389669418335, "learning_rate": 0.0003008985595869004, "loss": 3.5131, "step": 82315 }, { "epoch": 5.593151243375459, "grad_norm": 0.7937635183334351, "learning_rate": 0.00030085609457806766, "loss": 3.373, "step": 82320 }, { "epoch": 5.59349096344612, "grad_norm": 0.972148597240448, "learning_rate": 0.000300813629569235, "loss": 3.3431, "step": 82325 }, { "epoch": 5.593830683516782, "grad_norm": 1.1939748525619507, "learning_rate": 0.0003007711645604022, "loss": 3.6258, "step": 82330 }, { "epoch": 5.594170403587444, "grad_norm": 0.9143811464309692, "learning_rate": 0.0003007286995515695, "loss": 3.484, "step": 82335 }, { "epoch": 5.594510123658106, "grad_norm": 1.2436470985412598, "learning_rate": 0.0003006862345427368, "loss": 3.646, "step": 82340 }, { "epoch": 5.594849843728768, "grad_norm": 1.177282691001892, "learning_rate": 0.00030064376953390406, "loss": 3.4994, "step": 82345 }, { "epoch": 5.595189563799429, "grad_norm": 0.8559488654136658, "learning_rate": 0.00030060130452507134, "loss": 3.1118, "step": 82350 }, { "epoch": 5.595529283870091, "grad_norm": 0.9672789573669434, "learning_rate": 0.0003005588395162386, "loss": 3.7992, "step": 82355 }, { "epoch": 5.595869003940753, "grad_norm": 0.804573655128479, "learning_rate": 0.0003005163745074059, "loss": 3.6284, "step": 82360 }, { "epoch": 5.596208724011414, "grad_norm": 0.8855844140052795, "learning_rate": 0.0003004739094985732, "loss": 3.7174, "step": 82365 }, { "epoch": 5.596548444082076, "grad_norm": 1.0248981714248657, "learning_rate": 0.00030043144448974046, "loss": 3.5449, "step": 82370 }, { "epoch": 5.596888164152738, "grad_norm": 0.8718444108963013, "learning_rate": 0.00030038897948090774, "loss": 3.3219, "step": 82375 }, { "epoch": 5.5972278842234, "grad_norm": 1.1217143535614014, "learning_rate": 0.000300346514472075, "loss": 3.5923, "step": 82380 }, { "epoch": 5.597567604294062, "grad_norm": 1.0151934623718262, "learning_rate": 0.0003003040494632423, "loss": 3.2789, "step": 82385 }, { "epoch": 5.597907324364724, "grad_norm": 0.8510819673538208, "learning_rate": 0.0003002615844544096, "loss": 3.4468, "step": 82390 }, { "epoch": 5.598247044435385, "grad_norm": 0.717692494392395, "learning_rate": 0.00030021911944557686, "loss": 3.4964, "step": 82395 }, { "epoch": 5.598586764506047, "grad_norm": 0.8688861131668091, "learning_rate": 0.00030017665443674414, "loss": 3.4789, "step": 82400 }, { "epoch": 5.598926484576709, "grad_norm": 0.8709871172904968, "learning_rate": 0.0003001341894279114, "loss": 3.3047, "step": 82405 }, { "epoch": 5.59926620464737, "grad_norm": 0.7539446949958801, "learning_rate": 0.00030009172441907865, "loss": 3.3955, "step": 82410 }, { "epoch": 5.599605924718032, "grad_norm": 1.0892256498336792, "learning_rate": 0.000300049259410246, "loss": 3.6333, "step": 82415 }, { "epoch": 5.599945644788694, "grad_norm": 0.9068273901939392, "learning_rate": 0.00030000679440141326, "loss": 3.5849, "step": 82420 }, { "epoch": 5.600285364859356, "grad_norm": 0.8445501923561096, "learning_rate": 0.0002999643293925805, "loss": 3.4866, "step": 82425 }, { "epoch": 5.600625084930018, "grad_norm": 0.8555974364280701, "learning_rate": 0.0002999218643837478, "loss": 3.4909, "step": 82430 }, { "epoch": 5.60096480500068, "grad_norm": 0.8991062641143799, "learning_rate": 0.0002998793993749151, "loss": 3.4082, "step": 82435 }, { "epoch": 5.601304525071341, "grad_norm": 0.8401778936386108, "learning_rate": 0.00029983693436608233, "loss": 3.3235, "step": 82440 }, { "epoch": 5.601644245142003, "grad_norm": 0.7574227452278137, "learning_rate": 0.0002997944693572496, "loss": 3.62, "step": 82445 }, { "epoch": 5.601983965212665, "grad_norm": 0.9731252789497375, "learning_rate": 0.00029975200434841694, "loss": 3.4487, "step": 82450 }, { "epoch": 5.602323685283326, "grad_norm": 0.9891376495361328, "learning_rate": 0.00029970953933958417, "loss": 3.6082, "step": 82455 }, { "epoch": 5.602663405353988, "grad_norm": 0.9286349415779114, "learning_rate": 0.00029966707433075145, "loss": 3.3799, "step": 82460 }, { "epoch": 5.60300312542465, "grad_norm": 1.0710163116455078, "learning_rate": 0.0002996246093219188, "loss": 3.4492, "step": 82465 }, { "epoch": 5.603342845495312, "grad_norm": 1.0180164575576782, "learning_rate": 0.000299582144313086, "loss": 3.6341, "step": 82470 }, { "epoch": 5.603682565565974, "grad_norm": 0.9899030327796936, "learning_rate": 0.0002995396793042533, "loss": 3.5178, "step": 82475 }, { "epoch": 5.604022285636636, "grad_norm": 0.8733294606208801, "learning_rate": 0.00029949721429542057, "loss": 3.4119, "step": 82480 }, { "epoch": 5.604362005707297, "grad_norm": 0.8700166940689087, "learning_rate": 0.00029945474928658785, "loss": 3.3, "step": 82485 }, { "epoch": 5.604701725777959, "grad_norm": 0.8279207944869995, "learning_rate": 0.00029941228427775513, "loss": 3.2474, "step": 82490 }, { "epoch": 5.605041445848621, "grad_norm": 1.1284507513046265, "learning_rate": 0.0002993698192689224, "loss": 3.3856, "step": 82495 }, { "epoch": 5.605381165919282, "grad_norm": 0.8666946291923523, "learning_rate": 0.0002993273542600897, "loss": 3.4129, "step": 82500 }, { "epoch": 5.605720885989944, "grad_norm": 1.0889859199523926, "learning_rate": 0.00029928488925125697, "loss": 3.542, "step": 82505 }, { "epoch": 5.606060606060606, "grad_norm": 0.8910405039787292, "learning_rate": 0.00029924242424242425, "loss": 3.5168, "step": 82510 }, { "epoch": 5.606400326131268, "grad_norm": 0.6801512837409973, "learning_rate": 0.0002991999592335915, "loss": 3.5961, "step": 82515 }, { "epoch": 5.60674004620193, "grad_norm": 0.8186582922935486, "learning_rate": 0.0002991574942247588, "loss": 3.5429, "step": 82520 }, { "epoch": 5.607079766272592, "grad_norm": 0.7269889712333679, "learning_rate": 0.0002991150292159261, "loss": 3.3532, "step": 82525 }, { "epoch": 5.607419486343253, "grad_norm": 1.0843061208724976, "learning_rate": 0.0002990725642070933, "loss": 3.6661, "step": 82530 }, { "epoch": 5.607759206413915, "grad_norm": 0.6639176607131958, "learning_rate": 0.00029903009919826065, "loss": 3.4398, "step": 82535 }, { "epoch": 5.608098926484577, "grad_norm": 1.049216389656067, "learning_rate": 0.00029898763418942793, "loss": 3.4465, "step": 82540 }, { "epoch": 5.608438646555238, "grad_norm": 0.7577747106552124, "learning_rate": 0.0002989451691805952, "loss": 3.5817, "step": 82545 }, { "epoch": 5.6087783666259, "grad_norm": 1.0697002410888672, "learning_rate": 0.00029890270417176244, "loss": 3.656, "step": 82550 }, { "epoch": 5.6091180866965615, "grad_norm": 1.2251524925231934, "learning_rate": 0.00029886023916292977, "loss": 3.162, "step": 82555 }, { "epoch": 5.609457806767224, "grad_norm": 0.6835396885871887, "learning_rate": 0.00029881777415409705, "loss": 3.376, "step": 82560 }, { "epoch": 5.609797526837886, "grad_norm": 0.7857998013496399, "learning_rate": 0.0002987753091452643, "loss": 3.4854, "step": 82565 }, { "epoch": 5.610137246908547, "grad_norm": 0.8086017370223999, "learning_rate": 0.0002987328441364316, "loss": 3.5351, "step": 82570 }, { "epoch": 5.610476966979209, "grad_norm": 1.0776985883712769, "learning_rate": 0.0002986903791275989, "loss": 3.232, "step": 82575 }, { "epoch": 5.610816687049871, "grad_norm": 0.8785591125488281, "learning_rate": 0.0002986479141187661, "loss": 3.4459, "step": 82580 }, { "epoch": 5.611156407120532, "grad_norm": 0.907768964767456, "learning_rate": 0.00029860544910993345, "loss": 3.6141, "step": 82585 }, { "epoch": 5.611496127191194, "grad_norm": 1.0870754718780518, "learning_rate": 0.00029856298410110073, "loss": 3.4037, "step": 82590 }, { "epoch": 5.611835847261856, "grad_norm": 0.8357994556427002, "learning_rate": 0.00029852051909226796, "loss": 3.4175, "step": 82595 }, { "epoch": 5.6121755673325175, "grad_norm": 0.8477683663368225, "learning_rate": 0.00029847805408343524, "loss": 3.2592, "step": 82600 }, { "epoch": 5.61251528740318, "grad_norm": 0.8902766108512878, "learning_rate": 0.00029843558907460257, "loss": 3.3841, "step": 82605 }, { "epoch": 5.612855007473842, "grad_norm": 0.9501713514328003, "learning_rate": 0.0002983931240657698, "loss": 3.3027, "step": 82610 }, { "epoch": 5.613194727544503, "grad_norm": 1.094098448753357, "learning_rate": 0.0002983506590569371, "loss": 3.3747, "step": 82615 }, { "epoch": 5.613534447615165, "grad_norm": 1.1341629028320312, "learning_rate": 0.0002983081940481044, "loss": 3.4693, "step": 82620 }, { "epoch": 5.613874167685827, "grad_norm": 1.0982837677001953, "learning_rate": 0.00029826572903927164, "loss": 3.6718, "step": 82625 }, { "epoch": 5.614213887756488, "grad_norm": 1.18268883228302, "learning_rate": 0.0002982232640304389, "loss": 3.5742, "step": 82630 }, { "epoch": 5.61455360782715, "grad_norm": 0.7063042521476746, "learning_rate": 0.0002981807990216062, "loss": 3.317, "step": 82635 }, { "epoch": 5.614893327897812, "grad_norm": 0.8306232690811157, "learning_rate": 0.0002981383340127735, "loss": 3.2071, "step": 82640 }, { "epoch": 5.615233047968474, "grad_norm": 0.9124130010604858, "learning_rate": 0.00029809586900394076, "loss": 3.4146, "step": 82645 }, { "epoch": 5.615572768039136, "grad_norm": 0.8041988015174866, "learning_rate": 0.00029805340399510804, "loss": 3.4698, "step": 82650 }, { "epoch": 5.615912488109798, "grad_norm": 0.9369439482688904, "learning_rate": 0.0002980109389862753, "loss": 3.43, "step": 82655 }, { "epoch": 5.616252208180459, "grad_norm": 0.7106771469116211, "learning_rate": 0.0002979684739774426, "loss": 3.5662, "step": 82660 }, { "epoch": 5.616591928251121, "grad_norm": 0.9854356050491333, "learning_rate": 0.0002979260089686099, "loss": 3.3666, "step": 82665 }, { "epoch": 5.616931648321783, "grad_norm": 1.1525728702545166, "learning_rate": 0.0002978835439597771, "loss": 3.1969, "step": 82670 }, { "epoch": 5.617271368392444, "grad_norm": 0.8339700698852539, "learning_rate": 0.00029784107895094444, "loss": 3.4877, "step": 82675 }, { "epoch": 5.617611088463106, "grad_norm": 0.9361932277679443, "learning_rate": 0.0002977986139421117, "loss": 3.4459, "step": 82680 }, { "epoch": 5.617950808533768, "grad_norm": 1.0960323810577393, "learning_rate": 0.00029775614893327894, "loss": 3.5247, "step": 82685 }, { "epoch": 5.61829052860443, "grad_norm": 0.8406267762184143, "learning_rate": 0.0002977136839244463, "loss": 3.3203, "step": 82690 }, { "epoch": 5.618630248675092, "grad_norm": 0.7366962432861328, "learning_rate": 0.00029767121891561356, "loss": 3.4736, "step": 82695 }, { "epoch": 5.618969968745754, "grad_norm": 1.05165433883667, "learning_rate": 0.0002976287539067808, "loss": 3.7444, "step": 82700 }, { "epoch": 5.619309688816415, "grad_norm": 1.192793369293213, "learning_rate": 0.00029758628889794806, "loss": 3.5398, "step": 82705 }, { "epoch": 5.619649408887077, "grad_norm": 0.9434531927108765, "learning_rate": 0.0002975438238891154, "loss": 3.5305, "step": 82710 }, { "epoch": 5.619989128957739, "grad_norm": 0.7988432049751282, "learning_rate": 0.0002975013588802827, "loss": 3.3358, "step": 82715 }, { "epoch": 5.6203288490284, "grad_norm": 0.8350309729576111, "learning_rate": 0.0002974588938714499, "loss": 3.1934, "step": 82720 }, { "epoch": 5.620668569099062, "grad_norm": 0.783007800579071, "learning_rate": 0.00029741642886261724, "loss": 3.6222, "step": 82725 }, { "epoch": 5.621008289169724, "grad_norm": 1.0800529718399048, "learning_rate": 0.0002973739638537845, "loss": 3.2762, "step": 82730 }, { "epoch": 5.621348009240386, "grad_norm": 0.7524433135986328, "learning_rate": 0.00029733149884495175, "loss": 3.62, "step": 82735 }, { "epoch": 5.621687729311048, "grad_norm": 0.7950888276100159, "learning_rate": 0.000297289033836119, "loss": 3.5863, "step": 82740 }, { "epoch": 5.62202744938171, "grad_norm": 0.914823591709137, "learning_rate": 0.00029724656882728636, "loss": 3.3415, "step": 82745 }, { "epoch": 5.622367169452371, "grad_norm": 0.7424940466880798, "learning_rate": 0.0002972041038184536, "loss": 3.4915, "step": 82750 }, { "epoch": 5.622706889523033, "grad_norm": 0.9059765338897705, "learning_rate": 0.00029716163880962087, "loss": 3.5569, "step": 82755 }, { "epoch": 5.623046609593695, "grad_norm": 0.9582024812698364, "learning_rate": 0.0002971191738007882, "loss": 3.5173, "step": 82760 }, { "epoch": 5.623386329664356, "grad_norm": 0.879798173904419, "learning_rate": 0.0002970767087919554, "loss": 3.4334, "step": 82765 }, { "epoch": 5.623726049735018, "grad_norm": 1.0461952686309814, "learning_rate": 0.0002970342437831227, "loss": 3.4366, "step": 82770 }, { "epoch": 5.62406576980568, "grad_norm": 1.1164079904556274, "learning_rate": 0.00029699177877429, "loss": 3.3137, "step": 82775 }, { "epoch": 5.624405489876342, "grad_norm": 1.247663140296936, "learning_rate": 0.00029694931376545727, "loss": 3.5439, "step": 82780 }, { "epoch": 5.624745209947004, "grad_norm": 1.0324838161468506, "learning_rate": 0.00029690684875662455, "loss": 3.8394, "step": 82785 }, { "epoch": 5.625084930017666, "grad_norm": 0.7930219173431396, "learning_rate": 0.0002968643837477918, "loss": 3.4222, "step": 82790 }, { "epoch": 5.625424650088327, "grad_norm": 0.9668696522712708, "learning_rate": 0.0002968219187389591, "loss": 3.4342, "step": 82795 }, { "epoch": 5.625764370158989, "grad_norm": 1.05758798122406, "learning_rate": 0.0002967794537301264, "loss": 3.6813, "step": 82800 }, { "epoch": 5.626104090229651, "grad_norm": 0.9212996959686279, "learning_rate": 0.00029673698872129367, "loss": 3.5015, "step": 82805 }, { "epoch": 5.626443810300312, "grad_norm": 1.0778874158859253, "learning_rate": 0.0002966945237124609, "loss": 3.3348, "step": 82810 }, { "epoch": 5.626783530370974, "grad_norm": 1.132836103439331, "learning_rate": 0.0002966520587036282, "loss": 3.4537, "step": 82815 }, { "epoch": 5.627123250441636, "grad_norm": 1.0223679542541504, "learning_rate": 0.0002966095936947955, "loss": 3.2584, "step": 82820 }, { "epoch": 5.627462970512298, "grad_norm": 0.9438114166259766, "learning_rate": 0.00029656712868596273, "loss": 3.3805, "step": 82825 }, { "epoch": 5.62780269058296, "grad_norm": 0.9413001537322998, "learning_rate": 0.00029652466367713007, "loss": 3.499, "step": 82830 }, { "epoch": 5.628142410653622, "grad_norm": 0.8503909707069397, "learning_rate": 0.00029648219866829735, "loss": 3.2795, "step": 82835 }, { "epoch": 5.628482130724283, "grad_norm": 0.7844239473342896, "learning_rate": 0.00029643973365946457, "loss": 3.5971, "step": 82840 }, { "epoch": 5.628821850794945, "grad_norm": 0.8048662543296814, "learning_rate": 0.00029639726865063185, "loss": 3.5358, "step": 82845 }, { "epoch": 5.629161570865607, "grad_norm": 0.7842504978179932, "learning_rate": 0.0002963548036417992, "loss": 3.3627, "step": 82850 }, { "epoch": 5.629501290936268, "grad_norm": 1.1938397884368896, "learning_rate": 0.0002963123386329664, "loss": 3.8508, "step": 82855 }, { "epoch": 5.62984101100693, "grad_norm": 1.0448285341262817, "learning_rate": 0.0002962698736241337, "loss": 3.4857, "step": 82860 }, { "epoch": 5.630180731077592, "grad_norm": 0.67790287733078, "learning_rate": 0.00029622740861530103, "loss": 3.2893, "step": 82865 }, { "epoch": 5.630520451148254, "grad_norm": 0.7852063775062561, "learning_rate": 0.00029618494360646825, "loss": 3.4405, "step": 82870 }, { "epoch": 5.630860171218916, "grad_norm": 1.1292860507965088, "learning_rate": 0.00029614247859763553, "loss": 3.3046, "step": 82875 }, { "epoch": 5.631199891289578, "grad_norm": 1.324384093284607, "learning_rate": 0.00029610001358880287, "loss": 3.6211, "step": 82880 }, { "epoch": 5.631539611360239, "grad_norm": 0.8519487380981445, "learning_rate": 0.00029605754857997015, "loss": 3.3598, "step": 82885 }, { "epoch": 5.631879331430901, "grad_norm": 1.4642263650894165, "learning_rate": 0.0002960150835711374, "loss": 3.566, "step": 82890 }, { "epoch": 5.632219051501563, "grad_norm": 1.4540491104125977, "learning_rate": 0.00029597261856230465, "loss": 3.3431, "step": 82895 }, { "epoch": 5.632558771572224, "grad_norm": 1.0076764822006226, "learning_rate": 0.000295930153553472, "loss": 3.4225, "step": 82900 }, { "epoch": 5.632898491642886, "grad_norm": 0.6310834884643555, "learning_rate": 0.0002958876885446392, "loss": 3.6629, "step": 82905 }, { "epoch": 5.633238211713548, "grad_norm": 0.9057756662368774, "learning_rate": 0.0002958452235358065, "loss": 3.5102, "step": 82910 }, { "epoch": 5.63357793178421, "grad_norm": 0.8441143035888672, "learning_rate": 0.00029580275852697383, "loss": 3.6229, "step": 82915 }, { "epoch": 5.633917651854872, "grad_norm": 0.921527624130249, "learning_rate": 0.00029576029351814105, "loss": 3.4045, "step": 82920 }, { "epoch": 5.634257371925534, "grad_norm": 0.8931593894958496, "learning_rate": 0.00029571782850930833, "loss": 3.2953, "step": 82925 }, { "epoch": 5.634597091996195, "grad_norm": 1.0581187009811401, "learning_rate": 0.0002956753635004756, "loss": 3.4309, "step": 82930 }, { "epoch": 5.634936812066857, "grad_norm": 1.0409610271453857, "learning_rate": 0.0002956328984916429, "loss": 3.4529, "step": 82935 }, { "epoch": 5.635276532137519, "grad_norm": 0.7524537444114685, "learning_rate": 0.0002955904334828102, "loss": 2.9636, "step": 82940 }, { "epoch": 5.63561625220818, "grad_norm": 1.0245060920715332, "learning_rate": 0.00029554796847397745, "loss": 3.4309, "step": 82945 }, { "epoch": 5.635955972278842, "grad_norm": 0.9190667271614075, "learning_rate": 0.00029550550346514473, "loss": 3.465, "step": 82950 }, { "epoch": 5.6362956923495044, "grad_norm": 0.7650712728500366, "learning_rate": 0.000295463038456312, "loss": 3.4938, "step": 82955 }, { "epoch": 5.636635412420166, "grad_norm": 1.1156895160675049, "learning_rate": 0.0002954205734474793, "loss": 3.7372, "step": 82960 }, { "epoch": 5.636975132490828, "grad_norm": 0.7459725737571716, "learning_rate": 0.0002953781084386465, "loss": 3.2657, "step": 82965 }, { "epoch": 5.63731485256149, "grad_norm": 0.7078388929367065, "learning_rate": 0.00029533564342981385, "loss": 3.4057, "step": 82970 }, { "epoch": 5.637654572632151, "grad_norm": 0.9115403890609741, "learning_rate": 0.00029529317842098113, "loss": 3.8266, "step": 82975 }, { "epoch": 5.637994292702813, "grad_norm": 0.9840308427810669, "learning_rate": 0.00029525071341214836, "loss": 3.3176, "step": 82980 }, { "epoch": 5.638334012773475, "grad_norm": 1.2488564252853394, "learning_rate": 0.0002952082484033157, "loss": 3.4897, "step": 82985 }, { "epoch": 5.638673732844136, "grad_norm": 0.9639122486114502, "learning_rate": 0.000295165783394483, "loss": 3.5619, "step": 82990 }, { "epoch": 5.639013452914798, "grad_norm": 0.7075731158256531, "learning_rate": 0.0002951233183856502, "loss": 3.2407, "step": 82995 }, { "epoch": 5.6393531729854605, "grad_norm": 0.7933985590934753, "learning_rate": 0.0002950808533768175, "loss": 3.5547, "step": 83000 }, { "epoch": 5.639692893056122, "grad_norm": 0.8658381700515747, "learning_rate": 0.0002950383883679848, "loss": 3.3827, "step": 83005 }, { "epoch": 5.640032613126784, "grad_norm": 0.905755341053009, "learning_rate": 0.00029499592335915204, "loss": 3.6374, "step": 83010 }, { "epoch": 5.640372333197446, "grad_norm": 1.0243123769760132, "learning_rate": 0.0002949534583503193, "loss": 3.5549, "step": 83015 }, { "epoch": 5.640712053268107, "grad_norm": 1.1589465141296387, "learning_rate": 0.00029491099334148666, "loss": 3.2596, "step": 83020 }, { "epoch": 5.641051773338769, "grad_norm": 0.9430339932441711, "learning_rate": 0.0002948685283326539, "loss": 3.6477, "step": 83025 }, { "epoch": 5.64139149340943, "grad_norm": 0.7901031970977783, "learning_rate": 0.00029482606332382116, "loss": 3.444, "step": 83030 }, { "epoch": 5.641731213480092, "grad_norm": 0.9946638345718384, "learning_rate": 0.00029478359831498844, "loss": 3.4469, "step": 83035 }, { "epoch": 5.642070933550754, "grad_norm": 0.8680121302604675, "learning_rate": 0.0002947411333061557, "loss": 3.6005, "step": 83040 }, { "epoch": 5.642410653621416, "grad_norm": 1.1426748037338257, "learning_rate": 0.000294698668297323, "loss": 3.3126, "step": 83045 }, { "epoch": 5.642750373692078, "grad_norm": 0.8053727149963379, "learning_rate": 0.0002946562032884903, "loss": 3.5187, "step": 83050 }, { "epoch": 5.64309009376274, "grad_norm": 0.9972378015518188, "learning_rate": 0.0002946137382796576, "loss": 3.5392, "step": 83055 }, { "epoch": 5.643429813833401, "grad_norm": 0.9491370320320129, "learning_rate": 0.00029457127327082484, "loss": 3.5256, "step": 83060 }, { "epoch": 5.643769533904063, "grad_norm": 0.7507970929145813, "learning_rate": 0.0002945288082619921, "loss": 3.5311, "step": 83065 }, { "epoch": 5.644109253974725, "grad_norm": 0.7628971338272095, "learning_rate": 0.0002944863432531594, "loss": 3.3957, "step": 83070 }, { "epoch": 5.644448974045386, "grad_norm": 0.7985711693763733, "learning_rate": 0.0002944438782443267, "loss": 3.3527, "step": 83075 }, { "epoch": 5.644788694116048, "grad_norm": 0.8339905738830566, "learning_rate": 0.00029440141323549396, "loss": 3.2678, "step": 83080 }, { "epoch": 5.64512841418671, "grad_norm": 1.2876183986663818, "learning_rate": 0.00029435894822666124, "loss": 3.3757, "step": 83085 }, { "epoch": 5.645468134257372, "grad_norm": 0.9001734256744385, "learning_rate": 0.0002943164832178285, "loss": 3.4509, "step": 83090 }, { "epoch": 5.645807854328034, "grad_norm": 0.8219545483589172, "learning_rate": 0.0002942740182089958, "loss": 3.426, "step": 83095 }, { "epoch": 5.646147574398696, "grad_norm": 0.9660037755966187, "learning_rate": 0.0002942315532001631, "loss": 3.5295, "step": 83100 }, { "epoch": 5.646487294469357, "grad_norm": 1.0905600786209106, "learning_rate": 0.0002941890881913303, "loss": 3.3748, "step": 83105 }, { "epoch": 5.646827014540019, "grad_norm": 0.7156327366828918, "learning_rate": 0.00029414662318249764, "loss": 3.3843, "step": 83110 }, { "epoch": 5.647166734610681, "grad_norm": 0.968073308467865, "learning_rate": 0.0002941041581736649, "loss": 3.7197, "step": 83115 }, { "epoch": 5.647506454681342, "grad_norm": 0.8887528777122498, "learning_rate": 0.00029406169316483215, "loss": 3.4534, "step": 83120 }, { "epoch": 5.647846174752004, "grad_norm": 1.0617400407791138, "learning_rate": 0.0002940192281559995, "loss": 3.383, "step": 83125 }, { "epoch": 5.648185894822666, "grad_norm": 0.9726810455322266, "learning_rate": 0.00029397676314716676, "loss": 3.5561, "step": 83130 }, { "epoch": 5.648525614893328, "grad_norm": 0.8281834125518799, "learning_rate": 0.000293934298138334, "loss": 3.4291, "step": 83135 }, { "epoch": 5.64886533496399, "grad_norm": 0.8042598366737366, "learning_rate": 0.00029389183312950127, "loss": 3.4293, "step": 83140 }, { "epoch": 5.649205055034652, "grad_norm": 0.8832095265388489, "learning_rate": 0.0002938493681206686, "loss": 3.1667, "step": 83145 }, { "epoch": 5.649544775105313, "grad_norm": 0.7883518934249878, "learning_rate": 0.00029380690311183583, "loss": 3.5576, "step": 83150 }, { "epoch": 5.649884495175975, "grad_norm": 0.9501177668571472, "learning_rate": 0.0002937644381030031, "loss": 3.4591, "step": 83155 }, { "epoch": 5.650224215246637, "grad_norm": 1.1314674615859985, "learning_rate": 0.00029372197309417044, "loss": 3.7386, "step": 83160 }, { "epoch": 5.650563935317298, "grad_norm": 0.9495134949684143, "learning_rate": 0.00029367950808533767, "loss": 3.5485, "step": 83165 }, { "epoch": 5.65090365538796, "grad_norm": 0.868606448173523, "learning_rate": 0.00029363704307650495, "loss": 3.4141, "step": 83170 }, { "epoch": 5.651243375458622, "grad_norm": 1.0000779628753662, "learning_rate": 0.0002935945780676723, "loss": 3.5225, "step": 83175 }, { "epoch": 5.651583095529284, "grad_norm": 1.0089242458343506, "learning_rate": 0.0002935521130588395, "loss": 3.2065, "step": 83180 }, { "epoch": 5.651922815599946, "grad_norm": 1.2553049325942993, "learning_rate": 0.0002935096480500068, "loss": 3.4016, "step": 83185 }, { "epoch": 5.652262535670608, "grad_norm": 1.1665568351745605, "learning_rate": 0.00029346718304117407, "loss": 3.3261, "step": 83190 }, { "epoch": 5.652602255741269, "grad_norm": 0.8038183450698853, "learning_rate": 0.00029342471803234135, "loss": 3.3971, "step": 83195 }, { "epoch": 5.652941975811931, "grad_norm": 0.7910287380218506, "learning_rate": 0.00029338225302350863, "loss": 3.4475, "step": 83200 }, { "epoch": 5.653281695882593, "grad_norm": 0.7997781038284302, "learning_rate": 0.0002933397880146759, "loss": 3.3672, "step": 83205 }, { "epoch": 5.653621415953254, "grad_norm": 0.9859561920166016, "learning_rate": 0.0002932973230058432, "loss": 3.6528, "step": 83210 }, { "epoch": 5.653961136023916, "grad_norm": 0.9638369083404541, "learning_rate": 0.00029325485799701047, "loss": 3.4317, "step": 83215 }, { "epoch": 5.654300856094578, "grad_norm": 0.7725545167922974, "learning_rate": 0.00029321239298817775, "loss": 3.5945, "step": 83220 }, { "epoch": 5.65464057616524, "grad_norm": 0.8084056377410889, "learning_rate": 0.00029316992797934503, "loss": 3.4803, "step": 83225 }, { "epoch": 5.654980296235902, "grad_norm": 0.8588467240333557, "learning_rate": 0.0002931274629705123, "loss": 3.2939, "step": 83230 }, { "epoch": 5.655320016306564, "grad_norm": 0.9415102005004883, "learning_rate": 0.0002930849979616796, "loss": 3.395, "step": 83235 }, { "epoch": 5.655659736377225, "grad_norm": 1.0770758390426636, "learning_rate": 0.00029304253295284687, "loss": 3.4179, "step": 83240 }, { "epoch": 5.655999456447887, "grad_norm": 0.8784584403038025, "learning_rate": 0.00029300006794401415, "loss": 3.3974, "step": 83245 }, { "epoch": 5.656339176518548, "grad_norm": 0.7465420365333557, "learning_rate": 0.00029295760293518143, "loss": 3.1769, "step": 83250 }, { "epoch": 5.65667889658921, "grad_norm": 1.0063602924346924, "learning_rate": 0.0002929151379263487, "loss": 3.7082, "step": 83255 }, { "epoch": 5.657018616659872, "grad_norm": 0.9924637675285339, "learning_rate": 0.00029287267291751594, "loss": 3.2542, "step": 83260 }, { "epoch": 5.657358336730534, "grad_norm": 1.0415393114089966, "learning_rate": 0.00029283020790868327, "loss": 3.625, "step": 83265 }, { "epoch": 5.657698056801196, "grad_norm": 0.7322259545326233, "learning_rate": 0.00029278774289985055, "loss": 3.6835, "step": 83270 }, { "epoch": 5.658037776871858, "grad_norm": 0.6387506723403931, "learning_rate": 0.0002927452778910178, "loss": 3.1596, "step": 83275 }, { "epoch": 5.658377496942519, "grad_norm": 0.8852750062942505, "learning_rate": 0.0002927028128821851, "loss": 3.3895, "step": 83280 }, { "epoch": 5.658717217013181, "grad_norm": 1.015770673751831, "learning_rate": 0.0002926603478733524, "loss": 3.249, "step": 83285 }, { "epoch": 5.659056937083843, "grad_norm": 0.9906133413314819, "learning_rate": 0.0002926178828645196, "loss": 3.519, "step": 83290 }, { "epoch": 5.659396657154504, "grad_norm": 0.9971023201942444, "learning_rate": 0.0002925754178556869, "loss": 3.0304, "step": 83295 }, { "epoch": 5.659736377225166, "grad_norm": 0.9242939949035645, "learning_rate": 0.00029253295284685423, "loss": 3.3278, "step": 83300 }, { "epoch": 5.660076097295828, "grad_norm": 0.8084380030632019, "learning_rate": 0.00029249048783802146, "loss": 3.2948, "step": 83305 }, { "epoch": 5.66041581736649, "grad_norm": 0.9707916975021362, "learning_rate": 0.00029244802282918874, "loss": 3.231, "step": 83310 }, { "epoch": 5.660755537437152, "grad_norm": 0.7198179960250854, "learning_rate": 0.00029240555782035607, "loss": 3.3629, "step": 83315 }, { "epoch": 5.661095257507814, "grad_norm": 1.0670853853225708, "learning_rate": 0.0002923630928115233, "loss": 3.5328, "step": 83320 }, { "epoch": 5.661434977578475, "grad_norm": 0.9983049035072327, "learning_rate": 0.0002923206278026906, "loss": 3.4819, "step": 83325 }, { "epoch": 5.661774697649137, "grad_norm": 0.8845999836921692, "learning_rate": 0.00029227816279385786, "loss": 3.6444, "step": 83330 }, { "epoch": 5.662114417719799, "grad_norm": 0.780309796333313, "learning_rate": 0.00029223569778502514, "loss": 3.2808, "step": 83335 }, { "epoch": 5.66245413779046, "grad_norm": 0.7731478214263916, "learning_rate": 0.0002921932327761924, "loss": 3.5712, "step": 83340 }, { "epoch": 5.662793857861122, "grad_norm": 0.9698320031166077, "learning_rate": 0.0002921507677673597, "loss": 3.5264, "step": 83345 }, { "epoch": 5.663133577931784, "grad_norm": 1.28143310546875, "learning_rate": 0.000292108302758527, "loss": 3.3969, "step": 83350 }, { "epoch": 5.663473298002446, "grad_norm": 0.8373331427574158, "learning_rate": 0.00029206583774969426, "loss": 3.5784, "step": 83355 }, { "epoch": 5.663813018073108, "grad_norm": 0.8787707686424255, "learning_rate": 0.00029202337274086154, "loss": 3.578, "step": 83360 }, { "epoch": 5.66415273814377, "grad_norm": 1.591157078742981, "learning_rate": 0.00029198090773202876, "loss": 3.3844, "step": 83365 }, { "epoch": 5.664492458214431, "grad_norm": 0.9360778331756592, "learning_rate": 0.0002919384427231961, "loss": 3.2784, "step": 83370 }, { "epoch": 5.664832178285093, "grad_norm": 0.7707211971282959, "learning_rate": 0.0002918959777143634, "loss": 3.3723, "step": 83375 }, { "epoch": 5.665171898355755, "grad_norm": 0.8232119679450989, "learning_rate": 0.0002918535127055306, "loss": 3.3123, "step": 83380 }, { "epoch": 5.665511618426416, "grad_norm": 0.907619833946228, "learning_rate": 0.00029181104769669794, "loss": 3.2408, "step": 83385 }, { "epoch": 5.665851338497078, "grad_norm": 0.7776772379875183, "learning_rate": 0.0002917685826878652, "loss": 3.6471, "step": 83390 }, { "epoch": 5.66619105856774, "grad_norm": 0.8521826267242432, "learning_rate": 0.0002917261176790325, "loss": 3.4327, "step": 83395 }, { "epoch": 5.666530778638402, "grad_norm": 1.1201339960098267, "learning_rate": 0.0002916836526701997, "loss": 3.1758, "step": 83400 }, { "epoch": 5.666870498709064, "grad_norm": 0.9783396124839783, "learning_rate": 0.00029164118766136706, "loss": 3.336, "step": 83405 }, { "epoch": 5.667210218779726, "grad_norm": 0.8204969763755798, "learning_rate": 0.00029159872265253434, "loss": 3.2709, "step": 83410 }, { "epoch": 5.667549938850387, "grad_norm": 0.793096661567688, "learning_rate": 0.00029155625764370156, "loss": 3.6273, "step": 83415 }, { "epoch": 5.667889658921049, "grad_norm": 0.9060960412025452, "learning_rate": 0.0002915137926348689, "loss": 3.4827, "step": 83420 }, { "epoch": 5.668229378991711, "grad_norm": 0.9079042077064514, "learning_rate": 0.0002914713276260362, "loss": 3.3715, "step": 83425 }, { "epoch": 5.668569099062372, "grad_norm": 1.024854302406311, "learning_rate": 0.0002914288626172034, "loss": 3.4902, "step": 83430 }, { "epoch": 5.668908819133034, "grad_norm": 0.8753691911697388, "learning_rate": 0.00029138639760837074, "loss": 3.6279, "step": 83435 }, { "epoch": 5.669248539203696, "grad_norm": 1.3571269512176514, "learning_rate": 0.000291343932599538, "loss": 3.4124, "step": 83440 }, { "epoch": 5.669588259274358, "grad_norm": 0.9249932169914246, "learning_rate": 0.00029130146759070525, "loss": 3.5921, "step": 83445 }, { "epoch": 5.66992797934502, "grad_norm": 0.8799342513084412, "learning_rate": 0.0002912590025818725, "loss": 3.243, "step": 83450 }, { "epoch": 5.670267699415682, "grad_norm": 0.947935938835144, "learning_rate": 0.00029121653757303986, "loss": 3.4729, "step": 83455 }, { "epoch": 5.670607419486343, "grad_norm": 1.3947057723999023, "learning_rate": 0.0002911740725642071, "loss": 3.5035, "step": 83460 }, { "epoch": 5.670947139557005, "grad_norm": 0.8693006038665771, "learning_rate": 0.00029113160755537437, "loss": 3.6941, "step": 83465 }, { "epoch": 5.671286859627667, "grad_norm": 1.843916893005371, "learning_rate": 0.0002910891425465417, "loss": 3.4625, "step": 83470 }, { "epoch": 5.671626579698328, "grad_norm": 0.9616157412528992, "learning_rate": 0.0002910466775377089, "loss": 3.1795, "step": 83475 }, { "epoch": 5.67196629976899, "grad_norm": 1.0847221612930298, "learning_rate": 0.0002910042125288762, "loss": 3.237, "step": 83480 }, { "epoch": 5.672306019839652, "grad_norm": 0.9928697347640991, "learning_rate": 0.0002909617475200435, "loss": 3.4038, "step": 83485 }, { "epoch": 5.672645739910314, "grad_norm": 0.8215965628623962, "learning_rate": 0.00029091928251121077, "loss": 3.6302, "step": 83490 }, { "epoch": 5.672985459980976, "grad_norm": 1.1190478801727295, "learning_rate": 0.00029087681750237805, "loss": 3.384, "step": 83495 }, { "epoch": 5.673325180051638, "grad_norm": 0.7992681860923767, "learning_rate": 0.0002908343524935453, "loss": 3.4899, "step": 83500 }, { "epoch": 5.673664900122299, "grad_norm": 1.1197290420532227, "learning_rate": 0.0002907918874847126, "loss": 3.3736, "step": 83505 }, { "epoch": 5.674004620192961, "grad_norm": 0.9365699887275696, "learning_rate": 0.0002907494224758799, "loss": 3.4236, "step": 83510 }, { "epoch": 5.674344340263623, "grad_norm": 1.0207115411758423, "learning_rate": 0.00029070695746704717, "loss": 3.5253, "step": 83515 }, { "epoch": 5.674684060334284, "grad_norm": 0.7943755984306335, "learning_rate": 0.0002906644924582144, "loss": 3.507, "step": 83520 }, { "epoch": 5.675023780404946, "grad_norm": 0.694978654384613, "learning_rate": 0.0002906220274493817, "loss": 3.201, "step": 83525 }, { "epoch": 5.6753635004756084, "grad_norm": 0.8559492230415344, "learning_rate": 0.000290579562440549, "loss": 3.6477, "step": 83530 }, { "epoch": 5.67570322054627, "grad_norm": 1.0569499731063843, "learning_rate": 0.00029053709743171623, "loss": 3.4724, "step": 83535 }, { "epoch": 5.676042940616932, "grad_norm": 0.947289228439331, "learning_rate": 0.00029049463242288357, "loss": 3.5101, "step": 83540 }, { "epoch": 5.676382660687594, "grad_norm": 1.0206961631774902, "learning_rate": 0.00029045216741405085, "loss": 3.364, "step": 83545 }, { "epoch": 5.676722380758255, "grad_norm": 1.0054506063461304, "learning_rate": 0.00029040970240521807, "loss": 3.1569, "step": 83550 }, { "epoch": 5.677062100828917, "grad_norm": 0.8027119636535645, "learning_rate": 0.00029036723739638535, "loss": 3.3997, "step": 83555 }, { "epoch": 5.677401820899579, "grad_norm": 0.8249996304512024, "learning_rate": 0.0002903247723875527, "loss": 3.3876, "step": 83560 }, { "epoch": 5.67774154097024, "grad_norm": 0.9305970668792725, "learning_rate": 0.00029028230737871997, "loss": 3.4435, "step": 83565 }, { "epoch": 5.678081261040902, "grad_norm": 1.16923189163208, "learning_rate": 0.0002902398423698872, "loss": 3.4565, "step": 83570 }, { "epoch": 5.6784209811115645, "grad_norm": 0.9335669875144958, "learning_rate": 0.00029019737736105453, "loss": 3.4037, "step": 83575 }, { "epoch": 5.678760701182226, "grad_norm": 0.7997127771377563, "learning_rate": 0.0002901549123522218, "loss": 3.3241, "step": 83580 }, { "epoch": 5.679100421252888, "grad_norm": 0.9237399101257324, "learning_rate": 0.00029011244734338903, "loss": 3.3929, "step": 83585 }, { "epoch": 5.67944014132355, "grad_norm": 0.8259170651435852, "learning_rate": 0.0002900699823345563, "loss": 3.4499, "step": 83590 }, { "epoch": 5.679779861394211, "grad_norm": 0.8382492065429688, "learning_rate": 0.00029002751732572365, "loss": 3.3591, "step": 83595 }, { "epoch": 5.680119581464873, "grad_norm": 0.9118777513504028, "learning_rate": 0.0002899850523168909, "loss": 3.4997, "step": 83600 }, { "epoch": 5.680459301535535, "grad_norm": 0.9826738238334656, "learning_rate": 0.00028994258730805815, "loss": 3.4835, "step": 83605 }, { "epoch": 5.680799021606196, "grad_norm": 0.8213613629341125, "learning_rate": 0.0002899001222992255, "loss": 3.63, "step": 83610 }, { "epoch": 5.681138741676858, "grad_norm": 0.856124222278595, "learning_rate": 0.0002898576572903927, "loss": 3.6009, "step": 83615 }, { "epoch": 5.6814784617475205, "grad_norm": 0.713871419429779, "learning_rate": 0.00028981519228156, "loss": 3.3987, "step": 83620 }, { "epoch": 5.681818181818182, "grad_norm": 0.7922143936157227, "learning_rate": 0.0002897727272727273, "loss": 3.5835, "step": 83625 }, { "epoch": 5.682157901888844, "grad_norm": 0.9641343951225281, "learning_rate": 0.00028973026226389455, "loss": 3.4919, "step": 83630 }, { "epoch": 5.682497621959506, "grad_norm": 0.8714730143547058, "learning_rate": 0.00028968779725506183, "loss": 3.3982, "step": 83635 }, { "epoch": 5.682837342030167, "grad_norm": 1.1657137870788574, "learning_rate": 0.0002896453322462291, "loss": 3.7124, "step": 83640 }, { "epoch": 5.683177062100829, "grad_norm": 0.7330372333526611, "learning_rate": 0.0002896028672373964, "loss": 3.541, "step": 83645 }, { "epoch": 5.683516782171491, "grad_norm": 0.8605338335037231, "learning_rate": 0.0002895604022285637, "loss": 3.0809, "step": 83650 }, { "epoch": 5.683856502242152, "grad_norm": 1.101027250289917, "learning_rate": 0.00028951793721973095, "loss": 3.1177, "step": 83655 }, { "epoch": 5.684196222312814, "grad_norm": 0.6473367214202881, "learning_rate": 0.0002894754722108982, "loss": 3.6106, "step": 83660 }, { "epoch": 5.6845359423834765, "grad_norm": 0.9344337582588196, "learning_rate": 0.0002894330072020655, "loss": 3.2412, "step": 83665 }, { "epoch": 5.684875662454138, "grad_norm": 0.8824185132980347, "learning_rate": 0.0002893905421932328, "loss": 3.4567, "step": 83670 }, { "epoch": 5.6852153825248, "grad_norm": 0.8569685220718384, "learning_rate": 0.0002893480771844, "loss": 3.3529, "step": 83675 }, { "epoch": 5.685555102595462, "grad_norm": 1.3566203117370605, "learning_rate": 0.00028930561217556735, "loss": 3.6026, "step": 83680 }, { "epoch": 5.685894822666123, "grad_norm": 0.7413835525512695, "learning_rate": 0.00028926314716673463, "loss": 3.5377, "step": 83685 }, { "epoch": 5.686234542736785, "grad_norm": 1.0775066614151, "learning_rate": 0.00028922068215790186, "loss": 3.4886, "step": 83690 }, { "epoch": 5.686574262807447, "grad_norm": 1.0997930765151978, "learning_rate": 0.00028917821714906914, "loss": 3.4644, "step": 83695 }, { "epoch": 5.686913982878108, "grad_norm": 0.7739960551261902, "learning_rate": 0.0002891357521402365, "loss": 3.5414, "step": 83700 }, { "epoch": 5.68725370294877, "grad_norm": 0.9319838285446167, "learning_rate": 0.0002890932871314037, "loss": 3.3576, "step": 83705 }, { "epoch": 5.687593423019432, "grad_norm": 0.8731487989425659, "learning_rate": 0.000289050822122571, "loss": 3.1742, "step": 83710 }, { "epoch": 5.687933143090094, "grad_norm": 0.9512740969657898, "learning_rate": 0.0002890083571137383, "loss": 3.3325, "step": 83715 }, { "epoch": 5.688272863160756, "grad_norm": 0.8766419887542725, "learning_rate": 0.00028896589210490554, "loss": 3.4934, "step": 83720 }, { "epoch": 5.688612583231417, "grad_norm": 1.0418423414230347, "learning_rate": 0.0002889234270960728, "loss": 3.5246, "step": 83725 }, { "epoch": 5.688952303302079, "grad_norm": 0.9664839506149292, "learning_rate": 0.00028888096208724016, "loss": 3.6018, "step": 83730 }, { "epoch": 5.689292023372741, "grad_norm": 0.8024872541427612, "learning_rate": 0.00028883849707840744, "loss": 3.3275, "step": 83735 }, { "epoch": 5.689631743443402, "grad_norm": 0.853101909160614, "learning_rate": 0.00028879603206957466, "loss": 3.4575, "step": 83740 }, { "epoch": 5.689971463514064, "grad_norm": 0.8780530095100403, "learning_rate": 0.00028875356706074194, "loss": 3.5338, "step": 83745 }, { "epoch": 5.690311183584726, "grad_norm": 0.7772284150123596, "learning_rate": 0.0002887111020519093, "loss": 3.4744, "step": 83750 }, { "epoch": 5.690650903655388, "grad_norm": 0.9718529582023621, "learning_rate": 0.0002886686370430765, "loss": 3.5455, "step": 83755 }, { "epoch": 5.69099062372605, "grad_norm": 0.7898495197296143, "learning_rate": 0.0002886261720342438, "loss": 3.492, "step": 83760 }, { "epoch": 5.691330343796712, "grad_norm": 0.9698404669761658, "learning_rate": 0.0002885837070254111, "loss": 3.526, "step": 83765 }, { "epoch": 5.691670063867373, "grad_norm": 0.9017671346664429, "learning_rate": 0.00028854124201657834, "loss": 3.3497, "step": 83770 }, { "epoch": 5.692009783938035, "grad_norm": 0.8094810247421265, "learning_rate": 0.0002884987770077456, "loss": 3.4283, "step": 83775 }, { "epoch": 5.692349504008697, "grad_norm": 0.8384556174278259, "learning_rate": 0.0002884563119989129, "loss": 3.4772, "step": 83780 }, { "epoch": 5.692689224079358, "grad_norm": 0.9147753715515137, "learning_rate": 0.0002884138469900802, "loss": 3.4849, "step": 83785 }, { "epoch": 5.69302894415002, "grad_norm": 1.1709409952163696, "learning_rate": 0.00028837138198124746, "loss": 3.1604, "step": 83790 }, { "epoch": 5.693368664220682, "grad_norm": 0.877680242061615, "learning_rate": 0.00028832891697241474, "loss": 3.5849, "step": 83795 }, { "epoch": 5.693708384291344, "grad_norm": 0.9595133662223816, "learning_rate": 0.000288286451963582, "loss": 3.4772, "step": 83800 }, { "epoch": 5.694048104362006, "grad_norm": 0.8448981642723083, "learning_rate": 0.0002882439869547493, "loss": 3.2734, "step": 83805 }, { "epoch": 5.694387824432668, "grad_norm": 1.5855991840362549, "learning_rate": 0.0002882015219459166, "loss": 3.4545, "step": 83810 }, { "epoch": 5.694727544503329, "grad_norm": 1.0869280099868774, "learning_rate": 0.0002881590569370838, "loss": 3.5631, "step": 83815 }, { "epoch": 5.695067264573991, "grad_norm": 0.9549832344055176, "learning_rate": 0.00028811659192825114, "loss": 3.4173, "step": 83820 }, { "epoch": 5.695406984644653, "grad_norm": 0.9095388054847717, "learning_rate": 0.0002880741269194184, "loss": 3.2966, "step": 83825 }, { "epoch": 5.695746704715314, "grad_norm": 1.0798704624176025, "learning_rate": 0.00028803166191058565, "loss": 3.5168, "step": 83830 }, { "epoch": 5.696086424785976, "grad_norm": 0.8289989829063416, "learning_rate": 0.000287989196901753, "loss": 3.7472, "step": 83835 }, { "epoch": 5.6964261448566385, "grad_norm": 0.8996767997741699, "learning_rate": 0.00028794673189292026, "loss": 3.3144, "step": 83840 }, { "epoch": 5.6967658649273, "grad_norm": 1.0706709623336792, "learning_rate": 0.0002879042668840875, "loss": 3.3853, "step": 83845 }, { "epoch": 5.697105584997962, "grad_norm": 0.8196334838867188, "learning_rate": 0.00028786180187525477, "loss": 3.434, "step": 83850 }, { "epoch": 5.697445305068624, "grad_norm": 1.0109295845031738, "learning_rate": 0.0002878193368664221, "loss": 3.4064, "step": 83855 }, { "epoch": 5.697785025139285, "grad_norm": 1.053186058998108, "learning_rate": 0.00028777687185758933, "loss": 3.4234, "step": 83860 }, { "epoch": 5.698124745209947, "grad_norm": 1.072319507598877, "learning_rate": 0.0002877344068487566, "loss": 3.4044, "step": 83865 }, { "epoch": 5.698464465280609, "grad_norm": 1.0606738328933716, "learning_rate": 0.00028769194183992394, "loss": 3.3686, "step": 83870 }, { "epoch": 5.69880418535127, "grad_norm": 1.1386884450912476, "learning_rate": 0.00028764947683109117, "loss": 3.5223, "step": 83875 }, { "epoch": 5.699143905421932, "grad_norm": 1.0773673057556152, "learning_rate": 0.00028760701182225845, "loss": 3.5415, "step": 83880 }, { "epoch": 5.6994836254925945, "grad_norm": 1.0632858276367188, "learning_rate": 0.00028756454681342573, "loss": 3.301, "step": 83885 }, { "epoch": 5.699823345563256, "grad_norm": 0.8226331472396851, "learning_rate": 0.000287522081804593, "loss": 3.3034, "step": 83890 }, { "epoch": 5.700163065633918, "grad_norm": 0.6676120162010193, "learning_rate": 0.0002874796167957603, "loss": 3.3912, "step": 83895 }, { "epoch": 5.70050278570458, "grad_norm": 0.8970184326171875, "learning_rate": 0.00028743715178692757, "loss": 3.3206, "step": 83900 }, { "epoch": 5.700842505775241, "grad_norm": 0.9870097041130066, "learning_rate": 0.0002873946867780949, "loss": 3.4802, "step": 83905 }, { "epoch": 5.701182225845903, "grad_norm": 0.9885196685791016, "learning_rate": 0.00028735222176926213, "loss": 3.5585, "step": 83910 }, { "epoch": 5.701521945916565, "grad_norm": 0.7322256565093994, "learning_rate": 0.0002873097567604294, "loss": 3.5418, "step": 83915 }, { "epoch": 5.701861665987226, "grad_norm": 1.0818402767181396, "learning_rate": 0.0002872672917515967, "loss": 3.3878, "step": 83920 }, { "epoch": 5.702201386057888, "grad_norm": 0.8178190588951111, "learning_rate": 0.00028722482674276397, "loss": 3.3554, "step": 83925 }, { "epoch": 5.70254110612855, "grad_norm": 0.8919997215270996, "learning_rate": 0.00028718236173393125, "loss": 3.4222, "step": 83930 }, { "epoch": 5.702880826199212, "grad_norm": 1.0931775569915771, "learning_rate": 0.00028713989672509853, "loss": 3.3344, "step": 83935 }, { "epoch": 5.703220546269874, "grad_norm": 0.9028447270393372, "learning_rate": 0.0002870974317162658, "loss": 3.1894, "step": 83940 }, { "epoch": 5.703560266340535, "grad_norm": 0.9204580783843994, "learning_rate": 0.0002870549667074331, "loss": 3.6648, "step": 83945 }, { "epoch": 5.703899986411197, "grad_norm": 1.0741089582443237, "learning_rate": 0.00028701250169860037, "loss": 3.2224, "step": 83950 }, { "epoch": 5.704239706481859, "grad_norm": 0.9022413492202759, "learning_rate": 0.0002869700366897676, "loss": 3.4032, "step": 83955 }, { "epoch": 5.70457942655252, "grad_norm": 1.0687668323516846, "learning_rate": 0.00028692757168093493, "loss": 3.303, "step": 83960 }, { "epoch": 5.704919146623182, "grad_norm": 0.8772162795066833, "learning_rate": 0.0002868851066721022, "loss": 3.3588, "step": 83965 }, { "epoch": 5.705258866693844, "grad_norm": 1.0311598777770996, "learning_rate": 0.00028684264166326944, "loss": 3.4788, "step": 83970 }, { "epoch": 5.705598586764506, "grad_norm": 1.0922843217849731, "learning_rate": 0.00028680017665443677, "loss": 3.5475, "step": 83975 }, { "epoch": 5.705938306835168, "grad_norm": 0.798789381980896, "learning_rate": 0.00028675771164560405, "loss": 3.6184, "step": 83980 }, { "epoch": 5.70627802690583, "grad_norm": 0.8494711518287659, "learning_rate": 0.0002867152466367713, "loss": 3.3918, "step": 83985 }, { "epoch": 5.706617746976491, "grad_norm": 0.8345401287078857, "learning_rate": 0.00028667278162793856, "loss": 3.2829, "step": 83990 }, { "epoch": 5.706957467047153, "grad_norm": 0.9448081851005554, "learning_rate": 0.0002866303166191059, "loss": 3.5346, "step": 83995 }, { "epoch": 5.707297187117815, "grad_norm": 0.8514798879623413, "learning_rate": 0.0002865878516102731, "loss": 3.3315, "step": 84000 }, { "epoch": 5.707636907188476, "grad_norm": 0.9723079800605774, "learning_rate": 0.0002865453866014404, "loss": 3.4747, "step": 84005 }, { "epoch": 5.707976627259138, "grad_norm": 0.8446614146232605, "learning_rate": 0.00028650292159260773, "loss": 3.5318, "step": 84010 }, { "epoch": 5.7083163473298, "grad_norm": 0.9322373270988464, "learning_rate": 0.00028646045658377496, "loss": 3.2718, "step": 84015 }, { "epoch": 5.708656067400462, "grad_norm": 0.9467368125915527, "learning_rate": 0.00028641799157494224, "loss": 3.4314, "step": 84020 }, { "epoch": 5.708995787471124, "grad_norm": 0.8080707788467407, "learning_rate": 0.00028637552656610957, "loss": 3.2791, "step": 84025 }, { "epoch": 5.709335507541786, "grad_norm": 1.0159244537353516, "learning_rate": 0.0002863330615572768, "loss": 3.2554, "step": 84030 }, { "epoch": 5.709675227612447, "grad_norm": 0.8671897649765015, "learning_rate": 0.0002862905965484441, "loss": 3.4122, "step": 84035 }, { "epoch": 5.710014947683109, "grad_norm": 0.9156672954559326, "learning_rate": 0.00028624813153961136, "loss": 3.5787, "step": 84040 }, { "epoch": 5.710354667753771, "grad_norm": 0.9359909892082214, "learning_rate": 0.00028620566653077864, "loss": 3.573, "step": 84045 }, { "epoch": 5.710694387824432, "grad_norm": 0.7729071974754333, "learning_rate": 0.0002861632015219459, "loss": 3.6875, "step": 84050 }, { "epoch": 5.711034107895094, "grad_norm": 0.9090317487716675, "learning_rate": 0.0002861207365131132, "loss": 3.5562, "step": 84055 }, { "epoch": 5.711373827965756, "grad_norm": 0.9269712567329407, "learning_rate": 0.0002860782715042805, "loss": 3.4594, "step": 84060 }, { "epoch": 5.711713548036418, "grad_norm": 1.371489405632019, "learning_rate": 0.00028603580649544776, "loss": 3.3133, "step": 84065 }, { "epoch": 5.71205326810708, "grad_norm": 1.0069010257720947, "learning_rate": 0.00028599334148661504, "loss": 3.5283, "step": 84070 }, { "epoch": 5.712392988177742, "grad_norm": 1.005874752998352, "learning_rate": 0.0002859508764777823, "loss": 3.5683, "step": 84075 }, { "epoch": 5.712732708248403, "grad_norm": 0.804942786693573, "learning_rate": 0.0002859084114689496, "loss": 3.6965, "step": 84080 }, { "epoch": 5.713072428319065, "grad_norm": 0.8429903984069824, "learning_rate": 0.0002858659464601169, "loss": 3.3661, "step": 84085 }, { "epoch": 5.713412148389727, "grad_norm": 0.9244166612625122, "learning_rate": 0.00028582348145128416, "loss": 3.2561, "step": 84090 }, { "epoch": 5.713751868460388, "grad_norm": 0.7285259366035461, "learning_rate": 0.00028578101644245144, "loss": 3.4882, "step": 84095 }, { "epoch": 5.71409158853105, "grad_norm": 0.7426473498344421, "learning_rate": 0.0002857385514336187, "loss": 3.3409, "step": 84100 }, { "epoch": 5.7144313086017124, "grad_norm": 0.9187553524971008, "learning_rate": 0.000285696086424786, "loss": 3.4546, "step": 84105 }, { "epoch": 5.714771028672374, "grad_norm": 0.9087973237037659, "learning_rate": 0.0002856536214159532, "loss": 3.3987, "step": 84110 }, { "epoch": 5.715110748743036, "grad_norm": 0.9256554841995239, "learning_rate": 0.00028561115640712056, "loss": 3.4038, "step": 84115 }, { "epoch": 5.715450468813698, "grad_norm": 0.8801407217979431, "learning_rate": 0.00028556869139828784, "loss": 3.5577, "step": 84120 }, { "epoch": 5.715790188884359, "grad_norm": 1.1676042079925537, "learning_rate": 0.00028552622638945506, "loss": 3.4637, "step": 84125 }, { "epoch": 5.716129908955021, "grad_norm": 1.0718412399291992, "learning_rate": 0.0002854837613806224, "loss": 3.2875, "step": 84130 }, { "epoch": 5.716469629025683, "grad_norm": 0.887121319770813, "learning_rate": 0.0002854412963717897, "loss": 3.5386, "step": 84135 }, { "epoch": 5.716809349096344, "grad_norm": 0.8285028338432312, "learning_rate": 0.0002853988313629569, "loss": 3.6977, "step": 84140 }, { "epoch": 5.717149069167006, "grad_norm": 1.046920895576477, "learning_rate": 0.0002853563663541242, "loss": 3.612, "step": 84145 }, { "epoch": 5.7174887892376685, "grad_norm": 0.7470901608467102, "learning_rate": 0.0002853139013452915, "loss": 3.4974, "step": 84150 }, { "epoch": 5.71782850930833, "grad_norm": 0.7823852896690369, "learning_rate": 0.00028527143633645875, "loss": 3.7629, "step": 84155 }, { "epoch": 5.718168229378992, "grad_norm": 0.9573970437049866, "learning_rate": 0.000285228971327626, "loss": 3.3715, "step": 84160 }, { "epoch": 5.718507949449654, "grad_norm": 0.9378623366355896, "learning_rate": 0.00028518650631879336, "loss": 3.5928, "step": 84165 }, { "epoch": 5.718847669520315, "grad_norm": 0.8206192255020142, "learning_rate": 0.0002851440413099606, "loss": 3.6126, "step": 84170 }, { "epoch": 5.719187389590977, "grad_norm": 1.2133420705795288, "learning_rate": 0.00028510157630112787, "loss": 3.1956, "step": 84175 }, { "epoch": 5.719527109661639, "grad_norm": 0.8665253520011902, "learning_rate": 0.00028505911129229515, "loss": 3.2586, "step": 84180 }, { "epoch": 5.7198668297323, "grad_norm": 0.8450859189033508, "learning_rate": 0.0002850166462834624, "loss": 3.535, "step": 84185 }, { "epoch": 5.720206549802962, "grad_norm": 0.7366791367530823, "learning_rate": 0.0002849741812746297, "loss": 3.3075, "step": 84190 }, { "epoch": 5.7205462698736245, "grad_norm": 0.9411636590957642, "learning_rate": 0.000284931716265797, "loss": 3.3706, "step": 84195 }, { "epoch": 5.720885989944286, "grad_norm": 1.2940138578414917, "learning_rate": 0.00028488925125696427, "loss": 3.4005, "step": 84200 }, { "epoch": 5.721225710014948, "grad_norm": 0.7453096508979797, "learning_rate": 0.00028484678624813155, "loss": 3.3809, "step": 84205 }, { "epoch": 5.72156543008561, "grad_norm": 0.8241920471191406, "learning_rate": 0.0002848043212392988, "loss": 3.494, "step": 84210 }, { "epoch": 5.721905150156271, "grad_norm": 0.7937658429145813, "learning_rate": 0.00028476185623046605, "loss": 3.5053, "step": 84215 }, { "epoch": 5.722244870226933, "grad_norm": 1.191135048866272, "learning_rate": 0.0002847193912216334, "loss": 3.5977, "step": 84220 }, { "epoch": 5.722584590297595, "grad_norm": 0.8079379200935364, "learning_rate": 0.00028467692621280067, "loss": 3.4345, "step": 84225 }, { "epoch": 5.722924310368256, "grad_norm": 0.7136418223381042, "learning_rate": 0.0002846344612039679, "loss": 3.4114, "step": 84230 }, { "epoch": 5.723264030438918, "grad_norm": 0.7274147868156433, "learning_rate": 0.0002845919961951352, "loss": 3.2753, "step": 84235 }, { "epoch": 5.7236037505095805, "grad_norm": 0.7918118238449097, "learning_rate": 0.0002845495311863025, "loss": 3.5911, "step": 84240 }, { "epoch": 5.723943470580242, "grad_norm": 0.9004654884338379, "learning_rate": 0.0002845070661774698, "loss": 3.4257, "step": 84245 }, { "epoch": 5.724283190650904, "grad_norm": 0.9150378704071045, "learning_rate": 0.000284464601168637, "loss": 3.5169, "step": 84250 }, { "epoch": 5.724622910721566, "grad_norm": 0.9237611889839172, "learning_rate": 0.00028442213615980435, "loss": 3.2728, "step": 84255 }, { "epoch": 5.724962630792227, "grad_norm": 1.181609869003296, "learning_rate": 0.0002843796711509716, "loss": 3.0971, "step": 84260 }, { "epoch": 5.725302350862889, "grad_norm": 0.7847892642021179, "learning_rate": 0.00028433720614213885, "loss": 3.4945, "step": 84265 }, { "epoch": 5.725642070933551, "grad_norm": 0.9372137784957886, "learning_rate": 0.0002842947411333062, "loss": 3.4669, "step": 84270 }, { "epoch": 5.725981791004212, "grad_norm": 0.8832663893699646, "learning_rate": 0.00028425227612447347, "loss": 3.5059, "step": 84275 }, { "epoch": 5.726321511074874, "grad_norm": 0.9269825220108032, "learning_rate": 0.0002842098111156407, "loss": 3.422, "step": 84280 }, { "epoch": 5.7266612311455365, "grad_norm": 0.8080892562866211, "learning_rate": 0.00028416734610680803, "loss": 3.2264, "step": 84285 }, { "epoch": 5.727000951216198, "grad_norm": 0.7354010343551636, "learning_rate": 0.0002841248810979753, "loss": 3.3669, "step": 84290 }, { "epoch": 5.72734067128686, "grad_norm": 0.8802018761634827, "learning_rate": 0.00028408241608914253, "loss": 3.1606, "step": 84295 }, { "epoch": 5.727680391357522, "grad_norm": 1.4239526987075806, "learning_rate": 0.0002840399510803098, "loss": 3.4503, "step": 84300 }, { "epoch": 5.728020111428183, "grad_norm": 0.8080979585647583, "learning_rate": 0.00028399748607147715, "loss": 3.4527, "step": 84305 }, { "epoch": 5.728359831498845, "grad_norm": 1.1819610595703125, "learning_rate": 0.0002839550210626444, "loss": 3.3812, "step": 84310 }, { "epoch": 5.728699551569507, "grad_norm": 2.1501505374908447, "learning_rate": 0.00028391255605381165, "loss": 3.4002, "step": 84315 }, { "epoch": 5.729039271640168, "grad_norm": 1.0254167318344116, "learning_rate": 0.000283870091044979, "loss": 3.38, "step": 84320 }, { "epoch": 5.72937899171083, "grad_norm": 0.8922179341316223, "learning_rate": 0.0002838276260361462, "loss": 3.169, "step": 84325 }, { "epoch": 5.7297187117814925, "grad_norm": 0.90912264585495, "learning_rate": 0.0002837851610273135, "loss": 3.2973, "step": 84330 }, { "epoch": 5.730058431852154, "grad_norm": 1.118115782737732, "learning_rate": 0.0002837426960184808, "loss": 3.2776, "step": 84335 }, { "epoch": 5.730398151922816, "grad_norm": 0.7984672784805298, "learning_rate": 0.00028370023100964805, "loss": 3.3826, "step": 84340 }, { "epoch": 5.730737871993478, "grad_norm": 1.1564778089523315, "learning_rate": 0.00028365776600081533, "loss": 3.5756, "step": 84345 }, { "epoch": 5.731077592064139, "grad_norm": 0.8630908727645874, "learning_rate": 0.0002836153009919826, "loss": 3.3204, "step": 84350 }, { "epoch": 5.731417312134801, "grad_norm": 0.8502505421638489, "learning_rate": 0.0002835728359831499, "loss": 3.5247, "step": 84355 }, { "epoch": 5.731757032205463, "grad_norm": 0.9214577674865723, "learning_rate": 0.0002835303709743172, "loss": 3.2942, "step": 84360 }, { "epoch": 5.732096752276124, "grad_norm": 1.0285985469818115, "learning_rate": 0.00028348790596548445, "loss": 3.3868, "step": 84365 }, { "epoch": 5.732436472346786, "grad_norm": 0.9314363598823547, "learning_rate": 0.0002834454409566517, "loss": 3.3335, "step": 84370 }, { "epoch": 5.7327761924174485, "grad_norm": 0.7926965951919556, "learning_rate": 0.000283402975947819, "loss": 3.2671, "step": 84375 }, { "epoch": 5.73311591248811, "grad_norm": 0.9742127060890198, "learning_rate": 0.0002833605109389863, "loss": 3.3638, "step": 84380 }, { "epoch": 5.733455632558772, "grad_norm": 0.9291610717773438, "learning_rate": 0.0002833180459301535, "loss": 3.3129, "step": 84385 }, { "epoch": 5.733795352629433, "grad_norm": 0.9314466118812561, "learning_rate": 0.00028327558092132085, "loss": 3.5246, "step": 84390 }, { "epoch": 5.734135072700095, "grad_norm": 1.0701738595962524, "learning_rate": 0.00028323311591248813, "loss": 3.8348, "step": 84395 }, { "epoch": 5.734474792770757, "grad_norm": 0.8623260259628296, "learning_rate": 0.00028319065090365536, "loss": 3.3738, "step": 84400 }, { "epoch": 5.734814512841418, "grad_norm": 1.0144683122634888, "learning_rate": 0.00028314818589482264, "loss": 3.1789, "step": 84405 }, { "epoch": 5.73515423291208, "grad_norm": 0.7138890027999878, "learning_rate": 0.00028310572088599, "loss": 3.6743, "step": 84410 }, { "epoch": 5.7354939529827424, "grad_norm": 0.83744215965271, "learning_rate": 0.00028306325587715725, "loss": 3.4298, "step": 84415 }, { "epoch": 5.735833673053404, "grad_norm": 1.1548726558685303, "learning_rate": 0.0002830207908683245, "loss": 3.5322, "step": 84420 }, { "epoch": 5.736173393124066, "grad_norm": 0.8313130736351013, "learning_rate": 0.0002829783258594918, "loss": 3.5726, "step": 84425 }, { "epoch": 5.736513113194728, "grad_norm": 0.8563076257705688, "learning_rate": 0.0002829358608506591, "loss": 3.4189, "step": 84430 }, { "epoch": 5.736852833265389, "grad_norm": 1.0885120630264282, "learning_rate": 0.0002828933958418263, "loss": 3.3929, "step": 84435 }, { "epoch": 5.737192553336051, "grad_norm": 0.7187303304672241, "learning_rate": 0.0002828509308329936, "loss": 3.5041, "step": 84440 }, { "epoch": 5.737532273406713, "grad_norm": 0.9216341972351074, "learning_rate": 0.00028280846582416094, "loss": 3.1429, "step": 84445 }, { "epoch": 5.737871993477374, "grad_norm": 0.9204725027084351, "learning_rate": 0.00028276600081532816, "loss": 3.591, "step": 84450 }, { "epoch": 5.738211713548036, "grad_norm": 0.7729880809783936, "learning_rate": 0.00028272353580649544, "loss": 3.3622, "step": 84455 }, { "epoch": 5.7385514336186985, "grad_norm": 0.889034628868103, "learning_rate": 0.0002826810707976628, "loss": 3.3659, "step": 84460 }, { "epoch": 5.73889115368936, "grad_norm": 1.0555063486099243, "learning_rate": 0.00028263860578883, "loss": 3.4863, "step": 84465 }, { "epoch": 5.739230873760022, "grad_norm": 1.2278649806976318, "learning_rate": 0.0002825961407799973, "loss": 3.6559, "step": 84470 }, { "epoch": 5.739570593830684, "grad_norm": 0.8109133839607239, "learning_rate": 0.00028255367577116456, "loss": 3.5988, "step": 84475 }, { "epoch": 5.739910313901345, "grad_norm": 1.1261653900146484, "learning_rate": 0.00028251121076233184, "loss": 3.407, "step": 84480 }, { "epoch": 5.740250033972007, "grad_norm": 0.796308696269989, "learning_rate": 0.0002824687457534991, "loss": 3.4035, "step": 84485 }, { "epoch": 5.740589754042669, "grad_norm": 0.9848004579544067, "learning_rate": 0.0002824262807446664, "loss": 3.6228, "step": 84490 }, { "epoch": 5.74092947411333, "grad_norm": 0.6940716505050659, "learning_rate": 0.0002823838157358337, "loss": 3.5295, "step": 84495 }, { "epoch": 5.741269194183992, "grad_norm": 0.9717973470687866, "learning_rate": 0.00028234135072700096, "loss": 3.4517, "step": 84500 }, { "epoch": 5.7416089142546545, "grad_norm": 0.8272750377655029, "learning_rate": 0.00028229888571816824, "loss": 3.08, "step": 84505 }, { "epoch": 5.741948634325316, "grad_norm": 0.8971176743507385, "learning_rate": 0.00028225642070933547, "loss": 3.1578, "step": 84510 }, { "epoch": 5.742288354395978, "grad_norm": 0.8350014090538025, "learning_rate": 0.0002822139557005028, "loss": 3.474, "step": 84515 }, { "epoch": 5.74262807446664, "grad_norm": 0.9874699115753174, "learning_rate": 0.0002821714906916701, "loss": 3.5072, "step": 84520 }, { "epoch": 5.742967794537301, "grad_norm": 0.8679519295692444, "learning_rate": 0.0002821290256828373, "loss": 3.5679, "step": 84525 }, { "epoch": 5.743307514607963, "grad_norm": 0.7251117825508118, "learning_rate": 0.00028208656067400464, "loss": 3.5299, "step": 84530 }, { "epoch": 5.743647234678625, "grad_norm": 1.0036567449569702, "learning_rate": 0.0002820440956651719, "loss": 3.4496, "step": 84535 }, { "epoch": 5.743986954749286, "grad_norm": 0.8619116544723511, "learning_rate": 0.00028200163065633915, "loss": 3.5888, "step": 84540 }, { "epoch": 5.744326674819948, "grad_norm": 1.024381160736084, "learning_rate": 0.00028195916564750643, "loss": 3.4979, "step": 84545 }, { "epoch": 5.7446663948906105, "grad_norm": 0.9617547392845154, "learning_rate": 0.00028191670063867376, "loss": 3.4353, "step": 84550 }, { "epoch": 5.745006114961272, "grad_norm": 1.1640223264694214, "learning_rate": 0.000281874235629841, "loss": 3.3353, "step": 84555 }, { "epoch": 5.745345835031934, "grad_norm": 0.7917426824569702, "learning_rate": 0.00028183177062100827, "loss": 3.5303, "step": 84560 }, { "epoch": 5.745685555102596, "grad_norm": 1.0829036235809326, "learning_rate": 0.0002817893056121756, "loss": 3.3918, "step": 84565 }, { "epoch": 5.746025275173257, "grad_norm": 0.8225312232971191, "learning_rate": 0.00028174684060334283, "loss": 3.5657, "step": 84570 }, { "epoch": 5.746364995243919, "grad_norm": 0.9610084295272827, "learning_rate": 0.0002817043755945101, "loss": 3.5816, "step": 84575 }, { "epoch": 5.746704715314581, "grad_norm": 0.9116448163986206, "learning_rate": 0.00028166191058567744, "loss": 3.2602, "step": 84580 }, { "epoch": 5.747044435385242, "grad_norm": 0.9722238183021545, "learning_rate": 0.0002816194455768447, "loss": 3.5461, "step": 84585 }, { "epoch": 5.747384155455904, "grad_norm": 0.9347169995307922, "learning_rate": 0.00028157698056801195, "loss": 3.6777, "step": 84590 }, { "epoch": 5.7477238755265665, "grad_norm": 0.9015772938728333, "learning_rate": 0.00028153451555917923, "loss": 3.3544, "step": 84595 }, { "epoch": 5.748063595597228, "grad_norm": 0.8130584955215454, "learning_rate": 0.00028149205055034656, "loss": 3.1738, "step": 84600 }, { "epoch": 5.74840331566789, "grad_norm": 0.7512129545211792, "learning_rate": 0.0002814495855415138, "loss": 3.5225, "step": 84605 }, { "epoch": 5.748743035738551, "grad_norm": 1.1360578536987305, "learning_rate": 0.00028140712053268107, "loss": 3.4459, "step": 84610 }, { "epoch": 5.749082755809213, "grad_norm": 0.7968753576278687, "learning_rate": 0.0002813646555238484, "loss": 3.3326, "step": 84615 }, { "epoch": 5.749422475879875, "grad_norm": 0.8719421625137329, "learning_rate": 0.00028132219051501563, "loss": 3.4085, "step": 84620 }, { "epoch": 5.749762195950536, "grad_norm": 0.7664800882339478, "learning_rate": 0.0002812797255061829, "loss": 3.4528, "step": 84625 }, { "epoch": 5.750101916021198, "grad_norm": 1.0982551574707031, "learning_rate": 0.0002812372604973502, "loss": 3.3311, "step": 84630 }, { "epoch": 5.75044163609186, "grad_norm": 0.9693065881729126, "learning_rate": 0.00028119479548851747, "loss": 3.3093, "step": 84635 }, { "epoch": 5.750781356162522, "grad_norm": 0.8258723616600037, "learning_rate": 0.00028115233047968475, "loss": 3.353, "step": 84640 }, { "epoch": 5.751121076233184, "grad_norm": 0.6775805950164795, "learning_rate": 0.00028110986547085203, "loss": 3.3371, "step": 84645 }, { "epoch": 5.751460796303846, "grad_norm": 0.7492602467536926, "learning_rate": 0.0002810674004620193, "loss": 3.3218, "step": 84650 }, { "epoch": 5.751800516374507, "grad_norm": 0.8571633696556091, "learning_rate": 0.0002810249354531866, "loss": 3.4108, "step": 84655 }, { "epoch": 5.752140236445169, "grad_norm": 0.7492689490318298, "learning_rate": 0.00028098247044435387, "loss": 3.6325, "step": 84660 }, { "epoch": 5.752479956515831, "grad_norm": 0.8897091746330261, "learning_rate": 0.0002809400054355211, "loss": 3.3558, "step": 84665 }, { "epoch": 5.752819676586492, "grad_norm": 1.013622522354126, "learning_rate": 0.00028089754042668843, "loss": 3.4798, "step": 84670 }, { "epoch": 5.753159396657154, "grad_norm": 0.9857528805732727, "learning_rate": 0.0002808550754178557, "loss": 3.3363, "step": 84675 }, { "epoch": 5.753499116727816, "grad_norm": 1.493955373764038, "learning_rate": 0.00028081261040902294, "loss": 3.585, "step": 84680 }, { "epoch": 5.753838836798478, "grad_norm": 0.8143149614334106, "learning_rate": 0.00028077014540019027, "loss": 3.232, "step": 84685 }, { "epoch": 5.75417855686914, "grad_norm": 1.0209901332855225, "learning_rate": 0.00028072768039135755, "loss": 3.4786, "step": 84690 }, { "epoch": 5.754518276939802, "grad_norm": 1.0197370052337646, "learning_rate": 0.0002806852153825248, "loss": 3.5541, "step": 84695 }, { "epoch": 5.754857997010463, "grad_norm": 1.0470458269119263, "learning_rate": 0.00028064275037369206, "loss": 3.1511, "step": 84700 }, { "epoch": 5.755197717081125, "grad_norm": 0.9887160658836365, "learning_rate": 0.0002806002853648594, "loss": 3.3463, "step": 84705 }, { "epoch": 5.755537437151787, "grad_norm": 1.0099499225616455, "learning_rate": 0.0002805578203560266, "loss": 3.3159, "step": 84710 }, { "epoch": 5.755877157222448, "grad_norm": 1.040313959121704, "learning_rate": 0.0002805153553471939, "loss": 2.9884, "step": 84715 }, { "epoch": 5.75621687729311, "grad_norm": 0.985922634601593, "learning_rate": 0.00028047289033836123, "loss": 3.3712, "step": 84720 }, { "epoch": 5.7565565973637725, "grad_norm": 1.0711133480072021, "learning_rate": 0.00028043042532952846, "loss": 3.3369, "step": 84725 }, { "epoch": 5.756896317434434, "grad_norm": 0.9990330338478088, "learning_rate": 0.00028038796032069574, "loss": 3.2786, "step": 84730 }, { "epoch": 5.757236037505096, "grad_norm": 0.819372832775116, "learning_rate": 0.000280345495311863, "loss": 3.2619, "step": 84735 }, { "epoch": 5.757575757575758, "grad_norm": 0.8904964923858643, "learning_rate": 0.0002803030303030303, "loss": 3.4609, "step": 84740 }, { "epoch": 5.757915477646419, "grad_norm": 1.4461841583251953, "learning_rate": 0.0002802605652941976, "loss": 3.242, "step": 84745 }, { "epoch": 5.758255197717081, "grad_norm": 0.7628354430198669, "learning_rate": 0.00028021810028536486, "loss": 3.4557, "step": 84750 }, { "epoch": 5.758594917787743, "grad_norm": 0.9564782381057739, "learning_rate": 0.0002801756352765322, "loss": 3.717, "step": 84755 }, { "epoch": 5.758934637858404, "grad_norm": 1.012934684753418, "learning_rate": 0.0002801331702676994, "loss": 3.338, "step": 84760 }, { "epoch": 5.759274357929066, "grad_norm": 1.0095961093902588, "learning_rate": 0.0002800907052588667, "loss": 3.7653, "step": 84765 }, { "epoch": 5.7596140779997285, "grad_norm": 0.9408828616142273, "learning_rate": 0.000280048240250034, "loss": 3.3584, "step": 84770 }, { "epoch": 5.75995379807039, "grad_norm": 0.9987916946411133, "learning_rate": 0.00028000577524120126, "loss": 3.3689, "step": 84775 }, { "epoch": 5.760293518141052, "grad_norm": 0.9112595319747925, "learning_rate": 0.00027996331023236854, "loss": 3.65, "step": 84780 }, { "epoch": 5.760633238211714, "grad_norm": 0.9903378486633301, "learning_rate": 0.0002799208452235358, "loss": 3.4764, "step": 84785 }, { "epoch": 5.760972958282375, "grad_norm": 0.8739778995513916, "learning_rate": 0.0002798783802147031, "loss": 3.3431, "step": 84790 }, { "epoch": 5.761312678353037, "grad_norm": 0.941175639629364, "learning_rate": 0.0002798359152058704, "loss": 3.4752, "step": 84795 }, { "epoch": 5.761652398423699, "grad_norm": 0.9631157517433167, "learning_rate": 0.00027979345019703766, "loss": 3.3741, "step": 84800 }, { "epoch": 5.76199211849436, "grad_norm": 0.8805457949638367, "learning_rate": 0.0002797509851882049, "loss": 3.0029, "step": 84805 }, { "epoch": 5.762331838565022, "grad_norm": 0.8393582701683044, "learning_rate": 0.0002797085201793722, "loss": 3.3259, "step": 84810 }, { "epoch": 5.7626715586356845, "grad_norm": 0.9385985732078552, "learning_rate": 0.0002796660551705395, "loss": 3.6487, "step": 84815 }, { "epoch": 5.763011278706346, "grad_norm": 0.9378626346588135, "learning_rate": 0.0002796235901617067, "loss": 3.6157, "step": 84820 }, { "epoch": 5.763350998777008, "grad_norm": 0.7376296520233154, "learning_rate": 0.00027958112515287406, "loss": 3.4403, "step": 84825 }, { "epoch": 5.76369071884767, "grad_norm": 1.0918469429016113, "learning_rate": 0.00027953866014404134, "loss": 3.5284, "step": 84830 }, { "epoch": 5.764030438918331, "grad_norm": 0.8113758563995361, "learning_rate": 0.00027949619513520856, "loss": 3.4687, "step": 84835 }, { "epoch": 5.764370158988993, "grad_norm": 0.8024821877479553, "learning_rate": 0.00027945373012637584, "loss": 3.259, "step": 84840 }, { "epoch": 5.764709879059655, "grad_norm": 1.0318459272384644, "learning_rate": 0.0002794112651175432, "loss": 3.4653, "step": 84845 }, { "epoch": 5.765049599130316, "grad_norm": 0.9057720303535461, "learning_rate": 0.0002793688001087104, "loss": 3.2315, "step": 84850 }, { "epoch": 5.765389319200978, "grad_norm": 0.9232569932937622, "learning_rate": 0.0002793263350998777, "loss": 3.5287, "step": 84855 }, { "epoch": 5.7657290392716405, "grad_norm": 0.8076748847961426, "learning_rate": 0.000279283870091045, "loss": 3.4549, "step": 84860 }, { "epoch": 5.766068759342302, "grad_norm": 0.8412861227989197, "learning_rate": 0.00027924140508221225, "loss": 3.4378, "step": 84865 }, { "epoch": 5.766408479412964, "grad_norm": 0.7184168696403503, "learning_rate": 0.0002791989400733795, "loss": 3.5305, "step": 84870 }, { "epoch": 5.766748199483626, "grad_norm": 0.9143257141113281, "learning_rate": 0.00027915647506454686, "loss": 3.4888, "step": 84875 }, { "epoch": 5.767087919554287, "grad_norm": 0.8010311126708984, "learning_rate": 0.0002791140100557141, "loss": 3.3894, "step": 84880 }, { "epoch": 5.767427639624949, "grad_norm": 0.824298083782196, "learning_rate": 0.00027907154504688137, "loss": 3.3623, "step": 84885 }, { "epoch": 5.767767359695611, "grad_norm": 0.9923129677772522, "learning_rate": 0.00027902908003804865, "loss": 3.6162, "step": 84890 }, { "epoch": 5.768107079766272, "grad_norm": 0.9098342061042786, "learning_rate": 0.0002789866150292159, "loss": 3.4115, "step": 84895 }, { "epoch": 5.768446799836934, "grad_norm": 0.7669000029563904, "learning_rate": 0.0002789441500203832, "loss": 3.543, "step": 84900 }, { "epoch": 5.7687865199075965, "grad_norm": 0.7977213263511658, "learning_rate": 0.0002789016850115505, "loss": 3.6517, "step": 84905 }, { "epoch": 5.769126239978258, "grad_norm": 0.7996681332588196, "learning_rate": 0.00027885922000271777, "loss": 3.5461, "step": 84910 }, { "epoch": 5.76946596004892, "grad_norm": 1.1067283153533936, "learning_rate": 0.00027881675499388505, "loss": 3.4234, "step": 84915 }, { "epoch": 5.769805680119582, "grad_norm": 0.9065130352973938, "learning_rate": 0.0002787742899850523, "loss": 3.3491, "step": 84920 }, { "epoch": 5.770145400190243, "grad_norm": 1.1819778680801392, "learning_rate": 0.0002787318249762196, "loss": 3.5745, "step": 84925 }, { "epoch": 5.770485120260905, "grad_norm": 0.7864854335784912, "learning_rate": 0.0002786893599673869, "loss": 3.6009, "step": 84930 }, { "epoch": 5.770824840331567, "grad_norm": 0.8967171311378479, "learning_rate": 0.00027864689495855417, "loss": 3.6565, "step": 84935 }, { "epoch": 5.771164560402228, "grad_norm": 0.8215663433074951, "learning_rate": 0.00027860442994972145, "loss": 3.473, "step": 84940 }, { "epoch": 5.77150428047289, "grad_norm": 0.7403429746627808, "learning_rate": 0.0002785619649408887, "loss": 3.4333, "step": 84945 }, { "epoch": 5.7718440005435525, "grad_norm": 1.0635896921157837, "learning_rate": 0.000278519499932056, "loss": 3.4566, "step": 84950 }, { "epoch": 5.772183720614214, "grad_norm": 0.6742855906486511, "learning_rate": 0.0002784770349232233, "loss": 3.5216, "step": 84955 }, { "epoch": 5.772523440684876, "grad_norm": 0.8137953281402588, "learning_rate": 0.0002784345699143905, "loss": 3.4716, "step": 84960 }, { "epoch": 5.772863160755538, "grad_norm": 0.884519100189209, "learning_rate": 0.00027839210490555785, "loss": 3.3545, "step": 84965 }, { "epoch": 5.773202880826199, "grad_norm": 0.7287304401397705, "learning_rate": 0.0002783496398967251, "loss": 3.407, "step": 84970 }, { "epoch": 5.773542600896861, "grad_norm": 0.9417672753334045, "learning_rate": 0.00027830717488789235, "loss": 3.4167, "step": 84975 }, { "epoch": 5.773882320967523, "grad_norm": 0.9843248128890991, "learning_rate": 0.0002782647098790597, "loss": 3.3122, "step": 84980 }, { "epoch": 5.774222041038184, "grad_norm": 0.814001739025116, "learning_rate": 0.00027822224487022697, "loss": 3.2245, "step": 84985 }, { "epoch": 5.7745617611088464, "grad_norm": 0.8859785199165344, "learning_rate": 0.0002781797798613942, "loss": 3.6028, "step": 84990 }, { "epoch": 5.7749014811795085, "grad_norm": 1.0831137895584106, "learning_rate": 0.00027813731485256147, "loss": 3.5136, "step": 84995 }, { "epoch": 5.77524120125017, "grad_norm": 0.6970549821853638, "learning_rate": 0.0002780948498437288, "loss": 3.4374, "step": 85000 }, { "epoch": 5.775580921320832, "grad_norm": 0.8316484689712524, "learning_rate": 0.00027805238483489603, "loss": 3.2437, "step": 85005 }, { "epoch": 5.775920641391494, "grad_norm": 1.0049972534179688, "learning_rate": 0.0002780099198260633, "loss": 3.46, "step": 85010 }, { "epoch": 5.776260361462155, "grad_norm": 0.7613423466682434, "learning_rate": 0.00027796745481723065, "loss": 3.6322, "step": 85015 }, { "epoch": 5.776600081532817, "grad_norm": 0.8280127644538879, "learning_rate": 0.0002779249898083979, "loss": 3.4453, "step": 85020 }, { "epoch": 5.776939801603479, "grad_norm": 0.8864409327507019, "learning_rate": 0.00027788252479956515, "loss": 3.3708, "step": 85025 }, { "epoch": 5.77727952167414, "grad_norm": 0.8070069551467896, "learning_rate": 0.00027784005979073243, "loss": 3.4299, "step": 85030 }, { "epoch": 5.7776192417448025, "grad_norm": 0.8413402438163757, "learning_rate": 0.0002777975947818997, "loss": 3.5294, "step": 85035 }, { "epoch": 5.7779589618154645, "grad_norm": 0.9970145225524902, "learning_rate": 0.000277755129773067, "loss": 3.3096, "step": 85040 }, { "epoch": 5.778298681886126, "grad_norm": 0.8554643392562866, "learning_rate": 0.0002777126647642343, "loss": 3.7774, "step": 85045 }, { "epoch": 5.778638401956788, "grad_norm": 1.105699062347412, "learning_rate": 0.00027767019975540155, "loss": 3.3027, "step": 85050 }, { "epoch": 5.77897812202745, "grad_norm": 0.784903347492218, "learning_rate": 0.00027762773474656883, "loss": 3.4916, "step": 85055 }, { "epoch": 5.779317842098111, "grad_norm": 1.0892821550369263, "learning_rate": 0.0002775852697377361, "loss": 3.2721, "step": 85060 }, { "epoch": 5.779657562168773, "grad_norm": 0.8801475167274475, "learning_rate": 0.00027754280472890334, "loss": 3.6024, "step": 85065 }, { "epoch": 5.779997282239434, "grad_norm": 0.7461523413658142, "learning_rate": 0.0002775003397200707, "loss": 3.2954, "step": 85070 }, { "epoch": 5.780337002310096, "grad_norm": 1.2777671813964844, "learning_rate": 0.00027745787471123795, "loss": 3.3099, "step": 85075 }, { "epoch": 5.7806767223807585, "grad_norm": 0.8527796864509583, "learning_rate": 0.0002774154097024052, "loss": 3.5161, "step": 85080 }, { "epoch": 5.78101644245142, "grad_norm": 0.9096696972846985, "learning_rate": 0.0002773729446935725, "loss": 3.3977, "step": 85085 }, { "epoch": 5.781356162522082, "grad_norm": 0.9232884645462036, "learning_rate": 0.0002773304796847398, "loss": 3.4829, "step": 85090 }, { "epoch": 5.781695882592744, "grad_norm": 0.8620707988739014, "learning_rate": 0.0002772880146759071, "loss": 3.5398, "step": 85095 }, { "epoch": 5.782035602663405, "grad_norm": 0.8821724057197571, "learning_rate": 0.0002772455496670743, "loss": 3.3148, "step": 85100 }, { "epoch": 5.782375322734067, "grad_norm": 1.108919620513916, "learning_rate": 0.00027720308465824163, "loss": 3.1349, "step": 85105 }, { "epoch": 5.782715042804729, "grad_norm": 0.7483648657798767, "learning_rate": 0.0002771606196494089, "loss": 3.4792, "step": 85110 }, { "epoch": 5.78305476287539, "grad_norm": 0.8516687154769897, "learning_rate": 0.00027711815464057614, "loss": 3.5186, "step": 85115 }, { "epoch": 5.783394482946052, "grad_norm": 1.0146435499191284, "learning_rate": 0.0002770756896317435, "loss": 3.1839, "step": 85120 }, { "epoch": 5.7837342030167145, "grad_norm": 0.7424401640892029, "learning_rate": 0.00027703322462291075, "loss": 3.4199, "step": 85125 }, { "epoch": 5.784073923087376, "grad_norm": 1.0545909404754639, "learning_rate": 0.000276990759614078, "loss": 3.3169, "step": 85130 }, { "epoch": 5.784413643158038, "grad_norm": 0.9177398681640625, "learning_rate": 0.00027694829460524526, "loss": 3.7042, "step": 85135 }, { "epoch": 5.7847533632287, "grad_norm": 0.8496518731117249, "learning_rate": 0.0002769058295964126, "loss": 3.3361, "step": 85140 }, { "epoch": 5.785093083299361, "grad_norm": 0.8929536938667297, "learning_rate": 0.0002768633645875798, "loss": 3.4954, "step": 85145 }, { "epoch": 5.785432803370023, "grad_norm": 0.8576931953430176, "learning_rate": 0.0002768208995787471, "loss": 3.4054, "step": 85150 }, { "epoch": 5.785772523440685, "grad_norm": 0.9505293369293213, "learning_rate": 0.00027677843456991444, "loss": 3.382, "step": 85155 }, { "epoch": 5.786112243511346, "grad_norm": 0.9586608409881592, "learning_rate": 0.00027673596956108166, "loss": 3.4526, "step": 85160 }, { "epoch": 5.786451963582008, "grad_norm": 0.8742292523384094, "learning_rate": 0.00027669350455224894, "loss": 3.4869, "step": 85165 }, { "epoch": 5.7867916836526705, "grad_norm": 0.8352279663085938, "learning_rate": 0.0002766510395434163, "loss": 3.518, "step": 85170 }, { "epoch": 5.787131403723332, "grad_norm": 0.7971248626708984, "learning_rate": 0.0002766085745345835, "loss": 3.4276, "step": 85175 }, { "epoch": 5.787471123793994, "grad_norm": 0.7960358262062073, "learning_rate": 0.0002765661095257508, "loss": 3.7624, "step": 85180 }, { "epoch": 5.787810843864656, "grad_norm": 0.7510971426963806, "learning_rate": 0.00027652364451691806, "loss": 3.4266, "step": 85185 }, { "epoch": 5.788150563935317, "grad_norm": 0.8277126550674438, "learning_rate": 0.00027648117950808534, "loss": 3.6447, "step": 85190 }, { "epoch": 5.788490284005979, "grad_norm": 0.8296245336532593, "learning_rate": 0.0002764387144992526, "loss": 3.4857, "step": 85195 }, { "epoch": 5.788830004076641, "grad_norm": 1.1003093719482422, "learning_rate": 0.0002763962494904199, "loss": 3.3522, "step": 85200 }, { "epoch": 5.789169724147302, "grad_norm": 0.9212766885757446, "learning_rate": 0.0002763537844815872, "loss": 3.5218, "step": 85205 }, { "epoch": 5.789509444217964, "grad_norm": 0.9671154022216797, "learning_rate": 0.00027631131947275446, "loss": 3.5837, "step": 85210 }, { "epoch": 5.7898491642886265, "grad_norm": 0.9475633502006531, "learning_rate": 0.00027626885446392174, "loss": 3.3139, "step": 85215 }, { "epoch": 5.790188884359288, "grad_norm": 0.9362192153930664, "learning_rate": 0.00027622638945508897, "loss": 3.5035, "step": 85220 }, { "epoch": 5.79052860442995, "grad_norm": 1.1112134456634521, "learning_rate": 0.0002761839244462563, "loss": 3.2641, "step": 85225 }, { "epoch": 5.790868324500612, "grad_norm": 1.018168568611145, "learning_rate": 0.0002761414594374236, "loss": 3.5804, "step": 85230 }, { "epoch": 5.791208044571273, "grad_norm": 1.0218136310577393, "learning_rate": 0.0002760989944285908, "loss": 3.3278, "step": 85235 }, { "epoch": 5.791547764641935, "grad_norm": 0.9222137928009033, "learning_rate": 0.00027605652941975814, "loss": 3.3777, "step": 85240 }, { "epoch": 5.791887484712597, "grad_norm": 0.9385232329368591, "learning_rate": 0.0002760140644109254, "loss": 3.5547, "step": 85245 }, { "epoch": 5.792227204783258, "grad_norm": 0.9414511919021606, "learning_rate": 0.00027597159940209265, "loss": 3.4844, "step": 85250 }, { "epoch": 5.79256692485392, "grad_norm": 0.8871060609817505, "learning_rate": 0.00027592913439325993, "loss": 3.3282, "step": 85255 }, { "epoch": 5.7929066449245825, "grad_norm": 1.0003222227096558, "learning_rate": 0.00027588666938442726, "loss": 3.6555, "step": 85260 }, { "epoch": 5.793246364995244, "grad_norm": 0.8678728938102722, "learning_rate": 0.00027584420437559454, "loss": 3.3329, "step": 85265 }, { "epoch": 5.793586085065906, "grad_norm": 1.3389482498168945, "learning_rate": 0.00027580173936676177, "loss": 3.322, "step": 85270 }, { "epoch": 5.793925805136568, "grad_norm": 0.7082506418228149, "learning_rate": 0.0002757592743579291, "loss": 3.5262, "step": 85275 }, { "epoch": 5.794265525207229, "grad_norm": 0.8104579448699951, "learning_rate": 0.0002757168093490964, "loss": 3.7101, "step": 85280 }, { "epoch": 5.794605245277891, "grad_norm": 0.7900086641311646, "learning_rate": 0.0002756743443402636, "loss": 3.5779, "step": 85285 }, { "epoch": 5.794944965348552, "grad_norm": 0.7708951830863953, "learning_rate": 0.0002756318793314309, "loss": 3.4201, "step": 85290 }, { "epoch": 5.795284685419214, "grad_norm": 0.9626030325889587, "learning_rate": 0.0002755894143225982, "loss": 3.5494, "step": 85295 }, { "epoch": 5.7956244054898765, "grad_norm": 0.8902991414070129, "learning_rate": 0.00027554694931376545, "loss": 3.4811, "step": 85300 }, { "epoch": 5.795964125560538, "grad_norm": 1.0814568996429443, "learning_rate": 0.00027550448430493273, "loss": 3.5386, "step": 85305 }, { "epoch": 5.7963038456312, "grad_norm": 0.8665685653686523, "learning_rate": 0.00027546201929610006, "loss": 3.637, "step": 85310 }, { "epoch": 5.796643565701862, "grad_norm": 0.771484911441803, "learning_rate": 0.0002754195542872673, "loss": 3.3651, "step": 85315 }, { "epoch": 5.796983285772523, "grad_norm": 0.9619312286376953, "learning_rate": 0.00027537708927843457, "loss": 3.1736, "step": 85320 }, { "epoch": 5.797323005843185, "grad_norm": 0.7134144902229309, "learning_rate": 0.00027533462426960185, "loss": 3.2355, "step": 85325 }, { "epoch": 5.797662725913847, "grad_norm": 1.018972396850586, "learning_rate": 0.00027529215926076913, "loss": 3.5379, "step": 85330 }, { "epoch": 5.798002445984508, "grad_norm": 0.6907059550285339, "learning_rate": 0.0002752496942519364, "loss": 3.5623, "step": 85335 }, { "epoch": 5.79834216605517, "grad_norm": 1.268068552017212, "learning_rate": 0.0002752072292431037, "loss": 3.3848, "step": 85340 }, { "epoch": 5.7986818861258325, "grad_norm": 1.4232944250106812, "learning_rate": 0.00027516476423427097, "loss": 3.3481, "step": 85345 }, { "epoch": 5.799021606196494, "grad_norm": 1.2240643501281738, "learning_rate": 0.00027512229922543825, "loss": 3.4591, "step": 85350 }, { "epoch": 5.799361326267156, "grad_norm": 0.8978531360626221, "learning_rate": 0.00027507983421660553, "loss": 3.4038, "step": 85355 }, { "epoch": 5.799701046337818, "grad_norm": 0.9523215889930725, "learning_rate": 0.00027503736920777276, "loss": 3.4074, "step": 85360 }, { "epoch": 5.800040766408479, "grad_norm": 0.9013262391090393, "learning_rate": 0.0002749949041989401, "loss": 3.3333, "step": 85365 }, { "epoch": 5.800380486479141, "grad_norm": 1.0248736143112183, "learning_rate": 0.00027495243919010737, "loss": 3.0011, "step": 85370 }, { "epoch": 5.800720206549803, "grad_norm": 1.0579249858856201, "learning_rate": 0.0002749099741812746, "loss": 3.4031, "step": 85375 }, { "epoch": 5.801059926620464, "grad_norm": 0.8195279836654663, "learning_rate": 0.00027486750917244193, "loss": 3.3102, "step": 85380 }, { "epoch": 5.801399646691126, "grad_norm": 0.8672236800193787, "learning_rate": 0.0002748250441636092, "loss": 3.4618, "step": 85385 }, { "epoch": 5.8017393667617885, "grad_norm": 0.8514004945755005, "learning_rate": 0.00027478257915477644, "loss": 3.3222, "step": 85390 }, { "epoch": 5.80207908683245, "grad_norm": 0.9608245491981506, "learning_rate": 0.0002747401141459437, "loss": 3.7151, "step": 85395 }, { "epoch": 5.802418806903112, "grad_norm": 0.9496618509292603, "learning_rate": 0.00027469764913711105, "loss": 3.5064, "step": 85400 }, { "epoch": 5.802758526973774, "grad_norm": 0.9411175847053528, "learning_rate": 0.0002746551841282783, "loss": 3.5027, "step": 85405 }, { "epoch": 5.803098247044435, "grad_norm": 0.998494029045105, "learning_rate": 0.00027461271911944556, "loss": 3.4552, "step": 85410 }, { "epoch": 5.803437967115097, "grad_norm": 0.9135532975196838, "learning_rate": 0.0002745702541106129, "loss": 3.2788, "step": 85415 }, { "epoch": 5.803777687185759, "grad_norm": 0.7087032198905945, "learning_rate": 0.0002745277891017801, "loss": 3.3046, "step": 85420 }, { "epoch": 5.80411740725642, "grad_norm": 1.1888831853866577, "learning_rate": 0.0002744853240929474, "loss": 3.2063, "step": 85425 }, { "epoch": 5.804457127327082, "grad_norm": 1.206748366355896, "learning_rate": 0.00027444285908411473, "loss": 3.2164, "step": 85430 }, { "epoch": 5.8047968473977445, "grad_norm": 1.1295928955078125, "learning_rate": 0.000274400394075282, "loss": 3.7114, "step": 85435 }, { "epoch": 5.805136567468406, "grad_norm": 0.7978733777999878, "learning_rate": 0.00027435792906644924, "loss": 3.284, "step": 85440 }, { "epoch": 5.805476287539068, "grad_norm": 0.8604005575180054, "learning_rate": 0.0002743154640576165, "loss": 3.5307, "step": 85445 }, { "epoch": 5.80581600760973, "grad_norm": 1.2144805192947388, "learning_rate": 0.00027427299904878385, "loss": 3.4398, "step": 85450 }, { "epoch": 5.806155727680391, "grad_norm": 0.992076575756073, "learning_rate": 0.0002742305340399511, "loss": 3.5094, "step": 85455 }, { "epoch": 5.806495447751053, "grad_norm": 0.8304373025894165, "learning_rate": 0.00027418806903111836, "loss": 3.3764, "step": 85460 }, { "epoch": 5.806835167821715, "grad_norm": 0.7868382930755615, "learning_rate": 0.0002741456040222857, "loss": 3.5291, "step": 85465 }, { "epoch": 5.807174887892376, "grad_norm": 0.9606617093086243, "learning_rate": 0.0002741031390134529, "loss": 3.5315, "step": 85470 }, { "epoch": 5.807514607963038, "grad_norm": 0.8384671807289124, "learning_rate": 0.0002740606740046202, "loss": 3.4263, "step": 85475 }, { "epoch": 5.8078543280337005, "grad_norm": 0.7339631915092468, "learning_rate": 0.0002740182089957875, "loss": 3.4639, "step": 85480 }, { "epoch": 5.808194048104362, "grad_norm": 0.9288833141326904, "learning_rate": 0.00027397574398695476, "loss": 3.5329, "step": 85485 }, { "epoch": 5.808533768175024, "grad_norm": 0.8140423893928528, "learning_rate": 0.00027393327897812204, "loss": 3.5551, "step": 85490 }, { "epoch": 5.808873488245686, "grad_norm": 0.9124660491943359, "learning_rate": 0.0002738908139692893, "loss": 3.3516, "step": 85495 }, { "epoch": 5.809213208316347, "grad_norm": 0.9237702488899231, "learning_rate": 0.0002738483489604566, "loss": 3.4477, "step": 85500 }, { "epoch": 5.809552928387009, "grad_norm": 1.0749143362045288, "learning_rate": 0.0002738058839516239, "loss": 3.3363, "step": 85505 }, { "epoch": 5.809892648457671, "grad_norm": 0.7769162058830261, "learning_rate": 0.00027376341894279116, "loss": 3.3781, "step": 85510 }, { "epoch": 5.810232368528332, "grad_norm": 1.0294381380081177, "learning_rate": 0.0002737209539339584, "loss": 3.4317, "step": 85515 }, { "epoch": 5.810572088598994, "grad_norm": 0.7882481813430786, "learning_rate": 0.0002736784889251257, "loss": 3.4383, "step": 85520 }, { "epoch": 5.8109118086696565, "grad_norm": 0.9073072671890259, "learning_rate": 0.000273636023916293, "loss": 3.7361, "step": 85525 }, { "epoch": 5.811251528740318, "grad_norm": 1.0004278421401978, "learning_rate": 0.0002735935589074602, "loss": 3.5537, "step": 85530 }, { "epoch": 5.81159124881098, "grad_norm": 0.849044680595398, "learning_rate": 0.00027355109389862756, "loss": 3.6018, "step": 85535 }, { "epoch": 5.811930968881642, "grad_norm": 1.3402457237243652, "learning_rate": 0.00027350862888979484, "loss": 3.2131, "step": 85540 }, { "epoch": 5.812270688952303, "grad_norm": 0.7930300235748291, "learning_rate": 0.00027346616388096206, "loss": 3.4557, "step": 85545 }, { "epoch": 5.812610409022965, "grad_norm": 0.8929469585418701, "learning_rate": 0.00027342369887212934, "loss": 3.3156, "step": 85550 }, { "epoch": 5.812950129093627, "grad_norm": 0.9994937777519226, "learning_rate": 0.0002733812338632967, "loss": 3.5438, "step": 85555 }, { "epoch": 5.813289849164288, "grad_norm": 0.7199445962905884, "learning_rate": 0.0002733387688544639, "loss": 3.4977, "step": 85560 }, { "epoch": 5.8136295692349504, "grad_norm": 0.6542371511459351, "learning_rate": 0.0002732963038456312, "loss": 3.5139, "step": 85565 }, { "epoch": 5.8139692893056125, "grad_norm": 0.8727741837501526, "learning_rate": 0.0002732538388367985, "loss": 3.3795, "step": 85570 }, { "epoch": 5.814309009376274, "grad_norm": 0.9861164689064026, "learning_rate": 0.00027321137382796574, "loss": 3.5103, "step": 85575 }, { "epoch": 5.814648729446936, "grad_norm": 0.9636548757553101, "learning_rate": 0.000273168908819133, "loss": 3.57, "step": 85580 }, { "epoch": 5.814988449517598, "grad_norm": 0.8657228350639343, "learning_rate": 0.0002731264438103003, "loss": 3.5232, "step": 85585 }, { "epoch": 5.815328169588259, "grad_norm": 1.0021485090255737, "learning_rate": 0.0002730839788014676, "loss": 3.2022, "step": 85590 }, { "epoch": 5.815667889658921, "grad_norm": 1.1353603601455688, "learning_rate": 0.00027304151379263487, "loss": 3.4679, "step": 85595 }, { "epoch": 5.816007609729583, "grad_norm": 1.0067800283432007, "learning_rate": 0.00027299904878380215, "loss": 3.4992, "step": 85600 }, { "epoch": 5.816347329800244, "grad_norm": 0.7749165296554565, "learning_rate": 0.0002729565837749695, "loss": 3.3805, "step": 85605 }, { "epoch": 5.8166870498709065, "grad_norm": 1.0942476987838745, "learning_rate": 0.0002729141187661367, "loss": 3.3111, "step": 85610 }, { "epoch": 5.8170267699415685, "grad_norm": 0.965944766998291, "learning_rate": 0.000272871653757304, "loss": 3.342, "step": 85615 }, { "epoch": 5.81736649001223, "grad_norm": 0.890684187412262, "learning_rate": 0.00027282918874847127, "loss": 3.2842, "step": 85620 }, { "epoch": 5.817706210082892, "grad_norm": 1.121200680732727, "learning_rate": 0.00027278672373963855, "loss": 3.3264, "step": 85625 }, { "epoch": 5.818045930153554, "grad_norm": 0.794026255607605, "learning_rate": 0.0002727442587308058, "loss": 3.4603, "step": 85630 }, { "epoch": 5.818385650224215, "grad_norm": 0.9887314438819885, "learning_rate": 0.0002727017937219731, "loss": 3.4582, "step": 85635 }, { "epoch": 5.818725370294877, "grad_norm": 0.8179646134376526, "learning_rate": 0.0002726593287131404, "loss": 3.456, "step": 85640 }, { "epoch": 5.819065090365539, "grad_norm": 1.057542085647583, "learning_rate": 0.00027261686370430767, "loss": 3.3893, "step": 85645 }, { "epoch": 5.8194048104362, "grad_norm": 1.0698884725570679, "learning_rate": 0.00027257439869547495, "loss": 3.5208, "step": 85650 }, { "epoch": 5.8197445305068625, "grad_norm": 0.8251063227653503, "learning_rate": 0.00027253193368664217, "loss": 3.5329, "step": 85655 }, { "epoch": 5.8200842505775245, "grad_norm": 1.224420428276062, "learning_rate": 0.0002724894686778095, "loss": 3.5612, "step": 85660 }, { "epoch": 5.820423970648186, "grad_norm": 0.8822425603866577, "learning_rate": 0.0002724470036689768, "loss": 3.5652, "step": 85665 }, { "epoch": 5.820763690718848, "grad_norm": 0.888246476650238, "learning_rate": 0.000272404538660144, "loss": 3.5334, "step": 85670 }, { "epoch": 5.82110341078951, "grad_norm": 0.84879070520401, "learning_rate": 0.00027236207365131135, "loss": 3.4567, "step": 85675 }, { "epoch": 5.821443130860171, "grad_norm": 1.0686910152435303, "learning_rate": 0.0002723196086424786, "loss": 3.5227, "step": 85680 }, { "epoch": 5.821782850930833, "grad_norm": 0.7883431315422058, "learning_rate": 0.00027227714363364585, "loss": 3.2565, "step": 85685 }, { "epoch": 5.822122571001495, "grad_norm": 0.9040455222129822, "learning_rate": 0.00027223467862481313, "loss": 3.3286, "step": 85690 }, { "epoch": 5.822462291072156, "grad_norm": 0.9537210464477539, "learning_rate": 0.00027219221361598047, "loss": 3.6048, "step": 85695 }, { "epoch": 5.8228020111428185, "grad_norm": 1.1601464748382568, "learning_rate": 0.0002721497486071477, "loss": 3.4285, "step": 85700 }, { "epoch": 5.8231417312134806, "grad_norm": 1.0626112222671509, "learning_rate": 0.00027210728359831497, "loss": 3.4138, "step": 85705 }, { "epoch": 5.823481451284142, "grad_norm": 0.8661244511604309, "learning_rate": 0.0002720648185894823, "loss": 3.3547, "step": 85710 }, { "epoch": 5.823821171354804, "grad_norm": 0.8774874210357666, "learning_rate": 0.00027202235358064953, "loss": 3.225, "step": 85715 }, { "epoch": 5.824160891425466, "grad_norm": 0.9934190511703491, "learning_rate": 0.0002719798885718168, "loss": 3.5714, "step": 85720 }, { "epoch": 5.824500611496127, "grad_norm": 1.0539610385894775, "learning_rate": 0.00027193742356298415, "loss": 3.4123, "step": 85725 }, { "epoch": 5.824840331566789, "grad_norm": 1.0325329303741455, "learning_rate": 0.0002718949585541514, "loss": 3.5002, "step": 85730 }, { "epoch": 5.825180051637451, "grad_norm": 1.203180193901062, "learning_rate": 0.00027185249354531865, "loss": 3.2958, "step": 85735 }, { "epoch": 5.825519771708112, "grad_norm": 0.8976393938064575, "learning_rate": 0.00027181002853648593, "loss": 3.4176, "step": 85740 }, { "epoch": 5.8258594917787745, "grad_norm": 0.9067656397819519, "learning_rate": 0.0002717675635276532, "loss": 3.4575, "step": 85745 }, { "epoch": 5.826199211849436, "grad_norm": 0.9201294779777527, "learning_rate": 0.0002717250985188205, "loss": 3.5453, "step": 85750 }, { "epoch": 5.826538931920098, "grad_norm": 1.0084196329116821, "learning_rate": 0.0002716826335099878, "loss": 3.3215, "step": 85755 }, { "epoch": 5.82687865199076, "grad_norm": 0.9513784646987915, "learning_rate": 0.00027164016850115505, "loss": 3.175, "step": 85760 }, { "epoch": 5.827218372061421, "grad_norm": 1.898790717124939, "learning_rate": 0.00027159770349232233, "loss": 3.7159, "step": 85765 }, { "epoch": 5.827558092132083, "grad_norm": 1.0209416151046753, "learning_rate": 0.0002715552384834896, "loss": 3.3393, "step": 85770 }, { "epoch": 5.827897812202745, "grad_norm": 0.968442440032959, "learning_rate": 0.0002715127734746569, "loss": 3.173, "step": 85775 }, { "epoch": 5.828237532273406, "grad_norm": 0.784275233745575, "learning_rate": 0.0002714703084658242, "loss": 3.4726, "step": 85780 }, { "epoch": 5.828577252344068, "grad_norm": 0.9052839875221252, "learning_rate": 0.00027142784345699145, "loss": 3.5144, "step": 85785 }, { "epoch": 5.8289169724147305, "grad_norm": 0.9009918570518494, "learning_rate": 0.00027138537844815873, "loss": 3.5782, "step": 85790 }, { "epoch": 5.829256692485392, "grad_norm": 0.7110707759857178, "learning_rate": 0.000271342913439326, "loss": 3.5126, "step": 85795 }, { "epoch": 5.829596412556054, "grad_norm": 0.8679285645484924, "learning_rate": 0.0002713004484304933, "loss": 3.2702, "step": 85800 }, { "epoch": 5.829936132626716, "grad_norm": 1.185449242591858, "learning_rate": 0.0002712579834216606, "loss": 3.3249, "step": 85805 }, { "epoch": 5.830275852697377, "grad_norm": 0.8555469512939453, "learning_rate": 0.0002712155184128278, "loss": 3.5668, "step": 85810 }, { "epoch": 5.830615572768039, "grad_norm": 0.9341093301773071, "learning_rate": 0.00027117305340399513, "loss": 3.2412, "step": 85815 }, { "epoch": 5.830955292838701, "grad_norm": 0.8699649572372437, "learning_rate": 0.0002711305883951624, "loss": 3.4434, "step": 85820 }, { "epoch": 5.831295012909362, "grad_norm": 0.855373740196228, "learning_rate": 0.00027108812338632964, "loss": 3.6179, "step": 85825 }, { "epoch": 5.831634732980024, "grad_norm": 0.9894996881484985, "learning_rate": 0.000271045658377497, "loss": 3.4534, "step": 85830 }, { "epoch": 5.8319744530506865, "grad_norm": 0.7085078954696655, "learning_rate": 0.00027100319336866425, "loss": 3.7151, "step": 85835 }, { "epoch": 5.832314173121348, "grad_norm": 0.8549900054931641, "learning_rate": 0.0002709607283598315, "loss": 3.6032, "step": 85840 }, { "epoch": 5.83265389319201, "grad_norm": 0.9382591247558594, "learning_rate": 0.00027091826335099876, "loss": 3.4548, "step": 85845 }, { "epoch": 5.832993613262672, "grad_norm": 0.9508116841316223, "learning_rate": 0.0002708757983421661, "loss": 3.3739, "step": 85850 }, { "epoch": 5.833333333333333, "grad_norm": 0.8172245025634766, "learning_rate": 0.0002708333333333333, "loss": 3.6782, "step": 85855 }, { "epoch": 5.833673053403995, "grad_norm": 0.8330725431442261, "learning_rate": 0.0002707908683245006, "loss": 3.5548, "step": 85860 }, { "epoch": 5.834012773474657, "grad_norm": 0.9419605135917664, "learning_rate": 0.00027074840331566794, "loss": 3.3676, "step": 85865 }, { "epoch": 5.834352493545318, "grad_norm": 0.8327247500419617, "learning_rate": 0.00027070593830683516, "loss": 3.5301, "step": 85870 }, { "epoch": 5.8346922136159804, "grad_norm": 1.0354814529418945, "learning_rate": 0.00027066347329800244, "loss": 3.2892, "step": 85875 }, { "epoch": 5.8350319336866425, "grad_norm": 0.8013768196105957, "learning_rate": 0.0002706210082891697, "loss": 3.4534, "step": 85880 }, { "epoch": 5.835371653757304, "grad_norm": 0.8095645904541016, "learning_rate": 0.000270578543280337, "loss": 3.3046, "step": 85885 }, { "epoch": 5.835711373827966, "grad_norm": 1.0984495878219604, "learning_rate": 0.0002705360782715043, "loss": 3.6689, "step": 85890 }, { "epoch": 5.836051093898628, "grad_norm": 0.942044198513031, "learning_rate": 0.00027049361326267156, "loss": 3.4435, "step": 85895 }, { "epoch": 5.836390813969289, "grad_norm": 0.9594922661781311, "learning_rate": 0.00027045114825383884, "loss": 3.3226, "step": 85900 }, { "epoch": 5.836730534039951, "grad_norm": 0.992332398891449, "learning_rate": 0.0002704086832450061, "loss": 3.4097, "step": 85905 }, { "epoch": 5.837070254110613, "grad_norm": 0.993399977684021, "learning_rate": 0.0002703662182361734, "loss": 3.4901, "step": 85910 }, { "epoch": 5.837409974181274, "grad_norm": 0.9158075451850891, "learning_rate": 0.00027032375322734063, "loss": 3.3584, "step": 85915 }, { "epoch": 5.8377496942519365, "grad_norm": 0.9005814790725708, "learning_rate": 0.00027028128821850796, "loss": 3.5667, "step": 85920 }, { "epoch": 5.8380894143225985, "grad_norm": 0.692868173122406, "learning_rate": 0.00027023882320967524, "loss": 3.4387, "step": 85925 }, { "epoch": 5.83842913439326, "grad_norm": 0.9008669853210449, "learning_rate": 0.00027019635820084247, "loss": 3.6579, "step": 85930 }, { "epoch": 5.838768854463922, "grad_norm": 1.2968273162841797, "learning_rate": 0.0002701538931920098, "loss": 3.2465, "step": 85935 }, { "epoch": 5.839108574534584, "grad_norm": 0.7343127131462097, "learning_rate": 0.0002701114281831771, "loss": 3.2198, "step": 85940 }, { "epoch": 5.839448294605245, "grad_norm": 0.8652023673057556, "learning_rate": 0.00027006896317434436, "loss": 3.4782, "step": 85945 }, { "epoch": 5.839788014675907, "grad_norm": 0.7940728664398193, "learning_rate": 0.0002700264981655116, "loss": 3.6836, "step": 85950 }, { "epoch": 5.840127734746569, "grad_norm": 0.8610370755195618, "learning_rate": 0.0002699840331566789, "loss": 3.5552, "step": 85955 }, { "epoch": 5.84046745481723, "grad_norm": 0.8352829813957214, "learning_rate": 0.0002699415681478462, "loss": 3.3348, "step": 85960 }, { "epoch": 5.8408071748878925, "grad_norm": 0.8985799551010132, "learning_rate": 0.00026989910313901343, "loss": 3.3019, "step": 85965 }, { "epoch": 5.841146894958554, "grad_norm": 1.1118210554122925, "learning_rate": 0.00026985663813018076, "loss": 3.1513, "step": 85970 }, { "epoch": 5.841486615029216, "grad_norm": 0.8033086061477661, "learning_rate": 0.00026981417312134804, "loss": 3.5659, "step": 85975 }, { "epoch": 5.841826335099878, "grad_norm": 0.8585745692253113, "learning_rate": 0.00026977170811251527, "loss": 3.3805, "step": 85980 }, { "epoch": 5.842166055170539, "grad_norm": 1.4445583820343018, "learning_rate": 0.00026972924310368255, "loss": 3.8648, "step": 85985 }, { "epoch": 5.842505775241201, "grad_norm": 1.1817612648010254, "learning_rate": 0.0002696867780948499, "loss": 3.4753, "step": 85990 }, { "epoch": 5.842845495311863, "grad_norm": 0.7003769874572754, "learning_rate": 0.0002696443130860171, "loss": 3.4123, "step": 85995 }, { "epoch": 5.843185215382524, "grad_norm": 0.9787039756774902, "learning_rate": 0.0002696018480771844, "loss": 3.1584, "step": 86000 }, { "epoch": 5.843524935453186, "grad_norm": 0.9772524833679199, "learning_rate": 0.0002695593830683517, "loss": 3.3732, "step": 86005 }, { "epoch": 5.8438646555238485, "grad_norm": 0.9459580183029175, "learning_rate": 0.00026951691805951895, "loss": 3.5668, "step": 86010 }, { "epoch": 5.84420437559451, "grad_norm": 1.0837233066558838, "learning_rate": 0.00026947445305068623, "loss": 3.434, "step": 86015 }, { "epoch": 5.844544095665172, "grad_norm": 0.8834533095359802, "learning_rate": 0.00026943198804185356, "loss": 3.375, "step": 86020 }, { "epoch": 5.844883815735834, "grad_norm": 0.9668422341346741, "learning_rate": 0.0002693895230330208, "loss": 3.5324, "step": 86025 }, { "epoch": 5.845223535806495, "grad_norm": 1.0426534414291382, "learning_rate": 0.00026934705802418807, "loss": 3.5978, "step": 86030 }, { "epoch": 5.845563255877157, "grad_norm": 1.7355109453201294, "learning_rate": 0.00026930459301535535, "loss": 3.4554, "step": 86035 }, { "epoch": 5.845902975947819, "grad_norm": 0.9771691560745239, "learning_rate": 0.00026926212800652263, "loss": 3.4304, "step": 86040 }, { "epoch": 5.84624269601848, "grad_norm": 0.9578458666801453, "learning_rate": 0.0002692196629976899, "loss": 3.4267, "step": 86045 }, { "epoch": 5.846582416089142, "grad_norm": 0.9304202795028687, "learning_rate": 0.0002691771979888572, "loss": 3.2788, "step": 86050 }, { "epoch": 5.8469221361598045, "grad_norm": 1.0265262126922607, "learning_rate": 0.00026913473298002447, "loss": 3.1988, "step": 86055 }, { "epoch": 5.847261856230466, "grad_norm": 0.8590037226676941, "learning_rate": 0.00026909226797119175, "loss": 3.4472, "step": 86060 }, { "epoch": 5.847601576301128, "grad_norm": 0.8802750706672668, "learning_rate": 0.00026904980296235903, "loss": 3.226, "step": 86065 }, { "epoch": 5.84794129637179, "grad_norm": 0.9002315402030945, "learning_rate": 0.00026900733795352626, "loss": 3.2233, "step": 86070 }, { "epoch": 5.848281016442451, "grad_norm": 0.8698520660400391, "learning_rate": 0.0002689648729446936, "loss": 3.4278, "step": 86075 }, { "epoch": 5.848620736513113, "grad_norm": 1.0070141553878784, "learning_rate": 0.00026892240793586087, "loss": 3.2801, "step": 86080 }, { "epoch": 5.848960456583775, "grad_norm": 0.9052004218101501, "learning_rate": 0.0002688799429270281, "loss": 3.374, "step": 86085 }, { "epoch": 5.849300176654436, "grad_norm": 0.8487313985824585, "learning_rate": 0.00026883747791819543, "loss": 3.5366, "step": 86090 }, { "epoch": 5.849639896725098, "grad_norm": 0.8681938648223877, "learning_rate": 0.0002687950129093627, "loss": 3.3462, "step": 86095 }, { "epoch": 5.8499796167957605, "grad_norm": 1.2344688177108765, "learning_rate": 0.00026875254790052994, "loss": 3.238, "step": 86100 }, { "epoch": 5.850319336866422, "grad_norm": 0.7453005313873291, "learning_rate": 0.0002687100828916972, "loss": 3.4132, "step": 86105 }, { "epoch": 5.850659056937084, "grad_norm": 0.7762342095375061, "learning_rate": 0.00026866761788286455, "loss": 3.3458, "step": 86110 }, { "epoch": 5.850998777007746, "grad_norm": 0.8245344758033752, "learning_rate": 0.00026862515287403183, "loss": 3.263, "step": 86115 }, { "epoch": 5.851338497078407, "grad_norm": 1.153594970703125, "learning_rate": 0.00026858268786519906, "loss": 3.4077, "step": 86120 }, { "epoch": 5.851678217149069, "grad_norm": 0.8248249292373657, "learning_rate": 0.0002685402228563664, "loss": 3.2343, "step": 86125 }, { "epoch": 5.852017937219731, "grad_norm": 0.7167032957077026, "learning_rate": 0.00026849775784753367, "loss": 3.4012, "step": 86130 }, { "epoch": 5.852357657290392, "grad_norm": 1.0033481121063232, "learning_rate": 0.0002684552928387009, "loss": 3.3844, "step": 86135 }, { "epoch": 5.852697377361054, "grad_norm": 0.9826151728630066, "learning_rate": 0.0002684128278298682, "loss": 3.4357, "step": 86140 }, { "epoch": 5.8530370974317165, "grad_norm": 0.7771371006965637, "learning_rate": 0.0002683703628210355, "loss": 3.5191, "step": 86145 }, { "epoch": 5.853376817502378, "grad_norm": 0.7295055985450745, "learning_rate": 0.00026832789781220274, "loss": 3.4548, "step": 86150 }, { "epoch": 5.85371653757304, "grad_norm": 0.858648955821991, "learning_rate": 0.00026828543280337, "loss": 3.4194, "step": 86155 }, { "epoch": 5.854056257643702, "grad_norm": 0.7147545218467712, "learning_rate": 0.00026824296779453735, "loss": 3.1952, "step": 86160 }, { "epoch": 5.854395977714363, "grad_norm": 0.8920286297798157, "learning_rate": 0.0002682005027857046, "loss": 3.6288, "step": 86165 }, { "epoch": 5.854735697785025, "grad_norm": 1.2790724039077759, "learning_rate": 0.00026815803777687186, "loss": 3.5598, "step": 86170 }, { "epoch": 5.855075417855687, "grad_norm": 0.9622620940208435, "learning_rate": 0.00026811557276803914, "loss": 3.378, "step": 86175 }, { "epoch": 5.855415137926348, "grad_norm": 0.8299192786216736, "learning_rate": 0.0002680731077592064, "loss": 3.556, "step": 86180 }, { "epoch": 5.8557548579970105, "grad_norm": 0.8470836877822876, "learning_rate": 0.0002680306427503737, "loss": 3.5028, "step": 86185 }, { "epoch": 5.8560945780676725, "grad_norm": 0.8886511325836182, "learning_rate": 0.000267988177741541, "loss": 3.4517, "step": 86190 }, { "epoch": 5.856434298138334, "grad_norm": 0.7392469048500061, "learning_rate": 0.00026794571273270826, "loss": 3.4395, "step": 86195 }, { "epoch": 5.856774018208996, "grad_norm": 0.8894254565238953, "learning_rate": 0.00026790324772387554, "loss": 3.3444, "step": 86200 }, { "epoch": 5.857113738279658, "grad_norm": 1.0719871520996094, "learning_rate": 0.0002678607827150428, "loss": 3.4834, "step": 86205 }, { "epoch": 5.857453458350319, "grad_norm": 1.5331881046295166, "learning_rate": 0.00026781831770621004, "loss": 3.4178, "step": 86210 }, { "epoch": 5.857793178420981, "grad_norm": 1.074005365371704, "learning_rate": 0.0002677758526973774, "loss": 3.3821, "step": 86215 }, { "epoch": 5.858132898491643, "grad_norm": 0.776703953742981, "learning_rate": 0.00026773338768854466, "loss": 3.4549, "step": 86220 }, { "epoch": 5.858472618562304, "grad_norm": 0.948914647102356, "learning_rate": 0.0002676909226797119, "loss": 3.2463, "step": 86225 }, { "epoch": 5.8588123386329665, "grad_norm": 0.8874778747558594, "learning_rate": 0.0002676484576708792, "loss": 3.5903, "step": 86230 }, { "epoch": 5.8591520587036285, "grad_norm": 0.8263382315635681, "learning_rate": 0.0002676059926620465, "loss": 3.2143, "step": 86235 }, { "epoch": 5.85949177877429, "grad_norm": 0.7853386998176575, "learning_rate": 0.0002675635276532137, "loss": 3.3434, "step": 86240 }, { "epoch": 5.859831498844952, "grad_norm": 0.824016273021698, "learning_rate": 0.000267521062644381, "loss": 3.5763, "step": 86245 }, { "epoch": 5.860171218915614, "grad_norm": 1.3476810455322266, "learning_rate": 0.00026747859763554834, "loss": 3.3429, "step": 86250 }, { "epoch": 5.860510938986275, "grad_norm": 0.8506530523300171, "learning_rate": 0.00026743613262671556, "loss": 3.2041, "step": 86255 }, { "epoch": 5.860850659056937, "grad_norm": 1.1093811988830566, "learning_rate": 0.00026739366761788284, "loss": 3.4016, "step": 86260 }, { "epoch": 5.861190379127599, "grad_norm": 0.8833467364311218, "learning_rate": 0.0002673512026090502, "loss": 3.3799, "step": 86265 }, { "epoch": 5.86153009919826, "grad_norm": 1.0075180530548096, "learning_rate": 0.0002673087376002174, "loss": 3.5542, "step": 86270 }, { "epoch": 5.8618698192689225, "grad_norm": 0.9027162194252014, "learning_rate": 0.0002672662725913847, "loss": 3.5145, "step": 86275 }, { "epoch": 5.8622095393395846, "grad_norm": 0.7770543694496155, "learning_rate": 0.000267223807582552, "loss": 3.4825, "step": 86280 }, { "epoch": 5.862549259410246, "grad_norm": 0.9155957102775574, "learning_rate": 0.0002671813425737193, "loss": 3.0356, "step": 86285 }, { "epoch": 5.862888979480908, "grad_norm": 0.79121333360672, "learning_rate": 0.0002671388775648865, "loss": 3.2668, "step": 86290 }, { "epoch": 5.86322869955157, "grad_norm": 1.051538109779358, "learning_rate": 0.0002670964125560538, "loss": 3.6053, "step": 86295 }, { "epoch": 5.863568419622231, "grad_norm": 0.7549198865890503, "learning_rate": 0.00026705394754722114, "loss": 3.2647, "step": 86300 }, { "epoch": 5.863908139692893, "grad_norm": 0.7794956564903259, "learning_rate": 0.00026701148253838837, "loss": 3.6437, "step": 86305 }, { "epoch": 5.864247859763555, "grad_norm": 1.0626429319381714, "learning_rate": 0.00026696901752955565, "loss": 3.7291, "step": 86310 }, { "epoch": 5.864587579834216, "grad_norm": 0.8733043670654297, "learning_rate": 0.000266926552520723, "loss": 3.7, "step": 86315 }, { "epoch": 5.8649272999048785, "grad_norm": 0.8317854404449463, "learning_rate": 0.0002668840875118902, "loss": 3.6175, "step": 86320 }, { "epoch": 5.865267019975541, "grad_norm": 0.8838074803352356, "learning_rate": 0.0002668416225030575, "loss": 3.3105, "step": 86325 }, { "epoch": 5.865606740046202, "grad_norm": 0.8156776428222656, "learning_rate": 0.00026679915749422477, "loss": 3.5435, "step": 86330 }, { "epoch": 5.865946460116864, "grad_norm": 1.0593010187149048, "learning_rate": 0.00026675669248539205, "loss": 3.4216, "step": 86335 }, { "epoch": 5.866286180187526, "grad_norm": 0.9067009091377258, "learning_rate": 0.0002667142274765593, "loss": 3.6014, "step": 86340 }, { "epoch": 5.866625900258187, "grad_norm": 1.1322778463363647, "learning_rate": 0.0002666717624677266, "loss": 3.1432, "step": 86345 }, { "epoch": 5.866965620328849, "grad_norm": 0.7949217557907104, "learning_rate": 0.0002666292974588939, "loss": 3.3763, "step": 86350 }, { "epoch": 5.867305340399511, "grad_norm": 0.8863410353660583, "learning_rate": 0.00026658683245006117, "loss": 3.3348, "step": 86355 }, { "epoch": 5.867645060470172, "grad_norm": 0.8905214667320251, "learning_rate": 0.00026654436744122845, "loss": 3.2738, "step": 86360 }, { "epoch": 5.8679847805408345, "grad_norm": 0.8957790732383728, "learning_rate": 0.00026650190243239567, "loss": 3.5278, "step": 86365 }, { "epoch": 5.868324500611497, "grad_norm": 0.7183364629745483, "learning_rate": 0.000266459437423563, "loss": 3.4109, "step": 86370 }, { "epoch": 5.868664220682158, "grad_norm": 0.9085429310798645, "learning_rate": 0.0002664169724147303, "loss": 3.7691, "step": 86375 }, { "epoch": 5.86900394075282, "grad_norm": 1.0386015176773071, "learning_rate": 0.0002663745074058975, "loss": 3.3952, "step": 86380 }, { "epoch": 5.869343660823482, "grad_norm": 0.9703196287155151, "learning_rate": 0.00026633204239706485, "loss": 3.3524, "step": 86385 }, { "epoch": 5.869683380894143, "grad_norm": 0.8766586184501648, "learning_rate": 0.0002662895773882321, "loss": 3.5959, "step": 86390 }, { "epoch": 5.870023100964805, "grad_norm": 0.9453226327896118, "learning_rate": 0.00026624711237939935, "loss": 3.3516, "step": 86395 }, { "epoch": 5.870362821035467, "grad_norm": 0.8838901519775391, "learning_rate": 0.00026620464737056663, "loss": 3.5589, "step": 86400 }, { "epoch": 5.870702541106128, "grad_norm": 1.005314826965332, "learning_rate": 0.00026616218236173397, "loss": 3.45, "step": 86405 }, { "epoch": 5.8710422611767905, "grad_norm": 0.9389330148696899, "learning_rate": 0.0002661197173529012, "loss": 3.3672, "step": 86410 }, { "epoch": 5.871381981247453, "grad_norm": 0.736731231212616, "learning_rate": 0.00026607725234406847, "loss": 3.5602, "step": 86415 }, { "epoch": 5.871721701318114, "grad_norm": 0.8334453701972961, "learning_rate": 0.0002660347873352358, "loss": 3.4295, "step": 86420 }, { "epoch": 5.872061421388776, "grad_norm": 1.2921814918518066, "learning_rate": 0.00026599232232640303, "loss": 3.2611, "step": 86425 }, { "epoch": 5.872401141459437, "grad_norm": 0.9349871277809143, "learning_rate": 0.0002659498573175703, "loss": 3.4054, "step": 86430 }, { "epoch": 5.872740861530099, "grad_norm": 0.8525412082672119, "learning_rate": 0.0002659073923087376, "loss": 3.5783, "step": 86435 }, { "epoch": 5.873080581600761, "grad_norm": 0.9275960922241211, "learning_rate": 0.0002658649272999049, "loss": 3.1863, "step": 86440 }, { "epoch": 5.873420301671422, "grad_norm": 0.8481304049491882, "learning_rate": 0.00026582246229107215, "loss": 3.4148, "step": 86445 }, { "epoch": 5.8737600217420844, "grad_norm": 2.259323835372925, "learning_rate": 0.00026577999728223943, "loss": 3.7092, "step": 86450 }, { "epoch": 5.8740997418127465, "grad_norm": 1.061008095741272, "learning_rate": 0.00026573753227340677, "loss": 3.4568, "step": 86455 }, { "epoch": 5.874439461883408, "grad_norm": 0.8969730138778687, "learning_rate": 0.000265695067264574, "loss": 3.4913, "step": 86460 }, { "epoch": 5.87477918195407, "grad_norm": 0.9192014932632446, "learning_rate": 0.0002656526022557413, "loss": 3.2825, "step": 86465 }, { "epoch": 5.875118902024732, "grad_norm": 0.9566301107406616, "learning_rate": 0.00026561013724690855, "loss": 3.4987, "step": 86470 }, { "epoch": 5.875458622095393, "grad_norm": 0.7487375736236572, "learning_rate": 0.00026556767223807583, "loss": 3.6183, "step": 86475 }, { "epoch": 5.875798342166055, "grad_norm": 0.7771413922309875, "learning_rate": 0.0002655252072292431, "loss": 3.455, "step": 86480 }, { "epoch": 5.876138062236717, "grad_norm": 0.8373425602912903, "learning_rate": 0.0002654827422204104, "loss": 3.3749, "step": 86485 }, { "epoch": 5.876477782307378, "grad_norm": 1.1413938999176025, "learning_rate": 0.0002654402772115777, "loss": 3.3825, "step": 86490 }, { "epoch": 5.8768175023780405, "grad_norm": 0.8059937953948975, "learning_rate": 0.00026539781220274495, "loss": 3.3389, "step": 86495 }, { "epoch": 5.8771572224487025, "grad_norm": 1.1402323246002197, "learning_rate": 0.00026535534719391223, "loss": 3.4738, "step": 86500 }, { "epoch": 5.877496942519364, "grad_norm": 1.1138890981674194, "learning_rate": 0.00026531288218507946, "loss": 3.2426, "step": 86505 }, { "epoch": 5.877836662590026, "grad_norm": 0.8819926977157593, "learning_rate": 0.0002652704171762468, "loss": 3.4258, "step": 86510 }, { "epoch": 5.878176382660688, "grad_norm": 0.6907956004142761, "learning_rate": 0.0002652279521674141, "loss": 3.4881, "step": 86515 }, { "epoch": 5.878516102731349, "grad_norm": 1.0524836778640747, "learning_rate": 0.0002651854871585813, "loss": 3.5037, "step": 86520 }, { "epoch": 5.878855822802011, "grad_norm": 1.0506043434143066, "learning_rate": 0.00026514302214974863, "loss": 3.5152, "step": 86525 }, { "epoch": 5.879195542872673, "grad_norm": 0.9730178713798523, "learning_rate": 0.0002651005571409159, "loss": 3.014, "step": 86530 }, { "epoch": 5.879535262943334, "grad_norm": 1.3492637872695923, "learning_rate": 0.00026505809213208314, "loss": 3.4695, "step": 86535 }, { "epoch": 5.8798749830139965, "grad_norm": 0.9881723523139954, "learning_rate": 0.0002650156271232504, "loss": 3.4573, "step": 86540 }, { "epoch": 5.8802147030846585, "grad_norm": 0.8586419224739075, "learning_rate": 0.00026497316211441775, "loss": 3.2759, "step": 86545 }, { "epoch": 5.88055442315532, "grad_norm": 0.9406324028968811, "learning_rate": 0.000264930697105585, "loss": 3.5943, "step": 86550 }, { "epoch": 5.880894143225982, "grad_norm": 1.0977766513824463, "learning_rate": 0.00026488823209675226, "loss": 3.5053, "step": 86555 }, { "epoch": 5.881233863296644, "grad_norm": 1.0892994403839111, "learning_rate": 0.0002648457670879196, "loss": 3.5523, "step": 86560 }, { "epoch": 5.881573583367305, "grad_norm": 1.0148600339889526, "learning_rate": 0.0002648033020790868, "loss": 3.4684, "step": 86565 }, { "epoch": 5.881913303437967, "grad_norm": 0.8273751735687256, "learning_rate": 0.0002647608370702541, "loss": 3.4998, "step": 86570 }, { "epoch": 5.882253023508629, "grad_norm": 0.9430851340293884, "learning_rate": 0.00026471837206142144, "loss": 3.3568, "step": 86575 }, { "epoch": 5.88259274357929, "grad_norm": 0.8044989109039307, "learning_rate": 0.00026467590705258866, "loss": 3.4841, "step": 86580 }, { "epoch": 5.8829324636499525, "grad_norm": 0.921524703502655, "learning_rate": 0.00026463344204375594, "loss": 3.6414, "step": 86585 }, { "epoch": 5.883272183720615, "grad_norm": 1.083160161972046, "learning_rate": 0.0002645909770349232, "loss": 3.5524, "step": 86590 }, { "epoch": 5.883611903791276, "grad_norm": 1.1565852165222168, "learning_rate": 0.0002645485120260905, "loss": 3.1195, "step": 86595 }, { "epoch": 5.883951623861938, "grad_norm": 0.927943766117096, "learning_rate": 0.0002645060470172578, "loss": 3.4799, "step": 86600 }, { "epoch": 5.8842913439326, "grad_norm": 1.0399051904678345, "learning_rate": 0.00026446358200842506, "loss": 3.3207, "step": 86605 }, { "epoch": 5.884631064003261, "grad_norm": 1.3866750001907349, "learning_rate": 0.00026442111699959234, "loss": 3.1983, "step": 86610 }, { "epoch": 5.884970784073923, "grad_norm": 0.8107403516769409, "learning_rate": 0.0002643786519907596, "loss": 3.5514, "step": 86615 }, { "epoch": 5.885310504144585, "grad_norm": 0.9815630912780762, "learning_rate": 0.0002643361869819269, "loss": 3.4712, "step": 86620 }, { "epoch": 5.885650224215246, "grad_norm": 0.892328679561615, "learning_rate": 0.0002642937219730942, "loss": 3.2969, "step": 86625 }, { "epoch": 5.8859899442859085, "grad_norm": 0.9207344055175781, "learning_rate": 0.00026425125696426146, "loss": 3.4592, "step": 86630 }, { "epoch": 5.886329664356571, "grad_norm": 0.7865584492683411, "learning_rate": 0.00026420879195542874, "loss": 3.357, "step": 86635 }, { "epoch": 5.886669384427232, "grad_norm": 1.0817041397094727, "learning_rate": 0.000264166326946596, "loss": 3.4243, "step": 86640 }, { "epoch": 5.887009104497894, "grad_norm": 1.0846365690231323, "learning_rate": 0.0002641238619377633, "loss": 3.3632, "step": 86645 }, { "epoch": 5.887348824568555, "grad_norm": 0.9023453593254089, "learning_rate": 0.0002640813969289306, "loss": 3.5626, "step": 86650 }, { "epoch": 5.887688544639217, "grad_norm": 0.8457932472229004, "learning_rate": 0.00026403893192009786, "loss": 3.5144, "step": 86655 }, { "epoch": 5.888028264709879, "grad_norm": 0.973366916179657, "learning_rate": 0.0002639964669112651, "loss": 3.5001, "step": 86660 }, { "epoch": 5.88836798478054, "grad_norm": 0.7693150639533997, "learning_rate": 0.0002639540019024324, "loss": 3.6408, "step": 86665 }, { "epoch": 5.888707704851202, "grad_norm": 1.0492876768112183, "learning_rate": 0.0002639115368935997, "loss": 3.391, "step": 86670 }, { "epoch": 5.8890474249218645, "grad_norm": 1.0527756214141846, "learning_rate": 0.00026386907188476693, "loss": 3.2772, "step": 86675 }, { "epoch": 5.889387144992526, "grad_norm": 0.9608847498893738, "learning_rate": 0.00026382660687593426, "loss": 3.2477, "step": 86680 }, { "epoch": 5.889726865063188, "grad_norm": 0.9003650546073914, "learning_rate": 0.00026378414186710154, "loss": 3.5715, "step": 86685 }, { "epoch": 5.89006658513385, "grad_norm": 0.7668390870094299, "learning_rate": 0.00026374167685826877, "loss": 3.2785, "step": 86690 }, { "epoch": 5.890406305204511, "grad_norm": 0.9139150381088257, "learning_rate": 0.00026369921184943605, "loss": 3.464, "step": 86695 }, { "epoch": 5.890746025275173, "grad_norm": 0.8657779693603516, "learning_rate": 0.0002636567468406034, "loss": 3.2761, "step": 86700 }, { "epoch": 5.891085745345835, "grad_norm": 1.1397814750671387, "learning_rate": 0.0002636142818317706, "loss": 3.3309, "step": 86705 }, { "epoch": 5.891425465416496, "grad_norm": 1.1026175022125244, "learning_rate": 0.0002635718168229379, "loss": 3.5543, "step": 86710 }, { "epoch": 5.891765185487158, "grad_norm": 0.9933289885520935, "learning_rate": 0.0002635293518141052, "loss": 3.3644, "step": 86715 }, { "epoch": 5.8921049055578205, "grad_norm": 0.8410524129867554, "learning_rate": 0.00026348688680527245, "loss": 3.3458, "step": 86720 }, { "epoch": 5.892444625628482, "grad_norm": 0.9390279650688171, "learning_rate": 0.00026344442179643973, "loss": 3.7889, "step": 86725 }, { "epoch": 5.892784345699144, "grad_norm": 0.8043792843818665, "learning_rate": 0.000263401956787607, "loss": 3.4558, "step": 86730 }, { "epoch": 5.893124065769806, "grad_norm": 0.8868273496627808, "learning_rate": 0.0002633594917787743, "loss": 3.3326, "step": 86735 }, { "epoch": 5.893463785840467, "grad_norm": 0.8429461121559143, "learning_rate": 0.00026331702676994157, "loss": 3.6529, "step": 86740 }, { "epoch": 5.893803505911129, "grad_norm": 1.2867234945297241, "learning_rate": 0.00026327456176110885, "loss": 3.2304, "step": 86745 }, { "epoch": 5.894143225981791, "grad_norm": 0.9196484684944153, "learning_rate": 0.00026323209675227613, "loss": 3.4653, "step": 86750 }, { "epoch": 5.894482946052452, "grad_norm": 0.9040078520774841, "learning_rate": 0.0002631896317434434, "loss": 3.4672, "step": 86755 }, { "epoch": 5.8948226661231145, "grad_norm": 0.8999331593513489, "learning_rate": 0.0002631471667346107, "loss": 3.4039, "step": 86760 }, { "epoch": 5.8951623861937765, "grad_norm": 0.947849452495575, "learning_rate": 0.0002631047017257779, "loss": 3.1984, "step": 86765 }, { "epoch": 5.895502106264438, "grad_norm": 0.8114612102508545, "learning_rate": 0.00026306223671694525, "loss": 3.5397, "step": 86770 }, { "epoch": 5.8958418263351, "grad_norm": 0.9328486323356628, "learning_rate": 0.00026301977170811253, "loss": 3.5195, "step": 86775 }, { "epoch": 5.896181546405762, "grad_norm": 1.0835790634155273, "learning_rate": 0.00026297730669927976, "loss": 3.3179, "step": 86780 }, { "epoch": 5.896521266476423, "grad_norm": 1.0040569305419922, "learning_rate": 0.0002629348416904471, "loss": 3.5962, "step": 86785 }, { "epoch": 5.896860986547085, "grad_norm": 1.104378581047058, "learning_rate": 0.00026289237668161437, "loss": 3.464, "step": 86790 }, { "epoch": 5.897200706617747, "grad_norm": 0.9844347834587097, "learning_rate": 0.00026284991167278165, "loss": 3.317, "step": 86795 }, { "epoch": 5.897540426688408, "grad_norm": 0.8422542810440063, "learning_rate": 0.0002628074466639489, "loss": 3.3706, "step": 86800 }, { "epoch": 5.8978801467590705, "grad_norm": 1.1742855310440063, "learning_rate": 0.0002627649816551162, "loss": 3.4955, "step": 86805 }, { "epoch": 5.8982198668297325, "grad_norm": 0.8861384391784668, "learning_rate": 0.0002627225166462835, "loss": 3.4515, "step": 86810 }, { "epoch": 5.898559586900394, "grad_norm": 0.8072331547737122, "learning_rate": 0.0002626800516374507, "loss": 3.3623, "step": 86815 }, { "epoch": 5.898899306971056, "grad_norm": 0.8211284279823303, "learning_rate": 0.00026263758662861805, "loss": 3.5454, "step": 86820 }, { "epoch": 5.899239027041718, "grad_norm": 0.7291365265846252, "learning_rate": 0.00026259512161978533, "loss": 3.505, "step": 86825 }, { "epoch": 5.899578747112379, "grad_norm": 0.9404595494270325, "learning_rate": 0.00026255265661095256, "loss": 3.2433, "step": 86830 }, { "epoch": 5.899918467183041, "grad_norm": 0.830746054649353, "learning_rate": 0.00026251019160211984, "loss": 3.3945, "step": 86835 }, { "epoch": 5.900258187253703, "grad_norm": 0.9141514301300049, "learning_rate": 0.00026246772659328717, "loss": 3.4052, "step": 86840 }, { "epoch": 5.900597907324364, "grad_norm": 1.0112676620483398, "learning_rate": 0.0002624252615844544, "loss": 3.6389, "step": 86845 }, { "epoch": 5.9009376273950265, "grad_norm": 0.8237084150314331, "learning_rate": 0.0002623827965756217, "loss": 3.3747, "step": 86850 }, { "epoch": 5.9012773474656885, "grad_norm": 1.1113238334655762, "learning_rate": 0.000262340331566789, "loss": 3.2057, "step": 86855 }, { "epoch": 5.90161706753635, "grad_norm": 0.7811151146888733, "learning_rate": 0.00026229786655795624, "loss": 3.6354, "step": 86860 }, { "epoch": 5.901956787607012, "grad_norm": 0.8714499473571777, "learning_rate": 0.0002622554015491235, "loss": 3.4251, "step": 86865 }, { "epoch": 5.902296507677674, "grad_norm": 579.6832275390625, "learning_rate": 0.00026221293654029085, "loss": 3.6363, "step": 86870 }, { "epoch": 5.902636227748335, "grad_norm": 0.8753916025161743, "learning_rate": 0.0002621704715314581, "loss": 3.4464, "step": 86875 }, { "epoch": 5.902975947818997, "grad_norm": 0.8919311761856079, "learning_rate": 0.00026212800652262536, "loss": 3.4541, "step": 86880 }, { "epoch": 5.903315667889659, "grad_norm": 0.9172821640968323, "learning_rate": 0.00026208554151379264, "loss": 3.4275, "step": 86885 }, { "epoch": 5.90365538796032, "grad_norm": 0.878645122051239, "learning_rate": 0.0002620430765049599, "loss": 3.5749, "step": 86890 }, { "epoch": 5.9039951080309825, "grad_norm": 0.9559105634689331, "learning_rate": 0.0002620006114961272, "loss": 3.4506, "step": 86895 }, { "epoch": 5.904334828101645, "grad_norm": 0.9417399764060974, "learning_rate": 0.0002619581464872945, "loss": 3.454, "step": 86900 }, { "epoch": 5.904674548172306, "grad_norm": 0.9088181257247925, "learning_rate": 0.00026191568147846176, "loss": 3.4495, "step": 86905 }, { "epoch": 5.905014268242968, "grad_norm": 0.7918052077293396, "learning_rate": 0.00026187321646962904, "loss": 3.1733, "step": 86910 }, { "epoch": 5.90535398831363, "grad_norm": 0.9783101677894592, "learning_rate": 0.0002618307514607963, "loss": 3.3448, "step": 86915 }, { "epoch": 5.905693708384291, "grad_norm": 0.8716035485267639, "learning_rate": 0.00026178828645196354, "loss": 3.2641, "step": 86920 }, { "epoch": 5.906033428454953, "grad_norm": 1.1220875978469849, "learning_rate": 0.0002617458214431309, "loss": 3.423, "step": 86925 }, { "epoch": 5.906373148525615, "grad_norm": 0.9642802476882935, "learning_rate": 0.00026170335643429816, "loss": 3.3641, "step": 86930 }, { "epoch": 5.906712868596276, "grad_norm": 0.7496830821037292, "learning_rate": 0.0002616608914254654, "loss": 3.1032, "step": 86935 }, { "epoch": 5.9070525886669385, "grad_norm": 1.0065748691558838, "learning_rate": 0.0002616184264166327, "loss": 3.0922, "step": 86940 }, { "epoch": 5.907392308737601, "grad_norm": 0.858755886554718, "learning_rate": 0.0002615759614078, "loss": 3.3356, "step": 86945 }, { "epoch": 5.907732028808262, "grad_norm": 1.0335866212844849, "learning_rate": 0.0002615334963989672, "loss": 3.7257, "step": 86950 }, { "epoch": 5.908071748878924, "grad_norm": 0.8041394948959351, "learning_rate": 0.0002614910313901345, "loss": 3.4732, "step": 86955 }, { "epoch": 5.908411468949586, "grad_norm": 0.8124260306358337, "learning_rate": 0.00026144856638130184, "loss": 3.3198, "step": 86960 }, { "epoch": 5.908751189020247, "grad_norm": 1.1435471773147583, "learning_rate": 0.0002614061013724691, "loss": 3.5829, "step": 86965 }, { "epoch": 5.909090909090909, "grad_norm": 0.8497371673583984, "learning_rate": 0.00026136363636363634, "loss": 3.4526, "step": 86970 }, { "epoch": 5.909430629161571, "grad_norm": 0.8618152737617493, "learning_rate": 0.0002613211713548037, "loss": 3.742, "step": 86975 }, { "epoch": 5.909770349232232, "grad_norm": 0.9107972979545593, "learning_rate": 0.00026127870634597096, "loss": 3.4161, "step": 86980 }, { "epoch": 5.9101100693028945, "grad_norm": 0.7411313056945801, "learning_rate": 0.0002612362413371382, "loss": 3.3177, "step": 86985 }, { "epoch": 5.910449789373557, "grad_norm": 0.9005715250968933, "learning_rate": 0.00026119377632830546, "loss": 3.4154, "step": 86990 }, { "epoch": 5.910789509444218, "grad_norm": 0.8922621011734009, "learning_rate": 0.0002611513113194728, "loss": 3.5609, "step": 86995 }, { "epoch": 5.91112922951488, "grad_norm": 0.9250367879867554, "learning_rate": 0.00026110884631064, "loss": 3.4561, "step": 87000 }, { "epoch": 5.911468949585542, "grad_norm": 0.8878572583198547, "learning_rate": 0.0002610663813018073, "loss": 3.5806, "step": 87005 }, { "epoch": 5.911808669656203, "grad_norm": 1.0487000942230225, "learning_rate": 0.00026102391629297464, "loss": 3.2145, "step": 87010 }, { "epoch": 5.912148389726865, "grad_norm": 0.8766903281211853, "learning_rate": 0.00026098145128414187, "loss": 3.354, "step": 87015 }, { "epoch": 5.912488109797527, "grad_norm": 0.9046432971954346, "learning_rate": 0.00026093898627530915, "loss": 3.4835, "step": 87020 }, { "epoch": 5.9128278298681884, "grad_norm": 0.9405030012130737, "learning_rate": 0.0002608965212664764, "loss": 3.6776, "step": 87025 }, { "epoch": 5.9131675499388505, "grad_norm": 0.8741708397865295, "learning_rate": 0.0002608540562576437, "loss": 3.7468, "step": 87030 }, { "epoch": 5.913507270009513, "grad_norm": 0.8123577237129211, "learning_rate": 0.000260811591248811, "loss": 3.587, "step": 87035 }, { "epoch": 5.913846990080174, "grad_norm": 0.9039894342422485, "learning_rate": 0.00026076912623997827, "loss": 3.4001, "step": 87040 }, { "epoch": 5.914186710150836, "grad_norm": 0.8513646721839905, "learning_rate": 0.00026072666123114555, "loss": 3.1548, "step": 87045 }, { "epoch": 5.914526430221498, "grad_norm": 0.8191611766815186, "learning_rate": 0.0002606841962223128, "loss": 3.4523, "step": 87050 }, { "epoch": 5.914866150292159, "grad_norm": 1.1811246871948242, "learning_rate": 0.0002606417312134801, "loss": 3.1899, "step": 87055 }, { "epoch": 5.915205870362821, "grad_norm": 0.859306275844574, "learning_rate": 0.00026059926620464733, "loss": 3.3489, "step": 87060 }, { "epoch": 5.915545590433483, "grad_norm": 0.8048444986343384, "learning_rate": 0.00026055680119581467, "loss": 3.4524, "step": 87065 }, { "epoch": 5.9158853105041445, "grad_norm": 0.8702788949012756, "learning_rate": 0.00026051433618698195, "loss": 3.3381, "step": 87070 }, { "epoch": 5.9162250305748065, "grad_norm": 0.7615591287612915, "learning_rate": 0.00026047187117814917, "loss": 3.5811, "step": 87075 }, { "epoch": 5.916564750645469, "grad_norm": 0.9778746962547302, "learning_rate": 0.0002604294061693165, "loss": 3.5486, "step": 87080 }, { "epoch": 5.91690447071613, "grad_norm": 1.114913821220398, "learning_rate": 0.0002603869411604838, "loss": 3.2842, "step": 87085 }, { "epoch": 5.917244190786792, "grad_norm": 0.7593933343887329, "learning_rate": 0.000260344476151651, "loss": 3.7841, "step": 87090 }, { "epoch": 5.917583910857454, "grad_norm": 0.946738600730896, "learning_rate": 0.0002603020111428183, "loss": 3.2942, "step": 87095 }, { "epoch": 5.917923630928115, "grad_norm": 0.8523774147033691, "learning_rate": 0.0002602595461339856, "loss": 3.5062, "step": 87100 }, { "epoch": 5.918263350998777, "grad_norm": 0.9060896635055542, "learning_rate": 0.00026021708112515285, "loss": 3.59, "step": 87105 }, { "epoch": 5.918603071069439, "grad_norm": 0.8794929385185242, "learning_rate": 0.00026017461611632013, "loss": 3.3847, "step": 87110 }, { "epoch": 5.9189427911401005, "grad_norm": 1.282698392868042, "learning_rate": 0.00026013215110748747, "loss": 3.5109, "step": 87115 }, { "epoch": 5.9192825112107625, "grad_norm": 0.8819625377655029, "learning_rate": 0.0002600896860986547, "loss": 3.2781, "step": 87120 }, { "epoch": 5.919622231281424, "grad_norm": 1.569460153579712, "learning_rate": 0.00026004722108982197, "loss": 3.5024, "step": 87125 }, { "epoch": 5.919961951352086, "grad_norm": 0.9171451330184937, "learning_rate": 0.00026000475608098925, "loss": 3.1204, "step": 87130 }, { "epoch": 5.920301671422748, "grad_norm": 1.0037691593170166, "learning_rate": 0.0002599622910721566, "loss": 3.5264, "step": 87135 }, { "epoch": 5.920641391493409, "grad_norm": 0.8538153171539307, "learning_rate": 0.0002599198260633238, "loss": 3.4451, "step": 87140 }, { "epoch": 5.920981111564071, "grad_norm": 1.0676628351211548, "learning_rate": 0.0002598773610544911, "loss": 3.5566, "step": 87145 }, { "epoch": 5.921320831634733, "grad_norm": 0.8805878758430481, "learning_rate": 0.00025983489604565843, "loss": 3.4972, "step": 87150 }, { "epoch": 5.921660551705394, "grad_norm": 0.9083634614944458, "learning_rate": 0.00025979243103682565, "loss": 3.387, "step": 87155 }, { "epoch": 5.9220002717760565, "grad_norm": 0.9054036736488342, "learning_rate": 0.00025974996602799293, "loss": 3.3981, "step": 87160 }, { "epoch": 5.9223399918467186, "grad_norm": 1.0472131967544556, "learning_rate": 0.00025970750101916027, "loss": 3.7891, "step": 87165 }, { "epoch": 5.92267971191738, "grad_norm": 0.9835063219070435, "learning_rate": 0.0002596650360103275, "loss": 3.531, "step": 87170 }, { "epoch": 5.923019431988042, "grad_norm": 0.8201888203620911, "learning_rate": 0.0002596225710014948, "loss": 3.3871, "step": 87175 }, { "epoch": 5.923359152058704, "grad_norm": 0.8548210263252258, "learning_rate": 0.00025958010599266205, "loss": 3.5423, "step": 87180 }, { "epoch": 5.923698872129365, "grad_norm": 0.7816788554191589, "learning_rate": 0.00025953764098382933, "loss": 3.5296, "step": 87185 }, { "epoch": 5.924038592200027, "grad_norm": 0.8344810009002686, "learning_rate": 0.0002594951759749966, "loss": 3.4779, "step": 87190 }, { "epoch": 5.924378312270689, "grad_norm": 0.8944981098175049, "learning_rate": 0.0002594527109661639, "loss": 3.6964, "step": 87195 }, { "epoch": 5.92471803234135, "grad_norm": 0.6969605088233948, "learning_rate": 0.0002594102459573312, "loss": 3.7256, "step": 87200 }, { "epoch": 5.9250577524120125, "grad_norm": 0.8397440910339355, "learning_rate": 0.00025936778094849845, "loss": 3.3472, "step": 87205 }, { "epoch": 5.925397472482675, "grad_norm": 1.1327563524246216, "learning_rate": 0.00025932531593966573, "loss": 3.3953, "step": 87210 }, { "epoch": 5.925737192553336, "grad_norm": 1.0135266780853271, "learning_rate": 0.00025928285093083296, "loss": 3.5325, "step": 87215 }, { "epoch": 5.926076912623998, "grad_norm": 0.9305192828178406, "learning_rate": 0.0002592403859220003, "loss": 3.3799, "step": 87220 }, { "epoch": 5.92641663269466, "grad_norm": 0.9032133221626282, "learning_rate": 0.0002591979209131676, "loss": 3.2738, "step": 87225 }, { "epoch": 5.926756352765321, "grad_norm": 1.152830719947815, "learning_rate": 0.0002591554559043348, "loss": 3.4199, "step": 87230 }, { "epoch": 5.927096072835983, "grad_norm": 0.7360384464263916, "learning_rate": 0.00025911299089550213, "loss": 3.6978, "step": 87235 }, { "epoch": 5.927435792906645, "grad_norm": 0.77264004945755, "learning_rate": 0.0002590705258866694, "loss": 3.3866, "step": 87240 }, { "epoch": 5.927775512977306, "grad_norm": 0.7775171399116516, "learning_rate": 0.00025902806087783664, "loss": 3.0679, "step": 87245 }, { "epoch": 5.9281152330479685, "grad_norm": 0.827771008014679, "learning_rate": 0.0002589855958690039, "loss": 3.4941, "step": 87250 }, { "epoch": 5.928454953118631, "grad_norm": 1.0844062566757202, "learning_rate": 0.00025894313086017125, "loss": 3.382, "step": 87255 }, { "epoch": 5.928794673189292, "grad_norm": 1.0564496517181396, "learning_rate": 0.0002589006658513385, "loss": 3.5151, "step": 87260 }, { "epoch": 5.929134393259954, "grad_norm": 1.0422168970108032, "learning_rate": 0.00025885820084250576, "loss": 3.2498, "step": 87265 }, { "epoch": 5.929474113330616, "grad_norm": 0.9066182971000671, "learning_rate": 0.0002588157358336731, "loss": 3.6542, "step": 87270 }, { "epoch": 5.929813833401277, "grad_norm": 0.9588635563850403, "learning_rate": 0.0002587732708248403, "loss": 3.3069, "step": 87275 }, { "epoch": 5.930153553471939, "grad_norm": 0.8084071278572083, "learning_rate": 0.0002587308058160076, "loss": 3.3211, "step": 87280 }, { "epoch": 5.930493273542601, "grad_norm": 0.8942273259162903, "learning_rate": 0.0002586883408071749, "loss": 3.4001, "step": 87285 }, { "epoch": 5.930832993613262, "grad_norm": 0.8593006730079651, "learning_rate": 0.00025864587579834216, "loss": 3.4387, "step": 87290 }, { "epoch": 5.9311727136839245, "grad_norm": 0.9233217835426331, "learning_rate": 0.00025860341078950944, "loss": 3.4869, "step": 87295 }, { "epoch": 5.931512433754587, "grad_norm": 0.9641635417938232, "learning_rate": 0.0002585609457806767, "loss": 3.3964, "step": 87300 }, { "epoch": 5.931852153825248, "grad_norm": 0.9919977188110352, "learning_rate": 0.00025851848077184406, "loss": 3.4627, "step": 87305 }, { "epoch": 5.93219187389591, "grad_norm": 1.0040230751037598, "learning_rate": 0.0002584760157630113, "loss": 3.4442, "step": 87310 }, { "epoch": 5.932531593966572, "grad_norm": 0.8807284235954285, "learning_rate": 0.00025843355075417856, "loss": 3.5739, "step": 87315 }, { "epoch": 5.932871314037233, "grad_norm": 0.7402127385139465, "learning_rate": 0.00025839108574534584, "loss": 3.4237, "step": 87320 }, { "epoch": 5.933211034107895, "grad_norm": 0.9314968585968018, "learning_rate": 0.0002583486207365131, "loss": 3.4594, "step": 87325 }, { "epoch": 5.933550754178556, "grad_norm": 1.0152008533477783, "learning_rate": 0.0002583061557276804, "loss": 3.4363, "step": 87330 }, { "epoch": 5.9338904742492184, "grad_norm": 1.0330173969268799, "learning_rate": 0.0002582636907188477, "loss": 3.5134, "step": 87335 }, { "epoch": 5.9342301943198805, "grad_norm": 0.8797692656517029, "learning_rate": 0.00025822122571001496, "loss": 3.7714, "step": 87340 }, { "epoch": 5.934569914390542, "grad_norm": 0.8708392977714539, "learning_rate": 0.00025817876070118224, "loss": 3.3061, "step": 87345 }, { "epoch": 5.934909634461204, "grad_norm": 0.8700308799743652, "learning_rate": 0.0002581362956923495, "loss": 3.418, "step": 87350 }, { "epoch": 5.935249354531866, "grad_norm": 0.8791851997375488, "learning_rate": 0.00025809383068351675, "loss": 3.4571, "step": 87355 }, { "epoch": 5.935589074602527, "grad_norm": 1.0171502828598022, "learning_rate": 0.0002580513656746841, "loss": 3.478, "step": 87360 }, { "epoch": 5.935928794673189, "grad_norm": 0.823566734790802, "learning_rate": 0.00025800890066585136, "loss": 3.3414, "step": 87365 }, { "epoch": 5.936268514743851, "grad_norm": 0.7822336554527283, "learning_rate": 0.0002579664356570186, "loss": 3.4607, "step": 87370 }, { "epoch": 5.936608234814512, "grad_norm": 0.9780349731445312, "learning_rate": 0.0002579239706481859, "loss": 3.176, "step": 87375 }, { "epoch": 5.9369479548851745, "grad_norm": 0.8756744861602783, "learning_rate": 0.0002578815056393532, "loss": 3.4159, "step": 87380 }, { "epoch": 5.9372876749558365, "grad_norm": 1.0665007829666138, "learning_rate": 0.00025783904063052043, "loss": 3.4949, "step": 87385 }, { "epoch": 5.937627395026498, "grad_norm": 0.8300045728683472, "learning_rate": 0.0002577965756216877, "loss": 3.3136, "step": 87390 }, { "epoch": 5.93796711509716, "grad_norm": 1.0675162076950073, "learning_rate": 0.00025775411061285504, "loss": 3.1168, "step": 87395 }, { "epoch": 5.938306835167822, "grad_norm": 0.9599024057388306, "learning_rate": 0.00025771164560402227, "loss": 3.0581, "step": 87400 }, { "epoch": 5.938646555238483, "grad_norm": 0.9952685832977295, "learning_rate": 0.00025766918059518955, "loss": 3.392, "step": 87405 }, { "epoch": 5.938986275309145, "grad_norm": 1.0593780279159546, "learning_rate": 0.0002576267155863569, "loss": 3.5892, "step": 87410 }, { "epoch": 5.939325995379807, "grad_norm": 0.8815662264823914, "learning_rate": 0.0002575842505775241, "loss": 3.2007, "step": 87415 }, { "epoch": 5.939665715450468, "grad_norm": 0.7948782444000244, "learning_rate": 0.0002575417855686914, "loss": 3.3731, "step": 87420 }, { "epoch": 5.9400054355211305, "grad_norm": 0.7677506804466248, "learning_rate": 0.0002574993205598587, "loss": 3.2166, "step": 87425 }, { "epoch": 5.9403451555917925, "grad_norm": 0.6899620294570923, "learning_rate": 0.00025745685555102595, "loss": 3.2498, "step": 87430 }, { "epoch": 5.940684875662454, "grad_norm": 0.8404735326766968, "learning_rate": 0.00025741439054219323, "loss": 3.5508, "step": 87435 }, { "epoch": 5.941024595733116, "grad_norm": 0.8968085050582886, "learning_rate": 0.0002573719255333605, "loss": 3.3986, "step": 87440 }, { "epoch": 5.941364315803778, "grad_norm": 0.851989209651947, "learning_rate": 0.0002573294605245278, "loss": 3.2774, "step": 87445 }, { "epoch": 5.941704035874439, "grad_norm": 0.9279588460922241, "learning_rate": 0.00025728699551569507, "loss": 3.4749, "step": 87450 }, { "epoch": 5.942043755945101, "grad_norm": 0.9167598485946655, "learning_rate": 0.00025724453050686235, "loss": 3.4838, "step": 87455 }, { "epoch": 5.942383476015763, "grad_norm": 0.7974656224250793, "learning_rate": 0.00025720206549802963, "loss": 3.5046, "step": 87460 }, { "epoch": 5.942723196086424, "grad_norm": 0.9780206084251404, "learning_rate": 0.0002571596004891969, "loss": 3.2816, "step": 87465 }, { "epoch": 5.9430629161570865, "grad_norm": 0.7584128975868225, "learning_rate": 0.0002571171354803642, "loss": 3.3014, "step": 87470 }, { "epoch": 5.943402636227749, "grad_norm": 0.9550994634628296, "learning_rate": 0.00025707467047153147, "loss": 3.5415, "step": 87475 }, { "epoch": 5.94374235629841, "grad_norm": 0.9063881635665894, "learning_rate": 0.00025703220546269875, "loss": 3.5972, "step": 87480 }, { "epoch": 5.944082076369072, "grad_norm": 0.7920548319816589, "learning_rate": 0.00025698974045386603, "loss": 3.5448, "step": 87485 }, { "epoch": 5.944421796439734, "grad_norm": 0.857038140296936, "learning_rate": 0.0002569472754450333, "loss": 3.3626, "step": 87490 }, { "epoch": 5.944761516510395, "grad_norm": 0.9470358490943909, "learning_rate": 0.0002569048104362006, "loss": 3.2626, "step": 87495 }, { "epoch": 5.945101236581057, "grad_norm": 1.0175539255142212, "learning_rate": 0.00025686234542736787, "loss": 3.5909, "step": 87500 }, { "epoch": 5.945440956651719, "grad_norm": 0.7661160230636597, "learning_rate": 0.00025681988041853515, "loss": 3.4494, "step": 87505 }, { "epoch": 5.94578067672238, "grad_norm": 0.8242473602294922, "learning_rate": 0.0002567774154097024, "loss": 3.2919, "step": 87510 }, { "epoch": 5.9461203967930425, "grad_norm": 0.7870598435401917, "learning_rate": 0.0002567349504008697, "loss": 3.5833, "step": 87515 }, { "epoch": 5.946460116863705, "grad_norm": 1.0757640600204468, "learning_rate": 0.000256692485392037, "loss": 3.6027, "step": 87520 }, { "epoch": 5.946799836934366, "grad_norm": 0.8482693433761597, "learning_rate": 0.0002566500203832042, "loss": 3.643, "step": 87525 }, { "epoch": 5.947139557005028, "grad_norm": 1.0831378698349, "learning_rate": 0.00025660755537437155, "loss": 3.2219, "step": 87530 }, { "epoch": 5.94747927707569, "grad_norm": 0.9210177659988403, "learning_rate": 0.00025656509036553883, "loss": 3.586, "step": 87535 }, { "epoch": 5.947818997146351, "grad_norm": 0.8165308833122253, "learning_rate": 0.00025652262535670606, "loss": 3.4304, "step": 87540 }, { "epoch": 5.948158717217013, "grad_norm": 1.0155463218688965, "learning_rate": 0.00025648016034787334, "loss": 3.3828, "step": 87545 }, { "epoch": 5.948498437287675, "grad_norm": 0.9909318089485168, "learning_rate": 0.00025643769533904067, "loss": 3.247, "step": 87550 }, { "epoch": 5.948838157358336, "grad_norm": 1.0564830303192139, "learning_rate": 0.0002563952303302079, "loss": 3.6905, "step": 87555 }, { "epoch": 5.9491778774289985, "grad_norm": 0.7387513518333435, "learning_rate": 0.0002563527653213752, "loss": 3.1883, "step": 87560 }, { "epoch": 5.949517597499661, "grad_norm": 0.8919906616210938, "learning_rate": 0.0002563103003125425, "loss": 3.3275, "step": 87565 }, { "epoch": 5.949857317570322, "grad_norm": 1.1173657178878784, "learning_rate": 0.00025626783530370974, "loss": 3.493, "step": 87570 }, { "epoch": 5.950197037640984, "grad_norm": 0.8686089515686035, "learning_rate": 0.000256225370294877, "loss": 3.2703, "step": 87575 }, { "epoch": 5.950536757711646, "grad_norm": 1.40020751953125, "learning_rate": 0.0002561829052860443, "loss": 3.4626, "step": 87580 }, { "epoch": 5.950876477782307, "grad_norm": 0.7526452541351318, "learning_rate": 0.0002561404402772116, "loss": 3.618, "step": 87585 }, { "epoch": 5.951216197852969, "grad_norm": 1.0939830541610718, "learning_rate": 0.00025609797526837886, "loss": 3.3209, "step": 87590 }, { "epoch": 5.951555917923631, "grad_norm": 1.2953850030899048, "learning_rate": 0.00025605551025954614, "loss": 3.5557, "step": 87595 }, { "epoch": 5.951895637994292, "grad_norm": 0.8172399401664734, "learning_rate": 0.0002560130452507134, "loss": 3.4038, "step": 87600 }, { "epoch": 5.9522353580649545, "grad_norm": 1.1040102243423462, "learning_rate": 0.0002559705802418807, "loss": 3.5745, "step": 87605 }, { "epoch": 5.952575078135617, "grad_norm": 0.8998822569847107, "learning_rate": 0.000255928115233048, "loss": 3.6826, "step": 87610 }, { "epoch": 5.952914798206278, "grad_norm": 0.8187810778617859, "learning_rate": 0.0002558856502242152, "loss": 3.6587, "step": 87615 }, { "epoch": 5.95325451827694, "grad_norm": 0.9343876242637634, "learning_rate": 0.00025584318521538254, "loss": 3.3489, "step": 87620 }, { "epoch": 5.953594238347602, "grad_norm": 1.072891354560852, "learning_rate": 0.0002558007202065498, "loss": 3.2811, "step": 87625 }, { "epoch": 5.953933958418263, "grad_norm": 0.8833297491073608, "learning_rate": 0.00025575825519771704, "loss": 3.6804, "step": 87630 }, { "epoch": 5.954273678488925, "grad_norm": 0.9450722932815552, "learning_rate": 0.0002557157901888844, "loss": 3.4501, "step": 87635 }, { "epoch": 5.954613398559587, "grad_norm": 0.7401144504547119, "learning_rate": 0.00025567332518005166, "loss": 3.2962, "step": 87640 }, { "epoch": 5.9549531186302485, "grad_norm": 0.8565064072608948, "learning_rate": 0.00025563086017121894, "loss": 3.4133, "step": 87645 }, { "epoch": 5.9552928387009105, "grad_norm": 0.8214325308799744, "learning_rate": 0.00025558839516238616, "loss": 3.5129, "step": 87650 }, { "epoch": 5.955632558771573, "grad_norm": 1.1307978630065918, "learning_rate": 0.0002555459301535535, "loss": 3.4319, "step": 87655 }, { "epoch": 5.955972278842234, "grad_norm": 0.7849156260490417, "learning_rate": 0.0002555034651447208, "loss": 3.4021, "step": 87660 }, { "epoch": 5.956311998912896, "grad_norm": 1.1903949975967407, "learning_rate": 0.000255461000135888, "loss": 3.5383, "step": 87665 }, { "epoch": 5.956651718983558, "grad_norm": 0.953140139579773, "learning_rate": 0.00025541853512705534, "loss": 3.3946, "step": 87670 }, { "epoch": 5.956991439054219, "grad_norm": 1.0677319765090942, "learning_rate": 0.0002553760701182226, "loss": 3.4506, "step": 87675 }, { "epoch": 5.957331159124881, "grad_norm": 0.9141011238098145, "learning_rate": 0.00025533360510938984, "loss": 3.6847, "step": 87680 }, { "epoch": 5.957670879195543, "grad_norm": 1.1151607036590576, "learning_rate": 0.0002552911401005571, "loss": 3.5215, "step": 87685 }, { "epoch": 5.9580105992662045, "grad_norm": 0.7576051950454712, "learning_rate": 0.00025524867509172446, "loss": 3.3482, "step": 87690 }, { "epoch": 5.9583503193368665, "grad_norm": 0.7513319849967957, "learning_rate": 0.0002552062100828917, "loss": 3.3793, "step": 87695 }, { "epoch": 5.958690039407529, "grad_norm": 0.8699908256530762, "learning_rate": 0.00025516374507405896, "loss": 3.6958, "step": 87700 }, { "epoch": 5.95902975947819, "grad_norm": 0.7480401992797852, "learning_rate": 0.0002551212800652263, "loss": 3.5752, "step": 87705 }, { "epoch": 5.959369479548852, "grad_norm": 0.9115986227989197, "learning_rate": 0.0002550788150563935, "loss": 3.4835, "step": 87710 }, { "epoch": 5.959709199619514, "grad_norm": 0.8467034101486206, "learning_rate": 0.0002550363500475608, "loss": 3.3964, "step": 87715 }, { "epoch": 5.960048919690175, "grad_norm": 0.8195412755012512, "learning_rate": 0.00025499388503872814, "loss": 3.4874, "step": 87720 }, { "epoch": 5.960388639760837, "grad_norm": 0.8298139572143555, "learning_rate": 0.00025495142002989537, "loss": 3.445, "step": 87725 }, { "epoch": 5.960728359831499, "grad_norm": 0.8372587561607361, "learning_rate": 0.00025490895502106265, "loss": 3.7782, "step": 87730 }, { "epoch": 5.9610680799021605, "grad_norm": 0.8053191304206848, "learning_rate": 0.0002548664900122299, "loss": 3.5352, "step": 87735 }, { "epoch": 5.9614077999728226, "grad_norm": 0.9222981929779053, "learning_rate": 0.0002548240250033972, "loss": 3.3226, "step": 87740 }, { "epoch": 5.961747520043485, "grad_norm": 1.3866087198257446, "learning_rate": 0.0002547815599945645, "loss": 3.1954, "step": 87745 }, { "epoch": 5.962087240114146, "grad_norm": 0.9325369596481323, "learning_rate": 0.00025473909498573177, "loss": 3.8943, "step": 87750 }, { "epoch": 5.962426960184808, "grad_norm": 0.7968777418136597, "learning_rate": 0.00025469662997689905, "loss": 3.4862, "step": 87755 }, { "epoch": 5.96276668025547, "grad_norm": 0.9184139966964722, "learning_rate": 0.0002546541649680663, "loss": 3.546, "step": 87760 }, { "epoch": 5.963106400326131, "grad_norm": 0.8017551898956299, "learning_rate": 0.0002546116999592336, "loss": 3.7562, "step": 87765 }, { "epoch": 5.963446120396793, "grad_norm": 0.9906613230705261, "learning_rate": 0.00025456923495040083, "loss": 3.5998, "step": 87770 }, { "epoch": 5.963785840467455, "grad_norm": 0.9008069634437561, "learning_rate": 0.00025452676994156817, "loss": 3.3407, "step": 87775 }, { "epoch": 5.9641255605381165, "grad_norm": 0.7324938774108887, "learning_rate": 0.00025448430493273545, "loss": 3.4865, "step": 87780 }, { "epoch": 5.964465280608779, "grad_norm": 0.8969598412513733, "learning_rate": 0.00025444183992390267, "loss": 3.6578, "step": 87785 }, { "epoch": 5.964805000679441, "grad_norm": 0.9423465728759766, "learning_rate": 0.00025439937491507, "loss": 3.547, "step": 87790 }, { "epoch": 5.965144720750102, "grad_norm": 0.7941276431083679, "learning_rate": 0.0002543569099062373, "loss": 3.099, "step": 87795 }, { "epoch": 5.965484440820764, "grad_norm": 0.9506189823150635, "learning_rate": 0.0002543144448974045, "loss": 3.546, "step": 87800 }, { "epoch": 5.965824160891425, "grad_norm": 1.1353613138198853, "learning_rate": 0.0002542719798885718, "loss": 3.2872, "step": 87805 }, { "epoch": 5.966163880962087, "grad_norm": 0.8470731377601624, "learning_rate": 0.0002542295148797391, "loss": 3.6516, "step": 87810 }, { "epoch": 5.966503601032749, "grad_norm": 0.961182713508606, "learning_rate": 0.0002541870498709064, "loss": 3.7925, "step": 87815 }, { "epoch": 5.96684332110341, "grad_norm": 0.8845279216766357, "learning_rate": 0.00025414458486207363, "loss": 3.3264, "step": 87820 }, { "epoch": 5.9671830411740725, "grad_norm": 0.8230481743812561, "learning_rate": 0.00025410211985324097, "loss": 3.481, "step": 87825 }, { "epoch": 5.967522761244735, "grad_norm": 0.8327860832214355, "learning_rate": 0.00025405965484440825, "loss": 3.6837, "step": 87830 }, { "epoch": 5.967862481315396, "grad_norm": 0.9442246556282043, "learning_rate": 0.00025401718983557547, "loss": 3.491, "step": 87835 }, { "epoch": 5.968202201386058, "grad_norm": 0.9645861983299255, "learning_rate": 0.00025397472482674275, "loss": 3.438, "step": 87840 }, { "epoch": 5.96854192145672, "grad_norm": 0.7841061353683472, "learning_rate": 0.0002539322598179101, "loss": 3.5916, "step": 87845 }, { "epoch": 5.968881641527381, "grad_norm": 0.8801999092102051, "learning_rate": 0.0002538897948090773, "loss": 3.3937, "step": 87850 }, { "epoch": 5.969221361598043, "grad_norm": 0.8848710060119629, "learning_rate": 0.0002538473298002446, "loss": 3.5727, "step": 87855 }, { "epoch": 5.969561081668705, "grad_norm": 0.9078718423843384, "learning_rate": 0.00025380486479141193, "loss": 3.4323, "step": 87860 }, { "epoch": 5.969900801739366, "grad_norm": 0.8893359899520874, "learning_rate": 0.00025376239978257915, "loss": 3.5389, "step": 87865 }, { "epoch": 5.9702405218100285, "grad_norm": 0.8817317485809326, "learning_rate": 0.00025371993477374643, "loss": 3.4542, "step": 87870 }, { "epoch": 5.970580241880691, "grad_norm": 0.9319194555282593, "learning_rate": 0.0002536774697649137, "loss": 3.3732, "step": 87875 }, { "epoch": 5.970919961951352, "grad_norm": 0.9150902628898621, "learning_rate": 0.000253635004756081, "loss": 3.366, "step": 87880 }, { "epoch": 5.971259682022014, "grad_norm": 0.7564211487770081, "learning_rate": 0.0002535925397472483, "loss": 3.6287, "step": 87885 }, { "epoch": 5.971599402092676, "grad_norm": 1.2268205881118774, "learning_rate": 0.00025355007473841555, "loss": 3.1855, "step": 87890 }, { "epoch": 5.971939122163337, "grad_norm": 0.902304470539093, "learning_rate": 0.00025350760972958283, "loss": 3.554, "step": 87895 }, { "epoch": 5.972278842233999, "grad_norm": 0.9127666354179382, "learning_rate": 0.0002534651447207501, "loss": 3.2878, "step": 87900 }, { "epoch": 5.972618562304661, "grad_norm": 0.7556972503662109, "learning_rate": 0.0002534226797119174, "loss": 3.3472, "step": 87905 }, { "epoch": 5.9729582823753224, "grad_norm": 0.9941595792770386, "learning_rate": 0.0002533802147030846, "loss": 3.5895, "step": 87910 }, { "epoch": 5.9732980024459845, "grad_norm": 0.7425792813301086, "learning_rate": 0.00025333774969425195, "loss": 3.4895, "step": 87915 }, { "epoch": 5.973637722516647, "grad_norm": 1.030763030052185, "learning_rate": 0.00025329528468541923, "loss": 3.4746, "step": 87920 }, { "epoch": 5.973977442587308, "grad_norm": 0.9264202117919922, "learning_rate": 0.00025325281967658646, "loss": 3.4153, "step": 87925 }, { "epoch": 5.97431716265797, "grad_norm": 0.8829048871994019, "learning_rate": 0.0002532103546677538, "loss": 3.5429, "step": 87930 }, { "epoch": 5.974656882728632, "grad_norm": 0.8642759323120117, "learning_rate": 0.0002531678896589211, "loss": 3.5575, "step": 87935 }, { "epoch": 5.974996602799293, "grad_norm": 0.7715226411819458, "learning_rate": 0.0002531254246500883, "loss": 3.2366, "step": 87940 }, { "epoch": 5.975336322869955, "grad_norm": 0.7336623072624207, "learning_rate": 0.0002530829596412556, "loss": 3.5344, "step": 87945 }, { "epoch": 5.975676042940617, "grad_norm": 0.7674343585968018, "learning_rate": 0.0002530404946324229, "loss": 3.2711, "step": 87950 }, { "epoch": 5.9760157630112785, "grad_norm": 0.8075256943702698, "learning_rate": 0.00025299802962359014, "loss": 3.2749, "step": 87955 }, { "epoch": 5.9763554830819405, "grad_norm": 0.9380080699920654, "learning_rate": 0.0002529555646147574, "loss": 3.6459, "step": 87960 }, { "epoch": 5.976695203152603, "grad_norm": 0.8172228932380676, "learning_rate": 0.00025291309960592475, "loss": 3.3484, "step": 87965 }, { "epoch": 5.977034923223264, "grad_norm": 0.8264505863189697, "learning_rate": 0.000252870634597092, "loss": 3.5681, "step": 87970 }, { "epoch": 5.977374643293926, "grad_norm": 0.8286378383636475, "learning_rate": 0.00025282816958825926, "loss": 3.7217, "step": 87975 }, { "epoch": 5.977714363364588, "grad_norm": 0.866304337978363, "learning_rate": 0.00025278570457942654, "loss": 3.4658, "step": 87980 }, { "epoch": 5.978054083435249, "grad_norm": 0.6695724129676819, "learning_rate": 0.0002527432395705939, "loss": 3.6369, "step": 87985 }, { "epoch": 5.978393803505911, "grad_norm": 0.8788052797317505, "learning_rate": 0.0002527007745617611, "loss": 3.2139, "step": 87990 }, { "epoch": 5.978733523576573, "grad_norm": 0.9803169965744019, "learning_rate": 0.0002526583095529284, "loss": 3.6267, "step": 87995 }, { "epoch": 5.9790732436472345, "grad_norm": 1.0306413173675537, "learning_rate": 0.0002526158445440957, "loss": 3.4312, "step": 88000 }, { "epoch": 5.9794129637178965, "grad_norm": 0.9144333004951477, "learning_rate": 0.00025257337953526294, "loss": 3.3391, "step": 88005 }, { "epoch": 5.979752683788558, "grad_norm": 0.9094138145446777, "learning_rate": 0.0002525309145264302, "loss": 3.2381, "step": 88010 }, { "epoch": 5.98009240385922, "grad_norm": 1.0916175842285156, "learning_rate": 0.00025248844951759756, "loss": 3.4422, "step": 88015 }, { "epoch": 5.980432123929882, "grad_norm": 0.7795401215553284, "learning_rate": 0.0002524459845087648, "loss": 3.5418, "step": 88020 }, { "epoch": 5.980771844000543, "grad_norm": 0.8213106989860535, "learning_rate": 0.00025240351949993206, "loss": 3.3352, "step": 88025 }, { "epoch": 5.981111564071205, "grad_norm": 0.8686926364898682, "learning_rate": 0.00025236105449109934, "loss": 3.5608, "step": 88030 }, { "epoch": 5.981451284141867, "grad_norm": 0.9499101042747498, "learning_rate": 0.0002523185894822666, "loss": 3.6518, "step": 88035 }, { "epoch": 5.981791004212528, "grad_norm": 0.9914563894271851, "learning_rate": 0.0002522761244734339, "loss": 3.5408, "step": 88040 }, { "epoch": 5.9821307242831905, "grad_norm": 1.2207297086715698, "learning_rate": 0.0002522336594646012, "loss": 3.4204, "step": 88045 }, { "epoch": 5.982470444353853, "grad_norm": 0.8428399562835693, "learning_rate": 0.00025219119445576846, "loss": 3.4549, "step": 88050 }, { "epoch": 5.982810164424514, "grad_norm": 0.854900062084198, "learning_rate": 0.00025214872944693574, "loss": 3.3504, "step": 88055 }, { "epoch": 5.983149884495176, "grad_norm": 0.7577596306800842, "learning_rate": 0.000252106264438103, "loss": 3.5553, "step": 88060 }, { "epoch": 5.983489604565838, "grad_norm": 0.8783277273178101, "learning_rate": 0.00025206379942927025, "loss": 3.5637, "step": 88065 }, { "epoch": 5.983829324636499, "grad_norm": 0.8838988542556763, "learning_rate": 0.0002520213344204376, "loss": 3.6819, "step": 88070 }, { "epoch": 5.984169044707161, "grad_norm": 0.839244544506073, "learning_rate": 0.00025197886941160486, "loss": 3.2292, "step": 88075 }, { "epoch": 5.984508764777823, "grad_norm": 1.4854435920715332, "learning_rate": 0.0002519364044027721, "loss": 3.6048, "step": 88080 }, { "epoch": 5.984848484848484, "grad_norm": 1.1846059560775757, "learning_rate": 0.0002518939393939394, "loss": 3.1716, "step": 88085 }, { "epoch": 5.9851882049191465, "grad_norm": 0.8549778461456299, "learning_rate": 0.0002518514743851067, "loss": 3.5488, "step": 88090 }, { "epoch": 5.985527924989809, "grad_norm": 0.9296994209289551, "learning_rate": 0.00025180900937627393, "loss": 3.183, "step": 88095 }, { "epoch": 5.98586764506047, "grad_norm": 0.9364610910415649, "learning_rate": 0.0002517665443674412, "loss": 3.5484, "step": 88100 }, { "epoch": 5.986207365131132, "grad_norm": 0.7126706838607788, "learning_rate": 0.00025172407935860854, "loss": 3.3379, "step": 88105 }, { "epoch": 5.986547085201794, "grad_norm": 0.9163709282875061, "learning_rate": 0.00025168161434977577, "loss": 3.6132, "step": 88110 }, { "epoch": 5.986886805272455, "grad_norm": 0.7285540103912354, "learning_rate": 0.00025163914934094305, "loss": 3.5996, "step": 88115 }, { "epoch": 5.987226525343117, "grad_norm": 0.7629046440124512, "learning_rate": 0.0002515966843321104, "loss": 3.3475, "step": 88120 }, { "epoch": 5.987566245413779, "grad_norm": 0.9181415438652039, "learning_rate": 0.0002515542193232776, "loss": 3.3149, "step": 88125 }, { "epoch": 5.98790596548444, "grad_norm": 0.9522302746772766, "learning_rate": 0.0002515117543144449, "loss": 3.1457, "step": 88130 }, { "epoch": 5.9882456855551025, "grad_norm": 0.9400190114974976, "learning_rate": 0.00025146928930561217, "loss": 3.2017, "step": 88135 }, { "epoch": 5.988585405625765, "grad_norm": 1.0296279191970825, "learning_rate": 0.00025142682429677945, "loss": 3.4343, "step": 88140 }, { "epoch": 5.988925125696426, "grad_norm": 1.015599250793457, "learning_rate": 0.00025138435928794673, "loss": 3.3007, "step": 88145 }, { "epoch": 5.989264845767088, "grad_norm": 0.9581639170646667, "learning_rate": 0.000251341894279114, "loss": 3.3513, "step": 88150 }, { "epoch": 5.98960456583775, "grad_norm": 0.8747519850730896, "learning_rate": 0.00025129942927028134, "loss": 3.4879, "step": 88155 }, { "epoch": 5.989944285908411, "grad_norm": 0.880652129650116, "learning_rate": 0.00025125696426144857, "loss": 3.3327, "step": 88160 }, { "epoch": 5.990284005979073, "grad_norm": 0.9219487309455872, "learning_rate": 0.00025121449925261585, "loss": 3.2546, "step": 88165 }, { "epoch": 5.990623726049735, "grad_norm": 1.0514140129089355, "learning_rate": 0.00025117203424378313, "loss": 3.7368, "step": 88170 }, { "epoch": 5.990963446120396, "grad_norm": 1.0886567831039429, "learning_rate": 0.0002511295692349504, "loss": 3.3659, "step": 88175 }, { "epoch": 5.9913031661910585, "grad_norm": 1.203183650970459, "learning_rate": 0.0002510871042261177, "loss": 3.6497, "step": 88180 }, { "epoch": 5.991642886261721, "grad_norm": 0.9404119849205017, "learning_rate": 0.00025104463921728497, "loss": 3.486, "step": 88185 }, { "epoch": 5.991982606332382, "grad_norm": 0.90493243932724, "learning_rate": 0.00025100217420845225, "loss": 3.3324, "step": 88190 }, { "epoch": 5.992322326403044, "grad_norm": 1.397676706314087, "learning_rate": 0.00025095970919961953, "loss": 3.1644, "step": 88195 }, { "epoch": 5.992662046473706, "grad_norm": 0.861950695514679, "learning_rate": 0.0002509172441907868, "loss": 3.5544, "step": 88200 }, { "epoch": 5.993001766544367, "grad_norm": 0.8270180821418762, "learning_rate": 0.00025087477918195404, "loss": 3.3112, "step": 88205 }, { "epoch": 5.993341486615029, "grad_norm": 0.8583129048347473, "learning_rate": 0.00025083231417312137, "loss": 3.4928, "step": 88210 }, { "epoch": 5.993681206685691, "grad_norm": 0.8138909935951233, "learning_rate": 0.00025078984916428865, "loss": 3.2606, "step": 88215 }, { "epoch": 5.9940209267563525, "grad_norm": 0.8053900599479675, "learning_rate": 0.0002507473841554559, "loss": 3.3827, "step": 88220 }, { "epoch": 5.9943606468270145, "grad_norm": 0.8470441102981567, "learning_rate": 0.0002507049191466232, "loss": 3.5933, "step": 88225 }, { "epoch": 5.994700366897677, "grad_norm": 1.1569738388061523, "learning_rate": 0.0002506624541377905, "loss": 3.6711, "step": 88230 }, { "epoch": 5.995040086968338, "grad_norm": 1.0644514560699463, "learning_rate": 0.0002506199891289577, "loss": 3.6686, "step": 88235 }, { "epoch": 5.995379807039, "grad_norm": 0.8666078448295593, "learning_rate": 0.000250577524120125, "loss": 3.5863, "step": 88240 }, { "epoch": 5.995719527109662, "grad_norm": 0.9564222693443298, "learning_rate": 0.00025053505911129233, "loss": 3.4059, "step": 88245 }, { "epoch": 5.996059247180323, "grad_norm": 1.125313639640808, "learning_rate": 0.00025049259410245956, "loss": 3.6172, "step": 88250 }, { "epoch": 5.996398967250985, "grad_norm": 1.02402663230896, "learning_rate": 0.00025045012909362684, "loss": 3.2075, "step": 88255 }, { "epoch": 5.996738687321647, "grad_norm": 0.8217767477035522, "learning_rate": 0.00025040766408479417, "loss": 3.4003, "step": 88260 }, { "epoch": 5.9970784073923085, "grad_norm": 1.0333209037780762, "learning_rate": 0.0002503651990759614, "loss": 3.5562, "step": 88265 }, { "epoch": 5.9974181274629705, "grad_norm": 1.0327162742614746, "learning_rate": 0.0002503227340671287, "loss": 3.3522, "step": 88270 }, { "epoch": 5.997757847533633, "grad_norm": 0.7804272174835205, "learning_rate": 0.000250280269058296, "loss": 3.3501, "step": 88275 }, { "epoch": 5.998097567604294, "grad_norm": 0.8569813966751099, "learning_rate": 0.00025023780404946324, "loss": 3.5995, "step": 88280 }, { "epoch": 5.998437287674956, "grad_norm": 0.8795720934867859, "learning_rate": 0.0002501953390406305, "loss": 3.5056, "step": 88285 }, { "epoch": 5.998777007745618, "grad_norm": 0.8322656750679016, "learning_rate": 0.0002501528740317978, "loss": 3.5322, "step": 88290 }, { "epoch": 5.999116727816279, "grad_norm": 0.8637499213218689, "learning_rate": 0.0002501104090229651, "loss": 3.3911, "step": 88295 }, { "epoch": 5.999456447886941, "grad_norm": 1.3256415128707886, "learning_rate": 0.00025006794401413236, "loss": 3.5186, "step": 88300 }, { "epoch": 5.999796167957603, "grad_norm": 1.140984058380127, "learning_rate": 0.00025002547900529964, "loss": 3.556, "step": 88305 }, { "epoch": 6.0, "eval_bertscore": { "f1": 0.8412353278812206, "precision": 0.8421136805375586, "recall": 0.8411571180327178 }, "eval_bleu_4": 0.021544713242252534, "eval_exact_match": 0.00038763446070355656, "eval_loss": 3.4019320011138916, "eval_meteor": 0.09107590758767604, "eval_rouge": { "rouge1": 0.1306312601958517, "rouge2": 0.021282046414223994, "rougeL": 0.11342475627469248, "rougeLsum": 0.11345516014889859 }, "eval_runtime": 2390.6995, "eval_samples_per_second": 4.316, "eval_steps_per_second": 0.54, "step": 88308 } ], "logging_steps": 5, "max_steps": 117744, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.340286051015721e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }