{ "best_metric": 0.003442206200937784, "best_model_checkpoint": "./results-cc/plbart/plbart_lora_official_0.001/checkpoint-29436", "epoch": 2.0, "eval_steps": 500, "global_step": 29436, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003397200706617747, "grad_norm": 6.769941329956055, "learning_rate": 0.0009999745209947003, "loss": 12.0866, "step": 5 }, { "epoch": 0.0006794401413235494, "grad_norm": 2.6679279804229736, "learning_rate": 0.0009999320559858677, "loss": 7.0188, "step": 10 }, { "epoch": 0.0010191602119853241, "grad_norm": 4.147106170654297, "learning_rate": 0.000999889590977035, "loss": 5.5429, "step": 15 }, { "epoch": 0.001358880282647099, "grad_norm": 1.7080023288726807, "learning_rate": 0.0009998471259682023, "loss": 4.6101, "step": 20 }, { "epoch": 0.0016986003533088735, "grad_norm": 1.2410838603973389, "learning_rate": 0.0009998046609593695, "loss": 4.1494, "step": 25 }, { "epoch": 0.0020383204239706482, "grad_norm": 1.5808942317962646, "learning_rate": 0.0009997621959505368, "loss": 4.0438, "step": 30 }, { "epoch": 0.002378040494632423, "grad_norm": 1.6446036100387573, "learning_rate": 0.0009997197309417041, "loss": 3.8977, "step": 35 }, { "epoch": 0.002717760565294198, "grad_norm": 2.4118785858154297, "learning_rate": 0.0009996772659328712, "loss": 3.8157, "step": 40 }, { "epoch": 0.0030574806359559724, "grad_norm": 1.1931132078170776, "learning_rate": 0.0009996348009240386, "loss": 4.1119, "step": 45 }, { "epoch": 0.003397200706617747, "grad_norm": 1.3004735708236694, "learning_rate": 0.000999592335915206, "loss": 3.9466, "step": 50 }, { "epoch": 0.0037369207772795215, "grad_norm": 1.1279394626617432, "learning_rate": 0.0009995498709063732, "loss": 3.9202, "step": 55 }, { "epoch": 0.0040766408479412965, "grad_norm": 1.5336614847183228, "learning_rate": 0.0009995074058975404, "loss": 3.8324, "step": 60 }, { "epoch": 0.0044163609186030715, "grad_norm": 0.9337885975837708, 
"learning_rate": 0.0009994649408887077, "loss": 3.8142, "step": 65 }, { "epoch": 0.004756080989264846, "grad_norm": 0.8880303502082825, "learning_rate": 0.000999422475879875, "loss": 4.065, "step": 70 }, { "epoch": 0.005095801059926621, "grad_norm": 1.0942732095718384, "learning_rate": 0.0009993800108710421, "loss": 3.6652, "step": 75 }, { "epoch": 0.005435521130588396, "grad_norm": 1.1259429454803467, "learning_rate": 0.0009993375458622097, "loss": 3.6717, "step": 80 }, { "epoch": 0.00577524120125017, "grad_norm": 0.8570499420166016, "learning_rate": 0.0009992950808533768, "loss": 3.6509, "step": 85 }, { "epoch": 0.006114961271911945, "grad_norm": 1.2053712606430054, "learning_rate": 0.0009992526158445442, "loss": 3.7417, "step": 90 }, { "epoch": 0.006454681342573719, "grad_norm": 0.9718337059020996, "learning_rate": 0.0009992101508357115, "loss": 3.98, "step": 95 }, { "epoch": 0.006794401413235494, "grad_norm": 1.1109780073165894, "learning_rate": 0.0009991676858268786, "loss": 3.8899, "step": 100 }, { "epoch": 0.007134121483897269, "grad_norm": 0.9223835468292236, "learning_rate": 0.000999125220818046, "loss": 3.4693, "step": 105 }, { "epoch": 0.007473841554559043, "grad_norm": 1.0440263748168945, "learning_rate": 0.0009990827558092133, "loss": 3.8803, "step": 110 }, { "epoch": 0.007813561625220818, "grad_norm": 0.898756206035614, "learning_rate": 0.0009990402908003806, "loss": 3.8081, "step": 115 }, { "epoch": 0.008153281695882593, "grad_norm": 1.2545490264892578, "learning_rate": 0.0009989978257915477, "loss": 4.0245, "step": 120 }, { "epoch": 0.008493001766544368, "grad_norm": 0.9366511106491089, "learning_rate": 0.000998955360782715, "loss": 3.8386, "step": 125 }, { "epoch": 0.008832721837206143, "grad_norm": 1.4074876308441162, "learning_rate": 0.0009989128957738824, "loss": 3.7506, "step": 130 }, { "epoch": 0.009172441907867916, "grad_norm": 1.3254245519638062, "learning_rate": 0.0009988704307650495, "loss": 3.7469, "step": 135 }, { "epoch": 
0.009512161978529691, "grad_norm": 2.6460890769958496, "learning_rate": 0.0009988279657562168, "loss": 3.6807, "step": 140 }, { "epoch": 0.009851882049191466, "grad_norm": 0.9989180564880371, "learning_rate": 0.0009987855007473842, "loss": 3.6063, "step": 145 }, { "epoch": 0.010191602119853241, "grad_norm": 1.138648271560669, "learning_rate": 0.0009987430357385515, "loss": 3.8185, "step": 150 }, { "epoch": 0.010531322190515016, "grad_norm": 1.4436720609664917, "learning_rate": 0.0009987005707297188, "loss": 3.7778, "step": 155 }, { "epoch": 0.010871042261176791, "grad_norm": 1.167515754699707, "learning_rate": 0.000998658105720886, "loss": 3.854, "step": 160 }, { "epoch": 0.011210762331838564, "grad_norm": 1.1333575248718262, "learning_rate": 0.0009986156407120533, "loss": 3.7403, "step": 165 }, { "epoch": 0.01155048240250034, "grad_norm": 1.7826014757156372, "learning_rate": 0.0009985731757032206, "loss": 3.8134, "step": 170 }, { "epoch": 0.011890202473162114, "grad_norm": 1.1271919012069702, "learning_rate": 0.0009985307106943877, "loss": 3.7271, "step": 175 }, { "epoch": 0.01222992254382389, "grad_norm": 1.3732924461364746, "learning_rate": 0.000998488245685555, "loss": 3.8299, "step": 180 }, { "epoch": 0.012569642614485664, "grad_norm": 1.1746324300765991, "learning_rate": 0.0009984457806767224, "loss": 3.8285, "step": 185 }, { "epoch": 0.012909362685147438, "grad_norm": 1.0174121856689453, "learning_rate": 0.0009984033156678898, "loss": 3.7326, "step": 190 }, { "epoch": 0.013249082755809213, "grad_norm": 1.2099727392196655, "learning_rate": 0.0009983608506590569, "loss": 3.963, "step": 195 }, { "epoch": 0.013588802826470988, "grad_norm": 1.0665942430496216, "learning_rate": 0.0009983183856502242, "loss": 3.7627, "step": 200 }, { "epoch": 0.013928522897132763, "grad_norm": 1.0860059261322021, "learning_rate": 0.0009982759206413915, "loss": 3.8543, "step": 205 }, { "epoch": 0.014268242967794538, "grad_norm": 1.1611007452011108, "learning_rate": 
0.0009982334556325587, "loss": 3.5513, "step": 210 }, { "epoch": 0.014607963038456313, "grad_norm": 0.9061139822006226, "learning_rate": 0.000998190990623726, "loss": 3.7342, "step": 215 }, { "epoch": 0.014947683109118086, "grad_norm": 0.9131039381027222, "learning_rate": 0.0009981485256148933, "loss": 3.7693, "step": 220 }, { "epoch": 0.015287403179779861, "grad_norm": 0.991196870803833, "learning_rate": 0.0009981060606060607, "loss": 3.7196, "step": 225 }, { "epoch": 0.015627123250441636, "grad_norm": 0.9212251901626587, "learning_rate": 0.000998063595597228, "loss": 3.649, "step": 230 }, { "epoch": 0.01596684332110341, "grad_norm": 1.1009888648986816, "learning_rate": 0.0009980211305883951, "loss": 3.7614, "step": 235 }, { "epoch": 0.016306563391765186, "grad_norm": 1.2919204235076904, "learning_rate": 0.0009979786655795624, "loss": 3.5449, "step": 240 }, { "epoch": 0.01664628346242696, "grad_norm": 1.0719887018203735, "learning_rate": 0.0009979362005707298, "loss": 3.6127, "step": 245 }, { "epoch": 0.016986003533088736, "grad_norm": 0.8586204648017883, "learning_rate": 0.000997893735561897, "loss": 3.784, "step": 250 }, { "epoch": 0.01732572360375051, "grad_norm": 1.2499065399169922, "learning_rate": 0.0009978512705530644, "loss": 3.6152, "step": 255 }, { "epoch": 0.017665443674412286, "grad_norm": 1.4381811618804932, "learning_rate": 0.0009978088055442316, "loss": 3.8925, "step": 260 }, { "epoch": 0.01800516374507406, "grad_norm": 1.0378837585449219, "learning_rate": 0.000997766340535399, "loss": 4.0506, "step": 265 }, { "epoch": 0.018344883815735832, "grad_norm": 1.3113740682601929, "learning_rate": 0.0009977238755265662, "loss": 3.6895, "step": 270 }, { "epoch": 0.01868460388639761, "grad_norm": 0.9968258142471313, "learning_rate": 0.0009976814105177334, "loss": 3.5497, "step": 275 }, { "epoch": 0.019024323957059382, "grad_norm": 0.9528198838233948, "learning_rate": 0.0009976389455089007, "loss": 3.6809, "step": 280 }, { "epoch": 0.01936404402772116, 
"grad_norm": 0.8875699043273926, "learning_rate": 0.000997596480500068, "loss": 3.6897, "step": 285 }, { "epoch": 0.019703764098382932, "grad_norm": 0.9936535954475403, "learning_rate": 0.0009975540154912354, "loss": 3.7821, "step": 290 }, { "epoch": 0.020043484169044706, "grad_norm": 1.119369626045227, "learning_rate": 0.0009975115504824025, "loss": 3.7424, "step": 295 }, { "epoch": 0.020383204239706482, "grad_norm": 1.1052080392837524, "learning_rate": 0.0009974690854735698, "loss": 3.4608, "step": 300 }, { "epoch": 0.020722924310368256, "grad_norm": 1.5735164880752563, "learning_rate": 0.0009974266204647371, "loss": 3.8221, "step": 305 }, { "epoch": 0.021062644381030032, "grad_norm": 1.1015493869781494, "learning_rate": 0.0009973841554559043, "loss": 3.5144, "step": 310 }, { "epoch": 0.021402364451691806, "grad_norm": 1.1641970872879028, "learning_rate": 0.0009973416904470716, "loss": 3.9263, "step": 315 }, { "epoch": 0.021742084522353582, "grad_norm": 1.3643523454666138, "learning_rate": 0.000997299225438239, "loss": 3.668, "step": 320 }, { "epoch": 0.022081804593015356, "grad_norm": 1.2095694541931152, "learning_rate": 0.0009972567604294063, "loss": 3.5615, "step": 325 }, { "epoch": 0.02242152466367713, "grad_norm": 1.0691255331039429, "learning_rate": 0.0009972142954205736, "loss": 3.4005, "step": 330 }, { "epoch": 0.022761244734338906, "grad_norm": 0.8936980962753296, "learning_rate": 0.0009971718304117407, "loss": 3.8179, "step": 335 }, { "epoch": 0.02310096480500068, "grad_norm": 1.394195795059204, "learning_rate": 0.000997129365402908, "loss": 3.7532, "step": 340 }, { "epoch": 0.023440684875662456, "grad_norm": 1.5133311748504639, "learning_rate": 0.0009970869003940754, "loss": 3.7444, "step": 345 }, { "epoch": 0.02378040494632423, "grad_norm": 1.5607279539108276, "learning_rate": 0.0009970444353852425, "loss": 3.6198, "step": 350 }, { "epoch": 0.024120125016986002, "grad_norm": 1.2099229097366333, "learning_rate": 0.0009970019703764098, "loss": 3.9534, 
"step": 355 }, { "epoch": 0.02445984508764778, "grad_norm": 1.1680022478103638, "learning_rate": 0.0009969595053675772, "loss": 3.7101, "step": 360 }, { "epoch": 0.024799565158309552, "grad_norm": 1.2553199529647827, "learning_rate": 0.0009969170403587445, "loss": 3.7904, "step": 365 }, { "epoch": 0.02513928522897133, "grad_norm": 1.1269673109054565, "learning_rate": 0.0009968745753499116, "loss": 3.7832, "step": 370 }, { "epoch": 0.025479005299633102, "grad_norm": 1.2495825290679932, "learning_rate": 0.000996832110341079, "loss": 3.6187, "step": 375 }, { "epoch": 0.025818725370294875, "grad_norm": 1.1159908771514893, "learning_rate": 0.0009967896453322463, "loss": 3.8536, "step": 380 }, { "epoch": 0.026158445440956652, "grad_norm": 1.6140507459640503, "learning_rate": 0.0009967471803234134, "loss": 3.8515, "step": 385 }, { "epoch": 0.026498165511618425, "grad_norm": 1.0727026462554932, "learning_rate": 0.0009967047153145807, "loss": 3.5245, "step": 390 }, { "epoch": 0.026837885582280202, "grad_norm": 1.158215880393982, "learning_rate": 0.000996662250305748, "loss": 3.3556, "step": 395 }, { "epoch": 0.027177605652941975, "grad_norm": 1.1565126180648804, "learning_rate": 0.0009966197852969154, "loss": 3.5975, "step": 400 }, { "epoch": 0.027517325723603752, "grad_norm": 1.3182094097137451, "learning_rate": 0.0009965773202880827, "loss": 3.5545, "step": 405 }, { "epoch": 0.027857045794265525, "grad_norm": 1.1476361751556396, "learning_rate": 0.0009965348552792499, "loss": 3.8392, "step": 410 }, { "epoch": 0.0281967658649273, "grad_norm": 1.440959095954895, "learning_rate": 0.0009964923902704172, "loss": 3.84, "step": 415 }, { "epoch": 0.028536485935589075, "grad_norm": 1.5907877683639526, "learning_rate": 0.0009964499252615845, "loss": 3.6429, "step": 420 }, { "epoch": 0.02887620600625085, "grad_norm": 1.3278900384902954, "learning_rate": 0.0009964074602527516, "loss": 3.871, "step": 425 }, { "epoch": 0.029215926076912625, "grad_norm": 1.2859190702438354, 
"learning_rate": 0.000996364995243919, "loss": 3.8477, "step": 430 }, { "epoch": 0.0295556461475744, "grad_norm": 1.2288625240325928, "learning_rate": 0.0009963225302350863, "loss": 3.6946, "step": 435 }, { "epoch": 0.029895366218236172, "grad_norm": 1.1073403358459473, "learning_rate": 0.0009962800652262536, "loss": 3.7043, "step": 440 }, { "epoch": 0.03023508628889795, "grad_norm": 1.3657788038253784, "learning_rate": 0.0009962376002174208, "loss": 3.552, "step": 445 }, { "epoch": 0.030574806359559722, "grad_norm": 1.1544911861419678, "learning_rate": 0.000996195135208588, "loss": 3.4309, "step": 450 }, { "epoch": 0.0309145264302215, "grad_norm": 1.0650800466537476, "learning_rate": 0.0009961526701997554, "loss": 3.6686, "step": 455 }, { "epoch": 0.03125424650088327, "grad_norm": 1.1208282709121704, "learning_rate": 0.0009961102051909226, "loss": 4.0892, "step": 460 }, { "epoch": 0.03159396657154505, "grad_norm": 1.5906338691711426, "learning_rate": 0.00099606774018209, "loss": 3.6993, "step": 465 }, { "epoch": 0.03193368664220682, "grad_norm": 1.2549779415130615, "learning_rate": 0.0009960252751732572, "loss": 3.7224, "step": 470 }, { "epoch": 0.032273406712868595, "grad_norm": 1.3970061540603638, "learning_rate": 0.0009959828101644246, "loss": 3.8228, "step": 475 }, { "epoch": 0.03261312678353037, "grad_norm": 1.626342535018921, "learning_rate": 0.0009959403451555919, "loss": 3.5202, "step": 480 }, { "epoch": 0.03295284685419215, "grad_norm": 1.3544015884399414, "learning_rate": 0.000995897880146759, "loss": 3.5905, "step": 485 }, { "epoch": 0.03329256692485392, "grad_norm": 1.2479504346847534, "learning_rate": 0.0009958554151379263, "loss": 3.5942, "step": 490 }, { "epoch": 0.033632286995515695, "grad_norm": 1.2742217779159546, "learning_rate": 0.0009958129501290937, "loss": 3.7007, "step": 495 }, { "epoch": 0.03397200706617747, "grad_norm": 1.3801411390304565, "learning_rate": 0.000995770485120261, "loss": 3.5685, "step": 500 }, { "epoch": 
0.03431172713683924, "grad_norm": 1.1981620788574219, "learning_rate": 0.0009957280201114281, "loss": 3.6588, "step": 505 }, { "epoch": 0.03465144720750102, "grad_norm": 1.5005124807357788, "learning_rate": 0.0009956855551025955, "loss": 3.7866, "step": 510 }, { "epoch": 0.034991167278162795, "grad_norm": 1.4442976713180542, "learning_rate": 0.0009956430900937628, "loss": 3.6142, "step": 515 }, { "epoch": 0.03533088734882457, "grad_norm": 1.6556382179260254, "learning_rate": 0.00099560062508493, "loss": 3.7093, "step": 520 }, { "epoch": 0.03567060741948634, "grad_norm": 1.0017484426498413, "learning_rate": 0.0009955581600760972, "loss": 3.6034, "step": 525 }, { "epoch": 0.03601032749014812, "grad_norm": 1.2169318199157715, "learning_rate": 0.0009955156950672646, "loss": 3.4087, "step": 530 }, { "epoch": 0.036350047560809895, "grad_norm": 1.4535455703735352, "learning_rate": 0.000995473230058432, "loss": 3.7162, "step": 535 }, { "epoch": 0.036689767631471665, "grad_norm": 1.1526368856430054, "learning_rate": 0.0009954307650495992, "loss": 3.5617, "step": 540 }, { "epoch": 0.03702948770213344, "grad_norm": 1.6828385591506958, "learning_rate": 0.0009953883000407664, "loss": 3.679, "step": 545 }, { "epoch": 0.03736920777279522, "grad_norm": 1.231004238128662, "learning_rate": 0.0009953458350319337, "loss": 3.867, "step": 550 }, { "epoch": 0.03770892784345699, "grad_norm": 1.30239737033844, "learning_rate": 0.000995303370023101, "loss": 3.6774, "step": 555 }, { "epoch": 0.038048647914118765, "grad_norm": 1.327228307723999, "learning_rate": 0.0009952609050142682, "loss": 3.6751, "step": 560 }, { "epoch": 0.03838836798478054, "grad_norm": 1.454568862915039, "learning_rate": 0.0009952184400054355, "loss": 3.6821, "step": 565 }, { "epoch": 0.03872808805544232, "grad_norm": 1.295784592628479, "learning_rate": 0.0009951759749966028, "loss": 3.8395, "step": 570 }, { "epoch": 0.03906780812610409, "grad_norm": 1.160823106765747, "learning_rate": 0.0009951335099877702, "loss": 
3.8608, "step": 575 }, { "epoch": 0.039407528196765865, "grad_norm": 1.1928030252456665, "learning_rate": 0.0009950910449789373, "loss": 3.5837, "step": 580 }, { "epoch": 0.03974724826742764, "grad_norm": 1.2153632640838623, "learning_rate": 0.0009950485799701046, "loss": 3.5116, "step": 585 }, { "epoch": 0.04008696833808941, "grad_norm": 1.3261703252792358, "learning_rate": 0.000995006114961272, "loss": 3.4671, "step": 590 }, { "epoch": 0.04042668840875119, "grad_norm": 1.2513736486434937, "learning_rate": 0.0009949636499524393, "loss": 3.5266, "step": 595 }, { "epoch": 0.040766408479412965, "grad_norm": 1.222854495048523, "learning_rate": 0.0009949211849436066, "loss": 3.6555, "step": 600 }, { "epoch": 0.04110612855007474, "grad_norm": 1.4421522617340088, "learning_rate": 0.0009948787199347737, "loss": 3.6468, "step": 605 }, { "epoch": 0.04144584862073651, "grad_norm": 1.3831154108047485, "learning_rate": 0.000994836254925941, "loss": 3.7498, "step": 610 }, { "epoch": 0.04178556869139829, "grad_norm": 1.3285719156265259, "learning_rate": 0.0009947937899171084, "loss": 3.655, "step": 615 }, { "epoch": 0.042125288762060065, "grad_norm": 1.1212157011032104, "learning_rate": 0.0009947513249082755, "loss": 3.4706, "step": 620 }, { "epoch": 0.042465008832721834, "grad_norm": 1.026444673538208, "learning_rate": 0.0009947088598994428, "loss": 3.3177, "step": 625 }, { "epoch": 0.04280472890338361, "grad_norm": 1.3012080192565918, "learning_rate": 0.0009946663948906102, "loss": 3.5394, "step": 630 }, { "epoch": 0.04314444897404539, "grad_norm": 1.199526309967041, "learning_rate": 0.0009946239298817775, "loss": 3.5663, "step": 635 }, { "epoch": 0.043484169044707165, "grad_norm": 1.1827527284622192, "learning_rate": 0.0009945814648729449, "loss": 3.607, "step": 640 }, { "epoch": 0.043823889115368934, "grad_norm": 1.3272690773010254, "learning_rate": 0.000994538999864112, "loss": 3.8542, "step": 645 }, { "epoch": 0.04416360918603071, "grad_norm": 1.151440978050232, 
"learning_rate": 0.0009944965348552793, "loss": 3.7264, "step": 650 }, { "epoch": 0.04450332925669249, "grad_norm": 1.1391223669052124, "learning_rate": 0.0009944540698464466, "loss": 3.5499, "step": 655 }, { "epoch": 0.04484304932735426, "grad_norm": 1.4500011205673218, "learning_rate": 0.0009944116048376138, "loss": 3.6039, "step": 660 }, { "epoch": 0.045182769398016034, "grad_norm": 1.1605967283248901, "learning_rate": 0.000994369139828781, "loss": 3.5574, "step": 665 }, { "epoch": 0.04552248946867781, "grad_norm": 1.2545030117034912, "learning_rate": 0.0009943266748199484, "loss": 3.8259, "step": 670 }, { "epoch": 0.04586220953933958, "grad_norm": 1.613290786743164, "learning_rate": 0.0009942842098111158, "loss": 3.7226, "step": 675 }, { "epoch": 0.04620192961000136, "grad_norm": 1.407639741897583, "learning_rate": 0.0009942417448022829, "loss": 3.6507, "step": 680 }, { "epoch": 0.046541649680663134, "grad_norm": 1.3407589197158813, "learning_rate": 0.0009941992797934502, "loss": 3.7412, "step": 685 }, { "epoch": 0.04688136975132491, "grad_norm": 1.3134586811065674, "learning_rate": 0.0009941568147846175, "loss": 3.6294, "step": 690 }, { "epoch": 0.04722108982198668, "grad_norm": 1.2299553155899048, "learning_rate": 0.0009941143497757847, "loss": 3.7411, "step": 695 }, { "epoch": 0.04756080989264846, "grad_norm": 0.9996513724327087, "learning_rate": 0.000994071884766952, "loss": 3.708, "step": 700 }, { "epoch": 0.047900529963310234, "grad_norm": 1.2299960851669312, "learning_rate": 0.0009940294197581193, "loss": 3.7318, "step": 705 }, { "epoch": 0.048240250033972004, "grad_norm": 1.1426419019699097, "learning_rate": 0.0009939869547492867, "loss": 3.67, "step": 710 }, { "epoch": 0.04857997010463378, "grad_norm": 1.0688164234161377, "learning_rate": 0.000993944489740454, "loss": 3.6111, "step": 715 }, { "epoch": 0.04891969017529556, "grad_norm": 1.2216670513153076, "learning_rate": 0.0009939020247316211, "loss": 3.6257, "step": 720 }, { "epoch": 
0.049259410245957334, "grad_norm": 1.3033989667892456, "learning_rate": 0.0009938595597227884, "loss": 3.5072, "step": 725 }, { "epoch": 0.049599130316619104, "grad_norm": 1.662177324295044, "learning_rate": 0.0009938170947139558, "loss": 3.524, "step": 730 }, { "epoch": 0.04993885038728088, "grad_norm": 1.666594386100769, "learning_rate": 0.000993774629705123, "loss": 3.5621, "step": 735 }, { "epoch": 0.05027857045794266, "grad_norm": 1.6109585762023926, "learning_rate": 0.0009937321646962902, "loss": 3.8299, "step": 740 }, { "epoch": 0.05061829052860443, "grad_norm": 1.2702183723449707, "learning_rate": 0.0009936896996874576, "loss": 3.4591, "step": 745 }, { "epoch": 0.050958010599266204, "grad_norm": 1.4666458368301392, "learning_rate": 0.000993647234678625, "loss": 3.6259, "step": 750 }, { "epoch": 0.05129773066992798, "grad_norm": 1.4131566286087036, "learning_rate": 0.000993604769669792, "loss": 3.6377, "step": 755 }, { "epoch": 0.05163745074058975, "grad_norm": 1.4959462881088257, "learning_rate": 0.0009935623046609594, "loss": 3.7686, "step": 760 }, { "epoch": 0.05197717081125153, "grad_norm": 1.3981842994689941, "learning_rate": 0.0009935198396521267, "loss": 3.5853, "step": 765 }, { "epoch": 0.052316890881913304, "grad_norm": 1.3965612649917603, "learning_rate": 0.0009934773746432938, "loss": 3.5535, "step": 770 }, { "epoch": 0.05265661095257508, "grad_norm": 1.2661561965942383, "learning_rate": 0.0009934349096344614, "loss": 3.7495, "step": 775 }, { "epoch": 0.05299633102323685, "grad_norm": 1.2751635313034058, "learning_rate": 0.0009933924446256285, "loss": 3.7802, "step": 780 }, { "epoch": 0.05333605109389863, "grad_norm": 1.3632445335388184, "learning_rate": 0.0009933499796167958, "loss": 3.5762, "step": 785 }, { "epoch": 0.053675771164560404, "grad_norm": 1.3534399271011353, "learning_rate": 0.0009933075146079631, "loss": 3.6723, "step": 790 }, { "epoch": 0.054015491235222174, "grad_norm": 1.230275273323059, "learning_rate": 0.0009932650495991303, 
"loss": 3.5191, "step": 795 }, { "epoch": 0.05435521130588395, "grad_norm": 1.381050705909729, "learning_rate": 0.0009932225845902976, "loss": 3.888, "step": 800 }, { "epoch": 0.05469493137654573, "grad_norm": 1.2148298025131226, "learning_rate": 0.000993180119581465, "loss": 3.9263, "step": 805 }, { "epoch": 0.055034651447207504, "grad_norm": 1.259735345840454, "learning_rate": 0.0009931376545726323, "loss": 3.6659, "step": 810 }, { "epoch": 0.055374371517869274, "grad_norm": 1.7731395959854126, "learning_rate": 0.0009930951895637994, "loss": 3.7216, "step": 815 }, { "epoch": 0.05571409158853105, "grad_norm": 1.9439359903335571, "learning_rate": 0.0009930527245549667, "loss": 3.5367, "step": 820 }, { "epoch": 0.05605381165919283, "grad_norm": 1.0209859609603882, "learning_rate": 0.000993010259546134, "loss": 3.5674, "step": 825 }, { "epoch": 0.0563935317298546, "grad_norm": 1.2520893812179565, "learning_rate": 0.0009929677945373012, "loss": 3.4791, "step": 830 }, { "epoch": 0.056733251800516374, "grad_norm": 1.531401515007019, "learning_rate": 0.0009929253295284685, "loss": 3.6029, "step": 835 }, { "epoch": 0.05707297187117815, "grad_norm": 1.1550298929214478, "learning_rate": 0.0009928828645196358, "loss": 3.8208, "step": 840 }, { "epoch": 0.05741269194183993, "grad_norm": 1.3272373676300049, "learning_rate": 0.0009928403995108032, "loss": 3.7942, "step": 845 }, { "epoch": 0.0577524120125017, "grad_norm": 1.518879771232605, "learning_rate": 0.0009927979345019705, "loss": 3.6628, "step": 850 }, { "epoch": 0.058092132083163474, "grad_norm": 1.2354364395141602, "learning_rate": 0.0009927554694931376, "loss": 3.6825, "step": 855 }, { "epoch": 0.05843185215382525, "grad_norm": 1.211795449256897, "learning_rate": 0.000992713004484305, "loss": 3.3405, "step": 860 }, { "epoch": 0.05877157222448702, "grad_norm": 1.5916142463684082, "learning_rate": 0.0009926705394754723, "loss": 3.7159, "step": 865 }, { "epoch": 0.0591112922951488, "grad_norm": 1.3939881324768066, 
"learning_rate": 0.0009926280744666394, "loss": 3.6395, "step": 870 }, { "epoch": 0.059451012365810574, "grad_norm": 1.2881855964660645, "learning_rate": 0.0009925856094578067, "loss": 3.3635, "step": 875 }, { "epoch": 0.059790732436472344, "grad_norm": 1.5193804502487183, "learning_rate": 0.000992543144448974, "loss": 3.6042, "step": 880 }, { "epoch": 0.06013045250713412, "grad_norm": 1.5227971076965332, "learning_rate": 0.0009925006794401414, "loss": 3.562, "step": 885 }, { "epoch": 0.0604701725777959, "grad_norm": 1.7681282758712769, "learning_rate": 0.0009924582144313085, "loss": 3.704, "step": 890 }, { "epoch": 0.060809892648457674, "grad_norm": 1.1892551183700562, "learning_rate": 0.0009924157494224759, "loss": 3.6903, "step": 895 }, { "epoch": 0.061149612719119444, "grad_norm": 1.5224246978759766, "learning_rate": 0.0009923732844136432, "loss": 3.7684, "step": 900 }, { "epoch": 0.06148933278978122, "grad_norm": 1.4574183225631714, "learning_rate": 0.0009923308194048103, "loss": 3.6532, "step": 905 }, { "epoch": 0.061829052860443, "grad_norm": 1.2628153562545776, "learning_rate": 0.0009922883543959776, "loss": 3.4596, "step": 910 }, { "epoch": 0.06216877293110477, "grad_norm": 1.5721406936645508, "learning_rate": 0.000992245889387145, "loss": 3.8238, "step": 915 }, { "epoch": 0.06250849300176654, "grad_norm": 1.5457618236541748, "learning_rate": 0.0009922034243783123, "loss": 3.719, "step": 920 }, { "epoch": 0.06284821307242831, "grad_norm": 1.4470906257629395, "learning_rate": 0.0009921609593694797, "loss": 3.6055, "step": 925 }, { "epoch": 0.0631879331430901, "grad_norm": 1.587275743484497, "learning_rate": 0.0009921184943606468, "loss": 3.7707, "step": 930 }, { "epoch": 0.06352765321375187, "grad_norm": 1.2567776441574097, "learning_rate": 0.000992076029351814, "loss": 3.8722, "step": 935 }, { "epoch": 0.06386737328441364, "grad_norm": 1.046470046043396, "learning_rate": 0.0009920335643429814, "loss": 3.4342, "step": 940 }, { "epoch": 0.06420709335507542, 
"grad_norm": 1.2958343029022217, "learning_rate": 0.0009919910993341486, "loss": 3.5903, "step": 945 }, { "epoch": 0.06454681342573719, "grad_norm": 1.2214195728302002, "learning_rate": 0.000991948634325316, "loss": 3.5775, "step": 950 }, { "epoch": 0.06488653349639897, "grad_norm": 1.409933090209961, "learning_rate": 0.0009919061693164832, "loss": 3.6414, "step": 955 }, { "epoch": 0.06522625356706074, "grad_norm": 1.4713728427886963, "learning_rate": 0.0009918637043076506, "loss": 3.7683, "step": 960 }, { "epoch": 0.06556597363772251, "grad_norm": 1.2211861610412598, "learning_rate": 0.000991821239298818, "loss": 3.6893, "step": 965 }, { "epoch": 0.0659056937083843, "grad_norm": 1.9625893831253052, "learning_rate": 0.000991778774289985, "loss": 3.9089, "step": 970 }, { "epoch": 0.06624541377904607, "grad_norm": 1.4359312057495117, "learning_rate": 0.0009917363092811523, "loss": 3.8377, "step": 975 }, { "epoch": 0.06658513384970784, "grad_norm": 1.347298264503479, "learning_rate": 0.0009916938442723197, "loss": 3.6799, "step": 980 }, { "epoch": 0.06692485392036962, "grad_norm": 1.4500139951705933, "learning_rate": 0.000991651379263487, "loss": 3.7232, "step": 985 }, { "epoch": 0.06726457399103139, "grad_norm": 1.3447253704071045, "learning_rate": 0.0009916089142546541, "loss": 3.636, "step": 990 }, { "epoch": 0.06760429406169316, "grad_norm": 1.168601632118225, "learning_rate": 0.0009915664492458215, "loss": 3.5297, "step": 995 }, { "epoch": 0.06794401413235494, "grad_norm": 1.690107822418213, "learning_rate": 0.0009915239842369888, "loss": 3.6712, "step": 1000 }, { "epoch": 0.06828373420301671, "grad_norm": 1.4340829849243164, "learning_rate": 0.000991481519228156, "loss": 3.6223, "step": 1005 }, { "epoch": 0.06862345427367848, "grad_norm": 1.333815336227417, "learning_rate": 0.0009914390542193233, "loss": 3.3882, "step": 1010 }, { "epoch": 0.06896317434434027, "grad_norm": 1.397036075592041, "learning_rate": 0.0009913965892104906, "loss": 3.6911, "step": 1015 }, 
{ "epoch": 0.06930289441500204, "grad_norm": 1.087192177772522, "learning_rate": 0.000991354124201658, "loss": 3.4764, "step": 1020 }, { "epoch": 0.0696426144856638, "grad_norm": 1.3069367408752441, "learning_rate": 0.0009913116591928253, "loss": 3.5462, "step": 1025 }, { "epoch": 0.06998233455632559, "grad_norm": 1.2607848644256592, "learning_rate": 0.0009912691941839924, "loss": 3.7122, "step": 1030 }, { "epoch": 0.07032205462698736, "grad_norm": 1.6250845193862915, "learning_rate": 0.0009912267291751597, "loss": 3.5876, "step": 1035 }, { "epoch": 0.07066177469764914, "grad_norm": 1.6649497747421265, "learning_rate": 0.000991184264166327, "loss": 3.6147, "step": 1040 }, { "epoch": 0.07100149476831091, "grad_norm": 1.8978240489959717, "learning_rate": 0.0009911417991574942, "loss": 3.6061, "step": 1045 }, { "epoch": 0.07134121483897268, "grad_norm": 1.5587214231491089, "learning_rate": 0.0009910993341486615, "loss": 3.9562, "step": 1050 }, { "epoch": 0.07168093490963447, "grad_norm": 1.5667973756790161, "learning_rate": 0.0009910568691398288, "loss": 3.4359, "step": 1055 }, { "epoch": 0.07202065498029624, "grad_norm": 1.464034914970398, "learning_rate": 0.0009910144041309962, "loss": 3.4411, "step": 1060 }, { "epoch": 0.072360375050958, "grad_norm": 1.238040804862976, "learning_rate": 0.0009909719391221633, "loss": 3.3294, "step": 1065 }, { "epoch": 0.07270009512161979, "grad_norm": 1.8032987117767334, "learning_rate": 0.0009909294741133306, "loss": 3.9244, "step": 1070 }, { "epoch": 0.07303981519228156, "grad_norm": 1.5286312103271484, "learning_rate": 0.000990887009104498, "loss": 3.5037, "step": 1075 }, { "epoch": 0.07337953526294333, "grad_norm": 1.5415940284729004, "learning_rate": 0.000990844544095665, "loss": 3.855, "step": 1080 }, { "epoch": 0.07371925533360511, "grad_norm": 1.4576892852783203, "learning_rate": 0.0009908020790868326, "loss": 3.6102, "step": 1085 }, { "epoch": 0.07405897540426688, "grad_norm": 1.5198206901550293, "learning_rate": 
0.0009907596140779997, "loss": 3.7505, "step": 1090 }, { "epoch": 0.07439869547492865, "grad_norm": 1.630894660949707, "learning_rate": 0.000990717149069167, "loss": 3.7055, "step": 1095 }, { "epoch": 0.07473841554559044, "grad_norm": 1.4341453313827515, "learning_rate": 0.0009906746840603344, "loss": 3.3602, "step": 1100 }, { "epoch": 0.0750781356162522, "grad_norm": 1.4807463884353638, "learning_rate": 0.0009906322190515015, "loss": 3.8356, "step": 1105 }, { "epoch": 0.07541785568691398, "grad_norm": 1.3436760902404785, "learning_rate": 0.0009905897540426689, "loss": 3.5441, "step": 1110 }, { "epoch": 0.07575757575757576, "grad_norm": 1.3255163431167603, "learning_rate": 0.0009905472890338362, "loss": 3.7281, "step": 1115 }, { "epoch": 0.07609729582823753, "grad_norm": 1.217935562133789, "learning_rate": 0.0009905048240250035, "loss": 3.5349, "step": 1120 }, { "epoch": 0.07643701589889931, "grad_norm": 1.3292466402053833, "learning_rate": 0.0009904623590161706, "loss": 3.6247, "step": 1125 }, { "epoch": 0.07677673596956108, "grad_norm": 1.673684000968933, "learning_rate": 0.000990419894007338, "loss": 3.5391, "step": 1130 }, { "epoch": 0.07711645604022285, "grad_norm": 1.5734350681304932, "learning_rate": 0.0009903774289985053, "loss": 3.7004, "step": 1135 }, { "epoch": 0.07745617611088464, "grad_norm": 1.6199370622634888, "learning_rate": 0.0009903349639896724, "loss": 3.3966, "step": 1140 }, { "epoch": 0.0777958961815464, "grad_norm": 1.7481207847595215, "learning_rate": 0.0009902924989808398, "loss": 3.5104, "step": 1145 }, { "epoch": 0.07813561625220818, "grad_norm": 1.6226433515548706, "learning_rate": 0.000990250033972007, "loss": 3.5884, "step": 1150 }, { "epoch": 0.07847533632286996, "grad_norm": 1.473323106765747, "learning_rate": 0.0009902075689631744, "loss": 3.645, "step": 1155 }, { "epoch": 0.07881505639353173, "grad_norm": 1.4731993675231934, "learning_rate": 0.0009901651039543418, "loss": 3.4521, "step": 1160 }, { "epoch": 0.0791547764641935, 
"grad_norm": 1.5967803001403809, "learning_rate": 0.0009901226389455089, "loss": 3.7706, "step": 1165 }, { "epoch": 0.07949449653485528, "grad_norm": 1.4581245183944702, "learning_rate": 0.0009900801739366762, "loss": 3.702, "step": 1170 }, { "epoch": 0.07983421660551705, "grad_norm": 1.3496938943862915, "learning_rate": 0.0009900377089278435, "loss": 3.454, "step": 1175 }, { "epoch": 0.08017393667617882, "grad_norm": 1.3844796419143677, "learning_rate": 0.0009899952439190107, "loss": 3.9697, "step": 1180 }, { "epoch": 0.0805136567468406, "grad_norm": 1.070082187652588, "learning_rate": 0.000989952778910178, "loss": 3.5826, "step": 1185 }, { "epoch": 0.08085337681750238, "grad_norm": 1.3958414793014526, "learning_rate": 0.0009899103139013453, "loss": 3.7031, "step": 1190 }, { "epoch": 0.08119309688816416, "grad_norm": 1.4403811693191528, "learning_rate": 0.0009898678488925127, "loss": 3.3262, "step": 1195 }, { "epoch": 0.08153281695882593, "grad_norm": 1.5377938747406006, "learning_rate": 0.0009898253838836798, "loss": 3.7577, "step": 1200 }, { "epoch": 0.0818725370294877, "grad_norm": 1.8992164134979248, "learning_rate": 0.0009897829188748471, "loss": 3.8494, "step": 1205 }, { "epoch": 0.08221225710014948, "grad_norm": 1.5007022619247437, "learning_rate": 0.0009897404538660145, "loss": 3.498, "step": 1210 }, { "epoch": 0.08255197717081125, "grad_norm": 1.5261448621749878, "learning_rate": 0.0009896979888571816, "loss": 3.4636, "step": 1215 }, { "epoch": 0.08289169724147302, "grad_norm": 1.3651412725448608, "learning_rate": 0.000989655523848349, "loss": 3.493, "step": 1220 }, { "epoch": 0.0832314173121348, "grad_norm": 1.2653255462646484, "learning_rate": 0.0009896130588395162, "loss": 3.5668, "step": 1225 }, { "epoch": 0.08357113738279658, "grad_norm": 1.4119739532470703, "learning_rate": 0.0009895705938306836, "loss": 3.8061, "step": 1230 }, { "epoch": 0.08391085745345835, "grad_norm": 1.5024135112762451, "learning_rate": 0.000989528128821851, "loss": 3.5818, 
"step": 1235 }, { "epoch": 0.08425057752412013, "grad_norm": 1.5987701416015625, "learning_rate": 0.000989485663813018, "loss": 3.6748, "step": 1240 }, { "epoch": 0.0845902975947819, "grad_norm": 1.4886471033096313, "learning_rate": 0.0009894431988041854, "loss": 3.4502, "step": 1245 }, { "epoch": 0.08493001766544367, "grad_norm": 1.7029004096984863, "learning_rate": 0.0009894007337953527, "loss": 3.7497, "step": 1250 }, { "epoch": 0.08526973773610545, "grad_norm": 1.3162719011306763, "learning_rate": 0.0009893582687865198, "loss": 3.7539, "step": 1255 }, { "epoch": 0.08560945780676722, "grad_norm": 1.466935634613037, "learning_rate": 0.0009893158037776871, "loss": 3.5212, "step": 1260 }, { "epoch": 0.08594917787742899, "grad_norm": 1.4282779693603516, "learning_rate": 0.0009892733387688545, "loss": 3.5972, "step": 1265 }, { "epoch": 0.08628889794809078, "grad_norm": 1.2548452615737915, "learning_rate": 0.0009892308737600218, "loss": 3.6504, "step": 1270 }, { "epoch": 0.08662861801875255, "grad_norm": 1.5985420942306519, "learning_rate": 0.0009891884087511891, "loss": 3.6354, "step": 1275 }, { "epoch": 0.08696833808941433, "grad_norm": 1.2446298599243164, "learning_rate": 0.0009891459437423563, "loss": 3.7276, "step": 1280 }, { "epoch": 0.0873080581600761, "grad_norm": 1.3400171995162964, "learning_rate": 0.0009891034787335236, "loss": 4.0505, "step": 1285 }, { "epoch": 0.08764777823073787, "grad_norm": 1.1235218048095703, "learning_rate": 0.000989061013724691, "loss": 3.5643, "step": 1290 }, { "epoch": 0.08798749830139965, "grad_norm": 1.3810266256332397, "learning_rate": 0.0009890185487158583, "loss": 3.6139, "step": 1295 }, { "epoch": 0.08832721837206142, "grad_norm": 1.3978073596954346, "learning_rate": 0.0009889760837070254, "loss": 3.8037, "step": 1300 }, { "epoch": 0.08866693844272319, "grad_norm": 1.458299160003662, "learning_rate": 0.0009889336186981927, "loss": 3.5989, "step": 1305 }, { "epoch": 0.08900665851338498, "grad_norm": 1.2256615161895752, 
"learning_rate": 0.00098889115368936, "loss": 3.7246, "step": 1310 }, { "epoch": 0.08934637858404675, "grad_norm": 1.860431432723999, "learning_rate": 0.0009888486886805272, "loss": 3.5397, "step": 1315 }, { "epoch": 0.08968609865470852, "grad_norm": 1.3502448797225952, "learning_rate": 0.0009888062236716945, "loss": 3.3012, "step": 1320 }, { "epoch": 0.0900258187253703, "grad_norm": 1.4615477323532104, "learning_rate": 0.0009887637586628618, "loss": 3.3653, "step": 1325 }, { "epoch": 0.09036553879603207, "grad_norm": 1.688084363937378, "learning_rate": 0.0009887212936540292, "loss": 3.6943, "step": 1330 }, { "epoch": 0.09070525886669384, "grad_norm": 1.8519351482391357, "learning_rate": 0.0009886788286451965, "loss": 3.7166, "step": 1335 }, { "epoch": 0.09104497893735562, "grad_norm": 1.4299932718276978, "learning_rate": 0.0009886363636363636, "loss": 3.7503, "step": 1340 }, { "epoch": 0.09138469900801739, "grad_norm": 1.2496356964111328, "learning_rate": 0.000988593898627531, "loss": 3.2945, "step": 1345 }, { "epoch": 0.09172441907867916, "grad_norm": 1.6564908027648926, "learning_rate": 0.0009885514336186983, "loss": 3.4076, "step": 1350 }, { "epoch": 0.09206413914934095, "grad_norm": 1.2229890823364258, "learning_rate": 0.0009885089686098654, "loss": 3.6832, "step": 1355 }, { "epoch": 0.09240385922000272, "grad_norm": 1.9372469186782837, "learning_rate": 0.0009884665036010327, "loss": 3.5988, "step": 1360 }, { "epoch": 0.0927435792906645, "grad_norm": 1.4550014734268188, "learning_rate": 0.0009884240385922, "loss": 3.6039, "step": 1365 }, { "epoch": 0.09308329936132627, "grad_norm": 1.4247316122055054, "learning_rate": 0.0009883815735833674, "loss": 3.7509, "step": 1370 }, { "epoch": 0.09342301943198804, "grad_norm": 1.33436918258667, "learning_rate": 0.0009883391085745345, "loss": 3.7646, "step": 1375 }, { "epoch": 0.09376273950264982, "grad_norm": 1.5869226455688477, "learning_rate": 0.0009882966435657019, "loss": 3.5285, "step": 1380 }, { "epoch": 
0.09410245957331159, "grad_norm": 1.8075852394104004, "learning_rate": 0.0009882541785568692, "loss": 3.686, "step": 1385 }, { "epoch": 0.09444217964397336, "grad_norm": 1.3173834085464478, "learning_rate": 0.0009882117135480363, "loss": 3.6182, "step": 1390 }, { "epoch": 0.09478189971463515, "grad_norm": 1.3262351751327515, "learning_rate": 0.0009881692485392037, "loss": 3.5984, "step": 1395 }, { "epoch": 0.09512161978529692, "grad_norm": 1.4498381614685059, "learning_rate": 0.000988126783530371, "loss": 3.6288, "step": 1400 }, { "epoch": 0.09546133985595869, "grad_norm": 1.1565841436386108, "learning_rate": 0.0009880843185215383, "loss": 3.6519, "step": 1405 }, { "epoch": 0.09580105992662047, "grad_norm": 1.31528902053833, "learning_rate": 0.0009880418535127057, "loss": 3.6106, "step": 1410 }, { "epoch": 0.09614077999728224, "grad_norm": 1.378267765045166, "learning_rate": 0.0009879993885038728, "loss": 3.6776, "step": 1415 }, { "epoch": 0.09648050006794401, "grad_norm": 1.2516449689865112, "learning_rate": 0.0009879569234950401, "loss": 3.7058, "step": 1420 }, { "epoch": 0.09682022013860579, "grad_norm": 1.7370390892028809, "learning_rate": 0.0009879144584862074, "loss": 3.7196, "step": 1425 }, { "epoch": 0.09715994020926756, "grad_norm": 1.218294620513916, "learning_rate": 0.0009878719934773746, "loss": 3.5335, "step": 1430 }, { "epoch": 0.09749966027992933, "grad_norm": 1.6008306741714478, "learning_rate": 0.000987829528468542, "loss": 3.5454, "step": 1435 }, { "epoch": 0.09783938035059112, "grad_norm": 1.8871667385101318, "learning_rate": 0.0009877870634597092, "loss": 3.6162, "step": 1440 }, { "epoch": 0.09817910042125289, "grad_norm": 1.2360868453979492, "learning_rate": 0.0009877445984508766, "loss": 3.3812, "step": 1445 }, { "epoch": 0.09851882049191467, "grad_norm": 1.2359330654144287, "learning_rate": 0.0009877021334420437, "loss": 3.6129, "step": 1450 }, { "epoch": 0.09885854056257644, "grad_norm": 1.2828547954559326, "learning_rate": 
0.000987659668433211, "loss": 3.6697, "step": 1455 }, { "epoch": 0.09919826063323821, "grad_norm": 1.296589970588684, "learning_rate": 0.0009876172034243783, "loss": 3.7068, "step": 1460 }, { "epoch": 0.09953798070389999, "grad_norm": 2.1917073726654053, "learning_rate": 0.0009875747384155455, "loss": 3.7059, "step": 1465 }, { "epoch": 0.09987770077456176, "grad_norm": 1.586691975593567, "learning_rate": 0.000987532273406713, "loss": 3.7489, "step": 1470 }, { "epoch": 0.10021742084522353, "grad_norm": 1.3245091438293457, "learning_rate": 0.0009874898083978801, "loss": 3.3967, "step": 1475 }, { "epoch": 0.10055714091588532, "grad_norm": 1.812783122062683, "learning_rate": 0.0009874473433890475, "loss": 3.7014, "step": 1480 }, { "epoch": 0.10089686098654709, "grad_norm": 1.4093703031539917, "learning_rate": 0.0009874048783802148, "loss": 3.6094, "step": 1485 }, { "epoch": 0.10123658105720885, "grad_norm": 1.320734977722168, "learning_rate": 0.000987362413371382, "loss": 3.63, "step": 1490 }, { "epoch": 0.10157630112787064, "grad_norm": 1.2479645013809204, "learning_rate": 0.0009873199483625493, "loss": 3.6259, "step": 1495 }, { "epoch": 0.10191602119853241, "grad_norm": 1.3046929836273193, "learning_rate": 0.0009872774833537166, "loss": 3.7909, "step": 1500 }, { "epoch": 0.10225574126919418, "grad_norm": 1.5730926990509033, "learning_rate": 0.000987235018344884, "loss": 3.7584, "step": 1505 }, { "epoch": 0.10259546133985596, "grad_norm": 1.4246537685394287, "learning_rate": 0.000987192553336051, "loss": 3.5091, "step": 1510 }, { "epoch": 0.10293518141051773, "grad_norm": 1.2808427810668945, "learning_rate": 0.0009871500883272184, "loss": 3.5409, "step": 1515 }, { "epoch": 0.1032749014811795, "grad_norm": 1.4402869939804077, "learning_rate": 0.0009871076233183857, "loss": 3.5679, "step": 1520 }, { "epoch": 0.10361462155184128, "grad_norm": 1.4754396677017212, "learning_rate": 0.0009870651583095528, "loss": 3.7747, "step": 1525 }, { "epoch": 0.10395434162250305, 
"grad_norm": 1.2941263914108276, "learning_rate": 0.0009870226933007202, "loss": 3.6041, "step": 1530 }, { "epoch": 0.10429406169316484, "grad_norm": 1.307724952697754, "learning_rate": 0.0009869802282918875, "loss": 3.7856, "step": 1535 }, { "epoch": 0.10463378176382661, "grad_norm": 1.547669768333435, "learning_rate": 0.0009869377632830548, "loss": 3.7343, "step": 1540 }, { "epoch": 0.10497350183448838, "grad_norm": 1.1245697736740112, "learning_rate": 0.0009868952982742222, "loss": 3.6043, "step": 1545 }, { "epoch": 0.10531322190515016, "grad_norm": 1.486863613128662, "learning_rate": 0.0009868528332653893, "loss": 3.676, "step": 1550 }, { "epoch": 0.10565294197581193, "grad_norm": 1.4561771154403687, "learning_rate": 0.0009868103682565566, "loss": 3.603, "step": 1555 }, { "epoch": 0.1059926620464737, "grad_norm": 1.5904299020767212, "learning_rate": 0.000986767903247724, "loss": 3.6425, "step": 1560 }, { "epoch": 0.10633238211713548, "grad_norm": 1.9807040691375732, "learning_rate": 0.000986725438238891, "loss": 3.6754, "step": 1565 }, { "epoch": 0.10667210218779725, "grad_norm": 1.5916246175765991, "learning_rate": 0.0009866829732300584, "loss": 3.5263, "step": 1570 }, { "epoch": 0.10701182225845902, "grad_norm": 1.5537859201431274, "learning_rate": 0.0009866405082212257, "loss": 3.7667, "step": 1575 }, { "epoch": 0.10735154232912081, "grad_norm": 1.2892439365386963, "learning_rate": 0.000986598043212393, "loss": 3.5962, "step": 1580 }, { "epoch": 0.10769126239978258, "grad_norm": 1.3464820384979248, "learning_rate": 0.0009865555782035602, "loss": 3.6533, "step": 1585 }, { "epoch": 0.10803098247044435, "grad_norm": 1.5733516216278076, "learning_rate": 0.0009865131131947275, "loss": 3.5006, "step": 1590 }, { "epoch": 0.10837070254110613, "grad_norm": 1.2690258026123047, "learning_rate": 0.0009864706481858949, "loss": 3.7499, "step": 1595 }, { "epoch": 0.1087104226117679, "grad_norm": 1.1233024597167969, "learning_rate": 0.000986428183177062, "loss": 3.5301, 
"step": 1600 }, { "epoch": 0.10905014268242967, "grad_norm": 1.537360668182373, "learning_rate": 0.0009863857181682295, "loss": 3.6255, "step": 1605 }, { "epoch": 0.10938986275309145, "grad_norm": 1.790440320968628, "learning_rate": 0.0009863432531593966, "loss": 3.7376, "step": 1610 }, { "epoch": 0.10972958282375322, "grad_norm": 1.4018882513046265, "learning_rate": 0.000986300788150564, "loss": 3.5685, "step": 1615 }, { "epoch": 0.11006930289441501, "grad_norm": 1.5316650867462158, "learning_rate": 0.0009862583231417313, "loss": 3.5103, "step": 1620 }, { "epoch": 0.11040902296507678, "grad_norm": 1.3660943508148193, "learning_rate": 0.0009862158581328984, "loss": 3.4791, "step": 1625 }, { "epoch": 0.11074874303573855, "grad_norm": 1.6862484216690063, "learning_rate": 0.0009861733931240658, "loss": 3.6744, "step": 1630 }, { "epoch": 0.11108846310640033, "grad_norm": 1.5487521886825562, "learning_rate": 0.000986130928115233, "loss": 3.6368, "step": 1635 }, { "epoch": 0.1114281831770621, "grad_norm": 1.4311232566833496, "learning_rate": 0.0009860884631064004, "loss": 3.741, "step": 1640 }, { "epoch": 0.11176790324772387, "grad_norm": 1.2300047874450684, "learning_rate": 0.0009860459980975678, "loss": 3.5558, "step": 1645 }, { "epoch": 0.11210762331838565, "grad_norm": 1.2947553396224976, "learning_rate": 0.0009860035330887349, "loss": 3.4572, "step": 1650 }, { "epoch": 0.11244734338904742, "grad_norm": 1.7059239149093628, "learning_rate": 0.0009859610680799022, "loss": 3.6659, "step": 1655 }, { "epoch": 0.1127870634597092, "grad_norm": 1.4852492809295654, "learning_rate": 0.0009859186030710696, "loss": 3.6203, "step": 1660 }, { "epoch": 0.11312678353037098, "grad_norm": 1.73294997215271, "learning_rate": 0.0009858761380622367, "loss": 3.6897, "step": 1665 }, { "epoch": 0.11346650360103275, "grad_norm": 1.6328109502792358, "learning_rate": 0.000985833673053404, "loss": 3.6978, "step": 1670 }, { "epoch": 0.11380622367169452, "grad_norm": 1.4465856552124023, 
"learning_rate": 0.0009857912080445713, "loss": 3.671, "step": 1675 }, { "epoch": 0.1141459437423563, "grad_norm": 1.4723976850509644, "learning_rate": 0.0009857487430357387, "loss": 3.5272, "step": 1680 }, { "epoch": 0.11448566381301807, "grad_norm": 1.3679559230804443, "learning_rate": 0.0009857062780269058, "loss": 3.5503, "step": 1685 }, { "epoch": 0.11482538388367985, "grad_norm": 1.431428074836731, "learning_rate": 0.0009856638130180731, "loss": 3.4222, "step": 1690 }, { "epoch": 0.11516510395434162, "grad_norm": 1.3025245666503906, "learning_rate": 0.0009856213480092405, "loss": 3.5442, "step": 1695 }, { "epoch": 0.1155048240250034, "grad_norm": 1.6983662843704224, "learning_rate": 0.0009855788830004076, "loss": 3.7232, "step": 1700 }, { "epoch": 0.11584454409566518, "grad_norm": 1.2664803266525269, "learning_rate": 0.000985536417991575, "loss": 3.4236, "step": 1705 }, { "epoch": 0.11618426416632695, "grad_norm": 1.2528018951416016, "learning_rate": 0.0009854939529827422, "loss": 3.7128, "step": 1710 }, { "epoch": 0.11652398423698872, "grad_norm": 1.582702398300171, "learning_rate": 0.0009854514879739096, "loss": 3.3098, "step": 1715 }, { "epoch": 0.1168637043076505, "grad_norm": 1.3877739906311035, "learning_rate": 0.000985409022965077, "loss": 3.719, "step": 1720 }, { "epoch": 0.11720342437831227, "grad_norm": 1.7697266340255737, "learning_rate": 0.000985366557956244, "loss": 3.4213, "step": 1725 }, { "epoch": 0.11754314444897404, "grad_norm": 1.5338609218597412, "learning_rate": 0.0009853240929474114, "loss": 3.4977, "step": 1730 }, { "epoch": 0.11788286451963582, "grad_norm": 2.268167734146118, "learning_rate": 0.0009852816279385787, "loss": 3.5827, "step": 1735 }, { "epoch": 0.1182225845902976, "grad_norm": 1.744579792022705, "learning_rate": 0.0009852391629297458, "loss": 3.6534, "step": 1740 }, { "epoch": 0.11856230466095936, "grad_norm": 1.4346039295196533, "learning_rate": 0.0009851966979209132, "loss": 3.8399, "step": 1745 }, { "epoch": 
0.11890202473162115, "grad_norm": 1.409035086631775, "learning_rate": 0.0009851542329120805, "loss": 3.5953, "step": 1750 }, { "epoch": 0.11924174480228292, "grad_norm": 1.543451189994812, "learning_rate": 0.0009851117679032478, "loss": 3.7654, "step": 1755 }, { "epoch": 0.11958146487294469, "grad_norm": 1.3584561347961426, "learning_rate": 0.000985069302894415, "loss": 3.7213, "step": 1760 }, { "epoch": 0.11992118494360647, "grad_norm": 1.6333565711975098, "learning_rate": 0.0009850268378855823, "loss": 3.6096, "step": 1765 }, { "epoch": 0.12026090501426824, "grad_norm": 1.2999001741409302, "learning_rate": 0.0009849843728767496, "loss": 3.6349, "step": 1770 }, { "epoch": 0.12060062508493002, "grad_norm": 1.5269429683685303, "learning_rate": 0.0009849419078679167, "loss": 3.5973, "step": 1775 }, { "epoch": 0.1209403451555918, "grad_norm": 1.5421810150146484, "learning_rate": 0.0009848994428590843, "loss": 3.6491, "step": 1780 }, { "epoch": 0.12128006522625356, "grad_norm": 1.518696665763855, "learning_rate": 0.0009848569778502514, "loss": 3.8149, "step": 1785 }, { "epoch": 0.12161978529691535, "grad_norm": 1.7794413566589355, "learning_rate": 0.0009848145128414187, "loss": 3.762, "step": 1790 }, { "epoch": 0.12195950536757712, "grad_norm": 1.5299493074417114, "learning_rate": 0.000984772047832586, "loss": 3.4658, "step": 1795 }, { "epoch": 0.12229922543823889, "grad_norm": 1.4742941856384277, "learning_rate": 0.0009847295828237532, "loss": 3.5315, "step": 1800 }, { "epoch": 0.12263894550890067, "grad_norm": 1.5386549234390259, "learning_rate": 0.0009846871178149205, "loss": 3.6572, "step": 1805 }, { "epoch": 0.12297866557956244, "grad_norm": 1.3685286045074463, "learning_rate": 0.0009846446528060878, "loss": 3.5647, "step": 1810 }, { "epoch": 0.12331838565022421, "grad_norm": 1.6030632257461548, "learning_rate": 0.0009846021877972552, "loss": 3.4047, "step": 1815 }, { "epoch": 0.123658105720886, "grad_norm": 1.4342175722122192, "learning_rate": 
0.0009845597227884223, "loss": 3.5138, "step": 1820 }, { "epoch": 0.12399782579154776, "grad_norm": 1.6177843809127808, "learning_rate": 0.0009845172577795896, "loss": 3.5946, "step": 1825 }, { "epoch": 0.12433754586220953, "grad_norm": 1.2974354028701782, "learning_rate": 0.000984474792770757, "loss": 3.6365, "step": 1830 }, { "epoch": 0.12467726593287132, "grad_norm": 1.609848976135254, "learning_rate": 0.000984432327761924, "loss": 3.6298, "step": 1835 }, { "epoch": 0.1250169860035331, "grad_norm": 1.6229193210601807, "learning_rate": 0.0009843898627530914, "loss": 3.7329, "step": 1840 }, { "epoch": 0.12535670607419486, "grad_norm": 1.6588777303695679, "learning_rate": 0.0009843473977442588, "loss": 3.4901, "step": 1845 }, { "epoch": 0.12569642614485663, "grad_norm": 1.7322087287902832, "learning_rate": 0.000984304932735426, "loss": 3.7407, "step": 1850 }, { "epoch": 0.12603614621551842, "grad_norm": 1.4579213857650757, "learning_rate": 0.0009842624677265934, "loss": 3.5865, "step": 1855 }, { "epoch": 0.1263758662861802, "grad_norm": 1.5231727361679077, "learning_rate": 0.0009842200027177605, "loss": 3.6569, "step": 1860 }, { "epoch": 0.12671558635684196, "grad_norm": 1.699390172958374, "learning_rate": 0.0009841775377089279, "loss": 3.517, "step": 1865 }, { "epoch": 0.12705530642750373, "grad_norm": 1.4571901559829712, "learning_rate": 0.0009841350727000952, "loss": 3.7315, "step": 1870 }, { "epoch": 0.1273950264981655, "grad_norm": 1.2128453254699707, "learning_rate": 0.0009840926076912623, "loss": 3.5733, "step": 1875 }, { "epoch": 0.12773474656882727, "grad_norm": 1.745569109916687, "learning_rate": 0.0009840501426824297, "loss": 3.6412, "step": 1880 }, { "epoch": 0.12807446663948907, "grad_norm": 1.3335546255111694, "learning_rate": 0.000984007677673597, "loss": 3.7079, "step": 1885 }, { "epoch": 0.12841418671015084, "grad_norm": 1.5606532096862793, "learning_rate": 0.0009839652126647643, "loss": 3.1353, "step": 1890 }, { "epoch": 0.1287539067808126, 
"grad_norm": 1.6227242946624756, "learning_rate": 0.0009839227476559314, "loss": 3.6482, "step": 1895 }, { "epoch": 0.12909362685147438, "grad_norm": 1.5443379878997803, "learning_rate": 0.0009838802826470988, "loss": 3.4986, "step": 1900 }, { "epoch": 0.12943334692213615, "grad_norm": 1.5466289520263672, "learning_rate": 0.0009838378176382661, "loss": 3.4102, "step": 1905 }, { "epoch": 0.12977306699279795, "grad_norm": 1.4300808906555176, "learning_rate": 0.0009837953526294332, "loss": 3.6403, "step": 1910 }, { "epoch": 0.13011278706345972, "grad_norm": 1.206868052482605, "learning_rate": 0.0009837528876206006, "loss": 3.6595, "step": 1915 }, { "epoch": 0.1304525071341215, "grad_norm": 1.689212679862976, "learning_rate": 0.000983710422611768, "loss": 3.4651, "step": 1920 }, { "epoch": 0.13079222720478326, "grad_norm": 1.4770923852920532, "learning_rate": 0.0009836679576029352, "loss": 3.7208, "step": 1925 }, { "epoch": 0.13113194727544503, "grad_norm": 1.8764562606811523, "learning_rate": 0.0009836254925941026, "loss": 3.7201, "step": 1930 }, { "epoch": 0.1314716673461068, "grad_norm": 1.9378588199615479, "learning_rate": 0.0009835830275852697, "loss": 3.571, "step": 1935 }, { "epoch": 0.1318113874167686, "grad_norm": 1.389882206916809, "learning_rate": 0.000983540562576437, "loss": 3.2071, "step": 1940 }, { "epoch": 0.13215110748743036, "grad_norm": 2.167288064956665, "learning_rate": 0.0009834980975676044, "loss": 3.5753, "step": 1945 }, { "epoch": 0.13249082755809213, "grad_norm": 1.240351676940918, "learning_rate": 0.0009834556325587715, "loss": 3.6513, "step": 1950 }, { "epoch": 0.1328305476287539, "grad_norm": 1.3970073461532593, "learning_rate": 0.000983413167549939, "loss": 3.5928, "step": 1955 }, { "epoch": 0.13317026769941567, "grad_norm": 1.6893250942230225, "learning_rate": 0.0009833707025411061, "loss": 3.6433, "step": 1960 }, { "epoch": 0.13350998777007744, "grad_norm": 1.4890990257263184, "learning_rate": 0.0009833282375322735, "loss": 3.263, 
"step": 1965 }, { "epoch": 0.13384970784073924, "grad_norm": 1.6086233854293823, "learning_rate": 0.0009832857725234408, "loss": 3.5335, "step": 1970 }, { "epoch": 0.134189427911401, "grad_norm": 1.4980287551879883, "learning_rate": 0.000983243307514608, "loss": 3.7778, "step": 1975 }, { "epoch": 0.13452914798206278, "grad_norm": 1.5103315114974976, "learning_rate": 0.0009832008425057753, "loss": 3.5741, "step": 1980 }, { "epoch": 0.13486886805272455, "grad_norm": 1.3895633220672607, "learning_rate": 0.0009831583774969426, "loss": 3.7084, "step": 1985 }, { "epoch": 0.13520858812338632, "grad_norm": 1.7333984375, "learning_rate": 0.00098311591248811, "loss": 3.6428, "step": 1990 }, { "epoch": 0.13554830819404812, "grad_norm": 1.5087534189224243, "learning_rate": 0.000983073447479277, "loss": 3.743, "step": 1995 }, { "epoch": 0.1358880282647099, "grad_norm": 1.8043196201324463, "learning_rate": 0.0009830309824704444, "loss": 3.5437, "step": 2000 }, { "epoch": 0.13622774833537166, "grad_norm": 1.6856354475021362, "learning_rate": 0.0009829885174616117, "loss": 3.5547, "step": 2005 }, { "epoch": 0.13656746840603343, "grad_norm": 1.5222761631011963, "learning_rate": 0.0009829460524527788, "loss": 3.8376, "step": 2010 }, { "epoch": 0.1369071884766952, "grad_norm": 1.2302701473236084, "learning_rate": 0.0009829035874439462, "loss": 3.5965, "step": 2015 }, { "epoch": 0.13724690854735697, "grad_norm": 1.8439022302627563, "learning_rate": 0.0009828611224351135, "loss": 3.7754, "step": 2020 }, { "epoch": 0.13758662861801876, "grad_norm": 1.4366599321365356, "learning_rate": 0.0009828186574262808, "loss": 3.5327, "step": 2025 }, { "epoch": 0.13792634868868053, "grad_norm": 2.016873836517334, "learning_rate": 0.0009827761924174482, "loss": 3.4745, "step": 2030 }, { "epoch": 0.1382660687593423, "grad_norm": 1.5446686744689941, "learning_rate": 0.0009827337274086153, "loss": 3.8404, "step": 2035 }, { "epoch": 0.13860578883000407, "grad_norm": 1.2228907346725464, "learning_rate": 
0.0009826912623997826, "loss": 3.6951, "step": 2040 }, { "epoch": 0.13894550890066584, "grad_norm": 1.2272121906280518, "learning_rate": 0.00098264879739095, "loss": 3.608, "step": 2045 }, { "epoch": 0.1392852289713276, "grad_norm": 1.5167181491851807, "learning_rate": 0.000982606332382117, "loss": 3.6263, "step": 2050 }, { "epoch": 0.1396249490419894, "grad_norm": 1.4843263626098633, "learning_rate": 0.0009825638673732844, "loss": 3.7029, "step": 2055 }, { "epoch": 0.13996466911265118, "grad_norm": 1.4438143968582153, "learning_rate": 0.0009825214023644517, "loss": 3.7193, "step": 2060 }, { "epoch": 0.14030438918331295, "grad_norm": 1.6710935831069946, "learning_rate": 0.000982478937355619, "loss": 3.773, "step": 2065 }, { "epoch": 0.14064410925397472, "grad_norm": 1.913591980934143, "learning_rate": 0.0009824364723467862, "loss": 3.5538, "step": 2070 }, { "epoch": 0.1409838293246365, "grad_norm": 1.8139945268630981, "learning_rate": 0.0009823940073379535, "loss": 3.4968, "step": 2075 }, { "epoch": 0.1413235493952983, "grad_norm": 1.1731719970703125, "learning_rate": 0.0009823515423291209, "loss": 3.6157, "step": 2080 }, { "epoch": 0.14166326946596006, "grad_norm": 1.5042003393173218, "learning_rate": 0.000982309077320288, "loss": 3.7208, "step": 2085 }, { "epoch": 0.14200298953662183, "grad_norm": 1.4241973161697388, "learning_rate": 0.0009822666123114553, "loss": 3.6104, "step": 2090 }, { "epoch": 0.1423427096072836, "grad_norm": 1.2655971050262451, "learning_rate": 0.0009822241473026226, "loss": 3.6753, "step": 2095 }, { "epoch": 0.14268242967794537, "grad_norm": 1.3020150661468506, "learning_rate": 0.00098218168229379, "loss": 3.5562, "step": 2100 }, { "epoch": 0.14302214974860714, "grad_norm": 1.5240980386734009, "learning_rate": 0.0009821392172849573, "loss": 3.7239, "step": 2105 }, { "epoch": 0.14336186981926893, "grad_norm": 1.3814221620559692, "learning_rate": 0.0009820967522761244, "loss": 3.6708, "step": 2110 }, { "epoch": 0.1437015898899307, 
"grad_norm": 1.3996448516845703, "learning_rate": 0.0009820542872672918, "loss": 3.2637, "step": 2115 }, { "epoch": 0.14404130996059247, "grad_norm": 1.4025073051452637, "learning_rate": 0.000982011822258459, "loss": 3.7301, "step": 2120 }, { "epoch": 0.14438103003125424, "grad_norm": 1.6564865112304688, "learning_rate": 0.0009819693572496262, "loss": 3.6083, "step": 2125 }, { "epoch": 0.144720750101916, "grad_norm": 1.7827240228652954, "learning_rate": 0.0009819268922407936, "loss": 3.6557, "step": 2130 }, { "epoch": 0.14506047017257778, "grad_norm": 1.945308804512024, "learning_rate": 0.0009818844272319609, "loss": 3.6004, "step": 2135 }, { "epoch": 0.14540019024323958, "grad_norm": 1.5102007389068604, "learning_rate": 0.0009818419622231282, "loss": 3.5275, "step": 2140 }, { "epoch": 0.14573991031390135, "grad_norm": 1.3304325342178345, "learning_rate": 0.0009817994972142953, "loss": 3.6559, "step": 2145 }, { "epoch": 0.14607963038456312, "grad_norm": 1.5992482900619507, "learning_rate": 0.0009817570322054627, "loss": 3.427, "step": 2150 }, { "epoch": 0.1464193504552249, "grad_norm": 1.4732508659362793, "learning_rate": 0.00098171456719663, "loss": 4.0519, "step": 2155 }, { "epoch": 0.14675907052588666, "grad_norm": 1.545551061630249, "learning_rate": 0.0009816721021877971, "loss": 3.6015, "step": 2160 }, { "epoch": 0.14709879059654846, "grad_norm": 2.374506950378418, "learning_rate": 0.0009816296371789647, "loss": 3.5503, "step": 2165 }, { "epoch": 0.14743851066721023, "grad_norm": 1.800358772277832, "learning_rate": 0.0009815871721701318, "loss": 3.4707, "step": 2170 }, { "epoch": 0.147778230737872, "grad_norm": 1.5160977840423584, "learning_rate": 0.0009815447071612991, "loss": 3.8202, "step": 2175 }, { "epoch": 0.14811795080853377, "grad_norm": 1.423499345779419, "learning_rate": 0.0009815022421524665, "loss": 3.6027, "step": 2180 }, { "epoch": 0.14845767087919554, "grad_norm": 1.438122034072876, "learning_rate": 0.0009814597771436336, "loss": 3.6229, "step": 
2185 }, { "epoch": 0.1487973909498573, "grad_norm": 1.5874353647232056, "learning_rate": 0.000981417312134801, "loss": 3.6496, "step": 2190 }, { "epoch": 0.1491371110205191, "grad_norm": 1.4451318979263306, "learning_rate": 0.0009813748471259682, "loss": 3.5899, "step": 2195 }, { "epoch": 0.14947683109118087, "grad_norm": 1.6036642789840698, "learning_rate": 0.0009813323821171356, "loss": 3.6403, "step": 2200 }, { "epoch": 0.14981655116184264, "grad_norm": 1.4492372274398804, "learning_rate": 0.0009812899171083027, "loss": 3.6216, "step": 2205 }, { "epoch": 0.1501562712325044, "grad_norm": 1.47568678855896, "learning_rate": 0.00098124745209947, "loss": 3.7765, "step": 2210 }, { "epoch": 0.15049599130316618, "grad_norm": 1.5392323732376099, "learning_rate": 0.0009812049870906374, "loss": 3.6645, "step": 2215 }, { "epoch": 0.15083571137382795, "grad_norm": 1.5899097919464111, "learning_rate": 0.0009811625220818045, "loss": 3.6645, "step": 2220 }, { "epoch": 0.15117543144448975, "grad_norm": 1.5435279607772827, "learning_rate": 0.0009811200570729718, "loss": 3.9269, "step": 2225 }, { "epoch": 0.15151515151515152, "grad_norm": 1.7751247882843018, "learning_rate": 0.0009810775920641392, "loss": 3.5132, "step": 2230 }, { "epoch": 0.1518548715858133, "grad_norm": 1.4733456373214722, "learning_rate": 0.0009810351270553065, "loss": 3.7498, "step": 2235 }, { "epoch": 0.15219459165647506, "grad_norm": 1.3130319118499756, "learning_rate": 0.0009809926620464738, "loss": 3.7611, "step": 2240 }, { "epoch": 0.15253431172713683, "grad_norm": 1.190079689025879, "learning_rate": 0.000980950197037641, "loss": 3.739, "step": 2245 }, { "epoch": 0.15287403179779863, "grad_norm": 1.6829626560211182, "learning_rate": 0.0009809077320288083, "loss": 3.4317, "step": 2250 }, { "epoch": 0.1532137518684604, "grad_norm": 1.6301239728927612, "learning_rate": 0.0009808652670199756, "loss": 3.6988, "step": 2255 }, { "epoch": 0.15355347193912217, "grad_norm": 1.8482377529144287, "learning_rate": 
0.0009808228020111427, "loss": 3.8689, "step": 2260 }, { "epoch": 0.15389319200978394, "grad_norm": 1.3406578302383423, "learning_rate": 0.00098078033700231, "loss": 3.6325, "step": 2265 }, { "epoch": 0.1542329120804457, "grad_norm": 1.4392603635787964, "learning_rate": 0.0009807378719934774, "loss": 3.6519, "step": 2270 }, { "epoch": 0.15457263215110748, "grad_norm": 1.7763910293579102, "learning_rate": 0.0009806954069846447, "loss": 3.5364, "step": 2275 }, { "epoch": 0.15491235222176927, "grad_norm": 2.020371913909912, "learning_rate": 0.0009806529419758118, "loss": 3.492, "step": 2280 }, { "epoch": 0.15525207229243104, "grad_norm": 1.684931755065918, "learning_rate": 0.0009806104769669792, "loss": 3.5003, "step": 2285 }, { "epoch": 0.1555917923630928, "grad_norm": 1.4635274410247803, "learning_rate": 0.0009805680119581465, "loss": 3.4961, "step": 2290 }, { "epoch": 0.15593151243375458, "grad_norm": 1.5835603475570679, "learning_rate": 0.0009805255469493139, "loss": 3.5683, "step": 2295 }, { "epoch": 0.15627123250441635, "grad_norm": 1.7769798040390015, "learning_rate": 0.0009804830819404812, "loss": 3.5748, "step": 2300 }, { "epoch": 0.15661095257507815, "grad_norm": 1.5859311819076538, "learning_rate": 0.0009804406169316483, "loss": 3.7703, "step": 2305 }, { "epoch": 0.15695067264573992, "grad_norm": 1.5780986547470093, "learning_rate": 0.0009803981519228156, "loss": 3.4027, "step": 2310 }, { "epoch": 0.1572903927164017, "grad_norm": 1.3586820363998413, "learning_rate": 0.000980355686913983, "loss": 3.6633, "step": 2315 }, { "epoch": 0.15763011278706346, "grad_norm": 1.5683012008666992, "learning_rate": 0.00098031322190515, "loss": 3.6629, "step": 2320 }, { "epoch": 0.15796983285772523, "grad_norm": 1.7724145650863647, "learning_rate": 0.0009802707568963174, "loss": 3.7653, "step": 2325 }, { "epoch": 0.158309552928387, "grad_norm": 1.2581329345703125, "learning_rate": 0.0009802282918874848, "loss": 3.4751, "step": 2330 }, { "epoch": 0.1586492729990488, 
"grad_norm": 1.473171591758728, "learning_rate": 0.000980185826878652, "loss": 3.5887, "step": 2335 }, { "epoch": 0.15898899306971057, "grad_norm": 1.3281471729278564, "learning_rate": 0.0009801433618698194, "loss": 3.6521, "step": 2340 }, { "epoch": 0.15932871314037234, "grad_norm": 1.5148741006851196, "learning_rate": 0.0009801008968609865, "loss": 3.6074, "step": 2345 }, { "epoch": 0.1596684332110341, "grad_norm": 1.709783911705017, "learning_rate": 0.0009800584318521539, "loss": 3.8403, "step": 2350 }, { "epoch": 0.16000815328169588, "grad_norm": 1.299753189086914, "learning_rate": 0.0009800159668433212, "loss": 3.7229, "step": 2355 }, { "epoch": 0.16034787335235764, "grad_norm": 1.4192662239074707, "learning_rate": 0.0009799735018344883, "loss": 3.6954, "step": 2360 }, { "epoch": 0.16068759342301944, "grad_norm": 1.3406012058258057, "learning_rate": 0.0009799310368256557, "loss": 3.3553, "step": 2365 }, { "epoch": 0.1610273134936812, "grad_norm": 1.6090399026870728, "learning_rate": 0.000979888571816823, "loss": 3.5967, "step": 2370 }, { "epoch": 0.16136703356434298, "grad_norm": 1.2359223365783691, "learning_rate": 0.0009798461068079903, "loss": 3.6141, "step": 2375 }, { "epoch": 0.16170675363500475, "grad_norm": 1.7028858661651611, "learning_rate": 0.0009798036417991574, "loss": 3.5832, "step": 2380 }, { "epoch": 0.16204647370566652, "grad_norm": 1.4503439664840698, "learning_rate": 0.0009797611767903248, "loss": 3.566, "step": 2385 }, { "epoch": 0.16238619377632832, "grad_norm": 1.5519297122955322, "learning_rate": 0.0009797187117814921, "loss": 3.7144, "step": 2390 }, { "epoch": 0.1627259138469901, "grad_norm": 2.181450128555298, "learning_rate": 0.0009796762467726592, "loss": 3.4948, "step": 2395 }, { "epoch": 0.16306563391765186, "grad_norm": 1.30222749710083, "learning_rate": 0.0009796337817638266, "loss": 3.6856, "step": 2400 }, { "epoch": 0.16340535398831363, "grad_norm": 1.3557127714157104, "learning_rate": 0.000979591316754994, "loss": 3.3984, 
"step": 2405 }, { "epoch": 0.1637450740589754, "grad_norm": 1.3560794591903687, "learning_rate": 0.0009795488517461612, "loss": 3.8006, "step": 2410 }, { "epoch": 0.16408479412963717, "grad_norm": 1.4708369970321655, "learning_rate": 0.0009795063867373286, "loss": 3.7104, "step": 2415 }, { "epoch": 0.16442451420029897, "grad_norm": 2.1245174407958984, "learning_rate": 0.0009794639217284957, "loss": 3.6167, "step": 2420 }, { "epoch": 0.16476423427096074, "grad_norm": 1.456233263015747, "learning_rate": 0.000979421456719663, "loss": 3.4853, "step": 2425 }, { "epoch": 0.1651039543416225, "grad_norm": 1.72898268699646, "learning_rate": 0.0009793789917108304, "loss": 3.7052, "step": 2430 }, { "epoch": 0.16544367441228428, "grad_norm": 1.646062970161438, "learning_rate": 0.0009793365267019975, "loss": 3.8069, "step": 2435 }, { "epoch": 0.16578339448294604, "grad_norm": 1.5688307285308838, "learning_rate": 0.0009792940616931648, "loss": 3.7124, "step": 2440 }, { "epoch": 0.16612311455360781, "grad_norm": 2.178927421569824, "learning_rate": 0.0009792515966843321, "loss": 3.6449, "step": 2445 }, { "epoch": 0.1664628346242696, "grad_norm": 1.3855340480804443, "learning_rate": 0.0009792091316754995, "loss": 3.5966, "step": 2450 }, { "epoch": 0.16680255469493138, "grad_norm": 2.211934804916382, "learning_rate": 0.0009791666666666666, "loss": 3.6526, "step": 2455 }, { "epoch": 0.16714227476559315, "grad_norm": 1.7534219026565552, "learning_rate": 0.000979124201657834, "loss": 3.6216, "step": 2460 }, { "epoch": 0.16748199483625492, "grad_norm": 1.6533315181732178, "learning_rate": 0.0009790817366490013, "loss": 3.6484, "step": 2465 }, { "epoch": 0.1678217149069167, "grad_norm": 1.9500545263290405, "learning_rate": 0.0009790392716401684, "loss": 3.6873, "step": 2470 }, { "epoch": 0.1681614349775785, "grad_norm": 2.0050010681152344, "learning_rate": 0.000978996806631336, "loss": 3.7452, "step": 2475 }, { "epoch": 0.16850115504824026, "grad_norm": 1.5508373975753784, 
"learning_rate": 0.000978954341622503, "loss": 3.6812, "step": 2480 }, { "epoch": 0.16884087511890203, "grad_norm": 1.4572104215621948, "learning_rate": 0.0009789118766136704, "loss": 3.6563, "step": 2485 }, { "epoch": 0.1691805951895638, "grad_norm": 1.2761588096618652, "learning_rate": 0.0009788694116048377, "loss": 3.5729, "step": 2490 }, { "epoch": 0.16952031526022557, "grad_norm": 1.574636697769165, "learning_rate": 0.0009788269465960048, "loss": 3.6695, "step": 2495 }, { "epoch": 0.16986003533088734, "grad_norm": 1.3863581418991089, "learning_rate": 0.0009787844815871722, "loss": 3.852, "step": 2500 }, { "epoch": 0.17019975540154914, "grad_norm": 1.4279425144195557, "learning_rate": 0.0009787420165783395, "loss": 3.7719, "step": 2505 }, { "epoch": 0.1705394754722109, "grad_norm": 1.2918821573257446, "learning_rate": 0.0009786995515695068, "loss": 3.7968, "step": 2510 }, { "epoch": 0.17087919554287267, "grad_norm": 1.2039324045181274, "learning_rate": 0.000978657086560674, "loss": 3.64, "step": 2515 }, { "epoch": 0.17121891561353444, "grad_norm": 1.2826083898544312, "learning_rate": 0.0009786146215518413, "loss": 3.7748, "step": 2520 }, { "epoch": 0.17155863568419621, "grad_norm": 1.656377911567688, "learning_rate": 0.0009785721565430086, "loss": 3.7491, "step": 2525 }, { "epoch": 0.17189835575485798, "grad_norm": 1.6525322198867798, "learning_rate": 0.0009785296915341757, "loss": 3.5904, "step": 2530 }, { "epoch": 0.17223807582551978, "grad_norm": 1.7418395280838013, "learning_rate": 0.000978487226525343, "loss": 3.8189, "step": 2535 }, { "epoch": 0.17257779589618155, "grad_norm": 1.9879206418991089, "learning_rate": 0.0009784447615165104, "loss": 3.4755, "step": 2540 }, { "epoch": 0.17291751596684332, "grad_norm": 1.5148375034332275, "learning_rate": 0.0009784022965076777, "loss": 3.5389, "step": 2545 }, { "epoch": 0.1732572360375051, "grad_norm": 1.256455898284912, "learning_rate": 0.000978359831498845, "loss": 3.754, "step": 2550 }, { "epoch": 
0.17359695610816686, "grad_norm": 1.5919709205627441, "learning_rate": 0.0009783173664900122, "loss": 3.511, "step": 2555 }, { "epoch": 0.17393667617882866, "grad_norm": 1.4032975435256958, "learning_rate": 0.0009782749014811795, "loss": 3.4347, "step": 2560 }, { "epoch": 0.17427639624949043, "grad_norm": 2.2082056999206543, "learning_rate": 0.0009782324364723469, "loss": 3.6181, "step": 2565 }, { "epoch": 0.1746161163201522, "grad_norm": 1.6217575073242188, "learning_rate": 0.000978189971463514, "loss": 3.7306, "step": 2570 }, { "epoch": 0.17495583639081397, "grad_norm": 1.7301559448242188, "learning_rate": 0.0009781475064546813, "loss": 3.6289, "step": 2575 }, { "epoch": 0.17529555646147574, "grad_norm": 1.7424930334091187, "learning_rate": 0.0009781050414458487, "loss": 3.7685, "step": 2580 }, { "epoch": 0.1756352765321375, "grad_norm": 1.3441998958587646, "learning_rate": 0.000978062576437016, "loss": 3.456, "step": 2585 }, { "epoch": 0.1759749966027993, "grad_norm": 1.3469640016555786, "learning_rate": 0.000978020111428183, "loss": 3.4722, "step": 2590 }, { "epoch": 0.17631471667346107, "grad_norm": 1.6651843786239624, "learning_rate": 0.0009779776464193504, "loss": 3.5903, "step": 2595 }, { "epoch": 0.17665443674412284, "grad_norm": 1.742973804473877, "learning_rate": 0.0009779351814105178, "loss": 3.4803, "step": 2600 }, { "epoch": 0.17699415681478461, "grad_norm": 1.9894238710403442, "learning_rate": 0.0009778927164016849, "loss": 3.8899, "step": 2605 }, { "epoch": 0.17733387688544638, "grad_norm": 1.6876741647720337, "learning_rate": 0.0009778502513928522, "loss": 3.6899, "step": 2610 }, { "epoch": 0.17767359695610815, "grad_norm": 1.4011814594268799, "learning_rate": 0.0009778077863840196, "loss": 3.303, "step": 2615 }, { "epoch": 0.17801331702676995, "grad_norm": 1.6934436559677124, "learning_rate": 0.000977765321375187, "loss": 3.6579, "step": 2620 }, { "epoch": 0.17835303709743172, "grad_norm": 1.651945948600769, "learning_rate": 0.0009777228563663542, 
"loss": 3.5459, "step": 2625 }, { "epoch": 0.1786927571680935, "grad_norm": 1.5333261489868164, "learning_rate": 0.0009776803913575213, "loss": 3.4722, "step": 2630 }, { "epoch": 0.17903247723875526, "grad_norm": 1.4776134490966797, "learning_rate": 0.0009776379263486887, "loss": 3.5112, "step": 2635 }, { "epoch": 0.17937219730941703, "grad_norm": 1.5277644395828247, "learning_rate": 0.000977595461339856, "loss": 3.7064, "step": 2640 }, { "epoch": 0.17971191738007883, "grad_norm": 1.3529630899429321, "learning_rate": 0.0009775529963310231, "loss": 3.6815, "step": 2645 }, { "epoch": 0.1800516374507406, "grad_norm": 1.474433422088623, "learning_rate": 0.0009775105313221907, "loss": 3.8214, "step": 2650 }, { "epoch": 0.18039135752140237, "grad_norm": 1.755918025970459, "learning_rate": 0.0009774680663133578, "loss": 3.6521, "step": 2655 }, { "epoch": 0.18073107759206414, "grad_norm": 1.4949427843093872, "learning_rate": 0.0009774256013045251, "loss": 3.5582, "step": 2660 }, { "epoch": 0.1810707976627259, "grad_norm": 1.6098140478134155, "learning_rate": 0.0009773831362956925, "loss": 3.7297, "step": 2665 }, { "epoch": 0.18141051773338768, "grad_norm": 1.8073756694793701, "learning_rate": 0.0009773406712868596, "loss": 3.583, "step": 2670 }, { "epoch": 0.18175023780404947, "grad_norm": 2.260629653930664, "learning_rate": 0.000977298206278027, "loss": 3.6345, "step": 2675 }, { "epoch": 0.18208995787471124, "grad_norm": 1.8748559951782227, "learning_rate": 0.0009772557412691943, "loss": 3.6994, "step": 2680 }, { "epoch": 0.18242967794537301, "grad_norm": 1.1414518356323242, "learning_rate": 0.0009772132762603616, "loss": 3.5229, "step": 2685 }, { "epoch": 0.18276939801603478, "grad_norm": 1.4222573041915894, "learning_rate": 0.0009771708112515287, "loss": 3.5841, "step": 2690 }, { "epoch": 0.18310911808669655, "grad_norm": 1.4198592901229858, "learning_rate": 0.000977128346242696, "loss": 3.2216, "step": 2695 }, { "epoch": 0.18344883815735832, "grad_norm": 
1.6801544427871704, "learning_rate": 0.0009770858812338634, "loss": 3.7295, "step": 2700 }, { "epoch": 0.18378855822802012, "grad_norm": 1.6035256385803223, "learning_rate": 0.0009770434162250305, "loss": 3.6097, "step": 2705 }, { "epoch": 0.1841282782986819, "grad_norm": 1.4733257293701172, "learning_rate": 0.0009770009512161978, "loss": 3.5018, "step": 2710 }, { "epoch": 0.18446799836934366, "grad_norm": 1.573048710823059, "learning_rate": 0.0009769584862073652, "loss": 3.6731, "step": 2715 }, { "epoch": 0.18480771844000543, "grad_norm": 2.0174062252044678, "learning_rate": 0.0009769160211985325, "loss": 3.6302, "step": 2720 }, { "epoch": 0.1851474385106672, "grad_norm": 1.5894980430603027, "learning_rate": 0.0009768735561896998, "loss": 3.7227, "step": 2725 }, { "epoch": 0.185487158581329, "grad_norm": 1.5623937845230103, "learning_rate": 0.000976831091180867, "loss": 3.7946, "step": 2730 }, { "epoch": 0.18582687865199077, "grad_norm": 1.5348725318908691, "learning_rate": 0.0009767886261720343, "loss": 3.7892, "step": 2735 }, { "epoch": 0.18616659872265254, "grad_norm": 1.301228404045105, "learning_rate": 0.0009767461611632016, "loss": 3.4488, "step": 2740 }, { "epoch": 0.1865063187933143, "grad_norm": 1.2054224014282227, "learning_rate": 0.0009767036961543687, "loss": 3.5589, "step": 2745 }, { "epoch": 0.18684603886397608, "grad_norm": 1.794548511505127, "learning_rate": 0.000976661231145536, "loss": 3.6466, "step": 2750 }, { "epoch": 0.18718575893463785, "grad_norm": 1.5965689420700073, "learning_rate": 0.0009766187661367034, "loss": 3.8429, "step": 2755 }, { "epoch": 0.18752547900529964, "grad_norm": 1.572657585144043, "learning_rate": 0.0009765763011278707, "loss": 3.6597, "step": 2760 }, { "epoch": 0.18786519907596141, "grad_norm": 1.3402358293533325, "learning_rate": 0.0009765338361190379, "loss": 3.6827, "step": 2765 }, { "epoch": 0.18820491914662318, "grad_norm": 1.1768739223480225, "learning_rate": 0.0009764913711102053, "loss": 3.6132, "step": 2770 }, 
{ "epoch": 0.18854463921728495, "grad_norm": 1.6005046367645264, "learning_rate": 0.0009764489061013725, "loss": 3.7652, "step": 2775 }, { "epoch": 0.18888435928794672, "grad_norm": 1.2249068021774292, "learning_rate": 0.0009764064410925397, "loss": 3.5249, "step": 2780 }, { "epoch": 0.1892240793586085, "grad_norm": 1.7367336750030518, "learning_rate": 0.0009763639760837071, "loss": 3.405, "step": 2785 }, { "epoch": 0.1895637994292703, "grad_norm": 1.653196096420288, "learning_rate": 0.0009763215110748743, "loss": 3.6167, "step": 2790 }, { "epoch": 0.18990351949993206, "grad_norm": 1.6334410905838013, "learning_rate": 0.0009762790460660415, "loss": 3.5724, "step": 2795 }, { "epoch": 0.19024323957059383, "grad_norm": 1.7236627340316772, "learning_rate": 0.0009762365810572089, "loss": 3.6086, "step": 2800 }, { "epoch": 0.1905829596412556, "grad_norm": 1.7642745971679688, "learning_rate": 0.0009761941160483762, "loss": 3.6713, "step": 2805 }, { "epoch": 0.19092267971191737, "grad_norm": 1.3998241424560547, "learning_rate": 0.0009761516510395434, "loss": 3.2658, "step": 2810 }, { "epoch": 0.19126239978257917, "grad_norm": 1.9746315479278564, "learning_rate": 0.0009761091860307108, "loss": 3.697, "step": 2815 }, { "epoch": 0.19160211985324094, "grad_norm": 1.5853697061538696, "learning_rate": 0.000976066721021878, "loss": 3.8224, "step": 2820 }, { "epoch": 0.1919418399239027, "grad_norm": 1.138885259628296, "learning_rate": 0.0009760242560130452, "loss": 3.5141, "step": 2825 }, { "epoch": 0.19228155999456448, "grad_norm": 1.3931362628936768, "learning_rate": 0.0009759817910042125, "loss": 3.8274, "step": 2830 }, { "epoch": 0.19262128006522625, "grad_norm": 1.7421034574508667, "learning_rate": 0.0009759393259953798, "loss": 3.5691, "step": 2835 }, { "epoch": 0.19296100013588802, "grad_norm": 1.770276665687561, "learning_rate": 0.0009758968609865471, "loss": 3.6141, "step": 2840 }, { "epoch": 0.19330072020654981, "grad_norm": 1.2606419324874878, "learning_rate": 
0.0009758543959777144, "loss": 3.695, "step": 2845 }, { "epoch": 0.19364044027721158, "grad_norm": 1.3444241285324097, "learning_rate": 0.0009758119309688817, "loss": 3.7263, "step": 2850 }, { "epoch": 0.19398016034787335, "grad_norm": 1.8207935094833374, "learning_rate": 0.0009757694659600489, "loss": 3.7592, "step": 2855 }, { "epoch": 0.19431988041853512, "grad_norm": 2.287984609603882, "learning_rate": 0.0009757270009512162, "loss": 3.6114, "step": 2860 }, { "epoch": 0.1946596004891969, "grad_norm": 2.058689594268799, "learning_rate": 0.0009756845359423835, "loss": 3.6704, "step": 2865 }, { "epoch": 0.19499932055985866, "grad_norm": 2.2893943786621094, "learning_rate": 0.0009756420709335507, "loss": 3.4644, "step": 2870 }, { "epoch": 0.19533904063052046, "grad_norm": 1.8992054462432861, "learning_rate": 0.0009755996059247181, "loss": 3.5464, "step": 2875 }, { "epoch": 0.19567876070118223, "grad_norm": 1.5614662170410156, "learning_rate": 0.0009755571409158853, "loss": 3.7306, "step": 2880 }, { "epoch": 0.196018480771844, "grad_norm": 1.5725581645965576, "learning_rate": 0.0009755146759070526, "loss": 3.6139, "step": 2885 }, { "epoch": 0.19635820084250577, "grad_norm": 1.4979517459869385, "learning_rate": 0.0009754722108982199, "loss": 3.0271, "step": 2890 }, { "epoch": 0.19669792091316754, "grad_norm": 1.7687382698059082, "learning_rate": 0.0009754297458893871, "loss": 3.3116, "step": 2895 }, { "epoch": 0.19703764098382934, "grad_norm": 1.8013548851013184, "learning_rate": 0.0009753872808805544, "loss": 3.6544, "step": 2900 }, { "epoch": 0.1973773610544911, "grad_norm": 1.8245315551757812, "learning_rate": 0.0009753448158717217, "loss": 3.4186, "step": 2905 }, { "epoch": 0.19771708112515288, "grad_norm": 2.1794593334198, "learning_rate": 0.000975302350862889, "loss": 3.6166, "step": 2910 }, { "epoch": 0.19805680119581465, "grad_norm": 1.6466684341430664, "learning_rate": 0.0009752598858540563, "loss": 3.5582, "step": 2915 }, { "epoch": 0.19839652126647642, 
"grad_norm": 1.613341212272644, "learning_rate": 0.0009752174208452236, "loss": 3.6373, "step": 2920 }, { "epoch": 0.1987362413371382, "grad_norm": 1.4771842956542969, "learning_rate": 0.0009751749558363908, "loss": 3.5691, "step": 2925 }, { "epoch": 0.19907596140779998, "grad_norm": 1.8484675884246826, "learning_rate": 0.000975132490827558, "loss": 3.4312, "step": 2930 }, { "epoch": 0.19941568147846175, "grad_norm": 1.5827974081039429, "learning_rate": 0.0009750900258187254, "loss": 3.7145, "step": 2935 }, { "epoch": 0.19975540154912352, "grad_norm": 2.094165086746216, "learning_rate": 0.0009750475608098926, "loss": 3.3412, "step": 2940 }, { "epoch": 0.2000951216197853, "grad_norm": 1.8033188581466675, "learning_rate": 0.0009750050958010599, "loss": 3.6978, "step": 2945 }, { "epoch": 0.20043484169044706, "grad_norm": 1.9469860792160034, "learning_rate": 0.0009749626307922273, "loss": 3.7308, "step": 2950 }, { "epoch": 0.20077456176110883, "grad_norm": 1.517685055732727, "learning_rate": 0.0009749201657833945, "loss": 3.9636, "step": 2955 }, { "epoch": 0.20111428183177063, "grad_norm": 1.6688555479049683, "learning_rate": 0.0009748777007745617, "loss": 3.7073, "step": 2960 }, { "epoch": 0.2014540019024324, "grad_norm": 1.2724841833114624, "learning_rate": 0.0009748352357657291, "loss": 3.5691, "step": 2965 }, { "epoch": 0.20179372197309417, "grad_norm": 1.4225422143936157, "learning_rate": 0.0009747927707568963, "loss": 3.5942, "step": 2970 }, { "epoch": 0.20213344204375594, "grad_norm": 1.7563380002975464, "learning_rate": 0.0009747503057480636, "loss": 3.5277, "step": 2975 }, { "epoch": 0.2024731621144177, "grad_norm": 1.570784330368042, "learning_rate": 0.000974707840739231, "loss": 3.6054, "step": 2980 }, { "epoch": 0.2028128821850795, "grad_norm": 1.7166695594787598, "learning_rate": 0.0009746653757303982, "loss": 3.4254, "step": 2985 }, { "epoch": 0.20315260225574128, "grad_norm": 1.183107852935791, "learning_rate": 0.0009746229107215655, "loss": 3.5557, 
"step": 2990 }, { "epoch": 0.20349232232640305, "grad_norm": 2.318495988845825, "learning_rate": 0.0009745804457127327, "loss": 3.7121, "step": 2995 }, { "epoch": 0.20383204239706482, "grad_norm": 1.2137643098831177, "learning_rate": 0.0009745379807039, "loss": 3.556, "step": 3000 }, { "epoch": 0.2041717624677266, "grad_norm": 1.915683388710022, "learning_rate": 0.0009744955156950673, "loss": 3.6223, "step": 3005 }, { "epoch": 0.20451148253838836, "grad_norm": 1.482134222984314, "learning_rate": 0.0009744530506862345, "loss": 3.7466, "step": 3010 }, { "epoch": 0.20485120260905015, "grad_norm": 1.5170459747314453, "learning_rate": 0.0009744105856774019, "loss": 3.6854, "step": 3015 }, { "epoch": 0.20519092267971192, "grad_norm": 1.5258833169937134, "learning_rate": 0.0009743681206685692, "loss": 3.7251, "step": 3020 }, { "epoch": 0.2055306427503737, "grad_norm": 1.5242971181869507, "learning_rate": 0.0009743256556597364, "loss": 3.6968, "step": 3025 }, { "epoch": 0.20587036282103546, "grad_norm": 1.8417788743972778, "learning_rate": 0.0009742831906509036, "loss": 3.6372, "step": 3030 }, { "epoch": 0.20621008289169723, "grad_norm": 1.6017920970916748, "learning_rate": 0.000974240725642071, "loss": 3.5802, "step": 3035 }, { "epoch": 0.206549802962359, "grad_norm": 1.7389906644821167, "learning_rate": 0.0009741982606332382, "loss": 3.6459, "step": 3040 }, { "epoch": 0.2068895230330208, "grad_norm": 1.723463535308838, "learning_rate": 0.0009741557956244054, "loss": 3.4685, "step": 3045 }, { "epoch": 0.20722924310368257, "grad_norm": 1.4747644662857056, "learning_rate": 0.0009741133306155729, "loss": 3.612, "step": 3050 }, { "epoch": 0.20756896317434434, "grad_norm": 1.6986851692199707, "learning_rate": 0.0009740708656067401, "loss": 3.4137, "step": 3055 }, { "epoch": 0.2079086832450061, "grad_norm": 1.8193126916885376, "learning_rate": 0.0009740284005979073, "loss": 3.6603, "step": 3060 }, { "epoch": 0.20824840331566788, "grad_norm": 1.3151273727416992, "learning_rate": 
0.0009739859355890747, "loss": 3.4744, "step": 3065 }, { "epoch": 0.20858812338632968, "grad_norm": 1.4841066598892212, "learning_rate": 0.0009739434705802419, "loss": 3.7652, "step": 3070 }, { "epoch": 0.20892784345699145, "grad_norm": 1.260217547416687, "learning_rate": 0.0009739010055714091, "loss": 3.4639, "step": 3075 }, { "epoch": 0.20926756352765322, "grad_norm": 1.630041241645813, "learning_rate": 0.0009738585405625766, "loss": 3.5109, "step": 3080 }, { "epoch": 0.209607283598315, "grad_norm": 1.8857365846633911, "learning_rate": 0.0009738160755537438, "loss": 3.7971, "step": 3085 }, { "epoch": 0.20994700366897676, "grad_norm": 2.018599271774292, "learning_rate": 0.000973773610544911, "loss": 3.592, "step": 3090 }, { "epoch": 0.21028672373963853, "grad_norm": 1.4818916320800781, "learning_rate": 0.0009737311455360783, "loss": 3.7401, "step": 3095 }, { "epoch": 0.21062644381030032, "grad_norm": 1.567958116531372, "learning_rate": 0.0009736886805272456, "loss": 3.8282, "step": 3100 }, { "epoch": 0.2109661638809621, "grad_norm": 1.7037855386734009, "learning_rate": 0.0009736462155184128, "loss": 3.5931, "step": 3105 }, { "epoch": 0.21130588395162386, "grad_norm": 1.8428840637207031, "learning_rate": 0.0009736037505095801, "loss": 3.813, "step": 3110 }, { "epoch": 0.21164560402228563, "grad_norm": 1.456045389175415, "learning_rate": 0.0009735612855007475, "loss": 3.6668, "step": 3115 }, { "epoch": 0.2119853240929474, "grad_norm": 2.9838674068450928, "learning_rate": 0.0009735188204919147, "loss": 3.7345, "step": 3120 }, { "epoch": 0.21232504416360917, "grad_norm": 1.7628111839294434, "learning_rate": 0.000973476355483082, "loss": 3.341, "step": 3125 }, { "epoch": 0.21266476423427097, "grad_norm": 1.8359915018081665, "learning_rate": 0.0009734338904742492, "loss": 3.7871, "step": 3130 }, { "epoch": 0.21300448430493274, "grad_norm": 1.5932973623275757, "learning_rate": 0.0009733914254654165, "loss": 3.6288, "step": 3135 }, { "epoch": 0.2133442043755945, 
"grad_norm": 1.344259262084961, "learning_rate": 0.0009733489604565838, "loss": 3.5397, "step": 3140 }, { "epoch": 0.21368392444625628, "grad_norm": 1.8849796056747437, "learning_rate": 0.000973306495447751, "loss": 3.6805, "step": 3145 }, { "epoch": 0.21402364451691805, "grad_norm": 1.593395709991455, "learning_rate": 0.0009732640304389184, "loss": 3.2779, "step": 3150 }, { "epoch": 0.21436336458757985, "grad_norm": 1.895630955696106, "learning_rate": 0.0009732215654300857, "loss": 3.6524, "step": 3155 }, { "epoch": 0.21470308465824162, "grad_norm": 1.5237373113632202, "learning_rate": 0.0009731791004212529, "loss": 3.6018, "step": 3160 }, { "epoch": 0.2150428047289034, "grad_norm": 1.244061827659607, "learning_rate": 0.0009731366354124202, "loss": 3.5856, "step": 3165 }, { "epoch": 0.21538252479956516, "grad_norm": 2.2711894512176514, "learning_rate": 0.0009730941704035875, "loss": 3.5809, "step": 3170 }, { "epoch": 0.21572224487022693, "grad_norm": 1.4447957277297974, "learning_rate": 0.0009730517053947547, "loss": 3.5317, "step": 3175 }, { "epoch": 0.2160619649408887, "grad_norm": 1.4298884868621826, "learning_rate": 0.0009730092403859219, "loss": 3.5903, "step": 3180 }, { "epoch": 0.2164016850115505, "grad_norm": 1.5763835906982422, "learning_rate": 0.0009729667753770894, "loss": 3.605, "step": 3185 }, { "epoch": 0.21674140508221226, "grad_norm": 2.096895217895508, "learning_rate": 0.0009729243103682566, "loss": 3.8662, "step": 3190 }, { "epoch": 0.21708112515287403, "grad_norm": 1.4500606060028076, "learning_rate": 0.0009728818453594238, "loss": 3.4747, "step": 3195 }, { "epoch": 0.2174208452235358, "grad_norm": 1.781977891921997, "learning_rate": 0.0009728393803505912, "loss": 3.6476, "step": 3200 }, { "epoch": 0.21776056529419757, "grad_norm": 1.8784739971160889, "learning_rate": 0.0009727969153417584, "loss": 3.9962, "step": 3205 }, { "epoch": 0.21810028536485934, "grad_norm": 1.6660099029541016, "learning_rate": 0.0009727544503329256, "loss": 3.659, 
"step": 3210 }, { "epoch": 0.21844000543552114, "grad_norm": 1.603079080581665, "learning_rate": 0.000972711985324093, "loss": 3.6907, "step": 3215 }, { "epoch": 0.2187797255061829, "grad_norm": 1.6650390625, "learning_rate": 0.0009726695203152603, "loss": 3.5983, "step": 3220 }, { "epoch": 0.21911944557684468, "grad_norm": 1.557541012763977, "learning_rate": 0.0009726270553064275, "loss": 3.4384, "step": 3225 }, { "epoch": 0.21945916564750645, "grad_norm": 1.519931435585022, "learning_rate": 0.0009725845902975948, "loss": 3.6655, "step": 3230 }, { "epoch": 0.21979888571816822, "grad_norm": 1.4979947805404663, "learning_rate": 0.0009725421252887621, "loss": 3.5783, "step": 3235 }, { "epoch": 0.22013860578883002, "grad_norm": 1.5787291526794434, "learning_rate": 0.0009724996602799293, "loss": 3.5439, "step": 3240 }, { "epoch": 0.22047832585949179, "grad_norm": 1.498077630996704, "learning_rate": 0.0009724571952710966, "loss": 3.5013, "step": 3245 }, { "epoch": 0.22081804593015356, "grad_norm": 1.9145184755325317, "learning_rate": 0.0009724147302622639, "loss": 3.7401, "step": 3250 }, { "epoch": 0.22115776600081533, "grad_norm": 1.4099106788635254, "learning_rate": 0.0009723722652534312, "loss": 3.5277, "step": 3255 }, { "epoch": 0.2214974860714771, "grad_norm": 1.7604225873947144, "learning_rate": 0.0009723298002445985, "loss": 3.5528, "step": 3260 }, { "epoch": 0.22183720614213887, "grad_norm": 1.429775357246399, "learning_rate": 0.0009722873352357658, "loss": 3.5951, "step": 3265 }, { "epoch": 0.22217692621280066, "grad_norm": 1.9229302406311035, "learning_rate": 0.000972244870226933, "loss": 3.7301, "step": 3270 }, { "epoch": 0.22251664628346243, "grad_norm": 1.678483247756958, "learning_rate": 0.0009722024052181003, "loss": 3.7475, "step": 3275 }, { "epoch": 0.2228563663541242, "grad_norm": 1.3851267099380493, "learning_rate": 0.0009721599402092675, "loss": 3.3759, "step": 3280 }, { "epoch": 0.22319608642478597, "grad_norm": 1.571881890296936, "learning_rate": 
0.0009721174752004348, "loss": 3.8904, "step": 3285 }, { "epoch": 0.22353580649544774, "grad_norm": 2.4955577850341797, "learning_rate": 0.0009720750101916022, "loss": 3.5888, "step": 3290 }, { "epoch": 0.22387552656610954, "grad_norm": 1.9091697931289673, "learning_rate": 0.0009720325451827694, "loss": 3.3422, "step": 3295 }, { "epoch": 0.2242152466367713, "grad_norm": 1.5635496377944946, "learning_rate": 0.0009719900801739367, "loss": 3.4886, "step": 3300 }, { "epoch": 0.22455496670743308, "grad_norm": 1.4564707279205322, "learning_rate": 0.000971947615165104, "loss": 3.8156, "step": 3305 }, { "epoch": 0.22489468677809485, "grad_norm": 1.6423358917236328, "learning_rate": 0.0009719051501562712, "loss": 3.2523, "step": 3310 }, { "epoch": 0.22523440684875662, "grad_norm": 1.7942570447921753, "learning_rate": 0.0009718626851474386, "loss": 3.4706, "step": 3315 }, { "epoch": 0.2255741269194184, "grad_norm": 1.7438079118728638, "learning_rate": 0.0009718202201386058, "loss": 3.5641, "step": 3320 }, { "epoch": 0.22591384699008019, "grad_norm": 1.6157587766647339, "learning_rate": 0.0009717777551297731, "loss": 3.4285, "step": 3325 }, { "epoch": 0.22625356706074196, "grad_norm": 1.381119728088379, "learning_rate": 0.0009717352901209404, "loss": 3.6949, "step": 3330 }, { "epoch": 0.22659328713140373, "grad_norm": 1.3827497959136963, "learning_rate": 0.0009716928251121077, "loss": 3.5044, "step": 3335 }, { "epoch": 0.2269330072020655, "grad_norm": 2.809739351272583, "learning_rate": 0.0009716503601032749, "loss": 3.4541, "step": 3340 }, { "epoch": 0.22727272727272727, "grad_norm": 1.5545117855072021, "learning_rate": 0.0009716078950944422, "loss": 3.6034, "step": 3345 }, { "epoch": 0.22761244734338903, "grad_norm": 1.4766197204589844, "learning_rate": 0.0009715654300856095, "loss": 3.6981, "step": 3350 }, { "epoch": 0.22795216741405083, "grad_norm": 1.5063202381134033, "learning_rate": 0.0009715229650767767, "loss": 3.5359, "step": 3355 }, { "epoch": 0.2282918874847126, 
"grad_norm": 1.227472186088562, "learning_rate": 0.0009714805000679441, "loss": 3.6055, "step": 3360 }, { "epoch": 0.22863160755537437, "grad_norm": 1.6416360139846802, "learning_rate": 0.0009714380350591114, "loss": 3.7695, "step": 3365 }, { "epoch": 0.22897132762603614, "grad_norm": 1.8123581409454346, "learning_rate": 0.0009713955700502786, "loss": 3.6336, "step": 3370 }, { "epoch": 0.2293110476966979, "grad_norm": 1.4352260828018188, "learning_rate": 0.0009713531050414459, "loss": 3.6829, "step": 3375 }, { "epoch": 0.2296507677673597, "grad_norm": 1.831451416015625, "learning_rate": 0.0009713106400326131, "loss": 3.5827, "step": 3380 }, { "epoch": 0.22999048783802148, "grad_norm": 1.5248678922653198, "learning_rate": 0.0009712681750237804, "loss": 3.6143, "step": 3385 }, { "epoch": 0.23033020790868325, "grad_norm": 1.574579119682312, "learning_rate": 0.0009712257100149477, "loss": 3.4681, "step": 3390 }, { "epoch": 0.23066992797934502, "grad_norm": 1.5387446880340576, "learning_rate": 0.000971183245006115, "loss": 3.6486, "step": 3395 }, { "epoch": 0.2310096480500068, "grad_norm": 1.8942196369171143, "learning_rate": 0.0009711407799972823, "loss": 3.5279, "step": 3400 }, { "epoch": 0.23134936812066856, "grad_norm": 1.609013319015503, "learning_rate": 0.0009710983149884496, "loss": 3.7229, "step": 3405 }, { "epoch": 0.23168908819133036, "grad_norm": 1.6041289567947388, "learning_rate": 0.0009710558499796168, "loss": 3.592, "step": 3410 }, { "epoch": 0.23202880826199213, "grad_norm": 1.976387619972229, "learning_rate": 0.000971013384970784, "loss": 3.7059, "step": 3415 }, { "epoch": 0.2323685283326539, "grad_norm": 1.3025404214859009, "learning_rate": 0.0009709709199619514, "loss": 3.6626, "step": 3420 }, { "epoch": 0.23270824840331567, "grad_norm": 1.6550575494766235, "learning_rate": 0.0009709284549531186, "loss": 3.7363, "step": 3425 }, { "epoch": 0.23304796847397743, "grad_norm": 1.7177534103393555, "learning_rate": 0.0009708859899442859, "loss": 3.5904, 
"step": 3430 }, { "epoch": 0.2333876885446392, "grad_norm": 1.1901168823242188, "learning_rate": 0.0009708435249354533, "loss": 3.693, "step": 3435 }, { "epoch": 0.233727408615301, "grad_norm": 1.8354909420013428, "learning_rate": 0.0009708010599266205, "loss": 3.4001, "step": 3440 }, { "epoch": 0.23406712868596277, "grad_norm": 2.0355570316314697, "learning_rate": 0.0009707585949177877, "loss": 3.5455, "step": 3445 }, { "epoch": 0.23440684875662454, "grad_norm": 1.825156331062317, "learning_rate": 0.0009707161299089551, "loss": 3.4719, "step": 3450 }, { "epoch": 0.2347465688272863, "grad_norm": 1.9083362817764282, "learning_rate": 0.0009706736649001223, "loss": 3.6803, "step": 3455 }, { "epoch": 0.23508628889794808, "grad_norm": 1.7045162916183472, "learning_rate": 0.0009706311998912895, "loss": 3.599, "step": 3460 }, { "epoch": 0.23542600896860988, "grad_norm": 1.3844332695007324, "learning_rate": 0.000970588734882457, "loss": 3.7629, "step": 3465 }, { "epoch": 0.23576572903927165, "grad_norm": 1.634311556816101, "learning_rate": 0.0009705462698736242, "loss": 3.5733, "step": 3470 }, { "epoch": 0.23610544910993342, "grad_norm": 1.903748869895935, "learning_rate": 0.0009705038048647914, "loss": 3.5911, "step": 3475 }, { "epoch": 0.2364451691805952, "grad_norm": 2.0329222679138184, "learning_rate": 0.0009704613398559587, "loss": 3.7539, "step": 3480 }, { "epoch": 0.23678488925125696, "grad_norm": 1.6105543375015259, "learning_rate": 0.000970418874847126, "loss": 3.6888, "step": 3485 }, { "epoch": 0.23712460932191873, "grad_norm": 1.507226824760437, "learning_rate": 0.0009703764098382932, "loss": 3.425, "step": 3490 }, { "epoch": 0.23746432939258053, "grad_norm": 1.4066369533538818, "learning_rate": 0.0009703339448294605, "loss": 3.4874, "step": 3495 }, { "epoch": 0.2378040494632423, "grad_norm": 1.6370975971221924, "learning_rate": 0.0009702914798206279, "loss": 3.6423, "step": 3500 }, { "epoch": 0.23814376953390406, "grad_norm": 2.046907663345337, "learning_rate": 
0.0009702490148117951, "loss": 3.6404, "step": 3505 }, { "epoch": 0.23848348960456583, "grad_norm": 2.1867551803588867, "learning_rate": 0.0009702065498029624, "loss": 3.5402, "step": 3510 }, { "epoch": 0.2388232096752276, "grad_norm": 1.517886757850647, "learning_rate": 0.0009701640847941296, "loss": 3.6269, "step": 3515 }, { "epoch": 0.23916292974588937, "grad_norm": 1.554821252822876, "learning_rate": 0.0009701216197852969, "loss": 3.5487, "step": 3520 }, { "epoch": 0.23950264981655117, "grad_norm": 1.4033831357955933, "learning_rate": 0.0009700791547764642, "loss": 3.6614, "step": 3525 }, { "epoch": 0.23984236988721294, "grad_norm": 1.6513313055038452, "learning_rate": 0.0009700366897676314, "loss": 3.5866, "step": 3530 }, { "epoch": 0.2401820899578747, "grad_norm": 1.5661497116088867, "learning_rate": 0.0009699942247587988, "loss": 3.4566, "step": 3535 }, { "epoch": 0.24052181002853648, "grad_norm": 1.7507997751235962, "learning_rate": 0.0009699517597499661, "loss": 3.5812, "step": 3540 }, { "epoch": 0.24086153009919825, "grad_norm": 1.8380593061447144, "learning_rate": 0.0009699092947411333, "loss": 3.6883, "step": 3545 }, { "epoch": 0.24120125016986005, "grad_norm": 2.0151917934417725, "learning_rate": 0.0009698668297323006, "loss": 3.8053, "step": 3550 }, { "epoch": 0.24154097024052182, "grad_norm": 1.4926338195800781, "learning_rate": 0.0009698243647234679, "loss": 3.7723, "step": 3555 }, { "epoch": 0.2418806903111836, "grad_norm": 1.9758594036102295, "learning_rate": 0.0009697818997146351, "loss": 3.5305, "step": 3560 }, { "epoch": 0.24222041038184536, "grad_norm": 1.5721960067749023, "learning_rate": 0.0009697394347058023, "loss": 3.5298, "step": 3565 }, { "epoch": 0.24256013045250713, "grad_norm": 1.8581582307815552, "learning_rate": 0.0009696969696969698, "loss": 3.4608, "step": 3570 }, { "epoch": 0.2428998505231689, "grad_norm": 1.577727198600769, "learning_rate": 0.000969654504688137, "loss": 3.7079, "step": 3575 }, { "epoch": 0.2432395705938307, 
"grad_norm": 1.467142105102539, "learning_rate": 0.0009696120396793042, "loss": 3.6655, "step": 3580 }, { "epoch": 0.24357929066449246, "grad_norm": 1.511665940284729, "learning_rate": 0.0009695695746704716, "loss": 3.7597, "step": 3585 }, { "epoch": 0.24391901073515423, "grad_norm": 1.8030650615692139, "learning_rate": 0.0009695271096616388, "loss": 3.4416, "step": 3590 }, { "epoch": 0.244258730805816, "grad_norm": 1.7697151899337769, "learning_rate": 0.000969484644652806, "loss": 3.5827, "step": 3595 }, { "epoch": 0.24459845087647777, "grad_norm": 1.7617601156234741, "learning_rate": 0.0009694421796439735, "loss": 3.4632, "step": 3600 }, { "epoch": 0.24493817094713954, "grad_norm": 1.9390262365341187, "learning_rate": 0.0009693997146351407, "loss": 3.6195, "step": 3605 }, { "epoch": 0.24527789101780134, "grad_norm": 2.0591506958007812, "learning_rate": 0.0009693572496263079, "loss": 3.6436, "step": 3610 }, { "epoch": 0.2456176110884631, "grad_norm": 1.4490967988967896, "learning_rate": 0.0009693147846174752, "loss": 3.832, "step": 3615 }, { "epoch": 0.24595733115912488, "grad_norm": 1.4150968790054321, "learning_rate": 0.0009692723196086425, "loss": 3.4359, "step": 3620 }, { "epoch": 0.24629705122978665, "grad_norm": 2.152158260345459, "learning_rate": 0.0009692298545998097, "loss": 3.5811, "step": 3625 }, { "epoch": 0.24663677130044842, "grad_norm": 1.7295671701431274, "learning_rate": 0.000969187389590977, "loss": 3.6264, "step": 3630 }, { "epoch": 0.24697649137111022, "grad_norm": 1.724432110786438, "learning_rate": 0.0009691449245821444, "loss": 3.701, "step": 3635 }, { "epoch": 0.247316211441772, "grad_norm": 1.67739737033844, "learning_rate": 0.0009691024595733116, "loss": 3.7374, "step": 3640 }, { "epoch": 0.24765593151243376, "grad_norm": 1.7445039749145508, "learning_rate": 0.0009690599945644789, "loss": 3.8403, "step": 3645 }, { "epoch": 0.24799565158309553, "grad_norm": 1.5156872272491455, "learning_rate": 0.0009690175295556462, "loss": 3.5905, "step": 
3650 }, { "epoch": 0.2483353716537573, "grad_norm": 1.6233971118927002, "learning_rate": 0.0009689750645468135, "loss": 3.3996, "step": 3655 }, { "epoch": 0.24867509172441907, "grad_norm": 1.7899446487426758, "learning_rate": 0.0009689325995379807, "loss": 3.5067, "step": 3660 }, { "epoch": 0.24901481179508086, "grad_norm": 1.6112663745880127, "learning_rate": 0.0009688901345291479, "loss": 3.4837, "step": 3665 }, { "epoch": 0.24935453186574263, "grad_norm": 1.7469441890716553, "learning_rate": 0.0009688476695203154, "loss": 3.6904, "step": 3670 }, { "epoch": 0.2496942519364044, "grad_norm": 2.3145127296447754, "learning_rate": 0.0009688052045114826, "loss": 3.8847, "step": 3675 }, { "epoch": 0.2500339720070662, "grad_norm": 1.4789059162139893, "learning_rate": 0.0009687627395026498, "loss": 3.6061, "step": 3680 }, { "epoch": 0.25037369207772797, "grad_norm": 1.709417462348938, "learning_rate": 0.0009687202744938172, "loss": 3.5621, "step": 3685 }, { "epoch": 0.2507134121483897, "grad_norm": 1.5420328378677368, "learning_rate": 0.0009686778094849844, "loss": 3.5282, "step": 3690 }, { "epoch": 0.2510531322190515, "grad_norm": 1.5241730213165283, "learning_rate": 0.0009686353444761516, "loss": 3.7085, "step": 3695 }, { "epoch": 0.25139285228971325, "grad_norm": 2.122814178466797, "learning_rate": 0.000968592879467319, "loss": 3.7586, "step": 3700 }, { "epoch": 0.25173257236037505, "grad_norm": 1.407950758934021, "learning_rate": 0.0009685504144584863, "loss": 3.6128, "step": 3705 }, { "epoch": 0.25207229243103685, "grad_norm": 1.8056846857070923, "learning_rate": 0.0009685079494496535, "loss": 3.5938, "step": 3710 }, { "epoch": 0.2524120125016986, "grad_norm": 1.9822911024093628, "learning_rate": 0.0009684654844408209, "loss": 3.5664, "step": 3715 }, { "epoch": 0.2527517325723604, "grad_norm": 1.328016757965088, "learning_rate": 0.0009684230194319881, "loss": 3.5697, "step": 3720 }, { "epoch": 0.25309145264302213, "grad_norm": 1.3940551280975342, "learning_rate": 
0.0009683805544231553, "loss": 3.6419, "step": 3725 }, { "epoch": 0.2534311727136839, "grad_norm": 1.7653921842575073, "learning_rate": 0.0009683380894143226, "loss": 3.8411, "step": 3730 }, { "epoch": 0.2537708927843457, "grad_norm": 1.643896222114563, "learning_rate": 0.0009682956244054899, "loss": 3.5884, "step": 3735 }, { "epoch": 0.25411061285500747, "grad_norm": 1.7140260934829712, "learning_rate": 0.0009682531593966572, "loss": 3.6185, "step": 3740 }, { "epoch": 0.25445033292566926, "grad_norm": 2.233377456665039, "learning_rate": 0.0009682106943878245, "loss": 3.4999, "step": 3745 }, { "epoch": 0.254790052996331, "grad_norm": 1.4244579076766968, "learning_rate": 0.0009681682293789918, "loss": 3.3353, "step": 3750 }, { "epoch": 0.2551297730669928, "grad_norm": 2.051297903060913, "learning_rate": 0.000968125764370159, "loss": 3.7613, "step": 3755 }, { "epoch": 0.25546949313765455, "grad_norm": 1.6325483322143555, "learning_rate": 0.0009680832993613263, "loss": 3.6198, "step": 3760 }, { "epoch": 0.25580921320831634, "grad_norm": 1.3144327402114868, "learning_rate": 0.0009680408343524935, "loss": 3.5509, "step": 3765 }, { "epoch": 0.25614893327897814, "grad_norm": 1.784635305404663, "learning_rate": 0.0009679983693436608, "loss": 3.576, "step": 3770 }, { "epoch": 0.2564886533496399, "grad_norm": 1.3674687147140503, "learning_rate": 0.0009679559043348282, "loss": 3.5237, "step": 3775 }, { "epoch": 0.2568283734203017, "grad_norm": 1.7412995100021362, "learning_rate": 0.0009679134393259954, "loss": 3.7595, "step": 3780 }, { "epoch": 0.2571680934909634, "grad_norm": 1.4789847135543823, "learning_rate": 0.0009678709743171627, "loss": 3.6404, "step": 3785 }, { "epoch": 0.2575078135616252, "grad_norm": 1.5251604318618774, "learning_rate": 0.00096782850930833, "loss": 3.7505, "step": 3790 }, { "epoch": 0.257847533632287, "grad_norm": 1.8849817514419556, "learning_rate": 0.0009677860442994972, "loss": 3.5519, "step": 3795 }, { "epoch": 0.25818725370294876, "grad_norm": 
1.5429587364196777, "learning_rate": 0.0009677435792906644, "loss": 3.521, "step": 3800 }, { "epoch": 0.25852697377361056, "grad_norm": 2.0649185180664062, "learning_rate": 0.0009677011142818318, "loss": 3.5655, "step": 3805 }, { "epoch": 0.2588666938442723, "grad_norm": 2.04229474067688, "learning_rate": 0.0009676586492729991, "loss": 3.5545, "step": 3810 }, { "epoch": 0.2592064139149341, "grad_norm": 1.4425851106643677, "learning_rate": 0.0009676161842641663, "loss": 3.6083, "step": 3815 }, { "epoch": 0.2595461339855959, "grad_norm": 1.8348979949951172, "learning_rate": 0.0009675737192553337, "loss": 3.4537, "step": 3820 }, { "epoch": 0.25988585405625764, "grad_norm": 1.6825437545776367, "learning_rate": 0.0009675312542465009, "loss": 3.7005, "step": 3825 }, { "epoch": 0.26022557412691943, "grad_norm": 2.177771806716919, "learning_rate": 0.0009674887892376681, "loss": 3.4438, "step": 3830 }, { "epoch": 0.2605652941975812, "grad_norm": 1.6740189790725708, "learning_rate": 0.0009674463242288355, "loss": 3.5675, "step": 3835 }, { "epoch": 0.260905014268243, "grad_norm": 1.547965407371521, "learning_rate": 0.0009674038592200027, "loss": 3.5831, "step": 3840 }, { "epoch": 0.2612447343389047, "grad_norm": 1.5016427040100098, "learning_rate": 0.00096736139421117, "loss": 3.8128, "step": 3845 }, { "epoch": 0.2615844544095665, "grad_norm": 1.9924545288085938, "learning_rate": 0.0009673189292023374, "loss": 3.6712, "step": 3850 }, { "epoch": 0.2619241744802283, "grad_norm": 1.7119237184524536, "learning_rate": 0.0009672764641935046, "loss": 3.5019, "step": 3855 }, { "epoch": 0.26226389455089005, "grad_norm": 2.047563314437866, "learning_rate": 0.0009672339991846718, "loss": 3.7089, "step": 3860 }, { "epoch": 0.26260361462155185, "grad_norm": 1.9158763885498047, "learning_rate": 0.0009671915341758391, "loss": 3.5196, "step": 3865 }, { "epoch": 0.2629433346922136, "grad_norm": 1.4541079998016357, "learning_rate": 0.0009671490691670064, "loss": 3.4594, "step": 3870 }, { 
"epoch": 0.2632830547628754, "grad_norm": 1.6790786981582642, "learning_rate": 0.0009671066041581736, "loss": 3.7876, "step": 3875 }, { "epoch": 0.2636227748335372, "grad_norm": 1.4040383100509644, "learning_rate": 0.000967064139149341, "loss": 3.7276, "step": 3880 }, { "epoch": 0.26396249490419893, "grad_norm": 1.7659581899642944, "learning_rate": 0.0009670216741405083, "loss": 3.6695, "step": 3885 }, { "epoch": 0.2643022149748607, "grad_norm": 1.3197238445281982, "learning_rate": 0.0009669792091316755, "loss": 3.6416, "step": 3890 }, { "epoch": 0.26464193504552247, "grad_norm": 1.6076154708862305, "learning_rate": 0.0009669367441228428, "loss": 3.6873, "step": 3895 }, { "epoch": 0.26498165511618427, "grad_norm": 1.7599574327468872, "learning_rate": 0.00096689427911401, "loss": 3.4283, "step": 3900 }, { "epoch": 0.26532137518684606, "grad_norm": 1.913248062133789, "learning_rate": 0.0009668518141051773, "loss": 3.925, "step": 3905 }, { "epoch": 0.2656610952575078, "grad_norm": 1.8878780603408813, "learning_rate": 0.0009668093490963446, "loss": 3.4772, "step": 3910 }, { "epoch": 0.2660008153281696, "grad_norm": 1.7494312524795532, "learning_rate": 0.0009667668840875119, "loss": 3.7985, "step": 3915 }, { "epoch": 0.26634053539883135, "grad_norm": 1.9412028789520264, "learning_rate": 0.0009667244190786792, "loss": 3.6923, "step": 3920 }, { "epoch": 0.26668025546949314, "grad_norm": 1.7971562147140503, "learning_rate": 0.0009666819540698465, "loss": 3.6432, "step": 3925 }, { "epoch": 0.2670199755401549, "grad_norm": 1.655128836631775, "learning_rate": 0.0009666394890610137, "loss": 3.4485, "step": 3930 }, { "epoch": 0.2673596956108167, "grad_norm": 1.7047431468963623, "learning_rate": 0.000966597024052181, "loss": 3.7958, "step": 3935 }, { "epoch": 0.2676994156814785, "grad_norm": 1.3011595010757446, "learning_rate": 0.0009665545590433483, "loss": 3.6709, "step": 3940 }, { "epoch": 0.2680391357521402, "grad_norm": 1.818697452545166, "learning_rate": 
0.0009665120940345155, "loss": 3.5815, "step": 3945 }, { "epoch": 0.268378855822802, "grad_norm": 1.6471412181854248, "learning_rate": 0.0009664696290256829, "loss": 3.8168, "step": 3950 }, { "epoch": 0.26871857589346376, "grad_norm": 1.7347959280014038, "learning_rate": 0.0009664271640168502, "loss": 3.3985, "step": 3955 }, { "epoch": 0.26905829596412556, "grad_norm": 1.4409840106964111, "learning_rate": 0.0009663846990080174, "loss": 3.6421, "step": 3960 }, { "epoch": 0.26939801603478736, "grad_norm": 1.8012142181396484, "learning_rate": 0.0009663422339991846, "loss": 3.8372, "step": 3965 }, { "epoch": 0.2697377361054491, "grad_norm": 1.6555296182632446, "learning_rate": 0.000966299768990352, "loss": 3.4977, "step": 3970 }, { "epoch": 0.2700774561761109, "grad_norm": 1.7173460721969604, "learning_rate": 0.0009662573039815192, "loss": 3.8755, "step": 3975 }, { "epoch": 0.27041717624677264, "grad_norm": 1.6031782627105713, "learning_rate": 0.0009662148389726864, "loss": 3.5847, "step": 3980 }, { "epoch": 0.27075689631743444, "grad_norm": 1.401755690574646, "learning_rate": 0.0009661723739638539, "loss": 3.8421, "step": 3985 }, { "epoch": 0.27109661638809623, "grad_norm": 1.3278707265853882, "learning_rate": 0.0009661299089550211, "loss": 3.6455, "step": 3990 }, { "epoch": 0.271436336458758, "grad_norm": 1.7201694250106812, "learning_rate": 0.0009660874439461884, "loss": 3.5649, "step": 3995 }, { "epoch": 0.2717760565294198, "grad_norm": 1.8815011978149414, "learning_rate": 0.0009660449789373557, "loss": 3.9249, "step": 4000 }, { "epoch": 0.2721157766000815, "grad_norm": 1.5198564529418945, "learning_rate": 0.0009660025139285229, "loss": 3.6623, "step": 4005 }, { "epoch": 0.2724554966707433, "grad_norm": 1.4931174516677856, "learning_rate": 0.0009659600489196902, "loss": 3.5131, "step": 4010 }, { "epoch": 0.27279521674140506, "grad_norm": 1.9197564125061035, "learning_rate": 0.0009659175839108574, "loss": 3.8445, "step": 4015 }, { "epoch": 0.27313493681206685, 
"grad_norm": 1.6390297412872314, "learning_rate": 0.0009658751189020248, "loss": 3.5625, "step": 4020 }, { "epoch": 0.27347465688272865, "grad_norm": 1.6569371223449707, "learning_rate": 0.0009658326538931921, "loss": 3.4895, "step": 4025 }, { "epoch": 0.2738143769533904, "grad_norm": 1.63228440284729, "learning_rate": 0.0009657901888843593, "loss": 3.4045, "step": 4030 }, { "epoch": 0.2741540970240522, "grad_norm": 1.5773730278015137, "learning_rate": 0.0009657477238755266, "loss": 3.6429, "step": 4035 }, { "epoch": 0.27449381709471393, "grad_norm": 1.481484055519104, "learning_rate": 0.0009657052588666939, "loss": 3.4865, "step": 4040 }, { "epoch": 0.27483353716537573, "grad_norm": 1.8164265155792236, "learning_rate": 0.0009656627938578611, "loss": 3.8183, "step": 4045 }, { "epoch": 0.2751732572360375, "grad_norm": 1.9974746704101562, "learning_rate": 0.0009656203288490283, "loss": 3.6882, "step": 4050 }, { "epoch": 0.27551297730669927, "grad_norm": 1.3304774761199951, "learning_rate": 0.0009655778638401958, "loss": 3.6524, "step": 4055 }, { "epoch": 0.27585269737736107, "grad_norm": 1.78144371509552, "learning_rate": 0.000965535398831363, "loss": 3.6019, "step": 4060 }, { "epoch": 0.2761924174480228, "grad_norm": 1.9196199178695679, "learning_rate": 0.0009654929338225302, "loss": 3.566, "step": 4065 }, { "epoch": 0.2765321375186846, "grad_norm": 1.7296453714370728, "learning_rate": 0.0009654504688136976, "loss": 3.4855, "step": 4070 }, { "epoch": 0.2768718575893464, "grad_norm": 1.551680088043213, "learning_rate": 0.0009654080038048648, "loss": 3.7251, "step": 4075 }, { "epoch": 0.27721157766000815, "grad_norm": 1.5293211936950684, "learning_rate": 0.000965365538796032, "loss": 3.6803, "step": 4080 }, { "epoch": 0.27755129773066994, "grad_norm": 1.6264288425445557, "learning_rate": 0.0009653230737871994, "loss": 3.3374, "step": 4085 }, { "epoch": 0.2778910178013317, "grad_norm": 1.5331319570541382, "learning_rate": 0.0009652806087783667, "loss": 3.5872, "step": 
4090 }, { "epoch": 0.2782307378719935, "grad_norm": 2.0460410118103027, "learning_rate": 0.0009652381437695339, "loss": 3.6339, "step": 4095 }, { "epoch": 0.2785704579426552, "grad_norm": 1.5382052659988403, "learning_rate": 0.0009651956787607013, "loss": 3.6379, "step": 4100 }, { "epoch": 0.278910178013317, "grad_norm": 1.3795344829559326, "learning_rate": 0.0009651532137518685, "loss": 3.4593, "step": 4105 }, { "epoch": 0.2792498980839788, "grad_norm": 1.5717251300811768, "learning_rate": 0.0009651107487430357, "loss": 3.6838, "step": 4110 }, { "epoch": 0.27958961815464056, "grad_norm": 1.579990267753601, "learning_rate": 0.000965068283734203, "loss": 3.6985, "step": 4115 }, { "epoch": 0.27992933822530236, "grad_norm": 1.3008579015731812, "learning_rate": 0.0009650258187253703, "loss": 3.5882, "step": 4120 }, { "epoch": 0.2802690582959641, "grad_norm": 1.7178348302841187, "learning_rate": 0.0009649833537165376, "loss": 3.5098, "step": 4125 }, { "epoch": 0.2806087783666259, "grad_norm": 2.0299477577209473, "learning_rate": 0.0009649408887077049, "loss": 3.6209, "step": 4130 }, { "epoch": 0.2809484984372877, "grad_norm": 1.343109369277954, "learning_rate": 0.0009648984236988722, "loss": 3.5313, "step": 4135 }, { "epoch": 0.28128821850794944, "grad_norm": 1.4102855920791626, "learning_rate": 0.0009648559586900394, "loss": 3.5522, "step": 4140 }, { "epoch": 0.28162793857861124, "grad_norm": 1.670336127281189, "learning_rate": 0.0009648134936812067, "loss": 3.5512, "step": 4145 }, { "epoch": 0.281967658649273, "grad_norm": 2.0221920013427734, "learning_rate": 0.000964771028672374, "loss": 3.8898, "step": 4150 }, { "epoch": 0.2823073787199348, "grad_norm": 1.6254242658615112, "learning_rate": 0.0009647285636635412, "loss": 3.7428, "step": 4155 }, { "epoch": 0.2826470987905966, "grad_norm": 1.6353269815444946, "learning_rate": 0.0009646860986547086, "loss": 3.6491, "step": 4160 }, { "epoch": 0.2829868188612583, "grad_norm": 1.6046712398529053, "learning_rate": 
0.0009646436336458758, "loss": 3.3938, "step": 4165 }, { "epoch": 0.2833265389319201, "grad_norm": 1.4019941091537476, "learning_rate": 0.0009646011686370431, "loss": 3.5599, "step": 4170 }, { "epoch": 0.28366625900258186, "grad_norm": 1.75790274143219, "learning_rate": 0.0009645587036282104, "loss": 3.6403, "step": 4175 }, { "epoch": 0.28400597907324365, "grad_norm": 2.0541579723358154, "learning_rate": 0.0009645162386193776, "loss": 3.1636, "step": 4180 }, { "epoch": 0.2843456991439054, "grad_norm": 1.6164199113845825, "learning_rate": 0.0009644737736105449, "loss": 3.6885, "step": 4185 }, { "epoch": 0.2846854192145672, "grad_norm": 1.9806115627288818, "learning_rate": 0.0009644313086017123, "loss": 3.4432, "step": 4190 }, { "epoch": 0.285025139285229, "grad_norm": 1.8732802867889404, "learning_rate": 0.0009643888435928795, "loss": 3.6186, "step": 4195 }, { "epoch": 0.28536485935589073, "grad_norm": 1.5745054483413696, "learning_rate": 0.0009643463785840467, "loss": 3.5789, "step": 4200 }, { "epoch": 0.28570457942655253, "grad_norm": 1.446146011352539, "learning_rate": 0.0009643039135752141, "loss": 3.7719, "step": 4205 }, { "epoch": 0.28604429949721427, "grad_norm": 1.7768806219100952, "learning_rate": 0.0009642614485663813, "loss": 3.6499, "step": 4210 }, { "epoch": 0.28638401956787607, "grad_norm": 1.832388997077942, "learning_rate": 0.0009642189835575485, "loss": 3.6779, "step": 4215 }, { "epoch": 0.28672373963853787, "grad_norm": 1.8642613887786865, "learning_rate": 0.0009641765185487159, "loss": 3.8679, "step": 4220 }, { "epoch": 0.2870634597091996, "grad_norm": 1.821063756942749, "learning_rate": 0.0009641340535398832, "loss": 3.7495, "step": 4225 }, { "epoch": 0.2874031797798614, "grad_norm": 1.6552175283432007, "learning_rate": 0.0009640915885310504, "loss": 3.6199, "step": 4230 }, { "epoch": 0.28774289985052315, "grad_norm": 1.2822569608688354, "learning_rate": 0.0009640491235222178, "loss": 3.3997, "step": 4235 }, { "epoch": 0.28808261992118495, 
"grad_norm": 1.6946666240692139, "learning_rate": 0.000964006658513385, "loss": 3.859, "step": 4240 }, { "epoch": 0.28842233999184674, "grad_norm": 1.3912640810012817, "learning_rate": 0.0009639641935045522, "loss": 3.6674, "step": 4245 }, { "epoch": 0.2887620600625085, "grad_norm": 1.8483341932296753, "learning_rate": 0.0009639217284957195, "loss": 3.557, "step": 4250 }, { "epoch": 0.2891017801331703, "grad_norm": 1.3110934495925903, "learning_rate": 0.0009638792634868868, "loss": 3.4674, "step": 4255 }, { "epoch": 0.289441500203832, "grad_norm": 1.8890424966812134, "learning_rate": 0.0009638367984780541, "loss": 3.8876, "step": 4260 }, { "epoch": 0.2897812202744938, "grad_norm": 1.6960710287094116, "learning_rate": 0.0009637943334692214, "loss": 3.5137, "step": 4265 }, { "epoch": 0.29012094034515556, "grad_norm": 1.8817591667175293, "learning_rate": 0.0009637518684603887, "loss": 3.6519, "step": 4270 }, { "epoch": 0.29046066041581736, "grad_norm": 1.8566970825195312, "learning_rate": 0.0009637094034515559, "loss": 3.6941, "step": 4275 }, { "epoch": 0.29080038048647916, "grad_norm": 2.264664649963379, "learning_rate": 0.0009636669384427232, "loss": 3.5708, "step": 4280 }, { "epoch": 0.2911401005571409, "grad_norm": 1.3475514650344849, "learning_rate": 0.0009636244734338905, "loss": 3.7303, "step": 4285 }, { "epoch": 0.2914798206278027, "grad_norm": 1.7354192733764648, "learning_rate": 0.0009635820084250577, "loss": 3.546, "step": 4290 }, { "epoch": 0.29181954069846444, "grad_norm": 1.648453712463379, "learning_rate": 0.0009635395434162251, "loss": 3.8388, "step": 4295 }, { "epoch": 0.29215926076912624, "grad_norm": 1.629280924797058, "learning_rate": 0.0009634970784073923, "loss": 3.3908, "step": 4300 }, { "epoch": 0.29249898083978804, "grad_norm": 1.9828431606292725, "learning_rate": 0.0009634546133985596, "loss": 3.4265, "step": 4305 }, { "epoch": 0.2928387009104498, "grad_norm": 1.9391262531280518, "learning_rate": 0.0009634121483897269, "loss": 3.7146, "step": 
4310 }, { "epoch": 0.2931784209811116, "grad_norm": 2.23972749710083, "learning_rate": 0.0009633696833808941, "loss": 3.4829, "step": 4315 }, { "epoch": 0.2935181410517733, "grad_norm": 1.7528808116912842, "learning_rate": 0.0009633272183720614, "loss": 3.6739, "step": 4320 }, { "epoch": 0.2938578611224351, "grad_norm": 1.6169466972351074, "learning_rate": 0.0009632847533632287, "loss": 3.5937, "step": 4325 }, { "epoch": 0.2941975811930969, "grad_norm": 1.7219630479812622, "learning_rate": 0.000963242288354396, "loss": 3.5625, "step": 4330 }, { "epoch": 0.29453730126375866, "grad_norm": 1.7854793071746826, "learning_rate": 0.0009631998233455634, "loss": 3.833, "step": 4335 }, { "epoch": 0.29487702133442045, "grad_norm": 2.0168633460998535, "learning_rate": 0.0009631573583367306, "loss": 3.6145, "step": 4340 }, { "epoch": 0.2952167414050822, "grad_norm": 1.585697889328003, "learning_rate": 0.0009631148933278978, "loss": 3.1314, "step": 4345 }, { "epoch": 0.295556461475744, "grad_norm": 1.606757402420044, "learning_rate": 0.0009630724283190651, "loss": 3.4589, "step": 4350 }, { "epoch": 0.29589618154640573, "grad_norm": 1.6595755815505981, "learning_rate": 0.0009630299633102324, "loss": 3.6355, "step": 4355 }, { "epoch": 0.29623590161706753, "grad_norm": 1.4556337594985962, "learning_rate": 0.0009629874983013996, "loss": 3.4913, "step": 4360 }, { "epoch": 0.29657562168772933, "grad_norm": 1.3990724086761475, "learning_rate": 0.000962945033292567, "loss": 3.7226, "step": 4365 }, { "epoch": 0.29691534175839107, "grad_norm": 1.4031081199645996, "learning_rate": 0.0009629025682837343, "loss": 3.6961, "step": 4370 }, { "epoch": 0.29725506182905287, "grad_norm": 1.8740366697311401, "learning_rate": 0.0009628601032749015, "loss": 3.6036, "step": 4375 }, { "epoch": 0.2975947818997146, "grad_norm": 1.962409496307373, "learning_rate": 0.0009628176382660688, "loss": 3.5769, "step": 4380 }, { "epoch": 0.2979345019703764, "grad_norm": 1.4516059160232544, "learning_rate": 
0.0009627751732572361, "loss": 3.703, "step": 4385 }, { "epoch": 0.2982742220410382, "grad_norm": 1.7057605981826782, "learning_rate": 0.0009627327082484033, "loss": 3.7725, "step": 4390 }, { "epoch": 0.29861394211169995, "grad_norm": 1.6146397590637207, "learning_rate": 0.0009626902432395706, "loss": 3.7608, "step": 4395 }, { "epoch": 0.29895366218236175, "grad_norm": 1.8697859048843384, "learning_rate": 0.000962647778230738, "loss": 3.6636, "step": 4400 }, { "epoch": 0.2992933822530235, "grad_norm": 2.0618832111358643, "learning_rate": 0.0009626053132219052, "loss": 3.6108, "step": 4405 }, { "epoch": 0.2996331023236853, "grad_norm": 1.9511274099349976, "learning_rate": 0.0009625628482130725, "loss": 3.8363, "step": 4410 }, { "epoch": 0.2999728223943471, "grad_norm": 2.013293504714966, "learning_rate": 0.0009625203832042397, "loss": 3.7908, "step": 4415 }, { "epoch": 0.3003125424650088, "grad_norm": 2.0576136112213135, "learning_rate": 0.0009624864111971735, "loss": 3.6025, "step": 4420 }, { "epoch": 0.3006522625356706, "grad_norm": 1.3597527742385864, "learning_rate": 0.0009624439461883409, "loss": 3.7897, "step": 4425 }, { "epoch": 0.30099198260633236, "grad_norm": 1.8685541152954102, "learning_rate": 0.0009624014811795081, "loss": 3.7998, "step": 4430 }, { "epoch": 0.30133170267699416, "grad_norm": 2.3048958778381348, "learning_rate": 0.0009623590161706753, "loss": 3.8348, "step": 4435 }, { "epoch": 0.3016714227476559, "grad_norm": 1.7313216924667358, "learning_rate": 0.0009623165511618427, "loss": 3.4732, "step": 4440 }, { "epoch": 0.3020111428183177, "grad_norm": 1.6531474590301514, "learning_rate": 0.0009622740861530099, "loss": 3.5901, "step": 4445 }, { "epoch": 0.3023508628889795, "grad_norm": 1.7028967142105103, "learning_rate": 0.0009622316211441771, "loss": 3.7226, "step": 4450 }, { "epoch": 0.30269058295964124, "grad_norm": 1.3993499279022217, "learning_rate": 0.0009621891561353446, "loss": 3.3465, "step": 4455 }, { "epoch": 0.30303030303030304, 
"grad_norm": 1.6531774997711182, "learning_rate": 0.0009621466911265118, "loss": 3.6687, "step": 4460 }, { "epoch": 0.3033700231009648, "grad_norm": 1.4721078872680664, "learning_rate": 0.000962104226117679, "loss": 3.5636, "step": 4465 }, { "epoch": 0.3037097431716266, "grad_norm": 1.6204341650009155, "learning_rate": 0.0009620617611088464, "loss": 3.4352, "step": 4470 }, { "epoch": 0.3040494632422884, "grad_norm": 1.5493615865707397, "learning_rate": 0.0009620192961000136, "loss": 3.7168, "step": 4475 }, { "epoch": 0.3043891833129501, "grad_norm": 1.7600418329238892, "learning_rate": 0.0009619768310911808, "loss": 3.8277, "step": 4480 }, { "epoch": 0.3047289033836119, "grad_norm": 1.5024733543395996, "learning_rate": 0.0009619343660823481, "loss": 3.5137, "step": 4485 }, { "epoch": 0.30506862345427366, "grad_norm": 1.5084400177001953, "learning_rate": 0.0009618919010735155, "loss": 3.8947, "step": 4490 }, { "epoch": 0.30540834352493546, "grad_norm": 2.2310216426849365, "learning_rate": 0.0009618494360646827, "loss": 3.6601, "step": 4495 }, { "epoch": 0.30574806359559725, "grad_norm": 1.6771196126937866, "learning_rate": 0.00096180697105585, "loss": 3.6829, "step": 4500 }, { "epoch": 0.306087783666259, "grad_norm": 1.6353970766067505, "learning_rate": 0.0009617645060470173, "loss": 3.5522, "step": 4505 }, { "epoch": 0.3064275037369208, "grad_norm": 1.7752729654312134, "learning_rate": 0.0009617220410381845, "loss": 3.8385, "step": 4510 }, { "epoch": 0.30676722380758253, "grad_norm": 1.2794368267059326, "learning_rate": 0.0009616795760293518, "loss": 3.6293, "step": 4515 }, { "epoch": 0.30710694387824433, "grad_norm": 1.9600614309310913, "learning_rate": 0.000961637111020519, "loss": 3.7696, "step": 4520 }, { "epoch": 0.3074466639489061, "grad_norm": 1.8371652364730835, "learning_rate": 0.0009615946460116864, "loss": 3.4786, "step": 4525 }, { "epoch": 0.30778638401956787, "grad_norm": 2.2029101848602295, "learning_rate": 0.0009615521810028537, "loss": 3.7174, 
"step": 4530 }, { "epoch": 0.30812610409022967, "grad_norm": 1.8458263874053955, "learning_rate": 0.000961509715994021, "loss": 3.4452, "step": 4535 }, { "epoch": 0.3084658241608914, "grad_norm": 1.5259002447128296, "learning_rate": 0.0009614672509851883, "loss": 3.5522, "step": 4540 }, { "epoch": 0.3088055442315532, "grad_norm": 2.343777656555176, "learning_rate": 0.0009614247859763555, "loss": 3.4196, "step": 4545 }, { "epoch": 0.30914526430221495, "grad_norm": 1.616960048675537, "learning_rate": 0.0009613823209675227, "loss": 3.5186, "step": 4550 }, { "epoch": 0.30948498437287675, "grad_norm": 1.3345574140548706, "learning_rate": 0.0009613398559586901, "loss": 3.6415, "step": 4555 }, { "epoch": 0.30982470444353855, "grad_norm": 1.5231777429580688, "learning_rate": 0.0009612973909498574, "loss": 3.6529, "step": 4560 }, { "epoch": 0.3101644245142003, "grad_norm": 1.9712320566177368, "learning_rate": 0.0009612549259410246, "loss": 3.6515, "step": 4565 }, { "epoch": 0.3105041445848621, "grad_norm": 1.7112114429473877, "learning_rate": 0.000961212460932192, "loss": 3.7356, "step": 4570 }, { "epoch": 0.3108438646555238, "grad_norm": 1.840280294418335, "learning_rate": 0.0009611699959233592, "loss": 3.3969, "step": 4575 }, { "epoch": 0.3111835847261856, "grad_norm": 2.15394926071167, "learning_rate": 0.0009611275309145264, "loss": 3.9063, "step": 4580 }, { "epoch": 0.3115233047968474, "grad_norm": 1.4783368110656738, "learning_rate": 0.0009610850659056937, "loss": 3.5012, "step": 4585 }, { "epoch": 0.31186302486750916, "grad_norm": 2.0047757625579834, "learning_rate": 0.000961042600896861, "loss": 3.602, "step": 4590 }, { "epoch": 0.31220274493817096, "grad_norm": 1.5301594734191895, "learning_rate": 0.0009610001358880283, "loss": 3.621, "step": 4595 }, { "epoch": 0.3125424650088327, "grad_norm": 1.5904452800750732, "learning_rate": 0.0009609576708791956, "loss": 3.695, "step": 4600 }, { "epoch": 0.3128821850794945, "grad_norm": 1.387202262878418, "learning_rate": 
0.0009609152058703629, "loss": 3.6807, "step": 4605 }, { "epoch": 0.3132219051501563, "grad_norm": 2.0242550373077393, "learning_rate": 0.0009608727408615301, "loss": 3.6242, "step": 4610 }, { "epoch": 0.31356162522081804, "grad_norm": 1.4115331172943115, "learning_rate": 0.0009608302758526974, "loss": 3.5131, "step": 4615 }, { "epoch": 0.31390134529147984, "grad_norm": 1.7458878755569458, "learning_rate": 0.0009607878108438647, "loss": 3.777, "step": 4620 }, { "epoch": 0.3142410653621416, "grad_norm": 1.5191013813018799, "learning_rate": 0.0009607453458350319, "loss": 3.6579, "step": 4625 }, { "epoch": 0.3145807854328034, "grad_norm": 1.4136855602264404, "learning_rate": 0.0009607028808261993, "loss": 3.3936, "step": 4630 }, { "epoch": 0.3149205055034651, "grad_norm": 2.086223840713501, "learning_rate": 0.0009606604158173665, "loss": 3.5601, "step": 4635 }, { "epoch": 0.3152602255741269, "grad_norm": 1.575417399406433, "learning_rate": 0.0009606179508085338, "loss": 3.7264, "step": 4640 }, { "epoch": 0.3155999456447887, "grad_norm": 1.80500328540802, "learning_rate": 0.0009605754857997011, "loss": 3.5797, "step": 4645 }, { "epoch": 0.31593966571545046, "grad_norm": 1.3692001104354858, "learning_rate": 0.0009605330207908683, "loss": 3.5693, "step": 4650 }, { "epoch": 0.31627938578611225, "grad_norm": 1.9751521348953247, "learning_rate": 0.0009604905557820356, "loss": 3.5339, "step": 4655 }, { "epoch": 0.316619105856774, "grad_norm": 1.5831488370895386, "learning_rate": 0.0009604480907732029, "loss": 3.869, "step": 4660 }, { "epoch": 0.3169588259274358, "grad_norm": 2.1987898349761963, "learning_rate": 0.0009604056257643702, "loss": 3.5806, "step": 4665 }, { "epoch": 0.3172985459980976, "grad_norm": 1.7886168956756592, "learning_rate": 0.0009603631607555375, "loss": 3.787, "step": 4670 }, { "epoch": 0.31763826606875933, "grad_norm": 1.5232422351837158, "learning_rate": 0.0009603206957467048, "loss": 3.6998, "step": 4675 }, { "epoch": 0.31797798613942113, 
"grad_norm": 1.4255919456481934, "learning_rate": 0.000960278230737872, "loss": 3.6531, "step": 4680 }, { "epoch": 0.3183177062100829, "grad_norm": 1.2251158952713013, "learning_rate": 0.0009602357657290392, "loss": 3.7652, "step": 4685 }, { "epoch": 0.31865742628074467, "grad_norm": 1.5682603120803833, "learning_rate": 0.0009601933007202066, "loss": 3.7353, "step": 4690 }, { "epoch": 0.31899714635140647, "grad_norm": 1.4632774591445923, "learning_rate": 0.0009601508357113738, "loss": 3.7212, "step": 4695 }, { "epoch": 0.3193368664220682, "grad_norm": 1.446112036705017, "learning_rate": 0.0009601083707025411, "loss": 3.585, "step": 4700 }, { "epoch": 0.31967658649273, "grad_norm": 1.3896945714950562, "learning_rate": 0.0009600659056937085, "loss": 3.5947, "step": 4705 }, { "epoch": 0.32001630656339175, "grad_norm": 1.9127269983291626, "learning_rate": 0.0009600234406848757, "loss": 3.4709, "step": 4710 }, { "epoch": 0.32035602663405355, "grad_norm": 1.4718868732452393, "learning_rate": 0.0009599809756760429, "loss": 3.6239, "step": 4715 }, { "epoch": 0.3206957467047153, "grad_norm": 1.4290307760238647, "learning_rate": 0.0009599385106672103, "loss": 3.7101, "step": 4720 }, { "epoch": 0.3210354667753771, "grad_norm": 1.4355024099349976, "learning_rate": 0.0009598960456583775, "loss": 3.6739, "step": 4725 }, { "epoch": 0.3213751868460389, "grad_norm": 1.5839914083480835, "learning_rate": 0.0009598535806495447, "loss": 3.492, "step": 4730 }, { "epoch": 0.3217149069167006, "grad_norm": 1.6268091201782227, "learning_rate": 0.0009598111156407122, "loss": 3.3484, "step": 4735 }, { "epoch": 0.3220546269873624, "grad_norm": 2.2000269889831543, "learning_rate": 0.0009597686506318794, "loss": 3.4255, "step": 4740 }, { "epoch": 0.32239434705802417, "grad_norm": 1.6520785093307495, "learning_rate": 0.0009597261856230466, "loss": 3.4462, "step": 4745 }, { "epoch": 0.32273406712868596, "grad_norm": 1.6519749164581299, "learning_rate": 0.0009596837206142139, "loss": 3.5746, 
"step": 4750 }, { "epoch": 0.32307378719934776, "grad_norm": 1.9127572774887085, "learning_rate": 0.0009596412556053812, "loss": 3.7475, "step": 4755 }, { "epoch": 0.3234135072700095, "grad_norm": 1.5854241847991943, "learning_rate": 0.0009595987905965484, "loss": 3.5259, "step": 4760 }, { "epoch": 0.3237532273406713, "grad_norm": 1.9638956785202026, "learning_rate": 0.0009595563255877157, "loss": 3.7522, "step": 4765 }, { "epoch": 0.32409294741133304, "grad_norm": 1.93758225440979, "learning_rate": 0.0009595138605788831, "loss": 3.6463, "step": 4770 }, { "epoch": 0.32443266748199484, "grad_norm": 1.4515398740768433, "learning_rate": 0.0009594713955700503, "loss": 3.4718, "step": 4775 }, { "epoch": 0.32477238755265664, "grad_norm": 1.552751064300537, "learning_rate": 0.0009594289305612176, "loss": 3.6724, "step": 4780 }, { "epoch": 0.3251121076233184, "grad_norm": 1.5661653280258179, "learning_rate": 0.0009593864655523848, "loss": 3.6288, "step": 4785 }, { "epoch": 0.3254518276939802, "grad_norm": 1.597206711769104, "learning_rate": 0.0009593440005435521, "loss": 3.4579, "step": 4790 }, { "epoch": 0.3257915477646419, "grad_norm": 1.5299304723739624, "learning_rate": 0.0009593015355347194, "loss": 3.5929, "step": 4795 }, { "epoch": 0.3261312678353037, "grad_norm": 2.04339599609375, "learning_rate": 0.0009592590705258866, "loss": 3.4826, "step": 4800 }, { "epoch": 0.32647098790596546, "grad_norm": 2.173170804977417, "learning_rate": 0.000959216605517054, "loss": 3.5618, "step": 4805 }, { "epoch": 0.32681070797662726, "grad_norm": 1.6006890535354614, "learning_rate": 0.0009591741405082213, "loss": 3.8083, "step": 4810 }, { "epoch": 0.32715042804728905, "grad_norm": 1.8995405435562134, "learning_rate": 0.0009591316754993885, "loss": 3.3341, "step": 4815 }, { "epoch": 0.3274901481179508, "grad_norm": 2.2167584896087646, "learning_rate": 0.0009590892104905557, "loss": 3.5476, "step": 4820 }, { "epoch": 0.3278298681886126, "grad_norm": 1.3474845886230469, "learning_rate": 
0.0009590467454817231, "loss": 3.6705, "step": 4825 }, { "epoch": 0.32816958825927434, "grad_norm": 1.8512219190597534, "learning_rate": 0.0009590042804728903, "loss": 3.6945, "step": 4830 }, { "epoch": 0.32850930832993613, "grad_norm": 1.852658748626709, "learning_rate": 0.0009589618154640575, "loss": 3.606, "step": 4835 }, { "epoch": 0.32884902840059793, "grad_norm": 1.4881197214126587, "learning_rate": 0.000958919350455225, "loss": 3.8648, "step": 4840 }, { "epoch": 0.3291887484712597, "grad_norm": 1.704888105392456, "learning_rate": 0.0009588768854463922, "loss": 3.6608, "step": 4845 }, { "epoch": 0.32952846854192147, "grad_norm": 2.285005807876587, "learning_rate": 0.0009588344204375594, "loss": 3.6845, "step": 4850 }, { "epoch": 0.3298681886125832, "grad_norm": 1.8240395784378052, "learning_rate": 0.0009587919554287268, "loss": 3.6804, "step": 4855 }, { "epoch": 0.330207908683245, "grad_norm": 2.0072484016418457, "learning_rate": 0.000958749490419894, "loss": 3.376, "step": 4860 }, { "epoch": 0.3305476287539068, "grad_norm": 2.0886483192443848, "learning_rate": 0.0009587070254110612, "loss": 3.7438, "step": 4865 }, { "epoch": 0.33088734882456855, "grad_norm": 1.637271523475647, "learning_rate": 0.0009586645604022286, "loss": 3.8999, "step": 4870 }, { "epoch": 0.33122706889523035, "grad_norm": 1.6777197122573853, "learning_rate": 0.0009586220953933959, "loss": 3.4233, "step": 4875 }, { "epoch": 0.3315667889658921, "grad_norm": 1.9813554286956787, "learning_rate": 0.0009585796303845632, "loss": 3.3802, "step": 4880 }, { "epoch": 0.3319065090365539, "grad_norm": 1.3997517824172974, "learning_rate": 0.0009585371653757304, "loss": 3.6824, "step": 4885 }, { "epoch": 0.33224622910721563, "grad_norm": 1.9375154972076416, "learning_rate": 0.0009584947003668977, "loss": 3.5905, "step": 4890 }, { "epoch": 0.3325859491778774, "grad_norm": 1.5592892169952393, "learning_rate": 0.000958452235358065, "loss": 3.9367, "step": 4895 }, { "epoch": 0.3329256692485392, "grad_norm": 
1.868634819984436, "learning_rate": 0.0009584097703492322, "loss": 3.691, "step": 4900 }, { "epoch": 0.33326538931920097, "grad_norm": 1.94418203830719, "learning_rate": 0.0009583673053403995, "loss": 3.566, "step": 4905 }, { "epoch": 0.33360510938986276, "grad_norm": 1.4222511053085327, "learning_rate": 0.0009583248403315669, "loss": 3.7224, "step": 4910 }, { "epoch": 0.3339448294605245, "grad_norm": 1.8559449911117554, "learning_rate": 0.0009582823753227341, "loss": 3.7144, "step": 4915 }, { "epoch": 0.3342845495311863, "grad_norm": 1.84160578250885, "learning_rate": 0.0009582399103139014, "loss": 3.5189, "step": 4920 }, { "epoch": 0.3346242696018481, "grad_norm": 1.5377627611160278, "learning_rate": 0.0009581974453050687, "loss": 3.8692, "step": 4925 }, { "epoch": 0.33496398967250984, "grad_norm": 1.7074371576309204, "learning_rate": 0.0009581549802962359, "loss": 4.1062, "step": 4930 }, { "epoch": 0.33530370974317164, "grad_norm": 1.8077152967453003, "learning_rate": 0.0009581125152874031, "loss": 3.6807, "step": 4935 }, { "epoch": 0.3356434298138334, "grad_norm": 1.4841220378875732, "learning_rate": 0.0009580700502785706, "loss": 3.4708, "step": 4940 }, { "epoch": 0.3359831498844952, "grad_norm": 1.3518234491348267, "learning_rate": 0.0009580275852697378, "loss": 3.7198, "step": 4945 }, { "epoch": 0.336322869955157, "grad_norm": 2.0118587017059326, "learning_rate": 0.000957985120260905, "loss": 3.6048, "step": 4950 }, { "epoch": 0.3366625900258187, "grad_norm": 1.2426066398620605, "learning_rate": 0.0009579426552520724, "loss": 3.7206, "step": 4955 }, { "epoch": 0.3370023100964805, "grad_norm": 1.4506278038024902, "learning_rate": 0.0009579001902432396, "loss": 3.5966, "step": 4960 }, { "epoch": 0.33734203016714226, "grad_norm": 1.2555594444274902, "learning_rate": 0.0009578577252344068, "loss": 3.5975, "step": 4965 }, { "epoch": 0.33768175023780406, "grad_norm": 2.3078200817108154, "learning_rate": 0.0009578152602255742, "loss": 3.3688, "step": 4970 }, { 
"epoch": 0.3380214703084658, "grad_norm": 2.2245779037475586, "learning_rate": 0.0009577727952167415, "loss": 3.5733, "step": 4975 }, { "epoch": 0.3383611903791276, "grad_norm": 1.8599636554718018, "learning_rate": 0.0009577303302079087, "loss": 3.6128, "step": 4980 }, { "epoch": 0.3387009104497894, "grad_norm": 1.55093252658844, "learning_rate": 0.000957687865199076, "loss": 3.5443, "step": 4985 }, { "epoch": 0.33904063052045114, "grad_norm": 2.1555163860321045, "learning_rate": 0.0009576454001902433, "loss": 3.6404, "step": 4990 }, { "epoch": 0.33938035059111293, "grad_norm": 1.3362329006195068, "learning_rate": 0.0009576029351814105, "loss": 3.6583, "step": 4995 }, { "epoch": 0.3397200706617747, "grad_norm": 1.7223496437072754, "learning_rate": 0.0009575604701725778, "loss": 3.5734, "step": 5000 }, { "epoch": 0.3400597907324365, "grad_norm": 1.4899802207946777, "learning_rate": 0.0009575180051637451, "loss": 3.7321, "step": 5005 }, { "epoch": 0.34039951080309827, "grad_norm": 1.8359500169754028, "learning_rate": 0.0009574755401549124, "loss": 3.5885, "step": 5010 }, { "epoch": 0.34073923087376, "grad_norm": 1.4441413879394531, "learning_rate": 0.0009574330751460797, "loss": 3.3826, "step": 5015 }, { "epoch": 0.3410789509444218, "grad_norm": 1.6429402828216553, "learning_rate": 0.000957390610137247, "loss": 3.3819, "step": 5020 }, { "epoch": 0.34141867101508355, "grad_norm": 1.8195066452026367, "learning_rate": 0.0009573481451284142, "loss": 3.3954, "step": 5025 }, { "epoch": 0.34175839108574535, "grad_norm": 1.8578399419784546, "learning_rate": 0.0009573056801195815, "loss": 3.5593, "step": 5030 }, { "epoch": 0.34209811115640715, "grad_norm": 1.9258064031600952, "learning_rate": 0.0009572632151107487, "loss": 3.4227, "step": 5035 }, { "epoch": 0.3424378312270689, "grad_norm": 2.0510997772216797, "learning_rate": 0.000957220750101916, "loss": 3.4473, "step": 5040 }, { "epoch": 0.3427775512977307, "grad_norm": 1.8113083839416504, "learning_rate": 
0.0009571782850930834, "loss": 3.6772, "step": 5045 }, { "epoch": 0.34311727136839243, "grad_norm": 1.9943455457687378, "learning_rate": 0.0009571358200842506, "loss": 3.6782, "step": 5050 }, { "epoch": 0.3434569914390542, "grad_norm": 1.1487505435943604, "learning_rate": 0.0009570933550754179, "loss": 3.7977, "step": 5055 }, { "epoch": 0.34379671150971597, "grad_norm": 2.0972678661346436, "learning_rate": 0.0009570508900665852, "loss": 3.3582, "step": 5060 }, { "epoch": 0.34413643158037777, "grad_norm": 1.5616191625595093, "learning_rate": 0.0009570084250577524, "loss": 3.2203, "step": 5065 }, { "epoch": 0.34447615165103956, "grad_norm": 1.3170372247695923, "learning_rate": 0.0009569659600489196, "loss": 3.5438, "step": 5070 }, { "epoch": 0.3448158717217013, "grad_norm": 1.9400101900100708, "learning_rate": 0.000956923495040087, "loss": 3.5116, "step": 5075 }, { "epoch": 0.3451555917923631, "grad_norm": 2.191610336303711, "learning_rate": 0.0009568810300312543, "loss": 3.6071, "step": 5080 }, { "epoch": 0.34549531186302485, "grad_norm": 2.1416234970092773, "learning_rate": 0.0009568385650224215, "loss": 3.5738, "step": 5085 }, { "epoch": 0.34583503193368664, "grad_norm": 1.9102461338043213, "learning_rate": 0.0009567961000135889, "loss": 3.4701, "step": 5090 }, { "epoch": 0.34617475200434844, "grad_norm": 1.5989357233047485, "learning_rate": 0.0009567536350047561, "loss": 3.6789, "step": 5095 }, { "epoch": 0.3465144720750102, "grad_norm": 1.6801594495773315, "learning_rate": 0.0009567111699959233, "loss": 3.2716, "step": 5100 }, { "epoch": 0.346854192145672, "grad_norm": 1.423392653465271, "learning_rate": 0.0009566687049870907, "loss": 3.5152, "step": 5105 }, { "epoch": 0.3471939122163337, "grad_norm": 1.3650151491165161, "learning_rate": 0.0009566262399782579, "loss": 3.6675, "step": 5110 }, { "epoch": 0.3475336322869955, "grad_norm": 1.7052830457687378, "learning_rate": 0.0009565837749694252, "loss": 3.6769, "step": 5115 }, { "epoch": 0.3478733523576573, 
"grad_norm": 1.6170978546142578, "learning_rate": 0.0009565413099605926, "loss": 3.5929, "step": 5120 }, { "epoch": 0.34821307242831906, "grad_norm": 1.534998893737793, "learning_rate": 0.0009564988449517598, "loss": 3.6701, "step": 5125 }, { "epoch": 0.34855279249898086, "grad_norm": 1.4119738340377808, "learning_rate": 0.000956456379942927, "loss": 3.397, "step": 5130 }, { "epoch": 0.3488925125696426, "grad_norm": 1.5991709232330322, "learning_rate": 0.0009564139149340943, "loss": 3.4739, "step": 5135 }, { "epoch": 0.3492322326403044, "grad_norm": 1.3187782764434814, "learning_rate": 0.0009563714499252616, "loss": 3.4964, "step": 5140 }, { "epoch": 0.34957195271096614, "grad_norm": 1.589157223701477, "learning_rate": 0.0009563289849164288, "loss": 3.377, "step": 5145 }, { "epoch": 0.34991167278162794, "grad_norm": 1.541229486465454, "learning_rate": 0.0009562865199075962, "loss": 3.7868, "step": 5150 }, { "epoch": 0.35025139285228973, "grad_norm": 1.4450570344924927, "learning_rate": 0.0009562440548987635, "loss": 3.4866, "step": 5155 }, { "epoch": 0.3505911129229515, "grad_norm": 1.7800214290618896, "learning_rate": 0.0009562015898899307, "loss": 3.7113, "step": 5160 }, { "epoch": 0.3509308329936133, "grad_norm": 1.7120628356933594, "learning_rate": 0.000956159124881098, "loss": 3.6181, "step": 5165 }, { "epoch": 0.351270553064275, "grad_norm": 1.386736512184143, "learning_rate": 0.0009561166598722652, "loss": 3.5039, "step": 5170 }, { "epoch": 0.3516102731349368, "grad_norm": 1.9025224447250366, "learning_rate": 0.0009560741948634325, "loss": 3.3304, "step": 5175 }, { "epoch": 0.3519499932055986, "grad_norm": 2.372744083404541, "learning_rate": 0.0009560317298545998, "loss": 3.7123, "step": 5180 }, { "epoch": 0.35228971327626035, "grad_norm": 1.9807432889938354, "learning_rate": 0.0009559892648457671, "loss": 3.5817, "step": 5185 }, { "epoch": 0.35262943334692215, "grad_norm": 1.5918465852737427, "learning_rate": 0.0009559467998369344, "loss": 3.6833, "step": 
5190 }, { "epoch": 0.3529691534175839, "grad_norm": 1.9959990978240967, "learning_rate": 0.0009559043348281017, "loss": 3.4737, "step": 5195 }, { "epoch": 0.3533088734882457, "grad_norm": 1.9834957122802734, "learning_rate": 0.0009558618698192689, "loss": 3.7348, "step": 5200 }, { "epoch": 0.3536485935589075, "grad_norm": 1.7107113599777222, "learning_rate": 0.0009558194048104362, "loss": 3.489, "step": 5205 }, { "epoch": 0.35398831362956923, "grad_norm": 1.8402115106582642, "learning_rate": 0.0009557769398016035, "loss": 3.6471, "step": 5210 }, { "epoch": 0.354328033700231, "grad_norm": 1.5516947507858276, "learning_rate": 0.0009557344747927707, "loss": 3.7709, "step": 5215 }, { "epoch": 0.35466775377089277, "grad_norm": 1.951892375946045, "learning_rate": 0.0009556920097839382, "loss": 3.7723, "step": 5220 }, { "epoch": 0.35500747384155457, "grad_norm": 1.5375219583511353, "learning_rate": 0.0009556495447751054, "loss": 3.5407, "step": 5225 }, { "epoch": 0.3553471939122163, "grad_norm": 1.432762861251831, "learning_rate": 0.0009556070797662726, "loss": 3.5936, "step": 5230 }, { "epoch": 0.3556869139828781, "grad_norm": 1.684902310371399, "learning_rate": 0.0009555646147574399, "loss": 3.3216, "step": 5235 }, { "epoch": 0.3560266340535399, "grad_norm": 1.4199302196502686, "learning_rate": 0.0009555221497486072, "loss": 3.6333, "step": 5240 }, { "epoch": 0.35636635412420165, "grad_norm": 1.7409316301345825, "learning_rate": 0.0009554796847397744, "loss": 3.5079, "step": 5245 }, { "epoch": 0.35670607419486344, "grad_norm": 1.6241892576217651, "learning_rate": 0.0009554372197309417, "loss": 3.8694, "step": 5250 }, { "epoch": 0.3570457942655252, "grad_norm": 1.4146661758422852, "learning_rate": 0.0009553947547221091, "loss": 3.4462, "step": 5255 }, { "epoch": 0.357385514336187, "grad_norm": 1.619876503944397, "learning_rate": 0.0009553522897132763, "loss": 3.5213, "step": 5260 }, { "epoch": 0.3577252344068488, "grad_norm": 1.6829078197479248, "learning_rate": 
0.0009553098247044436, "loss": 3.2552, "step": 5265 }, { "epoch": 0.3580649544775105, "grad_norm": 1.6411116123199463, "learning_rate": 0.0009552673596956108, "loss": 3.5967, "step": 5270 }, { "epoch": 0.3584046745481723, "grad_norm": 1.5884488821029663, "learning_rate": 0.0009552248946867781, "loss": 3.8392, "step": 5275 }, { "epoch": 0.35874439461883406, "grad_norm": 1.5533169507980347, "learning_rate": 0.0009551824296779454, "loss": 3.653, "step": 5280 }, { "epoch": 0.35908411468949586, "grad_norm": 1.9502177238464355, "learning_rate": 0.0009551399646691126, "loss": 3.6333, "step": 5285 }, { "epoch": 0.35942383476015766, "grad_norm": 1.9683051109313965, "learning_rate": 0.00095509749966028, "loss": 3.7083, "step": 5290 }, { "epoch": 0.3597635548308194, "grad_norm": 2.1147077083587646, "learning_rate": 0.0009550550346514473, "loss": 3.7211, "step": 5295 }, { "epoch": 0.3601032749014812, "grad_norm": 1.6360818147659302, "learning_rate": 0.0009550125696426145, "loss": 3.7866, "step": 5300 }, { "epoch": 0.36044299497214294, "grad_norm": 1.4183710813522339, "learning_rate": 0.0009549701046337818, "loss": 3.7454, "step": 5305 }, { "epoch": 0.36078271504280474, "grad_norm": 1.4711469411849976, "learning_rate": 0.0009549276396249491, "loss": 3.7324, "step": 5310 }, { "epoch": 0.3611224351134665, "grad_norm": 1.6430301666259766, "learning_rate": 0.0009548851746161163, "loss": 3.8677, "step": 5315 }, { "epoch": 0.3614621551841283, "grad_norm": 1.7611005306243896, "learning_rate": 0.0009548427096072835, "loss": 3.5156, "step": 5320 }, { "epoch": 0.3618018752547901, "grad_norm": 1.701605200767517, "learning_rate": 0.000954800244598451, "loss": 3.8477, "step": 5325 }, { "epoch": 0.3621415953254518, "grad_norm": 1.4010838270187378, "learning_rate": 0.0009547577795896182, "loss": 3.6316, "step": 5330 }, { "epoch": 0.3624813153961136, "grad_norm": 2.4166698455810547, "learning_rate": 0.0009547153145807854, "loss": 3.9031, "step": 5335 }, { "epoch": 0.36282103546677535, 
"grad_norm": 2.0352272987365723, "learning_rate": 0.0009546728495719528, "loss": 3.5747, "step": 5340 }, { "epoch": 0.36316075553743715, "grad_norm": 1.8467684984207153, "learning_rate": 0.00095463038456312, "loss": 3.4351, "step": 5345 }, { "epoch": 0.36350047560809895, "grad_norm": 1.835755705833435, "learning_rate": 0.0009545879195542872, "loss": 3.6259, "step": 5350 }, { "epoch": 0.3638401956787607, "grad_norm": 1.3579394817352295, "learning_rate": 0.0009545454545454546, "loss": 3.3173, "step": 5355 }, { "epoch": 0.3641799157494225, "grad_norm": 1.7431646585464478, "learning_rate": 0.0009545029895366219, "loss": 3.4545, "step": 5360 }, { "epoch": 0.36451963582008423, "grad_norm": 2.1811561584472656, "learning_rate": 0.0009544605245277891, "loss": 3.7208, "step": 5365 }, { "epoch": 0.36485935589074603, "grad_norm": 1.9143980741500854, "learning_rate": 0.0009544180595189564, "loss": 3.4132, "step": 5370 }, { "epoch": 0.3651990759614078, "grad_norm": 1.5996826887130737, "learning_rate": 0.0009543755945101237, "loss": 3.5009, "step": 5375 }, { "epoch": 0.36553879603206957, "grad_norm": 1.7958800792694092, "learning_rate": 0.0009543331295012909, "loss": 3.5145, "step": 5380 }, { "epoch": 0.36587851610273137, "grad_norm": 1.4278388023376465, "learning_rate": 0.0009542906644924582, "loss": 3.6811, "step": 5385 }, { "epoch": 0.3662182361733931, "grad_norm": 1.6192870140075684, "learning_rate": 0.0009542481994836255, "loss": 3.6332, "step": 5390 }, { "epoch": 0.3665579562440549, "grad_norm": 1.5010921955108643, "learning_rate": 0.0009542057344747928, "loss": 3.4491, "step": 5395 }, { "epoch": 0.36689767631471665, "grad_norm": 1.5759670734405518, "learning_rate": 0.0009541632694659601, "loss": 3.5176, "step": 5400 }, { "epoch": 0.36723739638537845, "grad_norm": 1.9270191192626953, "learning_rate": 0.0009541208044571274, "loss": 3.4926, "step": 5405 }, { "epoch": 0.36757711645604024, "grad_norm": 1.773805022239685, "learning_rate": 0.0009540783394482946, "loss": 3.5389, 
"step": 5410 }, { "epoch": 0.367916836526702, "grad_norm": 1.5583146810531616, "learning_rate": 0.0009540358744394619, "loss": 3.6431, "step": 5415 }, { "epoch": 0.3682565565973638, "grad_norm": 1.4711490869522095, "learning_rate": 0.0009539934094306291, "loss": 3.5749, "step": 5420 }, { "epoch": 0.3685962766680255, "grad_norm": 2.025629997253418, "learning_rate": 0.0009539509444217964, "loss": 3.3792, "step": 5425 }, { "epoch": 0.3689359967386873, "grad_norm": 1.7150894403457642, "learning_rate": 0.0009539084794129638, "loss": 3.6011, "step": 5430 }, { "epoch": 0.3692757168093491, "grad_norm": 2.5389039516448975, "learning_rate": 0.000953866014404131, "loss": 3.6377, "step": 5435 }, { "epoch": 0.36961543688001086, "grad_norm": 1.8499678373336792, "learning_rate": 0.0009538235493952983, "loss": 3.4062, "step": 5440 }, { "epoch": 0.36995515695067266, "grad_norm": 1.7132196426391602, "learning_rate": 0.0009537810843864656, "loss": 3.6275, "step": 5445 }, { "epoch": 0.3702948770213344, "grad_norm": 1.8304625749588013, "learning_rate": 0.0009537386193776328, "loss": 3.5665, "step": 5450 }, { "epoch": 0.3706345970919962, "grad_norm": 1.631395936012268, "learning_rate": 0.0009536961543688, "loss": 3.819, "step": 5455 }, { "epoch": 0.370974317162658, "grad_norm": 1.4789533615112305, "learning_rate": 0.0009536536893599674, "loss": 3.6115, "step": 5460 }, { "epoch": 0.37131403723331974, "grad_norm": 1.9097459316253662, "learning_rate": 0.0009536112243511347, "loss": 3.4465, "step": 5465 }, { "epoch": 0.37165375730398154, "grad_norm": 2.524022102355957, "learning_rate": 0.0009535687593423019, "loss": 3.6556, "step": 5470 }, { "epoch": 0.3719934773746433, "grad_norm": 1.8448377847671509, "learning_rate": 0.0009535262943334693, "loss": 3.8707, "step": 5475 }, { "epoch": 0.3723331974453051, "grad_norm": 1.8637043237686157, "learning_rate": 0.0009534838293246365, "loss": 3.7764, "step": 5480 }, { "epoch": 0.3726729175159668, "grad_norm": 1.8092262744903564, "learning_rate": 
0.0009534413643158037, "loss": 3.841, "step": 5485 }, { "epoch": 0.3730126375866286, "grad_norm": 2.529362678527832, "learning_rate": 0.0009533988993069711, "loss": 3.6604, "step": 5490 }, { "epoch": 0.3733523576572904, "grad_norm": 1.5834816694259644, "learning_rate": 0.0009533564342981383, "loss": 3.6166, "step": 5495 }, { "epoch": 0.37369207772795215, "grad_norm": 1.6027281284332275, "learning_rate": 0.0009533139692893056, "loss": 3.5428, "step": 5500 }, { "epoch": 0.37403179779861395, "grad_norm": 1.705225944519043, "learning_rate": 0.000953271504280473, "loss": 3.8466, "step": 5505 }, { "epoch": 0.3743715178692757, "grad_norm": 1.5803911685943604, "learning_rate": 0.0009532290392716402, "loss": 3.4877, "step": 5510 }, { "epoch": 0.3747112379399375, "grad_norm": 1.9594582319259644, "learning_rate": 0.0009531865742628074, "loss": 3.717, "step": 5515 }, { "epoch": 0.3750509580105993, "grad_norm": 1.5492185354232788, "learning_rate": 0.0009531441092539747, "loss": 3.4173, "step": 5520 }, { "epoch": 0.37539067808126103, "grad_norm": 1.5933808088302612, "learning_rate": 0.000953101644245142, "loss": 3.377, "step": 5525 }, { "epoch": 0.37573039815192283, "grad_norm": 2.0768401622772217, "learning_rate": 0.0009530591792363092, "loss": 3.5507, "step": 5530 }, { "epoch": 0.37607011822258457, "grad_norm": 1.3662269115447998, "learning_rate": 0.0009530167142274766, "loss": 3.878, "step": 5535 }, { "epoch": 0.37640983829324637, "grad_norm": 1.8900212049484253, "learning_rate": 0.0009529742492186439, "loss": 3.654, "step": 5540 }, { "epoch": 0.37674955836390817, "grad_norm": 1.3635151386260986, "learning_rate": 0.0009529317842098111, "loss": 3.8696, "step": 5545 }, { "epoch": 0.3770892784345699, "grad_norm": 1.3409770727157593, "learning_rate": 0.0009528893192009784, "loss": 3.9642, "step": 5550 }, { "epoch": 0.3774289985052317, "grad_norm": 1.830278992652893, "learning_rate": 0.0009528468541921457, "loss": 3.8223, "step": 5555 }, { "epoch": 0.37776871857589345, 
"grad_norm": 2.405775785446167, "learning_rate": 0.000952804389183313, "loss": 3.7648, "step": 5560 }, { "epoch": 0.37810843864655524, "grad_norm": 1.731757640838623, "learning_rate": 0.0009527619241744803, "loss": 3.5445, "step": 5565 }, { "epoch": 0.378448158717217, "grad_norm": 1.859444260597229, "learning_rate": 0.0009527194591656475, "loss": 3.6844, "step": 5570 }, { "epoch": 0.3787878787878788, "grad_norm": 1.5652376413345337, "learning_rate": 0.0009526769941568149, "loss": 3.4542, "step": 5575 }, { "epoch": 0.3791275988585406, "grad_norm": 1.9930014610290527, "learning_rate": 0.0009526345291479821, "loss": 3.5241, "step": 5580 }, { "epoch": 0.3794673189292023, "grad_norm": 1.7304593324661255, "learning_rate": 0.0009525920641391493, "loss": 3.5168, "step": 5585 }, { "epoch": 0.3798070389998641, "grad_norm": 1.5652704238891602, "learning_rate": 0.0009525495991303167, "loss": 3.4185, "step": 5590 }, { "epoch": 0.38014675907052586, "grad_norm": 1.952778935432434, "learning_rate": 0.0009525071341214839, "loss": 3.4699, "step": 5595 }, { "epoch": 0.38048647914118766, "grad_norm": 1.4394186735153198, "learning_rate": 0.0009524646691126512, "loss": 3.7226, "step": 5600 }, { "epoch": 0.38082619921184946, "grad_norm": 1.6469097137451172, "learning_rate": 0.0009524222041038186, "loss": 3.6495, "step": 5605 }, { "epoch": 0.3811659192825112, "grad_norm": 1.6280169486999512, "learning_rate": 0.0009523797390949858, "loss": 3.4988, "step": 5610 }, { "epoch": 0.381505639353173, "grad_norm": 1.5694022178649902, "learning_rate": 0.000952337274086153, "loss": 3.5241, "step": 5615 }, { "epoch": 0.38184535942383474, "grad_norm": 1.4661582708358765, "learning_rate": 0.0009522948090773203, "loss": 3.7033, "step": 5620 }, { "epoch": 0.38218507949449654, "grad_norm": 1.9795700311660767, "learning_rate": 0.0009522523440684876, "loss": 3.5658, "step": 5625 }, { "epoch": 0.38252479956515834, "grad_norm": 2.1982691287994385, "learning_rate": 0.0009522098790596548, "loss": 3.7128, "step": 
5630 }, { "epoch": 0.3828645196358201, "grad_norm": 1.744372844696045, "learning_rate": 0.0009521674140508222, "loss": 3.5161, "step": 5635 }, { "epoch": 0.3832042397064819, "grad_norm": 1.656912088394165, "learning_rate": 0.0009521249490419895, "loss": 3.6386, "step": 5640 }, { "epoch": 0.3835439597771436, "grad_norm": 2.0462169647216797, "learning_rate": 0.0009520824840331567, "loss": 3.6641, "step": 5645 }, { "epoch": 0.3838836798478054, "grad_norm": 1.937244176864624, "learning_rate": 0.000952040019024324, "loss": 3.5274, "step": 5650 }, { "epoch": 0.38422339991846716, "grad_norm": 1.5051823854446411, "learning_rate": 0.0009519975540154913, "loss": 3.7115, "step": 5655 }, { "epoch": 0.38456311998912895, "grad_norm": 1.4647164344787598, "learning_rate": 0.0009519550890066585, "loss": 3.5542, "step": 5660 }, { "epoch": 0.38490284005979075, "grad_norm": 1.7984769344329834, "learning_rate": 0.0009519126239978258, "loss": 3.5955, "step": 5665 }, { "epoch": 0.3852425601304525, "grad_norm": 1.7603238821029663, "learning_rate": 0.0009518701589889931, "loss": 3.4589, "step": 5670 }, { "epoch": 0.3855822802011143, "grad_norm": 1.6151098012924194, "learning_rate": 0.0009518276939801604, "loss": 3.3949, "step": 5675 }, { "epoch": 0.38592200027177603, "grad_norm": 1.8881529569625854, "learning_rate": 0.0009517852289713277, "loss": 3.4561, "step": 5680 }, { "epoch": 0.38626172034243783, "grad_norm": 1.6941258907318115, "learning_rate": 0.0009517427639624949, "loss": 3.4259, "step": 5685 }, { "epoch": 0.38660144041309963, "grad_norm": 2.1984286308288574, "learning_rate": 0.0009517002989536622, "loss": 3.684, "step": 5690 }, { "epoch": 0.38694116048376137, "grad_norm": 1.5917991399765015, "learning_rate": 0.0009516578339448295, "loss": 3.5187, "step": 5695 }, { "epoch": 0.38728088055442317, "grad_norm": 2.023958444595337, "learning_rate": 0.0009516153689359967, "loss": 3.6307, "step": 5700 }, { "epoch": 0.3876206006250849, "grad_norm": 2.033766269683838, "learning_rate": 
0.000951572903927164, "loss": 3.4151, "step": 5705 }, { "epoch": 0.3879603206957467, "grad_norm": 1.5124835968017578, "learning_rate": 0.0009515304389183314, "loss": 3.6428, "step": 5710 }, { "epoch": 0.3883000407664085, "grad_norm": 1.6250892877578735, "learning_rate": 0.0009514879739094986, "loss": 3.6998, "step": 5715 }, { "epoch": 0.38863976083707025, "grad_norm": 1.7705053091049194, "learning_rate": 0.0009514455089006658, "loss": 3.5534, "step": 5720 }, { "epoch": 0.38897948090773204, "grad_norm": 1.640376329421997, "learning_rate": 0.0009514030438918332, "loss": 3.8345, "step": 5725 }, { "epoch": 0.3893192009783938, "grad_norm": 1.6427613496780396, "learning_rate": 0.0009513605788830004, "loss": 3.6042, "step": 5730 }, { "epoch": 0.3896589210490556, "grad_norm": 2.2041895389556885, "learning_rate": 0.0009513181138741676, "loss": 3.6158, "step": 5735 }, { "epoch": 0.3899986411197173, "grad_norm": 1.7022197246551514, "learning_rate": 0.0009512756488653351, "loss": 3.8072, "step": 5740 }, { "epoch": 0.3903383611903791, "grad_norm": 1.4119857549667358, "learning_rate": 0.0009512331838565023, "loss": 3.602, "step": 5745 }, { "epoch": 0.3906780812610409, "grad_norm": 1.5478920936584473, "learning_rate": 0.0009511907188476695, "loss": 3.5734, "step": 5750 }, { "epoch": 0.39101780133170266, "grad_norm": 1.733489990234375, "learning_rate": 0.0009511482538388369, "loss": 3.7145, "step": 5755 }, { "epoch": 0.39135752140236446, "grad_norm": 2.734954833984375, "learning_rate": 0.0009511057888300041, "loss": 3.6471, "step": 5760 }, { "epoch": 0.3916972414730262, "grad_norm": 2.058105707168579, "learning_rate": 0.0009510633238211713, "loss": 3.4105, "step": 5765 }, { "epoch": 0.392036961543688, "grad_norm": 1.8512954711914062, "learning_rate": 0.0009510208588123386, "loss": 3.6782, "step": 5770 }, { "epoch": 0.3923766816143498, "grad_norm": 1.8585386276245117, "learning_rate": 0.000950978393803506, "loss": 3.6654, "step": 5775 }, { "epoch": 0.39271640168501154, "grad_norm": 
1.933272123336792, "learning_rate": 0.0009509359287946732, "loss": 3.4257, "step": 5780 }, { "epoch": 0.39305612175567334, "grad_norm": 1.7961143255233765, "learning_rate": 0.0009508934637858405, "loss": 3.71, "step": 5785 }, { "epoch": 0.3933958418263351, "grad_norm": 1.984879732131958, "learning_rate": 0.0009508509987770078, "loss": 3.864, "step": 5790 }, { "epoch": 0.3937355618969969, "grad_norm": 1.6788439750671387, "learning_rate": 0.000950808533768175, "loss": 3.5728, "step": 5795 }, { "epoch": 0.3940752819676587, "grad_norm": 2.098679780960083, "learning_rate": 0.0009507660687593423, "loss": 3.7299, "step": 5800 }, { "epoch": 0.3944150020383204, "grad_norm": 2.0423359870910645, "learning_rate": 0.0009507236037505095, "loss": 3.8136, "step": 5805 }, { "epoch": 0.3947547221089822, "grad_norm": 1.8106367588043213, "learning_rate": 0.0009506811387416769, "loss": 3.5973, "step": 5810 }, { "epoch": 0.39509444217964396, "grad_norm": 2.3208208084106445, "learning_rate": 0.0009506386737328442, "loss": 3.2653, "step": 5815 }, { "epoch": 0.39543416225030575, "grad_norm": 1.7557055950164795, "learning_rate": 0.0009505962087240114, "loss": 3.8026, "step": 5820 }, { "epoch": 0.3957738823209675, "grad_norm": 1.7029435634613037, "learning_rate": 0.0009505537437151787, "loss": 3.7276, "step": 5825 }, { "epoch": 0.3961136023916293, "grad_norm": 2.074981689453125, "learning_rate": 0.000950511278706346, "loss": 3.478, "step": 5830 }, { "epoch": 0.3964533224622911, "grad_norm": 1.7561085224151611, "learning_rate": 0.0009504688136975132, "loss": 3.821, "step": 5835 }, { "epoch": 0.39679304253295283, "grad_norm": 1.7286807298660278, "learning_rate": 0.0009504263486886805, "loss": 3.5719, "step": 5840 }, { "epoch": 0.39713276260361463, "grad_norm": 2.191561222076416, "learning_rate": 0.0009503838836798479, "loss": 3.4892, "step": 5845 }, { "epoch": 0.3974724826742764, "grad_norm": 1.7382886409759521, "learning_rate": 0.0009503414186710151, "loss": 3.5237, "step": 5850 }, { "epoch": 
0.39781220274493817, "grad_norm": 1.7540260553359985, "learning_rate": 0.0009502989536621823, "loss": 3.6351, "step": 5855 }, { "epoch": 0.39815192281559997, "grad_norm": 1.4059550762176514, "learning_rate": 0.0009502564886533497, "loss": 3.6352, "step": 5860 }, { "epoch": 0.3984916428862617, "grad_norm": 1.4953556060791016, "learning_rate": 0.0009502140236445169, "loss": 3.7767, "step": 5865 }, { "epoch": 0.3988313629569235, "grad_norm": 2.123931407928467, "learning_rate": 0.0009501715586356841, "loss": 3.535, "step": 5870 }, { "epoch": 0.39917108302758525, "grad_norm": 1.4881750345230103, "learning_rate": 0.0009501290936268515, "loss": 3.3396, "step": 5875 }, { "epoch": 0.39951080309824705, "grad_norm": 1.9908478260040283, "learning_rate": 0.0009500866286180188, "loss": 3.7538, "step": 5880 }, { "epoch": 0.39985052316890884, "grad_norm": 1.5263208150863647, "learning_rate": 0.000950044163609186, "loss": 3.9152, "step": 5885 }, { "epoch": 0.4001902432395706, "grad_norm": 1.4805376529693604, "learning_rate": 0.0009500016986003534, "loss": 3.6543, "step": 5890 }, { "epoch": 0.4005299633102324, "grad_norm": 1.695273518562317, "learning_rate": 0.0009499592335915206, "loss": 3.2988, "step": 5895 }, { "epoch": 0.4008696833808941, "grad_norm": 2.2069873809814453, "learning_rate": 0.0009499167685826879, "loss": 3.5452, "step": 5900 }, { "epoch": 0.4012094034515559, "grad_norm": 1.505942702293396, "learning_rate": 0.0009498743035738551, "loss": 3.7007, "step": 5905 }, { "epoch": 0.40154912352221767, "grad_norm": 2.1689538955688477, "learning_rate": 0.0009498318385650224, "loss": 3.7459, "step": 5910 }, { "epoch": 0.40188884359287946, "grad_norm": 1.8102879524230957, "learning_rate": 0.0009497893735561898, "loss": 3.709, "step": 5915 }, { "epoch": 0.40222856366354126, "grad_norm": 2.3439810276031494, "learning_rate": 0.000949746908547357, "loss": 3.7898, "step": 5920 }, { "epoch": 0.402568283734203, "grad_norm": 2.397991895675659, "learning_rate": 0.0009497044435385243, 
"loss": 3.5427, "step": 5925 }, { "epoch": 0.4029080038048648, "grad_norm": 1.9178234338760376, "learning_rate": 0.0009496619785296916, "loss": 3.6517, "step": 5930 }, { "epoch": 0.40324772387552654, "grad_norm": 1.4942330121994019, "learning_rate": 0.0009496195135208588, "loss": 3.8027, "step": 5935 }, { "epoch": 0.40358744394618834, "grad_norm": 1.6844919919967651, "learning_rate": 0.000949577048512026, "loss": 3.571, "step": 5940 }, { "epoch": 0.40392716401685014, "grad_norm": 1.752565860748291, "learning_rate": 0.0009495345835031934, "loss": 3.5641, "step": 5945 }, { "epoch": 0.4042668840875119, "grad_norm": 1.4018806219100952, "learning_rate": 0.0009494921184943607, "loss": 3.7835, "step": 5950 }, { "epoch": 0.4046066041581737, "grad_norm": 1.5537182092666626, "learning_rate": 0.000949449653485528, "loss": 3.6943, "step": 5955 }, { "epoch": 0.4049463242288354, "grad_norm": 1.6531347036361694, "learning_rate": 0.0009494071884766953, "loss": 3.5846, "step": 5960 }, { "epoch": 0.4052860442994972, "grad_norm": 1.687187910079956, "learning_rate": 0.0009493647234678625, "loss": 3.6545, "step": 5965 }, { "epoch": 0.405625764370159, "grad_norm": 1.9954149723052979, "learning_rate": 0.0009493222584590297, "loss": 3.8745, "step": 5970 }, { "epoch": 0.40596548444082076, "grad_norm": 1.6429190635681152, "learning_rate": 0.0009492797934501971, "loss": 3.4251, "step": 5975 }, { "epoch": 0.40630520451148255, "grad_norm": 2.09661602973938, "learning_rate": 0.0009492373284413643, "loss": 3.6241, "step": 5980 }, { "epoch": 0.4066449245821443, "grad_norm": 1.5073970556259155, "learning_rate": 0.0009491948634325316, "loss": 3.5228, "step": 5985 }, { "epoch": 0.4069846446528061, "grad_norm": 1.8978476524353027, "learning_rate": 0.000949152398423699, "loss": 3.9535, "step": 5990 }, { "epoch": 0.40732436472346784, "grad_norm": 1.4543629884719849, "learning_rate": 0.0009491099334148662, "loss": 3.644, "step": 5995 }, { "epoch": 0.40766408479412963, "grad_norm": 1.4441583156585693, 
"learning_rate": 0.0009490674684060334, "loss": 3.4734, "step": 6000 }, { "epoch": 0.40800380486479143, "grad_norm": 1.8809375762939453, "learning_rate": 0.0009490250033972007, "loss": 3.6493, "step": 6005 }, { "epoch": 0.4083435249354532, "grad_norm": 1.8752074241638184, "learning_rate": 0.000948982538388368, "loss": 3.5699, "step": 6010 }, { "epoch": 0.40868324500611497, "grad_norm": 1.4699512720108032, "learning_rate": 0.0009489400733795352, "loss": 3.5924, "step": 6015 }, { "epoch": 0.4090229650767767, "grad_norm": 1.8964354991912842, "learning_rate": 0.0009488976083707026, "loss": 3.6973, "step": 6020 }, { "epoch": 0.4093626851474385, "grad_norm": 1.7132669687271118, "learning_rate": 0.0009488551433618699, "loss": 3.4342, "step": 6025 }, { "epoch": 0.4097024052181003, "grad_norm": 1.8634973764419556, "learning_rate": 0.0009488126783530371, "loss": 3.4093, "step": 6030 }, { "epoch": 0.41004212528876205, "grad_norm": 1.9324086904525757, "learning_rate": 0.0009487702133442044, "loss": 3.6127, "step": 6035 }, { "epoch": 0.41038184535942385, "grad_norm": 2.4900193214416504, "learning_rate": 0.0009487277483353717, "loss": 3.7302, "step": 6040 }, { "epoch": 0.4107215654300856, "grad_norm": 1.6818910837173462, "learning_rate": 0.0009486852833265389, "loss": 3.5516, "step": 6045 }, { "epoch": 0.4110612855007474, "grad_norm": 1.739668607711792, "learning_rate": 0.0009486428183177062, "loss": 3.623, "step": 6050 }, { "epoch": 0.4114010055714092, "grad_norm": 1.5390011072158813, "learning_rate": 0.0009486003533088735, "loss": 3.4703, "step": 6055 }, { "epoch": 0.4117407256420709, "grad_norm": 1.8399816751480103, "learning_rate": 0.0009485578883000408, "loss": 3.4091, "step": 6060 }, { "epoch": 0.4120804457127327, "grad_norm": 2.4123551845550537, "learning_rate": 0.0009485154232912081, "loss": 3.6651, "step": 6065 }, { "epoch": 0.41242016578339447, "grad_norm": 1.9973444938659668, "learning_rate": 0.0009484729582823753, "loss": 3.8292, "step": 6070 }, { "epoch": 
0.41275988585405626, "grad_norm": 1.9967353343963623, "learning_rate": 0.0009484304932735426, "loss": 3.2289, "step": 6075 }, { "epoch": 0.413099605924718, "grad_norm": 2.2026638984680176, "learning_rate": 0.0009483880282647099, "loss": 3.7335, "step": 6080 }, { "epoch": 0.4134393259953798, "grad_norm": 2.42104172706604, "learning_rate": 0.0009483455632558771, "loss": 3.7015, "step": 6085 }, { "epoch": 0.4137790460660416, "grad_norm": 1.6079154014587402, "learning_rate": 0.0009483030982470445, "loss": 3.8003, "step": 6090 }, { "epoch": 0.41411876613670334, "grad_norm": 2.078624963760376, "learning_rate": 0.0009482606332382118, "loss": 3.5213, "step": 6095 }, { "epoch": 0.41445848620736514, "grad_norm": 1.5589534044265747, "learning_rate": 0.000948218168229379, "loss": 3.5568, "step": 6100 }, { "epoch": 0.4147982062780269, "grad_norm": 1.7032876014709473, "learning_rate": 0.0009481757032205462, "loss": 3.4432, "step": 6105 }, { "epoch": 0.4151379263486887, "grad_norm": 1.620177984237671, "learning_rate": 0.0009481332382117136, "loss": 3.5718, "step": 6110 }, { "epoch": 0.4154776464193505, "grad_norm": 2.268782377243042, "learning_rate": 0.0009480907732028808, "loss": 3.4869, "step": 6115 }, { "epoch": 0.4158173664900122, "grad_norm": 1.7736921310424805, "learning_rate": 0.000948048308194048, "loss": 3.8937, "step": 6120 }, { "epoch": 0.416157086560674, "grad_norm": 1.721004843711853, "learning_rate": 0.0009480058431852155, "loss": 3.6835, "step": 6125 }, { "epoch": 0.41649680663133576, "grad_norm": 1.745653510093689, "learning_rate": 0.0009479633781763827, "loss": 3.3869, "step": 6130 }, { "epoch": 0.41683652670199756, "grad_norm": 2.158785581588745, "learning_rate": 0.0009479209131675499, "loss": 3.5247, "step": 6135 }, { "epoch": 0.41717624677265935, "grad_norm": 1.7532445192337036, "learning_rate": 0.0009478784481587173, "loss": 3.6702, "step": 6140 }, { "epoch": 0.4175159668433211, "grad_norm": 2.302788257598877, "learning_rate": 0.0009478359831498845, "loss": 
3.6857, "step": 6145 }, { "epoch": 0.4178556869139829, "grad_norm": 2.885871171951294, "learning_rate": 0.0009477935181410517, "loss": 3.6768, "step": 6150 }, { "epoch": 0.41819540698464464, "grad_norm": 1.7536935806274414, "learning_rate": 0.0009477510531322192, "loss": 3.5548, "step": 6155 }, { "epoch": 0.41853512705530643, "grad_norm": 1.469265103340149, "learning_rate": 0.0009477085881233864, "loss": 3.6771, "step": 6160 }, { "epoch": 0.4188748471259682, "grad_norm": 1.8501005172729492, "learning_rate": 0.0009476746161163202, "loss": 3.9166, "step": 6165 }, { "epoch": 0.41921456719663, "grad_norm": 1.945460557937622, "learning_rate": 0.0009476321511074874, "loss": 3.7377, "step": 6170 }, { "epoch": 0.41955428726729177, "grad_norm": 1.8390744924545288, "learning_rate": 0.0009475896860986547, "loss": 3.6837, "step": 6175 }, { "epoch": 0.4198940073379535, "grad_norm": 1.6704758405685425, "learning_rate": 0.0009475472210898221, "loss": 3.5722, "step": 6180 }, { "epoch": 0.4202337274086153, "grad_norm": 1.5623986721038818, "learning_rate": 0.0009475047560809893, "loss": 3.4458, "step": 6185 }, { "epoch": 0.42057344747927705, "grad_norm": 1.8201884031295776, "learning_rate": 0.0009474622910721565, "loss": 3.4579, "step": 6190 }, { "epoch": 0.42091316754993885, "grad_norm": 1.3881213665008545, "learning_rate": 0.0009474198260633239, "loss": 3.6977, "step": 6195 }, { "epoch": 0.42125288762060065, "grad_norm": 1.5907771587371826, "learning_rate": 0.0009473773610544911, "loss": 3.6299, "step": 6200 }, { "epoch": 0.4215926076912624, "grad_norm": 2.0011353492736816, "learning_rate": 0.0009473348960456583, "loss": 3.5262, "step": 6205 }, { "epoch": 0.4219323277619242, "grad_norm": 1.5873202085494995, "learning_rate": 0.0009472924310368257, "loss": 3.5451, "step": 6210 }, { "epoch": 0.42227204783258593, "grad_norm": 1.9512501955032349, "learning_rate": 0.000947249966027993, "loss": 3.4909, "step": 6215 }, { "epoch": 0.4226117679032477, "grad_norm": 1.993990421295166, 
"learning_rate": 0.0009472075010191602, "loss": 3.512, "step": 6220 }, { "epoch": 0.4229514879739095, "grad_norm": 1.5832632780075073, "learning_rate": 0.0009471650360103276, "loss": 3.461, "step": 6225 }, { "epoch": 0.42329120804457127, "grad_norm": 1.56803560256958, "learning_rate": 0.0009471225710014948, "loss": 3.6839, "step": 6230 }, { "epoch": 0.42363092811523306, "grad_norm": 1.5706437826156616, "learning_rate": 0.000947080105992662, "loss": 3.6711, "step": 6235 }, { "epoch": 0.4239706481858948, "grad_norm": 1.7144986391067505, "learning_rate": 0.0009470376409838293, "loss": 3.4447, "step": 6240 }, { "epoch": 0.4243103682565566, "grad_norm": 2.0438389778137207, "learning_rate": 0.0009469951759749966, "loss": 3.6335, "step": 6245 }, { "epoch": 0.42465008832721834, "grad_norm": 1.6651569604873657, "learning_rate": 0.0009469527109661639, "loss": 3.5235, "step": 6250 }, { "epoch": 0.42498980839788014, "grad_norm": 1.5006132125854492, "learning_rate": 0.0009469102459573312, "loss": 3.6202, "step": 6255 }, { "epoch": 0.42532952846854194, "grad_norm": 1.7706266641616821, "learning_rate": 0.0009468677809484985, "loss": 3.3687, "step": 6260 }, { "epoch": 0.4256692485392037, "grad_norm": 1.3228206634521484, "learning_rate": 0.0009468253159396657, "loss": 3.4494, "step": 6265 }, { "epoch": 0.4260089686098655, "grad_norm": 2.0133461952209473, "learning_rate": 0.000946782850930833, "loss": 3.7756, "step": 6270 }, { "epoch": 0.4263486886805272, "grad_norm": 1.6084418296813965, "learning_rate": 0.0009467403859220003, "loss": 3.535, "step": 6275 }, { "epoch": 0.426688408751189, "grad_norm": 2.1309452056884766, "learning_rate": 0.0009466979209131675, "loss": 3.5289, "step": 6280 }, { "epoch": 0.4270281288218508, "grad_norm": 2.089107036590576, "learning_rate": 0.0009466554559043349, "loss": 3.793, "step": 6285 }, { "epoch": 0.42736784889251256, "grad_norm": 2.0027101039886475, "learning_rate": 0.0009466129908955021, "loss": 3.6377, "step": 6290 }, { "epoch": 
0.42770756896317436, "grad_norm": 1.479036569595337, "learning_rate": 0.0009465705258866694, "loss": 3.7845, "step": 6295 }, { "epoch": 0.4280472890338361, "grad_norm": 1.6179125308990479, "learning_rate": 0.0009465280608778367, "loss": 3.5783, "step": 6300 }, { "epoch": 0.4283870091044979, "grad_norm": 1.4189685583114624, "learning_rate": 0.0009464855958690039, "loss": 3.2691, "step": 6305 }, { "epoch": 0.4287267291751597, "grad_norm": 1.713448405265808, "learning_rate": 0.0009464431308601712, "loss": 3.8745, "step": 6310 }, { "epoch": 0.42906644924582144, "grad_norm": 1.9014776945114136, "learning_rate": 0.0009464006658513386, "loss": 3.6745, "step": 6315 }, { "epoch": 0.42940616931648323, "grad_norm": 1.8731951713562012, "learning_rate": 0.0009463582008425058, "loss": 3.619, "step": 6320 }, { "epoch": 0.429745889387145, "grad_norm": 2.2848060131073, "learning_rate": 0.0009463157358336731, "loss": 3.4196, "step": 6325 }, { "epoch": 0.4300856094578068, "grad_norm": 2.3639073371887207, "learning_rate": 0.0009462732708248404, "loss": 3.5145, "step": 6330 }, { "epoch": 0.4304253295284685, "grad_norm": 1.5760060548782349, "learning_rate": 0.0009462308058160076, "loss": 3.5641, "step": 6335 }, { "epoch": 0.4307650495991303, "grad_norm": 1.8943477869033813, "learning_rate": 0.0009461883408071748, "loss": 3.5382, "step": 6340 }, { "epoch": 0.4311047696697921, "grad_norm": 1.7472189664840698, "learning_rate": 0.0009461458757983422, "loss": 3.4899, "step": 6345 }, { "epoch": 0.43144448974045385, "grad_norm": 2.0407843589782715, "learning_rate": 0.0009461034107895095, "loss": 3.5157, "step": 6350 }, { "epoch": 0.43178420981111565, "grad_norm": 1.9718049764633179, "learning_rate": 0.0009460609457806767, "loss": 3.6133, "step": 6355 }, { "epoch": 0.4321239298817774, "grad_norm": 1.6381659507751465, "learning_rate": 0.0009460184807718441, "loss": 3.4008, "step": 6360 }, { "epoch": 0.4324636499524392, "grad_norm": 1.5734117031097412, "learning_rate": 0.0009459760157630113, 
"loss": 3.5297, "step": 6365 }, { "epoch": 0.432803370023101, "grad_norm": 2.1675915718078613, "learning_rate": 0.0009459335507541785, "loss": 3.2843, "step": 6370 }, { "epoch": 0.43314309009376273, "grad_norm": 2.358248472213745, "learning_rate": 0.0009458910857453459, "loss": 3.5275, "step": 6375 }, { "epoch": 0.4334828101644245, "grad_norm": 1.8349809646606445, "learning_rate": 0.0009458486207365131, "loss": 3.684, "step": 6380 }, { "epoch": 0.43382253023508627, "grad_norm": 2.293426036834717, "learning_rate": 0.0009458061557276804, "loss": 3.7513, "step": 6385 }, { "epoch": 0.43416225030574807, "grad_norm": 2.0748000144958496, "learning_rate": 0.0009457636907188478, "loss": 3.6586, "step": 6390 }, { "epoch": 0.43450197037640986, "grad_norm": 1.7213579416275024, "learning_rate": 0.000945721225710015, "loss": 3.7358, "step": 6395 }, { "epoch": 0.4348416904470716, "grad_norm": 1.94364595413208, "learning_rate": 0.0009456787607011822, "loss": 3.6239, "step": 6400 }, { "epoch": 0.4351814105177334, "grad_norm": 1.5297433137893677, "learning_rate": 0.0009456362956923495, "loss": 3.5972, "step": 6405 }, { "epoch": 0.43552113058839514, "grad_norm": 1.649431824684143, "learning_rate": 0.0009455938306835168, "loss": 3.5039, "step": 6410 }, { "epoch": 0.43586085065905694, "grad_norm": 1.7631720304489136, "learning_rate": 0.000945551365674684, "loss": 3.4453, "step": 6415 }, { "epoch": 0.4362005707297187, "grad_norm": 1.5172796249389648, "learning_rate": 0.0009455089006658514, "loss": 3.4906, "step": 6420 }, { "epoch": 0.4365402908003805, "grad_norm": 1.7854291200637817, "learning_rate": 0.0009454664356570187, "loss": 3.4802, "step": 6425 }, { "epoch": 0.4368800108710423, "grad_norm": 1.6252285242080688, "learning_rate": 0.0009454239706481859, "loss": 3.8688, "step": 6430 }, { "epoch": 0.437219730941704, "grad_norm": 1.6903331279754639, "learning_rate": 0.0009453815056393532, "loss": 3.64, "step": 6435 }, { "epoch": 0.4375594510123658, "grad_norm": 1.9777930974960327, 
"learning_rate": 0.0009453390406305204, "loss": 3.5281, "step": 6440 }, { "epoch": 0.43789917108302756, "grad_norm": 2.228394031524658, "learning_rate": 0.0009452965756216878, "loss": 3.6662, "step": 6445 }, { "epoch": 0.43823889115368936, "grad_norm": 1.75724458694458, "learning_rate": 0.000945254110612855, "loss": 3.6503, "step": 6450 }, { "epoch": 0.43857861122435116, "grad_norm": 1.7253539562225342, "learning_rate": 0.0009452116456040223, "loss": 3.7491, "step": 6455 }, { "epoch": 0.4389183312950129, "grad_norm": 1.7263569831848145, "learning_rate": 0.0009451691805951897, "loss": 3.4577, "step": 6460 }, { "epoch": 0.4392580513656747, "grad_norm": 1.9969139099121094, "learning_rate": 0.0009451267155863569, "loss": 3.5957, "step": 6465 }, { "epoch": 0.43959777143633644, "grad_norm": 2.3201301097869873, "learning_rate": 0.0009450842505775241, "loss": 3.3557, "step": 6470 }, { "epoch": 0.43993749150699824, "grad_norm": 1.7514084577560425, "learning_rate": 0.0009450417855686915, "loss": 3.7781, "step": 6475 }, { "epoch": 0.44027721157766003, "grad_norm": 1.6302152872085571, "learning_rate": 0.0009449993205598587, "loss": 3.7823, "step": 6480 }, { "epoch": 0.4406169316483218, "grad_norm": 1.7171062231063843, "learning_rate": 0.0009449568555510259, "loss": 3.4802, "step": 6485 }, { "epoch": 0.44095665171898357, "grad_norm": 1.794606328010559, "learning_rate": 0.0009449143905421934, "loss": 3.7674, "step": 6490 }, { "epoch": 0.4412963717896453, "grad_norm": 2.0074305534362793, "learning_rate": 0.0009448719255333606, "loss": 3.5926, "step": 6495 }, { "epoch": 0.4416360918603071, "grad_norm": 2.069270610809326, "learning_rate": 0.0009448294605245278, "loss": 3.8358, "step": 6500 }, { "epoch": 0.4419758119309689, "grad_norm": 1.891984224319458, "learning_rate": 0.0009447869955156951, "loss": 3.4641, "step": 6505 }, { "epoch": 0.44231553200163065, "grad_norm": 1.6894874572753906, "learning_rate": 0.0009447445305068624, "loss": 3.696, "step": 6510 }, { "epoch": 
0.44265525207229245, "grad_norm": 1.9723577499389648, "learning_rate": 0.0009447020654980296, "loss": 3.5979, "step": 6515 }, { "epoch": 0.4429949721429542, "grad_norm": 1.9263155460357666, "learning_rate": 0.0009446596004891969, "loss": 3.4441, "step": 6520 }, { "epoch": 0.443334692213616, "grad_norm": 1.9338316917419434, "learning_rate": 0.0009446171354803643, "loss": 3.6551, "step": 6525 }, { "epoch": 0.44367441228427773, "grad_norm": 1.669878602027893, "learning_rate": 0.0009445746704715315, "loss": 3.586, "step": 6530 }, { "epoch": 0.44401413235493953, "grad_norm": 1.9796849489212036, "learning_rate": 0.0009445322054626988, "loss": 3.4952, "step": 6535 }, { "epoch": 0.4443538524256013, "grad_norm": 1.8273417949676514, "learning_rate": 0.000944489740453866, "loss": 3.5155, "step": 6540 }, { "epoch": 0.44469357249626307, "grad_norm": 1.6206464767456055, "learning_rate": 0.0009444472754450333, "loss": 3.6363, "step": 6545 }, { "epoch": 0.44503329256692487, "grad_norm": 1.840315818786621, "learning_rate": 0.0009444048104362006, "loss": 3.3399, "step": 6550 }, { "epoch": 0.4453730126375866, "grad_norm": 2.2119333744049072, "learning_rate": 0.0009443623454273678, "loss": 3.7759, "step": 6555 }, { "epoch": 0.4457127327082484, "grad_norm": 1.6006566286087036, "learning_rate": 0.0009443198804185352, "loss": 3.9236, "step": 6560 }, { "epoch": 0.4460524527789102, "grad_norm": 1.728869915008545, "learning_rate": 0.0009442774154097025, "loss": 3.2327, "step": 6565 }, { "epoch": 0.44639217284957194, "grad_norm": 1.71336030960083, "learning_rate": 0.0009442349504008697, "loss": 3.3739, "step": 6570 }, { "epoch": 0.44673189292023374, "grad_norm": 2.3854784965515137, "learning_rate": 0.000944192485392037, "loss": 3.7452, "step": 6575 }, { "epoch": 0.4470716129908955, "grad_norm": 1.9536436796188354, "learning_rate": 0.0009441500203832043, "loss": 3.5091, "step": 6580 }, { "epoch": 0.4474113330615573, "grad_norm": 1.76387357711792, "learning_rate": 0.0009441075553743715, 
"loss": 3.5477, "step": 6585 }, { "epoch": 0.4477510531322191, "grad_norm": 1.6928564310073853, "learning_rate": 0.0009440650903655387, "loss": 3.5532, "step": 6590 }, { "epoch": 0.4480907732028808, "grad_norm": 1.5018301010131836, "learning_rate": 0.0009440226253567062, "loss": 3.5673, "step": 6595 }, { "epoch": 0.4484304932735426, "grad_norm": 1.815924882888794, "learning_rate": 0.0009439801603478734, "loss": 3.761, "step": 6600 }, { "epoch": 0.44877021334420436, "grad_norm": 1.3744932413101196, "learning_rate": 0.0009439376953390406, "loss": 3.4357, "step": 6605 }, { "epoch": 0.44910993341486616, "grad_norm": 1.6498910188674927, "learning_rate": 0.000943895230330208, "loss": 3.5039, "step": 6610 }, { "epoch": 0.4494496534855279, "grad_norm": 1.6308001279830933, "learning_rate": 0.0009438527653213752, "loss": 3.7774, "step": 6615 }, { "epoch": 0.4497893735561897, "grad_norm": 1.588278889656067, "learning_rate": 0.0009438103003125424, "loss": 3.5538, "step": 6620 }, { "epoch": 0.4501290936268515, "grad_norm": 1.960538387298584, "learning_rate": 0.0009437678353037098, "loss": 3.6967, "step": 6625 }, { "epoch": 0.45046881369751324, "grad_norm": 1.8328642845153809, "learning_rate": 0.0009437253702948771, "loss": 3.8461, "step": 6630 }, { "epoch": 0.45080853376817503, "grad_norm": 2.2926747798919678, "learning_rate": 0.0009436829052860443, "loss": 3.632, "step": 6635 }, { "epoch": 0.4511482538388368, "grad_norm": 1.942257046699524, "learning_rate": 0.0009436404402772116, "loss": 3.6907, "step": 6640 }, { "epoch": 0.4514879739094986, "grad_norm": 1.9429374933242798, "learning_rate": 0.0009435979752683789, "loss": 3.3593, "step": 6645 }, { "epoch": 0.45182769398016037, "grad_norm": 1.9705283641815186, "learning_rate": 0.0009435555102595461, "loss": 3.7534, "step": 6650 }, { "epoch": 0.4521674140508221, "grad_norm": 2.0319223403930664, "learning_rate": 0.0009435130452507134, "loss": 3.6129, "step": 6655 }, { "epoch": 0.4525071341214839, "grad_norm": 1.7275131940841675, 
"learning_rate": 0.0009434705802418807, "loss": 3.4947, "step": 6660 }, { "epoch": 0.45284685419214565, "grad_norm": 1.5665042400360107, "learning_rate": 0.000943428115233048, "loss": 3.4399, "step": 6665 }, { "epoch": 0.45318657426280745, "grad_norm": 1.9255952835083008, "learning_rate": 0.0009433856502242153, "loss": 3.8382, "step": 6670 }, { "epoch": 0.45352629433346925, "grad_norm": 1.7121191024780273, "learning_rate": 0.0009433431852153826, "loss": 3.9335, "step": 6675 }, { "epoch": 0.453866014404131, "grad_norm": 2.2549335956573486, "learning_rate": 0.0009433007202065498, "loss": 3.6539, "step": 6680 }, { "epoch": 0.4542057344747928, "grad_norm": 2.762925148010254, "learning_rate": 0.0009432582551977171, "loss": 3.5137, "step": 6685 }, { "epoch": 0.45454545454545453, "grad_norm": 1.7609695196151733, "learning_rate": 0.0009432157901888843, "loss": 3.4662, "step": 6690 }, { "epoch": 0.45488517461611633, "grad_norm": 2.2214958667755127, "learning_rate": 0.0009431733251800516, "loss": 3.6491, "step": 6695 }, { "epoch": 0.45522489468677807, "grad_norm": 2.2352828979492188, "learning_rate": 0.000943130860171219, "loss": 3.39, "step": 6700 }, { "epoch": 0.45556461475743987, "grad_norm": 2.2089431285858154, "learning_rate": 0.0009430883951623862, "loss": 3.5442, "step": 6705 }, { "epoch": 0.45590433482810166, "grad_norm": 2.136549711227417, "learning_rate": 0.0009430459301535535, "loss": 3.7416, "step": 6710 }, { "epoch": 0.4562440548987634, "grad_norm": 1.5467194318771362, "learning_rate": 0.0009430034651447208, "loss": 3.7103, "step": 6715 }, { "epoch": 0.4565837749694252, "grad_norm": 1.5492006540298462, "learning_rate": 0.000942961000135888, "loss": 3.5005, "step": 6720 }, { "epoch": 0.45692349504008695, "grad_norm": 1.9842593669891357, "learning_rate": 0.0009429185351270552, "loss": 3.5536, "step": 6725 }, { "epoch": 0.45726321511074874, "grad_norm": 1.721670150756836, "learning_rate": 0.0009428760701182226, "loss": 3.6326, "step": 6730 }, { "epoch": 
0.45760293518141054, "grad_norm": 2.2003915309906006, "learning_rate": 0.0009428336051093899, "loss": 3.6236, "step": 6735 }, { "epoch": 0.4579426552520723, "grad_norm": 1.9771614074707031, "learning_rate": 0.0009427911401005571, "loss": 3.6297, "step": 6740 }, { "epoch": 0.4582823753227341, "grad_norm": 2.088775396347046, "learning_rate": 0.0009427486750917245, "loss": 3.6072, "step": 6745 }, { "epoch": 0.4586220953933958, "grad_norm": 1.626824140548706, "learning_rate": 0.0009427062100828917, "loss": 3.6602, "step": 6750 }, { "epoch": 0.4589618154640576, "grad_norm": 1.7100391387939453, "learning_rate": 0.0009426637450740589, "loss": 3.6769, "step": 6755 }, { "epoch": 0.4593015355347194, "grad_norm": 1.7326736450195312, "learning_rate": 0.0009426212800652263, "loss": 3.3841, "step": 6760 }, { "epoch": 0.45964125560538116, "grad_norm": 2.251054525375366, "learning_rate": 0.0009425788150563935, "loss": 3.6694, "step": 6765 }, { "epoch": 0.45998097567604296, "grad_norm": 1.8235591650009155, "learning_rate": 0.0009425363500475608, "loss": 3.6601, "step": 6770 }, { "epoch": 0.4603206957467047, "grad_norm": 1.7138705253601074, "learning_rate": 0.0009424938850387282, "loss": 3.5787, "step": 6775 }, { "epoch": 0.4606604158173665, "grad_norm": 1.7795994281768799, "learning_rate": 0.0009424514200298954, "loss": 3.7495, "step": 6780 }, { "epoch": 0.46100013588802824, "grad_norm": 1.7159169912338257, "learning_rate": 0.0009424089550210627, "loss": 3.7051, "step": 6785 }, { "epoch": 0.46133985595869004, "grad_norm": 1.8614548444747925, "learning_rate": 0.0009423664900122299, "loss": 3.5362, "step": 6790 }, { "epoch": 0.46167957602935183, "grad_norm": 2.041506767272949, "learning_rate": 0.0009423240250033972, "loss": 3.4729, "step": 6795 }, { "epoch": 0.4620192961000136, "grad_norm": 1.830488681793213, "learning_rate": 0.0009422815599945645, "loss": 3.6417, "step": 6800 }, { "epoch": 0.4623590161706754, "grad_norm": 2.1460328102111816, "learning_rate": 0.0009422390949857318, 
"loss": 3.5612, "step": 6805 }, { "epoch": 0.4626987362413371, "grad_norm": 2.2598912715911865, "learning_rate": 0.0009421966299768991, "loss": 3.5407, "step": 6810 }, { "epoch": 0.4630384563119989, "grad_norm": 1.811739206314087, "learning_rate": 0.0009421541649680664, "loss": 3.4532, "step": 6815 }, { "epoch": 0.4633781763826607, "grad_norm": 1.7173508405685425, "learning_rate": 0.0009421116999592336, "loss": 3.8933, "step": 6820 }, { "epoch": 0.46371789645332245, "grad_norm": 1.8505449295043945, "learning_rate": 0.0009420692349504008, "loss": 3.6833, "step": 6825 }, { "epoch": 0.46405761652398425, "grad_norm": 1.8023746013641357, "learning_rate": 0.0009420267699415682, "loss": 3.4734, "step": 6830 }, { "epoch": 0.464397336594646, "grad_norm": 1.8567525148391724, "learning_rate": 0.0009419843049327354, "loss": 3.4102, "step": 6835 }, { "epoch": 0.4647370566653078, "grad_norm": 1.4660717248916626, "learning_rate": 0.0009419418399239027, "loss": 3.7323, "step": 6840 }, { "epoch": 0.4650767767359696, "grad_norm": 1.499687910079956, "learning_rate": 0.0009418993749150701, "loss": 3.4355, "step": 6845 }, { "epoch": 0.46541649680663133, "grad_norm": 1.7819335460662842, "learning_rate": 0.0009418569099062373, "loss": 3.6545, "step": 6850 }, { "epoch": 0.4657562168772931, "grad_norm": 2.1856071949005127, "learning_rate": 0.0009418144448974045, "loss": 3.5808, "step": 6855 }, { "epoch": 0.46609593694795487, "grad_norm": 1.728170394897461, "learning_rate": 0.0009417719798885719, "loss": 3.6878, "step": 6860 }, { "epoch": 0.46643565701861667, "grad_norm": 1.5283757448196411, "learning_rate": 0.0009417295148797391, "loss": 3.4028, "step": 6865 }, { "epoch": 0.4667753770892784, "grad_norm": 1.7133967876434326, "learning_rate": 0.0009416870498709063, "loss": 3.5778, "step": 6870 }, { "epoch": 0.4671150971599402, "grad_norm": 1.9558247327804565, "learning_rate": 0.0009416445848620738, "loss": 3.3757, "step": 6875 }, { "epoch": 0.467454817230602, "grad_norm": 1.8532921075820923, 
"learning_rate": 0.000941602119853241, "loss": 3.4544, "step": 6880 }, { "epoch": 0.46779453730126375, "grad_norm": 1.807977557182312, "learning_rate": 0.0009415596548444082, "loss": 3.569, "step": 6885 }, { "epoch": 0.46813425737192554, "grad_norm": 1.4686120748519897, "learning_rate": 0.0009415171898355755, "loss": 3.7114, "step": 6890 }, { "epoch": 0.4684739774425873, "grad_norm": 2.0888619422912598, "learning_rate": 0.0009414747248267428, "loss": 3.6676, "step": 6895 }, { "epoch": 0.4688136975132491, "grad_norm": 1.7412678003311157, "learning_rate": 0.00094143225981791, "loss": 3.7681, "step": 6900 }, { "epoch": 0.4691534175839109, "grad_norm": 2.016752004623413, "learning_rate": 0.0009413897948090774, "loss": 3.7369, "step": 6905 }, { "epoch": 0.4694931376545726, "grad_norm": 2.444038152694702, "learning_rate": 0.0009413473298002447, "loss": 3.7087, "step": 6910 }, { "epoch": 0.4698328577252344, "grad_norm": 1.7053844928741455, "learning_rate": 0.0009413048647914119, "loss": 3.7192, "step": 6915 }, { "epoch": 0.47017257779589616, "grad_norm": 5.344666004180908, "learning_rate": 0.0009412623997825792, "loss": 3.4887, "step": 6920 }, { "epoch": 0.47051229786655796, "grad_norm": 1.5105865001678467, "learning_rate": 0.0009412199347737464, "loss": 3.5758, "step": 6925 }, { "epoch": 0.47085201793721976, "grad_norm": 2.1355557441711426, "learning_rate": 0.0009411774697649137, "loss": 3.6894, "step": 6930 }, { "epoch": 0.4711917380078815, "grad_norm": 1.9135714769363403, "learning_rate": 0.000941135004756081, "loss": 3.5421, "step": 6935 }, { "epoch": 0.4715314580785433, "grad_norm": 1.5393284559249878, "learning_rate": 0.0009410925397472483, "loss": 3.562, "step": 6940 }, { "epoch": 0.47187117814920504, "grad_norm": 1.8919819593429565, "learning_rate": 0.0009410500747384156, "loss": 3.7623, "step": 6945 }, { "epoch": 0.47221089821986684, "grad_norm": 1.6998759508132935, "learning_rate": 0.0009410076097295829, "loss": 3.5443, "step": 6950 }, { "epoch": 
0.4725506182905286, "grad_norm": 1.7785887718200684, "learning_rate": 0.0009409651447207501, "loss": 3.8263, "step": 6955 }, { "epoch": 0.4728903383611904, "grad_norm": 2.251643180847168, "learning_rate": 0.0009409226797119174, "loss": 3.7491, "step": 6960 }, { "epoch": 0.4732300584318522, "grad_norm": 2.4585165977478027, "learning_rate": 0.0009408802147030847, "loss": 3.6889, "step": 6965 }, { "epoch": 0.4735697785025139, "grad_norm": 1.4177275896072388, "learning_rate": 0.0009408377496942519, "loss": 3.3546, "step": 6970 }, { "epoch": 0.4739094985731757, "grad_norm": 1.9172967672348022, "learning_rate": 0.0009407952846854192, "loss": 3.6515, "step": 6975 }, { "epoch": 0.47424921864383746, "grad_norm": 1.4081403017044067, "learning_rate": 0.0009407528196765866, "loss": 3.7457, "step": 6980 }, { "epoch": 0.47458893871449925, "grad_norm": 1.64298415184021, "learning_rate": 0.0009407103546677538, "loss": 3.5398, "step": 6985 }, { "epoch": 0.47492865878516105, "grad_norm": 1.6681382656097412, "learning_rate": 0.000940667889658921, "loss": 3.5448, "step": 6990 }, { "epoch": 0.4752683788558228, "grad_norm": 1.8861947059631348, "learning_rate": 0.0009406254246500884, "loss": 3.286, "step": 6995 }, { "epoch": 0.4756080989264846, "grad_norm": 1.9252830743789673, "learning_rate": 0.0009405829596412556, "loss": 3.2298, "step": 7000 }, { "epoch": 0.47594781899714633, "grad_norm": 2.161397695541382, "learning_rate": 0.0009405404946324228, "loss": 3.9287, "step": 7005 }, { "epoch": 0.47628753906780813, "grad_norm": 1.8560709953308105, "learning_rate": 0.0009404980296235903, "loss": 3.7103, "step": 7010 }, { "epoch": 0.4766272591384699, "grad_norm": 2.185884952545166, "learning_rate": 0.0009404555646147575, "loss": 3.6161, "step": 7015 }, { "epoch": 0.47696697920913167, "grad_norm": 1.6079546213150024, "learning_rate": 0.0009404130996059247, "loss": 3.318, "step": 7020 }, { "epoch": 0.47730669927979347, "grad_norm": 1.734803318977356, "learning_rate": 0.000940370634597092, 
"loss": 3.735, "step": 7025 }, { "epoch": 0.4776464193504552, "grad_norm": 1.8888092041015625, "learning_rate": 0.0009403281695882593, "loss": 3.6586, "step": 7030 }, { "epoch": 0.477986139421117, "grad_norm": 1.787172794342041, "learning_rate": 0.0009402857045794265, "loss": 3.5105, "step": 7035 }, { "epoch": 0.47832585949177875, "grad_norm": 1.6202917098999023, "learning_rate": 0.0009402432395705938, "loss": 3.6567, "step": 7040 }, { "epoch": 0.47866557956244055, "grad_norm": 1.8496652841567993, "learning_rate": 0.0009402007745617612, "loss": 3.6493, "step": 7045 }, { "epoch": 0.47900529963310234, "grad_norm": 1.607838749885559, "learning_rate": 0.0009401583095529284, "loss": 3.5706, "step": 7050 }, { "epoch": 0.4793450197037641, "grad_norm": 2.1549699306488037, "learning_rate": 0.0009401158445440957, "loss": 3.7454, "step": 7055 }, { "epoch": 0.4796847397744259, "grad_norm": 1.5644909143447876, "learning_rate": 0.000940073379535263, "loss": 3.6789, "step": 7060 }, { "epoch": 0.4800244598450876, "grad_norm": 1.8074989318847656, "learning_rate": 0.0009400309145264302, "loss": 3.5556, "step": 7065 }, { "epoch": 0.4803641799157494, "grad_norm": 2.1472339630126953, "learning_rate": 0.0009399884495175975, "loss": 3.4773, "step": 7070 }, { "epoch": 0.4807038999864112, "grad_norm": 1.3506633043289185, "learning_rate": 0.0009399459845087647, "loss": 3.6626, "step": 7075 }, { "epoch": 0.48104362005707296, "grad_norm": 1.4777321815490723, "learning_rate": 0.0009399035194999321, "loss": 3.6548, "step": 7080 }, { "epoch": 0.48138334012773476, "grad_norm": 1.9772120714187622, "learning_rate": 0.0009398610544910994, "loss": 3.7057, "step": 7085 }, { "epoch": 0.4817230601983965, "grad_norm": 2.068317413330078, "learning_rate": 0.0009398185894822666, "loss": 3.5067, "step": 7090 }, { "epoch": 0.4820627802690583, "grad_norm": 1.8196656703948975, "learning_rate": 0.0009397761244734339, "loss": 3.7892, "step": 7095 }, { "epoch": 0.4824025003397201, "grad_norm": 1.2960206270217896, 
"learning_rate": 0.0009397336594646012, "loss": 3.5008, "step": 7100 }, { "epoch": 0.48274222041038184, "grad_norm": 2.1386220455169678, "learning_rate": 0.0009396911944557684, "loss": 3.565, "step": 7105 }, { "epoch": 0.48308194048104364, "grad_norm": 1.9952939748764038, "learning_rate": 0.0009396487294469356, "loss": 3.8128, "step": 7110 }, { "epoch": 0.4834216605517054, "grad_norm": 1.775578260421753, "learning_rate": 0.0009396062644381031, "loss": 3.5906, "step": 7115 }, { "epoch": 0.4837613806223672, "grad_norm": 1.6075574159622192, "learning_rate": 0.0009395637994292703, "loss": 3.603, "step": 7120 }, { "epoch": 0.4841011006930289, "grad_norm": 2.0619513988494873, "learning_rate": 0.0009395213344204377, "loss": 3.6965, "step": 7125 }, { "epoch": 0.4844408207636907, "grad_norm": 1.9764682054519653, "learning_rate": 0.0009394788694116049, "loss": 3.5657, "step": 7130 }, { "epoch": 0.4847805408343525, "grad_norm": 2.1248953342437744, "learning_rate": 0.0009394364044027721, "loss": 3.4825, "step": 7135 }, { "epoch": 0.48512026090501426, "grad_norm": 1.8626660108566284, "learning_rate": 0.0009393939393939394, "loss": 3.8041, "step": 7140 }, { "epoch": 0.48545998097567605, "grad_norm": 2.0224533081054688, "learning_rate": 0.0009393514743851067, "loss": 3.746, "step": 7145 }, { "epoch": 0.4857997010463378, "grad_norm": 1.7374697923660278, "learning_rate": 0.000939309009376274, "loss": 3.6854, "step": 7150 }, { "epoch": 0.4861394211169996, "grad_norm": 1.6987800598144531, "learning_rate": 0.0009392665443674413, "loss": 3.6879, "step": 7155 }, { "epoch": 0.4864791411876614, "grad_norm": 1.8735805749893188, "learning_rate": 0.0009392240793586086, "loss": 3.7014, "step": 7160 }, { "epoch": 0.48681886125832313, "grad_norm": 2.0647523403167725, "learning_rate": 0.0009391816143497758, "loss": 3.6051, "step": 7165 }, { "epoch": 0.48715858132898493, "grad_norm": 1.9533485174179077, "learning_rate": 0.0009391391493409431, "loss": 3.3424, "step": 7170 }, { "epoch": 
0.48749830139964667, "grad_norm": 2.075756549835205, "learning_rate": 0.0009390966843321103, "loss": 3.7404, "step": 7175 }, { "epoch": 0.48783802147030847, "grad_norm": 1.624358892440796, "learning_rate": 0.0009390542193232776, "loss": 3.7726, "step": 7180 }, { "epoch": 0.48817774154097027, "grad_norm": 2.071878433227539, "learning_rate": 0.000939011754314445, "loss": 3.526, "step": 7185 }, { "epoch": 0.488517461611632, "grad_norm": 1.570052146911621, "learning_rate": 0.0009389692893056122, "loss": 3.6368, "step": 7190 }, { "epoch": 0.4888571816822938, "grad_norm": 1.958909273147583, "learning_rate": 0.0009389268242967795, "loss": 3.7068, "step": 7195 }, { "epoch": 0.48919690175295555, "grad_norm": 1.7956711053848267, "learning_rate": 0.0009388843592879468, "loss": 3.5032, "step": 7200 }, { "epoch": 0.48953662182361735, "grad_norm": 1.8504315614700317, "learning_rate": 0.000938841894279114, "loss": 3.5543, "step": 7205 }, { "epoch": 0.4898763418942791, "grad_norm": 1.8722294569015503, "learning_rate": 0.0009387994292702812, "loss": 3.7804, "step": 7210 }, { "epoch": 0.4902160619649409, "grad_norm": 1.7130316495895386, "learning_rate": 0.0009387569642614486, "loss": 3.7187, "step": 7215 }, { "epoch": 0.4905557820356027, "grad_norm": 1.977596402168274, "learning_rate": 0.0009387144992526159, "loss": 3.502, "step": 7220 }, { "epoch": 0.4908955021062644, "grad_norm": 1.9869643449783325, "learning_rate": 0.0009386720342437831, "loss": 3.7239, "step": 7225 }, { "epoch": 0.4912352221769262, "grad_norm": 1.9572643041610718, "learning_rate": 0.0009386295692349505, "loss": 3.3732, "step": 7230 }, { "epoch": 0.49157494224758796, "grad_norm": 1.8034398555755615, "learning_rate": 0.0009385871042261177, "loss": 3.5871, "step": 7235 }, { "epoch": 0.49191466231824976, "grad_norm": 2.2631728649139404, "learning_rate": 0.0009385446392172849, "loss": 3.6887, "step": 7240 }, { "epoch": 0.49225438238891156, "grad_norm": 1.84742271900177, "learning_rate": 0.0009385021742084523, "loss": 
3.7087, "step": 7245 }, { "epoch": 0.4925941024595733, "grad_norm": 1.7904912233352661, "learning_rate": 0.0009384597091996195, "loss": 3.6815, "step": 7250 }, { "epoch": 0.4929338225302351, "grad_norm": 1.866523265838623, "learning_rate": 0.0009384172441907868, "loss": 3.1882, "step": 7255 }, { "epoch": 0.49327354260089684, "grad_norm": 1.968131422996521, "learning_rate": 0.0009383747791819542, "loss": 3.6663, "step": 7260 }, { "epoch": 0.49361326267155864, "grad_norm": 1.6553910970687866, "learning_rate": 0.0009383323141731214, "loss": 3.6461, "step": 7265 }, { "epoch": 0.49395298274222044, "grad_norm": 1.7364695072174072, "learning_rate": 0.0009382898491642886, "loss": 3.5949, "step": 7270 }, { "epoch": 0.4942927028128822, "grad_norm": 1.9017541408538818, "learning_rate": 0.0009382473841554559, "loss": 3.4513, "step": 7275 }, { "epoch": 0.494632422883544, "grad_norm": 1.7761356830596924, "learning_rate": 0.0009382049191466232, "loss": 3.6479, "step": 7280 }, { "epoch": 0.4949721429542057, "grad_norm": 1.6567416191101074, "learning_rate": 0.0009381624541377904, "loss": 3.8322, "step": 7285 }, { "epoch": 0.4953118630248675, "grad_norm": 1.9109162092208862, "learning_rate": 0.0009381199891289578, "loss": 3.479, "step": 7290 }, { "epoch": 0.49565158309552926, "grad_norm": 2.3615670204162598, "learning_rate": 0.0009380775241201251, "loss": 3.5768, "step": 7295 }, { "epoch": 0.49599130316619106, "grad_norm": 1.6070575714111328, "learning_rate": 0.0009380350591112923, "loss": 3.7373, "step": 7300 }, { "epoch": 0.49633102323685285, "grad_norm": 1.7815711498260498, "learning_rate": 0.0009379925941024596, "loss": 3.5444, "step": 7305 }, { "epoch": 0.4966707433075146, "grad_norm": 1.6682283878326416, "learning_rate": 0.0009379501290936269, "loss": 3.5954, "step": 7310 }, { "epoch": 0.4970104633781764, "grad_norm": 1.5873022079467773, "learning_rate": 0.0009379076640847941, "loss": 3.5169, "step": 7315 }, { "epoch": 0.49735018344883813, "grad_norm": 1.9068167209625244, 
"learning_rate": 0.0009378651990759614, "loss": 3.8537, "step": 7320 }, { "epoch": 0.49768990351949993, "grad_norm": 1.8835747241973877, "learning_rate": 0.0009378227340671287, "loss": 3.6198, "step": 7325 }, { "epoch": 0.49802962359016173, "grad_norm": 1.505694031715393, "learning_rate": 0.000937780269058296, "loss": 3.6553, "step": 7330 }, { "epoch": 0.49836934366082347, "grad_norm": 1.7213826179504395, "learning_rate": 0.0009377378040494633, "loss": 3.6731, "step": 7335 }, { "epoch": 0.49870906373148527, "grad_norm": 1.6704955101013184, "learning_rate": 0.0009376953390406305, "loss": 3.7818, "step": 7340 }, { "epoch": 0.499048783802147, "grad_norm": 1.7049568891525269, "learning_rate": 0.0009376528740317978, "loss": 3.7059, "step": 7345 }, { "epoch": 0.4993885038728088, "grad_norm": 1.683502435684204, "learning_rate": 0.0009376104090229651, "loss": 3.3105, "step": 7350 }, { "epoch": 0.4997282239434706, "grad_norm": 1.7000365257263184, "learning_rate": 0.0009375679440141323, "loss": 3.5882, "step": 7355 }, { "epoch": 0.5000679440141323, "grad_norm": 1.8426121473312378, "learning_rate": 0.0009375254790052997, "loss": 3.6742, "step": 7360 }, { "epoch": 0.5004076640847941, "grad_norm": 1.777592658996582, "learning_rate": 0.000937483013996467, "loss": 3.6922, "step": 7365 }, { "epoch": 0.5007473841554559, "grad_norm": 2.14231538772583, "learning_rate": 0.0009374405489876342, "loss": 3.8406, "step": 7370 }, { "epoch": 0.5010871042261177, "grad_norm": 2.071455717086792, "learning_rate": 0.0009373980839788014, "loss": 3.712, "step": 7375 }, { "epoch": 0.5014268242967794, "grad_norm": 1.6444952487945557, "learning_rate": 0.0009373556189699688, "loss": 3.8843, "step": 7380 }, { "epoch": 0.5017665443674413, "grad_norm": 1.7723790407180786, "learning_rate": 0.000937313153961136, "loss": 3.8371, "step": 7385 }, { "epoch": 0.502106264438103, "grad_norm": 1.6483027935028076, "learning_rate": 0.0009372706889523032, "loss": 3.3719, "step": 7390 }, { "epoch": 0.5024459845087648, 
"grad_norm": 2.1090896129608154, "learning_rate": 0.0009372282239434707, "loss": 3.6466, "step": 7395 }, { "epoch": 0.5027857045794265, "grad_norm": 2.077058792114258, "learning_rate": 0.0009371857589346379, "loss": 3.6998, "step": 7400 }, { "epoch": 0.5031254246500884, "grad_norm": 2.1352155208587646, "learning_rate": 0.0009371432939258051, "loss": 3.6197, "step": 7405 }, { "epoch": 0.5034651447207501, "grad_norm": 1.8947709798812866, "learning_rate": 0.0009371008289169725, "loss": 3.5701, "step": 7410 }, { "epoch": 0.5038048647914118, "grad_norm": 1.8237416744232178, "learning_rate": 0.0009370583639081397, "loss": 3.566, "step": 7415 }, { "epoch": 0.5041445848620737, "grad_norm": 2.2022979259490967, "learning_rate": 0.0009370158988993069, "loss": 3.6237, "step": 7420 }, { "epoch": 0.5044843049327354, "grad_norm": 2.1901113986968994, "learning_rate": 0.0009369734338904742, "loss": 3.5566, "step": 7425 }, { "epoch": 0.5048240250033972, "grad_norm": 1.7077343463897705, "learning_rate": 0.0009369309688816416, "loss": 3.6073, "step": 7430 }, { "epoch": 0.5051637450740589, "grad_norm": 1.25267493724823, "learning_rate": 0.0009368885038728088, "loss": 3.6476, "step": 7435 }, { "epoch": 0.5055034651447208, "grad_norm": 1.5486775636672974, "learning_rate": 0.0009368460388639761, "loss": 3.5362, "step": 7440 }, { "epoch": 0.5058431852153825, "grad_norm": 1.3959455490112305, "learning_rate": 0.0009368035738551434, "loss": 3.5793, "step": 7445 }, { "epoch": 0.5061829052860443, "grad_norm": 1.435024619102478, "learning_rate": 0.0009367611088463106, "loss": 3.4947, "step": 7450 }, { "epoch": 0.5065226253567061, "grad_norm": 2.1510794162750244, "learning_rate": 0.0009367186438374779, "loss": 3.5505, "step": 7455 }, { "epoch": 0.5068623454273679, "grad_norm": 1.9566560983657837, "learning_rate": 0.0009366761788286451, "loss": 3.5697, "step": 7460 }, { "epoch": 0.5072020654980296, "grad_norm": 2.0401763916015625, "learning_rate": 0.0009366337138198125, "loss": 3.755, "step": 7465 
}, { "epoch": 0.5075417855686915, "grad_norm": 1.928633213043213, "learning_rate": 0.0009365912488109798, "loss": 3.3659, "step": 7470 }, { "epoch": 0.5078815056393532, "grad_norm": 1.547893762588501, "learning_rate": 0.000936548783802147, "loss": 3.7375, "step": 7475 }, { "epoch": 0.5082212257100149, "grad_norm": 1.7905452251434326, "learning_rate": 0.0009365063187933144, "loss": 3.3578, "step": 7480 }, { "epoch": 0.5085609457806767, "grad_norm": 1.8393946886062622, "learning_rate": 0.0009364638537844816, "loss": 3.8487, "step": 7485 }, { "epoch": 0.5089006658513385, "grad_norm": 1.788972020149231, "learning_rate": 0.0009364213887756488, "loss": 3.4132, "step": 7490 }, { "epoch": 0.5092403859220003, "grad_norm": 1.8866252899169922, "learning_rate": 0.0009363789237668163, "loss": 3.7949, "step": 7495 }, { "epoch": 0.509580105992662, "grad_norm": 1.8186125755310059, "learning_rate": 0.0009363364587579835, "loss": 3.6783, "step": 7500 }, { "epoch": 0.5099198260633239, "grad_norm": 1.4656366109848022, "learning_rate": 0.0009362939937491507, "loss": 3.6659, "step": 7505 }, { "epoch": 0.5102595461339856, "grad_norm": 1.6327162981033325, "learning_rate": 0.000936251528740318, "loss": 3.785, "step": 7510 }, { "epoch": 0.5105992662046474, "grad_norm": 1.8041733503341675, "learning_rate": 0.0009362090637314853, "loss": 3.6811, "step": 7515 }, { "epoch": 0.5109389862753091, "grad_norm": 1.8657257556915283, "learning_rate": 0.0009361665987226525, "loss": 3.7483, "step": 7520 }, { "epoch": 0.511278706345971, "grad_norm": 2.0020978450775146, "learning_rate": 0.0009361241337138198, "loss": 3.6799, "step": 7525 }, { "epoch": 0.5116184264166327, "grad_norm": 2.2985899448394775, "learning_rate": 0.0009360816687049872, "loss": 3.7361, "step": 7530 }, { "epoch": 0.5119581464872944, "grad_norm": 2.3885738849639893, "learning_rate": 0.0009360392036961544, "loss": 3.6494, "step": 7535 }, { "epoch": 0.5122978665579563, "grad_norm": 1.48610258102417, "learning_rate": 
0.0009359967386873217, "loss": 3.742, "step": 7540 }, { "epoch": 0.512637586628618, "grad_norm": 1.6918708086013794, "learning_rate": 0.000935954273678489, "loss": 3.4367, "step": 7545 }, { "epoch": 0.5129773066992798, "grad_norm": 1.605992078781128, "learning_rate": 0.0009359118086696562, "loss": 3.5314, "step": 7550 }, { "epoch": 0.5133170267699416, "grad_norm": 1.4339370727539062, "learning_rate": 0.0009358693436608235, "loss": 3.6224, "step": 7555 }, { "epoch": 0.5136567468406034, "grad_norm": 1.9084240198135376, "learning_rate": 0.0009358268786519907, "loss": 3.6809, "step": 7560 }, { "epoch": 0.5139964669112651, "grad_norm": 2.0481855869293213, "learning_rate": 0.0009357844136431581, "loss": 3.758, "step": 7565 }, { "epoch": 0.5143361869819268, "grad_norm": 1.5854063034057617, "learning_rate": 0.0009357419486343254, "loss": 3.6495, "step": 7570 }, { "epoch": 0.5146759070525887, "grad_norm": 2.080742597579956, "learning_rate": 0.0009356994836254926, "loss": 3.6321, "step": 7575 }, { "epoch": 0.5150156271232504, "grad_norm": 2.2340991497039795, "learning_rate": 0.0009356570186166599, "loss": 3.7258, "step": 7580 }, { "epoch": 0.5153553471939122, "grad_norm": 1.8672842979431152, "learning_rate": 0.0009356145536078272, "loss": 3.6893, "step": 7585 }, { "epoch": 0.515695067264574, "grad_norm": 1.6372299194335938, "learning_rate": 0.0009355720885989944, "loss": 3.6785, "step": 7590 }, { "epoch": 0.5160347873352358, "grad_norm": 2.2254903316497803, "learning_rate": 0.0009355296235901617, "loss": 3.5487, "step": 7595 }, { "epoch": 0.5163745074058975, "grad_norm": 1.4272196292877197, "learning_rate": 0.0009354871585813291, "loss": 3.6771, "step": 7600 }, { "epoch": 0.5167142274765593, "grad_norm": 1.5992006063461304, "learning_rate": 0.0009354446935724963, "loss": 3.5021, "step": 7605 }, { "epoch": 0.5170539475472211, "grad_norm": 2.448000907897949, "learning_rate": 0.0009354022285636635, "loss": 3.5478, "step": 7610 }, { "epoch": 0.5173936676178829, "grad_norm": 
2.1374270915985107, "learning_rate": 0.0009353597635548309, "loss": 3.2972, "step": 7615 }, { "epoch": 0.5177333876885446, "grad_norm": 1.3660354614257812, "learning_rate": 0.0009353172985459981, "loss": 3.7316, "step": 7620 }, { "epoch": 0.5180731077592065, "grad_norm": 1.5693408250808716, "learning_rate": 0.0009352748335371653, "loss": 3.7326, "step": 7625 }, { "epoch": 0.5184128278298682, "grad_norm": 1.955303430557251, "learning_rate": 0.0009352323685283327, "loss": 3.5296, "step": 7630 }, { "epoch": 0.5187525479005299, "grad_norm": 2.354243040084839, "learning_rate": 0.0009351899035195, "loss": 3.8071, "step": 7635 }, { "epoch": 0.5190922679711918, "grad_norm": 1.489240050315857, "learning_rate": 0.0009351474385106672, "loss": 3.6362, "step": 7640 }, { "epoch": 0.5194319880418535, "grad_norm": 1.9244869947433472, "learning_rate": 0.0009351049735018346, "loss": 3.6155, "step": 7645 }, { "epoch": 0.5197717081125153, "grad_norm": 2.202951431274414, "learning_rate": 0.0009350625084930018, "loss": 3.6275, "step": 7650 }, { "epoch": 0.520111428183177, "grad_norm": 1.8837103843688965, "learning_rate": 0.000935020043484169, "loss": 3.4416, "step": 7655 }, { "epoch": 0.5204511482538389, "grad_norm": 1.7075027227401733, "learning_rate": 0.0009349775784753363, "loss": 3.5164, "step": 7660 }, { "epoch": 0.5207908683245006, "grad_norm": 1.9859100580215454, "learning_rate": 0.0009349351134665036, "loss": 3.4835, "step": 7665 }, { "epoch": 0.5211305883951624, "grad_norm": 1.7298312187194824, "learning_rate": 0.0009348926484576709, "loss": 3.6317, "step": 7670 }, { "epoch": 0.5214703084658242, "grad_norm": 1.706390380859375, "learning_rate": 0.0009348501834488382, "loss": 3.4453, "step": 7675 }, { "epoch": 0.521810028536486, "grad_norm": 1.516219139099121, "learning_rate": 0.0009348077184400055, "loss": 3.6385, "step": 7680 }, { "epoch": 0.5221497486071477, "grad_norm": 1.7676199674606323, "learning_rate": 0.0009347652534311727, "loss": 3.6235, "step": 7685 }, { "epoch": 
0.5224894686778094, "grad_norm": 1.6288286447525024, "learning_rate": 0.00093472278842234, "loss": 3.71, "step": 7690 }, { "epoch": 0.5228291887484713, "grad_norm": 1.750823974609375, "learning_rate": 0.0009346803234135073, "loss": 3.3913, "step": 7695 }, { "epoch": 0.523168908819133, "grad_norm": 1.510366678237915, "learning_rate": 0.0009346378584046745, "loss": 3.6503, "step": 7700 }, { "epoch": 0.5235086288897948, "grad_norm": 1.737229347229004, "learning_rate": 0.0009345953933958419, "loss": 3.889, "step": 7705 }, { "epoch": 0.5238483489604566, "grad_norm": 1.67311429977417, "learning_rate": 0.0009345529283870091, "loss": 3.5427, "step": 7710 }, { "epoch": 0.5241880690311184, "grad_norm": 2.1415138244628906, "learning_rate": 0.0009345104633781764, "loss": 3.673, "step": 7715 }, { "epoch": 0.5245277891017801, "grad_norm": 1.8786710500717163, "learning_rate": 0.0009344679983693437, "loss": 3.6083, "step": 7720 }, { "epoch": 0.524867509172442, "grad_norm": 1.72193443775177, "learning_rate": 0.0009344255333605109, "loss": 3.6442, "step": 7725 }, { "epoch": 0.5252072292431037, "grad_norm": 1.7616075277328491, "learning_rate": 0.0009343830683516782, "loss": 3.564, "step": 7730 }, { "epoch": 0.5255469493137654, "grad_norm": 1.6592762470245361, "learning_rate": 0.0009343406033428455, "loss": 3.4524, "step": 7735 }, { "epoch": 0.5258866693844272, "grad_norm": 1.897073745727539, "learning_rate": 0.0009342981383340128, "loss": 3.6075, "step": 7740 }, { "epoch": 0.526226389455089, "grad_norm": 1.7323424816131592, "learning_rate": 0.0009342556733251801, "loss": 3.5451, "step": 7745 }, { "epoch": 0.5265661095257508, "grad_norm": 1.7599256038665771, "learning_rate": 0.0009342132083163474, "loss": 3.7838, "step": 7750 }, { "epoch": 0.5269058295964125, "grad_norm": 1.9775245189666748, "learning_rate": 0.0009341707433075146, "loss": 3.8024, "step": 7755 }, { "epoch": 0.5272455496670744, "grad_norm": 2.482767105102539, "learning_rate": 0.0009341282782986818, "loss": 3.207, 
"step": 7760 }, { "epoch": 0.5275852697377361, "grad_norm": 1.9170703887939453, "learning_rate": 0.0009340858132898492, "loss": 3.7846, "step": 7765 }, { "epoch": 0.5279249898083979, "grad_norm": 2.5882420539855957, "learning_rate": 0.0009340433482810164, "loss": 3.6127, "step": 7770 }, { "epoch": 0.5282647098790596, "grad_norm": 2.0136306285858154, "learning_rate": 0.0009340008832721837, "loss": 3.8307, "step": 7775 }, { "epoch": 0.5286044299497215, "grad_norm": 2.029428243637085, "learning_rate": 0.0009339584182633511, "loss": 3.3032, "step": 7780 }, { "epoch": 0.5289441500203832, "grad_norm": 1.7472927570343018, "learning_rate": 0.0009339159532545183, "loss": 3.8339, "step": 7785 }, { "epoch": 0.5292838700910449, "grad_norm": 1.7058749198913574, "learning_rate": 0.0009338734882456855, "loss": 3.8412, "step": 7790 }, { "epoch": 0.5296235901617068, "grad_norm": 1.9433486461639404, "learning_rate": 0.0009338310232368529, "loss": 3.7376, "step": 7795 }, { "epoch": 0.5299633102323685, "grad_norm": 2.1130709648132324, "learning_rate": 0.0009337885582280201, "loss": 3.4962, "step": 7800 }, { "epoch": 0.5303030303030303, "grad_norm": 2.403587818145752, "learning_rate": 0.0009337460932191873, "loss": 3.5073, "step": 7805 }, { "epoch": 0.5306427503736921, "grad_norm": 2.297070026397705, "learning_rate": 0.0009337036282103547, "loss": 3.4896, "step": 7810 }, { "epoch": 0.5309824704443539, "grad_norm": 2.1578752994537354, "learning_rate": 0.000933661163201522, "loss": 3.6424, "step": 7815 }, { "epoch": 0.5313221905150156, "grad_norm": 1.6395888328552246, "learning_rate": 0.0009336186981926893, "loss": 3.6984, "step": 7820 }, { "epoch": 0.5316619105856774, "grad_norm": 1.8796418905258179, "learning_rate": 0.0009335762331838565, "loss": 3.7351, "step": 7825 }, { "epoch": 0.5320016306563392, "grad_norm": 2.0628433227539062, "learning_rate": 0.0009335337681750238, "loss": 3.525, "step": 7830 }, { "epoch": 0.532341350727001, "grad_norm": 1.8997611999511719, "learning_rate": 
0.0009334913031661911, "loss": 3.7489, "step": 7835 }, { "epoch": 0.5326810707976627, "grad_norm": 2.1866347789764404, "learning_rate": 0.0009334488381573583, "loss": 3.9032, "step": 7840 }, { "epoch": 0.5330207908683245, "grad_norm": 1.800102949142456, "learning_rate": 0.0009334063731485257, "loss": 3.5809, "step": 7845 }, { "epoch": 0.5333605109389863, "grad_norm": 1.4633499383926392, "learning_rate": 0.000933363908139693, "loss": 3.6471, "step": 7850 }, { "epoch": 0.533700231009648, "grad_norm": 1.4310908317565918, "learning_rate": 0.0009333214431308602, "loss": 3.6714, "step": 7855 }, { "epoch": 0.5340399510803098, "grad_norm": 1.6666985750198364, "learning_rate": 0.0009332789781220274, "loss": 3.6199, "step": 7860 }, { "epoch": 0.5343796711509716, "grad_norm": 2.3838517665863037, "learning_rate": 0.0009332365131131948, "loss": 3.7035, "step": 7865 }, { "epoch": 0.5347193912216334, "grad_norm": 1.6109825372695923, "learning_rate": 0.000933194048104362, "loss": 3.6714, "step": 7870 }, { "epoch": 0.5350591112922951, "grad_norm": 1.5236012935638428, "learning_rate": 0.0009331515830955292, "loss": 3.6602, "step": 7875 }, { "epoch": 0.535398831362957, "grad_norm": 2.2005584239959717, "learning_rate": 0.0009331091180866967, "loss": 3.7972, "step": 7880 }, { "epoch": 0.5357385514336187, "grad_norm": 1.9768003225326538, "learning_rate": 0.0009330666530778639, "loss": 3.5304, "step": 7885 }, { "epoch": 0.5360782715042804, "grad_norm": 1.6775590181350708, "learning_rate": 0.0009330241880690311, "loss": 3.8225, "step": 7890 }, { "epoch": 0.5364179915749423, "grad_norm": 1.8896383047103882, "learning_rate": 0.0009329817230601985, "loss": 3.4992, "step": 7895 }, { "epoch": 0.536757711645604, "grad_norm": 1.6054203510284424, "learning_rate": 0.0009329392580513657, "loss": 3.3415, "step": 7900 }, { "epoch": 0.5370974317162658, "grad_norm": 1.9882280826568604, "learning_rate": 0.0009328967930425329, "loss": 3.783, "step": 7905 }, { "epoch": 0.5374371517869275, "grad_norm": 
1.5988709926605225, "learning_rate": 0.0009328543280337002, "loss": 3.7066, "step": 7910 }, { "epoch": 0.5377768718575894, "grad_norm": 1.4532543420791626, "learning_rate": 0.0009328118630248676, "loss": 3.7479, "step": 7915 }, { "epoch": 0.5381165919282511, "grad_norm": 1.860560417175293, "learning_rate": 0.0009327693980160348, "loss": 3.4987, "step": 7920 }, { "epoch": 0.5384563119989129, "grad_norm": 1.8620414733886719, "learning_rate": 0.0009327269330072021, "loss": 3.6011, "step": 7925 }, { "epoch": 0.5387960320695747, "grad_norm": 1.6990619897842407, "learning_rate": 0.0009326844679983694, "loss": 3.8667, "step": 7930 }, { "epoch": 0.5391357521402365, "grad_norm": 1.610358476638794, "learning_rate": 0.0009326420029895366, "loss": 3.7609, "step": 7935 }, { "epoch": 0.5394754722108982, "grad_norm": 2.4134366512298584, "learning_rate": 0.0009325995379807039, "loss": 3.5457, "step": 7940 }, { "epoch": 0.5398151922815599, "grad_norm": 1.999536395072937, "learning_rate": 0.0009325570729718711, "loss": 3.6648, "step": 7945 }, { "epoch": 0.5401549123522218, "grad_norm": 2.1275837421417236, "learning_rate": 0.0009325146079630385, "loss": 3.7706, "step": 7950 }, { "epoch": 0.5404946324228835, "grad_norm": 1.7118467092514038, "learning_rate": 0.0009324721429542058, "loss": 3.3694, "step": 7955 }, { "epoch": 0.5408343524935453, "grad_norm": 1.835548996925354, "learning_rate": 0.000932429677945373, "loss": 3.6007, "step": 7960 }, { "epoch": 0.5411740725642071, "grad_norm": 1.377042293548584, "learning_rate": 0.0009323872129365403, "loss": 3.6396, "step": 7965 }, { "epoch": 0.5415137926348689, "grad_norm": 2.3367950916290283, "learning_rate": 0.0009323447479277076, "loss": 3.55, "step": 7970 }, { "epoch": 0.5418535127055306, "grad_norm": 1.4968597888946533, "learning_rate": 0.0009323022829188748, "loss": 3.6356, "step": 7975 }, { "epoch": 0.5421932327761925, "grad_norm": 1.5773757696151733, "learning_rate": 0.0009322598179100421, "loss": 3.602, "step": 7980 }, { "epoch": 
0.5425329528468542, "grad_norm": 1.8352891206741333, "learning_rate": 0.0009322173529012095, "loss": 3.6952, "step": 7985 }, { "epoch": 0.542872672917516, "grad_norm": 1.683901309967041, "learning_rate": 0.0009321748878923767, "loss": 3.6688, "step": 7990 }, { "epoch": 0.5432123929881777, "grad_norm": 1.9745949506759644, "learning_rate": 0.000932132422883544, "loss": 3.651, "step": 7995 }, { "epoch": 0.5435521130588395, "grad_norm": 2.4092304706573486, "learning_rate": 0.0009320899578747113, "loss": 3.7194, "step": 8000 }, { "epoch": 0.5438918331295013, "grad_norm": 1.7927699089050293, "learning_rate": 0.0009320474928658785, "loss": 3.4899, "step": 8005 }, { "epoch": 0.544231553200163, "grad_norm": 1.8242192268371582, "learning_rate": 0.0009320050278570457, "loss": 3.6309, "step": 8010 }, { "epoch": 0.5445712732708249, "grad_norm": 1.4554797410964966, "learning_rate": 0.0009319625628482131, "loss": 3.6038, "step": 8015 }, { "epoch": 0.5449109933414866, "grad_norm": 1.925947666168213, "learning_rate": 0.0009319200978393804, "loss": 3.6695, "step": 8020 }, { "epoch": 0.5452507134121484, "grad_norm": 1.7721742391586304, "learning_rate": 0.0009318776328305476, "loss": 3.7945, "step": 8025 }, { "epoch": 0.5455904334828101, "grad_norm": 1.9283572435379028, "learning_rate": 0.000931835167821715, "loss": 3.5693, "step": 8030 }, { "epoch": 0.545930153553472, "grad_norm": 1.4044153690338135, "learning_rate": 0.0009317927028128822, "loss": 3.6094, "step": 8035 }, { "epoch": 0.5462698736241337, "grad_norm": 1.9214093685150146, "learning_rate": 0.0009317502378040494, "loss": 3.4012, "step": 8040 }, { "epoch": 0.5466095936947954, "grad_norm": 1.922947645187378, "learning_rate": 0.0009317077727952168, "loss": 3.4826, "step": 8045 }, { "epoch": 0.5469493137654573, "grad_norm": 2.0845863819122314, "learning_rate": 0.000931665307786384, "loss": 3.4752, "step": 8050 }, { "epoch": 0.547289033836119, "grad_norm": 1.727854609489441, "learning_rate": 0.0009316228427775513, "loss": 
3.5897, "step": 8055 }, { "epoch": 0.5476287539067808, "grad_norm": 1.876293659210205, "learning_rate": 0.0009315803777687186, "loss": 3.6107, "step": 8060 }, { "epoch": 0.5479684739774426, "grad_norm": 1.8344733715057373, "learning_rate": 0.0009315379127598859, "loss": 3.5917, "step": 8065 }, { "epoch": 0.5483081940481044, "grad_norm": 1.9191696643829346, "learning_rate": 0.0009314954477510531, "loss": 3.7156, "step": 8070 }, { "epoch": 0.5486479141187661, "grad_norm": 1.470839023590088, "learning_rate": 0.0009314529827422204, "loss": 3.5524, "step": 8075 }, { "epoch": 0.5489876341894279, "grad_norm": 1.509290337562561, "learning_rate": 0.0009314105177333877, "loss": 3.6382, "step": 8080 }, { "epoch": 0.5493273542600897, "grad_norm": 1.8427354097366333, "learning_rate": 0.0009313680527245549, "loss": 3.6167, "step": 8085 }, { "epoch": 0.5496670743307515, "grad_norm": 2.0404887199401855, "learning_rate": 0.0009313255877157223, "loss": 3.6772, "step": 8090 }, { "epoch": 0.5500067944014132, "grad_norm": 2.4883944988250732, "learning_rate": 0.0009312831227068896, "loss": 3.5771, "step": 8095 }, { "epoch": 0.550346514472075, "grad_norm": 2.1654672622680664, "learning_rate": 0.0009312406576980568, "loss": 3.6935, "step": 8100 }, { "epoch": 0.5506862345427368, "grad_norm": 2.1290266513824463, "learning_rate": 0.0009311981926892241, "loss": 3.5096, "step": 8105 }, { "epoch": 0.5510259546133985, "grad_norm": 1.9634230136871338, "learning_rate": 0.0009311557276803913, "loss": 3.5114, "step": 8110 }, { "epoch": 0.5513656746840603, "grad_norm": 1.6765480041503906, "learning_rate": 0.0009311132626715586, "loss": 3.4907, "step": 8115 }, { "epoch": 0.5517053947547221, "grad_norm": 1.8738046884536743, "learning_rate": 0.000931070797662726, "loss": 3.5709, "step": 8120 }, { "epoch": 0.5520451148253839, "grad_norm": 1.6958918571472168, "learning_rate": 0.0009310283326538932, "loss": 3.701, "step": 8125 }, { "epoch": 0.5523848348960456, "grad_norm": 2.3113629817962646, 
"learning_rate": 0.0009309858676450605, "loss": 3.7602, "step": 8130 }, { "epoch": 0.5527245549667075, "grad_norm": 1.6998616456985474, "learning_rate": 0.0009309434026362278, "loss": 3.4278, "step": 8135 }, { "epoch": 0.5530642750373692, "grad_norm": 1.8079131841659546, "learning_rate": 0.000930900937627395, "loss": 3.5519, "step": 8140 }, { "epoch": 0.553403995108031, "grad_norm": 1.8042770624160767, "learning_rate": 0.0009308584726185622, "loss": 3.6608, "step": 8145 }, { "epoch": 0.5537437151786928, "grad_norm": 2.0598206520080566, "learning_rate": 0.0009308160076097296, "loss": 3.4891, "step": 8150 }, { "epoch": 0.5540834352493546, "grad_norm": 2.6843669414520264, "learning_rate": 0.0009307735426008969, "loss": 3.6462, "step": 8155 }, { "epoch": 0.5544231553200163, "grad_norm": 1.862623929977417, "learning_rate": 0.0009307310775920642, "loss": 3.4817, "step": 8160 }, { "epoch": 0.554762875390678, "grad_norm": 1.8833339214324951, "learning_rate": 0.0009306886125832315, "loss": 3.7006, "step": 8165 }, { "epoch": 0.5551025954613399, "grad_norm": 1.4817801713943481, "learning_rate": 0.0009306461475743987, "loss": 3.7307, "step": 8170 }, { "epoch": 0.5554423155320016, "grad_norm": 1.595192313194275, "learning_rate": 0.000930603682565566, "loss": 3.6901, "step": 8175 }, { "epoch": 0.5557820356026634, "grad_norm": 1.6162148714065552, "learning_rate": 0.0009305612175567333, "loss": 3.6197, "step": 8180 }, { "epoch": 0.5561217556733252, "grad_norm": 1.4800665378570557, "learning_rate": 0.0009305187525479005, "loss": 3.5168, "step": 8185 }, { "epoch": 0.556461475743987, "grad_norm": 1.810594081878662, "learning_rate": 0.0009304762875390679, "loss": 3.6385, "step": 8190 }, { "epoch": 0.5568011958146487, "grad_norm": 2.353928804397583, "learning_rate": 0.0009304338225302352, "loss": 3.6352, "step": 8195 }, { "epoch": 0.5571409158853105, "grad_norm": 1.3925055265426636, "learning_rate": 0.0009303913575214024, "loss": 3.5832, "step": 8200 }, { "epoch": 0.5574806359559723, 
"grad_norm": 2.0510220527648926, "learning_rate": 0.0009303488925125697, "loss": 3.5239, "step": 8205 }, { "epoch": 0.557820356026634, "grad_norm": 1.9744503498077393, "learning_rate": 0.0009303064275037369, "loss": 3.5724, "step": 8210 }, { "epoch": 0.5581600760972958, "grad_norm": 1.791314721107483, "learning_rate": 0.0009302639624949042, "loss": 3.5479, "step": 8215 }, { "epoch": 0.5584997961679576, "grad_norm": 1.5230447053909302, "learning_rate": 0.0009302214974860715, "loss": 3.7405, "step": 8220 }, { "epoch": 0.5588395162386194, "grad_norm": 2.0632472038269043, "learning_rate": 0.0009301790324772388, "loss": 3.5237, "step": 8225 }, { "epoch": 0.5591792363092811, "grad_norm": 1.6589200496673584, "learning_rate": 0.0009301365674684061, "loss": 3.7873, "step": 8230 }, { "epoch": 0.559518956379943, "grad_norm": 1.97246515750885, "learning_rate": 0.0009300941024595734, "loss": 3.5955, "step": 8235 }, { "epoch": 0.5598586764506047, "grad_norm": 1.5142838954925537, "learning_rate": 0.0009300516374507406, "loss": 3.8366, "step": 8240 }, { "epoch": 0.5601983965212665, "grad_norm": 1.8630400896072388, "learning_rate": 0.0009300091724419078, "loss": 3.4054, "step": 8245 }, { "epoch": 0.5605381165919282, "grad_norm": 2.009117841720581, "learning_rate": 0.0009299667074330752, "loss": 3.6035, "step": 8250 }, { "epoch": 0.5608778366625901, "grad_norm": 1.6413780450820923, "learning_rate": 0.0009299242424242424, "loss": 3.4344, "step": 8255 }, { "epoch": 0.5612175567332518, "grad_norm": 1.8356825113296509, "learning_rate": 0.0009298817774154097, "loss": 3.6525, "step": 8260 }, { "epoch": 0.5615572768039135, "grad_norm": 1.4960434436798096, "learning_rate": 0.0009298393124065771, "loss": 3.5955, "step": 8265 }, { "epoch": 0.5618969968745754, "grad_norm": 1.458255410194397, "learning_rate": 0.0009297968473977443, "loss": 3.4435, "step": 8270 }, { "epoch": 0.5622367169452371, "grad_norm": 2.086765766143799, "learning_rate": 0.0009297543823889115, "loss": 3.4487, "step": 8275 
}, { "epoch": 0.5625764370158989, "grad_norm": 2.3883235454559326, "learning_rate": 0.0009297119173800789, "loss": 3.6784, "step": 8280 }, { "epoch": 0.5629161570865606, "grad_norm": 1.7467583417892456, "learning_rate": 0.0009296694523712461, "loss": 3.5313, "step": 8285 }, { "epoch": 0.5632558771572225, "grad_norm": 1.8961039781570435, "learning_rate": 0.0009296269873624133, "loss": 3.7565, "step": 8290 }, { "epoch": 0.5635955972278842, "grad_norm": 2.1689369678497314, "learning_rate": 0.0009295845223535808, "loss": 3.5044, "step": 8295 }, { "epoch": 0.563935317298546, "grad_norm": 1.5227402448654175, "learning_rate": 0.000929542057344748, "loss": 3.705, "step": 8300 }, { "epoch": 0.5642750373692078, "grad_norm": 1.4848250150680542, "learning_rate": 0.0009294995923359152, "loss": 3.5539, "step": 8305 }, { "epoch": 0.5646147574398696, "grad_norm": 1.754381775856018, "learning_rate": 0.0009294571273270825, "loss": 3.5912, "step": 8310 }, { "epoch": 0.5649544775105313, "grad_norm": 1.9849607944488525, "learning_rate": 0.0009294146623182498, "loss": 3.4599, "step": 8315 }, { "epoch": 0.5652941975811931, "grad_norm": 1.689512848854065, "learning_rate": 0.000929372197309417, "loss": 3.4295, "step": 8320 }, { "epoch": 0.5656339176518549, "grad_norm": 1.9490418434143066, "learning_rate": 0.0009293297323005843, "loss": 3.775, "step": 8325 }, { "epoch": 0.5659736377225166, "grad_norm": 1.9984945058822632, "learning_rate": 0.0009292872672917517, "loss": 3.6956, "step": 8330 }, { "epoch": 0.5663133577931784, "grad_norm": 2.1443612575531006, "learning_rate": 0.0009292448022829189, "loss": 3.7985, "step": 8335 }, { "epoch": 0.5666530778638402, "grad_norm": 2.058922052383423, "learning_rate": 0.0009292023372740862, "loss": 3.7034, "step": 8340 }, { "epoch": 0.566992797934502, "grad_norm": 1.6432403326034546, "learning_rate": 0.0009291598722652534, "loss": 3.5522, "step": 8345 }, { "epoch": 0.5673325180051637, "grad_norm": 2.502938985824585, "learning_rate": 
0.0009291174072564207, "loss": 3.9201, "step": 8350 }, { "epoch": 0.5676722380758256, "grad_norm": 1.601935863494873, "learning_rate": 0.000929074942247588, "loss": 3.7858, "step": 8355 }, { "epoch": 0.5680119581464873, "grad_norm": 1.5181854963302612, "learning_rate": 0.0009290324772387552, "loss": 3.5042, "step": 8360 }, { "epoch": 0.568351678217149, "grad_norm": 1.6795443296432495, "learning_rate": 0.0009289900122299226, "loss": 3.5345, "step": 8365 }, { "epoch": 0.5686913982878108, "grad_norm": 1.655259132385254, "learning_rate": 0.0009289475472210899, "loss": 3.5505, "step": 8370 }, { "epoch": 0.5690311183584726, "grad_norm": 1.4911961555480957, "learning_rate": 0.0009289050822122571, "loss": 3.7775, "step": 8375 }, { "epoch": 0.5693708384291344, "grad_norm": 1.631417989730835, "learning_rate": 0.0009288626172034244, "loss": 3.8504, "step": 8380 }, { "epoch": 0.5697105584997961, "grad_norm": 1.695139765739441, "learning_rate": 0.0009288201521945917, "loss": 3.5791, "step": 8385 }, { "epoch": 0.570050278570458, "grad_norm": 1.718969464302063, "learning_rate": 0.0009287776871857589, "loss": 3.5689, "step": 8390 }, { "epoch": 0.5703899986411197, "grad_norm": 1.843687653541565, "learning_rate": 0.0009287352221769261, "loss": 3.8402, "step": 8395 }, { "epoch": 0.5707297187117815, "grad_norm": 1.6716065406799316, "learning_rate": 0.0009286927571680936, "loss": 3.5169, "step": 8400 }, { "epoch": 0.5710694387824433, "grad_norm": 1.6999871730804443, "learning_rate": 0.0009286502921592608, "loss": 3.6564, "step": 8405 }, { "epoch": 0.5714091588531051, "grad_norm": 2.2780423164367676, "learning_rate": 0.000928607827150428, "loss": 3.6151, "step": 8410 }, { "epoch": 0.5717488789237668, "grad_norm": 2.0426759719848633, "learning_rate": 0.0009285653621415954, "loss": 3.2435, "step": 8415 }, { "epoch": 0.5720885989944285, "grad_norm": 1.871614694595337, "learning_rate": 0.0009285228971327626, "loss": 3.6815, "step": 8420 }, { "epoch": 0.5724283190650904, "grad_norm": 
1.4338725805282593, "learning_rate": 0.0009284804321239298, "loss": 3.6226, "step": 8425 }, { "epoch": 0.5727680391357521, "grad_norm": 1.7484062910079956, "learning_rate": 0.0009284379671150972, "loss": 3.6585, "step": 8430 }, { "epoch": 0.5731077592064139, "grad_norm": 2.6366608142852783, "learning_rate": 0.0009283955021062645, "loss": 3.4469, "step": 8435 }, { "epoch": 0.5734474792770757, "grad_norm": 1.933793067932129, "learning_rate": 0.0009283530370974317, "loss": 3.5671, "step": 8440 }, { "epoch": 0.5737871993477375, "grad_norm": 2.2591910362243652, "learning_rate": 0.000928310572088599, "loss": 3.4938, "step": 8445 }, { "epoch": 0.5741269194183992, "grad_norm": 1.942354440689087, "learning_rate": 0.0009282681070797663, "loss": 3.6437, "step": 8450 }, { "epoch": 0.574466639489061, "grad_norm": 4.19645357131958, "learning_rate": 0.0009282256420709335, "loss": 3.3557, "step": 8455 }, { "epoch": 0.5748063595597228, "grad_norm": 1.6711753606796265, "learning_rate": 0.0009281831770621008, "loss": 3.7414, "step": 8460 }, { "epoch": 0.5751460796303846, "grad_norm": 1.717185378074646, "learning_rate": 0.0009281407120532681, "loss": 3.6686, "step": 8465 }, { "epoch": 0.5754857997010463, "grad_norm": 1.790390968322754, "learning_rate": 0.0009280982470444354, "loss": 3.5404, "step": 8470 }, { "epoch": 0.5758255197717081, "grad_norm": 1.7907826900482178, "learning_rate": 0.0009280557820356027, "loss": 3.6021, "step": 8475 }, { "epoch": 0.5761652398423699, "grad_norm": 1.8778092861175537, "learning_rate": 0.00092801331702677, "loss": 3.5096, "step": 8480 }, { "epoch": 0.5765049599130316, "grad_norm": 1.8749881982803345, "learning_rate": 0.0009279708520179372, "loss": 3.6406, "step": 8485 }, { "epoch": 0.5768446799836935, "grad_norm": 1.9166985750198364, "learning_rate": 0.0009279283870091045, "loss": 3.4046, "step": 8490 }, { "epoch": 0.5771844000543552, "grad_norm": 2.205601692199707, "learning_rate": 0.0009278859220002717, "loss": 3.6508, "step": 8495 }, { "epoch": 
0.577524120125017, "grad_norm": 1.5160706043243408, "learning_rate": 0.0009278434569914391, "loss": 3.6401, "step": 8500 }, { "epoch": 0.5778638401956787, "grad_norm": 2.1555912494659424, "learning_rate": 0.0009278009919826064, "loss": 3.5458, "step": 8505 }, { "epoch": 0.5782035602663406, "grad_norm": 1.7756496667861938, "learning_rate": 0.0009277585269737736, "loss": 3.5908, "step": 8510 }, { "epoch": 0.5785432803370023, "grad_norm": 1.9302237033843994, "learning_rate": 0.000927716061964941, "loss": 3.6034, "step": 8515 }, { "epoch": 0.578883000407664, "grad_norm": 3.7671988010406494, "learning_rate": 0.0009276735969561082, "loss": 3.4115, "step": 8520 }, { "epoch": 0.5792227204783259, "grad_norm": 1.9579970836639404, "learning_rate": 0.0009276311319472754, "loss": 3.5352, "step": 8525 }, { "epoch": 0.5795624405489876, "grad_norm": 1.403574824333191, "learning_rate": 0.0009275886669384428, "loss": 3.5008, "step": 8530 }, { "epoch": 0.5799021606196494, "grad_norm": 2.193835973739624, "learning_rate": 0.00092754620192961, "loss": 3.5266, "step": 8535 }, { "epoch": 0.5802418806903111, "grad_norm": 3.2566967010498047, "learning_rate": 0.0009275037369207773, "loss": 3.5171, "step": 8540 }, { "epoch": 0.580581600760973, "grad_norm": 2.0704121589660645, "learning_rate": 0.0009274612719119447, "loss": 3.7362, "step": 8545 }, { "epoch": 0.5809213208316347, "grad_norm": 1.5881680250167847, "learning_rate": 0.0009274188069031119, "loss": 3.4184, "step": 8550 }, { "epoch": 0.5812610409022965, "grad_norm": 1.6216797828674316, "learning_rate": 0.0009273763418942791, "loss": 3.6328, "step": 8555 }, { "epoch": 0.5816007609729583, "grad_norm": 1.9217256307601929, "learning_rate": 0.0009273338768854464, "loss": 3.6225, "step": 8560 }, { "epoch": 0.5819404810436201, "grad_norm": 1.8218040466308594, "learning_rate": 0.0009272914118766137, "loss": 3.5851, "step": 8565 }, { "epoch": 0.5822802011142818, "grad_norm": 2.2056286334991455, "learning_rate": 0.0009272489468677809, "loss": 
3.5824, "step": 8570 }, { "epoch": 0.5826199211849437, "grad_norm": 2.019808053970337, "learning_rate": 0.0009272064818589483, "loss": 3.5564, "step": 8575 }, { "epoch": 0.5829596412556054, "grad_norm": 1.7872081995010376, "learning_rate": 0.0009271640168501156, "loss": 3.4465, "step": 8580 }, { "epoch": 0.5832993613262671, "grad_norm": 1.788996696472168, "learning_rate": 0.0009271215518412828, "loss": 3.6406, "step": 8585 }, { "epoch": 0.5836390813969289, "grad_norm": 2.110400438308716, "learning_rate": 0.0009270790868324501, "loss": 3.6857, "step": 8590 }, { "epoch": 0.5839788014675907, "grad_norm": 1.5634715557098389, "learning_rate": 0.0009270366218236173, "loss": 3.9171, "step": 8595 }, { "epoch": 0.5843185215382525, "grad_norm": 1.9882853031158447, "learning_rate": 0.0009269941568147846, "loss": 3.5681, "step": 8600 }, { "epoch": 0.5846582416089142, "grad_norm": 2.3721466064453125, "learning_rate": 0.000926951691805952, "loss": 3.8869, "step": 8605 }, { "epoch": 0.5849979616795761, "grad_norm": 1.5536471605300903, "learning_rate": 0.0009269092267971192, "loss": 3.4691, "step": 8610 }, { "epoch": 0.5853376817502378, "grad_norm": 1.8059794902801514, "learning_rate": 0.0009268667617882865, "loss": 3.6269, "step": 8615 }, { "epoch": 0.5856774018208996, "grad_norm": 2.6601083278656006, "learning_rate": 0.0009268242967794538, "loss": 3.5519, "step": 8620 }, { "epoch": 0.5860171218915613, "grad_norm": 2.131978988647461, "learning_rate": 0.000926781831770621, "loss": 3.6403, "step": 8625 }, { "epoch": 0.5863568419622232, "grad_norm": 1.801137089729309, "learning_rate": 0.0009267393667617882, "loss": 3.7596, "step": 8630 }, { "epoch": 0.5866965620328849, "grad_norm": 1.5213325023651123, "learning_rate": 0.0009266969017529556, "loss": 3.612, "step": 8635 }, { "epoch": 0.5870362821035466, "grad_norm": 2.1005544662475586, "learning_rate": 0.0009266544367441229, "loss": 3.2965, "step": 8640 }, { "epoch": 0.5873760021742085, "grad_norm": 1.651050090789795, "learning_rate": 
0.0009266119717352901, "loss": 3.9812, "step": 8645 }, { "epoch": 0.5877157222448702, "grad_norm": 1.50296950340271, "learning_rate": 0.0009265695067264575, "loss": 3.8881, "step": 8650 }, { "epoch": 0.588055442315532, "grad_norm": 1.6539520025253296, "learning_rate": 0.0009265270417176247, "loss": 3.3928, "step": 8655 }, { "epoch": 0.5883951623861938, "grad_norm": 2.2167775630950928, "learning_rate": 0.0009264845767087919, "loss": 3.484, "step": 8660 }, { "epoch": 0.5887348824568556, "grad_norm": 1.581592082977295, "learning_rate": 0.0009264421116999593, "loss": 3.5593, "step": 8665 }, { "epoch": 0.5890746025275173, "grad_norm": 1.3254930973052979, "learning_rate": 0.0009263996466911265, "loss": 3.6978, "step": 8670 }, { "epoch": 0.589414322598179, "grad_norm": 1.649031400680542, "learning_rate": 0.0009263571816822938, "loss": 3.5839, "step": 8675 }, { "epoch": 0.5897540426688409, "grad_norm": 1.5470324754714966, "learning_rate": 0.0009263147166734612, "loss": 3.5869, "step": 8680 }, { "epoch": 0.5900937627395026, "grad_norm": 1.6185967922210693, "learning_rate": 0.0009262722516646284, "loss": 3.469, "step": 8685 }, { "epoch": 0.5904334828101644, "grad_norm": 1.812577247619629, "learning_rate": 0.0009262297866557956, "loss": 3.3848, "step": 8690 }, { "epoch": 0.5907732028808262, "grad_norm": 1.6184992790222168, "learning_rate": 0.0009261873216469629, "loss": 3.648, "step": 8695 }, { "epoch": 0.591112922951488, "grad_norm": 1.8362847566604614, "learning_rate": 0.0009261448566381302, "loss": 3.6849, "step": 8700 }, { "epoch": 0.5914526430221497, "grad_norm": 1.990052342414856, "learning_rate": 0.0009261023916292974, "loss": 3.6536, "step": 8705 }, { "epoch": 0.5917923630928115, "grad_norm": 2.0379180908203125, "learning_rate": 0.0009260599266204648, "loss": 3.6557, "step": 8710 }, { "epoch": 0.5921320831634733, "grad_norm": 2.0240869522094727, "learning_rate": 0.0009260174616116321, "loss": 3.3792, "step": 8715 }, { "epoch": 0.5924718032341351, "grad_norm": 
2.3912525177001953, "learning_rate": 0.0009259749966027993, "loss": 3.6087, "step": 8720 }, { "epoch": 0.5928115233047968, "grad_norm": 1.8275182247161865, "learning_rate": 0.0009259325315939666, "loss": 3.7186, "step": 8725 }, { "epoch": 0.5931512433754587, "grad_norm": 1.8130512237548828, "learning_rate": 0.0009258900665851339, "loss": 3.7618, "step": 8730 }, { "epoch": 0.5934909634461204, "grad_norm": 2.5373785495758057, "learning_rate": 0.0009258476015763011, "loss": 3.6537, "step": 8735 }, { "epoch": 0.5938306835167821, "grad_norm": 1.864529013633728, "learning_rate": 0.0009258051365674684, "loss": 3.6591, "step": 8740 }, { "epoch": 0.594170403587444, "grad_norm": 1.8723706007003784, "learning_rate": 0.0009257626715586357, "loss": 3.5967, "step": 8745 }, { "epoch": 0.5945101236581057, "grad_norm": 1.792792558670044, "learning_rate": 0.000925720206549803, "loss": 3.6432, "step": 8750 }, { "epoch": 0.5948498437287675, "grad_norm": 1.6405158042907715, "learning_rate": 0.0009256777415409703, "loss": 3.4517, "step": 8755 }, { "epoch": 0.5951895637994292, "grad_norm": 2.6514251232147217, "learning_rate": 0.0009256352765321375, "loss": 3.6744, "step": 8760 }, { "epoch": 0.5955292838700911, "grad_norm": 2.0320937633514404, "learning_rate": 0.0009255928115233048, "loss": 3.5421, "step": 8765 }, { "epoch": 0.5958690039407528, "grad_norm": 1.543256402015686, "learning_rate": 0.0009255503465144721, "loss": 3.56, "step": 8770 }, { "epoch": 0.5962087240114146, "grad_norm": 2.106759548187256, "learning_rate": 0.0009255078815056393, "loss": 3.5653, "step": 8775 }, { "epoch": 0.5965484440820764, "grad_norm": 1.790747046470642, "learning_rate": 0.0009254654164968067, "loss": 3.5841, "step": 8780 }, { "epoch": 0.5968881641527382, "grad_norm": 1.7739201784133911, "learning_rate": 0.000925422951487974, "loss": 3.6571, "step": 8785 }, { "epoch": 0.5972278842233999, "grad_norm": 1.8644073009490967, "learning_rate": 0.0009253804864791412, "loss": 3.6701, "step": 8790 }, { "epoch": 
0.5975676042940616, "grad_norm": 1.488048791885376, "learning_rate": 0.0009253380214703084, "loss": 3.6917, "step": 8795 }, { "epoch": 0.5979073243647235, "grad_norm": 1.6964482069015503, "learning_rate": 0.0009252955564614758, "loss": 3.5346, "step": 8800 }, { "epoch": 0.5982470444353852, "grad_norm": 1.8396565914154053, "learning_rate": 0.000925253091452643, "loss": 3.6299, "step": 8805 }, { "epoch": 0.598586764506047, "grad_norm": 1.74364173412323, "learning_rate": 0.0009252106264438102, "loss": 3.6527, "step": 8810 }, { "epoch": 0.5989264845767088, "grad_norm": 2.2096166610717773, "learning_rate": 0.0009251681614349777, "loss": 3.6286, "step": 8815 }, { "epoch": 0.5992662046473706, "grad_norm": 1.8400013446807861, "learning_rate": 0.0009251256964261449, "loss": 3.426, "step": 8820 }, { "epoch": 0.5996059247180323, "grad_norm": 2.0055932998657227, "learning_rate": 0.0009250832314173121, "loss": 3.5473, "step": 8825 }, { "epoch": 0.5999456447886942, "grad_norm": 1.4625746011734009, "learning_rate": 0.0009250407664084795, "loss": 3.5188, "step": 8830 }, { "epoch": 0.6002853648593559, "grad_norm": 1.6361364126205444, "learning_rate": 0.0009249983013996467, "loss": 3.5838, "step": 8835 }, { "epoch": 0.6006250849300176, "grad_norm": 1.778080940246582, "learning_rate": 0.000924955836390814, "loss": 3.6467, "step": 8840 }, { "epoch": 0.6009648050006794, "grad_norm": 1.6331559419631958, "learning_rate": 0.0009249133713819812, "loss": 3.6365, "step": 8845 }, { "epoch": 0.6013045250713412, "grad_norm": 1.6935793161392212, "learning_rate": 0.0009248709063731486, "loss": 3.8475, "step": 8850 }, { "epoch": 0.601644245142003, "grad_norm": 1.8047494888305664, "learning_rate": 0.0009248284413643159, "loss": 3.6562, "step": 8855 }, { "epoch": 0.6019839652126647, "grad_norm": 1.740591049194336, "learning_rate": 0.0009247859763554831, "loss": 3.5537, "step": 8860 }, { "epoch": 0.6023236852833266, "grad_norm": 1.6231789588928223, "learning_rate": 0.0009247435113466504, "loss": 
3.5393, "step": 8865 }, { "epoch": 0.6026634053539883, "grad_norm": 1.628199815750122, "learning_rate": 0.0009247010463378177, "loss": 3.5164, "step": 8870 }, { "epoch": 0.6030031254246501, "grad_norm": 1.871811032295227, "learning_rate": 0.0009246585813289849, "loss": 3.564, "step": 8875 }, { "epoch": 0.6033428454953118, "grad_norm": 1.9187747240066528, "learning_rate": 0.0009246161163201521, "loss": 3.5392, "step": 8880 }, { "epoch": 0.6036825655659737, "grad_norm": 1.9613898992538452, "learning_rate": 0.0009245736513113196, "loss": 3.7818, "step": 8885 }, { "epoch": 0.6040222856366354, "grad_norm": 1.420740008354187, "learning_rate": 0.0009245311863024868, "loss": 3.5751, "step": 8890 }, { "epoch": 0.6043620057072971, "grad_norm": 1.975730299949646, "learning_rate": 0.000924488721293654, "loss": 3.646, "step": 8895 }, { "epoch": 0.604701725777959, "grad_norm": 1.9160246849060059, "learning_rate": 0.0009244462562848214, "loss": 3.7875, "step": 8900 }, { "epoch": 0.6050414458486207, "grad_norm": 2.1043782234191895, "learning_rate": 0.0009244037912759886, "loss": 3.5591, "step": 8905 }, { "epoch": 0.6053811659192825, "grad_norm": 1.5138534307479858, "learning_rate": 0.0009243613262671558, "loss": 3.5875, "step": 8910 }, { "epoch": 0.6057208859899443, "grad_norm": 2.091632127761841, "learning_rate": 0.0009243188612583232, "loss": 3.4981, "step": 8915 }, { "epoch": 0.6060606060606061, "grad_norm": 1.4236527681350708, "learning_rate": 0.0009242763962494905, "loss": 3.5563, "step": 8920 }, { "epoch": 0.6064003261312678, "grad_norm": 1.9371850490570068, "learning_rate": 0.0009242339312406577, "loss": 3.4575, "step": 8925 }, { "epoch": 0.6067400462019296, "grad_norm": 2.0997776985168457, "learning_rate": 0.000924191466231825, "loss": 3.7594, "step": 8930 }, { "epoch": 0.6070797662725914, "grad_norm": 2.2973668575286865, "learning_rate": 0.0009241490012229923, "loss": 3.6701, "step": 8935 }, { "epoch": 0.6074194863432532, "grad_norm": 1.6395846605300903, "learning_rate": 
0.0009241065362141595, "loss": 3.6688, "step": 8940 }, { "epoch": 0.6077592064139149, "grad_norm": 1.737242341041565, "learning_rate": 0.0009240640712053268, "loss": 3.72, "step": 8945 }, { "epoch": 0.6080989264845768, "grad_norm": 1.7912348508834839, "learning_rate": 0.0009240216061964941, "loss": 3.7516, "step": 8950 }, { "epoch": 0.6084386465552385, "grad_norm": 1.9811081886291504, "learning_rate": 0.0009239791411876614, "loss": 3.5736, "step": 8955 }, { "epoch": 0.6087783666259002, "grad_norm": 1.7028861045837402, "learning_rate": 0.0009239366761788287, "loss": 3.6331, "step": 8960 }, { "epoch": 0.609118086696562, "grad_norm": 1.6539908647537231, "learning_rate": 0.000923894211169996, "loss": 3.4991, "step": 8965 }, { "epoch": 0.6094578067672238, "grad_norm": 1.4642423391342163, "learning_rate": 0.0009238517461611632, "loss": 3.5892, "step": 8970 }, { "epoch": 0.6097975268378856, "grad_norm": 1.8361656665802002, "learning_rate": 0.0009238092811523305, "loss": 3.5577, "step": 8975 }, { "epoch": 0.6101372469085473, "grad_norm": 2.4520344734191895, "learning_rate": 0.0009237668161434977, "loss": 3.4446, "step": 8980 }, { "epoch": 0.6104769669792092, "grad_norm": 1.7283861637115479, "learning_rate": 0.000923724351134665, "loss": 3.3449, "step": 8985 }, { "epoch": 0.6108166870498709, "grad_norm": 1.5707637071609497, "learning_rate": 0.0009236818861258324, "loss": 3.5901, "step": 8990 }, { "epoch": 0.6111564071205327, "grad_norm": 2.073317289352417, "learning_rate": 0.0009236394211169996, "loss": 3.9238, "step": 8995 }, { "epoch": 0.6114961271911945, "grad_norm": 1.5850750207901, "learning_rate": 0.0009235969561081669, "loss": 3.6219, "step": 9000 }, { "epoch": 0.6118358472618562, "grad_norm": 1.8710379600524902, "learning_rate": 0.0009235544910993342, "loss": 3.7944, "step": 9005 }, { "epoch": 0.612175567332518, "grad_norm": 2.0422680377960205, "learning_rate": 0.0009235120260905014, "loss": 3.648, "step": 9010 }, { "epoch": 0.6125152874031797, "grad_norm": 
1.8090587854385376, "learning_rate": 0.0009234695610816687, "loss": 3.5097, "step": 9015 }, { "epoch": 0.6128550074738416, "grad_norm": 1.7723501920700073, "learning_rate": 0.000923427096072836, "loss": 3.7298, "step": 9020 }, { "epoch": 0.6131947275445033, "grad_norm": 2.3708157539367676, "learning_rate": 0.0009233846310640033, "loss": 3.4601, "step": 9025 }, { "epoch": 0.6135344476151651, "grad_norm": 1.8989821672439575, "learning_rate": 0.0009233421660551705, "loss": 3.5055, "step": 9030 }, { "epoch": 0.6138741676858269, "grad_norm": 2.090282917022705, "learning_rate": 0.0009232997010463379, "loss": 3.4755, "step": 9035 }, { "epoch": 0.6142138877564887, "grad_norm": 2.4173941612243652, "learning_rate": 0.0009232572360375051, "loss": 3.7803, "step": 9040 }, { "epoch": 0.6145536078271504, "grad_norm": 1.8314687013626099, "learning_rate": 0.0009232147710286723, "loss": 3.6624, "step": 9045 }, { "epoch": 0.6148933278978121, "grad_norm": 1.775395154953003, "learning_rate": 0.0009231723060198397, "loss": 3.6418, "step": 9050 }, { "epoch": 0.615233047968474, "grad_norm": 1.9200234413146973, "learning_rate": 0.0009231298410110069, "loss": 3.737, "step": 9055 }, { "epoch": 0.6155727680391357, "grad_norm": 1.833133339881897, "learning_rate": 0.0009230873760021742, "loss": 3.6264, "step": 9060 }, { "epoch": 0.6159124881097975, "grad_norm": 1.6982721090316772, "learning_rate": 0.0009230449109933416, "loss": 3.778, "step": 9065 }, { "epoch": 0.6162522081804593, "grad_norm": 1.832292914390564, "learning_rate": 0.0009230024459845088, "loss": 3.4331, "step": 9070 }, { "epoch": 0.6165919282511211, "grad_norm": 2.015042543411255, "learning_rate": 0.000922959980975676, "loss": 3.4464, "step": 9075 }, { "epoch": 0.6169316483217828, "grad_norm": 1.4937949180603027, "learning_rate": 0.0009229175159668433, "loss": 3.6233, "step": 9080 }, { "epoch": 0.6172713683924447, "grad_norm": 1.6505391597747803, "learning_rate": 0.0009228750509580106, "loss": 3.6598, "step": 9085 }, { "epoch": 
0.6176110884631064, "grad_norm": 1.757141351699829, "learning_rate": 0.0009228325859491778, "loss": 3.5579, "step": 9090 }, { "epoch": 0.6179508085337682, "grad_norm": 1.4874945878982544, "learning_rate": 0.0009227901209403452, "loss": 3.5253, "step": 9095 }, { "epoch": 0.6182905286044299, "grad_norm": 1.7674670219421387, "learning_rate": 0.0009227476559315125, "loss": 3.8426, "step": 9100 }, { "epoch": 0.6186302486750918, "grad_norm": 1.7990821599960327, "learning_rate": 0.0009227051909226797, "loss": 3.8549, "step": 9105 }, { "epoch": 0.6189699687457535, "grad_norm": 2.125661611557007, "learning_rate": 0.000922662725913847, "loss": 3.2344, "step": 9110 }, { "epoch": 0.6193096888164152, "grad_norm": 1.8698945045471191, "learning_rate": 0.0009226202609050143, "loss": 3.7451, "step": 9115 }, { "epoch": 0.6196494088870771, "grad_norm": 2.396942138671875, "learning_rate": 0.0009225777958961815, "loss": 3.6042, "step": 9120 }, { "epoch": 0.6199891289577388, "grad_norm": 1.7153599262237549, "learning_rate": 0.0009225353308873488, "loss": 3.7072, "step": 9125 }, { "epoch": 0.6203288490284006, "grad_norm": 1.8926149606704712, "learning_rate": 0.0009224928658785161, "loss": 3.6372, "step": 9130 }, { "epoch": 0.6206685690990623, "grad_norm": 1.563071370124817, "learning_rate": 0.0009224504008696834, "loss": 3.7395, "step": 9135 }, { "epoch": 0.6210082891697242, "grad_norm": 1.4925459623336792, "learning_rate": 0.0009224079358608507, "loss": 3.548, "step": 9140 }, { "epoch": 0.6213480092403859, "grad_norm": 1.9674763679504395, "learning_rate": 0.0009223654708520179, "loss": 3.6579, "step": 9145 }, { "epoch": 0.6216877293110477, "grad_norm": 1.318204402923584, "learning_rate": 0.0009223230058431852, "loss": 3.3913, "step": 9150 }, { "epoch": 0.6220274493817095, "grad_norm": 1.610824465751648, "learning_rate": 0.0009222805408343525, "loss": 3.6324, "step": 9155 }, { "epoch": 0.6223671694523712, "grad_norm": 1.6806154251098633, "learning_rate": 0.0009222380758255197, "loss": 
3.697, "step": 9160 }, { "epoch": 0.622706889523033, "grad_norm": 1.962982177734375, "learning_rate": 0.0009221956108166871, "loss": 3.5669, "step": 9165 }, { "epoch": 0.6230466095936948, "grad_norm": 2.242494583129883, "learning_rate": 0.0009221531458078544, "loss": 3.6732, "step": 9170 }, { "epoch": 0.6233863296643566, "grad_norm": 1.9611090421676636, "learning_rate": 0.0009221106807990216, "loss": 3.6621, "step": 9175 }, { "epoch": 0.6237260497350183, "grad_norm": 2.4531869888305664, "learning_rate": 0.000922068215790189, "loss": 3.3635, "step": 9180 }, { "epoch": 0.6240657698056801, "grad_norm": 1.4472553730010986, "learning_rate": 0.0009220257507813562, "loss": 3.7988, "step": 9185 }, { "epoch": 0.6244054898763419, "grad_norm": 1.911106824874878, "learning_rate": 0.0009219832857725234, "loss": 3.6959, "step": 9190 }, { "epoch": 0.6247452099470037, "grad_norm": 1.6827830076217651, "learning_rate": 0.0009219408207636908, "loss": 3.7204, "step": 9195 }, { "epoch": 0.6250849300176654, "grad_norm": 1.9267044067382812, "learning_rate": 0.0009218983557548581, "loss": 3.53, "step": 9200 }, { "epoch": 0.6254246500883273, "grad_norm": 1.7320880889892578, "learning_rate": 0.0009218558907460253, "loss": 3.6947, "step": 9205 }, { "epoch": 0.625764370158989, "grad_norm": 2.0060832500457764, "learning_rate": 0.0009218134257371926, "loss": 3.8352, "step": 9210 }, { "epoch": 0.6261040902296507, "grad_norm": 2.273177146911621, "learning_rate": 0.0009217709607283599, "loss": 3.8385, "step": 9215 }, { "epoch": 0.6264438103003126, "grad_norm": 1.8600434064865112, "learning_rate": 0.0009217284957195271, "loss": 3.627, "step": 9220 }, { "epoch": 0.6267835303709743, "grad_norm": 2.0940895080566406, "learning_rate": 0.0009216860307106944, "loss": 4.0717, "step": 9225 }, { "epoch": 0.6271232504416361, "grad_norm": 2.140733480453491, "learning_rate": 0.0009216435657018617, "loss": 3.6157, "step": 9230 }, { "epoch": 0.6274629705122978, "grad_norm": 1.6992948055267334, "learning_rate": 
0.000921601100693029, "loss": 3.5026, "step": 9235 }, { "epoch": 0.6278026905829597, "grad_norm": 1.9762685298919678, "learning_rate": 0.0009215586356841963, "loss": 3.5851, "step": 9240 }, { "epoch": 0.6281424106536214, "grad_norm": 2.013376474380493, "learning_rate": 0.0009215161706753635, "loss": 3.6116, "step": 9245 }, { "epoch": 0.6284821307242832, "grad_norm": 2.427639961242676, "learning_rate": 0.0009214737056665308, "loss": 3.4824, "step": 9250 }, { "epoch": 0.628821850794945, "grad_norm": 2.4473116397857666, "learning_rate": 0.0009214312406576981, "loss": 3.4049, "step": 9255 }, { "epoch": 0.6291615708656068, "grad_norm": 1.4580371379852295, "learning_rate": 0.0009213887756488653, "loss": 3.5801, "step": 9260 }, { "epoch": 0.6295012909362685, "grad_norm": 1.7258806228637695, "learning_rate": 0.0009213463106400327, "loss": 3.4919, "step": 9265 }, { "epoch": 0.6298410110069302, "grad_norm": 2.3463587760925293, "learning_rate": 0.0009213038456312, "loss": 3.4469, "step": 9270 }, { "epoch": 0.6301807310775921, "grad_norm": 2.3223965167999268, "learning_rate": 0.0009212613806223672, "loss": 3.4896, "step": 9275 }, { "epoch": 0.6305204511482538, "grad_norm": 1.7307283878326416, "learning_rate": 0.0009212189156135344, "loss": 3.714, "step": 9280 }, { "epoch": 0.6308601712189156, "grad_norm": 2.126415967941284, "learning_rate": 0.0009211764506047018, "loss": 3.8013, "step": 9285 }, { "epoch": 0.6311998912895774, "grad_norm": 2.0653738975524902, "learning_rate": 0.000921133985595869, "loss": 3.2903, "step": 9290 }, { "epoch": 0.6315396113602392, "grad_norm": 1.6250687837600708, "learning_rate": 0.0009210915205870362, "loss": 3.3799, "step": 9295 }, { "epoch": 0.6318793314309009, "grad_norm": 2.001086473464966, "learning_rate": 0.0009210490555782037, "loss": 3.7043, "step": 9300 }, { "epoch": 0.6322190515015628, "grad_norm": 1.5295230150222778, "learning_rate": 0.0009210065905693709, "loss": 3.5641, "step": 9305 }, { "epoch": 0.6325587715722245, "grad_norm": 
1.531017780303955, "learning_rate": 0.0009209641255605381, "loss": 3.4167, "step": 9310 }, { "epoch": 0.6328984916428863, "grad_norm": 1.9597101211547852, "learning_rate": 0.0009209216605517055, "loss": 3.4923, "step": 9315 }, { "epoch": 0.633238211713548, "grad_norm": 1.769517183303833, "learning_rate": 0.0009208791955428727, "loss": 3.4872, "step": 9320 }, { "epoch": 0.6335779317842098, "grad_norm": 1.9648350477218628, "learning_rate": 0.0009208367305340399, "loss": 3.7906, "step": 9325 }, { "epoch": 0.6339176518548716, "grad_norm": 1.741439938545227, "learning_rate": 0.0009207942655252072, "loss": 3.6979, "step": 9330 }, { "epoch": 0.6342573719255333, "grad_norm": 2.0115058422088623, "learning_rate": 0.0009207518005163746, "loss": 3.8016, "step": 9335 }, { "epoch": 0.6345970919961952, "grad_norm": 1.654097557067871, "learning_rate": 0.0009207093355075418, "loss": 3.7863, "step": 9340 }, { "epoch": 0.6349368120668569, "grad_norm": 1.6414355039596558, "learning_rate": 0.0009206668704987091, "loss": 3.5615, "step": 9345 }, { "epoch": 0.6352765321375187, "grad_norm": 1.5995956659317017, "learning_rate": 0.0009206244054898764, "loss": 3.6419, "step": 9350 }, { "epoch": 0.6356162522081804, "grad_norm": 1.3419054746627808, "learning_rate": 0.0009205819404810436, "loss": 3.538, "step": 9355 }, { "epoch": 0.6359559722788423, "grad_norm": 1.6185327768325806, "learning_rate": 0.0009205394754722109, "loss": 3.5557, "step": 9360 }, { "epoch": 0.636295692349504, "grad_norm": 2.0059540271759033, "learning_rate": 0.0009204970104633781, "loss": 3.7247, "step": 9365 }, { "epoch": 0.6366354124201657, "grad_norm": 1.4770256280899048, "learning_rate": 0.0009204545454545455, "loss": 3.7428, "step": 9370 }, { "epoch": 0.6369751324908276, "grad_norm": 2.366694688796997, "learning_rate": 0.0009204120804457128, "loss": 3.6573, "step": 9375 }, { "epoch": 0.6373148525614893, "grad_norm": 2.491580009460449, "learning_rate": 0.00092036961543688, "loss": 3.3823, "step": 9380 }, { "epoch": 
0.6376545726321511, "grad_norm": 1.5592350959777832, "learning_rate": 0.0009203271504280473, "loss": 3.6006, "step": 9385 }, { "epoch": 0.6379942927028129, "grad_norm": 1.676284909248352, "learning_rate": 0.0009202846854192146, "loss": 3.5698, "step": 9390 }, { "epoch": 0.6383340127734747, "grad_norm": 1.9527103900909424, "learning_rate": 0.0009202422204103818, "loss": 3.6295, "step": 9395 }, { "epoch": 0.6386737328441364, "grad_norm": 1.6484687328338623, "learning_rate": 0.0009201997554015491, "loss": 3.5425, "step": 9400 }, { "epoch": 0.6390134529147982, "grad_norm": 2.3277747631073, "learning_rate": 0.0009201572903927165, "loss": 3.5688, "step": 9405 }, { "epoch": 0.63935317298546, "grad_norm": 2.1935806274414062, "learning_rate": 0.0009201148253838837, "loss": 3.5845, "step": 9410 }, { "epoch": 0.6396928930561218, "grad_norm": 1.8893133401870728, "learning_rate": 0.000920072360375051, "loss": 3.4366, "step": 9415 }, { "epoch": 0.6400326131267835, "grad_norm": 1.7432233095169067, "learning_rate": 0.0009200298953662183, "loss": 3.583, "step": 9420 }, { "epoch": 0.6403723331974454, "grad_norm": 1.6902579069137573, "learning_rate": 0.0009199874303573855, "loss": 3.6491, "step": 9425 }, { "epoch": 0.6407120532681071, "grad_norm": 1.8418009281158447, "learning_rate": 0.0009199449653485527, "loss": 3.6407, "step": 9430 }, { "epoch": 0.6410517733387688, "grad_norm": 1.9162286520004272, "learning_rate": 0.0009199025003397201, "loss": 3.3912, "step": 9435 }, { "epoch": 0.6413914934094306, "grad_norm": 1.6783528327941895, "learning_rate": 0.0009198600353308874, "loss": 3.6339, "step": 9440 }, { "epoch": 0.6417312134800924, "grad_norm": 2.0396790504455566, "learning_rate": 0.0009198175703220546, "loss": 3.6449, "step": 9445 }, { "epoch": 0.6420709335507542, "grad_norm": 2.2441961765289307, "learning_rate": 0.000919775105313222, "loss": 3.635, "step": 9450 }, { "epoch": 0.6424106536214159, "grad_norm": 2.3424692153930664, "learning_rate": 0.0009197326403043892, "loss": 
3.5526, "step": 9455 }, { "epoch": 0.6427503736920778, "grad_norm": 2.2003560066223145, "learning_rate": 0.0009196901752955564, "loss": 3.726, "step": 9460 }, { "epoch": 0.6430900937627395, "grad_norm": 1.9914557933807373, "learning_rate": 0.0009196477102867238, "loss": 3.488, "step": 9465 }, { "epoch": 0.6434298138334013, "grad_norm": 2.166740655899048, "learning_rate": 0.000919605245277891, "loss": 3.603, "step": 9470 }, { "epoch": 0.6437695339040631, "grad_norm": 1.7386200428009033, "learning_rate": 0.0009195627802690583, "loss": 3.7191, "step": 9475 }, { "epoch": 0.6441092539747248, "grad_norm": 1.6074467897415161, "learning_rate": 0.0009195203152602256, "loss": 3.425, "step": 9480 }, { "epoch": 0.6444489740453866, "grad_norm": 1.7538726329803467, "learning_rate": 0.0009194778502513929, "loss": 3.9921, "step": 9485 }, { "epoch": 0.6447886941160483, "grad_norm": 1.758480191230774, "learning_rate": 0.0009194353852425601, "loss": 3.613, "step": 9490 }, { "epoch": 0.6451284141867102, "grad_norm": 1.932093858718872, "learning_rate": 0.0009193929202337274, "loss": 3.5282, "step": 9495 }, { "epoch": 0.6454681342573719, "grad_norm": 1.8751147985458374, "learning_rate": 0.0009193504552248947, "loss": 3.5988, "step": 9500 }, { "epoch": 0.6458078543280337, "grad_norm": 1.543596863746643, "learning_rate": 0.0009193079902160619, "loss": 3.4171, "step": 9505 }, { "epoch": 0.6461475743986955, "grad_norm": 1.8408379554748535, "learning_rate": 0.0009192655252072293, "loss": 3.4953, "step": 9510 }, { "epoch": 0.6464872944693573, "grad_norm": 1.658422827720642, "learning_rate": 0.0009192230601983966, "loss": 3.6259, "step": 9515 }, { "epoch": 0.646827014540019, "grad_norm": 2.0727617740631104, "learning_rate": 0.0009191805951895639, "loss": 3.6998, "step": 9520 }, { "epoch": 0.6471667346106807, "grad_norm": 2.0431787967681885, "learning_rate": 0.0009191381301807311, "loss": 3.6857, "step": 9525 }, { "epoch": 0.6475064546813426, "grad_norm": 1.4358595609664917, "learning_rate": 
0.0009190956651718983, "loss": 3.6275, "step": 9530 }, { "epoch": 0.6478461747520043, "grad_norm": 1.8048266172409058, "learning_rate": 0.0009190532001630657, "loss": 3.6312, "step": 9535 }, { "epoch": 0.6481858948226661, "grad_norm": 2.0571372509002686, "learning_rate": 0.0009190107351542329, "loss": 3.7955, "step": 9540 }, { "epoch": 0.6485256148933279, "grad_norm": 2.3149263858795166, "learning_rate": 0.0009189682701454002, "loss": 3.679, "step": 9545 }, { "epoch": 0.6488653349639897, "grad_norm": 1.8413270711898804, "learning_rate": 0.0009189258051365676, "loss": 3.7434, "step": 9550 }, { "epoch": 0.6492050550346514, "grad_norm": 2.192054271697998, "learning_rate": 0.0009188833401277348, "loss": 3.534, "step": 9555 }, { "epoch": 0.6495447751053133, "grad_norm": 1.689526915550232, "learning_rate": 0.000918840875118902, "loss": 3.6786, "step": 9560 }, { "epoch": 0.649884495175975, "grad_norm": 1.6984198093414307, "learning_rate": 0.0009187984101100694, "loss": 3.7125, "step": 9565 }, { "epoch": 0.6502242152466368, "grad_norm": 1.765568733215332, "learning_rate": 0.0009187559451012366, "loss": 3.5293, "step": 9570 }, { "epoch": 0.6505639353172985, "grad_norm": 1.999446988105774, "learning_rate": 0.0009187134800924038, "loss": 3.4311, "step": 9575 }, { "epoch": 0.6509036553879604, "grad_norm": 2.0595028400421143, "learning_rate": 0.0009186710150835712, "loss": 3.6806, "step": 9580 }, { "epoch": 0.6512433754586221, "grad_norm": 2.0167248249053955, "learning_rate": 0.0009186285500747385, "loss": 3.5575, "step": 9585 }, { "epoch": 0.6515830955292838, "grad_norm": 1.3378846645355225, "learning_rate": 0.0009185860850659057, "loss": 3.5565, "step": 9590 }, { "epoch": 0.6519228155999457, "grad_norm": 1.9939650297164917, "learning_rate": 0.000918543620057073, "loss": 3.6874, "step": 9595 }, { "epoch": 0.6522625356706074, "grad_norm": 1.8056820631027222, "learning_rate": 0.0009185011550482403, "loss": 3.4346, "step": 9600 }, { "epoch": 0.6526022557412692, "grad_norm": 
1.6571496725082397, "learning_rate": 0.0009184586900394075, "loss": 3.7055, "step": 9605 }, { "epoch": 0.6529419758119309, "grad_norm": 1.9607594013214111, "learning_rate": 0.0009184162250305748, "loss": 3.6941, "step": 9610 }, { "epoch": 0.6532816958825928, "grad_norm": 1.8319791555404663, "learning_rate": 0.0009183737600217422, "loss": 3.8202, "step": 9615 }, { "epoch": 0.6536214159532545, "grad_norm": 1.8847386837005615, "learning_rate": 0.0009183312950129094, "loss": 3.5325, "step": 9620 }, { "epoch": 0.6539611360239163, "grad_norm": 1.5947563648223877, "learning_rate": 0.0009182888300040767, "loss": 3.6241, "step": 9625 }, { "epoch": 0.6543008560945781, "grad_norm": 2.0192806720733643, "learning_rate": 0.0009182463649952439, "loss": 3.5907, "step": 9630 }, { "epoch": 0.6546405761652399, "grad_norm": 1.7142876386642456, "learning_rate": 0.0009182038999864112, "loss": 3.5153, "step": 9635 }, { "epoch": 0.6549802962359016, "grad_norm": 2.3807241916656494, "learning_rate": 0.0009181614349775785, "loss": 3.5511, "step": 9640 }, { "epoch": 0.6553200163065634, "grad_norm": 1.7271336317062378, "learning_rate": 0.0009181189699687457, "loss": 3.4056, "step": 9645 }, { "epoch": 0.6556597363772252, "grad_norm": 2.442817449569702, "learning_rate": 0.0009180765049599131, "loss": 3.2185, "step": 9650 }, { "epoch": 0.6559994564478869, "grad_norm": 1.8849457502365112, "learning_rate": 0.0009180340399510804, "loss": 3.6109, "step": 9655 }, { "epoch": 0.6563391765185487, "grad_norm": 2.0893263816833496, "learning_rate": 0.0009179915749422476, "loss": 3.9019, "step": 9660 }, { "epoch": 0.6566788965892105, "grad_norm": 2.08650803565979, "learning_rate": 0.0009179491099334148, "loss": 3.4962, "step": 9665 }, { "epoch": 0.6570186166598723, "grad_norm": 2.2529501914978027, "learning_rate": 0.0009179066449245822, "loss": 3.1875, "step": 9670 }, { "epoch": 0.657358336730534, "grad_norm": 2.1259725093841553, "learning_rate": 0.0009178641799157494, "loss": 3.384, "step": 9675 }, { 
"epoch": 0.6576980568011959, "grad_norm": 1.5416696071624756, "learning_rate": 0.0009178217149069166, "loss": 3.7698, "step": 9680 }, { "epoch": 0.6580377768718576, "grad_norm": 2.0113742351531982, "learning_rate": 0.0009177792498980841, "loss": 3.4797, "step": 9685 }, { "epoch": 0.6583774969425193, "grad_norm": 2.5670621395111084, "learning_rate": 0.0009177367848892513, "loss": 3.3673, "step": 9690 }, { "epoch": 0.6587172170131811, "grad_norm": 2.053314447402954, "learning_rate": 0.0009176943198804185, "loss": 3.6237, "step": 9695 }, { "epoch": 0.6590569370838429, "grad_norm": 1.8588917255401611, "learning_rate": 0.0009176518548715859, "loss": 3.5343, "step": 9700 }, { "epoch": 0.6593966571545047, "grad_norm": 2.0257515907287598, "learning_rate": 0.0009176093898627531, "loss": 3.5912, "step": 9705 }, { "epoch": 0.6597363772251664, "grad_norm": 1.5975061655044556, "learning_rate": 0.0009175669248539203, "loss": 3.5512, "step": 9710 }, { "epoch": 0.6600760972958283, "grad_norm": 1.7919925451278687, "learning_rate": 0.0009175244598450876, "loss": 3.7012, "step": 9715 }, { "epoch": 0.66041581736649, "grad_norm": 1.494936466217041, "learning_rate": 0.000917481994836255, "loss": 4.124, "step": 9720 }, { "epoch": 0.6607555374371518, "grad_norm": 1.7486121654510498, "learning_rate": 0.0009174395298274222, "loss": 3.74, "step": 9725 }, { "epoch": 0.6610952575078136, "grad_norm": 1.9819061756134033, "learning_rate": 0.0009173970648185895, "loss": 3.3451, "step": 9730 }, { "epoch": 0.6614349775784754, "grad_norm": 1.7183732986450195, "learning_rate": 0.0009173545998097568, "loss": 3.5608, "step": 9735 }, { "epoch": 0.6617746976491371, "grad_norm": 2.129795551300049, "learning_rate": 0.000917312134800924, "loss": 3.5672, "step": 9740 }, { "epoch": 0.6621144177197988, "grad_norm": 1.599159836769104, "learning_rate": 0.0009172696697920913, "loss": 3.5678, "step": 9745 }, { "epoch": 0.6624541377904607, "grad_norm": 2.131281614303589, "learning_rate": 0.0009172272047832586, 
"loss": 3.6872, "step": 9750 }, { "epoch": 0.6627938578611224, "grad_norm": 1.999273657798767, "learning_rate": 0.0009171847397744259, "loss": 3.7289, "step": 9755 }, { "epoch": 0.6631335779317842, "grad_norm": 2.162783145904541, "learning_rate": 0.0009171422747655932, "loss": 3.545, "step": 9760 }, { "epoch": 0.663473298002446, "grad_norm": 2.254535675048828, "learning_rate": 0.0009170998097567604, "loss": 3.3781, "step": 9765 }, { "epoch": 0.6638130180731078, "grad_norm": 2.2688963413238525, "learning_rate": 0.0009170573447479277, "loss": 3.6674, "step": 9770 }, { "epoch": 0.6641527381437695, "grad_norm": 2.6622276306152344, "learning_rate": 0.000917014879739095, "loss": 3.5202, "step": 9775 }, { "epoch": 0.6644924582144313, "grad_norm": 1.5043233633041382, "learning_rate": 0.0009169724147302622, "loss": 3.7832, "step": 9780 }, { "epoch": 0.6648321782850931, "grad_norm": 1.6414227485656738, "learning_rate": 0.0009169299497214295, "loss": 3.7208, "step": 9785 }, { "epoch": 0.6651718983557549, "grad_norm": 1.6400879621505737, "learning_rate": 0.0009168874847125969, "loss": 3.4879, "step": 9790 }, { "epoch": 0.6655116184264166, "grad_norm": 1.8122682571411133, "learning_rate": 0.0009168450197037641, "loss": 3.3467, "step": 9795 }, { "epoch": 0.6658513384970784, "grad_norm": 2.152268409729004, "learning_rate": 0.0009168025546949314, "loss": 3.4388, "step": 9800 }, { "epoch": 0.6661910585677402, "grad_norm": 1.6867668628692627, "learning_rate": 0.0009167600896860987, "loss": 3.5222, "step": 9805 }, { "epoch": 0.6665307786384019, "grad_norm": 1.5157402753829956, "learning_rate": 0.0009167176246772659, "loss": 3.5689, "step": 9810 }, { "epoch": 0.6668704987090638, "grad_norm": 1.9959766864776611, "learning_rate": 0.0009166751596684331, "loss": 3.8168, "step": 9815 }, { "epoch": 0.6672102187797255, "grad_norm": 1.9766044616699219, "learning_rate": 0.0009166326946596006, "loss": 3.8807, "step": 9820 }, { "epoch": 0.6675499388503873, "grad_norm": 1.8154029846191406, 
"learning_rate": 0.0009165902296507678, "loss": 3.7262, "step": 9825 }, { "epoch": 0.667889658921049, "grad_norm": 1.759074330329895, "learning_rate": 0.000916547764641935, "loss": 3.6456, "step": 9830 }, { "epoch": 0.6682293789917109, "grad_norm": 1.8681968450546265, "learning_rate": 0.0009165052996331024, "loss": 3.502, "step": 9835 }, { "epoch": 0.6685690990623726, "grad_norm": 1.9278640747070312, "learning_rate": 0.0009164628346242696, "loss": 3.5702, "step": 9840 }, { "epoch": 0.6689088191330343, "grad_norm": 2.1573853492736816, "learning_rate": 0.0009164203696154368, "loss": 3.7754, "step": 9845 }, { "epoch": 0.6692485392036962, "grad_norm": 1.911843180656433, "learning_rate": 0.0009163779046066042, "loss": 3.4207, "step": 9850 }, { "epoch": 0.6695882592743579, "grad_norm": 2.1195015907287598, "learning_rate": 0.0009163354395977715, "loss": 3.8392, "step": 9855 }, { "epoch": 0.6699279793450197, "grad_norm": 1.5460922718048096, "learning_rate": 0.0009162929745889388, "loss": 3.7827, "step": 9860 }, { "epoch": 0.6702676994156814, "grad_norm": 1.5247182846069336, "learning_rate": 0.000916250509580106, "loss": 3.7222, "step": 9865 }, { "epoch": 0.6706074194863433, "grad_norm": 1.8584980964660645, "learning_rate": 0.0009162080445712733, "loss": 3.4295, "step": 9870 }, { "epoch": 0.670947139557005, "grad_norm": 1.534807562828064, "learning_rate": 0.0009161655795624406, "loss": 3.8796, "step": 9875 }, { "epoch": 0.6712868596276668, "grad_norm": 1.7045209407806396, "learning_rate": 0.0009161231145536078, "loss": 3.6513, "step": 9880 }, { "epoch": 0.6716265796983286, "grad_norm": 1.6661657094955444, "learning_rate": 0.0009160806495447751, "loss": 3.7592, "step": 9885 }, { "epoch": 0.6719662997689904, "grad_norm": 1.510776400566101, "learning_rate": 0.0009160381845359425, "loss": 3.4762, "step": 9890 }, { "epoch": 0.6723060198396521, "grad_norm": 1.810989499092102, "learning_rate": 0.0009159957195271097, "loss": 3.4882, "step": 9895 }, { "epoch": 0.672645739910314, 
"grad_norm": 1.6878626346588135, "learning_rate": 0.000915953254518277, "loss": 3.6952, "step": 9900 }, { "epoch": 0.6729854599809757, "grad_norm": 1.9639416933059692, "learning_rate": 0.0009159107895094443, "loss": 3.387, "step": 9905 }, { "epoch": 0.6733251800516374, "grad_norm": 1.553533673286438, "learning_rate": 0.0009158683245006115, "loss": 3.6347, "step": 9910 }, { "epoch": 0.6736649001222992, "grad_norm": 1.6177465915679932, "learning_rate": 0.0009158258594917787, "loss": 3.6982, "step": 9915 }, { "epoch": 0.674004620192961, "grad_norm": 1.8520668745040894, "learning_rate": 0.0009157833944829461, "loss": 3.6708, "step": 9920 }, { "epoch": 0.6743443402636228, "grad_norm": 1.8943510055541992, "learning_rate": 0.0009157409294741134, "loss": 3.4264, "step": 9925 }, { "epoch": 0.6746840603342845, "grad_norm": 1.4405831098556519, "learning_rate": 0.0009156984644652806, "loss": 3.6099, "step": 9930 }, { "epoch": 0.6750237804049464, "grad_norm": 2.001986026763916, "learning_rate": 0.000915655999456448, "loss": 3.5141, "step": 9935 }, { "epoch": 0.6753635004756081, "grad_norm": 2.0662710666656494, "learning_rate": 0.0009156135344476152, "loss": 3.5225, "step": 9940 }, { "epoch": 0.6757032205462699, "grad_norm": 1.7185523509979248, "learning_rate": 0.0009155710694387824, "loss": 3.6019, "step": 9945 }, { "epoch": 0.6760429406169316, "grad_norm": 1.6683071851730347, "learning_rate": 0.0009155286044299498, "loss": 3.7026, "step": 9950 }, { "epoch": 0.6763826606875935, "grad_norm": 1.7871067523956299, "learning_rate": 0.000915486139421117, "loss": 3.5017, "step": 9955 }, { "epoch": 0.6767223807582552, "grad_norm": 1.7457935810089111, "learning_rate": 0.0009154436744122843, "loss": 3.615, "step": 9960 }, { "epoch": 0.6770621008289169, "grad_norm": 1.7293988466262817, "learning_rate": 0.0009154012094034517, "loss": 3.6488, "step": 9965 }, { "epoch": 0.6774018208995788, "grad_norm": 2.103576421737671, "learning_rate": 0.0009153587443946189, "loss": 3.5486, "step": 9970 }, 
{ "epoch": 0.6777415409702405, "grad_norm": 2.025010347366333, "learning_rate": 0.0009153162793857861, "loss": 3.5708, "step": 9975 }, { "epoch": 0.6780812610409023, "grad_norm": 1.7231366634368896, "learning_rate": 0.0009152738143769534, "loss": 3.6242, "step": 9980 }, { "epoch": 0.6784209811115641, "grad_norm": 2.3101649284362793, "learning_rate": 0.0009152313493681207, "loss": 3.5574, "step": 9985 }, { "epoch": 0.6787607011822259, "grad_norm": 1.4914988279342651, "learning_rate": 0.0009151888843592879, "loss": 3.5741, "step": 9990 }, { "epoch": 0.6791004212528876, "grad_norm": 1.7352806329727173, "learning_rate": 0.0009151464193504553, "loss": 3.3619, "step": 9995 }, { "epoch": 0.6794401413235494, "grad_norm": 1.8911422491073608, "learning_rate": 0.0009151039543416226, "loss": 3.4618, "step": 10000 }, { "epoch": 0.6797798613942112, "grad_norm": 1.8919175863265991, "learning_rate": 0.0009150614893327898, "loss": 3.7823, "step": 10005 }, { "epoch": 0.680119581464873, "grad_norm": 1.962111234664917, "learning_rate": 0.0009150190243239571, "loss": 3.6115, "step": 10010 }, { "epoch": 0.6804593015355347, "grad_norm": 1.7789183855056763, "learning_rate": 0.0009149765593151243, "loss": 3.6589, "step": 10015 }, { "epoch": 0.6807990216061965, "grad_norm": 1.798093318939209, "learning_rate": 0.0009149340943062916, "loss": 3.5338, "step": 10020 }, { "epoch": 0.6811387416768583, "grad_norm": 1.7661519050598145, "learning_rate": 0.0009148916292974589, "loss": 3.663, "step": 10025 }, { "epoch": 0.68147846174752, "grad_norm": 1.869270920753479, "learning_rate": 0.0009148491642886262, "loss": 3.5261, "step": 10030 }, { "epoch": 0.6818181818181818, "grad_norm": 1.7201179265975952, "learning_rate": 0.0009148066992797935, "loss": 3.8117, "step": 10035 }, { "epoch": 0.6821579018888436, "grad_norm": 1.8088816404342651, "learning_rate": 0.0009147642342709608, "loss": 3.5489, "step": 10040 }, { "epoch": 0.6824976219595054, "grad_norm": 1.7516591548919678, "learning_rate": 
0.000914721769262128, "loss": 3.5943, "step": 10045 }, { "epoch": 0.6828373420301671, "grad_norm": 1.892500400543213, "learning_rate": 0.0009146793042532952, "loss": 3.4269, "step": 10050 }, { "epoch": 0.683177062100829, "grad_norm": 1.6524304151535034, "learning_rate": 0.0009146368392444626, "loss": 3.621, "step": 10055 }, { "epoch": 0.6835167821714907, "grad_norm": 1.6713835000991821, "learning_rate": 0.0009145943742356298, "loss": 3.809, "step": 10060 }, { "epoch": 0.6838565022421524, "grad_norm": 1.8735532760620117, "learning_rate": 0.0009145519092267971, "loss": 3.7756, "step": 10065 }, { "epoch": 0.6841962223128143, "grad_norm": 1.4864729642868042, "learning_rate": 0.0009145094442179645, "loss": 3.8755, "step": 10070 }, { "epoch": 0.684535942383476, "grad_norm": 3.8754918575286865, "learning_rate": 0.0009144669792091317, "loss": 3.3601, "step": 10075 }, { "epoch": 0.6848756624541378, "grad_norm": 2.094496965408325, "learning_rate": 0.0009144245142002989, "loss": 3.534, "step": 10080 }, { "epoch": 0.6852153825247995, "grad_norm": 1.7708234786987305, "learning_rate": 0.0009143820491914663, "loss": 3.5109, "step": 10085 }, { "epoch": 0.6855551025954614, "grad_norm": 1.6140949726104736, "learning_rate": 0.0009143395841826335, "loss": 3.4727, "step": 10090 }, { "epoch": 0.6858948226661231, "grad_norm": 2.1907958984375, "learning_rate": 0.0009142971191738007, "loss": 3.578, "step": 10095 }, { "epoch": 0.6862345427367849, "grad_norm": 2.6896252632141113, "learning_rate": 0.0009142546541649682, "loss": 3.7476, "step": 10100 }, { "epoch": 0.6865742628074467, "grad_norm": 1.931567668914795, "learning_rate": 0.0009142121891561354, "loss": 3.4638, "step": 10105 }, { "epoch": 0.6869139828781085, "grad_norm": 1.8306056261062622, "learning_rate": 0.0009141697241473026, "loss": 3.4852, "step": 10110 }, { "epoch": 0.6872537029487702, "grad_norm": 1.780611276626587, "learning_rate": 0.0009141272591384699, "loss": 3.7036, "step": 10115 }, { "epoch": 0.6875934230194319, 
"grad_norm": 2.3546876907348633, "learning_rate": 0.0009140847941296372, "loss": 3.3871, "step": 10120 }, { "epoch": 0.6879331430900938, "grad_norm": 1.7049635648727417, "learning_rate": 0.0009140423291208044, "loss": 3.6877, "step": 10125 }, { "epoch": 0.6882728631607555, "grad_norm": 1.6741193532943726, "learning_rate": 0.0009139998641119717, "loss": 3.4663, "step": 10130 }, { "epoch": 0.6886125832314173, "grad_norm": 1.478332757949829, "learning_rate": 0.0009139573991031391, "loss": 3.6218, "step": 10135 }, { "epoch": 0.6889523033020791, "grad_norm": 2.157047986984253, "learning_rate": 0.0009139149340943063, "loss": 3.4645, "step": 10140 }, { "epoch": 0.6892920233727409, "grad_norm": 1.635009765625, "learning_rate": 0.0009138724690854736, "loss": 3.6606, "step": 10145 }, { "epoch": 0.6896317434434026, "grad_norm": 1.6830946207046509, "learning_rate": 0.0009138300040766409, "loss": 3.6683, "step": 10150 }, { "epoch": 0.6899714635140645, "grad_norm": 1.9362448453903198, "learning_rate": 0.0009137875390678081, "loss": 3.633, "step": 10155 }, { "epoch": 0.6903111835847262, "grad_norm": 2.1373953819274902, "learning_rate": 0.0009137450740589754, "loss": 3.538, "step": 10160 }, { "epoch": 0.690650903655388, "grad_norm": 1.8674534559249878, "learning_rate": 0.0009137026090501426, "loss": 3.6141, "step": 10165 }, { "epoch": 0.6909906237260497, "grad_norm": 2.0175373554229736, "learning_rate": 0.00091366014404131, "loss": 3.5816, "step": 10170 }, { "epoch": 0.6913303437967115, "grad_norm": 1.8873035907745361, "learning_rate": 0.0009136176790324773, "loss": 3.4453, "step": 10175 }, { "epoch": 0.6916700638673733, "grad_norm": 2.162614583969116, "learning_rate": 0.0009135752140236445, "loss": 3.7362, "step": 10180 }, { "epoch": 0.692009783938035, "grad_norm": 1.6294333934783936, "learning_rate": 0.0009135327490148118, "loss": 3.2905, "step": 10185 }, { "epoch": 0.6923495040086969, "grad_norm": 1.7752203941345215, "learning_rate": 0.0009134902840059791, "loss": 3.4984, 
"step": 10190 }, { "epoch": 0.6926892240793586, "grad_norm": 1.7950907945632935, "learning_rate": 0.0009134478189971463, "loss": 3.616, "step": 10195 }, { "epoch": 0.6930289441500204, "grad_norm": 1.6904563903808594, "learning_rate": 0.0009134053539883137, "loss": 3.5301, "step": 10200 }, { "epoch": 0.6933686642206821, "grad_norm": 2.123793840408325, "learning_rate": 0.000913362888979481, "loss": 3.3373, "step": 10205 }, { "epoch": 0.693708384291344, "grad_norm": 1.5593639612197876, "learning_rate": 0.0009133204239706482, "loss": 3.5932, "step": 10210 }, { "epoch": 0.6940481043620057, "grad_norm": 1.734637975692749, "learning_rate": 0.0009132779589618155, "loss": 3.7412, "step": 10215 }, { "epoch": 0.6943878244326674, "grad_norm": 2.1218161582946777, "learning_rate": 0.0009132354939529828, "loss": 3.6993, "step": 10220 }, { "epoch": 0.6947275445033293, "grad_norm": 2.399801254272461, "learning_rate": 0.00091319302894415, "loss": 3.5596, "step": 10225 }, { "epoch": 0.695067264573991, "grad_norm": 1.941778302192688, "learning_rate": 0.0009131505639353173, "loss": 3.6544, "step": 10230 }, { "epoch": 0.6954069846446528, "grad_norm": 2.0374550819396973, "learning_rate": 0.0009131080989264846, "loss": 3.5987, "step": 10235 }, { "epoch": 0.6957467047153146, "grad_norm": 1.6965489387512207, "learning_rate": 0.0009130656339176519, "loss": 3.6729, "step": 10240 }, { "epoch": 0.6960864247859764, "grad_norm": 1.5816420316696167, "learning_rate": 0.0009130231689088192, "loss": 3.6891, "step": 10245 }, { "epoch": 0.6964261448566381, "grad_norm": 1.7663391828536987, "learning_rate": 0.0009129807038999865, "loss": 3.5924, "step": 10250 }, { "epoch": 0.6967658649272999, "grad_norm": 2.878582239151001, "learning_rate": 0.0009129382388911537, "loss": 3.5898, "step": 10255 }, { "epoch": 0.6971055849979617, "grad_norm": 2.4012043476104736, "learning_rate": 0.000912895773882321, "loss": 3.6323, "step": 10260 }, { "epoch": 0.6974453050686235, "grad_norm": 2.0840251445770264, 
"learning_rate": 0.0009128533088734882, "loss": 3.6753, "step": 10265 }, { "epoch": 0.6977850251392852, "grad_norm": 1.7900210618972778, "learning_rate": 0.0009128108438646555, "loss": 3.7733, "step": 10270 }, { "epoch": 0.698124745209947, "grad_norm": 1.9286638498306274, "learning_rate": 0.0009127683788558229, "loss": 3.4558, "step": 10275 }, { "epoch": 0.6984644652806088, "grad_norm": 2.2401771545410156, "learning_rate": 0.0009127259138469901, "loss": 3.7364, "step": 10280 }, { "epoch": 0.6988041853512705, "grad_norm": 1.7390024662017822, "learning_rate": 0.0009126834488381574, "loss": 3.4907, "step": 10285 }, { "epoch": 0.6991439054219323, "grad_norm": 1.8108172416687012, "learning_rate": 0.0009126409838293247, "loss": 3.4871, "step": 10290 }, { "epoch": 0.6994836254925941, "grad_norm": 1.7705355882644653, "learning_rate": 0.0009125985188204919, "loss": 3.3637, "step": 10295 }, { "epoch": 0.6998233455632559, "grad_norm": 1.7451473474502563, "learning_rate": 0.0009125560538116591, "loss": 3.6293, "step": 10300 }, { "epoch": 0.7001630656339176, "grad_norm": 2.389315366744995, "learning_rate": 0.0009125135888028266, "loss": 3.6451, "step": 10305 }, { "epoch": 0.7005027857045795, "grad_norm": 2.257412910461426, "learning_rate": 0.0009124711237939938, "loss": 3.5623, "step": 10310 }, { "epoch": 0.7008425057752412, "grad_norm": 2.017857789993286, "learning_rate": 0.000912428658785161, "loss": 3.6037, "step": 10315 }, { "epoch": 0.701182225845903, "grad_norm": 1.5366560220718384, "learning_rate": 0.0009123861937763284, "loss": 3.4779, "step": 10320 }, { "epoch": 0.7015219459165648, "grad_norm": 1.9116268157958984, "learning_rate": 0.0009123437287674956, "loss": 3.6636, "step": 10325 }, { "epoch": 0.7018616659872265, "grad_norm": 2.3383524417877197, "learning_rate": 0.0009123012637586628, "loss": 3.7508, "step": 10330 }, { "epoch": 0.7022013860578883, "grad_norm": 1.7082445621490479, "learning_rate": 0.0009122587987498302, "loss": 3.5724, "step": 10335 }, { "epoch": 
0.70254110612855, "grad_norm": 1.7316560745239258, "learning_rate": 0.0009122163337409975, "loss": 3.4007, "step": 10340 }, { "epoch": 0.7028808261992119, "grad_norm": 1.9866753816604614, "learning_rate": 0.0009121738687321647, "loss": 3.7963, "step": 10345 }, { "epoch": 0.7032205462698736, "grad_norm": 1.7488545179367065, "learning_rate": 0.000912131403723332, "loss": 3.4888, "step": 10350 }, { "epoch": 0.7035602663405354, "grad_norm": 2.0518665313720703, "learning_rate": 0.0009120889387144993, "loss": 3.5875, "step": 10355 }, { "epoch": 0.7038999864111972, "grad_norm": 1.9120354652404785, "learning_rate": 0.0009120464737056665, "loss": 3.717, "step": 10360 }, { "epoch": 0.704239706481859, "grad_norm": 2.4368984699249268, "learning_rate": 0.0009120040086968338, "loss": 3.3745, "step": 10365 }, { "epoch": 0.7045794265525207, "grad_norm": 2.2774691581726074, "learning_rate": 0.0009119615436880011, "loss": 3.7061, "step": 10370 }, { "epoch": 0.7049191466231824, "grad_norm": 1.6713173389434814, "learning_rate": 0.0009119190786791684, "loss": 3.5051, "step": 10375 }, { "epoch": 0.7052588666938443, "grad_norm": 1.687821865081787, "learning_rate": 0.0009118766136703357, "loss": 3.7337, "step": 10380 }, { "epoch": 0.705598586764506, "grad_norm": 1.7874579429626465, "learning_rate": 0.000911834148661503, "loss": 3.6038, "step": 10385 }, { "epoch": 0.7059383068351678, "grad_norm": 1.7174843549728394, "learning_rate": 0.0009117916836526702, "loss": 3.6119, "step": 10390 }, { "epoch": 0.7062780269058296, "grad_norm": 1.7649627923965454, "learning_rate": 0.0009117492186438375, "loss": 3.7196, "step": 10395 }, { "epoch": 0.7066177469764914, "grad_norm": 1.588789939880371, "learning_rate": 0.0009117067536350047, "loss": 3.587, "step": 10400 }, { "epoch": 0.7069574670471531, "grad_norm": 1.6744483709335327, "learning_rate": 0.000911664288626172, "loss": 3.4241, "step": 10405 }, { "epoch": 0.707297187117815, "grad_norm": 1.6709529161453247, "learning_rate": 0.0009116218236173394, 
"loss": 3.7159, "step": 10410 }, { "epoch": 0.7076369071884767, "grad_norm": 1.612239122390747, "learning_rate": 0.0009115793586085066, "loss": 3.6717, "step": 10415 }, { "epoch": 0.7079766272591385, "grad_norm": 2.7813329696655273, "learning_rate": 0.0009115368935996739, "loss": 3.7822, "step": 10420 }, { "epoch": 0.7083163473298002, "grad_norm": 1.638322353363037, "learning_rate": 0.0009114944285908412, "loss": 3.7389, "step": 10425 }, { "epoch": 0.708656067400462, "grad_norm": 1.675769329071045, "learning_rate": 0.0009114519635820084, "loss": 3.7824, "step": 10430 }, { "epoch": 0.7089957874711238, "grad_norm": 1.5523114204406738, "learning_rate": 0.0009114094985731757, "loss": 3.7201, "step": 10435 }, { "epoch": 0.7093355075417855, "grad_norm": 1.6362709999084473, "learning_rate": 0.000911367033564343, "loss": 3.7201, "step": 10440 }, { "epoch": 0.7096752276124474, "grad_norm": 1.6710889339447021, "learning_rate": 0.0009113245685555103, "loss": 3.683, "step": 10445 }, { "epoch": 0.7100149476831091, "grad_norm": 1.846737265586853, "learning_rate": 0.0009112821035466775, "loss": 3.5859, "step": 10450 }, { "epoch": 0.7103546677537709, "grad_norm": 1.775804042816162, "learning_rate": 0.0009112396385378449, "loss": 3.8099, "step": 10455 }, { "epoch": 0.7106943878244326, "grad_norm": 1.903306484222412, "learning_rate": 0.0009111971735290121, "loss": 3.5994, "step": 10460 }, { "epoch": 0.7110341078950945, "grad_norm": 1.7899750471115112, "learning_rate": 0.0009111547085201793, "loss": 3.5641, "step": 10465 }, { "epoch": 0.7113738279657562, "grad_norm": 1.4439070224761963, "learning_rate": 0.0009111122435113467, "loss": 3.7302, "step": 10470 }, { "epoch": 0.711713548036418, "grad_norm": 2.119030475616455, "learning_rate": 0.0009110697785025139, "loss": 3.5536, "step": 10475 }, { "epoch": 0.7120532681070798, "grad_norm": 1.6663533449172974, "learning_rate": 0.0009110273134936812, "loss": 3.5956, "step": 10480 }, { "epoch": 0.7123929881777415, "grad_norm": 
1.9808276891708374, "learning_rate": 0.0009109848484848486, "loss": 3.3274, "step": 10485 }, { "epoch": 0.7127327082484033, "grad_norm": 1.6668533086776733, "learning_rate": 0.0009109423834760158, "loss": 3.5852, "step": 10490 }, { "epoch": 0.7130724283190651, "grad_norm": 2.505760908126831, "learning_rate": 0.000910899918467183, "loss": 3.6151, "step": 10495 }, { "epoch": 0.7134121483897269, "grad_norm": 1.8561300039291382, "learning_rate": 0.0009108574534583503, "loss": 3.584, "step": 10500 }, { "epoch": 0.7137518684603886, "grad_norm": 2.2559993267059326, "learning_rate": 0.0009108149884495176, "loss": 3.6699, "step": 10505 }, { "epoch": 0.7140915885310504, "grad_norm": 1.5367292165756226, "learning_rate": 0.0009107725234406848, "loss": 3.8514, "step": 10510 }, { "epoch": 0.7144313086017122, "grad_norm": 1.5217583179473877, "learning_rate": 0.0009107300584318522, "loss": 3.4694, "step": 10515 }, { "epoch": 0.714771028672374, "grad_norm": 2.226489782333374, "learning_rate": 0.0009106875934230195, "loss": 3.5008, "step": 10520 }, { "epoch": 0.7151107487430357, "grad_norm": 2.542753219604492, "learning_rate": 0.0009106451284141867, "loss": 3.6518, "step": 10525 }, { "epoch": 0.7154504688136976, "grad_norm": 1.5770556926727295, "learning_rate": 0.000910602663405354, "loss": 3.452, "step": 10530 }, { "epoch": 0.7157901888843593, "grad_norm": 1.3439990282058716, "learning_rate": 0.0009105601983965213, "loss": 3.718, "step": 10535 }, { "epoch": 0.716129908955021, "grad_norm": 1.8979806900024414, "learning_rate": 0.0009105177333876886, "loss": 3.7087, "step": 10540 }, { "epoch": 0.7164696290256828, "grad_norm": 2.06891131401062, "learning_rate": 0.0009104752683788558, "loss": 3.3965, "step": 10545 }, { "epoch": 0.7168093490963446, "grad_norm": 2.327305316925049, "learning_rate": 0.0009104328033700231, "loss": 3.7187, "step": 10550 }, { "epoch": 0.7171490691670064, "grad_norm": 1.5833518505096436, "learning_rate": 0.0009103903383611905, "loss": 3.6993, "step": 10555 }, { 
"epoch": 0.7174887892376681, "grad_norm": 1.542525291442871, "learning_rate": 0.0009103478733523577, "loss": 3.6735, "step": 10560 }, { "epoch": 0.71782850930833, "grad_norm": 1.4606531858444214, "learning_rate": 0.0009103054083435249, "loss": 3.7162, "step": 10565 }, { "epoch": 0.7181682293789917, "grad_norm": 2.1443166732788086, "learning_rate": 0.0009102629433346923, "loss": 3.782, "step": 10570 }, { "epoch": 0.7185079494496535, "grad_norm": 2.2350246906280518, "learning_rate": 0.0009102204783258595, "loss": 3.5129, "step": 10575 }, { "epoch": 0.7188476695203153, "grad_norm": 2.3446648120880127, "learning_rate": 0.0009101780133170267, "loss": 3.7507, "step": 10580 }, { "epoch": 0.719187389590977, "grad_norm": 2.4580843448638916, "learning_rate": 0.0009101355483081942, "loss": 3.4728, "step": 10585 }, { "epoch": 0.7195271096616388, "grad_norm": 2.0156357288360596, "learning_rate": 0.0009100930832993614, "loss": 3.7173, "step": 10590 }, { "epoch": 0.7198668297323005, "grad_norm": 1.704158067703247, "learning_rate": 0.0009100506182905286, "loss": 3.4188, "step": 10595 }, { "epoch": 0.7202065498029624, "grad_norm": 1.6589950323104858, "learning_rate": 0.000910008153281696, "loss": 3.5028, "step": 10600 }, { "epoch": 0.7205462698736241, "grad_norm": 1.988143801689148, "learning_rate": 0.0009099656882728632, "loss": 3.6937, "step": 10605 }, { "epoch": 0.7208859899442859, "grad_norm": 1.5164556503295898, "learning_rate": 0.0009099232232640304, "loss": 3.6197, "step": 10610 }, { "epoch": 0.7212257100149477, "grad_norm": 1.4166566133499146, "learning_rate": 0.0009098807582551977, "loss": 3.5542, "step": 10615 }, { "epoch": 0.7215654300856095, "grad_norm": 1.9135428667068481, "learning_rate": 0.0009098382932463651, "loss": 3.5623, "step": 10620 }, { "epoch": 0.7219051501562712, "grad_norm": 1.8031169176101685, "learning_rate": 0.0009097958282375323, "loss": 3.7851, "step": 10625 }, { "epoch": 0.722244870226933, "grad_norm": 1.596536636352539, "learning_rate": 
0.0009097533632286996, "loss": 3.5857, "step": 10630 }, { "epoch": 0.7225845902975948, "grad_norm": 1.6916321516036987, "learning_rate": 0.0009097108982198669, "loss": 3.4749, "step": 10635 }, { "epoch": 0.7229243103682566, "grad_norm": 2.230570077896118, "learning_rate": 0.0009096684332110341, "loss": 3.4873, "step": 10640 }, { "epoch": 0.7232640304389183, "grad_norm": 2.087834119796753, "learning_rate": 0.0009096259682022014, "loss": 3.6694, "step": 10645 }, { "epoch": 0.7236037505095801, "grad_norm": 2.175534725189209, "learning_rate": 0.0009095835031933686, "loss": 3.5921, "step": 10650 }, { "epoch": 0.7239434705802419, "grad_norm": 2.5262560844421387, "learning_rate": 0.000909541038184536, "loss": 3.5744, "step": 10655 }, { "epoch": 0.7242831906509036, "grad_norm": 2.1896793842315674, "learning_rate": 0.0009094985731757033, "loss": 3.3263, "step": 10660 }, { "epoch": 0.7246229107215655, "grad_norm": 2.0066115856170654, "learning_rate": 0.0009094561081668705, "loss": 3.624, "step": 10665 }, { "epoch": 0.7249626307922272, "grad_norm": 2.30483078956604, "learning_rate": 0.0009094136431580378, "loss": 3.5806, "step": 10670 }, { "epoch": 0.725302350862889, "grad_norm": 1.873932123184204, "learning_rate": 0.0009093711781492051, "loss": 3.7668, "step": 10675 }, { "epoch": 0.7256420709335507, "grad_norm": 1.7565371990203857, "learning_rate": 0.0009093287131403723, "loss": 3.7507, "step": 10680 }, { "epoch": 0.7259817910042126, "grad_norm": 2.1941516399383545, "learning_rate": 0.0009092862481315395, "loss": 3.6929, "step": 10685 }, { "epoch": 0.7263215110748743, "grad_norm": 1.6924620866775513, "learning_rate": 0.000909243783122707, "loss": 3.6216, "step": 10690 }, { "epoch": 0.726661231145536, "grad_norm": 1.5863230228424072, "learning_rate": 0.0009092013181138742, "loss": 3.7287, "step": 10695 }, { "epoch": 0.7270009512161979, "grad_norm": 2.0395824909210205, "learning_rate": 0.0009091588531050414, "loss": 3.8602, "step": 10700 }, { "epoch": 0.7273406712868596, 
"grad_norm": 1.612886667251587, "learning_rate": 0.0009091163880962088, "loss": 3.7285, "step": 10705 }, { "epoch": 0.7276803913575214, "grad_norm": 1.9462745189666748, "learning_rate": 0.000909073923087376, "loss": 3.7816, "step": 10710 }, { "epoch": 0.7280201114281831, "grad_norm": 1.6150790452957153, "learning_rate": 0.0009090314580785432, "loss": 3.8422, "step": 10715 }, { "epoch": 0.728359831498845, "grad_norm": 2.4481027126312256, "learning_rate": 0.0009089889930697106, "loss": 3.4361, "step": 10720 }, { "epoch": 0.7286995515695067, "grad_norm": 1.5063745975494385, "learning_rate": 0.0009089465280608779, "loss": 3.5978, "step": 10725 }, { "epoch": 0.7290392716401685, "grad_norm": 1.7738218307495117, "learning_rate": 0.0009089040630520451, "loss": 3.3852, "step": 10730 }, { "epoch": 0.7293789917108303, "grad_norm": 1.814520001411438, "learning_rate": 0.0009088615980432125, "loss": 3.8029, "step": 10735 }, { "epoch": 0.7297187117814921, "grad_norm": 1.6429280042648315, "learning_rate": 0.0009088191330343797, "loss": 3.6119, "step": 10740 }, { "epoch": 0.7300584318521538, "grad_norm": 1.8490699529647827, "learning_rate": 0.0009087766680255469, "loss": 3.4998, "step": 10745 }, { "epoch": 0.7303981519228157, "grad_norm": 2.508787155151367, "learning_rate": 0.0009087342030167142, "loss": 3.7285, "step": 10750 }, { "epoch": 0.7307378719934774, "grad_norm": 2.0201377868652344, "learning_rate": 0.0009086917380078815, "loss": 3.639, "step": 10755 }, { "epoch": 0.7310775920641391, "grad_norm": 1.9294347763061523, "learning_rate": 0.0009086492729990488, "loss": 3.7827, "step": 10760 }, { "epoch": 0.7314173121348009, "grad_norm": 1.9676815271377563, "learning_rate": 0.0009086068079902161, "loss": 3.6928, "step": 10765 }, { "epoch": 0.7317570322054627, "grad_norm": 1.8754358291625977, "learning_rate": 0.0009085643429813834, "loss": 3.4786, "step": 10770 }, { "epoch": 0.7320967522761245, "grad_norm": 1.6718255281448364, "learning_rate": 0.0009085218779725506, "loss": 
3.8281, "step": 10775 }, { "epoch": 0.7324364723467862, "grad_norm": 1.8237642049789429, "learning_rate": 0.0009084794129637179, "loss": 3.5782, "step": 10780 }, { "epoch": 0.7327761924174481, "grad_norm": 1.799741506576538, "learning_rate": 0.0009084369479548851, "loss": 3.6574, "step": 10785 }, { "epoch": 0.7331159124881098, "grad_norm": 1.7937496900558472, "learning_rate": 0.0009083944829460524, "loss": 3.5781, "step": 10790 }, { "epoch": 0.7334556325587716, "grad_norm": 2.127027750015259, "learning_rate": 0.0009083520179372198, "loss": 3.6495, "step": 10795 }, { "epoch": 0.7337953526294333, "grad_norm": 2.1826248168945312, "learning_rate": 0.000908309552928387, "loss": 3.6242, "step": 10800 }, { "epoch": 0.7341350727000951, "grad_norm": 2.0348265171051025, "learning_rate": 0.0009082670879195543, "loss": 3.7404, "step": 10805 }, { "epoch": 0.7344747927707569, "grad_norm": 1.877200961112976, "learning_rate": 0.0009082246229107216, "loss": 3.7589, "step": 10810 }, { "epoch": 0.7348145128414186, "grad_norm": 2.0612223148345947, "learning_rate": 0.0009081821579018888, "loss": 3.5709, "step": 10815 }, { "epoch": 0.7351542329120805, "grad_norm": 1.4885286092758179, "learning_rate": 0.0009081396928930561, "loss": 3.6518, "step": 10820 }, { "epoch": 0.7354939529827422, "grad_norm": 1.5524770021438599, "learning_rate": 0.0009080972278842234, "loss": 3.623, "step": 10825 }, { "epoch": 0.735833673053404, "grad_norm": 1.799144983291626, "learning_rate": 0.0009080547628753907, "loss": 3.5511, "step": 10830 }, { "epoch": 0.7361733931240658, "grad_norm": 1.6991182565689087, "learning_rate": 0.000908012297866558, "loss": 3.6312, "step": 10835 }, { "epoch": 0.7365131131947276, "grad_norm": 1.7672581672668457, "learning_rate": 0.0009079698328577253, "loss": 3.5088, "step": 10840 }, { "epoch": 0.7368528332653893, "grad_norm": 1.961018443107605, "learning_rate": 0.0009079273678488925, "loss": 3.4358, "step": 10845 }, { "epoch": 0.737192553336051, "grad_norm": 2.112122058868408, 
"learning_rate": 0.0009078849028400597, "loss": 3.6593, "step": 10850 }, { "epoch": 0.7375322734067129, "grad_norm": 1.818400502204895, "learning_rate": 0.0009078424378312271, "loss": 3.5685, "step": 10855 }, { "epoch": 0.7378719934773746, "grad_norm": 1.805897831916809, "learning_rate": 0.0009077999728223943, "loss": 3.7459, "step": 10860 }, { "epoch": 0.7382117135480364, "grad_norm": 1.5545519590377808, "learning_rate": 0.0009077575078135616, "loss": 3.7726, "step": 10865 }, { "epoch": 0.7385514336186982, "grad_norm": 1.7011590003967285, "learning_rate": 0.000907715042804729, "loss": 3.4979, "step": 10870 }, { "epoch": 0.73889115368936, "grad_norm": 1.903577208518982, "learning_rate": 0.0009076725777958962, "loss": 3.6957, "step": 10875 }, { "epoch": 0.7392308737600217, "grad_norm": 2.0284531116485596, "learning_rate": 0.0009076301127870635, "loss": 3.6147, "step": 10880 }, { "epoch": 0.7395705938306835, "grad_norm": 1.7099533081054688, "learning_rate": 0.0009075876477782308, "loss": 3.5497, "step": 10885 }, { "epoch": 0.7399103139013453, "grad_norm": 1.8762907981872559, "learning_rate": 0.000907545182769398, "loss": 3.5468, "step": 10890 }, { "epoch": 0.7402500339720071, "grad_norm": 1.8908005952835083, "learning_rate": 0.0009075027177605654, "loss": 3.6105, "step": 10895 }, { "epoch": 0.7405897540426688, "grad_norm": 1.9788455963134766, "learning_rate": 0.0009074602527517326, "loss": 3.5917, "step": 10900 }, { "epoch": 0.7409294741133307, "grad_norm": 1.839943766593933, "learning_rate": 0.0009074177877428999, "loss": 3.6811, "step": 10905 }, { "epoch": 0.7412691941839924, "grad_norm": 1.778451919555664, "learning_rate": 0.0009073753227340672, "loss": 3.654, "step": 10910 }, { "epoch": 0.7416089142546541, "grad_norm": 1.5568796396255493, "learning_rate": 0.0009073328577252344, "loss": 3.8188, "step": 10915 }, { "epoch": 0.741948634325316, "grad_norm": 2.1456961631774902, "learning_rate": 0.0009072903927164017, "loss": 3.6149, "step": 10920 }, { "epoch": 
0.7422883543959777, "grad_norm": 1.7917147874832153, "learning_rate": 0.000907247927707569, "loss": 3.5587, "step": 10925 }, { "epoch": 0.7426280744666395, "grad_norm": 9.777530670166016, "learning_rate": 0.0009072054626987363, "loss": 3.7762, "step": 10930 }, { "epoch": 0.7429677945373012, "grad_norm": 2.1948392391204834, "learning_rate": 0.0009071629976899036, "loss": 3.5018, "step": 10935 }, { "epoch": 0.7433075146079631, "grad_norm": 2.4765570163726807, "learning_rate": 0.0009071205326810709, "loss": 3.4883, "step": 10940 }, { "epoch": 0.7436472346786248, "grad_norm": 1.6723936796188354, "learning_rate": 0.0009070780676722381, "loss": 3.5365, "step": 10945 }, { "epoch": 0.7439869547492866, "grad_norm": 1.7698875665664673, "learning_rate": 0.0009070356026634053, "loss": 3.9474, "step": 10950 }, { "epoch": 0.7443266748199484, "grad_norm": 1.596268892288208, "learning_rate": 0.0009069931376545727, "loss": 3.468, "step": 10955 }, { "epoch": 0.7446663948906102, "grad_norm": 1.5150041580200195, "learning_rate": 0.0009069506726457399, "loss": 3.4619, "step": 10960 }, { "epoch": 0.7450061149612719, "grad_norm": 1.7516692876815796, "learning_rate": 0.0009069082076369072, "loss": 3.7617, "step": 10965 }, { "epoch": 0.7453458350319336, "grad_norm": 2.2515101432800293, "learning_rate": 0.0009068657426280746, "loss": 3.5494, "step": 10970 }, { "epoch": 0.7456855551025955, "grad_norm": 1.977340579032898, "learning_rate": 0.0009068232776192418, "loss": 3.6609, "step": 10975 }, { "epoch": 0.7460252751732572, "grad_norm": 2.3767447471618652, "learning_rate": 0.000906780812610409, "loss": 3.517, "step": 10980 }, { "epoch": 0.746364995243919, "grad_norm": 2.80189847946167, "learning_rate": 0.0009067383476015764, "loss": 3.6353, "step": 10985 }, { "epoch": 0.7467047153145808, "grad_norm": 1.6686455011367798, "learning_rate": 0.0009066958825927436, "loss": 3.5869, "step": 10990 }, { "epoch": 0.7470444353852426, "grad_norm": 1.5608234405517578, "learning_rate": 
0.0009066534175839108, "loss": 3.3881, "step": 10995 }, { "epoch": 0.7473841554559043, "grad_norm": 2.042708158493042, "learning_rate": 0.0009066109525750782, "loss": 3.3761, "step": 11000 }, { "epoch": 0.7477238755265662, "grad_norm": 1.956146240234375, "learning_rate": 0.0009065684875662455, "loss": 3.5367, "step": 11005 }, { "epoch": 0.7480635955972279, "grad_norm": 1.774767279624939, "learning_rate": 0.0009065260225574127, "loss": 3.6062, "step": 11010 }, { "epoch": 0.7484033156678896, "grad_norm": 1.7759861946105957, "learning_rate": 0.00090648355754858, "loss": 3.3066, "step": 11015 }, { "epoch": 0.7487430357385514, "grad_norm": 1.9865847826004028, "learning_rate": 0.0009064410925397473, "loss": 3.5861, "step": 11020 }, { "epoch": 0.7490827558092132, "grad_norm": 1.6270124912261963, "learning_rate": 0.0009063986275309145, "loss": 3.5896, "step": 11025 }, { "epoch": 0.749422475879875, "grad_norm": 2.1606333255767822, "learning_rate": 0.0009063561625220818, "loss": 3.6413, "step": 11030 }, { "epoch": 0.7497621959505367, "grad_norm": 1.6985037326812744, "learning_rate": 0.0009063136975132492, "loss": 3.6511, "step": 11035 }, { "epoch": 0.7501019160211986, "grad_norm": 1.6427828073501587, "learning_rate": 0.0009062712325044164, "loss": 3.6631, "step": 11040 }, { "epoch": 0.7504416360918603, "grad_norm": 1.5944570302963257, "learning_rate": 0.0009062287674955837, "loss": 3.6265, "step": 11045 }, { "epoch": 0.7507813561625221, "grad_norm": 1.7033028602600098, "learning_rate": 0.0009061863024867509, "loss": 3.5915, "step": 11050 }, { "epoch": 0.7511210762331838, "grad_norm": 1.9235185384750366, "learning_rate": 0.0009061438374779182, "loss": 3.39, "step": 11055 }, { "epoch": 0.7514607963038457, "grad_norm": 1.6575926542282104, "learning_rate": 0.0009061013724690855, "loss": 3.647, "step": 11060 }, { "epoch": 0.7518005163745074, "grad_norm": 1.9008510112762451, "learning_rate": 0.0009060589074602527, "loss": 3.7077, "step": 11065 }, { "epoch": 0.7521402364451691, 
"grad_norm": 2.1481611728668213, "learning_rate": 0.0009060164424514201, "loss": 3.5984, "step": 11070 }, { "epoch": 0.752479956515831, "grad_norm": 1.5388333797454834, "learning_rate": 0.0009059739774425874, "loss": 3.7226, "step": 11075 }, { "epoch": 0.7528196765864927, "grad_norm": 1.5707014799118042, "learning_rate": 0.0009059315124337546, "loss": 3.4791, "step": 11080 }, { "epoch": 0.7531593966571545, "grad_norm": 2.195896863937378, "learning_rate": 0.0009058890474249218, "loss": 3.3526, "step": 11085 }, { "epoch": 0.7534991167278163, "grad_norm": 2.4687108993530273, "learning_rate": 0.0009058465824160892, "loss": 3.7014, "step": 11090 }, { "epoch": 0.7538388367984781, "grad_norm": 1.4946893453598022, "learning_rate": 0.0009058041174072564, "loss": 3.7176, "step": 11095 }, { "epoch": 0.7541785568691398, "grad_norm": 1.74015474319458, "learning_rate": 0.0009057616523984236, "loss": 3.441, "step": 11100 }, { "epoch": 0.7545182769398016, "grad_norm": 1.5070186853408813, "learning_rate": 0.0009057191873895911, "loss": 3.616, "step": 11105 }, { "epoch": 0.7548579970104634, "grad_norm": 1.7485861778259277, "learning_rate": 0.0009056767223807583, "loss": 3.5464, "step": 11110 }, { "epoch": 0.7551977170811252, "grad_norm": 1.8665772676467896, "learning_rate": 0.0009056342573719255, "loss": 3.5245, "step": 11115 }, { "epoch": 0.7555374371517869, "grad_norm": 1.9667366743087769, "learning_rate": 0.0009055917923630929, "loss": 3.6306, "step": 11120 }, { "epoch": 0.7558771572224487, "grad_norm": 1.7338312864303589, "learning_rate": 0.0009055493273542601, "loss": 3.7472, "step": 11125 }, { "epoch": 0.7562168772931105, "grad_norm": 1.7342556715011597, "learning_rate": 0.0009055068623454273, "loss": 3.663, "step": 11130 }, { "epoch": 0.7565565973637722, "grad_norm": 1.9933854341506958, "learning_rate": 0.0009054643973365946, "loss": 3.7701, "step": 11135 }, { "epoch": 0.756896317434434, "grad_norm": 1.7444850206375122, "learning_rate": 0.000905421932327762, "loss": 3.6974, 
"step": 11140 }, { "epoch": 0.7572360375050958, "grad_norm": 2.0101726055145264, "learning_rate": 0.0009053794673189292, "loss": 3.4171, "step": 11145 }, { "epoch": 0.7575757575757576, "grad_norm": 2.0909204483032227, "learning_rate": 0.0009053370023100965, "loss": 3.8724, "step": 11150 }, { "epoch": 0.7579154776464193, "grad_norm": 2.3234260082244873, "learning_rate": 0.0009052945373012638, "loss": 3.6244, "step": 11155 }, { "epoch": 0.7582551977170812, "grad_norm": 1.8569508790969849, "learning_rate": 0.000905252072292431, "loss": 3.5235, "step": 11160 }, { "epoch": 0.7585949177877429, "grad_norm": 2.1600327491760254, "learning_rate": 0.0009052096072835983, "loss": 3.4472, "step": 11165 }, { "epoch": 0.7589346378584046, "grad_norm": 2.1725587844848633, "learning_rate": 0.0009051671422747656, "loss": 3.5188, "step": 11170 }, { "epoch": 0.7592743579290665, "grad_norm": 1.6557410955429077, "learning_rate": 0.0009051246772659329, "loss": 3.6254, "step": 11175 }, { "epoch": 0.7596140779997282, "grad_norm": 2.431107997894287, "learning_rate": 0.0009050822122571002, "loss": 3.4686, "step": 11180 }, { "epoch": 0.75995379807039, "grad_norm": 1.6160998344421387, "learning_rate": 0.0009050397472482674, "loss": 3.5341, "step": 11185 }, { "epoch": 0.7602935181410517, "grad_norm": 1.7238149642944336, "learning_rate": 0.0009049972822394347, "loss": 3.6363, "step": 11190 }, { "epoch": 0.7606332382117136, "grad_norm": 1.559342861175537, "learning_rate": 0.000904954817230602, "loss": 3.6387, "step": 11195 }, { "epoch": 0.7609729582823753, "grad_norm": 1.9222300052642822, "learning_rate": 0.0009049123522217692, "loss": 3.4957, "step": 11200 }, { "epoch": 0.7613126783530371, "grad_norm": 1.8369094133377075, "learning_rate": 0.0009048698872129365, "loss": 3.6851, "step": 11205 }, { "epoch": 0.7616523984236989, "grad_norm": 1.8881033658981323, "learning_rate": 0.0009048274222041039, "loss": 3.6213, "step": 11210 }, { "epoch": 0.7619921184943607, "grad_norm": 2.049778699874878, 
"learning_rate": 0.0009047849571952711, "loss": 3.5231, "step": 11215 }, { "epoch": 0.7623318385650224, "grad_norm": 1.6866512298583984, "learning_rate": 0.0009047424921864385, "loss": 3.6897, "step": 11220 }, { "epoch": 0.7626715586356841, "grad_norm": 1.7050459384918213, "learning_rate": 0.0009047000271776057, "loss": 3.4822, "step": 11225 }, { "epoch": 0.763011278706346, "grad_norm": 1.5557515621185303, "learning_rate": 0.0009046575621687729, "loss": 3.7284, "step": 11230 }, { "epoch": 0.7633509987770077, "grad_norm": 1.6969112157821655, "learning_rate": 0.0009046150971599402, "loss": 3.7563, "step": 11235 }, { "epoch": 0.7636907188476695, "grad_norm": 1.7815346717834473, "learning_rate": 0.0009045726321511075, "loss": 3.5285, "step": 11240 }, { "epoch": 0.7640304389183313, "grad_norm": 2.151458978652954, "learning_rate": 0.0009045301671422748, "loss": 3.3454, "step": 11245 }, { "epoch": 0.7643701589889931, "grad_norm": 1.8623446226119995, "learning_rate": 0.0009044877021334421, "loss": 3.5068, "step": 11250 }, { "epoch": 0.7647098790596548, "grad_norm": 1.4780329465866089, "learning_rate": 0.0009044452371246094, "loss": 3.7545, "step": 11255 }, { "epoch": 0.7650495991303167, "grad_norm": 1.5795570611953735, "learning_rate": 0.0009044027721157766, "loss": 3.5749, "step": 11260 }, { "epoch": 0.7653893192009784, "grad_norm": 1.5764423608779907, "learning_rate": 0.0009043603071069439, "loss": 3.5966, "step": 11265 }, { "epoch": 0.7657290392716402, "grad_norm": 2.1038734912872314, "learning_rate": 0.0009043178420981112, "loss": 3.6288, "step": 11270 }, { "epoch": 0.7660687593423019, "grad_norm": 1.7304534912109375, "learning_rate": 0.0009042753770892784, "loss": 3.6354, "step": 11275 }, { "epoch": 0.7664084794129638, "grad_norm": 2.039487600326538, "learning_rate": 0.0009042329120804458, "loss": 3.5015, "step": 11280 }, { "epoch": 0.7667481994836255, "grad_norm": 2.0259695053100586, "learning_rate": 0.000904190447071613, "loss": 3.5164, "step": 11285 }, { "epoch": 
0.7670879195542872, "grad_norm": 1.6299856901168823, "learning_rate": 0.0009041479820627803, "loss": 3.5319, "step": 11290 }, { "epoch": 0.7674276396249491, "grad_norm": 1.6062525510787964, "learning_rate": 0.0009041140100557141, "loss": 3.4526, "step": 11295 }, { "epoch": 0.7677673596956108, "grad_norm": 1.5029001235961914, "learning_rate": 0.0009040715450468814, "loss": 3.6019, "step": 11300 }, { "epoch": 0.7681070797662726, "grad_norm": 2.0119566917419434, "learning_rate": 0.0009040290800380487, "loss": 3.4888, "step": 11305 }, { "epoch": 0.7684467998369343, "grad_norm": 2.097609281539917, "learning_rate": 0.000903986615029216, "loss": 3.705, "step": 11310 }, { "epoch": 0.7687865199075962, "grad_norm": 2.44820237159729, "learning_rate": 0.0009039441500203832, "loss": 3.5926, "step": 11315 }, { "epoch": 0.7691262399782579, "grad_norm": 1.6247577667236328, "learning_rate": 0.0009039016850115504, "loss": 3.7341, "step": 11320 }, { "epoch": 0.7694659600489197, "grad_norm": 2.1121985912323, "learning_rate": 0.0009038592200027178, "loss": 3.5649, "step": 11325 }, { "epoch": 0.7698056801195815, "grad_norm": 2.155418634414673, "learning_rate": 0.000903816754993885, "loss": 3.4677, "step": 11330 }, { "epoch": 0.7701454001902432, "grad_norm": 2.5063936710357666, "learning_rate": 0.0009037742899850523, "loss": 3.5753, "step": 11335 }, { "epoch": 0.770485120260905, "grad_norm": 2.1693150997161865, "learning_rate": 0.0009037318249762197, "loss": 3.4018, "step": 11340 }, { "epoch": 0.7708248403315668, "grad_norm": 2.317955255508423, "learning_rate": 0.0009036893599673869, "loss": 3.6251, "step": 11345 }, { "epoch": 0.7711645604022286, "grad_norm": 1.4471355676651, "learning_rate": 0.0009036468949585541, "loss": 3.591, "step": 11350 }, { "epoch": 0.7715042804728903, "grad_norm": 2.0313844680786133, "learning_rate": 0.0009036044299497215, "loss": 3.4294, "step": 11355 }, { "epoch": 0.7718440005435521, "grad_norm": 2.280611753463745, "learning_rate": 0.0009035619649408887, 
"loss": 3.8326, "step": 11360 }, { "epoch": 0.7721837206142139, "grad_norm": 1.5211470127105713, "learning_rate": 0.0009035194999320559, "loss": 3.6201, "step": 11365 }, { "epoch": 0.7725234406848757, "grad_norm": 2.0211949348449707, "learning_rate": 0.0009034770349232234, "loss": 3.8204, "step": 11370 }, { "epoch": 0.7728631607555374, "grad_norm": 2.418600559234619, "learning_rate": 0.0009034345699143906, "loss": 3.4787, "step": 11375 }, { "epoch": 0.7732028808261993, "grad_norm": 2.015676498413086, "learning_rate": 0.0009033921049055578, "loss": 3.4586, "step": 11380 }, { "epoch": 0.773542600896861, "grad_norm": 1.8494967222213745, "learning_rate": 0.0009033496398967251, "loss": 3.6129, "step": 11385 }, { "epoch": 0.7738823209675227, "grad_norm": 1.9864630699157715, "learning_rate": 0.0009033071748878924, "loss": 3.6879, "step": 11390 }, { "epoch": 0.7742220410381845, "grad_norm": 2.1905086040496826, "learning_rate": 0.0009032647098790596, "loss": 3.6591, "step": 11395 }, { "epoch": 0.7745617611088463, "grad_norm": 1.6897391080856323, "learning_rate": 0.0009032222448702269, "loss": 3.5759, "step": 11400 }, { "epoch": 0.7749014811795081, "grad_norm": 1.8694080114364624, "learning_rate": 0.0009031797798613943, "loss": 3.7087, "step": 11405 }, { "epoch": 0.7752412012501698, "grad_norm": 1.393734097480774, "learning_rate": 0.0009031373148525615, "loss": 3.763, "step": 11410 }, { "epoch": 0.7755809213208317, "grad_norm": 2.379488468170166, "learning_rate": 0.0009030948498437288, "loss": 3.8311, "step": 11415 }, { "epoch": 0.7759206413914934, "grad_norm": 2.1031835079193115, "learning_rate": 0.000903052384834896, "loss": 3.7317, "step": 11420 }, { "epoch": 0.7762603614621552, "grad_norm": 2.3963630199432373, "learning_rate": 0.0009030099198260634, "loss": 3.3795, "step": 11425 }, { "epoch": 0.776600081532817, "grad_norm": 1.9230754375457764, "learning_rate": 0.0009029674548172306, "loss": 3.3976, "step": 11430 }, { "epoch": 0.7769398016034788, "grad_norm": 
1.6524560451507568, "learning_rate": 0.0009029249898083978, "loss": 3.5197, "step": 11435 }, { "epoch": 0.7772795216741405, "grad_norm": 1.9016987085342407, "learning_rate": 0.0009028825247995653, "loss": 3.6234, "step": 11440 }, { "epoch": 0.7776192417448022, "grad_norm": 1.9351873397827148, "learning_rate": 0.0009028400597907325, "loss": 3.4567, "step": 11445 }, { "epoch": 0.7779589618154641, "grad_norm": 2.5554609298706055, "learning_rate": 0.0009027975947818997, "loss": 3.6219, "step": 11450 }, { "epoch": 0.7782986818861258, "grad_norm": 1.8863296508789062, "learning_rate": 0.0009027551297730671, "loss": 3.6858, "step": 11455 }, { "epoch": 0.7786384019567876, "grad_norm": 1.5956201553344727, "learning_rate": 0.0009027126647642343, "loss": 3.408, "step": 11460 }, { "epoch": 0.7789781220274494, "grad_norm": 1.6050745248794556, "learning_rate": 0.0009026701997554015, "loss": 3.6853, "step": 11465 }, { "epoch": 0.7793178420981112, "grad_norm": 1.8485314846038818, "learning_rate": 0.0009026277347465688, "loss": 3.6716, "step": 11470 }, { "epoch": 0.7796575621687729, "grad_norm": 1.8959099054336548, "learning_rate": 0.0009025852697377362, "loss": 3.6946, "step": 11475 }, { "epoch": 0.7799972822394347, "grad_norm": 1.9191590547561646, "learning_rate": 0.0009025428047289034, "loss": 3.6691, "step": 11480 }, { "epoch": 0.7803370023100965, "grad_norm": 1.757064700126648, "learning_rate": 0.0009025003397200707, "loss": 3.5833, "step": 11485 }, { "epoch": 0.7806767223807582, "grad_norm": 2.5137224197387695, "learning_rate": 0.000902457874711238, "loss": 3.5747, "step": 11490 }, { "epoch": 0.78101644245142, "grad_norm": 1.7521635293960571, "learning_rate": 0.0009024154097024052, "loss": 3.6756, "step": 11495 }, { "epoch": 0.7813561625220818, "grad_norm": 2.115110158920288, "learning_rate": 0.0009023729446935725, "loss": 3.2014, "step": 11500 }, { "epoch": 0.7816958825927436, "grad_norm": 1.5990296602249146, "learning_rate": 0.0009023304796847398, "loss": 3.5159, "step": 
11505 }, { "epoch": 0.7820356026634053, "grad_norm": 1.5467530488967896, "learning_rate": 0.0009022880146759071, "loss": 3.7659, "step": 11510 }, { "epoch": 0.7823753227340672, "grad_norm": 1.758346438407898, "learning_rate": 0.0009022455496670744, "loss": 3.5898, "step": 11515 }, { "epoch": 0.7827150428047289, "grad_norm": 1.764122486114502, "learning_rate": 0.0009022030846582416, "loss": 3.6881, "step": 11520 }, { "epoch": 0.7830547628753907, "grad_norm": 1.7399351596832275, "learning_rate": 0.0009021606196494089, "loss": 3.7568, "step": 11525 }, { "epoch": 0.7833944829460524, "grad_norm": 1.8004785776138306, "learning_rate": 0.0009021181546405762, "loss": 3.4982, "step": 11530 }, { "epoch": 0.7837342030167143, "grad_norm": 1.8586972951889038, "learning_rate": 0.0009020756896317434, "loss": 3.4245, "step": 11535 }, { "epoch": 0.784073923087376, "grad_norm": 1.597180962562561, "learning_rate": 0.0009020332246229107, "loss": 3.7954, "step": 11540 }, { "epoch": 0.7844136431580377, "grad_norm": 1.6517304182052612, "learning_rate": 0.0009019907596140781, "loss": 3.6018, "step": 11545 }, { "epoch": 0.7847533632286996, "grad_norm": 1.455210566520691, "learning_rate": 0.0009019482946052453, "loss": 3.6459, "step": 11550 }, { "epoch": 0.7850930832993613, "grad_norm": 1.5656789541244507, "learning_rate": 0.0009019058295964126, "loss": 3.6172, "step": 11555 }, { "epoch": 0.7854328033700231, "grad_norm": 1.6050161123275757, "learning_rate": 0.0009018633645875799, "loss": 3.7318, "step": 11560 }, { "epoch": 0.7857725234406848, "grad_norm": 1.9400702714920044, "learning_rate": 0.0009018208995787471, "loss": 3.5501, "step": 11565 }, { "epoch": 0.7861122435113467, "grad_norm": 2.4921462535858154, "learning_rate": 0.0009017784345699143, "loss": 3.4655, "step": 11570 }, { "epoch": 0.7864519635820084, "grad_norm": 2.1080269813537598, "learning_rate": 0.0009017359695610817, "loss": 3.5721, "step": 11575 }, { "epoch": 0.7867916836526702, "grad_norm": 2.013817548751831, 
"learning_rate": 0.000901693504552249, "loss": 3.8927, "step": 11580 }, { "epoch": 0.787131403723332, "grad_norm": 1.9506970643997192, "learning_rate": 0.0009016510395434162, "loss": 3.4919, "step": 11585 }, { "epoch": 0.7874711237939938, "grad_norm": 1.3636064529418945, "learning_rate": 0.0009016085745345836, "loss": 3.5879, "step": 11590 }, { "epoch": 0.7878108438646555, "grad_norm": 2.259608268737793, "learning_rate": 0.0009015661095257508, "loss": 3.4579, "step": 11595 }, { "epoch": 0.7881505639353173, "grad_norm": 1.6124014854431152, "learning_rate": 0.000901523644516918, "loss": 3.5668, "step": 11600 }, { "epoch": 0.7884902840059791, "grad_norm": 1.8490430116653442, "learning_rate": 0.0009014811795080854, "loss": 3.7278, "step": 11605 }, { "epoch": 0.7888300040766408, "grad_norm": 1.8244900703430176, "learning_rate": 0.0009014387144992526, "loss": 3.7191, "step": 11610 }, { "epoch": 0.7891697241473026, "grad_norm": 1.926221251487732, "learning_rate": 0.0009013962494904199, "loss": 3.4685, "step": 11615 }, { "epoch": 0.7895094442179644, "grad_norm": 2.0132575035095215, "learning_rate": 0.0009013537844815872, "loss": 3.5513, "step": 11620 }, { "epoch": 0.7898491642886262, "grad_norm": 2.028223752975464, "learning_rate": 0.0009013113194727545, "loss": 3.5053, "step": 11625 }, { "epoch": 0.7901888843592879, "grad_norm": 1.8859734535217285, "learning_rate": 0.0009012688544639217, "loss": 3.5397, "step": 11630 }, { "epoch": 0.7905286044299498, "grad_norm": 1.818061113357544, "learning_rate": 0.000901226389455089, "loss": 3.5464, "step": 11635 }, { "epoch": 0.7908683245006115, "grad_norm": 1.8583074808120728, "learning_rate": 0.0009011839244462563, "loss": 3.8035, "step": 11640 }, { "epoch": 0.7912080445712733, "grad_norm": 1.899134635925293, "learning_rate": 0.0009011414594374235, "loss": 3.449, "step": 11645 }, { "epoch": 0.791547764641935, "grad_norm": 1.800734043121338, "learning_rate": 0.0009010989944285909, "loss": 3.6304, "step": 11650 }, { "epoch": 
0.7918874847125968, "grad_norm": 2.8815345764160156, "learning_rate": 0.0009010565294197582, "loss": 3.6755, "step": 11655 }, { "epoch": 0.7922272047832586, "grad_norm": 1.8201318979263306, "learning_rate": 0.0009010140644109254, "loss": 3.6803, "step": 11660 }, { "epoch": 0.7925669248539203, "grad_norm": 2.1020052433013916, "learning_rate": 0.0009009715994020927, "loss": 3.5312, "step": 11665 }, { "epoch": 0.7929066449245822, "grad_norm": 1.5042980909347534, "learning_rate": 0.0009009291343932599, "loss": 3.5414, "step": 11670 }, { "epoch": 0.7932463649952439, "grad_norm": 1.823721170425415, "learning_rate": 0.0009008866693844272, "loss": 3.5912, "step": 11675 }, { "epoch": 0.7935860850659057, "grad_norm": 1.6344832181930542, "learning_rate": 0.0009008442043755946, "loss": 3.7419, "step": 11680 }, { "epoch": 0.7939258051365675, "grad_norm": 1.7554601430892944, "learning_rate": 0.0009008017393667618, "loss": 3.7664, "step": 11685 }, { "epoch": 0.7942655252072293, "grad_norm": 1.909085988998413, "learning_rate": 0.0009007592743579291, "loss": 3.4386, "step": 11690 }, { "epoch": 0.794605245277891, "grad_norm": 1.8912153244018555, "learning_rate": 0.0009007168093490964, "loss": 3.7027, "step": 11695 }, { "epoch": 0.7949449653485527, "grad_norm": 1.8567606210708618, "learning_rate": 0.0009006743443402636, "loss": 3.648, "step": 11700 }, { "epoch": 0.7952846854192146, "grad_norm": 2.6574018001556396, "learning_rate": 0.0009006318793314308, "loss": 3.436, "step": 11705 }, { "epoch": 0.7956244054898763, "grad_norm": 1.7235360145568848, "learning_rate": 0.0009005894143225982, "loss": 3.428, "step": 11710 }, { "epoch": 0.7959641255605381, "grad_norm": 1.6269991397857666, "learning_rate": 0.0009005469493137655, "loss": 3.6965, "step": 11715 }, { "epoch": 0.7963038456311999, "grad_norm": 2.1240196228027344, "learning_rate": 0.0009005044843049327, "loss": 3.7649, "step": 11720 }, { "epoch": 0.7966435657018617, "grad_norm": 1.835428237915039, "learning_rate": 
0.0009004620192961001, "loss": 3.5267, "step": 11725 }, { "epoch": 0.7969832857725234, "grad_norm": 1.827282428741455, "learning_rate": 0.0009004195542872673, "loss": 3.7203, "step": 11730 }, { "epoch": 0.7973230058431852, "grad_norm": 1.9529973268508911, "learning_rate": 0.0009003770892784345, "loss": 3.6205, "step": 11735 }, { "epoch": 0.797662725913847, "grad_norm": 1.9578665494918823, "learning_rate": 0.0009003346242696019, "loss": 3.5067, "step": 11740 }, { "epoch": 0.7980024459845088, "grad_norm": 1.847833275794983, "learning_rate": 0.0009002921592607691, "loss": 3.6537, "step": 11745 }, { "epoch": 0.7983421660551705, "grad_norm": 2.558058738708496, "learning_rate": 0.0009002496942519364, "loss": 3.8492, "step": 11750 }, { "epoch": 0.7986818861258324, "grad_norm": 1.8259804248809814, "learning_rate": 0.0009002072292431038, "loss": 3.3058, "step": 11755 }, { "epoch": 0.7990216061964941, "grad_norm": 2.5261173248291016, "learning_rate": 0.000900164764234271, "loss": 3.7176, "step": 11760 }, { "epoch": 0.7993613262671558, "grad_norm": 1.8361433744430542, "learning_rate": 0.0009001222992254383, "loss": 3.6644, "step": 11765 }, { "epoch": 0.7997010463378177, "grad_norm": 1.4857279062271118, "learning_rate": 0.0009000798342166055, "loss": 3.6131, "step": 11770 }, { "epoch": 0.8000407664084794, "grad_norm": 1.437785029411316, "learning_rate": 0.0009000373692077728, "loss": 3.4676, "step": 11775 }, { "epoch": 0.8003804864791412, "grad_norm": 1.7390767335891724, "learning_rate": 0.0008999949041989401, "loss": 3.7914, "step": 11780 }, { "epoch": 0.8007202065498029, "grad_norm": 1.8753461837768555, "learning_rate": 0.0008999524391901074, "loss": 3.7545, "step": 11785 }, { "epoch": 0.8010599266204648, "grad_norm": 1.409063458442688, "learning_rate": 0.0008999099741812747, "loss": 3.244, "step": 11790 }, { "epoch": 0.8013996466911265, "grad_norm": 1.5937402248382568, "learning_rate": 0.000899867509172442, "loss": 3.5577, "step": 11795 }, { "epoch": 0.8017393667617883, 
"grad_norm": 1.8826838731765747, "learning_rate": 0.0008998250441636092, "loss": 3.708, "step": 11800 }, { "epoch": 0.8020790868324501, "grad_norm": 2.1698734760284424, "learning_rate": 0.0008997825791547764, "loss": 3.7608, "step": 11805 }, { "epoch": 0.8024188069031118, "grad_norm": 2.8856148719787598, "learning_rate": 0.0008997401141459438, "loss": 3.4434, "step": 11810 }, { "epoch": 0.8027585269737736, "grad_norm": 2.1730058193206787, "learning_rate": 0.000899697649137111, "loss": 3.4465, "step": 11815 }, { "epoch": 0.8030982470444353, "grad_norm": 1.8815830945968628, "learning_rate": 0.0008996551841282783, "loss": 3.578, "step": 11820 }, { "epoch": 0.8034379671150972, "grad_norm": 1.871692419052124, "learning_rate": 0.0008996127191194457, "loss": 3.7841, "step": 11825 }, { "epoch": 0.8037776871857589, "grad_norm": 1.6721887588500977, "learning_rate": 0.0008995702541106129, "loss": 3.712, "step": 11830 }, { "epoch": 0.8041174072564207, "grad_norm": 1.4873764514923096, "learning_rate": 0.0008995277891017801, "loss": 3.7036, "step": 11835 }, { "epoch": 0.8044571273270825, "grad_norm": 2.117584466934204, "learning_rate": 0.0008994853240929475, "loss": 3.7205, "step": 11840 }, { "epoch": 0.8047968473977443, "grad_norm": 1.8260453939437866, "learning_rate": 0.0008994428590841147, "loss": 3.5203, "step": 11845 }, { "epoch": 0.805136567468406, "grad_norm": 2.4830565452575684, "learning_rate": 0.0008994003940752819, "loss": 3.6369, "step": 11850 }, { "epoch": 0.8054762875390679, "grad_norm": 2.2740767002105713, "learning_rate": 0.0008993579290664494, "loss": 3.4525, "step": 11855 }, { "epoch": 0.8058160076097296, "grad_norm": 2.1062567234039307, "learning_rate": 0.0008993154640576166, "loss": 3.341, "step": 11860 }, { "epoch": 0.8061557276803913, "grad_norm": 1.7777636051177979, "learning_rate": 0.0008992729990487838, "loss": 3.5429, "step": 11865 }, { "epoch": 0.8064954477510531, "grad_norm": 1.5020085573196411, "learning_rate": 0.0008992305340399511, "loss": 3.6144, 
"step": 11870 }, { "epoch": 0.8068351678217149, "grad_norm": 1.639664649963379, "learning_rate": 0.0008991880690311184, "loss": 3.7157, "step": 11875 }, { "epoch": 0.8071748878923767, "grad_norm": 1.9514044523239136, "learning_rate": 0.0008991456040222856, "loss": 3.2241, "step": 11880 }, { "epoch": 0.8075146079630384, "grad_norm": 1.6407039165496826, "learning_rate": 0.0008991031390134529, "loss": 3.6506, "step": 11885 }, { "epoch": 0.8078543280337003, "grad_norm": 1.7657625675201416, "learning_rate": 0.0008990606740046203, "loss": 3.3629, "step": 11890 }, { "epoch": 0.808194048104362, "grad_norm": 1.974593162536621, "learning_rate": 0.0008990182089957875, "loss": 3.5063, "step": 11895 }, { "epoch": 0.8085337681750238, "grad_norm": 2.1709442138671875, "learning_rate": 0.0008989757439869548, "loss": 3.474, "step": 11900 }, { "epoch": 0.8088734882456855, "grad_norm": 2.1011240482330322, "learning_rate": 0.000898933278978122, "loss": 3.4931, "step": 11905 }, { "epoch": 0.8092132083163474, "grad_norm": 1.8195377588272095, "learning_rate": 0.0008988908139692893, "loss": 3.628, "step": 11910 }, { "epoch": 0.8095529283870091, "grad_norm": 1.621151089668274, "learning_rate": 0.0008988483489604566, "loss": 3.6184, "step": 11915 }, { "epoch": 0.8098926484576708, "grad_norm": 1.6647098064422607, "learning_rate": 0.0008988058839516238, "loss": 3.3851, "step": 11920 }, { "epoch": 0.8102323685283327, "grad_norm": 1.5140550136566162, "learning_rate": 0.0008987634189427912, "loss": 3.5369, "step": 11925 }, { "epoch": 0.8105720885989944, "grad_norm": 1.8715113401412964, "learning_rate": 0.0008987209539339585, "loss": 3.6242, "step": 11930 }, { "epoch": 0.8109118086696562, "grad_norm": 1.4643272161483765, "learning_rate": 0.0008986784889251257, "loss": 3.709, "step": 11935 }, { "epoch": 0.811251528740318, "grad_norm": 1.5623282194137573, "learning_rate": 0.000898636023916293, "loss": 3.393, "step": 11940 }, { "epoch": 0.8115912488109798, "grad_norm": 1.6262115240097046, 
"learning_rate": 0.0008985935589074603, "loss": 3.3144, "step": 11945 }, { "epoch": 0.8119309688816415, "grad_norm": 1.6608471870422363, "learning_rate": 0.0008985510938986275, "loss": 3.5993, "step": 11950 }, { "epoch": 0.8122706889523033, "grad_norm": 2.907862663269043, "learning_rate": 0.0008985086288897947, "loss": 3.6296, "step": 11955 }, { "epoch": 0.8126104090229651, "grad_norm": 1.7754555940628052, "learning_rate": 0.0008984661638809622, "loss": 3.5493, "step": 11960 }, { "epoch": 0.8129501290936268, "grad_norm": 1.5765506029129028, "learning_rate": 0.0008984236988721294, "loss": 3.5755, "step": 11965 }, { "epoch": 0.8132898491642886, "grad_norm": 2.245465040206909, "learning_rate": 0.0008983812338632966, "loss": 3.7039, "step": 11970 }, { "epoch": 0.8136295692349504, "grad_norm": 2.0975759029388428, "learning_rate": 0.000898338768854464, "loss": 3.6059, "step": 11975 }, { "epoch": 0.8139692893056122, "grad_norm": 2.109805107116699, "learning_rate": 0.0008982963038456312, "loss": 3.477, "step": 11980 }, { "epoch": 0.8143090093762739, "grad_norm": 1.5268175601959229, "learning_rate": 0.0008982538388367984, "loss": 3.4419, "step": 11985 }, { "epoch": 0.8146487294469357, "grad_norm": 2.105478286743164, "learning_rate": 0.0008982113738279658, "loss": 3.7144, "step": 11990 }, { "epoch": 0.8149884495175975, "grad_norm": 2.3388166427612305, "learning_rate": 0.0008981689088191331, "loss": 3.4734, "step": 11995 }, { "epoch": 0.8153281695882593, "grad_norm": 1.9484961032867432, "learning_rate": 0.0008981264438103003, "loss": 3.457, "step": 12000 }, { "epoch": 0.815667889658921, "grad_norm": 2.2545650005340576, "learning_rate": 0.0008980839788014677, "loss": 3.555, "step": 12005 }, { "epoch": 0.8160076097295829, "grad_norm": 1.9020745754241943, "learning_rate": 0.0008980415137926349, "loss": 3.6503, "step": 12010 }, { "epoch": 0.8163473298002446, "grad_norm": 1.7824493646621704, "learning_rate": 0.0008979990487838021, "loss": 3.7003, "step": 12015 }, { "epoch": 
0.8166870498709063, "grad_norm": 1.8018903732299805, "learning_rate": 0.0008979565837749694, "loss": 3.6568, "step": 12020 }, { "epoch": 0.8170267699415682, "grad_norm": 1.897049903869629, "learning_rate": 0.0008979141187661367, "loss": 3.1647, "step": 12025 }, { "epoch": 0.8173664900122299, "grad_norm": 1.6352155208587646, "learning_rate": 0.000897871653757304, "loss": 3.5905, "step": 12030 }, { "epoch": 0.8177062100828917, "grad_norm": 2.4553425312042236, "learning_rate": 0.0008978291887484713, "loss": 3.5401, "step": 12035 }, { "epoch": 0.8180459301535534, "grad_norm": 1.8682621717453003, "learning_rate": 0.0008977867237396386, "loss": 3.408, "step": 12040 }, { "epoch": 0.8183856502242153, "grad_norm": 1.3070932626724243, "learning_rate": 0.0008977442587308058, "loss": 3.8205, "step": 12045 }, { "epoch": 0.818725370294877, "grad_norm": 1.5089374780654907, "learning_rate": 0.0008977017937219731, "loss": 3.6364, "step": 12050 }, { "epoch": 0.8190650903655388, "grad_norm": 2.089048385620117, "learning_rate": 0.0008976593287131403, "loss": 3.6199, "step": 12055 }, { "epoch": 0.8194048104362006, "grad_norm": 1.8559980392456055, "learning_rate": 0.0008976168637043076, "loss": 3.5064, "step": 12060 }, { "epoch": 0.8197445305068624, "grad_norm": 1.7014050483703613, "learning_rate": 0.000897574398695475, "loss": 3.7327, "step": 12065 }, { "epoch": 0.8200842505775241, "grad_norm": 2.090141534805298, "learning_rate": 0.0008975319336866422, "loss": 3.5042, "step": 12070 }, { "epoch": 0.8204239706481858, "grad_norm": 1.6767606735229492, "learning_rate": 0.0008974894686778095, "loss": 3.7401, "step": 12075 }, { "epoch": 0.8207636907188477, "grad_norm": 1.9763669967651367, "learning_rate": 0.0008974470036689768, "loss": 3.3406, "step": 12080 }, { "epoch": 0.8211034107895094, "grad_norm": 1.8153197765350342, "learning_rate": 0.000897404538660144, "loss": 3.7075, "step": 12085 }, { "epoch": 0.8214431308601712, "grad_norm": 1.9009218215942383, "learning_rate": 
0.0008973620736513113, "loss": 3.6916, "step": 12090 }, { "epoch": 0.821782850930833, "grad_norm": 1.8067152500152588, "learning_rate": 0.0008973196086424786, "loss": 3.6597, "step": 12095 }, { "epoch": 0.8221225710014948, "grad_norm": 2.047412872314453, "learning_rate": 0.0008972771436336459, "loss": 3.6026, "step": 12100 }, { "epoch": 0.8224622910721565, "grad_norm": 1.9204285144805908, "learning_rate": 0.0008972346786248133, "loss": 3.4593, "step": 12105 }, { "epoch": 0.8228020111428184, "grad_norm": 1.7023279666900635, "learning_rate": 0.0008971922136159805, "loss": 3.6539, "step": 12110 }, { "epoch": 0.8231417312134801, "grad_norm": 1.9788775444030762, "learning_rate": 0.0008971497486071477, "loss": 3.5701, "step": 12115 }, { "epoch": 0.8234814512841419, "grad_norm": 1.8770027160644531, "learning_rate": 0.000897107283598315, "loss": 3.6677, "step": 12120 }, { "epoch": 0.8238211713548036, "grad_norm": 2.6365151405334473, "learning_rate": 0.0008970648185894823, "loss": 3.6265, "step": 12125 }, { "epoch": 0.8241608914254654, "grad_norm": 1.7585949897766113, "learning_rate": 0.0008970223535806495, "loss": 3.738, "step": 12130 }, { "epoch": 0.8245006114961272, "grad_norm": 2.7371826171875, "learning_rate": 0.0008969798885718169, "loss": 3.3781, "step": 12135 }, { "epoch": 0.8248403315667889, "grad_norm": 1.6018025875091553, "learning_rate": 0.0008969374235629842, "loss": 3.5789, "step": 12140 }, { "epoch": 0.8251800516374508, "grad_norm": 1.5802757740020752, "learning_rate": 0.0008968949585541514, "loss": 3.6398, "step": 12145 }, { "epoch": 0.8255197717081125, "grad_norm": 1.998632788658142, "learning_rate": 0.0008968524935453187, "loss": 3.5376, "step": 12150 }, { "epoch": 0.8258594917787743, "grad_norm": 1.6627589464187622, "learning_rate": 0.000896810028536486, "loss": 3.7255, "step": 12155 }, { "epoch": 0.826199211849436, "grad_norm": 1.911772608757019, "learning_rate": 0.0008967675635276532, "loss": 3.6304, "step": 12160 }, { "epoch": 0.8265389319200979, 
"grad_norm": 1.9612982273101807, "learning_rate": 0.0008967250985188205, "loss": 3.5334, "step": 12165 }, { "epoch": 0.8268786519907596, "grad_norm": 2.0774810314178467, "learning_rate": 0.0008966826335099878, "loss": 3.6428, "step": 12170 }, { "epoch": 0.8272183720614213, "grad_norm": 2.4802634716033936, "learning_rate": 0.0008966401685011551, "loss": 3.4141, "step": 12175 }, { "epoch": 0.8275580921320832, "grad_norm": 1.4749765396118164, "learning_rate": 0.0008965977034923224, "loss": 3.6358, "step": 12180 }, { "epoch": 0.8278978122027449, "grad_norm": 1.6224263906478882, "learning_rate": 0.0008965552384834896, "loss": 3.5197, "step": 12185 }, { "epoch": 0.8282375322734067, "grad_norm": 2.1257662773132324, "learning_rate": 0.0008965127734746569, "loss": 3.71, "step": 12190 }, { "epoch": 0.8285772523440685, "grad_norm": 1.5916825532913208, "learning_rate": 0.0008964703084658242, "loss": 3.7698, "step": 12195 }, { "epoch": 0.8289169724147303, "grad_norm": 1.8161696195602417, "learning_rate": 0.0008964278434569914, "loss": 3.735, "step": 12200 }, { "epoch": 0.829256692485392, "grad_norm": 1.8742128610610962, "learning_rate": 0.0008963853784481587, "loss": 3.7664, "step": 12205 }, { "epoch": 0.8295964125560538, "grad_norm": 2.142746925354004, "learning_rate": 0.0008963429134393261, "loss": 3.4817, "step": 12210 }, { "epoch": 0.8299361326267156, "grad_norm": 2.4074392318725586, "learning_rate": 0.0008963004484304933, "loss": 3.6337, "step": 12215 }, { "epoch": 0.8302758526973774, "grad_norm": 1.6855005025863647, "learning_rate": 0.0008962579834216605, "loss": 3.4595, "step": 12220 }, { "epoch": 0.8306155727680391, "grad_norm": 1.8447942733764648, "learning_rate": 0.0008962155184128279, "loss": 3.6601, "step": 12225 }, { "epoch": 0.830955292838701, "grad_norm": 1.7631922960281372, "learning_rate": 0.0008961730534039951, "loss": 3.4473, "step": 12230 }, { "epoch": 0.8312950129093627, "grad_norm": 1.9511041641235352, "learning_rate": 0.0008961305883951623, "loss": 
3.2386, "step": 12235 }, { "epoch": 0.8316347329800244, "grad_norm": 1.8717700242996216, "learning_rate": 0.0008960881233863298, "loss": 3.5527, "step": 12240 }, { "epoch": 0.8319744530506862, "grad_norm": 1.7600947618484497, "learning_rate": 0.000896045658377497, "loss": 3.2384, "step": 12245 }, { "epoch": 0.832314173121348, "grad_norm": 1.7623400688171387, "learning_rate": 0.0008960031933686642, "loss": 3.5628, "step": 12250 }, { "epoch": 0.8326538931920098, "grad_norm": 2.143364667892456, "learning_rate": 0.0008959607283598315, "loss": 3.4024, "step": 12255 }, { "epoch": 0.8329936132626715, "grad_norm": 2.3278868198394775, "learning_rate": 0.0008959182633509988, "loss": 3.5421, "step": 12260 }, { "epoch": 0.8333333333333334, "grad_norm": 2.0980100631713867, "learning_rate": 0.000895875798342166, "loss": 3.7046, "step": 12265 }, { "epoch": 0.8336730534039951, "grad_norm": 2.457810878753662, "learning_rate": 0.0008958333333333334, "loss": 3.2412, "step": 12270 }, { "epoch": 0.8340127734746569, "grad_norm": 1.874558687210083, "learning_rate": 0.0008957908683245007, "loss": 3.7081, "step": 12275 }, { "epoch": 0.8343524935453187, "grad_norm": 1.941207766532898, "learning_rate": 0.0008957484033156679, "loss": 3.7245, "step": 12280 }, { "epoch": 0.8346922136159804, "grad_norm": 1.9928566217422485, "learning_rate": 0.0008957059383068352, "loss": 3.4548, "step": 12285 }, { "epoch": 0.8350319336866422, "grad_norm": 2.07574200630188, "learning_rate": 0.0008956634732980025, "loss": 3.5804, "step": 12290 }, { "epoch": 0.8353716537573039, "grad_norm": 1.8716129064559937, "learning_rate": 0.0008956210082891697, "loss": 3.4059, "step": 12295 }, { "epoch": 0.8357113738279658, "grad_norm": 1.8985776901245117, "learning_rate": 0.000895578543280337, "loss": 3.6951, "step": 12300 }, { "epoch": 0.8360510938986275, "grad_norm": 2.9675426483154297, "learning_rate": 0.0008955360782715043, "loss": 3.3709, "step": 12305 }, { "epoch": 0.8363908139692893, "grad_norm": 1.9848655462265015, 
"learning_rate": 0.0008954936132626716, "loss": 3.8111, "step": 12310 }, { "epoch": 0.8367305340399511, "grad_norm": 1.8711246252059937, "learning_rate": 0.0008954511482538389, "loss": 3.5166, "step": 12315 }, { "epoch": 0.8370702541106129, "grad_norm": 1.979178786277771, "learning_rate": 0.0008954086832450061, "loss": 3.7043, "step": 12320 }, { "epoch": 0.8374099741812746, "grad_norm": 1.7801852226257324, "learning_rate": 0.0008953662182361734, "loss": 3.6503, "step": 12325 }, { "epoch": 0.8377496942519363, "grad_norm": 1.8398452997207642, "learning_rate": 0.0008953237532273407, "loss": 3.238, "step": 12330 }, { "epoch": 0.8380894143225982, "grad_norm": 1.825632095336914, "learning_rate": 0.0008952812882185079, "loss": 3.7575, "step": 12335 }, { "epoch": 0.83842913439326, "grad_norm": 1.8405952453613281, "learning_rate": 0.0008952388232096753, "loss": 3.5723, "step": 12340 }, { "epoch": 0.8387688544639217, "grad_norm": 1.9175306558609009, "learning_rate": 0.0008951963582008426, "loss": 3.6023, "step": 12345 }, { "epoch": 0.8391085745345835, "grad_norm": 1.793798565864563, "learning_rate": 0.0008951538931920098, "loss": 3.4915, "step": 12350 }, { "epoch": 0.8394482946052453, "grad_norm": 1.7855476140975952, "learning_rate": 0.000895111428183177, "loss": 3.6193, "step": 12355 }, { "epoch": 0.839788014675907, "grad_norm": 1.9969016313552856, "learning_rate": 0.0008950689631743444, "loss": 3.52, "step": 12360 }, { "epoch": 0.8401277347465689, "grad_norm": 1.4823076725006104, "learning_rate": 0.0008950264981655116, "loss": 3.8175, "step": 12365 }, { "epoch": 0.8404674548172306, "grad_norm": 2.2174625396728516, "learning_rate": 0.0008949840331566788, "loss": 3.7436, "step": 12370 }, { "epoch": 0.8408071748878924, "grad_norm": 2.1262893676757812, "learning_rate": 0.0008949415681478463, "loss": 3.5657, "step": 12375 }, { "epoch": 0.8411468949585541, "grad_norm": 1.9660542011260986, "learning_rate": 0.0008948991031390135, "loss": 3.69, "step": 12380 }, { "epoch": 
0.841486615029216, "grad_norm": 2.321239948272705, "learning_rate": 0.0008948566381301807, "loss": 3.8255, "step": 12385 }, { "epoch": 0.8418263350998777, "grad_norm": 1.9638482332229614, "learning_rate": 0.0008948141731213481, "loss": 3.453, "step": 12390 }, { "epoch": 0.8421660551705394, "grad_norm": 2.0547423362731934, "learning_rate": 0.0008947717081125153, "loss": 3.5388, "step": 12395 }, { "epoch": 0.8425057752412013, "grad_norm": 2.0282537937164307, "learning_rate": 0.0008947292431036825, "loss": 3.6852, "step": 12400 }, { "epoch": 0.842845495311863, "grad_norm": 2.03861403465271, "learning_rate": 0.0008946867780948498, "loss": 3.5382, "step": 12405 }, { "epoch": 0.8431852153825248, "grad_norm": 1.9999773502349854, "learning_rate": 0.0008946443130860172, "loss": 3.8035, "step": 12410 }, { "epoch": 0.8435249354531865, "grad_norm": 1.967223048210144, "learning_rate": 0.0008946018480771844, "loss": 3.7149, "step": 12415 }, { "epoch": 0.8438646555238484, "grad_norm": 2.0523531436920166, "learning_rate": 0.0008945593830683517, "loss": 3.8283, "step": 12420 }, { "epoch": 0.8442043755945101, "grad_norm": 2.1235415935516357, "learning_rate": 0.000894516918059519, "loss": 3.7062, "step": 12425 }, { "epoch": 0.8445440956651719, "grad_norm": 1.6314929723739624, "learning_rate": 0.0008944744530506862, "loss": 3.555, "step": 12430 }, { "epoch": 0.8448838157358337, "grad_norm": 2.0033817291259766, "learning_rate": 0.0008944319880418535, "loss": 3.7065, "step": 12435 }, { "epoch": 0.8452235358064955, "grad_norm": 2.430074691772461, "learning_rate": 0.0008943895230330207, "loss": 3.567, "step": 12440 }, { "epoch": 0.8455632558771572, "grad_norm": 1.7578620910644531, "learning_rate": 0.0008943470580241882, "loss": 3.5517, "step": 12445 }, { "epoch": 0.845902975947819, "grad_norm": 1.7230654954910278, "learning_rate": 0.0008943045930153554, "loss": 3.5455, "step": 12450 }, { "epoch": 0.8462426960184808, "grad_norm": 2.196265459060669, "learning_rate": 0.0008942621280065226, 
"loss": 3.8836, "step": 12455 }, { "epoch": 0.8465824160891425, "grad_norm": 2.0614888668060303, "learning_rate": 0.00089421966299769, "loss": 3.6058, "step": 12460 }, { "epoch": 0.8469221361598043, "grad_norm": 1.6073611974716187, "learning_rate": 0.0008941771979888572, "loss": 3.6754, "step": 12465 }, { "epoch": 0.8472618562304661, "grad_norm": 1.9645023345947266, "learning_rate": 0.0008941347329800244, "loss": 3.5771, "step": 12470 }, { "epoch": 0.8476015763011279, "grad_norm": 1.7098908424377441, "learning_rate": 0.0008940922679711918, "loss": 3.2358, "step": 12475 }, { "epoch": 0.8479412963717896, "grad_norm": 2.235793113708496, "learning_rate": 0.0008940498029623591, "loss": 3.4784, "step": 12480 }, { "epoch": 0.8482810164424515, "grad_norm": 1.8941439390182495, "learning_rate": 0.0008940073379535263, "loss": 3.5937, "step": 12485 }, { "epoch": 0.8486207365131132, "grad_norm": 1.681727647781372, "learning_rate": 0.0008939648729446937, "loss": 3.6285, "step": 12490 }, { "epoch": 0.848960456583775, "grad_norm": 1.69469153881073, "learning_rate": 0.0008939224079358609, "loss": 3.6285, "step": 12495 }, { "epoch": 0.8493001766544367, "grad_norm": 2.106505870819092, "learning_rate": 0.0008938799429270281, "loss": 3.5898, "step": 12500 }, { "epoch": 0.8496398967250985, "grad_norm": 1.9518529176712036, "learning_rate": 0.0008938374779181954, "loss": 3.6734, "step": 12505 }, { "epoch": 0.8499796167957603, "grad_norm": 2.518794298171997, "learning_rate": 0.0008937950129093627, "loss": 3.4634, "step": 12510 }, { "epoch": 0.850319336866422, "grad_norm": 2.2232234477996826, "learning_rate": 0.00089375254790053, "loss": 3.4785, "step": 12515 }, { "epoch": 0.8506590569370839, "grad_norm": 1.8501381874084473, "learning_rate": 0.0008937100828916973, "loss": 3.6542, "step": 12520 }, { "epoch": 0.8509987770077456, "grad_norm": 2.3830342292785645, "learning_rate": 0.0008936676178828646, "loss": 3.613, "step": 12525 }, { "epoch": 0.8513384970784074, "grad_norm": 
1.9587599039077759, "learning_rate": 0.0008936251528740318, "loss": 3.7966, "step": 12530 }, { "epoch": 0.8516782171490692, "grad_norm": 1.9891637563705444, "learning_rate": 0.0008935826878651991, "loss": 3.792, "step": 12535 }, { "epoch": 0.852017937219731, "grad_norm": 1.896682620048523, "learning_rate": 0.0008935402228563663, "loss": 3.6603, "step": 12540 }, { "epoch": 0.8523576572903927, "grad_norm": 1.7955166101455688, "learning_rate": 0.0008934977578475336, "loss": 3.7256, "step": 12545 }, { "epoch": 0.8526973773610544, "grad_norm": 1.7587004899978638, "learning_rate": 0.000893455292838701, "loss": 3.694, "step": 12550 }, { "epoch": 0.8530370974317163, "grad_norm": 2.13004469871521, "learning_rate": 0.0008934128278298682, "loss": 3.6144, "step": 12555 }, { "epoch": 0.853376817502378, "grad_norm": 2.173023223876953, "learning_rate": 0.0008933703628210355, "loss": 3.4421, "step": 12560 }, { "epoch": 0.8537165375730398, "grad_norm": 1.587682843208313, "learning_rate": 0.0008933278978122028, "loss": 3.8937, "step": 12565 }, { "epoch": 0.8540562576437016, "grad_norm": 2.426766872406006, "learning_rate": 0.00089328543280337, "loss": 3.465, "step": 12570 }, { "epoch": 0.8543959777143634, "grad_norm": 1.7623871564865112, "learning_rate": 0.0008932429677945373, "loss": 3.698, "step": 12575 }, { "epoch": 0.8547356977850251, "grad_norm": 2.0152108669281006, "learning_rate": 0.0008932005027857046, "loss": 3.6132, "step": 12580 }, { "epoch": 0.8550754178556869, "grad_norm": 1.8599278926849365, "learning_rate": 0.0008931580377768719, "loss": 3.3289, "step": 12585 }, { "epoch": 0.8554151379263487, "grad_norm": 1.7364797592163086, "learning_rate": 0.0008931155727680392, "loss": 3.5962, "step": 12590 }, { "epoch": 0.8557548579970105, "grad_norm": 1.7669225931167603, "learning_rate": 0.0008930731077592065, "loss": 3.7261, "step": 12595 }, { "epoch": 0.8560945780676722, "grad_norm": 1.9251421689987183, "learning_rate": 0.0008930306427503737, "loss": 3.7453, "step": 12600 }, { 
"epoch": 0.856434298138334, "grad_norm": 2.1037094593048096, "learning_rate": 0.0008929881777415409, "loss": 3.3429, "step": 12605 }, { "epoch": 0.8567740182089958, "grad_norm": 1.521697759628296, "learning_rate": 0.0008929457127327083, "loss": 3.5546, "step": 12610 }, { "epoch": 0.8571137382796575, "grad_norm": 1.8826313018798828, "learning_rate": 0.0008929032477238755, "loss": 3.5577, "step": 12615 }, { "epoch": 0.8574534583503194, "grad_norm": 2.170121669769287, "learning_rate": 0.0008928607827150428, "loss": 3.8613, "step": 12620 }, { "epoch": 0.8577931784209811, "grad_norm": 2.0297653675079346, "learning_rate": 0.0008928183177062102, "loss": 3.7788, "step": 12625 }, { "epoch": 0.8581328984916429, "grad_norm": 1.7449734210968018, "learning_rate": 0.0008927758526973774, "loss": 3.7532, "step": 12630 }, { "epoch": 0.8584726185623046, "grad_norm": 2.8665409088134766, "learning_rate": 0.0008927333876885446, "loss": 3.3919, "step": 12635 }, { "epoch": 0.8588123386329665, "grad_norm": 1.9297112226486206, "learning_rate": 0.000892690922679712, "loss": 3.6775, "step": 12640 }, { "epoch": 0.8591520587036282, "grad_norm": 1.4432010650634766, "learning_rate": 0.0008926484576708792, "loss": 3.733, "step": 12645 }, { "epoch": 0.85949177877429, "grad_norm": 1.9161884784698486, "learning_rate": 0.0008926059926620464, "loss": 3.599, "step": 12650 }, { "epoch": 0.8598314988449518, "grad_norm": 1.820621371269226, "learning_rate": 0.0008925635276532138, "loss": 3.6391, "step": 12655 }, { "epoch": 0.8601712189156135, "grad_norm": 1.769112229347229, "learning_rate": 0.0008925210626443811, "loss": 3.5023, "step": 12660 }, { "epoch": 0.8605109389862753, "grad_norm": 1.94588041305542, "learning_rate": 0.0008924785976355483, "loss": 3.6158, "step": 12665 }, { "epoch": 0.860850659056937, "grad_norm": 1.8630372285842896, "learning_rate": 0.0008924361326267156, "loss": 3.4955, "step": 12670 }, { "epoch": 0.8611903791275989, "grad_norm": 1.430772066116333, "learning_rate": 
0.0008923936676178829, "loss": 3.5106, "step": 12675 }, { "epoch": 0.8615300991982606, "grad_norm": 2.0313425064086914, "learning_rate": 0.0008923512026090501, "loss": 3.5306, "step": 12680 }, { "epoch": 0.8618698192689224, "grad_norm": 1.9828473329544067, "learning_rate": 0.0008923087376002174, "loss": 3.5904, "step": 12685 }, { "epoch": 0.8622095393395842, "grad_norm": 2.2052927017211914, "learning_rate": 0.0008922662725913848, "loss": 3.365, "step": 12690 }, { "epoch": 0.862549259410246, "grad_norm": 1.890277624130249, "learning_rate": 0.000892223807582552, "loss": 3.7296, "step": 12695 }, { "epoch": 0.8628889794809077, "grad_norm": 2.4740495681762695, "learning_rate": 0.0008921813425737193, "loss": 3.6153, "step": 12700 }, { "epoch": 0.8632286995515696, "grad_norm": 1.5340805053710938, "learning_rate": 0.0008921388775648865, "loss": 3.4461, "step": 12705 }, { "epoch": 0.8635684196222313, "grad_norm": 2.006525754928589, "learning_rate": 0.0008920964125560538, "loss": 3.4266, "step": 12710 }, { "epoch": 0.863908139692893, "grad_norm": 2.403287172317505, "learning_rate": 0.0008920539475472211, "loss": 3.8024, "step": 12715 }, { "epoch": 0.8642478597635548, "grad_norm": 1.5612201690673828, "learning_rate": 0.0008920114825383883, "loss": 3.7225, "step": 12720 }, { "epoch": 0.8645875798342166, "grad_norm": 1.6602909564971924, "learning_rate": 0.0008919690175295557, "loss": 3.7067, "step": 12725 }, { "epoch": 0.8649272999048784, "grad_norm": 1.568968653678894, "learning_rate": 0.000891926552520723, "loss": 3.57, "step": 12730 }, { "epoch": 0.8652670199755401, "grad_norm": 2.125265598297119, "learning_rate": 0.0008918840875118902, "loss": 3.8306, "step": 12735 }, { "epoch": 0.865606740046202, "grad_norm": 2.179769515991211, "learning_rate": 0.0008918416225030574, "loss": 3.7185, "step": 12740 }, { "epoch": 0.8659464601168637, "grad_norm": 2.0790231227874756, "learning_rate": 0.0008917991574942248, "loss": 3.7594, "step": 12745 }, { "epoch": 0.8662861801875255, 
"grad_norm": 1.8237594366073608, "learning_rate": 0.000891756692485392, "loss": 3.6818, "step": 12750 }, { "epoch": 0.8666259002581872, "grad_norm": 1.8147236108779907, "learning_rate": 0.0008917142274765592, "loss": 3.5427, "step": 12755 }, { "epoch": 0.866965620328849, "grad_norm": 2.0151071548461914, "learning_rate": 0.0008916717624677267, "loss": 3.4867, "step": 12760 }, { "epoch": 0.8673053403995108, "grad_norm": 1.7541842460632324, "learning_rate": 0.0008916292974588939, "loss": 3.609, "step": 12765 }, { "epoch": 0.8676450604701725, "grad_norm": 1.7681385278701782, "learning_rate": 0.0008915868324500611, "loss": 3.6899, "step": 12770 }, { "epoch": 0.8679847805408344, "grad_norm": 1.777166724205017, "learning_rate": 0.0008915443674412285, "loss": 3.785, "step": 12775 }, { "epoch": 0.8683245006114961, "grad_norm": 1.647775411605835, "learning_rate": 0.0008915019024323957, "loss": 3.5424, "step": 12780 }, { "epoch": 0.8686642206821579, "grad_norm": 2.0175790786743164, "learning_rate": 0.000891459437423563, "loss": 3.7884, "step": 12785 }, { "epoch": 0.8690039407528197, "grad_norm": 2.0764808654785156, "learning_rate": 0.0008914169724147302, "loss": 3.5568, "step": 12790 }, { "epoch": 0.8693436608234815, "grad_norm": 1.546127438545227, "learning_rate": 0.0008913745074058976, "loss": 3.6268, "step": 12795 }, { "epoch": 0.8696833808941432, "grad_norm": 1.8093070983886719, "learning_rate": 0.0008913320423970649, "loss": 3.7151, "step": 12800 }, { "epoch": 0.870023100964805, "grad_norm": 1.7161674499511719, "learning_rate": 0.0008912895773882321, "loss": 3.7675, "step": 12805 }, { "epoch": 0.8703628210354668, "grad_norm": 2.032407283782959, "learning_rate": 0.0008912471123793994, "loss": 3.8253, "step": 12810 }, { "epoch": 0.8707025411061285, "grad_norm": 2.032160758972168, "learning_rate": 0.0008912046473705667, "loss": 3.5872, "step": 12815 }, { "epoch": 0.8710422611767903, "grad_norm": 1.6790246963500977, "learning_rate": 0.0008911621823617339, "loss": 3.6324, 
"step": 12820 }, { "epoch": 0.8713819812474521, "grad_norm": 2.17712664604187, "learning_rate": 0.0008911197173529012, "loss": 3.8341, "step": 12825 }, { "epoch": 0.8717217013181139, "grad_norm": 1.6422011852264404, "learning_rate": 0.0008910772523440686, "loss": 3.5551, "step": 12830 }, { "epoch": 0.8720614213887756, "grad_norm": 1.4484297037124634, "learning_rate": 0.0008910347873352358, "loss": 3.4272, "step": 12835 }, { "epoch": 0.8724011414594374, "grad_norm": 2.1743156909942627, "learning_rate": 0.000890992322326403, "loss": 3.5601, "step": 12840 }, { "epoch": 0.8727408615300992, "grad_norm": 1.7776672840118408, "learning_rate": 0.0008909498573175704, "loss": 3.5911, "step": 12845 }, { "epoch": 0.873080581600761, "grad_norm": 1.7769372463226318, "learning_rate": 0.0008909073923087376, "loss": 3.7596, "step": 12850 }, { "epoch": 0.8734203016714227, "grad_norm": 1.6966371536254883, "learning_rate": 0.0008908649272999048, "loss": 3.7335, "step": 12855 }, { "epoch": 0.8737600217420846, "grad_norm": 1.681147575378418, "learning_rate": 0.0008908224622910723, "loss": 3.588, "step": 12860 }, { "epoch": 0.8740997418127463, "grad_norm": 1.6719528436660767, "learning_rate": 0.0008907799972822395, "loss": 3.4875, "step": 12865 }, { "epoch": 0.874439461883408, "grad_norm": 2.9050405025482178, "learning_rate": 0.0008907375322734067, "loss": 3.3931, "step": 12870 }, { "epoch": 0.8747791819540699, "grad_norm": 1.8222780227661133, "learning_rate": 0.0008906950672645741, "loss": 3.6372, "step": 12875 }, { "epoch": 0.8751189020247316, "grad_norm": 1.8438986539840698, "learning_rate": 0.0008906526022557413, "loss": 3.3383, "step": 12880 }, { "epoch": 0.8754586220953934, "grad_norm": 1.9417293071746826, "learning_rate": 0.0008906101372469085, "loss": 3.6433, "step": 12885 }, { "epoch": 0.8757983421660551, "grad_norm": 1.4983652830123901, "learning_rate": 0.0008905676722380758, "loss": 3.4755, "step": 12890 }, { "epoch": 0.876138062236717, "grad_norm": 1.910403847694397, 
"learning_rate": 0.0008905252072292432, "loss": 3.6636, "step": 12895 }, { "epoch": 0.8764777823073787, "grad_norm": 1.7070621252059937, "learning_rate": 0.0008904827422204104, "loss": 3.7224, "step": 12900 }, { "epoch": 0.8768175023780405, "grad_norm": 2.128110647201538, "learning_rate": 0.0008904402772115777, "loss": 3.5332, "step": 12905 }, { "epoch": 0.8771572224487023, "grad_norm": 2.6459951400756836, "learning_rate": 0.000890397812202745, "loss": 3.6385, "step": 12910 }, { "epoch": 0.877496942519364, "grad_norm": 1.7597079277038574, "learning_rate": 0.0008903553471939122, "loss": 3.3978, "step": 12915 }, { "epoch": 0.8778366625900258, "grad_norm": 1.8489024639129639, "learning_rate": 0.0008903128821850795, "loss": 3.5945, "step": 12920 }, { "epoch": 0.8781763826606876, "grad_norm": 2.2630090713500977, "learning_rate": 0.0008902704171762468, "loss": 3.3778, "step": 12925 }, { "epoch": 0.8785161027313494, "grad_norm": 2.47971510887146, "learning_rate": 0.0008902279521674141, "loss": 3.6174, "step": 12930 }, { "epoch": 0.8788558228020111, "grad_norm": 2.06284499168396, "learning_rate": 0.0008901854871585814, "loss": 3.5391, "step": 12935 }, { "epoch": 0.8791955428726729, "grad_norm": 2.1338679790496826, "learning_rate": 0.0008901430221497486, "loss": 3.6347, "step": 12940 }, { "epoch": 0.8795352629433347, "grad_norm": 1.5991894006729126, "learning_rate": 0.0008901005571409159, "loss": 3.5886, "step": 12945 }, { "epoch": 0.8798749830139965, "grad_norm": 1.5717995166778564, "learning_rate": 0.0008900580921320832, "loss": 3.3454, "step": 12950 }, { "epoch": 0.8802147030846582, "grad_norm": 2.0489213466644287, "learning_rate": 0.0008900156271232504, "loss": 3.6912, "step": 12955 }, { "epoch": 0.8805544231553201, "grad_norm": 1.855434536933899, "learning_rate": 0.0008899731621144177, "loss": 3.5657, "step": 12960 }, { "epoch": 0.8808941432259818, "grad_norm": 2.004754066467285, "learning_rate": 0.0008899306971055851, "loss": 3.7392, "step": 12965 }, { "epoch": 
0.8812338632966435, "grad_norm": 1.8590043783187866, "learning_rate": 0.0008898882320967523, "loss": 3.6467, "step": 12970 }, { "epoch": 0.8815735833673053, "grad_norm": 1.9889534711837769, "learning_rate": 0.0008898457670879196, "loss": 3.5704, "step": 12975 }, { "epoch": 0.8819133034379671, "grad_norm": 1.9082046747207642, "learning_rate": 0.0008898033020790869, "loss": 3.7537, "step": 12980 }, { "epoch": 0.8822530235086289, "grad_norm": 2.1489760875701904, "learning_rate": 0.0008897608370702541, "loss": 3.5755, "step": 12985 }, { "epoch": 0.8825927435792906, "grad_norm": 2.673567771911621, "learning_rate": 0.0008897183720614213, "loss": 3.7259, "step": 12990 }, { "epoch": 0.8829324636499525, "grad_norm": 2.063520908355713, "learning_rate": 0.0008896759070525887, "loss": 3.6788, "step": 12995 }, { "epoch": 0.8832721837206142, "grad_norm": 1.8139513731002808, "learning_rate": 0.000889633442043756, "loss": 3.5827, "step": 13000 }, { "epoch": 0.883611903791276, "grad_norm": 2.9265592098236084, "learning_rate": 0.0008895909770349232, "loss": 3.6949, "step": 13005 }, { "epoch": 0.8839516238619378, "grad_norm": 1.717988133430481, "learning_rate": 0.0008895485120260906, "loss": 3.3825, "step": 13010 }, { "epoch": 0.8842913439325996, "grad_norm": 1.6232023239135742, "learning_rate": 0.0008895060470172578, "loss": 3.4326, "step": 13015 }, { "epoch": 0.8846310640032613, "grad_norm": 1.5751674175262451, "learning_rate": 0.000889463582008425, "loss": 3.4144, "step": 13020 }, { "epoch": 0.884970784073923, "grad_norm": 2.2014694213867188, "learning_rate": 0.0008894211169995924, "loss": 3.4756, "step": 13025 }, { "epoch": 0.8853105041445849, "grad_norm": 2.0370266437530518, "learning_rate": 0.0008893786519907596, "loss": 3.4851, "step": 13030 }, { "epoch": 0.8856502242152466, "grad_norm": 2.116920232772827, "learning_rate": 0.0008893361869819269, "loss": 3.6312, "step": 13035 }, { "epoch": 0.8859899442859084, "grad_norm": 1.9668282270431519, "learning_rate": 
0.0008892937219730942, "loss": 3.4707, "step": 13040 }, { "epoch": 0.8863296643565702, "grad_norm": 1.5540945529937744, "learning_rate": 0.0008892512569642615, "loss": 3.5649, "step": 13045 }, { "epoch": 0.886669384427232, "grad_norm": 2.0075972080230713, "learning_rate": 0.0008892087919554287, "loss": 3.7267, "step": 13050 }, { "epoch": 0.8870091044978937, "grad_norm": 1.8787798881530762, "learning_rate": 0.000889166326946596, "loss": 3.5244, "step": 13055 }, { "epoch": 0.8873488245685555, "grad_norm": 2.2842817306518555, "learning_rate": 0.0008891238619377633, "loss": 3.569, "step": 13060 }, { "epoch": 0.8876885446392173, "grad_norm": 1.7459248304367065, "learning_rate": 0.0008890813969289305, "loss": 3.6099, "step": 13065 }, { "epoch": 0.8880282647098791, "grad_norm": 2.314497232437134, "learning_rate": 0.0008890389319200979, "loss": 3.5207, "step": 13070 }, { "epoch": 0.8883679847805408, "grad_norm": 1.754669189453125, "learning_rate": 0.0008889964669112652, "loss": 3.6565, "step": 13075 }, { "epoch": 0.8887077048512027, "grad_norm": 1.7657948732376099, "learning_rate": 0.0008889540019024324, "loss": 3.65, "step": 13080 }, { "epoch": 0.8890474249218644, "grad_norm": 1.9539648294448853, "learning_rate": 0.0008889115368935997, "loss": 3.4875, "step": 13085 }, { "epoch": 0.8893871449925261, "grad_norm": 1.8267979621887207, "learning_rate": 0.0008888690718847669, "loss": 3.596, "step": 13090 }, { "epoch": 0.889726865063188, "grad_norm": 1.9213993549346924, "learning_rate": 0.0008888266068759342, "loss": 3.6109, "step": 13095 }, { "epoch": 0.8900665851338497, "grad_norm": 1.641018033027649, "learning_rate": 0.0008887841418671015, "loss": 3.5756, "step": 13100 }, { "epoch": 0.8904063052045115, "grad_norm": 2.19036865234375, "learning_rate": 0.0008887416768582688, "loss": 3.6383, "step": 13105 }, { "epoch": 0.8907460252751732, "grad_norm": 2.077855110168457, "learning_rate": 0.0008886992118494361, "loss": 3.6584, "step": 13110 }, { "epoch": 0.8910857453458351, 
"grad_norm": 1.8863942623138428, "learning_rate": 0.0008886567468406034, "loss": 3.5956, "step": 13115 }, { "epoch": 0.8914254654164968, "grad_norm": 1.8423131704330444, "learning_rate": 0.0008886142818317706, "loss": 3.7362, "step": 13120 }, { "epoch": 0.8917651854871586, "grad_norm": 1.7347972393035889, "learning_rate": 0.000888571816822938, "loss": 3.3853, "step": 13125 }, { "epoch": 0.8921049055578204, "grad_norm": 3.473407506942749, "learning_rate": 0.0008885293518141052, "loss": 3.3932, "step": 13130 }, { "epoch": 0.8924446256284821, "grad_norm": 2.205472469329834, "learning_rate": 0.0008884868868052724, "loss": 3.559, "step": 13135 }, { "epoch": 0.8927843456991439, "grad_norm": 1.730684518814087, "learning_rate": 0.0008884444217964399, "loss": 3.5875, "step": 13140 }, { "epoch": 0.8931240657698056, "grad_norm": 1.8321517705917358, "learning_rate": 0.0008884019567876071, "loss": 3.7624, "step": 13145 }, { "epoch": 0.8934637858404675, "grad_norm": 2.4136242866516113, "learning_rate": 0.0008883594917787743, "loss": 3.6105, "step": 13150 }, { "epoch": 0.8938035059111292, "grad_norm": 1.615853190422058, "learning_rate": 0.0008883170267699416, "loss": 3.8547, "step": 13155 }, { "epoch": 0.894143225981791, "grad_norm": 1.8485841751098633, "learning_rate": 0.0008882745617611089, "loss": 3.2517, "step": 13160 }, { "epoch": 0.8944829460524528, "grad_norm": 1.9229202270507812, "learning_rate": 0.0008882320967522761, "loss": 3.4343, "step": 13165 }, { "epoch": 0.8948226661231146, "grad_norm": 1.8365297317504883, "learning_rate": 0.0008881896317434434, "loss": 3.4552, "step": 13170 }, { "epoch": 0.8951623861937763, "grad_norm": 1.3263647556304932, "learning_rate": 0.0008881471667346108, "loss": 3.6461, "step": 13175 }, { "epoch": 0.8955021062644382, "grad_norm": 2.1813788414001465, "learning_rate": 0.000888104701725778, "loss": 3.6523, "step": 13180 }, { "epoch": 0.8958418263350999, "grad_norm": 2.0149848461151123, "learning_rate": 0.0008880622367169453, "loss": 3.8759, 
"step": 13185 }, { "epoch": 0.8961815464057616, "grad_norm": 1.6331664323806763, "learning_rate": 0.0008880197717081125, "loss": 3.5009, "step": 13190 }, { "epoch": 0.8965212664764234, "grad_norm": 2.139089584350586, "learning_rate": 0.0008879773066992798, "loss": 3.7528, "step": 13195 }, { "epoch": 0.8968609865470852, "grad_norm": 1.8027629852294922, "learning_rate": 0.0008879348416904471, "loss": 3.5903, "step": 13200 }, { "epoch": 0.897200706617747, "grad_norm": 1.75471830368042, "learning_rate": 0.0008878923766816143, "loss": 3.5179, "step": 13205 }, { "epoch": 0.8975404266884087, "grad_norm": 1.9526591300964355, "learning_rate": 0.0008878499116727817, "loss": 3.6201, "step": 13210 }, { "epoch": 0.8978801467590706, "grad_norm": 2.033658504486084, "learning_rate": 0.000887807446663949, "loss": 3.4759, "step": 13215 }, { "epoch": 0.8982198668297323, "grad_norm": 1.9054367542266846, "learning_rate": 0.0008877649816551162, "loss": 3.5584, "step": 13220 }, { "epoch": 0.8985595869003941, "grad_norm": 2.121842622756958, "learning_rate": 0.0008877225166462834, "loss": 3.8136, "step": 13225 }, { "epoch": 0.8988993069710558, "grad_norm": 2.0208425521850586, "learning_rate": 0.0008876800516374508, "loss": 3.8139, "step": 13230 }, { "epoch": 0.8992390270417177, "grad_norm": 1.9835959672927856, "learning_rate": 0.000887637586628618, "loss": 3.5541, "step": 13235 }, { "epoch": 0.8995787471123794, "grad_norm": 1.6149407625198364, "learning_rate": 0.0008875951216197852, "loss": 3.7272, "step": 13240 }, { "epoch": 0.8999184671830411, "grad_norm": 2.052929162979126, "learning_rate": 0.0008875526566109527, "loss": 3.3883, "step": 13245 }, { "epoch": 0.900258187253703, "grad_norm": 2.203157663345337, "learning_rate": 0.0008875101916021199, "loss": 3.5602, "step": 13250 }, { "epoch": 0.9005979073243647, "grad_norm": 1.7505279779434204, "learning_rate": 0.0008874677265932871, "loss": 3.5747, "step": 13255 }, { "epoch": 0.9009376273950265, "grad_norm": 1.9119298458099365, 
"learning_rate": 0.0008874252615844545, "loss": 3.5364, "step": 13260 }, { "epoch": 0.9012773474656883, "grad_norm": 1.6238698959350586, "learning_rate": 0.0008873827965756217, "loss": 3.5656, "step": 13265 }, { "epoch": 0.9016170675363501, "grad_norm": 2.0554749965667725, "learning_rate": 0.0008873403315667889, "loss": 3.6541, "step": 13270 }, { "epoch": 0.9019567876070118, "grad_norm": 2.1767964363098145, "learning_rate": 0.0008872978665579562, "loss": 3.7127, "step": 13275 }, { "epoch": 0.9022965076776736, "grad_norm": 2.3297646045684814, "learning_rate": 0.0008872554015491236, "loss": 3.9905, "step": 13280 }, { "epoch": 0.9026362277483354, "grad_norm": 1.5422009229660034, "learning_rate": 0.0008872129365402908, "loss": 3.6083, "step": 13285 }, { "epoch": 0.9029759478189971, "grad_norm": 2.25058913230896, "learning_rate": 0.0008871704715314581, "loss": 3.5854, "step": 13290 }, { "epoch": 0.9033156678896589, "grad_norm": 1.7555556297302246, "learning_rate": 0.0008871280065226254, "loss": 3.8756, "step": 13295 }, { "epoch": 0.9036553879603207, "grad_norm": 1.8697395324707031, "learning_rate": 0.0008870855415137926, "loss": 3.6593, "step": 13300 }, { "epoch": 0.9039951080309825, "grad_norm": 1.8998295068740845, "learning_rate": 0.0008870430765049599, "loss": 3.4543, "step": 13305 }, { "epoch": 0.9043348281016442, "grad_norm": 1.6896958351135254, "learning_rate": 0.0008870006114961272, "loss": 3.7213, "step": 13310 }, { "epoch": 0.904674548172306, "grad_norm": 1.512308955192566, "learning_rate": 0.0008869581464872945, "loss": 3.7974, "step": 13315 }, { "epoch": 0.9050142682429678, "grad_norm": 1.7283625602722168, "learning_rate": 0.0008869156814784618, "loss": 3.3793, "step": 13320 }, { "epoch": 0.9053539883136296, "grad_norm": 1.7798125743865967, "learning_rate": 0.000886873216469629, "loss": 3.3865, "step": 13325 }, { "epoch": 0.9056937083842913, "grad_norm": 2.0875632762908936, "learning_rate": 0.0008868307514607963, "loss": 3.5798, "step": 13330 }, { "epoch": 
0.9060334284549532, "grad_norm": 1.897408127784729, "learning_rate": 0.0008867882864519636, "loss": 3.7963, "step": 13335 }, { "epoch": 0.9063731485256149, "grad_norm": 1.9939271211624146, "learning_rate": 0.0008867458214431308, "loss": 3.554, "step": 13340 }, { "epoch": 0.9067128685962766, "grad_norm": 1.61286461353302, "learning_rate": 0.0008867033564342981, "loss": 3.6109, "step": 13345 }, { "epoch": 0.9070525886669385, "grad_norm": 2.2304017543792725, "learning_rate": 0.0008866608914254655, "loss": 3.6837, "step": 13350 }, { "epoch": 0.9073923087376002, "grad_norm": 1.6856931447982788, "learning_rate": 0.0008866184264166327, "loss": 3.5988, "step": 13355 }, { "epoch": 0.907732028808262, "grad_norm": 2.034062385559082, "learning_rate": 0.0008865759614078, "loss": 3.5211, "step": 13360 }, { "epoch": 0.9080717488789237, "grad_norm": 2.0809788703918457, "learning_rate": 0.0008865334963989673, "loss": 3.4507, "step": 13365 }, { "epoch": 0.9084114689495856, "grad_norm": 2.0526952743530273, "learning_rate": 0.0008864910313901345, "loss": 3.6237, "step": 13370 }, { "epoch": 0.9087511890202473, "grad_norm": 1.5799039602279663, "learning_rate": 0.0008864485663813017, "loss": 3.1996, "step": 13375 }, { "epoch": 0.9090909090909091, "grad_norm": 1.852223515510559, "learning_rate": 0.0008864061013724691, "loss": 3.8055, "step": 13380 }, { "epoch": 0.9094306291615709, "grad_norm": 1.6107300519943237, "learning_rate": 0.0008863636363636364, "loss": 3.7531, "step": 13385 }, { "epoch": 0.9097703492322327, "grad_norm": 2.370051383972168, "learning_rate": 0.0008863211713548036, "loss": 3.792, "step": 13390 }, { "epoch": 0.9101100693028944, "grad_norm": 1.7845580577850342, "learning_rate": 0.000886278706345971, "loss": 3.4174, "step": 13395 }, { "epoch": 0.9104497893735561, "grad_norm": 2.1022050380706787, "learning_rate": 0.0008862362413371382, "loss": 3.4066, "step": 13400 }, { "epoch": 0.910789509444218, "grad_norm": 2.1295151710510254, "learning_rate": 0.0008861937763283054, 
"loss": 3.9671, "step": 13405 }, { "epoch": 0.9111292295148797, "grad_norm": 1.6212434768676758, "learning_rate": 0.0008861513113194728, "loss": 3.3923, "step": 13410 }, { "epoch": 0.9114689495855415, "grad_norm": 1.434550404548645, "learning_rate": 0.00088610884631064, "loss": 3.5857, "step": 13415 }, { "epoch": 0.9118086696562033, "grad_norm": 1.7258330583572388, "learning_rate": 0.0008860663813018073, "loss": 3.811, "step": 13420 }, { "epoch": 0.9121483897268651, "grad_norm": 1.9021703004837036, "learning_rate": 0.0008860239162929747, "loss": 3.5321, "step": 13425 }, { "epoch": 0.9124881097975268, "grad_norm": 1.9636627435684204, "learning_rate": 0.0008859814512841419, "loss": 3.3819, "step": 13430 }, { "epoch": 0.9128278298681887, "grad_norm": 2.261352300643921, "learning_rate": 0.0008859389862753091, "loss": 3.5591, "step": 13435 }, { "epoch": 0.9131675499388504, "grad_norm": 2.2179672718048096, "learning_rate": 0.0008858965212664764, "loss": 3.4752, "step": 13440 }, { "epoch": 0.9135072700095122, "grad_norm": 1.8240618705749512, "learning_rate": 0.0008858540562576437, "loss": 3.4885, "step": 13445 }, { "epoch": 0.9138469900801739, "grad_norm": 1.853271722793579, "learning_rate": 0.0008858115912488109, "loss": 3.3787, "step": 13450 }, { "epoch": 0.9141867101508357, "grad_norm": 1.7167638540267944, "learning_rate": 0.0008857691262399783, "loss": 3.5508, "step": 13455 }, { "epoch": 0.9145264302214975, "grad_norm": 2.171039342880249, "learning_rate": 0.0008857266612311456, "loss": 3.7785, "step": 13460 }, { "epoch": 0.9148661502921592, "grad_norm": 1.9932540655136108, "learning_rate": 0.0008856841962223129, "loss": 3.6196, "step": 13465 }, { "epoch": 0.9152058703628211, "grad_norm": 1.964184284210205, "learning_rate": 0.0008856417312134801, "loss": 3.7306, "step": 13470 }, { "epoch": 0.9155455904334828, "grad_norm": 1.671668529510498, "learning_rate": 0.0008855992662046473, "loss": 3.5636, "step": 13475 }, { "epoch": 0.9158853105041446, "grad_norm": 
2.1175734996795654, "learning_rate": 0.0008855568011958147, "loss": 3.4543, "step": 13480 }, { "epoch": 0.9162250305748063, "grad_norm": 1.6319934129714966, "learning_rate": 0.000885514336186982, "loss": 3.5034, "step": 13485 }, { "epoch": 0.9165647506454682, "grad_norm": 2.081697940826416, "learning_rate": 0.0008854718711781492, "loss": 3.6706, "step": 13490 }, { "epoch": 0.9169044707161299, "grad_norm": 2.2833549976348877, "learning_rate": 0.0008854294061693166, "loss": 3.643, "step": 13495 }, { "epoch": 0.9172441907867916, "grad_norm": 1.9374768733978271, "learning_rate": 0.0008853869411604838, "loss": 3.5468, "step": 13500 }, { "epoch": 0.9175839108574535, "grad_norm": 1.8699275255203247, "learning_rate": 0.000885344476151651, "loss": 3.7279, "step": 13505 }, { "epoch": 0.9179236309281152, "grad_norm": 1.6213719844818115, "learning_rate": 0.0008853020111428184, "loss": 3.8478, "step": 13510 }, { "epoch": 0.918263350998777, "grad_norm": 1.7526793479919434, "learning_rate": 0.0008852595461339856, "loss": 3.1716, "step": 13515 }, { "epoch": 0.9186030710694388, "grad_norm": 2.869647741317749, "learning_rate": 0.0008852170811251529, "loss": 3.6699, "step": 13520 }, { "epoch": 0.9189427911401006, "grad_norm": 1.7590012550354004, "learning_rate": 0.0008851746161163203, "loss": 3.5669, "step": 13525 }, { "epoch": 0.9192825112107623, "grad_norm": 1.8642826080322266, "learning_rate": 0.0008851321511074875, "loss": 3.3074, "step": 13530 }, { "epoch": 0.9196222312814241, "grad_norm": 1.849387526512146, "learning_rate": 0.0008850896860986547, "loss": 3.788, "step": 13535 }, { "epoch": 0.9199619513520859, "grad_norm": 1.8125319480895996, "learning_rate": 0.000885047221089822, "loss": 3.8704, "step": 13540 }, { "epoch": 0.9203016714227477, "grad_norm": 2.1393861770629883, "learning_rate": 0.0008850047560809893, "loss": 3.5019, "step": 13545 }, { "epoch": 0.9206413914934094, "grad_norm": 1.8362094163894653, "learning_rate": 0.0008849622910721565, "loss": 3.7301, "step": 13550 
}, { "epoch": 0.9209811115640713, "grad_norm": 2.0984482765197754, "learning_rate": 0.0008849198260633239, "loss": 3.6949, "step": 13555 }, { "epoch": 0.921320831634733, "grad_norm": 1.9681694507598877, "learning_rate": 0.0008848773610544912, "loss": 3.5676, "step": 13560 }, { "epoch": 0.9216605517053947, "grad_norm": 1.9857286214828491, "learning_rate": 0.0008848348960456584, "loss": 3.639, "step": 13565 }, { "epoch": 0.9220002717760565, "grad_norm": 1.626232624053955, "learning_rate": 0.0008847924310368257, "loss": 3.689, "step": 13570 }, { "epoch": 0.9223399918467183, "grad_norm": 1.665917158126831, "learning_rate": 0.000884749966027993, "loss": 3.5655, "step": 13575 }, { "epoch": 0.9226797119173801, "grad_norm": 1.9065200090408325, "learning_rate": 0.0008847075010191602, "loss": 3.5059, "step": 13580 }, { "epoch": 0.9230194319880418, "grad_norm": 1.939616322517395, "learning_rate": 0.0008846650360103275, "loss": 3.3711, "step": 13585 }, { "epoch": 0.9233591520587037, "grad_norm": 1.6312322616577148, "learning_rate": 0.0008846225710014948, "loss": 3.5409, "step": 13590 }, { "epoch": 0.9236988721293654, "grad_norm": 1.8673675060272217, "learning_rate": 0.0008845801059926621, "loss": 3.5966, "step": 13595 }, { "epoch": 0.9240385922000272, "grad_norm": 1.6026567220687866, "learning_rate": 0.0008845376409838294, "loss": 3.4823, "step": 13600 }, { "epoch": 0.924378312270689, "grad_norm": 1.951188087463379, "learning_rate": 0.0008844951759749966, "loss": 3.7312, "step": 13605 }, { "epoch": 0.9247180323413507, "grad_norm": 1.6437736749649048, "learning_rate": 0.0008844527109661639, "loss": 3.671, "step": 13610 }, { "epoch": 0.9250577524120125, "grad_norm": 2.0472586154937744, "learning_rate": 0.0008844102459573312, "loss": 3.7057, "step": 13615 }, { "epoch": 0.9253974724826742, "grad_norm": 1.9159362316131592, "learning_rate": 0.0008843677809484984, "loss": 3.6665, "step": 13620 }, { "epoch": 0.9257371925533361, "grad_norm": 1.3971366882324219, "learning_rate": 
0.0008843253159396657, "loss": 3.6998, "step": 13625 }, { "epoch": 0.9260769126239978, "grad_norm": 1.3022412061691284, "learning_rate": 0.0008842828509308331, "loss": 3.5168, "step": 13630 }, { "epoch": 0.9264166326946596, "grad_norm": 1.7126657962799072, "learning_rate": 0.0008842403859220003, "loss": 3.6446, "step": 13635 }, { "epoch": 0.9267563527653214, "grad_norm": 2.184784412384033, "learning_rate": 0.0008841979209131675, "loss": 3.5125, "step": 13640 }, { "epoch": 0.9270960728359832, "grad_norm": 1.7511719465255737, "learning_rate": 0.0008841554559043349, "loss": 3.3982, "step": 13645 }, { "epoch": 0.9274357929066449, "grad_norm": 1.845175862312317, "learning_rate": 0.0008841129908955021, "loss": 3.7202, "step": 13650 }, { "epoch": 0.9277755129773066, "grad_norm": 1.9143561124801636, "learning_rate": 0.0008840705258866693, "loss": 3.6433, "step": 13655 }, { "epoch": 0.9281152330479685, "grad_norm": 1.6291388273239136, "learning_rate": 0.0008840280608778368, "loss": 3.6215, "step": 13660 }, { "epoch": 0.9284549531186302, "grad_norm": 1.8398523330688477, "learning_rate": 0.000883985595869004, "loss": 3.5472, "step": 13665 }, { "epoch": 0.928794673189292, "grad_norm": 1.8306328058242798, "learning_rate": 0.0008839431308601712, "loss": 3.6994, "step": 13670 }, { "epoch": 0.9291343932599538, "grad_norm": 2.328791618347168, "learning_rate": 0.0008839006658513385, "loss": 3.7294, "step": 13675 }, { "epoch": 0.9294741133306156, "grad_norm": 2.344027042388916, "learning_rate": 0.0008838582008425058, "loss": 3.6128, "step": 13680 }, { "epoch": 0.9298138334012773, "grad_norm": 2.123417615890503, "learning_rate": 0.000883815735833673, "loss": 3.4072, "step": 13685 }, { "epoch": 0.9301535534719392, "grad_norm": 2.174516201019287, "learning_rate": 0.0008837732708248403, "loss": 3.3897, "step": 13690 }, { "epoch": 0.9304932735426009, "grad_norm": 2.428058624267578, "learning_rate": 0.0008837308058160077, "loss": 3.5715, "step": 13695 }, { "epoch": 0.9308329936132627, 
"grad_norm": 1.7366739511489868, "learning_rate": 0.0008836883408071749, "loss": 3.5524, "step": 13700 }, { "epoch": 0.9311727136839244, "grad_norm": 2.26873517036438, "learning_rate": 0.0008836458757983422, "loss": 3.6291, "step": 13705 }, { "epoch": 0.9315124337545863, "grad_norm": 1.6103434562683105, "learning_rate": 0.0008836034107895095, "loss": 3.5777, "step": 13710 }, { "epoch": 0.931852153825248, "grad_norm": 1.6185975074768066, "learning_rate": 0.0008835609457806767, "loss": 3.652, "step": 13715 }, { "epoch": 0.9321918738959097, "grad_norm": 2.052217960357666, "learning_rate": 0.000883518480771844, "loss": 3.5496, "step": 13720 }, { "epoch": 0.9325315939665716, "grad_norm": 1.969704270362854, "learning_rate": 0.0008834760157630112, "loss": 3.3106, "step": 13725 }, { "epoch": 0.9328713140372333, "grad_norm": 1.7529767751693726, "learning_rate": 0.0008834335507541786, "loss": 3.5006, "step": 13730 }, { "epoch": 0.9332110341078951, "grad_norm": 1.9498862028121948, "learning_rate": 0.0008833910857453459, "loss": 3.8125, "step": 13735 }, { "epoch": 0.9335507541785568, "grad_norm": 1.732547640800476, "learning_rate": 0.0008833486207365131, "loss": 3.5627, "step": 13740 }, { "epoch": 0.9338904742492187, "grad_norm": 1.8373773097991943, "learning_rate": 0.0008833061557276804, "loss": 3.7949, "step": 13745 }, { "epoch": 0.9342301943198804, "grad_norm": 1.96869695186615, "learning_rate": 0.0008832636907188477, "loss": 3.2573, "step": 13750 }, { "epoch": 0.9345699143905422, "grad_norm": 1.5088729858398438, "learning_rate": 0.0008832212257100149, "loss": 3.7919, "step": 13755 }, { "epoch": 0.934909634461204, "grad_norm": 2.19632625579834, "learning_rate": 0.0008831787607011821, "loss": 3.6207, "step": 13760 }, { "epoch": 0.9352493545318658, "grad_norm": 2.0536532402038574, "learning_rate": 0.0008831362956923496, "loss": 3.5794, "step": 13765 }, { "epoch": 0.9355890746025275, "grad_norm": 1.406928539276123, "learning_rate": 0.0008830938306835168, "loss": 3.5483, 
"step": 13770 }, { "epoch": 0.9359287946731893, "grad_norm": 2.1777796745300293, "learning_rate": 0.000883051365674684, "loss": 3.4354, "step": 13775 }, { "epoch": 0.9362685147438511, "grad_norm": 2.059699535369873, "learning_rate": 0.0008830089006658514, "loss": 3.503, "step": 13780 }, { "epoch": 0.9366082348145128, "grad_norm": 1.587005853652954, "learning_rate": 0.0008829664356570186, "loss": 3.6274, "step": 13785 }, { "epoch": 0.9369479548851746, "grad_norm": 1.6998502016067505, "learning_rate": 0.0008829239706481858, "loss": 3.5019, "step": 13790 }, { "epoch": 0.9372876749558364, "grad_norm": 2.3081276416778564, "learning_rate": 0.0008828815056393532, "loss": 3.6461, "step": 13795 }, { "epoch": 0.9376273950264982, "grad_norm": 1.5501303672790527, "learning_rate": 0.0008828390406305205, "loss": 3.307, "step": 13800 }, { "epoch": 0.9379671150971599, "grad_norm": 1.536294937133789, "learning_rate": 0.0008827965756216878, "loss": 3.6198, "step": 13805 }, { "epoch": 0.9383068351678218, "grad_norm": 1.8251539468765259, "learning_rate": 0.0008827541106128551, "loss": 3.3366, "step": 13810 }, { "epoch": 0.9386465552384835, "grad_norm": 1.9391412734985352, "learning_rate": 0.0008827116456040223, "loss": 3.5272, "step": 13815 }, { "epoch": 0.9389862753091452, "grad_norm": 1.5734752416610718, "learning_rate": 0.0008826691805951896, "loss": 3.7289, "step": 13820 }, { "epoch": 0.939325995379807, "grad_norm": 2.5833253860473633, "learning_rate": 0.0008826267155863568, "loss": 3.3166, "step": 13825 }, { "epoch": 0.9396657154504688, "grad_norm": 2.125808000564575, "learning_rate": 0.0008825842505775241, "loss": 3.673, "step": 13830 }, { "epoch": 0.9400054355211306, "grad_norm": 1.976138710975647, "learning_rate": 0.0008825417855686915, "loss": 3.4737, "step": 13835 }, { "epoch": 0.9403451555917923, "grad_norm": 1.565671682357788, "learning_rate": 0.0008824993205598587, "loss": 3.5004, "step": 13840 }, { "epoch": 0.9406848756624542, "grad_norm": 2.149965763092041, 
"learning_rate": 0.000882456855551026, "loss": 3.4415, "step": 13845 }, { "epoch": 0.9410245957331159, "grad_norm": 1.6040290594100952, "learning_rate": 0.0008824143905421933, "loss": 3.631, "step": 13850 }, { "epoch": 0.9413643158037777, "grad_norm": 1.4916927814483643, "learning_rate": 0.0008823719255333605, "loss": 3.7118, "step": 13855 }, { "epoch": 0.9417040358744395, "grad_norm": 1.722422480583191, "learning_rate": 0.0008823294605245277, "loss": 3.5505, "step": 13860 }, { "epoch": 0.9420437559451013, "grad_norm": 2.3802309036254883, "learning_rate": 0.0008822869955156951, "loss": 3.3724, "step": 13865 }, { "epoch": 0.942383476015763, "grad_norm": 2.1005890369415283, "learning_rate": 0.0008822445305068624, "loss": 3.5703, "step": 13870 }, { "epoch": 0.9427231960864247, "grad_norm": 2.2870492935180664, "learning_rate": 0.0008822020654980296, "loss": 3.5871, "step": 13875 }, { "epoch": 0.9430629161570866, "grad_norm": 2.0826916694641113, "learning_rate": 0.000882159600489197, "loss": 3.4689, "step": 13880 }, { "epoch": 0.9434026362277483, "grad_norm": 1.8623478412628174, "learning_rate": 0.0008821171354803642, "loss": 3.4069, "step": 13885 }, { "epoch": 0.9437423562984101, "grad_norm": 1.9572420120239258, "learning_rate": 0.0008820746704715314, "loss": 3.7522, "step": 13890 }, { "epoch": 0.9440820763690719, "grad_norm": 1.7246694564819336, "learning_rate": 0.0008820322054626988, "loss": 3.7778, "step": 13895 }, { "epoch": 0.9444217964397337, "grad_norm": 5.104165554046631, "learning_rate": 0.000881989740453866, "loss": 3.5182, "step": 13900 }, { "epoch": 0.9447615165103954, "grad_norm": 1.7758396863937378, "learning_rate": 0.0008819472754450333, "loss": 3.6501, "step": 13905 }, { "epoch": 0.9451012365810572, "grad_norm": 1.7826497554779053, "learning_rate": 0.0008819048104362007, "loss": 3.8115, "step": 13910 }, { "epoch": 0.945440956651719, "grad_norm": 2.0776877403259277, "learning_rate": 0.0008818623454273679, "loss": 3.564, "step": 13915 }, { "epoch": 
0.9457806767223808, "grad_norm": 2.07145094871521, "learning_rate": 0.0008818198804185351, "loss": 3.4703, "step": 13920 }, { "epoch": 0.9461203967930425, "grad_norm": 1.6295313835144043, "learning_rate": 0.0008817774154097024, "loss": 3.7205, "step": 13925 }, { "epoch": 0.9464601168637043, "grad_norm": 2.063546657562256, "learning_rate": 0.0008817349504008697, "loss": 3.5596, "step": 13930 }, { "epoch": 0.9467998369343661, "grad_norm": 1.9327260255813599, "learning_rate": 0.0008816924853920369, "loss": 3.4966, "step": 13935 }, { "epoch": 0.9471395570050278, "grad_norm": 2.2258460521698, "learning_rate": 0.0008816500203832043, "loss": 3.8279, "step": 13940 }, { "epoch": 0.9474792770756897, "grad_norm": 1.932578444480896, "learning_rate": 0.0008816075553743716, "loss": 3.6184, "step": 13945 }, { "epoch": 0.9478189971463514, "grad_norm": 1.9622392654418945, "learning_rate": 0.0008815650903655388, "loss": 3.6237, "step": 13950 }, { "epoch": 0.9481587172170132, "grad_norm": 1.704645037651062, "learning_rate": 0.0008815226253567061, "loss": 3.467, "step": 13955 }, { "epoch": 0.9484984372876749, "grad_norm": 1.7828354835510254, "learning_rate": 0.0008814801603478733, "loss": 3.5393, "step": 13960 }, { "epoch": 0.9488381573583368, "grad_norm": 1.9960014820098877, "learning_rate": 0.0008814376953390406, "loss": 3.4325, "step": 13965 }, { "epoch": 0.9491778774289985, "grad_norm": 2.87235689163208, "learning_rate": 0.000881395230330208, "loss": 3.8318, "step": 13970 }, { "epoch": 0.9495175974996602, "grad_norm": 1.7371578216552734, "learning_rate": 0.0008813527653213752, "loss": 3.6191, "step": 13975 }, { "epoch": 0.9498573175703221, "grad_norm": 1.9684944152832031, "learning_rate": 0.0008813103003125425, "loss": 3.8082, "step": 13980 }, { "epoch": 0.9501970376409838, "grad_norm": 1.96102774143219, "learning_rate": 0.0008812678353037098, "loss": 3.4911, "step": 13985 }, { "epoch": 0.9505367577116456, "grad_norm": 1.7216155529022217, "learning_rate": 0.000881225370294877, 
"loss": 3.6049, "step": 13990 }, { "epoch": 0.9508764777823073, "grad_norm": 1.9940170049667358, "learning_rate": 0.0008811829052860443, "loss": 3.6895, "step": 13995 }, { "epoch": 0.9512161978529692, "grad_norm": 1.7582635879516602, "learning_rate": 0.0008811404402772116, "loss": 3.6223, "step": 14000 }, { "epoch": 0.9515559179236309, "grad_norm": 1.404127836227417, "learning_rate": 0.0008810979752683789, "loss": 3.212, "step": 14005 }, { "epoch": 0.9518956379942927, "grad_norm": 1.773445963859558, "learning_rate": 0.0008810555102595462, "loss": 3.4072, "step": 14010 }, { "epoch": 0.9522353580649545, "grad_norm": 1.865232229232788, "learning_rate": 0.0008810130452507135, "loss": 3.6039, "step": 14015 }, { "epoch": 0.9525750781356163, "grad_norm": 1.7591264247894287, "learning_rate": 0.0008809705802418807, "loss": 3.6121, "step": 14020 }, { "epoch": 0.952914798206278, "grad_norm": 1.9811052083969116, "learning_rate": 0.0008809281152330479, "loss": 3.6702, "step": 14025 }, { "epoch": 0.9532545182769399, "grad_norm": 1.6481715440750122, "learning_rate": 0.0008808856502242153, "loss": 3.6068, "step": 14030 }, { "epoch": 0.9535942383476016, "grad_norm": 1.7021180391311646, "learning_rate": 0.0008808431852153825, "loss": 3.6142, "step": 14035 }, { "epoch": 0.9539339584182633, "grad_norm": 1.6620618104934692, "learning_rate": 0.0008808007202065498, "loss": 3.5924, "step": 14040 }, { "epoch": 0.9542736784889251, "grad_norm": 2.475987434387207, "learning_rate": 0.0008807582551977172, "loss": 3.6815, "step": 14045 }, { "epoch": 0.9546133985595869, "grad_norm": 1.8125295639038086, "learning_rate": 0.0008807157901888844, "loss": 3.8295, "step": 14050 }, { "epoch": 0.9549531186302487, "grad_norm": 1.8650206327438354, "learning_rate": 0.0008806733251800516, "loss": 3.6207, "step": 14055 }, { "epoch": 0.9552928387009104, "grad_norm": 2.0592496395111084, "learning_rate": 0.000880630860171219, "loss": 3.458, "step": 14060 }, { "epoch": 0.9556325587715723, "grad_norm": 
1.8699405193328857, "learning_rate": 0.0008805883951623862, "loss": 3.6142, "step": 14065 }, { "epoch": 0.955972278842234, "grad_norm": 1.9525097608566284, "learning_rate": 0.0008805459301535534, "loss": 3.5688, "step": 14070 }, { "epoch": 0.9563119989128958, "grad_norm": 1.8613605499267578, "learning_rate": 0.0008805034651447208, "loss": 3.6298, "step": 14075 }, { "epoch": 0.9566517189835575, "grad_norm": 2.335171937942505, "learning_rate": 0.0008804610001358881, "loss": 3.6771, "step": 14080 }, { "epoch": 0.9569914390542194, "grad_norm": 1.9074374437332153, "learning_rate": 0.0008804185351270553, "loss": 3.5368, "step": 14085 }, { "epoch": 0.9573311591248811, "grad_norm": 2.0395760536193848, "learning_rate": 0.0008803760701182226, "loss": 3.6064, "step": 14090 }, { "epoch": 0.9576708791955428, "grad_norm": 1.5705000162124634, "learning_rate": 0.0008803336051093899, "loss": 3.6925, "step": 14095 }, { "epoch": 0.9580105992662047, "grad_norm": 2.340400457382202, "learning_rate": 0.0008802911401005571, "loss": 3.4912, "step": 14100 }, { "epoch": 0.9583503193368664, "grad_norm": 2.237704038619995, "learning_rate": 0.0008802486750917244, "loss": 3.4303, "step": 14105 }, { "epoch": 0.9586900394075282, "grad_norm": 1.8781665563583374, "learning_rate": 0.0008802062100828918, "loss": 3.4723, "step": 14110 }, { "epoch": 0.95902975947819, "grad_norm": 1.899883508682251, "learning_rate": 0.000880163745074059, "loss": 3.4832, "step": 14115 }, { "epoch": 0.9593694795488518, "grad_norm": 1.8506848812103271, "learning_rate": 0.0008801212800652263, "loss": 3.8643, "step": 14120 }, { "epoch": 0.9597091996195135, "grad_norm": 2.0693132877349854, "learning_rate": 0.0008800788150563935, "loss": 3.8157, "step": 14125 }, { "epoch": 0.9600489196901753, "grad_norm": 1.9430558681488037, "learning_rate": 0.0008800363500475608, "loss": 3.648, "step": 14130 }, { "epoch": 0.9603886397608371, "grad_norm": 2.3631482124328613, "learning_rate": 0.0008799938850387281, "loss": 3.5981, "step": 14135 
}, { "epoch": 0.9607283598314988, "grad_norm": 2.065199851989746, "learning_rate": 0.0008799514200298953, "loss": 3.6896, "step": 14140 }, { "epoch": 0.9610680799021606, "grad_norm": 1.5046477317810059, "learning_rate": 0.0008799089550210628, "loss": 3.5565, "step": 14145 }, { "epoch": 0.9614077999728224, "grad_norm": 1.5556479692459106, "learning_rate": 0.00087986649001223, "loss": 3.7377, "step": 14150 }, { "epoch": 0.9617475200434842, "grad_norm": 2.1971678733825684, "learning_rate": 0.0008798240250033972, "loss": 3.6789, "step": 14155 }, { "epoch": 0.9620872401141459, "grad_norm": 1.8526889085769653, "learning_rate": 0.0008797815599945646, "loss": 3.6262, "step": 14160 }, { "epoch": 0.9624269601848077, "grad_norm": 1.7917839288711548, "learning_rate": 0.0008797390949857318, "loss": 3.711, "step": 14165 }, { "epoch": 0.9627666802554695, "grad_norm": 1.9105530977249146, "learning_rate": 0.000879696629976899, "loss": 3.6189, "step": 14170 }, { "epoch": 0.9631064003261313, "grad_norm": 1.9511061906814575, "learning_rate": 0.0008796541649680663, "loss": 3.5721, "step": 14175 }, { "epoch": 0.963446120396793, "grad_norm": 1.867663025856018, "learning_rate": 0.0008796116999592337, "loss": 3.5719, "step": 14180 }, { "epoch": 0.9637858404674549, "grad_norm": 1.9031658172607422, "learning_rate": 0.0008795692349504009, "loss": 3.7425, "step": 14185 }, { "epoch": 0.9641255605381166, "grad_norm": 1.8478600978851318, "learning_rate": 0.0008795267699415682, "loss": 3.5205, "step": 14190 }, { "epoch": 0.9644652806087783, "grad_norm": 1.839176058769226, "learning_rate": 0.0008794843049327355, "loss": 3.7517, "step": 14195 }, { "epoch": 0.9648050006794402, "grad_norm": 1.473109483718872, "learning_rate": 0.0008794418399239027, "loss": 3.4993, "step": 14200 }, { "epoch": 0.9651447207501019, "grad_norm": 2.1076714992523193, "learning_rate": 0.00087939937491507, "loss": 3.4577, "step": 14205 }, { "epoch": 0.9654844408207637, "grad_norm": 1.5848150253295898, "learning_rate": 
0.0008793569099062372, "loss": 3.4521, "step": 14210 }, { "epoch": 0.9658241608914254, "grad_norm": 2.047776460647583, "learning_rate": 0.0008793144448974046, "loss": 3.6359, "step": 14215 }, { "epoch": 0.9661638809620873, "grad_norm": 2.2436141967773438, "learning_rate": 0.0008792719798885719, "loss": 3.681, "step": 14220 }, { "epoch": 0.966503601032749, "grad_norm": 2.2643284797668457, "learning_rate": 0.0008792295148797391, "loss": 3.6044, "step": 14225 }, { "epoch": 0.9668433211034108, "grad_norm": 1.9713140726089478, "learning_rate": 0.0008791870498709064, "loss": 3.6152, "step": 14230 }, { "epoch": 0.9671830411740726, "grad_norm": 1.6751842498779297, "learning_rate": 0.0008791445848620737, "loss": 3.6705, "step": 14235 }, { "epoch": 0.9675227612447344, "grad_norm": 2.002821922302246, "learning_rate": 0.0008791021198532409, "loss": 3.4461, "step": 14240 }, { "epoch": 0.9678624813153961, "grad_norm": 1.8011014461517334, "learning_rate": 0.0008790596548444082, "loss": 3.3751, "step": 14245 }, { "epoch": 0.9682022013860578, "grad_norm": 1.6978390216827393, "learning_rate": 0.0008790171898355756, "loss": 3.715, "step": 14250 }, { "epoch": 0.9685419214567197, "grad_norm": 1.70782470703125, "learning_rate": 0.0008789747248267428, "loss": 3.6324, "step": 14255 }, { "epoch": 0.9688816415273814, "grad_norm": 1.5809664726257324, "learning_rate": 0.00087893225981791, "loss": 3.5712, "step": 14260 }, { "epoch": 0.9692213615980432, "grad_norm": 1.980398178100586, "learning_rate": 0.0008788897948090774, "loss": 3.4762, "step": 14265 }, { "epoch": 0.969561081668705, "grad_norm": 1.9460806846618652, "learning_rate": 0.0008788473298002446, "loss": 3.583, "step": 14270 }, { "epoch": 0.9699008017393668, "grad_norm": 2.302633762359619, "learning_rate": 0.0008788048647914118, "loss": 3.605, "step": 14275 }, { "epoch": 0.9702405218100285, "grad_norm": 2.0441534519195557, "learning_rate": 0.0008787623997825792, "loss": 3.5356, "step": 14280 }, { "epoch": 0.9705802418806904, 
"grad_norm": 1.5296775102615356, "learning_rate": 0.0008787199347737465, "loss": 3.6372, "step": 14285 }, { "epoch": 0.9709199619513521, "grad_norm": 1.5885497331619263, "learning_rate": 0.0008786774697649137, "loss": 3.5882, "step": 14290 }, { "epoch": 0.9712596820220138, "grad_norm": 1.796403408050537, "learning_rate": 0.0008786350047560811, "loss": 3.5974, "step": 14295 }, { "epoch": 0.9715994020926756, "grad_norm": 1.8156596422195435, "learning_rate": 0.0008785925397472483, "loss": 3.553, "step": 14300 }, { "epoch": 0.9719391221633374, "grad_norm": 1.9707403182983398, "learning_rate": 0.0008785500747384155, "loss": 3.69, "step": 14305 }, { "epoch": 0.9722788422339992, "grad_norm": 1.9348807334899902, "learning_rate": 0.0008785076097295828, "loss": 3.5358, "step": 14310 }, { "epoch": 0.9726185623046609, "grad_norm": 1.855501413345337, "learning_rate": 0.0008784651447207501, "loss": 3.3764, "step": 14315 }, { "epoch": 0.9729582823753228, "grad_norm": 1.9006061553955078, "learning_rate": 0.0008784226797119174, "loss": 3.5294, "step": 14320 }, { "epoch": 0.9732980024459845, "grad_norm": 2.163264513015747, "learning_rate": 0.0008783802147030847, "loss": 3.1527, "step": 14325 }, { "epoch": 0.9736377225166463, "grad_norm": 1.7555955648422241, "learning_rate": 0.000878337749694252, "loss": 3.6279, "step": 14330 }, { "epoch": 0.973977442587308, "grad_norm": 1.5152305364608765, "learning_rate": 0.0008782952846854192, "loss": 3.651, "step": 14335 }, { "epoch": 0.9743171626579699, "grad_norm": 1.251171350479126, "learning_rate": 0.0008782528196765865, "loss": 3.5124, "step": 14340 }, { "epoch": 0.9746568827286316, "grad_norm": 1.6504833698272705, "learning_rate": 0.0008782103546677538, "loss": 3.6473, "step": 14345 }, { "epoch": 0.9749966027992933, "grad_norm": 2.117558002471924, "learning_rate": 0.000878167889658921, "loss": 3.6119, "step": 14350 }, { "epoch": 0.9753363228699552, "grad_norm": 1.7923351526260376, "learning_rate": 0.0008781254246500884, "loss": 3.203, 
"step": 14355 }, { "epoch": 0.9756760429406169, "grad_norm": 1.788429617881775, "learning_rate": 0.0008780829596412556, "loss": 3.8213, "step": 14360 }, { "epoch": 0.9760157630112787, "grad_norm": 1.4971190690994263, "learning_rate": 0.0008780404946324229, "loss": 3.5542, "step": 14365 }, { "epoch": 0.9763554830819405, "grad_norm": 1.860251545906067, "learning_rate": 0.0008779980296235902, "loss": 3.7987, "step": 14370 }, { "epoch": 0.9766952031526023, "grad_norm": 1.925361156463623, "learning_rate": 0.0008779555646147574, "loss": 3.4045, "step": 14375 }, { "epoch": 0.977034923223264, "grad_norm": 1.7533081769943237, "learning_rate": 0.0008779130996059247, "loss": 3.6771, "step": 14380 }, { "epoch": 0.9773746432939258, "grad_norm": 1.3946232795715332, "learning_rate": 0.000877870634597092, "loss": 3.2859, "step": 14385 }, { "epoch": 0.9777143633645876, "grad_norm": 2.13960599899292, "learning_rate": 0.0008778281695882593, "loss": 3.4295, "step": 14390 }, { "epoch": 0.9780540834352494, "grad_norm": 1.9527534246444702, "learning_rate": 0.0008777857045794266, "loss": 3.7447, "step": 14395 }, { "epoch": 0.9783938035059111, "grad_norm": 1.982917070388794, "learning_rate": 0.0008777432395705939, "loss": 3.5619, "step": 14400 }, { "epoch": 0.978733523576573, "grad_norm": 1.788532018661499, "learning_rate": 0.0008777007745617611, "loss": 3.4185, "step": 14405 }, { "epoch": 0.9790732436472347, "grad_norm": 1.7973467111587524, "learning_rate": 0.0008776583095529283, "loss": 3.432, "step": 14410 }, { "epoch": 0.9794129637178964, "grad_norm": 1.7589871883392334, "learning_rate": 0.0008776158445440957, "loss": 3.7203, "step": 14415 }, { "epoch": 0.9797526837885582, "grad_norm": 2.0848870277404785, "learning_rate": 0.0008775733795352629, "loss": 3.322, "step": 14420 }, { "epoch": 0.98009240385922, "grad_norm": 1.959299087524414, "learning_rate": 0.0008775309145264302, "loss": 3.3103, "step": 14425 }, { "epoch": 0.9804321239298818, "grad_norm": 2.2177419662475586, 
"learning_rate": 0.0008774884495175976, "loss": 3.6639, "step": 14430 }, { "epoch": 0.9807718440005435, "grad_norm": 3.403296947479248, "learning_rate": 0.0008774459845087648, "loss": 3.2117, "step": 14435 }, { "epoch": 0.9811115640712054, "grad_norm": 2.13602614402771, "learning_rate": 0.000877403519499932, "loss": 3.5927, "step": 14440 }, { "epoch": 0.9814512841418671, "grad_norm": 1.9059659242630005, "learning_rate": 0.0008773610544910994, "loss": 3.6913, "step": 14445 }, { "epoch": 0.9817910042125289, "grad_norm": 2.1341190338134766, "learning_rate": 0.0008773185894822666, "loss": 3.2994, "step": 14450 }, { "epoch": 0.9821307242831907, "grad_norm": 1.4101698398590088, "learning_rate": 0.0008772761244734338, "loss": 3.6485, "step": 14455 }, { "epoch": 0.9824704443538524, "grad_norm": 1.9857091903686523, "learning_rate": 0.0008772336594646012, "loss": 3.4346, "step": 14460 }, { "epoch": 0.9828101644245142, "grad_norm": 1.7400894165039062, "learning_rate": 0.0008771911944557685, "loss": 3.655, "step": 14465 }, { "epoch": 0.9831498844951759, "grad_norm": 2.1362364292144775, "learning_rate": 0.0008771487294469357, "loss": 3.6155, "step": 14470 }, { "epoch": 0.9834896045658378, "grad_norm": 2.185096025466919, "learning_rate": 0.000877106264438103, "loss": 3.3181, "step": 14475 }, { "epoch": 0.9838293246364995, "grad_norm": 2.2210841178894043, "learning_rate": 0.0008770637994292703, "loss": 3.4977, "step": 14480 }, { "epoch": 0.9841690447071613, "grad_norm": 1.8673489093780518, "learning_rate": 0.0008770213344204376, "loss": 3.6763, "step": 14485 }, { "epoch": 0.9845087647778231, "grad_norm": 1.7493350505828857, "learning_rate": 0.0008769788694116048, "loss": 3.5409, "step": 14490 }, { "epoch": 0.9848484848484849, "grad_norm": 2.517333745956421, "learning_rate": 0.0008769364044027722, "loss": 3.7841, "step": 14495 }, { "epoch": 0.9851882049191466, "grad_norm": 2.520669937133789, "learning_rate": 0.0008768939393939395, "loss": 3.6483, "step": 14500 }, { "epoch": 
0.9855279249898083, "grad_norm": 2.054696798324585, "learning_rate": 0.0008768514743851067, "loss": 3.5377, "step": 14505 }, { "epoch": 0.9858676450604702, "grad_norm": 2.5392329692840576, "learning_rate": 0.0008768090093762739, "loss": 3.4484, "step": 14510 }, { "epoch": 0.9862073651311319, "grad_norm": 1.9237840175628662, "learning_rate": 0.0008767665443674413, "loss": 3.7203, "step": 14515 }, { "epoch": 0.9865470852017937, "grad_norm": 1.8284837007522583, "learning_rate": 0.0008767240793586085, "loss": 3.4225, "step": 14520 }, { "epoch": 0.9868868052724555, "grad_norm": 1.5182780027389526, "learning_rate": 0.0008766816143497757, "loss": 3.381, "step": 14525 }, { "epoch": 0.9872265253431173, "grad_norm": 1.558766484260559, "learning_rate": 0.0008766391493409432, "loss": 3.5856, "step": 14530 }, { "epoch": 0.987566245413779, "grad_norm": 2.392632246017456, "learning_rate": 0.0008765966843321104, "loss": 3.5557, "step": 14535 }, { "epoch": 0.9879059654844409, "grad_norm": 2.261991024017334, "learning_rate": 0.0008765542193232776, "loss": 3.677, "step": 14540 }, { "epoch": 0.9882456855551026, "grad_norm": 1.451434850692749, "learning_rate": 0.000876511754314445, "loss": 3.3984, "step": 14545 }, { "epoch": 0.9885854056257644, "grad_norm": 1.9388504028320312, "learning_rate": 0.0008764692893056122, "loss": 3.4906, "step": 14550 }, { "epoch": 0.9889251256964261, "grad_norm": 2.4324870109558105, "learning_rate": 0.0008764268242967794, "loss": 3.4613, "step": 14555 }, { "epoch": 0.989264845767088, "grad_norm": 1.7847661972045898, "learning_rate": 0.0008763843592879469, "loss": 3.5332, "step": 14560 }, { "epoch": 0.9896045658377497, "grad_norm": 1.6587510108947754, "learning_rate": 0.0008763418942791141, "loss": 3.6268, "step": 14565 }, { "epoch": 0.9899442859084114, "grad_norm": 1.2911208868026733, "learning_rate": 0.0008762994292702813, "loss": 3.4141, "step": 14570 }, { "epoch": 0.9902840059790733, "grad_norm": 2.2747836112976074, "learning_rate": 
0.0008762569642614486, "loss": 3.7644, "step": 14575 }, { "epoch": 0.990623726049735, "grad_norm": 1.925992727279663, "learning_rate": 0.0008762144992526159, "loss": 3.4986, "step": 14580 }, { "epoch": 0.9909634461203968, "grad_norm": 1.760200023651123, "learning_rate": 0.0008761720342437831, "loss": 3.501, "step": 14585 }, { "epoch": 0.9913031661910585, "grad_norm": 1.5275013446807861, "learning_rate": 0.0008761295692349504, "loss": 3.7252, "step": 14590 }, { "epoch": 0.9916428862617204, "grad_norm": 2.009692668914795, "learning_rate": 0.0008760871042261178, "loss": 3.5099, "step": 14595 }, { "epoch": 0.9919826063323821, "grad_norm": 1.6547898054122925, "learning_rate": 0.000876044639217285, "loss": 3.6621, "step": 14600 }, { "epoch": 0.9923223264030439, "grad_norm": 2.1938939094543457, "learning_rate": 0.0008760021742084523, "loss": 3.7991, "step": 14605 }, { "epoch": 0.9926620464737057, "grad_norm": 2.1626105308532715, "learning_rate": 0.0008759597091996195, "loss": 3.4963, "step": 14610 }, { "epoch": 0.9930017665443674, "grad_norm": 1.8644903898239136, "learning_rate": 0.0008759172441907868, "loss": 3.6514, "step": 14615 }, { "epoch": 0.9933414866150292, "grad_norm": 1.8404417037963867, "learning_rate": 0.0008758747791819541, "loss": 3.5271, "step": 14620 }, { "epoch": 0.993681206685691, "grad_norm": 1.8159419298171997, "learning_rate": 0.0008758323141731213, "loss": 3.5352, "step": 14625 }, { "epoch": 0.9940209267563528, "grad_norm": 1.7221579551696777, "learning_rate": 0.0008757898491642887, "loss": 3.7973, "step": 14630 }, { "epoch": 0.9943606468270145, "grad_norm": 2.3039567470550537, "learning_rate": 0.000875747384155456, "loss": 3.4401, "step": 14635 }, { "epoch": 0.9947003668976763, "grad_norm": 1.692643404006958, "learning_rate": 0.0008757049191466232, "loss": 3.6111, "step": 14640 }, { "epoch": 0.9950400869683381, "grad_norm": 2.3008368015289307, "learning_rate": 0.0008756624541377904, "loss": 3.5202, "step": 14645 }, { "epoch": 0.9953798070389999, 
"grad_norm": 1.8632310628890991, "learning_rate": 0.0008756199891289578, "loss": 3.4749, "step": 14650 }, { "epoch": 0.9957195271096616, "grad_norm": 2.2665090560913086, "learning_rate": 0.000875577524120125, "loss": 3.4864, "step": 14655 }, { "epoch": 0.9960592471803235, "grad_norm": 1.9325493574142456, "learning_rate": 0.0008755350591112922, "loss": 3.4962, "step": 14660 }, { "epoch": 0.9963989672509852, "grad_norm": 1.9138481616973877, "learning_rate": 0.0008754925941024597, "loss": 3.7907, "step": 14665 }, { "epoch": 0.9967386873216469, "grad_norm": 1.8930778503417969, "learning_rate": 0.0008754501290936269, "loss": 3.7754, "step": 14670 }, { "epoch": 0.9970784073923087, "grad_norm": 1.7712587118148804, "learning_rate": 0.0008754076640847941, "loss": 3.59, "step": 14675 }, { "epoch": 0.9974181274629705, "grad_norm": 1.8054713010787964, "learning_rate": 0.0008753651990759615, "loss": 3.7315, "step": 14680 }, { "epoch": 0.9977578475336323, "grad_norm": 1.843814730644226, "learning_rate": 0.0008753227340671287, "loss": 3.5498, "step": 14685 }, { "epoch": 0.998097567604294, "grad_norm": 1.2777363061904907, "learning_rate": 0.0008752802690582959, "loss": 3.5758, "step": 14690 }, { "epoch": 0.9984372876749559, "grad_norm": 2.294368028640747, "learning_rate": 0.0008752378040494632, "loss": 3.8653, "step": 14695 }, { "epoch": 0.9987770077456176, "grad_norm": 1.9230793714523315, "learning_rate": 0.0008751953390406306, "loss": 3.812, "step": 14700 }, { "epoch": 0.9991167278162794, "grad_norm": 2.034294366836548, "learning_rate": 0.0008751528740317978, "loss": 3.4491, "step": 14705 }, { "epoch": 0.9994564478869412, "grad_norm": 2.4909703731536865, "learning_rate": 0.0008751104090229651, "loss": 3.6138, "step": 14710 }, { "epoch": 0.999796167957603, "grad_norm": 2.000767946243286, "learning_rate": 0.0008750679440141324, "loss": 3.8114, "step": 14715 }, { "epoch": 1.0, "eval_bertscore": { "f1": 0.7615058666941893, "precision": 0.7236520710847719, "recall": 0.810467636097724 
}, "eval_bleu_4": 0.0005247450043906736, "eval_exact_match": 0.0, "eval_loss": 3.5066394805908203, "eval_meteor": 0.03480342702328122, "eval_rouge": { "rouge1": 0.049814699699171086, "rouge2": 0.005514256805958815, "rougeL": 0.042035898440111866, "rougeLsum": 0.0420290832012866 }, "eval_runtime": 2928.8716, "eval_samples_per_second": 3.523, "eval_steps_per_second": 0.44, "step": 14718 }, { "epoch": 1.0001358880282647, "grad_norm": 1.9899215698242188, "learning_rate": 0.0008750254790052996, "loss": 3.4806, "step": 14720 }, { "epoch": 1.0004756080989265, "grad_norm": 1.6863739490509033, "learning_rate": 0.0008749830139964669, "loss": 3.4675, "step": 14725 }, { "epoch": 1.0008153281695882, "grad_norm": 2.316746234893799, "learning_rate": 0.0008749405489876342, "loss": 3.4475, "step": 14730 }, { "epoch": 1.00115504824025, "grad_norm": 1.6870085000991821, "learning_rate": 0.0008748980839788015, "loss": 3.5047, "step": 14735 }, { "epoch": 1.0014947683109119, "grad_norm": 1.8858730792999268, "learning_rate": 0.0008748556189699688, "loss": 3.5066, "step": 14740 }, { "epoch": 1.0018344883815735, "grad_norm": 2.4895291328430176, "learning_rate": 0.000874813153961136, "loss": 3.6431, "step": 14745 }, { "epoch": 1.0021742084522354, "grad_norm": 1.9084696769714355, "learning_rate": 0.0008747706889523033, "loss": 3.3755, "step": 14750 }, { "epoch": 1.0025139285228972, "grad_norm": 1.9209641218185425, "learning_rate": 0.0008747282239434706, "loss": 3.3079, "step": 14755 }, { "epoch": 1.0028536485935589, "grad_norm": 2.740617275238037, "learning_rate": 0.0008746857589346378, "loss": 3.6658, "step": 14760 }, { "epoch": 1.0031933686642207, "grad_norm": 1.736154317855835, "learning_rate": 0.0008746432939258051, "loss": 3.539, "step": 14765 }, { "epoch": 1.0035330887348826, "grad_norm": 1.962472677230835, "learning_rate": 0.0008746008289169725, "loss": 3.4085, "step": 14770 }, { "epoch": 1.0038728088055442, "grad_norm": 2.1270241737365723, "learning_rate": 0.0008745583639081397, 
"loss": 3.5495, "step": 14775 }, { "epoch": 1.004212528876206, "grad_norm": 2.095865249633789, "learning_rate": 0.000874515898899307, "loss": 3.5865, "step": 14780 }, { "epoch": 1.0045522489468677, "grad_norm": 2.251265287399292, "learning_rate": 0.0008744734338904743, "loss": 3.6784, "step": 14785 }, { "epoch": 1.0048919690175295, "grad_norm": 2.2845466136932373, "learning_rate": 0.0008744309688816415, "loss": 3.6211, "step": 14790 }, { "epoch": 1.0052316890881914, "grad_norm": 2.412414073944092, "learning_rate": 0.0008743885038728087, "loss": 3.4759, "step": 14795 }, { "epoch": 1.005571409158853, "grad_norm": 1.9648034572601318, "learning_rate": 0.0008743460388639761, "loss": 3.5183, "step": 14800 }, { "epoch": 1.0059111292295149, "grad_norm": 2.183490037918091, "learning_rate": 0.0008743035738551434, "loss": 3.5268, "step": 14805 }, { "epoch": 1.0062508493001767, "grad_norm": 2.0420267581939697, "learning_rate": 0.0008742611088463106, "loss": 3.4797, "step": 14810 }, { "epoch": 1.0065905693708384, "grad_norm": 1.775578498840332, "learning_rate": 0.000874218643837478, "loss": 3.2966, "step": 14815 }, { "epoch": 1.0069302894415002, "grad_norm": 2.3162667751312256, "learning_rate": 0.0008741761788286452, "loss": 3.8465, "step": 14820 }, { "epoch": 1.007270009512162, "grad_norm": 2.3233158588409424, "learning_rate": 0.0008741337138198124, "loss": 3.6103, "step": 14825 }, { "epoch": 1.0076097295828237, "grad_norm": 1.9374260902404785, "learning_rate": 0.0008740912488109798, "loss": 3.584, "step": 14830 }, { "epoch": 1.0079494496534855, "grad_norm": 2.2106966972351074, "learning_rate": 0.000874048783802147, "loss": 3.5681, "step": 14835 }, { "epoch": 1.0082891697241474, "grad_norm": 1.8707890510559082, "learning_rate": 0.0008740063187933144, "loss": 3.5118, "step": 14840 }, { "epoch": 1.008628889794809, "grad_norm": 1.8829134702682495, "learning_rate": 0.0008739638537844817, "loss": 3.9154, "step": 14845 }, { "epoch": 1.0089686098654709, "grad_norm": 
2.355165481567383, "learning_rate": 0.0008739213887756489, "loss": 3.7959, "step": 14850 }, { "epoch": 1.0093083299361327, "grad_norm": 1.5069011449813843, "learning_rate": 0.0008738789237668162, "loss": 3.863, "step": 14855 }, { "epoch": 1.0096480500067944, "grad_norm": 2.3188679218292236, "learning_rate": 0.0008738364587579834, "loss": 3.4602, "step": 14860 }, { "epoch": 1.0099877700774562, "grad_norm": 2.168625831604004, "learning_rate": 0.0008737939937491507, "loss": 3.4463, "step": 14865 }, { "epoch": 1.0103274901481178, "grad_norm": 2.0413906574249268, "learning_rate": 0.000873751528740318, "loss": 3.5543, "step": 14870 }, { "epoch": 1.0106672102187797, "grad_norm": 1.9804706573486328, "learning_rate": 0.0008737090637314853, "loss": 3.5216, "step": 14875 }, { "epoch": 1.0110069302894416, "grad_norm": 2.500136613845825, "learning_rate": 0.0008736665987226526, "loss": 3.4493, "step": 14880 }, { "epoch": 1.0113466503601032, "grad_norm": 1.6582796573638916, "learning_rate": 0.0008736241337138199, "loss": 3.7002, "step": 14885 }, { "epoch": 1.011686370430765, "grad_norm": 2.0002493858337402, "learning_rate": 0.0008735816687049871, "loss": 3.788, "step": 14890 }, { "epoch": 1.012026090501427, "grad_norm": 1.6450293064117432, "learning_rate": 0.0008735476966979209, "loss": 3.4994, "step": 14895 }, { "epoch": 1.0123658105720885, "grad_norm": 2.517714738845825, "learning_rate": 0.0008735052316890883, "loss": 3.4205, "step": 14900 }, { "epoch": 1.0127055306427504, "grad_norm": 1.730347752571106, "learning_rate": 0.0008734627666802555, "loss": 3.8528, "step": 14905 }, { "epoch": 1.0130452507134122, "grad_norm": 1.5135661363601685, "learning_rate": 0.0008734203016714227, "loss": 3.689, "step": 14910 }, { "epoch": 1.0133849707840739, "grad_norm": 2.0665290355682373, "learning_rate": 0.0008733778366625901, "loss": 3.4661, "step": 14915 }, { "epoch": 1.0137246908547357, "grad_norm": 2.0097975730895996, "learning_rate": 0.0008733353716537573, "loss": 3.4313, "step": 14920 }, 
{ "epoch": 1.0140644109253976, "grad_norm": 1.8058648109436035, "learning_rate": 0.0008732929066449245, "loss": 3.5196, "step": 14925 }, { "epoch": 1.0144041309960592, "grad_norm": 2.4521448612213135, "learning_rate": 0.000873250441636092, "loss": 3.5097, "step": 14930 }, { "epoch": 1.014743851066721, "grad_norm": 2.1440508365631104, "learning_rate": 0.0008732079766272592, "loss": 3.3619, "step": 14935 }, { "epoch": 1.015083571137383, "grad_norm": 1.7532398700714111, "learning_rate": 0.0008731655116184264, "loss": 3.3159, "step": 14940 }, { "epoch": 1.0154232912080445, "grad_norm": 1.912855863571167, "learning_rate": 0.0008731230466095937, "loss": 3.795, "step": 14945 }, { "epoch": 1.0157630112787064, "grad_norm": 1.638986349105835, "learning_rate": 0.000873080581600761, "loss": 3.7168, "step": 14950 }, { "epoch": 1.016102731349368, "grad_norm": 1.6472984552383423, "learning_rate": 0.0008730381165919282, "loss": 3.5047, "step": 14955 }, { "epoch": 1.0164424514200299, "grad_norm": 1.9326499700546265, "learning_rate": 0.0008729956515830955, "loss": 3.5751, "step": 14960 }, { "epoch": 1.0167821714906917, "grad_norm": 2.163098096847534, "learning_rate": 0.0008729531865742629, "loss": 3.5524, "step": 14965 }, { "epoch": 1.0171218915613534, "grad_norm": 1.9033434391021729, "learning_rate": 0.0008729107215654301, "loss": 3.2626, "step": 14970 }, { "epoch": 1.0174616116320152, "grad_norm": 1.8390592336654663, "learning_rate": 0.0008728682565565974, "loss": 3.5163, "step": 14975 }, { "epoch": 1.017801331702677, "grad_norm": 1.9012869596481323, "learning_rate": 0.0008728257915477646, "loss": 3.4542, "step": 14980 }, { "epoch": 1.0181410517733387, "grad_norm": 2.0062949657440186, "learning_rate": 0.0008727833265389319, "loss": 3.693, "step": 14985 }, { "epoch": 1.0184807718440005, "grad_norm": 2.055487871170044, "learning_rate": 0.0008727408615300992, "loss": 3.4501, "step": 14990 }, { "epoch": 1.0188204919146624, "grad_norm": 2.205455780029297, "learning_rate": 
0.0008726983965212664, "loss": 3.6102, "step": 14995 }, { "epoch": 1.019160211985324, "grad_norm": 1.724591851234436, "learning_rate": 0.0008726559315124338, "loss": 3.5707, "step": 15000 }, { "epoch": 1.0194999320559859, "grad_norm": 1.6317620277404785, "learning_rate": 0.0008726134665036011, "loss": 3.3954, "step": 15005 }, { "epoch": 1.0198396521266477, "grad_norm": 1.5667023658752441, "learning_rate": 0.0008725710014947683, "loss": 3.5376, "step": 15010 }, { "epoch": 1.0201793721973094, "grad_norm": 1.616355299949646, "learning_rate": 0.0008725285364859356, "loss": 3.5487, "step": 15015 }, { "epoch": 1.0205190922679712, "grad_norm": 2.2920281887054443, "learning_rate": 0.0008724860714771029, "loss": 3.2898, "step": 15020 }, { "epoch": 1.020858812338633, "grad_norm": 2.182886838912964, "learning_rate": 0.0008724436064682701, "loss": 3.5794, "step": 15025 }, { "epoch": 1.0211985324092947, "grad_norm": 2.229311466217041, "learning_rate": 0.0008724011414594373, "loss": 3.3407, "step": 15030 }, { "epoch": 1.0215382524799566, "grad_norm": 1.5655303001403809, "learning_rate": 0.0008723586764506048, "loss": 3.7403, "step": 15035 }, { "epoch": 1.0218779725506182, "grad_norm": 1.8254441022872925, "learning_rate": 0.000872316211441772, "loss": 3.5876, "step": 15040 }, { "epoch": 1.02221769262128, "grad_norm": 2.0362486839294434, "learning_rate": 0.0008722737464329393, "loss": 3.5052, "step": 15045 }, { "epoch": 1.022557412691942, "grad_norm": 2.363847494125366, "learning_rate": 0.0008722312814241066, "loss": 3.5641, "step": 15050 }, { "epoch": 1.0228971327626035, "grad_norm": 2.0194010734558105, "learning_rate": 0.0008721888164152738, "loss": 3.5333, "step": 15055 }, { "epoch": 1.0232368528332654, "grad_norm": 1.9422698020935059, "learning_rate": 0.0008721463514064411, "loss": 3.5619, "step": 15060 }, { "epoch": 1.0235765729039272, "grad_norm": 2.1006839275360107, "learning_rate": 0.0008721038863976084, "loss": 3.4497, "step": 15065 }, { "epoch": 1.0239162929745889, 
"grad_norm": 2.0777528285980225, "learning_rate": 0.0008720614213887757, "loss": 3.3933, "step": 15070 }, { "epoch": 1.0242560130452507, "grad_norm": 2.1644139289855957, "learning_rate": 0.000872018956379943, "loss": 3.5596, "step": 15075 }, { "epoch": 1.0245957331159126, "grad_norm": 1.79354727268219, "learning_rate": 0.0008719764913711103, "loss": 3.7047, "step": 15080 }, { "epoch": 1.0249354531865742, "grad_norm": 1.9509495496749878, "learning_rate": 0.0008719340263622775, "loss": 3.4853, "step": 15085 }, { "epoch": 1.025275173257236, "grad_norm": 1.9385100603103638, "learning_rate": 0.0008718915613534448, "loss": 3.5553, "step": 15090 }, { "epoch": 1.025614893327898, "grad_norm": 1.769592046737671, "learning_rate": 0.000871849096344612, "loss": 3.6703, "step": 15095 }, { "epoch": 1.0259546133985595, "grad_norm": 1.7328593730926514, "learning_rate": 0.0008718066313357793, "loss": 3.4976, "step": 15100 }, { "epoch": 1.0262943334692214, "grad_norm": 1.8566131591796875, "learning_rate": 0.0008717641663269467, "loss": 3.4991, "step": 15105 }, { "epoch": 1.0266340535398832, "grad_norm": 2.0885040760040283, "learning_rate": 0.0008717217013181139, "loss": 3.5015, "step": 15110 }, { "epoch": 1.0269737736105449, "grad_norm": 2.4964067935943604, "learning_rate": 0.0008716792363092812, "loss": 3.4781, "step": 15115 }, { "epoch": 1.0273134936812067, "grad_norm": 1.597957730293274, "learning_rate": 0.0008716367713004485, "loss": 3.6477, "step": 15120 }, { "epoch": 1.0276532137518684, "grad_norm": 2.130380392074585, "learning_rate": 0.0008715943062916157, "loss": 3.4575, "step": 15125 }, { "epoch": 1.0279929338225302, "grad_norm": 2.119652032852173, "learning_rate": 0.0008715518412827829, "loss": 3.5314, "step": 15130 }, { "epoch": 1.028332653893192, "grad_norm": 2.2687036991119385, "learning_rate": 0.0008715093762739503, "loss": 3.5624, "step": 15135 }, { "epoch": 1.0286723739638537, "grad_norm": 1.5953481197357178, "learning_rate": 0.0008714669112651176, "loss": 3.6819, 
"step": 15140 }, { "epoch": 1.0290120940345155, "grad_norm": 1.714962124824524, "learning_rate": 0.0008714244462562848, "loss": 3.4723, "step": 15145 }, { "epoch": 1.0293518141051774, "grad_norm": 1.957432746887207, "learning_rate": 0.0008713819812474522, "loss": 3.6627, "step": 15150 }, { "epoch": 1.029691534175839, "grad_norm": 1.7226779460906982, "learning_rate": 0.0008713395162386194, "loss": 3.5239, "step": 15155 }, { "epoch": 1.0300312542465009, "grad_norm": 1.7487636804580688, "learning_rate": 0.0008712970512297866, "loss": 3.5798, "step": 15160 }, { "epoch": 1.0303709743171627, "grad_norm": 1.794744849205017, "learning_rate": 0.000871254586220954, "loss": 3.4387, "step": 15165 }, { "epoch": 1.0307106943878244, "grad_norm": 1.448742151260376, "learning_rate": 0.0008712121212121212, "loss": 3.6515, "step": 15170 }, { "epoch": 1.0310504144584862, "grad_norm": 2.2061283588409424, "learning_rate": 0.0008711696562032885, "loss": 3.3411, "step": 15175 }, { "epoch": 1.031390134529148, "grad_norm": 2.4885082244873047, "learning_rate": 0.0008711271911944559, "loss": 3.716, "step": 15180 }, { "epoch": 1.0317298545998097, "grad_norm": 1.5105595588684082, "learning_rate": 0.0008710847261856231, "loss": 3.601, "step": 15185 }, { "epoch": 1.0320695746704716, "grad_norm": 2.060776472091675, "learning_rate": 0.0008710422611767903, "loss": 3.689, "step": 15190 }, { "epoch": 1.0324092947411334, "grad_norm": 1.7198128700256348, "learning_rate": 0.0008709997961679576, "loss": 3.5405, "step": 15195 }, { "epoch": 1.032749014811795, "grad_norm": 1.6731669902801514, "learning_rate": 0.0008709573311591249, "loss": 3.5461, "step": 15200 }, { "epoch": 1.033088734882457, "grad_norm": 1.9810267686843872, "learning_rate": 0.0008709148661502921, "loss": 3.5626, "step": 15205 }, { "epoch": 1.0334284549531185, "grad_norm": 2.5361995697021484, "learning_rate": 0.0008708724011414595, "loss": 3.5936, "step": 15210 }, { "epoch": 1.0337681750237804, "grad_norm": 1.5226508378982544, 
"learning_rate": 0.0008708299361326268, "loss": 3.6739, "step": 15215 }, { "epoch": 1.0341078950944422, "grad_norm": 1.7573224306106567, "learning_rate": 0.000870787471123794, "loss": 3.6527, "step": 15220 }, { "epoch": 1.0344476151651039, "grad_norm": 2.4008219242095947, "learning_rate": 0.0008707450061149613, "loss": 3.4104, "step": 15225 }, { "epoch": 1.0347873352357657, "grad_norm": 2.6567673683166504, "learning_rate": 0.0008707025411061285, "loss": 3.5326, "step": 15230 }, { "epoch": 1.0351270553064276, "grad_norm": 1.5472854375839233, "learning_rate": 0.0008706600760972958, "loss": 3.7697, "step": 15235 }, { "epoch": 1.0354667753770892, "grad_norm": 1.9022748470306396, "learning_rate": 0.0008706176110884631, "loss": 3.5344, "step": 15240 }, { "epoch": 1.035806495447751, "grad_norm": 2.166810989379883, "learning_rate": 0.0008705751460796304, "loss": 3.7076, "step": 15245 }, { "epoch": 1.036146215518413, "grad_norm": 1.6635407209396362, "learning_rate": 0.0008705326810707977, "loss": 3.6076, "step": 15250 }, { "epoch": 1.0364859355890745, "grad_norm": 1.7555835247039795, "learning_rate": 0.000870490216061965, "loss": 3.6557, "step": 15255 }, { "epoch": 1.0368256556597364, "grad_norm": 1.8358813524246216, "learning_rate": 0.0008704477510531322, "loss": 3.6118, "step": 15260 }, { "epoch": 1.0371653757303982, "grad_norm": 1.7055326700210571, "learning_rate": 0.0008704052860442995, "loss": 3.5223, "step": 15265 }, { "epoch": 1.0375050958010599, "grad_norm": 1.6890265941619873, "learning_rate": 0.0008703628210354668, "loss": 3.6157, "step": 15270 }, { "epoch": 1.0378448158717217, "grad_norm": 2.9776954650878906, "learning_rate": 0.000870320356026634, "loss": 3.3847, "step": 15275 }, { "epoch": 1.0381845359423836, "grad_norm": 1.8452388048171997, "learning_rate": 0.0008702778910178013, "loss": 3.6081, "step": 15280 }, { "epoch": 1.0385242560130452, "grad_norm": 1.9113622903823853, "learning_rate": 0.0008702354260089687, "loss": 3.6934, "step": 15285 }, { "epoch": 
1.038863976083707, "grad_norm": 1.4852412939071655, "learning_rate": 0.0008701929610001359, "loss": 3.9342, "step": 15290 }, { "epoch": 1.0392036961543687, "grad_norm": 2.19572114944458, "learning_rate": 0.0008701504959913031, "loss": 3.5169, "step": 15295 }, { "epoch": 1.0395434162250305, "grad_norm": 1.7379204034805298, "learning_rate": 0.0008701080309824705, "loss": 3.5742, "step": 15300 }, { "epoch": 1.0398831362956924, "grad_norm": 2.285370111465454, "learning_rate": 0.0008700655659736377, "loss": 3.6061, "step": 15305 }, { "epoch": 1.040222856366354, "grad_norm": 2.330362558364868, "learning_rate": 0.0008700231009648049, "loss": 3.1957, "step": 15310 }, { "epoch": 1.0405625764370159, "grad_norm": 2.3586573600769043, "learning_rate": 0.0008699806359559724, "loss": 3.6524, "step": 15315 }, { "epoch": 1.0409022965076777, "grad_norm": 2.2692251205444336, "learning_rate": 0.0008699381709471396, "loss": 3.483, "step": 15320 }, { "epoch": 1.0412420165783394, "grad_norm": 1.856572151184082, "learning_rate": 0.0008698957059383068, "loss": 3.5101, "step": 15325 }, { "epoch": 1.0415817366490012, "grad_norm": 1.7623182535171509, "learning_rate": 0.0008698532409294741, "loss": 3.5615, "step": 15330 }, { "epoch": 1.041921456719663, "grad_norm": 1.9587610960006714, "learning_rate": 0.0008698107759206414, "loss": 3.6458, "step": 15335 }, { "epoch": 1.0422611767903247, "grad_norm": 1.733918309211731, "learning_rate": 0.0008697683109118086, "loss": 3.8405, "step": 15340 }, { "epoch": 1.0426008968609866, "grad_norm": 1.8518199920654297, "learning_rate": 0.000869725845902976, "loss": 3.5491, "step": 15345 }, { "epoch": 1.0429406169316484, "grad_norm": 1.7768478393554688, "learning_rate": 0.0008696833808941433, "loss": 3.3946, "step": 15350 }, { "epoch": 1.04328033700231, "grad_norm": 1.8478847742080688, "learning_rate": 0.0008696409158853105, "loss": 3.3243, "step": 15355 }, { "epoch": 1.043620057072972, "grad_norm": 1.6116282939910889, "learning_rate": 0.0008695984508764778, 
"loss": 3.4999, "step": 15360 }, { "epoch": 1.0439597771436337, "grad_norm": 2.1055612564086914, "learning_rate": 0.000869555985867645, "loss": 3.3399, "step": 15365 }, { "epoch": 1.0442994972142954, "grad_norm": 2.0469298362731934, "learning_rate": 0.0008695135208588123, "loss": 3.6861, "step": 15370 }, { "epoch": 1.0446392172849572, "grad_norm": 1.7754013538360596, "learning_rate": 0.0008694710558499796, "loss": 3.4696, "step": 15375 }, { "epoch": 1.0449789373556189, "grad_norm": 1.6844810247421265, "learning_rate": 0.000869428590841147, "loss": 3.3527, "step": 15380 }, { "epoch": 1.0453186574262807, "grad_norm": 2.448235273361206, "learning_rate": 0.0008693861258323143, "loss": 3.3906, "step": 15385 }, { "epoch": 1.0456583774969426, "grad_norm": 2.388754367828369, "learning_rate": 0.0008693436608234815, "loss": 3.4922, "step": 15390 }, { "epoch": 1.0459980975676042, "grad_norm": 1.6881991624832153, "learning_rate": 0.0008693011958146487, "loss": 3.2395, "step": 15395 }, { "epoch": 1.046337817638266, "grad_norm": 2.7861831188201904, "learning_rate": 0.0008692587308058161, "loss": 3.5017, "step": 15400 }, { "epoch": 1.046677537708928, "grad_norm": 2.820462226867676, "learning_rate": 0.0008692162657969833, "loss": 3.5062, "step": 15405 }, { "epoch": 1.0470172577795895, "grad_norm": 3.1144607067108154, "learning_rate": 0.0008691738007881505, "loss": 3.5086, "step": 15410 }, { "epoch": 1.0473569778502514, "grad_norm": 1.8766608238220215, "learning_rate": 0.000869131335779318, "loss": 3.784, "step": 15415 }, { "epoch": 1.0476966979209132, "grad_norm": 2.1938633918762207, "learning_rate": 0.0008690888707704852, "loss": 3.4125, "step": 15420 }, { "epoch": 1.0480364179915749, "grad_norm": 1.5662120580673218, "learning_rate": 0.0008690464057616524, "loss": 3.4674, "step": 15425 }, { "epoch": 1.0483761380622367, "grad_norm": 2.113255023956299, "learning_rate": 0.0008690039407528197, "loss": 3.3888, "step": 15430 }, { "epoch": 1.0487158581328986, "grad_norm": 
1.7646269798278809, "learning_rate": 0.000868961475743987, "loss": 3.5054, "step": 15435 }, { "epoch": 1.0490555782035602, "grad_norm": 1.3706932067871094, "learning_rate": 0.0008689190107351542, "loss": 3.468, "step": 15440 }, { "epoch": 1.049395298274222, "grad_norm": 2.34065842628479, "learning_rate": 0.0008688765457263215, "loss": 3.4182, "step": 15445 }, { "epoch": 1.049735018344884, "grad_norm": 1.754632830619812, "learning_rate": 0.0008688340807174889, "loss": 3.7338, "step": 15450 }, { "epoch": 1.0500747384155455, "grad_norm": 1.9267313480377197, "learning_rate": 0.0008687916157086561, "loss": 3.5059, "step": 15455 }, { "epoch": 1.0504144584862074, "grad_norm": 2.5275115966796875, "learning_rate": 0.0008687491506998234, "loss": 3.4032, "step": 15460 }, { "epoch": 1.050754178556869, "grad_norm": 1.8873320817947388, "learning_rate": 0.0008687066856909907, "loss": 3.6823, "step": 15465 }, { "epoch": 1.0510938986275309, "grad_norm": 2.117192506790161, "learning_rate": 0.0008686642206821579, "loss": 3.6614, "step": 15470 }, { "epoch": 1.0514336186981927, "grad_norm": 1.7981576919555664, "learning_rate": 0.0008686217556733252, "loss": 3.4579, "step": 15475 }, { "epoch": 1.0517733387688544, "grad_norm": 1.9507906436920166, "learning_rate": 0.0008685792906644924, "loss": 3.5779, "step": 15480 }, { "epoch": 1.0521130588395162, "grad_norm": 1.9763658046722412, "learning_rate": 0.0008685368256556598, "loss": 3.6987, "step": 15485 }, { "epoch": 1.052452778910178, "grad_norm": 1.5064700841903687, "learning_rate": 0.0008684943606468271, "loss": 3.6609, "step": 15490 }, { "epoch": 1.0527924989808397, "grad_norm": 2.0286896228790283, "learning_rate": 0.0008684518956379943, "loss": 3.4093, "step": 15495 }, { "epoch": 1.0531322190515016, "grad_norm": 1.7230660915374756, "learning_rate": 0.0008684094306291616, "loss": 3.3685, "step": 15500 }, { "epoch": 1.0534719391221634, "grad_norm": 2.1386780738830566, "learning_rate": 0.0008683669656203289, "loss": 3.6168, "step": 15505 
}, { "epoch": 1.053811659192825, "grad_norm": 1.6745182275772095, "learning_rate": 0.0008683245006114961, "loss": 3.7339, "step": 15510 }, { "epoch": 1.054151379263487, "grad_norm": 2.160470724105835, "learning_rate": 0.0008682820356026633, "loss": 3.4457, "step": 15515 }, { "epoch": 1.0544910993341488, "grad_norm": 2.0753681659698486, "learning_rate": 0.0008682395705938308, "loss": 3.7271, "step": 15520 }, { "epoch": 1.0548308194048104, "grad_norm": 2.0650453567504883, "learning_rate": 0.000868197105584998, "loss": 3.4877, "step": 15525 }, { "epoch": 1.0551705394754722, "grad_norm": 1.8525198698043823, "learning_rate": 0.0008681546405761652, "loss": 3.4489, "step": 15530 }, { "epoch": 1.055510259546134, "grad_norm": 2.317668914794922, "learning_rate": 0.0008681121755673326, "loss": 3.7556, "step": 15535 }, { "epoch": 1.0558499796167957, "grad_norm": 2.275883674621582, "learning_rate": 0.0008680697105584998, "loss": 3.5722, "step": 15540 }, { "epoch": 1.0561896996874576, "grad_norm": 1.4869105815887451, "learning_rate": 0.000868027245549667, "loss": 3.6769, "step": 15545 }, { "epoch": 1.0565294197581192, "grad_norm": 1.812619924545288, "learning_rate": 0.0008679847805408344, "loss": 3.5874, "step": 15550 }, { "epoch": 1.056869139828781, "grad_norm": 1.638025164604187, "learning_rate": 0.0008679423155320017, "loss": 3.3869, "step": 15555 }, { "epoch": 1.057208859899443, "grad_norm": 2.043907403945923, "learning_rate": 0.0008678998505231689, "loss": 3.2403, "step": 15560 }, { "epoch": 1.0575485799701045, "grad_norm": 2.0180890560150146, "learning_rate": 0.0008678573855143363, "loss": 3.6347, "step": 15565 }, { "epoch": 1.0578883000407664, "grad_norm": 2.434389591217041, "learning_rate": 0.0008678149205055035, "loss": 3.6758, "step": 15570 }, { "epoch": 1.0582280201114282, "grad_norm": 2.272862672805786, "learning_rate": 0.0008677724554966707, "loss": 3.401, "step": 15575 }, { "epoch": 1.0585677401820899, "grad_norm": 1.7870267629623413, "learning_rate": 
0.000867729990487838, "loss": 3.6004, "step": 15580 }, { "epoch": 1.0589074602527517, "grad_norm": 1.9373098611831665, "learning_rate": 0.0008676875254790053, "loss": 3.5798, "step": 15585 }, { "epoch": 1.0592471803234136, "grad_norm": 1.6638004779815674, "learning_rate": 0.0008676450604701726, "loss": 3.7343, "step": 15590 }, { "epoch": 1.0595869003940752, "grad_norm": 1.9312357902526855, "learning_rate": 0.0008676025954613399, "loss": 3.6386, "step": 15595 }, { "epoch": 1.059926620464737, "grad_norm": 1.380387306213379, "learning_rate": 0.0008675601304525072, "loss": 3.5915, "step": 15600 }, { "epoch": 1.060266340535399, "grad_norm": 1.7869572639465332, "learning_rate": 0.0008675176654436744, "loss": 3.4426, "step": 15605 }, { "epoch": 1.0606060606060606, "grad_norm": 2.306274890899658, "learning_rate": 0.0008674752004348417, "loss": 3.6258, "step": 15610 }, { "epoch": 1.0609457806767224, "grad_norm": 1.7239903211593628, "learning_rate": 0.000867432735426009, "loss": 3.563, "step": 15615 }, { "epoch": 1.0612855007473843, "grad_norm": 1.7357815504074097, "learning_rate": 0.0008673902704171762, "loss": 3.8822, "step": 15620 }, { "epoch": 1.061625220818046, "grad_norm": 1.8367563486099243, "learning_rate": 0.0008673478054083436, "loss": 3.5722, "step": 15625 }, { "epoch": 1.0619649408887077, "grad_norm": 1.5500743389129639, "learning_rate": 0.0008673053403995108, "loss": 3.5139, "step": 15630 }, { "epoch": 1.0623046609593694, "grad_norm": 2.13507342338562, "learning_rate": 0.0008672628753906781, "loss": 3.7559, "step": 15635 }, { "epoch": 1.0626443810300312, "grad_norm": 2.4314653873443604, "learning_rate": 0.0008672204103818454, "loss": 3.4343, "step": 15640 }, { "epoch": 1.062984101100693, "grad_norm": 1.7294831275939941, "learning_rate": 0.0008671779453730126, "loss": 3.6032, "step": 15645 }, { "epoch": 1.0633238211713547, "grad_norm": 1.8644754886627197, "learning_rate": 0.0008671354803641799, "loss": 3.4586, "step": 15650 }, { "epoch": 1.0636635412420166, 
"grad_norm": 2.2781710624694824, "learning_rate": 0.0008670930153553472, "loss": 3.583, "step": 15655 }, { "epoch": 1.0640032613126784, "grad_norm": 2.3216145038604736, "learning_rate": 0.0008670505503465145, "loss": 3.498, "step": 15660 }, { "epoch": 1.06434298138334, "grad_norm": 1.7159181833267212, "learning_rate": 0.0008670080853376817, "loss": 3.4933, "step": 15665 }, { "epoch": 1.064682701454002, "grad_norm": 1.8575037717819214, "learning_rate": 0.0008669656203288491, "loss": 3.3869, "step": 15670 }, { "epoch": 1.0650224215246638, "grad_norm": 2.7465641498565674, "learning_rate": 0.0008669231553200163, "loss": 3.481, "step": 15675 }, { "epoch": 1.0653621415953254, "grad_norm": 2.0552265644073486, "learning_rate": 0.0008668806903111835, "loss": 3.7168, "step": 15680 }, { "epoch": 1.0657018616659872, "grad_norm": 2.3784584999084473, "learning_rate": 0.0008668382253023509, "loss": 3.4617, "step": 15685 }, { "epoch": 1.066041581736649, "grad_norm": 1.608978509902954, "learning_rate": 0.0008667957602935181, "loss": 3.6041, "step": 15690 }, { "epoch": 1.0663813018073107, "grad_norm": 2.046257972717285, "learning_rate": 0.0008667532952846854, "loss": 3.6065, "step": 15695 }, { "epoch": 1.0667210218779726, "grad_norm": 1.8170822858810425, "learning_rate": 0.0008667108302758528, "loss": 3.5651, "step": 15700 }, { "epoch": 1.0670607419486344, "grad_norm": 1.92103111743927, "learning_rate": 0.00086666836526702, "loss": 3.2111, "step": 15705 }, { "epoch": 1.067400462019296, "grad_norm": 1.7082716226577759, "learning_rate": 0.0008666259002581872, "loss": 3.4499, "step": 15710 }, { "epoch": 1.067740182089958, "grad_norm": 2.223968267440796, "learning_rate": 0.0008665834352493546, "loss": 3.5054, "step": 15715 }, { "epoch": 1.0680799021606195, "grad_norm": 1.8091564178466797, "learning_rate": 0.0008665409702405218, "loss": 3.5439, "step": 15720 }, { "epoch": 1.0684196222312814, "grad_norm": 2.0097153186798096, "learning_rate": 0.0008664985052316891, "loss": 3.5401, "step": 
15725 }, { "epoch": 1.0687593423019432, "grad_norm": 2.0006895065307617, "learning_rate": 0.0008664560402228564, "loss": 3.4907, "step": 15730 }, { "epoch": 1.0690990623726049, "grad_norm": 1.745009422302246, "learning_rate": 0.0008664135752140237, "loss": 3.6534, "step": 15735 }, { "epoch": 1.0694387824432667, "grad_norm": 1.904919147491455, "learning_rate": 0.000866371110205191, "loss": 3.6485, "step": 15740 }, { "epoch": 1.0697785025139286, "grad_norm": 1.7624326944351196, "learning_rate": 0.0008663286451963582, "loss": 3.4267, "step": 15745 }, { "epoch": 1.0701182225845902, "grad_norm": 1.7379952669143677, "learning_rate": 0.0008662861801875255, "loss": 3.2119, "step": 15750 }, { "epoch": 1.070457942655252, "grad_norm": 2.1288902759552, "learning_rate": 0.0008662437151786928, "loss": 3.8678, "step": 15755 }, { "epoch": 1.070797662725914, "grad_norm": 2.175440549850464, "learning_rate": 0.00086620125016986, "loss": 3.5127, "step": 15760 }, { "epoch": 1.0711373827965756, "grad_norm": 1.7428317070007324, "learning_rate": 0.0008661587851610274, "loss": 3.6739, "step": 15765 }, { "epoch": 1.0714771028672374, "grad_norm": 1.763629674911499, "learning_rate": 0.0008661163201521947, "loss": 3.2853, "step": 15770 }, { "epoch": 1.0718168229378993, "grad_norm": 1.9584739208221436, "learning_rate": 0.0008660738551433619, "loss": 3.6369, "step": 15775 }, { "epoch": 1.072156543008561, "grad_norm": 1.967901349067688, "learning_rate": 0.0008660313901345291, "loss": 3.5886, "step": 15780 }, { "epoch": 1.0724962630792227, "grad_norm": 1.71491539478302, "learning_rate": 0.0008659889251256965, "loss": 3.3688, "step": 15785 }, { "epoch": 1.0728359831498846, "grad_norm": 1.8440542221069336, "learning_rate": 0.0008659464601168637, "loss": 3.5199, "step": 15790 }, { "epoch": 1.0731757032205462, "grad_norm": 2.0137457847595215, "learning_rate": 0.0008659039951080309, "loss": 3.7374, "step": 15795 }, { "epoch": 1.073515423291208, "grad_norm": 2.277247190475464, "learning_rate": 
0.0008658615300991984, "loss": 3.5602, "step": 15800 }, { "epoch": 1.07385514336187, "grad_norm": 1.7136021852493286, "learning_rate": 0.0008658190650903656, "loss": 3.6886, "step": 15805 }, { "epoch": 1.0741948634325316, "grad_norm": 1.467106819152832, "learning_rate": 0.0008657766000815328, "loss": 3.4531, "step": 15810 }, { "epoch": 1.0745345835031934, "grad_norm": 1.9710341691970825, "learning_rate": 0.0008657341350727002, "loss": 3.7089, "step": 15815 }, { "epoch": 1.074874303573855, "grad_norm": 1.5835366249084473, "learning_rate": 0.0008656916700638674, "loss": 3.5385, "step": 15820 }, { "epoch": 1.075214023644517, "grad_norm": 1.9405211210250854, "learning_rate": 0.0008656492050550346, "loss": 3.6628, "step": 15825 }, { "epoch": 1.0755537437151788, "grad_norm": 1.7881320714950562, "learning_rate": 0.0008656067400462019, "loss": 3.5861, "step": 15830 }, { "epoch": 1.0758934637858404, "grad_norm": 1.8588788509368896, "learning_rate": 0.0008655642750373693, "loss": 3.2974, "step": 15835 }, { "epoch": 1.0762331838565022, "grad_norm": 1.8056217432022095, "learning_rate": 0.0008655218100285365, "loss": 3.5153, "step": 15840 }, { "epoch": 1.076572903927164, "grad_norm": 2.160036563873291, "learning_rate": 0.0008654793450197038, "loss": 3.1483, "step": 15845 }, { "epoch": 1.0769126239978257, "grad_norm": 1.7892941236495972, "learning_rate": 0.0008654368800108711, "loss": 3.2864, "step": 15850 }, { "epoch": 1.0772523440684876, "grad_norm": 1.6635992527008057, "learning_rate": 0.0008653944150020383, "loss": 3.5383, "step": 15855 }, { "epoch": 1.0775920641391494, "grad_norm": 2.067166805267334, "learning_rate": 0.0008653519499932056, "loss": 3.7895, "step": 15860 }, { "epoch": 1.077931784209811, "grad_norm": 2.0790491104125977, "learning_rate": 0.0008653094849843728, "loss": 3.3826, "step": 15865 }, { "epoch": 1.078271504280473, "grad_norm": 1.741557240486145, "learning_rate": 0.0008652670199755402, "loss": 3.5556, "step": 15870 }, { "epoch": 1.0786112243511348, 
"grad_norm": 1.8770644664764404, "learning_rate": 0.0008652245549667075, "loss": 3.4625, "step": 15875 }, { "epoch": 1.0789509444217964, "grad_norm": 2.2747609615325928, "learning_rate": 0.0008651820899578747, "loss": 3.4784, "step": 15880 }, { "epoch": 1.0792906644924583, "grad_norm": 1.7923911809921265, "learning_rate": 0.000865139624949042, "loss": 3.6742, "step": 15885 }, { "epoch": 1.0796303845631199, "grad_norm": 1.7864614725112915, "learning_rate": 0.0008650971599402093, "loss": 3.4187, "step": 15890 }, { "epoch": 1.0799701046337817, "grad_norm": 1.7740347385406494, "learning_rate": 0.0008650546949313765, "loss": 3.5687, "step": 15895 }, { "epoch": 1.0803098247044436, "grad_norm": 2.1028828620910645, "learning_rate": 0.0008650122299225438, "loss": 3.2268, "step": 15900 }, { "epoch": 1.0806495447751052, "grad_norm": 1.861507534980774, "learning_rate": 0.0008649697649137112, "loss": 3.6943, "step": 15905 }, { "epoch": 1.080989264845767, "grad_norm": 2.117854356765747, "learning_rate": 0.0008649272999048784, "loss": 3.8197, "step": 15910 }, { "epoch": 1.081328984916429, "grad_norm": 2.2001826763153076, "learning_rate": 0.0008648848348960456, "loss": 3.6691, "step": 15915 }, { "epoch": 1.0816687049870906, "grad_norm": 2.494863748550415, "learning_rate": 0.000864842369887213, "loss": 3.4976, "step": 15920 }, { "epoch": 1.0820084250577524, "grad_norm": 1.8104077577590942, "learning_rate": 0.0008647999048783802, "loss": 3.4746, "step": 15925 }, { "epoch": 1.0823481451284143, "grad_norm": 1.5699657201766968, "learning_rate": 0.0008647574398695474, "loss": 3.4559, "step": 15930 }, { "epoch": 1.082687865199076, "grad_norm": 2.015615701675415, "learning_rate": 0.0008647149748607149, "loss": 3.4388, "step": 15935 }, { "epoch": 1.0830275852697377, "grad_norm": 2.9350075721740723, "learning_rate": 0.0008646725098518821, "loss": 3.6616, "step": 15940 }, { "epoch": 1.0833673053403996, "grad_norm": 1.934022307395935, "learning_rate": 0.0008646300448430493, "loss": 3.5616, 
"step": 15945 }, { "epoch": 1.0837070254110612, "grad_norm": 1.8070998191833496, "learning_rate": 0.0008645875798342167, "loss": 3.4112, "step": 15950 }, { "epoch": 1.084046745481723, "grad_norm": 1.8542884588241577, "learning_rate": 0.0008645451148253839, "loss": 3.6901, "step": 15955 }, { "epoch": 1.084386465552385, "grad_norm": 1.8621578216552734, "learning_rate": 0.0008645026498165511, "loss": 3.4687, "step": 15960 }, { "epoch": 1.0847261856230466, "grad_norm": 1.9774243831634521, "learning_rate": 0.0008644601848077184, "loss": 3.6429, "step": 15965 }, { "epoch": 1.0850659056937084, "grad_norm": 1.5711113214492798, "learning_rate": 0.0008644177197988858, "loss": 3.459, "step": 15970 }, { "epoch": 1.0854056257643703, "grad_norm": 1.7809581756591797, "learning_rate": 0.000864375254790053, "loss": 3.7065, "step": 15975 }, { "epoch": 1.085745345835032, "grad_norm": 1.7707101106643677, "learning_rate": 0.0008643327897812203, "loss": 3.7112, "step": 15980 }, { "epoch": 1.0860850659056938, "grad_norm": 1.919883370399475, "learning_rate": 0.0008642903247723876, "loss": 3.5879, "step": 15985 }, { "epoch": 1.0864247859763554, "grad_norm": 1.667800784111023, "learning_rate": 0.0008642478597635548, "loss": 3.4974, "step": 15990 }, { "epoch": 1.0867645060470172, "grad_norm": 1.8029980659484863, "learning_rate": 0.0008642053947547221, "loss": 3.6201, "step": 15995 }, { "epoch": 1.087104226117679, "grad_norm": 2.1279075145721436, "learning_rate": 0.0008641629297458894, "loss": 3.6694, "step": 16000 }, { "epoch": 1.0874439461883407, "grad_norm": 1.7459523677825928, "learning_rate": 0.0008641204647370567, "loss": 3.5982, "step": 16005 }, { "epoch": 1.0877836662590026, "grad_norm": 2.182878255844116, "learning_rate": 0.000864077999728224, "loss": 3.8387, "step": 16010 }, { "epoch": 1.0881233863296644, "grad_norm": 1.4032326936721802, "learning_rate": 0.0008640355347193912, "loss": 3.5167, "step": 16015 }, { "epoch": 1.088463106400326, "grad_norm": 1.706626057624817, 
"learning_rate": 0.0008639930697105585, "loss": 3.4945, "step": 16020 }, { "epoch": 1.088802826470988, "grad_norm": 1.6049808263778687, "learning_rate": 0.0008639506047017258, "loss": 3.4756, "step": 16025 }, { "epoch": 1.0891425465416498, "grad_norm": 1.8091461658477783, "learning_rate": 0.000863908139692893, "loss": 3.4845, "step": 16030 }, { "epoch": 1.0894822666123114, "grad_norm": 1.8610219955444336, "learning_rate": 0.0008638656746840603, "loss": 3.4218, "step": 16035 }, { "epoch": 1.0898219866829733, "grad_norm": 1.890720248222351, "learning_rate": 0.0008638232096752277, "loss": 3.3817, "step": 16040 }, { "epoch": 1.090161706753635, "grad_norm": 2.398329257965088, "learning_rate": 0.0008637807446663949, "loss": 3.3717, "step": 16045 }, { "epoch": 1.0905014268242967, "grad_norm": 1.407028317451477, "learning_rate": 0.0008637382796575622, "loss": 3.7889, "step": 16050 }, { "epoch": 1.0908411468949586, "grad_norm": 1.6028591394424438, "learning_rate": 0.0008636958146487295, "loss": 3.5536, "step": 16055 }, { "epoch": 1.0911808669656202, "grad_norm": 1.9573651552200317, "learning_rate": 0.0008636533496398967, "loss": 3.3643, "step": 16060 }, { "epoch": 1.091520587036282, "grad_norm": 2.006328582763672, "learning_rate": 0.000863610884631064, "loss": 3.5902, "step": 16065 }, { "epoch": 1.091860307106944, "grad_norm": 2.3047945499420166, "learning_rate": 0.0008635684196222313, "loss": 3.5127, "step": 16070 }, { "epoch": 1.0922000271776056, "grad_norm": 1.7655564546585083, "learning_rate": 0.0008635259546133986, "loss": 3.6031, "step": 16075 }, { "epoch": 1.0925397472482674, "grad_norm": 1.83333420753479, "learning_rate": 0.0008634834896045659, "loss": 3.6063, "step": 16080 }, { "epoch": 1.0928794673189293, "grad_norm": 2.1019504070281982, "learning_rate": 0.0008634410245957332, "loss": 3.5111, "step": 16085 }, { "epoch": 1.093219187389591, "grad_norm": 2.019373893737793, "learning_rate": 0.0008633985595869004, "loss": 3.3201, "step": 16090 }, { "epoch": 
1.0935589074602527, "grad_norm": 2.4055802822113037, "learning_rate": 0.0008633560945780677, "loss": 3.7316, "step": 16095 }, { "epoch": 1.0938986275309146, "grad_norm": 1.946518063545227, "learning_rate": 0.000863313629569235, "loss": 3.7084, "step": 16100 }, { "epoch": 1.0942383476015762, "grad_norm": 1.7929294109344482, "learning_rate": 0.0008632711645604022, "loss": 3.4753, "step": 16105 }, { "epoch": 1.094578067672238, "grad_norm": 2.029458999633789, "learning_rate": 0.0008632286995515696, "loss": 3.5441, "step": 16110 }, { "epoch": 1.0949177877429, "grad_norm": 1.920196294784546, "learning_rate": 0.0008631862345427368, "loss": 3.6169, "step": 16115 }, { "epoch": 1.0952575078135616, "grad_norm": 1.522337555885315, "learning_rate": 0.0008631437695339041, "loss": 3.5872, "step": 16120 }, { "epoch": 1.0955972278842234, "grad_norm": 2.365062952041626, "learning_rate": 0.0008631013045250714, "loss": 3.4801, "step": 16125 }, { "epoch": 1.0959369479548853, "grad_norm": 1.6821391582489014, "learning_rate": 0.0008630588395162386, "loss": 3.7328, "step": 16130 }, { "epoch": 1.096276668025547, "grad_norm": 1.9787356853485107, "learning_rate": 0.0008630163745074059, "loss": 3.3259, "step": 16135 }, { "epoch": 1.0966163880962088, "grad_norm": 1.534199833869934, "learning_rate": 0.0008629739094985732, "loss": 3.671, "step": 16140 }, { "epoch": 1.0969561081668706, "grad_norm": 1.65921151638031, "learning_rate": 0.0008629314444897405, "loss": 3.5059, "step": 16145 }, { "epoch": 1.0972958282375322, "grad_norm": 1.5793408155441284, "learning_rate": 0.0008628889794809078, "loss": 3.5717, "step": 16150 }, { "epoch": 1.097635548308194, "grad_norm": 1.7471755743026733, "learning_rate": 0.0008628465144720751, "loss": 3.541, "step": 16155 }, { "epoch": 1.0979752683788557, "grad_norm": 1.6465954780578613, "learning_rate": 0.0008628040494632423, "loss": 3.4508, "step": 16160 }, { "epoch": 1.0983149884495176, "grad_norm": 2.2017982006073, "learning_rate": 0.0008627615844544095, "loss": 
3.4426, "step": 16165 }, { "epoch": 1.0986547085201794, "grad_norm": 2.273348808288574, "learning_rate": 0.0008627191194455769, "loss": 3.5287, "step": 16170 }, { "epoch": 1.098994428590841, "grad_norm": 2.4217026233673096, "learning_rate": 0.0008626766544367441, "loss": 3.6097, "step": 16175 }, { "epoch": 1.099334148661503, "grad_norm": 2.0745649337768555, "learning_rate": 0.0008626341894279114, "loss": 3.5741, "step": 16180 }, { "epoch": 1.0996738687321648, "grad_norm": 1.5751808881759644, "learning_rate": 0.0008625917244190788, "loss": 3.6439, "step": 16185 }, { "epoch": 1.1000135888028264, "grad_norm": 1.9278700351715088, "learning_rate": 0.000862549259410246, "loss": 3.2444, "step": 16190 }, { "epoch": 1.1003533088734883, "grad_norm": 1.9483592510223389, "learning_rate": 0.0008625067944014132, "loss": 3.6234, "step": 16195 }, { "epoch": 1.10069302894415, "grad_norm": 1.7531538009643555, "learning_rate": 0.0008624643293925806, "loss": 3.505, "step": 16200 }, { "epoch": 1.1010327490148117, "grad_norm": 1.5275884866714478, "learning_rate": 0.0008624218643837478, "loss": 3.5655, "step": 16205 }, { "epoch": 1.1013724690854736, "grad_norm": 1.6072789430618286, "learning_rate": 0.000862379399374915, "loss": 3.3561, "step": 16210 }, { "epoch": 1.1017121891561354, "grad_norm": 1.82156240940094, "learning_rate": 0.0008623369343660824, "loss": 3.3176, "step": 16215 }, { "epoch": 1.102051909226797, "grad_norm": 1.647696614265442, "learning_rate": 0.0008622944693572497, "loss": 3.549, "step": 16220 }, { "epoch": 1.102391629297459, "grad_norm": 1.906937837600708, "learning_rate": 0.0008622520043484169, "loss": 3.7132, "step": 16225 }, { "epoch": 1.1027313493681206, "grad_norm": 1.8517951965332031, "learning_rate": 0.0008622095393395842, "loss": 3.6024, "step": 16230 }, { "epoch": 1.1030710694387824, "grad_norm": 2.0773751735687256, "learning_rate": 0.0008621670743307515, "loss": 3.6617, "step": 16235 }, { "epoch": 1.1034107895094443, "grad_norm": 1.5703932046890259, 
"learning_rate": 0.0008621246093219187, "loss": 3.6948, "step": 16240 }, { "epoch": 1.103750509580106, "grad_norm": 1.7424570322036743, "learning_rate": 0.000862082144313086, "loss": 3.6292, "step": 16245 }, { "epoch": 1.1040902296507678, "grad_norm": 1.889528512954712, "learning_rate": 0.0008620396793042534, "loss": 3.5821, "step": 16250 }, { "epoch": 1.1044299497214296, "grad_norm": 2.043821096420288, "learning_rate": 0.0008619972142954206, "loss": 3.5188, "step": 16255 }, { "epoch": 1.1047696697920912, "grad_norm": 1.8238500356674194, "learning_rate": 0.0008619547492865879, "loss": 3.3813, "step": 16260 }, { "epoch": 1.105109389862753, "grad_norm": 2.1774392127990723, "learning_rate": 0.0008619122842777551, "loss": 3.4, "step": 16265 }, { "epoch": 1.105449109933415, "grad_norm": 2.460782766342163, "learning_rate": 0.0008618698192689224, "loss": 3.3719, "step": 16270 }, { "epoch": 1.1057888300040766, "grad_norm": 1.8477303981781006, "learning_rate": 0.0008618273542600897, "loss": 3.6966, "step": 16275 }, { "epoch": 1.1061285500747384, "grad_norm": 1.971618413925171, "learning_rate": 0.0008617848892512569, "loss": 3.4834, "step": 16280 }, { "epoch": 1.1064682701454003, "grad_norm": 1.8015130758285522, "learning_rate": 0.0008617424242424243, "loss": 3.5115, "step": 16285 }, { "epoch": 1.106807990216062, "grad_norm": 2.083879232406616, "learning_rate": 0.0008616999592335916, "loss": 3.5536, "step": 16290 }, { "epoch": 1.1071477102867238, "grad_norm": 2.1391918659210205, "learning_rate": 0.0008616574942247588, "loss": 3.3132, "step": 16295 }, { "epoch": 1.1074874303573856, "grad_norm": 2.969728469848633, "learning_rate": 0.000861615029215926, "loss": 3.2407, "step": 16300 }, { "epoch": 1.1078271504280472, "grad_norm": 1.5937721729278564, "learning_rate": 0.0008615725642070934, "loss": 3.4266, "step": 16305 }, { "epoch": 1.108166870498709, "grad_norm": 1.7710217237472534, "learning_rate": 0.0008615300991982606, "loss": 3.7019, "step": 16310 }, { "epoch": 
1.108506590569371, "grad_norm": 1.844078540802002, "learning_rate": 0.0008614876341894278, "loss": 3.3571, "step": 16315 }, { "epoch": 1.1088463106400326, "grad_norm": 2.1357226371765137, "learning_rate": 0.0008614451691805953, "loss": 3.7013, "step": 16320 }, { "epoch": 1.1091860307106944, "grad_norm": 1.5517311096191406, "learning_rate": 0.0008614027041717625, "loss": 3.5627, "step": 16325 }, { "epoch": 1.109525750781356, "grad_norm": 2.3539044857025146, "learning_rate": 0.0008613602391629297, "loss": 3.4592, "step": 16330 }, { "epoch": 1.109865470852018, "grad_norm": 1.6040375232696533, "learning_rate": 0.0008613177741540971, "loss": 3.6735, "step": 16335 }, { "epoch": 1.1102051909226798, "grad_norm": 1.8127402067184448, "learning_rate": 0.0008612753091452643, "loss": 3.3963, "step": 16340 }, { "epoch": 1.1105449109933414, "grad_norm": 1.7555068731307983, "learning_rate": 0.0008612328441364315, "loss": 3.7292, "step": 16345 }, { "epoch": 1.1108846310640033, "grad_norm": 1.573702096939087, "learning_rate": 0.0008611903791275988, "loss": 3.5063, "step": 16350 }, { "epoch": 1.1112243511346651, "grad_norm": 2.254734516143799, "learning_rate": 0.0008611479141187662, "loss": 3.5944, "step": 16355 }, { "epoch": 1.1115640712053267, "grad_norm": 1.7881487607955933, "learning_rate": 0.0008611054491099334, "loss": 3.2758, "step": 16360 }, { "epoch": 1.1119037912759886, "grad_norm": 1.4455369710922241, "learning_rate": 0.0008610629841011007, "loss": 3.4881, "step": 16365 }, { "epoch": 1.1122435113466504, "grad_norm": 1.7377865314483643, "learning_rate": 0.000861020519092268, "loss": 3.4727, "step": 16370 }, { "epoch": 1.112583231417312, "grad_norm": 1.7004138231277466, "learning_rate": 0.0008609780540834352, "loss": 3.2695, "step": 16375 }, { "epoch": 1.112922951487974, "grad_norm": 1.9803085327148438, "learning_rate": 0.0008609355890746025, "loss": 3.6944, "step": 16380 }, { "epoch": 1.1132626715586358, "grad_norm": 1.7921953201293945, "learning_rate": 
0.0008608931240657698, "loss": 3.7254, "step": 16385 }, { "epoch": 1.1136023916292974, "grad_norm": 1.766756296157837, "learning_rate": 0.0008608506590569371, "loss": 3.8349, "step": 16390 }, { "epoch": 1.1139421116999593, "grad_norm": 1.6824524402618408, "learning_rate": 0.0008608081940481044, "loss": 3.7469, "step": 16395 }, { "epoch": 1.114281831770621, "grad_norm": 1.8381779193878174, "learning_rate": 0.0008607657290392716, "loss": 3.4954, "step": 16400 }, { "epoch": 1.1146215518412828, "grad_norm": 2.066818952560425, "learning_rate": 0.000860723264030439, "loss": 3.4143, "step": 16405 }, { "epoch": 1.1149612719119446, "grad_norm": 1.8614917993545532, "learning_rate": 0.0008606807990216062, "loss": 3.6506, "step": 16410 }, { "epoch": 1.1153009919826062, "grad_norm": 2.2002761363983154, "learning_rate": 0.0008606383340127734, "loss": 3.5762, "step": 16415 }, { "epoch": 1.115640712053268, "grad_norm": 1.9427475929260254, "learning_rate": 0.0008605958690039408, "loss": 3.5982, "step": 16420 }, { "epoch": 1.11598043212393, "grad_norm": 1.7392160892486572, "learning_rate": 0.0008605534039951081, "loss": 3.4208, "step": 16425 }, { "epoch": 1.1163201521945916, "grad_norm": 2.233673572540283, "learning_rate": 0.0008605109389862753, "loss": 3.6686, "step": 16430 }, { "epoch": 1.1166598722652534, "grad_norm": 2.531463861465454, "learning_rate": 0.0008604684739774427, "loss": 3.5697, "step": 16435 }, { "epoch": 1.1169995923359153, "grad_norm": 2.123476028442383, "learning_rate": 0.0008604260089686099, "loss": 3.5132, "step": 16440 }, { "epoch": 1.117339312406577, "grad_norm": 2.2227587699890137, "learning_rate": 0.0008603835439597771, "loss": 3.5152, "step": 16445 }, { "epoch": 1.1176790324772388, "grad_norm": 1.980911374092102, "learning_rate": 0.0008603410789509445, "loss": 3.8571, "step": 16450 }, { "epoch": 1.1180187525479006, "grad_norm": 1.796059250831604, "learning_rate": 0.0008602986139421117, "loss": 3.6068, "step": 16455 }, { "epoch": 1.1183584726185622, 
"grad_norm": 2.6120548248291016, "learning_rate": 0.000860256148933279, "loss": 3.6141, "step": 16460 }, { "epoch": 1.118698192689224, "grad_norm": 2.0036826133728027, "learning_rate": 0.0008602136839244463, "loss": 3.8053, "step": 16465 }, { "epoch": 1.119037912759886, "grad_norm": 2.4349749088287354, "learning_rate": 0.0008601712189156136, "loss": 3.3608, "step": 16470 }, { "epoch": 1.1193776328305476, "grad_norm": 1.3883602619171143, "learning_rate": 0.0008601287539067808, "loss": 3.5764, "step": 16475 }, { "epoch": 1.1197173529012094, "grad_norm": 1.754997730255127, "learning_rate": 0.0008600862888979481, "loss": 3.5994, "step": 16480 }, { "epoch": 1.1200570729718713, "grad_norm": 1.7742875814437866, "learning_rate": 0.0008600438238891154, "loss": 3.5009, "step": 16485 }, { "epoch": 1.120396793042533, "grad_norm": 2.0200395584106445, "learning_rate": 0.0008600013588802826, "loss": 3.5628, "step": 16490 }, { "epoch": 1.1207365131131948, "grad_norm": 1.5311100482940674, "learning_rate": 0.00085995889387145, "loss": 3.4869, "step": 16495 }, { "epoch": 1.1210762331838564, "grad_norm": 1.9200016260147095, "learning_rate": 0.0008599164288626173, "loss": 3.6441, "step": 16500 }, { "epoch": 1.1214159532545183, "grad_norm": 1.6596509218215942, "learning_rate": 0.0008598739638537845, "loss": 3.5293, "step": 16505 }, { "epoch": 1.1217556733251801, "grad_norm": 1.7607593536376953, "learning_rate": 0.0008598314988449518, "loss": 3.4701, "step": 16510 }, { "epoch": 1.1220953933958417, "grad_norm": 1.684966802597046, "learning_rate": 0.000859789033836119, "loss": 3.5025, "step": 16515 }, { "epoch": 1.1224351134665036, "grad_norm": 2.053370237350464, "learning_rate": 0.0008597465688272863, "loss": 3.6476, "step": 16520 }, { "epoch": 1.1227748335371655, "grad_norm": 1.7267569303512573, "learning_rate": 0.0008597041038184537, "loss": 3.4376, "step": 16525 }, { "epoch": 1.123114553607827, "grad_norm": 1.7427246570587158, "learning_rate": 0.0008596616388096209, "loss": 3.5715, 
"step": 16530 }, { "epoch": 1.123454273678489, "grad_norm": 1.848954439163208, "learning_rate": 0.0008596191738007882, "loss": 3.432, "step": 16535 }, { "epoch": 1.1237939937491508, "grad_norm": 1.8493072986602783, "learning_rate": 0.0008595767087919555, "loss": 3.7233, "step": 16540 }, { "epoch": 1.1241337138198124, "grad_norm": 1.8013818264007568, "learning_rate": 0.0008595342437831227, "loss": 3.5875, "step": 16545 }, { "epoch": 1.1244734338904743, "grad_norm": 2.024517774581909, "learning_rate": 0.0008594917787742899, "loss": 3.5176, "step": 16550 }, { "epoch": 1.1248131539611361, "grad_norm": 2.019306182861328, "learning_rate": 0.0008594493137654573, "loss": 3.5047, "step": 16555 }, { "epoch": 1.1251528740317978, "grad_norm": 2.0104217529296875, "learning_rate": 0.0008594068487566246, "loss": 3.5901, "step": 16560 }, { "epoch": 1.1254925941024596, "grad_norm": 2.0284318923950195, "learning_rate": 0.0008593643837477918, "loss": 3.6236, "step": 16565 }, { "epoch": 1.1258323141731212, "grad_norm": 2.115154504776001, "learning_rate": 0.0008593219187389592, "loss": 3.5298, "step": 16570 }, { "epoch": 1.126172034243783, "grad_norm": 2.1600797176361084, "learning_rate": 0.0008592794537301264, "loss": 3.6673, "step": 16575 }, { "epoch": 1.126511754314445, "grad_norm": 1.849574327468872, "learning_rate": 0.0008592369887212936, "loss": 3.6788, "step": 16580 }, { "epoch": 1.1268514743851066, "grad_norm": 1.3924534320831299, "learning_rate": 0.000859194523712461, "loss": 3.6197, "step": 16585 }, { "epoch": 1.1271911944557684, "grad_norm": 2.5842738151550293, "learning_rate": 0.0008591520587036282, "loss": 3.5302, "step": 16590 }, { "epoch": 1.1275309145264303, "grad_norm": 1.9251682758331299, "learning_rate": 0.0008591095936947955, "loss": 3.826, "step": 16595 }, { "epoch": 1.127870634597092, "grad_norm": 2.361985206604004, "learning_rate": 0.0008590671286859629, "loss": 3.5599, "step": 16600 }, { "epoch": 1.1282103546677538, "grad_norm": 1.937642216682434, 
"learning_rate": 0.0008590246636771301, "loss": 3.5323, "step": 16605 }, { "epoch": 1.1285500747384156, "grad_norm": 1.9583410024642944, "learning_rate": 0.0008589821986682973, "loss": 3.6721, "step": 16610 }, { "epoch": 1.1288897948090773, "grad_norm": 2.016066312789917, "learning_rate": 0.0008589397336594646, "loss": 3.6174, "step": 16615 }, { "epoch": 1.129229514879739, "grad_norm": 1.9423311948776245, "learning_rate": 0.0008588972686506319, "loss": 3.3941, "step": 16620 }, { "epoch": 1.129569234950401, "grad_norm": 2.4609386920928955, "learning_rate": 0.0008588548036417991, "loss": 3.5248, "step": 16625 }, { "epoch": 1.1299089550210626, "grad_norm": 2.435246229171753, "learning_rate": 0.0008588123386329665, "loss": 3.3765, "step": 16630 }, { "epoch": 1.1302486750917244, "grad_norm": 1.7279891967773438, "learning_rate": 0.0008587698736241338, "loss": 3.5752, "step": 16635 }, { "epoch": 1.1305883951623863, "grad_norm": 2.028611660003662, "learning_rate": 0.000858727408615301, "loss": 3.6868, "step": 16640 }, { "epoch": 1.130928115233048, "grad_norm": 1.996147632598877, "learning_rate": 0.0008586849436064683, "loss": 3.5686, "step": 16645 }, { "epoch": 1.1312678353037098, "grad_norm": 2.0746898651123047, "learning_rate": 0.0008586424785976355, "loss": 3.8447, "step": 16650 }, { "epoch": 1.1316075553743716, "grad_norm": 1.8856121301651, "learning_rate": 0.0008586000135888028, "loss": 3.3046, "step": 16655 }, { "epoch": 1.1319472754450333, "grad_norm": 1.8720864057540894, "learning_rate": 0.0008585575485799701, "loss": 3.5578, "step": 16660 }, { "epoch": 1.1322869955156951, "grad_norm": 1.806776762008667, "learning_rate": 0.0008585150835711374, "loss": 3.4126, "step": 16665 }, { "epoch": 1.1326267155863567, "grad_norm": 2.013883590698242, "learning_rate": 0.0008584726185623047, "loss": 3.6189, "step": 16670 }, { "epoch": 1.1329664356570186, "grad_norm": 1.8568984270095825, "learning_rate": 0.000858430153553472, "loss": 3.5407, "step": 16675 }, { "epoch": 
1.1333061557276805, "grad_norm": 2.266979694366455, "learning_rate": 0.0008583876885446392, "loss": 3.5805, "step": 16680 }, { "epoch": 1.133645875798342, "grad_norm": 1.6309233903884888, "learning_rate": 0.0008583452235358065, "loss": 3.5912, "step": 16685 }, { "epoch": 1.133985595869004, "grad_norm": 2.4077610969543457, "learning_rate": 0.0008583027585269738, "loss": 3.6305, "step": 16690 }, { "epoch": 1.1343253159396658, "grad_norm": 1.788483738899231, "learning_rate": 0.000858260293518141, "loss": 3.5583, "step": 16695 }, { "epoch": 1.1346650360103274, "grad_norm": 1.9598952531814575, "learning_rate": 0.0008582178285093083, "loss": 3.4249, "step": 16700 }, { "epoch": 1.1350047560809893, "grad_norm": 1.682187557220459, "learning_rate": 0.0008581753635004757, "loss": 3.6547, "step": 16705 }, { "epoch": 1.1353444761516511, "grad_norm": 2.04107666015625, "learning_rate": 0.0008581328984916429, "loss": 3.6069, "step": 16710 }, { "epoch": 1.1356841962223128, "grad_norm": 1.7568902969360352, "learning_rate": 0.0008580904334828101, "loss": 3.5946, "step": 16715 }, { "epoch": 1.1360239162929746, "grad_norm": 1.8484389781951904, "learning_rate": 0.0008580479684739775, "loss": 3.7556, "step": 16720 }, { "epoch": 1.1363636363636362, "grad_norm": 1.993487000465393, "learning_rate": 0.0008580055034651447, "loss": 3.6427, "step": 16725 }, { "epoch": 1.136703356434298, "grad_norm": 1.81662917137146, "learning_rate": 0.0008579630384563119, "loss": 3.664, "step": 16730 }, { "epoch": 1.13704307650496, "grad_norm": 1.9655038118362427, "learning_rate": 0.0008579205734474794, "loss": 3.6404, "step": 16735 }, { "epoch": 1.1373827965756216, "grad_norm": 2.3199784755706787, "learning_rate": 0.0008578781084386466, "loss": 3.5111, "step": 16740 }, { "epoch": 1.1377225166462834, "grad_norm": 1.9788190126419067, "learning_rate": 0.0008578356434298139, "loss": 3.5031, "step": 16745 }, { "epoch": 1.1380622367169453, "grad_norm": 2.1566262245178223, "learning_rate": 0.0008577931784209811, 
"loss": 3.5871, "step": 16750 }, { "epoch": 1.138401956787607, "grad_norm": 2.4860706329345703, "learning_rate": 0.0008577507134121484, "loss": 3.7566, "step": 16755 }, { "epoch": 1.1387416768582688, "grad_norm": 2.183373212814331, "learning_rate": 0.0008577082484033157, "loss": 3.5191, "step": 16760 }, { "epoch": 1.1390813969289306, "grad_norm": 1.6791890859603882, "learning_rate": 0.0008576657833944829, "loss": 3.7298, "step": 16765 }, { "epoch": 1.1394211169995923, "grad_norm": 2.0944371223449707, "learning_rate": 0.0008576233183856503, "loss": 3.5699, "step": 16770 }, { "epoch": 1.139760837070254, "grad_norm": 2.0099680423736572, "learning_rate": 0.0008575808533768176, "loss": 3.599, "step": 16775 }, { "epoch": 1.140100557140916, "grad_norm": 1.7928192615509033, "learning_rate": 0.0008575383883679848, "loss": 3.541, "step": 16780 }, { "epoch": 1.1404402772115776, "grad_norm": 1.9941149950027466, "learning_rate": 0.000857495923359152, "loss": 3.5636, "step": 16785 }, { "epoch": 1.1407799972822394, "grad_norm": 1.815636396408081, "learning_rate": 0.0008574534583503194, "loss": 3.782, "step": 16790 }, { "epoch": 1.1411197173529013, "grad_norm": 2.3302810192108154, "learning_rate": 0.0008574109933414866, "loss": 3.3109, "step": 16795 }, { "epoch": 1.141459437423563, "grad_norm": 2.0622191429138184, "learning_rate": 0.0008573685283326538, "loss": 3.5935, "step": 16800 }, { "epoch": 1.1417991574942248, "grad_norm": 2.1324551105499268, "learning_rate": 0.0008573260633238213, "loss": 3.595, "step": 16805 }, { "epoch": 1.1421388775648866, "grad_norm": 2.0254037380218506, "learning_rate": 0.0008572835983149885, "loss": 3.638, "step": 16810 }, { "epoch": 1.1424785976355483, "grad_norm": 2.6176977157592773, "learning_rate": 0.0008572411333061557, "loss": 3.5577, "step": 16815 }, { "epoch": 1.1428183177062101, "grad_norm": 1.9444864988327026, "learning_rate": 0.0008571986682973231, "loss": 3.4313, "step": 16820 }, { "epoch": 1.143158037776872, "grad_norm": 
1.956196904182434, "learning_rate": 0.0008571562032884903, "loss": 3.4931, "step": 16825 }, { "epoch": 1.1434977578475336, "grad_norm": 1.718850016593933, "learning_rate": 0.0008571137382796575, "loss": 3.5417, "step": 16830 }, { "epoch": 1.1438374779181955, "grad_norm": 1.8330146074295044, "learning_rate": 0.0008570712732708249, "loss": 3.713, "step": 16835 }, { "epoch": 1.144177197988857, "grad_norm": 2.186615467071533, "learning_rate": 0.0008570288082619922, "loss": 3.6537, "step": 16840 }, { "epoch": 1.144516918059519, "grad_norm": 1.5709896087646484, "learning_rate": 0.0008569863432531594, "loss": 3.5099, "step": 16845 }, { "epoch": 1.1448566381301808, "grad_norm": 2.059382677078247, "learning_rate": 0.0008569438782443267, "loss": 3.6493, "step": 16850 }, { "epoch": 1.1451963582008424, "grad_norm": 1.9322395324707031, "learning_rate": 0.000856901413235494, "loss": 3.4363, "step": 16855 }, { "epoch": 1.1455360782715043, "grad_norm": 1.438854455947876, "learning_rate": 0.0008568589482266612, "loss": 3.5962, "step": 16860 }, { "epoch": 1.1458757983421661, "grad_norm": 2.0887491703033447, "learning_rate": 0.0008568164832178285, "loss": 3.4332, "step": 16865 }, { "epoch": 1.1462155184128278, "grad_norm": 1.9976816177368164, "learning_rate": 0.0008567740182089958, "loss": 3.5658, "step": 16870 }, { "epoch": 1.1465552384834896, "grad_norm": 2.0575666427612305, "learning_rate": 0.0008567315532001631, "loss": 3.7438, "step": 16875 }, { "epoch": 1.1468949585541515, "grad_norm": 1.8802634477615356, "learning_rate": 0.0008566890881913304, "loss": 3.4609, "step": 16880 }, { "epoch": 1.147234678624813, "grad_norm": 1.9002468585968018, "learning_rate": 0.0008566466231824977, "loss": 3.605, "step": 16885 }, { "epoch": 1.147574398695475, "grad_norm": 2.032700300216675, "learning_rate": 0.0008566041581736649, "loss": 3.5409, "step": 16890 }, { "epoch": 1.1479141187661366, "grad_norm": 2.108625888824463, "learning_rate": 0.0008565616931648322, "loss": 3.6069, "step": 16895 }, { 
"epoch": 1.1482538388367984, "grad_norm": 1.775633692741394, "learning_rate": 0.0008565192281559994, "loss": 3.4433, "step": 16900 }, { "epoch": 1.1485935589074603, "grad_norm": 1.6514999866485596, "learning_rate": 0.0008564767631471667, "loss": 3.7155, "step": 16905 }, { "epoch": 1.148933278978122, "grad_norm": 1.9495784044265747, "learning_rate": 0.0008564342981383341, "loss": 3.4462, "step": 16910 }, { "epoch": 1.1492729990487838, "grad_norm": 1.8487318754196167, "learning_rate": 0.0008563918331295013, "loss": 3.4084, "step": 16915 }, { "epoch": 1.1496127191194456, "grad_norm": 2.193416118621826, "learning_rate": 0.0008563493681206686, "loss": 3.4127, "step": 16920 }, { "epoch": 1.1499524391901073, "grad_norm": 1.9636510610580444, "learning_rate": 0.0008563069031118359, "loss": 3.5077, "step": 16925 }, { "epoch": 1.150292159260769, "grad_norm": 2.1804914474487305, "learning_rate": 0.0008562644381030031, "loss": 3.6549, "step": 16930 }, { "epoch": 1.150631879331431, "grad_norm": 2.2871828079223633, "learning_rate": 0.0008562219730941703, "loss": 3.48, "step": 16935 }, { "epoch": 1.1509715994020926, "grad_norm": 1.5739065408706665, "learning_rate": 0.0008561795080853377, "loss": 3.7291, "step": 16940 }, { "epoch": 1.1513113194727544, "grad_norm": 1.7206246852874756, "learning_rate": 0.000856137043076505, "loss": 3.4456, "step": 16945 }, { "epoch": 1.1516510395434163, "grad_norm": 1.763586401939392, "learning_rate": 0.0008560945780676722, "loss": 3.5324, "step": 16950 }, { "epoch": 1.151990759614078, "grad_norm": 1.6490496397018433, "learning_rate": 0.0008560521130588396, "loss": 3.2657, "step": 16955 }, { "epoch": 1.1523304796847398, "grad_norm": 2.6761202812194824, "learning_rate": 0.0008560096480500068, "loss": 3.3978, "step": 16960 }, { "epoch": 1.1526701997554016, "grad_norm": 1.6831036806106567, "learning_rate": 0.000855967183041174, "loss": 3.4579, "step": 16965 }, { "epoch": 1.1530099198260633, "grad_norm": 2.3157753944396973, "learning_rate": 
0.0008559247180323414, "loss": 3.5408, "step": 16970 }, { "epoch": 1.1533496398967251, "grad_norm": 2.013523578643799, "learning_rate": 0.0008558822530235086, "loss": 3.3516, "step": 16975 }, { "epoch": 1.153689359967387, "grad_norm": 2.484408378601074, "learning_rate": 0.0008558397880146759, "loss": 3.5199, "step": 16980 }, { "epoch": 1.1540290800380486, "grad_norm": 2.3716819286346436, "learning_rate": 0.0008557973230058433, "loss": 3.707, "step": 16985 }, { "epoch": 1.1543688001087105, "grad_norm": 1.9487395286560059, "learning_rate": 0.0008557548579970105, "loss": 3.3806, "step": 16990 }, { "epoch": 1.1547085201793723, "grad_norm": 1.7973018884658813, "learning_rate": 0.0008557123929881777, "loss": 3.5239, "step": 16995 }, { "epoch": 1.155048240250034, "grad_norm": 2.007960557937622, "learning_rate": 0.000855669927979345, "loss": 3.5252, "step": 17000 }, { "epoch": 1.1553879603206958, "grad_norm": 2.4030954837799072, "learning_rate": 0.0008556274629705123, "loss": 3.633, "step": 17005 }, { "epoch": 1.1557276803913574, "grad_norm": 1.9913227558135986, "learning_rate": 0.0008555849979616795, "loss": 3.7079, "step": 17010 }, { "epoch": 1.1560674004620193, "grad_norm": 1.5912976264953613, "learning_rate": 0.0008555425329528469, "loss": 3.634, "step": 17015 }, { "epoch": 1.1564071205326811, "grad_norm": 1.6033474206924438, "learning_rate": 0.0008555000679440142, "loss": 3.6189, "step": 17020 }, { "epoch": 1.1567468406033428, "grad_norm": 2.1592485904693604, "learning_rate": 0.0008554576029351814, "loss": 3.8851, "step": 17025 }, { "epoch": 1.1570865606740046, "grad_norm": 2.040064573287964, "learning_rate": 0.0008554151379263487, "loss": 3.574, "step": 17030 }, { "epoch": 1.1574262807446665, "grad_norm": 2.7419917583465576, "learning_rate": 0.000855372672917516, "loss": 3.558, "step": 17035 }, { "epoch": 1.157766000815328, "grad_norm": 2.332113742828369, "learning_rate": 0.0008553302079086832, "loss": 3.3421, "step": 17040 }, { "epoch": 1.15810572088599, 
"grad_norm": 1.544045329093933, "learning_rate": 0.0008552877428998506, "loss": 3.8776, "step": 17045 }, { "epoch": 1.1584454409566518, "grad_norm": 1.9740737676620483, "learning_rate": 0.0008552452778910178, "loss": 3.3142, "step": 17050 }, { "epoch": 1.1587851610273134, "grad_norm": 1.463136076927185, "learning_rate": 0.0008552028128821851, "loss": 3.618, "step": 17055 }, { "epoch": 1.1591248810979753, "grad_norm": 1.820013403892517, "learning_rate": 0.0008551603478733524, "loss": 3.4952, "step": 17060 }, { "epoch": 1.159464601168637, "grad_norm": 1.7520593404769897, "learning_rate": 0.0008551178828645196, "loss": 3.5963, "step": 17065 }, { "epoch": 1.1598043212392988, "grad_norm": 1.689084768295288, "learning_rate": 0.0008550754178556869, "loss": 3.5808, "step": 17070 }, { "epoch": 1.1601440413099606, "grad_norm": 1.9541704654693604, "learning_rate": 0.0008550329528468542, "loss": 3.6019, "step": 17075 }, { "epoch": 1.1604837613806223, "grad_norm": 2.440030813217163, "learning_rate": 0.0008549904878380215, "loss": 3.3794, "step": 17080 }, { "epoch": 1.1608234814512841, "grad_norm": 2.5204880237579346, "learning_rate": 0.0008549480228291889, "loss": 3.5096, "step": 17085 }, { "epoch": 1.161163201521946, "grad_norm": 1.9244213104248047, "learning_rate": 0.0008549055578203561, "loss": 3.4605, "step": 17090 }, { "epoch": 1.1615029215926076, "grad_norm": 1.7722175121307373, "learning_rate": 0.0008548630928115233, "loss": 3.5842, "step": 17095 }, { "epoch": 1.1618426416632694, "grad_norm": 1.7773661613464355, "learning_rate": 0.0008548206278026906, "loss": 3.4373, "step": 17100 }, { "epoch": 1.1621823617339313, "grad_norm": 1.8302110433578491, "learning_rate": 0.0008547781627938579, "loss": 3.5857, "step": 17105 }, { "epoch": 1.162522081804593, "grad_norm": 1.5325363874435425, "learning_rate": 0.0008547356977850251, "loss": 3.3361, "step": 17110 }, { "epoch": 1.1628618018752548, "grad_norm": 1.979843258857727, "learning_rate": 0.0008546932327761925, "loss": 3.4874, 
"step": 17115 }, { "epoch": 1.1632015219459166, "grad_norm": 1.7791088819503784, "learning_rate": 0.0008546507677673598, "loss": 3.4465, "step": 17120 }, { "epoch": 1.1635412420165783, "grad_norm": 1.7820944786071777, "learning_rate": 0.000854608302758527, "loss": 3.427, "step": 17125 }, { "epoch": 1.1638809620872401, "grad_norm": 1.4628201723098755, "learning_rate": 0.0008545658377496943, "loss": 3.8874, "step": 17130 }, { "epoch": 1.164220682157902, "grad_norm": 2.057918071746826, "learning_rate": 0.0008545233727408615, "loss": 3.4225, "step": 17135 }, { "epoch": 1.1645604022285636, "grad_norm": 1.8763885498046875, "learning_rate": 0.0008544809077320288, "loss": 3.7303, "step": 17140 }, { "epoch": 1.1649001222992255, "grad_norm": 2.321974515914917, "learning_rate": 0.0008544384427231961, "loss": 3.5705, "step": 17145 }, { "epoch": 1.1652398423698873, "grad_norm": 2.686845064163208, "learning_rate": 0.0008543959777143634, "loss": 3.6166, "step": 17150 }, { "epoch": 1.165579562440549, "grad_norm": 1.4943121671676636, "learning_rate": 0.0008543535127055307, "loss": 3.5459, "step": 17155 }, { "epoch": 1.1659192825112108, "grad_norm": 1.7118414640426636, "learning_rate": 0.000854311047696698, "loss": 3.6799, "step": 17160 }, { "epoch": 1.1662590025818727, "grad_norm": 1.835471272468567, "learning_rate": 0.0008542685826878652, "loss": 3.6055, "step": 17165 }, { "epoch": 1.1665987226525343, "grad_norm": 1.6482820510864258, "learning_rate": 0.0008542261176790325, "loss": 3.4517, "step": 17170 }, { "epoch": 1.1669384427231961, "grad_norm": 2.2045562267303467, "learning_rate": 0.0008541836526701998, "loss": 3.7395, "step": 17175 }, { "epoch": 1.1672781627938578, "grad_norm": 1.7475917339324951, "learning_rate": 0.000854141187661367, "loss": 3.6726, "step": 17180 }, { "epoch": 1.1676178828645196, "grad_norm": 1.725072979927063, "learning_rate": 0.0008540987226525344, "loss": 3.7263, "step": 17185 }, { "epoch": 1.1679576029351815, "grad_norm": 2.0314626693725586, 
"learning_rate": 0.0008540562576437017, "loss": 3.4026, "step": 17190 }, { "epoch": 1.168297323005843, "grad_norm": 2.403137683868408, "learning_rate": 0.0008540137926348689, "loss": 3.5099, "step": 17195 }, { "epoch": 1.168637043076505, "grad_norm": 2.0431580543518066, "learning_rate": 0.0008539713276260361, "loss": 3.7239, "step": 17200 }, { "epoch": 1.1689767631471668, "grad_norm": 1.5595005750656128, "learning_rate": 0.0008539288626172035, "loss": 3.4944, "step": 17205 }, { "epoch": 1.1693164832178284, "grad_norm": 1.7592273950576782, "learning_rate": 0.0008538863976083707, "loss": 3.5402, "step": 17210 }, { "epoch": 1.1696562032884903, "grad_norm": 1.9774879217147827, "learning_rate": 0.0008538439325995379, "loss": 3.6049, "step": 17215 }, { "epoch": 1.1699959233591521, "grad_norm": 1.4884467124938965, "learning_rate": 0.0008538014675907054, "loss": 3.6524, "step": 17220 }, { "epoch": 1.1703356434298138, "grad_norm": 1.6910085678100586, "learning_rate": 0.0008537590025818726, "loss": 3.4931, "step": 17225 }, { "epoch": 1.1706753635004756, "grad_norm": 2.1924288272857666, "learning_rate": 0.0008537165375730398, "loss": 3.5379, "step": 17230 }, { "epoch": 1.1710150835711373, "grad_norm": 1.9595152139663696, "learning_rate": 0.0008536740725642072, "loss": 3.3124, "step": 17235 }, { "epoch": 1.1713548036417991, "grad_norm": 2.4013512134552, "learning_rate": 0.0008536316075553744, "loss": 3.3804, "step": 17240 }, { "epoch": 1.171694523712461, "grad_norm": 2.196016550064087, "learning_rate": 0.0008535891425465416, "loss": 3.5523, "step": 17245 }, { "epoch": 1.1720342437831226, "grad_norm": 2.480201244354248, "learning_rate": 0.0008535466775377089, "loss": 3.5436, "step": 17250 }, { "epoch": 1.1723739638537845, "grad_norm": 2.0646753311157227, "learning_rate": 0.0008535042125288763, "loss": 3.7164, "step": 17255 }, { "epoch": 1.1727136839244463, "grad_norm": 1.792047142982483, "learning_rate": 0.0008534617475200435, "loss": 3.5897, "step": 17260 }, { "epoch": 
1.173053403995108, "grad_norm": 1.6170724630355835, "learning_rate": 0.0008534192825112108, "loss": 3.489, "step": 17265 }, { "epoch": 1.1733931240657698, "grad_norm": 1.944118618965149, "learning_rate": 0.0008533768175023781, "loss": 3.4034, "step": 17270 }, { "epoch": 1.1737328441364316, "grad_norm": 2.0456202030181885, "learning_rate": 0.0008533343524935453, "loss": 3.2811, "step": 17275 }, { "epoch": 1.1740725642070933, "grad_norm": 2.08939790725708, "learning_rate": 0.0008532918874847126, "loss": 3.7528, "step": 17280 }, { "epoch": 1.1744122842777551, "grad_norm": 2.1020443439483643, "learning_rate": 0.0008532494224758798, "loss": 3.4633, "step": 17285 }, { "epoch": 1.174752004348417, "grad_norm": 1.771389365196228, "learning_rate": 0.0008532069574670472, "loss": 3.6497, "step": 17290 }, { "epoch": 1.1750917244190786, "grad_norm": 1.7864038944244385, "learning_rate": 0.0008531644924582145, "loss": 3.6201, "step": 17295 }, { "epoch": 1.1754314444897405, "grad_norm": 1.7178056240081787, "learning_rate": 0.0008531220274493817, "loss": 3.719, "step": 17300 }, { "epoch": 1.1757711645604023, "grad_norm": 1.8654197454452515, "learning_rate": 0.000853079562440549, "loss": 3.6403, "step": 17305 }, { "epoch": 1.176110884631064, "grad_norm": 2.1643381118774414, "learning_rate": 0.0008530370974317163, "loss": 3.6835, "step": 17310 }, { "epoch": 1.1764506047017258, "grad_norm": 1.823142170906067, "learning_rate": 0.0008529946324228835, "loss": 3.5247, "step": 17315 }, { "epoch": 1.1767903247723877, "grad_norm": 1.6363978385925293, "learning_rate": 0.0008529521674140508, "loss": 3.7443, "step": 17320 }, { "epoch": 1.1771300448430493, "grad_norm": 2.033047914505005, "learning_rate": 0.0008529097024052182, "loss": 3.4949, "step": 17325 }, { "epoch": 1.1774697649137111, "grad_norm": 1.4762217998504639, "learning_rate": 0.0008528672373963854, "loss": 3.8277, "step": 17330 }, { "epoch": 1.177809484984373, "grad_norm": 1.3127846717834473, "learning_rate": 0.0008528247723875526, 
"loss": 3.682, "step": 17335 }, { "epoch": 1.1781492050550346, "grad_norm": 1.9907234907150269, "learning_rate": 0.00085278230737872, "loss": 3.6716, "step": 17340 }, { "epoch": 1.1784889251256965, "grad_norm": 2.1019768714904785, "learning_rate": 0.0008527398423698872, "loss": 3.6306, "step": 17345 }, { "epoch": 1.178828645196358, "grad_norm": 1.7817714214324951, "learning_rate": 0.0008526973773610544, "loss": 3.5, "step": 17350 }, { "epoch": 1.17916836526702, "grad_norm": 1.7036272287368774, "learning_rate": 0.0008526549123522218, "loss": 3.3913, "step": 17355 }, { "epoch": 1.1795080853376818, "grad_norm": 1.9824670553207397, "learning_rate": 0.0008526124473433891, "loss": 3.6705, "step": 17360 }, { "epoch": 1.1798478054083434, "grad_norm": 2.348332643508911, "learning_rate": 0.0008525699823345563, "loss": 3.2672, "step": 17365 }, { "epoch": 1.1801875254790053, "grad_norm": 1.7929391860961914, "learning_rate": 0.0008525275173257237, "loss": 3.4866, "step": 17370 }, { "epoch": 1.1805272455496671, "grad_norm": 1.7665647268295288, "learning_rate": 0.0008524850523168909, "loss": 3.419, "step": 17375 }, { "epoch": 1.1808669656203288, "grad_norm": 1.7996807098388672, "learning_rate": 0.0008524425873080581, "loss": 3.5885, "step": 17380 }, { "epoch": 1.1812066856909906, "grad_norm": 1.9758511781692505, "learning_rate": 0.0008524001222992254, "loss": 3.3748, "step": 17385 }, { "epoch": 1.1815464057616525, "grad_norm": 2.24991774559021, "learning_rate": 0.0008523576572903927, "loss": 3.5672, "step": 17390 }, { "epoch": 1.1818861258323141, "grad_norm": 2.431373357772827, "learning_rate": 0.00085231519228156, "loss": 3.4271, "step": 17395 }, { "epoch": 1.182225845902976, "grad_norm": 1.6901240348815918, "learning_rate": 0.0008522727272727273, "loss": 3.3773, "step": 17400 }, { "epoch": 1.1825655659736376, "grad_norm": 2.115805149078369, "learning_rate": 0.0008522302622638946, "loss": 3.4535, "step": 17405 }, { "epoch": 1.1829052860442995, "grad_norm": 1.7195649147033691, 
"learning_rate": 0.0008521877972550618, "loss": 3.7018, "step": 17410 }, { "epoch": 1.1832450061149613, "grad_norm": 1.8287990093231201, "learning_rate": 0.0008521453322462291, "loss": 3.4512, "step": 17415 }, { "epoch": 1.183584726185623, "grad_norm": 3.4681975841522217, "learning_rate": 0.0008521028672373964, "loss": 3.3969, "step": 17420 }, { "epoch": 1.1839244462562848, "grad_norm": 1.900041937828064, "learning_rate": 0.0008520604022285637, "loss": 3.4967, "step": 17425 }, { "epoch": 1.1842641663269466, "grad_norm": 1.8381807804107666, "learning_rate": 0.000852017937219731, "loss": 3.6893, "step": 17430 }, { "epoch": 1.1846038863976083, "grad_norm": 1.4903849363327026, "learning_rate": 0.0008519754722108982, "loss": 3.7479, "step": 17435 }, { "epoch": 1.1849436064682701, "grad_norm": 2.0111091136932373, "learning_rate": 0.0008519330072020656, "loss": 3.4611, "step": 17440 }, { "epoch": 1.185283326538932, "grad_norm": 1.704453706741333, "learning_rate": 0.0008518905421932328, "loss": 3.2645, "step": 17445 }, { "epoch": 1.1856230466095936, "grad_norm": 1.8850260972976685, "learning_rate": 0.0008518480771844, "loss": 3.4681, "step": 17450 }, { "epoch": 1.1859627666802555, "grad_norm": 2.106034994125366, "learning_rate": 0.0008518056121755674, "loss": 3.7173, "step": 17455 }, { "epoch": 1.1863024867509173, "grad_norm": 1.6205029487609863, "learning_rate": 0.0008517631471667346, "loss": 3.5385, "step": 17460 }, { "epoch": 1.186642206821579, "grad_norm": 2.0187954902648926, "learning_rate": 0.0008517206821579019, "loss": 3.5661, "step": 17465 }, { "epoch": 1.1869819268922408, "grad_norm": 1.9009325504302979, "learning_rate": 0.0008516782171490693, "loss": 3.681, "step": 17470 }, { "epoch": 1.1873216469629027, "grad_norm": 2.170804977416992, "learning_rate": 0.0008516357521402365, "loss": 3.2421, "step": 17475 }, { "epoch": 1.1876613670335643, "grad_norm": 2.2702512741088867, "learning_rate": 0.0008515932871314037, "loss": 3.6369, "step": 17480 }, { "epoch": 
1.1880010871042261, "grad_norm": 2.0509729385375977, "learning_rate": 0.000851550822122571, "loss": 3.6457, "step": 17485 }, { "epoch": 1.188340807174888, "grad_norm": 2.029017925262451, "learning_rate": 0.0008515083571137383, "loss": 3.2464, "step": 17490 }, { "epoch": 1.1886805272455496, "grad_norm": 1.9080442190170288, "learning_rate": 0.0008514658921049055, "loss": 3.3395, "step": 17495 }, { "epoch": 1.1890202473162115, "grad_norm": 2.0014288425445557, "learning_rate": 0.0008514234270960729, "loss": 3.4411, "step": 17500 }, { "epoch": 1.1893599673868733, "grad_norm": 1.9367555379867554, "learning_rate": 0.0008513809620872402, "loss": 3.4673, "step": 17505 }, { "epoch": 1.189699687457535, "grad_norm": 1.7635741233825684, "learning_rate": 0.0008513384970784074, "loss": 3.4821, "step": 17510 }, { "epoch": 1.1900394075281968, "grad_norm": 1.6506190299987793, "learning_rate": 0.0008512960320695747, "loss": 3.761, "step": 17515 }, { "epoch": 1.1903791275988584, "grad_norm": 1.839938759803772, "learning_rate": 0.000851253567060742, "loss": 3.4456, "step": 17520 }, { "epoch": 1.1907188476695203, "grad_norm": 2.3184375762939453, "learning_rate": 0.0008512111020519092, "loss": 3.5089, "step": 17525 }, { "epoch": 1.1910585677401822, "grad_norm": 1.6330196857452393, "learning_rate": 0.0008511686370430765, "loss": 3.5064, "step": 17530 }, { "epoch": 1.1913982878108438, "grad_norm": 1.8279242515563965, "learning_rate": 0.0008511261720342438, "loss": 3.7291, "step": 17535 }, { "epoch": 1.1917380078815056, "grad_norm": 2.2708306312561035, "learning_rate": 0.0008510837070254111, "loss": 3.4924, "step": 17540 }, { "epoch": 1.1920777279521675, "grad_norm": 2.1182024478912354, "learning_rate": 0.0008510412420165784, "loss": 3.6297, "step": 17545 }, { "epoch": 1.1924174480228291, "grad_norm": 1.8895636796951294, "learning_rate": 0.0008509987770077456, "loss": 3.5992, "step": 17550 }, { "epoch": 1.192757168093491, "grad_norm": 2.659210681915283, "learning_rate": 
0.0008509563119989129, "loss": 3.559, "step": 17555 }, { "epoch": 1.1930968881641528, "grad_norm": 1.9381566047668457, "learning_rate": 0.0008509138469900802, "loss": 3.5748, "step": 17560 }, { "epoch": 1.1934366082348145, "grad_norm": 1.955570101737976, "learning_rate": 0.0008508713819812474, "loss": 3.2709, "step": 17565 }, { "epoch": 1.1937763283054763, "grad_norm": 1.785201907157898, "learning_rate": 0.0008508289169724148, "loss": 3.5603, "step": 17570 }, { "epoch": 1.194116048376138, "grad_norm": 2.2565925121307373, "learning_rate": 0.0008507864519635821, "loss": 3.3265, "step": 17575 }, { "epoch": 1.1944557684467998, "grad_norm": 1.8342045545578003, "learning_rate": 0.0008507439869547493, "loss": 3.5388, "step": 17580 }, { "epoch": 1.1947954885174616, "grad_norm": 2.0961084365844727, "learning_rate": 0.0008507015219459165, "loss": 3.6882, "step": 17585 }, { "epoch": 1.1951352085881233, "grad_norm": 1.9552158117294312, "learning_rate": 0.0008506590569370839, "loss": 3.6516, "step": 17590 }, { "epoch": 1.1954749286587851, "grad_norm": 1.7974010705947876, "learning_rate": 0.0008506165919282511, "loss": 3.6629, "step": 17595 }, { "epoch": 1.195814648729447, "grad_norm": 1.5753734111785889, "learning_rate": 0.0008505741269194183, "loss": 3.5982, "step": 17600 }, { "epoch": 1.1961543688001086, "grad_norm": 2.1128177642822266, "learning_rate": 0.0008505316619105858, "loss": 3.3744, "step": 17605 }, { "epoch": 1.1964940888707705, "grad_norm": 1.8365362882614136, "learning_rate": 0.000850489196901753, "loss": 3.0753, "step": 17610 }, { "epoch": 1.1968338089414323, "grad_norm": 2.089017629623413, "learning_rate": 0.0008504467318929202, "loss": 3.7139, "step": 17615 }, { "epoch": 1.197173529012094, "grad_norm": 2.0535483360290527, "learning_rate": 0.0008504042668840876, "loss": 3.5617, "step": 17620 }, { "epoch": 1.1975132490827558, "grad_norm": 2.2422401905059814, "learning_rate": 0.0008503618018752548, "loss": 3.2431, "step": 17625 }, { "epoch": 1.1978529691534177, 
"grad_norm": 1.975978970527649, "learning_rate": 0.000850319336866422, "loss": 3.5293, "step": 17630 }, { "epoch": 1.1981926892240793, "grad_norm": 1.5895005464553833, "learning_rate": 0.0008502768718575894, "loss": 3.3728, "step": 17635 }, { "epoch": 1.1985324092947411, "grad_norm": 1.9280073642730713, "learning_rate": 0.0008502344068487567, "loss": 3.7828, "step": 17640 }, { "epoch": 1.198872129365403, "grad_norm": 1.8892112970352173, "learning_rate": 0.0008501919418399239, "loss": 3.4474, "step": 17645 }, { "epoch": 1.1992118494360646, "grad_norm": 2.0597004890441895, "learning_rate": 0.0008501494768310912, "loss": 3.6103, "step": 17650 }, { "epoch": 1.1995515695067265, "grad_norm": 1.9150537252426147, "learning_rate": 0.0008501070118222585, "loss": 3.6163, "step": 17655 }, { "epoch": 1.1998912895773883, "grad_norm": 2.0424864292144775, "learning_rate": 0.0008500645468134257, "loss": 3.6374, "step": 17660 }, { "epoch": 1.20023100964805, "grad_norm": 1.8846702575683594, "learning_rate": 0.000850022081804593, "loss": 3.6067, "step": 17665 }, { "epoch": 1.2005707297187118, "grad_norm": 1.656031847000122, "learning_rate": 0.0008499796167957604, "loss": 3.4333, "step": 17670 }, { "epoch": 1.2009104497893737, "grad_norm": 1.5284887552261353, "learning_rate": 0.0008499371517869276, "loss": 3.497, "step": 17675 }, { "epoch": 1.2012501698600353, "grad_norm": 2.146608591079712, "learning_rate": 0.0008498946867780949, "loss": 3.346, "step": 17680 }, { "epoch": 1.2015898899306972, "grad_norm": 2.3983352184295654, "learning_rate": 0.0008498522217692621, "loss": 3.6314, "step": 17685 }, { "epoch": 1.2019296100013588, "grad_norm": 1.6308153867721558, "learning_rate": 0.0008498097567604294, "loss": 3.6878, "step": 17690 }, { "epoch": 1.2022693300720206, "grad_norm": 1.750099778175354, "learning_rate": 0.0008497672917515967, "loss": 3.2308, "step": 17695 }, { "epoch": 1.2026090501426825, "grad_norm": 1.5545402765274048, "learning_rate": 0.0008497248267427639, "loss": 3.4286, 
"step": 17700 }, { "epoch": 1.2029487702133441, "grad_norm": 1.9773284196853638, "learning_rate": 0.0008496823617339313, "loss": 3.3646, "step": 17705 }, { "epoch": 1.203288490284006, "grad_norm": 1.8190264701843262, "learning_rate": 0.0008496398967250986, "loss": 3.8087, "step": 17710 }, { "epoch": 1.2036282103546678, "grad_norm": 1.7882612943649292, "learning_rate": 0.0008495974317162658, "loss": 3.2874, "step": 17715 }, { "epoch": 1.2039679304253295, "grad_norm": 1.5918198823928833, "learning_rate": 0.000849554966707433, "loss": 3.6274, "step": 17720 }, { "epoch": 1.2043076504959913, "grad_norm": 1.908536434173584, "learning_rate": 0.0008495125016986004, "loss": 3.6678, "step": 17725 }, { "epoch": 1.2046473705666532, "grad_norm": 2.255589485168457, "learning_rate": 0.0008494700366897676, "loss": 3.5427, "step": 17730 }, { "epoch": 1.2049870906373148, "grad_norm": 2.2210781574249268, "learning_rate": 0.0008494275716809348, "loss": 3.5548, "step": 17735 }, { "epoch": 1.2053268107079766, "grad_norm": 1.7614150047302246, "learning_rate": 0.0008493851066721023, "loss": 3.3601, "step": 17740 }, { "epoch": 1.2056665307786383, "grad_norm": 1.5923863649368286, "learning_rate": 0.0008493426416632695, "loss": 3.4879, "step": 17745 }, { "epoch": 1.2060062508493001, "grad_norm": 2.000263214111328, "learning_rate": 0.0008493001766544367, "loss": 3.6902, "step": 17750 }, { "epoch": 1.206345970919962, "grad_norm": 2.3105428218841553, "learning_rate": 0.0008492577116456041, "loss": 3.4169, "step": 17755 }, { "epoch": 1.2066856909906236, "grad_norm": 1.7550848722457886, "learning_rate": 0.0008492152466367713, "loss": 3.5557, "step": 17760 }, { "epoch": 1.2070254110612855, "grad_norm": 1.6505159139633179, "learning_rate": 0.0008491727816279386, "loss": 3.6196, "step": 17765 }, { "epoch": 1.2073651311319473, "grad_norm": 2.111278772354126, "learning_rate": 0.0008491303166191058, "loss": 3.3664, "step": 17770 }, { "epoch": 1.207704851202609, "grad_norm": 1.8148882389068604, 
"learning_rate": 0.0008490878516102732, "loss": 3.6944, "step": 17775 }, { "epoch": 1.2080445712732708, "grad_norm": 2.0108895301818848, "learning_rate": 0.0008490453866014405, "loss": 3.66, "step": 17780 }, { "epoch": 1.2083842913439327, "grad_norm": 2.837254047393799, "learning_rate": 0.0008490029215926077, "loss": 3.4696, "step": 17785 }, { "epoch": 1.2087240114145943, "grad_norm": 2.0264246463775635, "learning_rate": 0.000848960456583775, "loss": 3.876, "step": 17790 }, { "epoch": 1.2090637314852561, "grad_norm": 1.7061927318572998, "learning_rate": 0.0008489179915749423, "loss": 3.6341, "step": 17795 }, { "epoch": 1.209403451555918, "grad_norm": 2.047839403152466, "learning_rate": 0.0008488755265661095, "loss": 3.7123, "step": 17800 }, { "epoch": 1.2097431716265796, "grad_norm": 2.007291793823242, "learning_rate": 0.0008488330615572768, "loss": 3.358, "step": 17805 }, { "epoch": 1.2100828916972415, "grad_norm": 2.265408515930176, "learning_rate": 0.0008487905965484442, "loss": 3.5136, "step": 17810 }, { "epoch": 1.2104226117679033, "grad_norm": 2.72601056098938, "learning_rate": 0.0008487481315396114, "loss": 3.6468, "step": 17815 }, { "epoch": 1.210762331838565, "grad_norm": 1.7522302865982056, "learning_rate": 0.0008487056665307786, "loss": 3.545, "step": 17820 }, { "epoch": 1.2111020519092268, "grad_norm": 3.080106258392334, "learning_rate": 0.000848663201521946, "loss": 3.6777, "step": 17825 }, { "epoch": 1.2114417719798887, "grad_norm": 1.543269157409668, "learning_rate": 0.0008486207365131132, "loss": 3.7033, "step": 17830 }, { "epoch": 1.2117814920505503, "grad_norm": 2.0093696117401123, "learning_rate": 0.0008485782715042804, "loss": 3.5517, "step": 17835 }, { "epoch": 1.2121212121212122, "grad_norm": 2.0795557498931885, "learning_rate": 0.0008485358064954478, "loss": 3.8289, "step": 17840 }, { "epoch": 1.212460932191874, "grad_norm": 1.7924796342849731, "learning_rate": 0.0008484933414866151, "loss": 3.7012, "step": 17845 }, { "epoch": 
1.2128006522625356, "grad_norm": 2.0109901428222656, "learning_rate": 0.0008484508764777823, "loss": 3.5558, "step": 17850 }, { "epoch": 1.2131403723331975, "grad_norm": 2.3339755535125732, "learning_rate": 0.0008484084114689497, "loss": 3.5954, "step": 17855 }, { "epoch": 1.2134800924038591, "grad_norm": 1.4610693454742432, "learning_rate": 0.0008483659464601169, "loss": 3.3876, "step": 17860 }, { "epoch": 1.213819812474521, "grad_norm": 1.7768585681915283, "learning_rate": 0.0008483234814512841, "loss": 3.733, "step": 17865 }, { "epoch": 1.2141595325451828, "grad_norm": 1.9189460277557373, "learning_rate": 0.0008482810164424515, "loss": 3.3948, "step": 17870 }, { "epoch": 1.2144992526158445, "grad_norm": 1.8253602981567383, "learning_rate": 0.0008482385514336187, "loss": 3.7306, "step": 17875 }, { "epoch": 1.2148389726865063, "grad_norm": 1.7934894561767578, "learning_rate": 0.000848196086424786, "loss": 3.6554, "step": 17880 }, { "epoch": 1.2151786927571682, "grad_norm": 2.3533387184143066, "learning_rate": 0.0008481536214159533, "loss": 3.4715, "step": 17885 }, { "epoch": 1.2155184128278298, "grad_norm": 1.4991620779037476, "learning_rate": 0.0008481111564071206, "loss": 3.7199, "step": 17890 }, { "epoch": 1.2158581328984917, "grad_norm": 1.6567742824554443, "learning_rate": 0.0008480686913982878, "loss": 3.7228, "step": 17895 }, { "epoch": 1.2161978529691535, "grad_norm": 1.779989242553711, "learning_rate": 0.0008480262263894551, "loss": 3.678, "step": 17900 }, { "epoch": 1.2165375730398151, "grad_norm": 2.2104218006134033, "learning_rate": 0.0008479837613806224, "loss": 3.5134, "step": 17905 }, { "epoch": 1.216877293110477, "grad_norm": 1.9458142518997192, "learning_rate": 0.0008479412963717896, "loss": 3.4288, "step": 17910 }, { "epoch": 1.2172170131811386, "grad_norm": 2.2900640964508057, "learning_rate": 0.000847898831362957, "loss": 3.5628, "step": 17915 }, { "epoch": 1.2175567332518005, "grad_norm": 1.562981367111206, "learning_rate": 
0.0008478563663541243, "loss": 3.5577, "step": 17920 }, { "epoch": 1.2178964533224623, "grad_norm": 2.0231339931488037, "learning_rate": 0.0008478139013452915, "loss": 3.6147, "step": 17925 }, { "epoch": 1.218236173393124, "grad_norm": 2.097475290298462, "learning_rate": 0.0008477714363364588, "loss": 3.2998, "step": 17930 }, { "epoch": 1.2185758934637858, "grad_norm": 1.7388554811477661, "learning_rate": 0.000847728971327626, "loss": 3.7591, "step": 17935 }, { "epoch": 1.2189156135344477, "grad_norm": 2.3448069095611572, "learning_rate": 0.0008476865063187933, "loss": 3.4233, "step": 17940 }, { "epoch": 1.2192553336051093, "grad_norm": 1.5322405099868774, "learning_rate": 0.0008476440413099606, "loss": 3.8778, "step": 17945 }, { "epoch": 1.2195950536757711, "grad_norm": 1.887063980102539, "learning_rate": 0.0008476015763011279, "loss": 3.5159, "step": 17950 }, { "epoch": 1.219934773746433, "grad_norm": 2.444591522216797, "learning_rate": 0.0008475591112922952, "loss": 3.466, "step": 17955 }, { "epoch": 1.2202744938170946, "grad_norm": 1.9529963731765747, "learning_rate": 0.0008475166462834625, "loss": 3.5968, "step": 17960 }, { "epoch": 1.2206142138877565, "grad_norm": 1.7758082151412964, "learning_rate": 0.0008474741812746297, "loss": 3.5119, "step": 17965 }, { "epoch": 1.2209539339584183, "grad_norm": 1.6183125972747803, "learning_rate": 0.0008474317162657969, "loss": 3.531, "step": 17970 }, { "epoch": 1.22129365402908, "grad_norm": 2.3440845012664795, "learning_rate": 0.0008473892512569643, "loss": 3.6213, "step": 17975 }, { "epoch": 1.2216333740997418, "grad_norm": 1.4580657482147217, "learning_rate": 0.0008473467862481315, "loss": 3.5394, "step": 17980 }, { "epoch": 1.2219730941704037, "grad_norm": 1.8956751823425293, "learning_rate": 0.0008473043212392988, "loss": 3.3295, "step": 17985 }, { "epoch": 1.2223128142410653, "grad_norm": 2.3421568870544434, "learning_rate": 0.0008472618562304662, "loss": 3.7754, "step": 17990 }, { "epoch": 1.2226525343117272, 
"grad_norm": 1.8667229413986206, "learning_rate": 0.0008472193912216334, "loss": 3.6917, "step": 17995 }, { "epoch": 1.222992254382389, "grad_norm": 2.7878904342651367, "learning_rate": 0.0008471769262128006, "loss": 3.6636, "step": 18000 }, { "epoch": 1.2233319744530506, "grad_norm": 2.0375654697418213, "learning_rate": 0.000847134461203968, "loss": 3.421, "step": 18005 }, { "epoch": 1.2236716945237125, "grad_norm": 2.1450796127319336, "learning_rate": 0.0008470919961951352, "loss": 3.5893, "step": 18010 }, { "epoch": 1.2240114145943743, "grad_norm": 1.6194438934326172, "learning_rate": 0.0008470495311863024, "loss": 3.5174, "step": 18015 }, { "epoch": 1.224351134665036, "grad_norm": 1.6992720365524292, "learning_rate": 0.0008470070661774699, "loss": 3.7847, "step": 18020 }, { "epoch": 1.2246908547356978, "grad_norm": 1.6670448780059814, "learning_rate": 0.0008469646011686371, "loss": 3.4005, "step": 18025 }, { "epoch": 1.2250305748063597, "grad_norm": 1.7820667028427124, "learning_rate": 0.0008469221361598043, "loss": 3.3028, "step": 18030 }, { "epoch": 1.2253702948770213, "grad_norm": 1.7427339553833008, "learning_rate": 0.0008468796711509716, "loss": 3.8888, "step": 18035 }, { "epoch": 1.2257100149476832, "grad_norm": 2.4016785621643066, "learning_rate": 0.0008468372061421389, "loss": 3.7554, "step": 18040 }, { "epoch": 1.2260497350183448, "grad_norm": 1.9913702011108398, "learning_rate": 0.0008467947411333061, "loss": 3.6455, "step": 18045 }, { "epoch": 1.2263894550890067, "grad_norm": 1.6238505840301514, "learning_rate": 0.0008467522761244734, "loss": 3.4107, "step": 18050 }, { "epoch": 1.2267291751596685, "grad_norm": 2.3176565170288086, "learning_rate": 0.0008467098111156408, "loss": 3.6076, "step": 18055 }, { "epoch": 1.2270688952303301, "grad_norm": 2.485699415206909, "learning_rate": 0.000846667346106808, "loss": 3.3995, "step": 18060 }, { "epoch": 1.227408615300992, "grad_norm": 1.7955009937286377, "learning_rate": 0.0008466248810979753, "loss": 3.5262, 
"step": 18065 }, { "epoch": 1.2277483353716538, "grad_norm": 1.8411760330200195, "learning_rate": 0.0008465824160891425, "loss": 3.6362, "step": 18070 }, { "epoch": 1.2280880554423155, "grad_norm": 1.5488213300704956, "learning_rate": 0.0008465399510803098, "loss": 3.6602, "step": 18075 }, { "epoch": 1.2284277755129773, "grad_norm": 2.0485126972198486, "learning_rate": 0.0008464974860714771, "loss": 3.398, "step": 18080 }, { "epoch": 1.228767495583639, "grad_norm": 2.159575939178467, "learning_rate": 0.0008464550210626443, "loss": 3.4404, "step": 18085 }, { "epoch": 1.2291072156543008, "grad_norm": 2.2848258018493652, "learning_rate": 0.0008464125560538117, "loss": 3.5847, "step": 18090 }, { "epoch": 1.2294469357249627, "grad_norm": 2.0614349842071533, "learning_rate": 0.000846370091044979, "loss": 3.5709, "step": 18095 }, { "epoch": 1.2297866557956243, "grad_norm": 1.577433705329895, "learning_rate": 0.0008463276260361462, "loss": 3.712, "step": 18100 }, { "epoch": 1.2301263758662861, "grad_norm": 1.6820054054260254, "learning_rate": 0.0008462851610273136, "loss": 3.663, "step": 18105 }, { "epoch": 1.230466095936948, "grad_norm": 1.6255944967269897, "learning_rate": 0.0008462426960184808, "loss": 3.5408, "step": 18110 }, { "epoch": 1.2308058160076096, "grad_norm": 1.7997314929962158, "learning_rate": 0.000846200231009648, "loss": 3.6028, "step": 18115 }, { "epoch": 1.2311455360782715, "grad_norm": 1.9206079244613647, "learning_rate": 0.0008461577660008153, "loss": 3.7806, "step": 18120 }, { "epoch": 1.2314852561489333, "grad_norm": 1.7699023485183716, "learning_rate": 0.0008461153009919827, "loss": 3.4149, "step": 18125 }, { "epoch": 1.231824976219595, "grad_norm": 1.7119793891906738, "learning_rate": 0.0008460728359831499, "loss": 3.4317, "step": 18130 }, { "epoch": 1.2321646962902568, "grad_norm": 1.8186097145080566, "learning_rate": 0.0008460303709743172, "loss": 3.536, "step": 18135 }, { "epoch": 1.2325044163609187, "grad_norm": 2.1146914958953857, 
"learning_rate": 0.0008459879059654845, "loss": 3.5043, "step": 18140 }, { "epoch": 1.2328441364315803, "grad_norm": 2.6327998638153076, "learning_rate": 0.0008459454409566517, "loss": 3.5208, "step": 18145 }, { "epoch": 1.2331838565022422, "grad_norm": 1.8343321084976196, "learning_rate": 0.000845902975947819, "loss": 3.529, "step": 18150 }, { "epoch": 1.233523576572904, "grad_norm": 1.527064561843872, "learning_rate": 0.0008458605109389863, "loss": 3.4187, "step": 18155 }, { "epoch": 1.2338632966435656, "grad_norm": 2.0521204471588135, "learning_rate": 0.0008458180459301536, "loss": 3.5412, "step": 18160 }, { "epoch": 1.2342030167142275, "grad_norm": 2.9345896244049072, "learning_rate": 0.0008457755809213209, "loss": 3.428, "step": 18165 }, { "epoch": 1.2345427367848893, "grad_norm": 2.3034255504608154, "learning_rate": 0.0008457331159124881, "loss": 3.5238, "step": 18170 }, { "epoch": 1.234882456855551, "grad_norm": 2.1757352352142334, "learning_rate": 0.0008456906509036554, "loss": 3.506, "step": 18175 }, { "epoch": 1.2352221769262128, "grad_norm": 2.2320687770843506, "learning_rate": 0.0008456481858948227, "loss": 3.526, "step": 18180 }, { "epoch": 1.2355618969968747, "grad_norm": 1.944793462753296, "learning_rate": 0.0008456057208859899, "loss": 3.5493, "step": 18185 }, { "epoch": 1.2359016170675363, "grad_norm": 2.078934669494629, "learning_rate": 0.0008455632558771572, "loss": 3.5058, "step": 18190 }, { "epoch": 1.2362413371381982, "grad_norm": 1.7289339303970337, "learning_rate": 0.0008455207908683246, "loss": 3.4322, "step": 18195 }, { "epoch": 1.23658105720886, "grad_norm": 1.9044334888458252, "learning_rate": 0.0008454783258594918, "loss": 3.5566, "step": 18200 }, { "epoch": 1.2369207772795217, "grad_norm": 2.059199333190918, "learning_rate": 0.000845435860850659, "loss": 3.4283, "step": 18205 }, { "epoch": 1.2372604973501835, "grad_norm": 2.28061842918396, "learning_rate": 0.0008453933958418264, "loss": 3.4787, "step": 18210 }, { "epoch": 
1.2376002174208451, "grad_norm": 1.59272038936615, "learning_rate": 0.0008453509308329936, "loss": 3.5339, "step": 18215 }, { "epoch": 1.237939937491507, "grad_norm": 1.7254936695098877, "learning_rate": 0.0008453084658241608, "loss": 3.3376, "step": 18220 }, { "epoch": 1.2382796575621688, "grad_norm": 2.1311328411102295, "learning_rate": 0.0008452660008153283, "loss": 3.8386, "step": 18225 }, { "epoch": 1.2386193776328305, "grad_norm": 2.256758689880371, "learning_rate": 0.0008452235358064955, "loss": 3.5721, "step": 18230 }, { "epoch": 1.2389590977034923, "grad_norm": 1.7698497772216797, "learning_rate": 0.0008451810707976627, "loss": 3.5795, "step": 18235 }, { "epoch": 1.2392988177741542, "grad_norm": 1.3804277181625366, "learning_rate": 0.0008451386057888301, "loss": 3.4105, "step": 18240 }, { "epoch": 1.2396385378448158, "grad_norm": 2.3609209060668945, "learning_rate": 0.0008450961407799973, "loss": 3.5919, "step": 18245 }, { "epoch": 1.2399782579154777, "grad_norm": 1.7847497463226318, "learning_rate": 0.0008450536757711645, "loss": 3.5854, "step": 18250 }, { "epoch": 1.2403179779861393, "grad_norm": 1.9283337593078613, "learning_rate": 0.0008450112107623319, "loss": 3.5121, "step": 18255 }, { "epoch": 1.2406576980568012, "grad_norm": 2.052476644515991, "learning_rate": 0.0008449687457534992, "loss": 3.413, "step": 18260 }, { "epoch": 1.240997418127463, "grad_norm": 1.9388667345046997, "learning_rate": 0.0008449262807446664, "loss": 3.7583, "step": 18265 }, { "epoch": 1.2413371381981246, "grad_norm": 1.990066409111023, "learning_rate": 0.0008448838157358337, "loss": 3.1899, "step": 18270 }, { "epoch": 1.2416768582687865, "grad_norm": 1.98281729221344, "learning_rate": 0.000844841350727001, "loss": 3.3507, "step": 18275 }, { "epoch": 1.2420165783394483, "grad_norm": 2.019948959350586, "learning_rate": 0.0008447988857181682, "loss": 3.6683, "step": 18280 }, { "epoch": 1.24235629841011, "grad_norm": 1.7594032287597656, "learning_rate": 0.0008447564207093355, 
"loss": 3.8158, "step": 18285 }, { "epoch": 1.2426960184807718, "grad_norm": 2.026517868041992, "learning_rate": 0.0008447139557005028, "loss": 3.4917, "step": 18290 }, { "epoch": 1.2430357385514337, "grad_norm": 2.097914457321167, "learning_rate": 0.0008446714906916701, "loss": 3.4981, "step": 18295 }, { "epoch": 1.2433754586220953, "grad_norm": 2.493880033493042, "learning_rate": 0.0008446290256828374, "loss": 3.7268, "step": 18300 }, { "epoch": 1.2437151786927572, "grad_norm": 1.7460466623306274, "learning_rate": 0.0008445865606740047, "loss": 3.5997, "step": 18305 }, { "epoch": 1.244054898763419, "grad_norm": 1.8323854207992554, "learning_rate": 0.0008445440956651719, "loss": 3.7169, "step": 18310 }, { "epoch": 1.2443946188340806, "grad_norm": 1.878403663635254, "learning_rate": 0.0008445016306563392, "loss": 3.6261, "step": 18315 }, { "epoch": 1.2447343389047425, "grad_norm": 2.326411485671997, "learning_rate": 0.0008444591656475064, "loss": 3.5962, "step": 18320 }, { "epoch": 1.2450740589754044, "grad_norm": 1.9078205823898315, "learning_rate": 0.0008444167006386737, "loss": 3.3738, "step": 18325 }, { "epoch": 1.245413779046066, "grad_norm": 2.747694492340088, "learning_rate": 0.0008443742356298411, "loss": 3.3634, "step": 18330 }, { "epoch": 1.2457534991167278, "grad_norm": 2.134599208831787, "learning_rate": 0.0008443317706210083, "loss": 3.513, "step": 18335 }, { "epoch": 1.2460932191873897, "grad_norm": 1.9454607963562012, "learning_rate": 0.0008442893056121756, "loss": 3.5332, "step": 18340 }, { "epoch": 1.2464329392580513, "grad_norm": 2.0758864879608154, "learning_rate": 0.0008442468406033429, "loss": 3.6485, "step": 18345 }, { "epoch": 1.2467726593287132, "grad_norm": 1.6405595541000366, "learning_rate": 0.0008442043755945101, "loss": 3.3923, "step": 18350 }, { "epoch": 1.247112379399375, "grad_norm": 1.552114725112915, "learning_rate": 0.0008441619105856773, "loss": 3.7483, "step": 18355 }, { "epoch": 1.2474520994700367, "grad_norm": 
1.5730385780334473, "learning_rate": 0.0008441194455768447, "loss": 3.6736, "step": 18360 }, { "epoch": 1.2477918195406985, "grad_norm": 1.323628306388855, "learning_rate": 0.000844076980568012, "loss": 3.5311, "step": 18365 }, { "epoch": 1.2481315396113604, "grad_norm": 2.058269500732422, "learning_rate": 0.0008440345155591792, "loss": 3.4372, "step": 18370 }, { "epoch": 1.248471259682022, "grad_norm": 1.6178131103515625, "learning_rate": 0.0008439920505503466, "loss": 3.7376, "step": 18375 }, { "epoch": 1.2488109797526838, "grad_norm": 2.0949625968933105, "learning_rate": 0.0008439495855415138, "loss": 3.2491, "step": 18380 }, { "epoch": 1.2491506998233455, "grad_norm": 1.8381386995315552, "learning_rate": 0.000843907120532681, "loss": 3.9053, "step": 18385 }, { "epoch": 1.2494904198940073, "grad_norm": 1.7984652519226074, "learning_rate": 0.0008438646555238484, "loss": 3.4164, "step": 18390 }, { "epoch": 1.2498301399646692, "grad_norm": 1.548486351966858, "learning_rate": 0.0008438221905150156, "loss": 3.4681, "step": 18395 }, { "epoch": 1.2501698600353308, "grad_norm": 1.7109782695770264, "learning_rate": 0.0008437797255061829, "loss": 3.6854, "step": 18400 }, { "epoch": 1.2505095801059927, "grad_norm": 1.8634968996047974, "learning_rate": 0.0008437372604973503, "loss": 3.737, "step": 18405 }, { "epoch": 1.2508493001766543, "grad_norm": 1.6332908868789673, "learning_rate": 0.0008436947954885175, "loss": 3.6345, "step": 18410 }, { "epoch": 1.2511890202473162, "grad_norm": 1.6274290084838867, "learning_rate": 0.0008436523304796847, "loss": 3.2413, "step": 18415 }, { "epoch": 1.251528740317978, "grad_norm": 2.219931125640869, "learning_rate": 0.000843609865470852, "loss": 3.4338, "step": 18420 }, { "epoch": 1.2518684603886396, "grad_norm": 2.050523042678833, "learning_rate": 0.0008435674004620193, "loss": 3.6008, "step": 18425 }, { "epoch": 1.2522081804593015, "grad_norm": 2.0478527545928955, "learning_rate": 0.0008435249354531865, "loss": 3.521, "step": 18430 }, 
{ "epoch": 1.2525479005299633, "grad_norm": 2.0817742347717285, "learning_rate": 0.0008434824704443539, "loss": 3.6908, "step": 18435 }, { "epoch": 1.252887620600625, "grad_norm": 1.806164026260376, "learning_rate": 0.0008434400054355212, "loss": 3.6657, "step": 18440 }, { "epoch": 1.2532273406712868, "grad_norm": 2.0985147953033447, "learning_rate": 0.0008433975404266885, "loss": 3.5178, "step": 18445 }, { "epoch": 1.2535670607419487, "grad_norm": 1.599692940711975, "learning_rate": 0.0008433550754178557, "loss": 3.63, "step": 18450 }, { "epoch": 1.2539067808126103, "grad_norm": 1.7167301177978516, "learning_rate": 0.000843312610409023, "loss": 3.4059, "step": 18455 }, { "epoch": 1.2542465008832722, "grad_norm": 2.003946304321289, "learning_rate": 0.0008432701454001903, "loss": 3.6281, "step": 18460 }, { "epoch": 1.254586220953934, "grad_norm": 3.7874655723571777, "learning_rate": 0.0008432276803913575, "loss": 3.4835, "step": 18465 }, { "epoch": 1.2549259410245956, "grad_norm": 2.045422077178955, "learning_rate": 0.0008431852153825248, "loss": 3.1854, "step": 18470 }, { "epoch": 1.2552656610952575, "grad_norm": 2.173119068145752, "learning_rate": 0.0008431427503736922, "loss": 3.5171, "step": 18475 }, { "epoch": 1.2556053811659194, "grad_norm": 2.15389084815979, "learning_rate": 0.0008431002853648594, "loss": 3.422, "step": 18480 }, { "epoch": 1.255945101236581, "grad_norm": 1.918323278427124, "learning_rate": 0.0008430578203560266, "loss": 3.3686, "step": 18485 }, { "epoch": 1.2562848213072428, "grad_norm": 2.412795305252075, "learning_rate": 0.000843015355347194, "loss": 3.6516, "step": 18490 }, { "epoch": 1.2566245413779047, "grad_norm": 2.004425287246704, "learning_rate": 0.0008429728903383612, "loss": 3.6075, "step": 18495 }, { "epoch": 1.2569642614485663, "grad_norm": 2.079214572906494, "learning_rate": 0.0008429304253295284, "loss": 3.4161, "step": 18500 }, { "epoch": 1.2573039815192282, "grad_norm": 2.0987632274627686, "learning_rate": 
0.0008428879603206959, "loss": 3.5528, "step": 18505 }, { "epoch": 1.25764370158989, "grad_norm": 2.067065715789795, "learning_rate": 0.0008428454953118631, "loss": 3.5507, "step": 18510 }, { "epoch": 1.2579834216605517, "grad_norm": 1.8939454555511475, "learning_rate": 0.0008428030303030303, "loss": 3.7615, "step": 18515 }, { "epoch": 1.2583231417312135, "grad_norm": 2.3733155727386475, "learning_rate": 0.0008427605652941976, "loss": 3.6447, "step": 18520 }, { "epoch": 1.2586628618018754, "grad_norm": 1.8178366422653198, "learning_rate": 0.0008427181002853649, "loss": 3.8366, "step": 18525 }, { "epoch": 1.259002581872537, "grad_norm": 1.6658523082733154, "learning_rate": 0.0008426756352765321, "loss": 3.6058, "step": 18530 }, { "epoch": 1.2593423019431988, "grad_norm": 1.7313770055770874, "learning_rate": 0.0008426331702676994, "loss": 3.7017, "step": 18535 }, { "epoch": 1.2596820220138607, "grad_norm": 1.9225564002990723, "learning_rate": 0.0008425907052588668, "loss": 3.4667, "step": 18540 }, { "epoch": 1.2600217420845223, "grad_norm": 1.9768012762069702, "learning_rate": 0.000842548240250034, "loss": 3.7421, "step": 18545 }, { "epoch": 1.2603614621551842, "grad_norm": 1.5601882934570312, "learning_rate": 0.0008425057752412013, "loss": 3.8551, "step": 18550 }, { "epoch": 1.260701182225846, "grad_norm": 2.1776046752929688, "learning_rate": 0.0008424633102323685, "loss": 3.4563, "step": 18555 }, { "epoch": 1.2610409022965077, "grad_norm": 1.6352254152297974, "learning_rate": 0.0008424208452235358, "loss": 3.9041, "step": 18560 }, { "epoch": 1.2613806223671695, "grad_norm": 2.331691265106201, "learning_rate": 0.0008423783802147031, "loss": 3.6071, "step": 18565 }, { "epoch": 1.2617203424378312, "grad_norm": 1.8600019216537476, "learning_rate": 0.0008423359152058703, "loss": 3.7229, "step": 18570 }, { "epoch": 1.262060062508493, "grad_norm": 1.8964533805847168, "learning_rate": 0.0008422934501970377, "loss": 3.3678, "step": 18575 }, { "epoch": 1.2623997825791546, 
"grad_norm": 2.4651589393615723, "learning_rate": 0.000842250985188205, "loss": 3.4345, "step": 18580 }, { "epoch": 1.2627395026498165, "grad_norm": 2.5396945476531982, "learning_rate": 0.0008422085201793722, "loss": 3.593, "step": 18585 }, { "epoch": 1.2630792227204783, "grad_norm": 2.1399998664855957, "learning_rate": 0.0008421660551705395, "loss": 3.7733, "step": 18590 }, { "epoch": 1.26341894279114, "grad_norm": 2.1015443801879883, "learning_rate": 0.0008421235901617068, "loss": 3.5802, "step": 18595 }, { "epoch": 1.2637586628618018, "grad_norm": 1.8742570877075195, "learning_rate": 0.000842081125152874, "loss": 3.7118, "step": 18600 }, { "epoch": 1.2640983829324637, "grad_norm": 1.907408356666565, "learning_rate": 0.0008420386601440412, "loss": 3.4954, "step": 18605 }, { "epoch": 1.2644381030031253, "grad_norm": 1.722137451171875, "learning_rate": 0.0008419961951352087, "loss": 3.2814, "step": 18610 }, { "epoch": 1.2647778230737872, "grad_norm": 1.9968839883804321, "learning_rate": 0.0008419537301263759, "loss": 3.6661, "step": 18615 }, { "epoch": 1.265117543144449, "grad_norm": 2.061830520629883, "learning_rate": 0.0008419112651175431, "loss": 3.8251, "step": 18620 }, { "epoch": 1.2654572632151107, "grad_norm": 2.037079095840454, "learning_rate": 0.0008418688001087105, "loss": 3.3329, "step": 18625 }, { "epoch": 1.2657969832857725, "grad_norm": 1.912039041519165, "learning_rate": 0.0008418263350998777, "loss": 3.389, "step": 18630 }, { "epoch": 1.2661367033564344, "grad_norm": 2.0195910930633545, "learning_rate": 0.0008417838700910449, "loss": 3.7881, "step": 18635 }, { "epoch": 1.266476423427096, "grad_norm": 1.6627869606018066, "learning_rate": 0.0008417414050822123, "loss": 3.251, "step": 18640 }, { "epoch": 1.2668161434977578, "grad_norm": 1.907738208770752, "learning_rate": 0.0008416989400733796, "loss": 3.3706, "step": 18645 }, { "epoch": 1.2671558635684197, "grad_norm": 2.003467559814453, "learning_rate": 0.0008416564750645468, "loss": 3.5396, "step": 
18650 }, { "epoch": 1.2674955836390813, "grad_norm": 1.4395939111709595, "learning_rate": 0.0008416140100557142, "loss": 3.6983, "step": 18655 }, { "epoch": 1.2678353037097432, "grad_norm": 2.02187442779541, "learning_rate": 0.0008415715450468814, "loss": 3.6391, "step": 18660 }, { "epoch": 1.268175023780405, "grad_norm": 1.913180947303772, "learning_rate": 0.0008415290800380486, "loss": 3.761, "step": 18665 }, { "epoch": 1.2685147438510667, "grad_norm": 2.70967173576355, "learning_rate": 0.0008414866150292159, "loss": 3.4225, "step": 18670 }, { "epoch": 1.2688544639217285, "grad_norm": 1.992073655128479, "learning_rate": 0.0008414441500203832, "loss": 3.4563, "step": 18675 }, { "epoch": 1.2691941839923904, "grad_norm": 2.168074607849121, "learning_rate": 0.0008414016850115505, "loss": 3.38, "step": 18680 }, { "epoch": 1.269533904063052, "grad_norm": 2.0365025997161865, "learning_rate": 0.0008413592200027178, "loss": 3.7318, "step": 18685 }, { "epoch": 1.2698736241337139, "grad_norm": 2.03244686126709, "learning_rate": 0.0008413167549938851, "loss": 3.7346, "step": 18690 }, { "epoch": 1.2702133442043757, "grad_norm": 2.2212038040161133, "learning_rate": 0.0008412742899850523, "loss": 3.5745, "step": 18695 }, { "epoch": 1.2705530642750373, "grad_norm": 2.1848058700561523, "learning_rate": 0.0008412318249762196, "loss": 3.8166, "step": 18700 }, { "epoch": 1.2708927843456992, "grad_norm": 1.5693469047546387, "learning_rate": 0.0008411893599673868, "loss": 3.5628, "step": 18705 }, { "epoch": 1.271232504416361, "grad_norm": 1.950211763381958, "learning_rate": 0.0008411468949585541, "loss": 3.5531, "step": 18710 }, { "epoch": 1.2715722244870227, "grad_norm": 1.969591498374939, "learning_rate": 0.0008411044299497215, "loss": 3.4609, "step": 18715 }, { "epoch": 1.2719119445576845, "grad_norm": 2.1184885501861572, "learning_rate": 0.0008410619649408887, "loss": 3.4085, "step": 18720 }, { "epoch": 1.2722516646283464, "grad_norm": 1.7584772109985352, "learning_rate": 
0.000841019499932056, "loss": 3.6127, "step": 18725 }, { "epoch": 1.272591384699008, "grad_norm": 2.047315835952759, "learning_rate": 0.0008409770349232233, "loss": 3.4332, "step": 18730 }, { "epoch": 1.2729311047696699, "grad_norm": 1.6701781749725342, "learning_rate": 0.0008409345699143905, "loss": 3.5575, "step": 18735 }, { "epoch": 1.2732708248403315, "grad_norm": 1.7678508758544922, "learning_rate": 0.0008408921049055577, "loss": 3.3728, "step": 18740 }, { "epoch": 1.2736105449109933, "grad_norm": 2.3242194652557373, "learning_rate": 0.0008408496398967251, "loss": 3.4969, "step": 18745 }, { "epoch": 1.273950264981655, "grad_norm": 2.408721685409546, "learning_rate": 0.0008408071748878924, "loss": 3.6577, "step": 18750 }, { "epoch": 1.2742899850523168, "grad_norm": 2.510456085205078, "learning_rate": 0.0008407647098790596, "loss": 3.7485, "step": 18755 }, { "epoch": 1.2746297051229787, "grad_norm": 1.7609496116638184, "learning_rate": 0.000840722244870227, "loss": 3.5968, "step": 18760 }, { "epoch": 1.2749694251936403, "grad_norm": 2.381263017654419, "learning_rate": 0.0008406797798613942, "loss": 3.3689, "step": 18765 }, { "epoch": 1.2753091452643022, "grad_norm": 1.7842485904693604, "learning_rate": 0.0008406373148525614, "loss": 3.5108, "step": 18770 }, { "epoch": 1.275648865334964, "grad_norm": 1.7722336053848267, "learning_rate": 0.0008405948498437288, "loss": 3.5147, "step": 18775 }, { "epoch": 1.2759885854056257, "grad_norm": 1.6749249696731567, "learning_rate": 0.000840552384834896, "loss": 3.792, "step": 18780 }, { "epoch": 1.2763283054762875, "grad_norm": 2.0790388584136963, "learning_rate": 0.0008405099198260634, "loss": 3.6335, "step": 18785 }, { "epoch": 1.2766680255469494, "grad_norm": 2.083552360534668, "learning_rate": 0.0008404674548172307, "loss": 3.6976, "step": 18790 }, { "epoch": 1.277007745617611, "grad_norm": 1.713538408279419, "learning_rate": 0.0008404249898083979, "loss": 3.6259, "step": 18795 }, { "epoch": 1.2773474656882728, 
"grad_norm": 2.0080904960632324, "learning_rate": 0.0008403825247995652, "loss": 3.4313, "step": 18800 }, { "epoch": 1.2776871857589347, "grad_norm": 1.5428937673568726, "learning_rate": 0.0008403400597907324, "loss": 3.5861, "step": 18805 }, { "epoch": 1.2780269058295963, "grad_norm": 1.6123367547988892, "learning_rate": 0.0008402975947818997, "loss": 3.6155, "step": 18810 }, { "epoch": 1.2783666259002582, "grad_norm": 1.7825534343719482, "learning_rate": 0.0008402551297730671, "loss": 3.5868, "step": 18815 }, { "epoch": 1.27870634597092, "grad_norm": 2.372002363204956, "learning_rate": 0.0008402126647642343, "loss": 3.6921, "step": 18820 }, { "epoch": 1.2790460660415817, "grad_norm": 1.6267695426940918, "learning_rate": 0.0008401701997554016, "loss": 3.5999, "step": 18825 }, { "epoch": 1.2793857861122435, "grad_norm": 1.9717662334442139, "learning_rate": 0.0008401277347465689, "loss": 3.5455, "step": 18830 }, { "epoch": 1.2797255061829054, "grad_norm": 2.2492005825042725, "learning_rate": 0.0008400852697377361, "loss": 3.8695, "step": 18835 }, { "epoch": 1.280065226253567, "grad_norm": 1.9477237462997437, "learning_rate": 0.0008400428047289034, "loss": 3.6302, "step": 18840 }, { "epoch": 1.2804049463242289, "grad_norm": 1.859017014503479, "learning_rate": 0.0008400003397200707, "loss": 3.4878, "step": 18845 }, { "epoch": 1.2807446663948907, "grad_norm": 2.2910311222076416, "learning_rate": 0.000839957874711238, "loss": 3.4513, "step": 18850 }, { "epoch": 1.2810843864655523, "grad_norm": 1.7365258932113647, "learning_rate": 0.0008399154097024052, "loss": 3.5378, "step": 18855 }, { "epoch": 1.2814241065362142, "grad_norm": 1.706438422203064, "learning_rate": 0.0008398729446935726, "loss": 3.5797, "step": 18860 }, { "epoch": 1.281763826606876, "grad_norm": 1.544829249382019, "learning_rate": 0.0008398304796847398, "loss": 3.4045, "step": 18865 }, { "epoch": 1.2821035466775377, "grad_norm": 1.864040493965149, "learning_rate": 0.000839788014675907, "loss": 3.6903, 
"step": 18870 }, { "epoch": 1.2824432667481995, "grad_norm": 1.6006860733032227, "learning_rate": 0.0008397455496670744, "loss": 3.5323, "step": 18875 }, { "epoch": 1.2827829868188614, "grad_norm": 1.902289628982544, "learning_rate": 0.0008397030846582416, "loss": 3.3799, "step": 18880 }, { "epoch": 1.283122706889523, "grad_norm": 1.856964111328125, "learning_rate": 0.0008396606196494089, "loss": 3.5514, "step": 18885 }, { "epoch": 1.2834624269601849, "grad_norm": 1.9346674680709839, "learning_rate": 0.0008396181546405763, "loss": 3.3113, "step": 18890 }, { "epoch": 1.2838021470308467, "grad_norm": 1.7262587547302246, "learning_rate": 0.0008395756896317435, "loss": 3.5693, "step": 18895 }, { "epoch": 1.2841418671015083, "grad_norm": 1.53852379322052, "learning_rate": 0.0008395332246229107, "loss": 3.5304, "step": 18900 }, { "epoch": 1.2844815871721702, "grad_norm": 2.0744926929473877, "learning_rate": 0.000839490759614078, "loss": 3.4963, "step": 18905 }, { "epoch": 1.2848213072428318, "grad_norm": 1.62220299243927, "learning_rate": 0.0008394482946052453, "loss": 3.6637, "step": 18910 }, { "epoch": 1.2851610273134937, "grad_norm": 2.084960460662842, "learning_rate": 0.0008394058295964125, "loss": 3.2012, "step": 18915 }, { "epoch": 1.2855007473841553, "grad_norm": 1.692065954208374, "learning_rate": 0.0008393633645875799, "loss": 3.5638, "step": 18920 }, { "epoch": 1.2858404674548172, "grad_norm": 1.683809757232666, "learning_rate": 0.0008393208995787472, "loss": 3.4057, "step": 18925 }, { "epoch": 1.286180187525479, "grad_norm": 1.756144642829895, "learning_rate": 0.0008392784345699144, "loss": 3.5932, "step": 18930 }, { "epoch": 1.2865199075961407, "grad_norm": 1.7865302562713623, "learning_rate": 0.0008392359695610817, "loss": 3.3644, "step": 18935 }, { "epoch": 1.2868596276668025, "grad_norm": 2.1899573802948, "learning_rate": 0.000839193504552249, "loss": 3.6512, "step": 18940 }, { "epoch": 1.2871993477374644, "grad_norm": 2.173671007156372, "learning_rate": 
0.0008391510395434162, "loss": 3.5146, "step": 18945 }, { "epoch": 1.287539067808126, "grad_norm": 1.6678794622421265, "learning_rate": 0.0008391085745345835, "loss": 3.7443, "step": 18950 }, { "epoch": 1.2878787878787878, "grad_norm": 1.5591424703598022, "learning_rate": 0.0008390661095257508, "loss": 3.5877, "step": 18955 }, { "epoch": 1.2882185079494497, "grad_norm": 2.274824619293213, "learning_rate": 0.0008390236445169181, "loss": 3.3513, "step": 18960 }, { "epoch": 1.2885582280201113, "grad_norm": 2.0304794311523438, "learning_rate": 0.0008389811795080854, "loss": 3.4829, "step": 18965 }, { "epoch": 1.2888979480907732, "grad_norm": 1.998932123184204, "learning_rate": 0.0008389387144992526, "loss": 3.6623, "step": 18970 }, { "epoch": 1.289237668161435, "grad_norm": 1.5383567810058594, "learning_rate": 0.0008388962494904199, "loss": 3.6271, "step": 18975 }, { "epoch": 1.2895773882320967, "grad_norm": 1.481818437576294, "learning_rate": 0.0008388537844815872, "loss": 3.5209, "step": 18980 }, { "epoch": 1.2899171083027585, "grad_norm": 1.6060434579849243, "learning_rate": 0.0008388113194727544, "loss": 3.663, "step": 18985 }, { "epoch": 1.2902568283734204, "grad_norm": 1.5986127853393555, "learning_rate": 0.0008387688544639218, "loss": 3.3818, "step": 18990 }, { "epoch": 1.290596548444082, "grad_norm": 2.607527732849121, "learning_rate": 0.0008387263894550891, "loss": 3.963, "step": 18995 }, { "epoch": 1.2909362685147439, "grad_norm": 1.6149656772613525, "learning_rate": 0.0008386839244462563, "loss": 3.4103, "step": 19000 }, { "epoch": 1.2912759885854057, "grad_norm": 2.168496608734131, "learning_rate": 0.0008386414594374235, "loss": 3.5111, "step": 19005 }, { "epoch": 1.2916157086560673, "grad_norm": 1.730539321899414, "learning_rate": 0.0008385989944285909, "loss": 3.6007, "step": 19010 }, { "epoch": 1.2919554287267292, "grad_norm": 2.199922561645508, "learning_rate": 0.0008385565294197581, "loss": 3.6806, "step": 19015 }, { "epoch": 1.292295148797391, 
"grad_norm": 2.480663776397705, "learning_rate": 0.0008385140644109253, "loss": 3.5952, "step": 19020 }, { "epoch": 1.2926348688680527, "grad_norm": 1.788547396659851, "learning_rate": 0.0008384715994020928, "loss": 3.5949, "step": 19025 }, { "epoch": 1.2929745889387145, "grad_norm": 2.173103094100952, "learning_rate": 0.00083842913439326, "loss": 3.5969, "step": 19030 }, { "epoch": 1.2933143090093764, "grad_norm": 2.0105550289154053, "learning_rate": 0.0008383866693844272, "loss": 3.7788, "step": 19035 }, { "epoch": 1.293654029080038, "grad_norm": 1.7781530618667603, "learning_rate": 0.0008383442043755946, "loss": 3.5969, "step": 19040 }, { "epoch": 1.2939937491506999, "grad_norm": 1.7856115102767944, "learning_rate": 0.0008383017393667618, "loss": 3.6208, "step": 19045 }, { "epoch": 1.2943334692213617, "grad_norm": 1.6698822975158691, "learning_rate": 0.000838259274357929, "loss": 3.2893, "step": 19050 }, { "epoch": 1.2946731892920234, "grad_norm": 1.679687738418579, "learning_rate": 0.0008382168093490963, "loss": 3.6422, "step": 19055 }, { "epoch": 1.2950129093626852, "grad_norm": 2.0073485374450684, "learning_rate": 0.0008381743443402637, "loss": 3.7274, "step": 19060 }, { "epoch": 1.295352629433347, "grad_norm": 1.623830795288086, "learning_rate": 0.0008381318793314309, "loss": 3.4059, "step": 19065 }, { "epoch": 1.2956923495040087, "grad_norm": 1.9111374616622925, "learning_rate": 0.0008380894143225982, "loss": 3.33, "step": 19070 }, { "epoch": 1.2960320695746705, "grad_norm": 2.2163772583007812, "learning_rate": 0.0008380469493137655, "loss": 3.45, "step": 19075 }, { "epoch": 1.2963717896453322, "grad_norm": 1.703721284866333, "learning_rate": 0.0008380044843049327, "loss": 3.8524, "step": 19080 }, { "epoch": 1.296711509715994, "grad_norm": 2.246683120727539, "learning_rate": 0.0008379620192961, "loss": 3.7833, "step": 19085 }, { "epoch": 1.2970512297866557, "grad_norm": 1.6975170373916626, "learning_rate": 0.0008379195542872672, "loss": 3.6203, "step": 
19090 }, { "epoch": 1.2973909498573175, "grad_norm": 1.8660392761230469, "learning_rate": 0.0008378770892784346, "loss": 3.4131, "step": 19095 }, { "epoch": 1.2977306699279794, "grad_norm": 2.2513999938964844, "learning_rate": 0.0008378346242696019, "loss": 3.5026, "step": 19100 }, { "epoch": 1.298070389998641, "grad_norm": 1.8578860759735107, "learning_rate": 0.0008377921592607691, "loss": 3.4854, "step": 19105 }, { "epoch": 1.2984101100693028, "grad_norm": 2.147987127304077, "learning_rate": 0.0008377496942519364, "loss": 3.5721, "step": 19110 }, { "epoch": 1.2987498301399647, "grad_norm": 2.2172443866729736, "learning_rate": 0.0008377072292431037, "loss": 3.5193, "step": 19115 }, { "epoch": 1.2990895502106263, "grad_norm": 1.6403456926345825, "learning_rate": 0.0008376647642342709, "loss": 3.6205, "step": 19120 }, { "epoch": 1.2994292702812882, "grad_norm": 1.7239973545074463, "learning_rate": 0.0008376222992254383, "loss": 3.6579, "step": 19125 }, { "epoch": 1.29976899035195, "grad_norm": 2.030193567276001, "learning_rate": 0.0008375798342166056, "loss": 3.5916, "step": 19130 }, { "epoch": 1.3001087104226117, "grad_norm": 2.139174699783325, "learning_rate": 0.0008375373692077728, "loss": 3.4606, "step": 19135 }, { "epoch": 1.3004484304932735, "grad_norm": 2.146052837371826, "learning_rate": 0.0008374949041989402, "loss": 3.454, "step": 19140 }, { "epoch": 1.3007881505639354, "grad_norm": 2.23116135597229, "learning_rate": 0.0008374524391901074, "loss": 3.5146, "step": 19145 }, { "epoch": 1.301127870634597, "grad_norm": 1.60590660572052, "learning_rate": 0.0008374099741812746, "loss": 3.6451, "step": 19150 }, { "epoch": 1.3014675907052589, "grad_norm": 1.913094401359558, "learning_rate": 0.0008373675091724419, "loss": 3.4007, "step": 19155 }, { "epoch": 1.3018073107759207, "grad_norm": 1.9521270990371704, "learning_rate": 0.0008373250441636092, "loss": 3.6166, "step": 19160 }, { "epoch": 1.3021470308465823, "grad_norm": 2.2334327697753906, "learning_rate": 
0.0008372825791547765, "loss": 3.4642, "step": 19165 }, { "epoch": 1.3024867509172442, "grad_norm": 1.7695037126541138, "learning_rate": 0.0008372401141459438, "loss": 3.8516, "step": 19170 }, { "epoch": 1.302826470987906, "grad_norm": 1.8781336545944214, "learning_rate": 0.0008371976491371111, "loss": 3.3588, "step": 19175 }, { "epoch": 1.3031661910585677, "grad_norm": 2.4607295989990234, "learning_rate": 0.0008371551841282783, "loss": 3.6076, "step": 19180 }, { "epoch": 1.3035059111292295, "grad_norm": 1.7529363632202148, "learning_rate": 0.0008371127191194456, "loss": 3.405, "step": 19185 }, { "epoch": 1.3038456311998914, "grad_norm": 1.5837793350219727, "learning_rate": 0.0008370702541106128, "loss": 3.8176, "step": 19190 }, { "epoch": 1.304185351270553, "grad_norm": 1.6325138807296753, "learning_rate": 0.0008370277891017801, "loss": 3.4617, "step": 19195 }, { "epoch": 1.3045250713412149, "grad_norm": 1.8163272142410278, "learning_rate": 0.0008369853240929475, "loss": 3.8793, "step": 19200 }, { "epoch": 1.3048647914118767, "grad_norm": 2.245483636856079, "learning_rate": 0.0008369428590841147, "loss": 3.5548, "step": 19205 }, { "epoch": 1.3052045114825384, "grad_norm": 1.5007449388504028, "learning_rate": 0.000836900394075282, "loss": 3.5116, "step": 19210 }, { "epoch": 1.3055442315532002, "grad_norm": 1.9287620782852173, "learning_rate": 0.0008368579290664493, "loss": 3.7064, "step": 19215 }, { "epoch": 1.305883951623862, "grad_norm": 1.640777587890625, "learning_rate": 0.0008368154640576165, "loss": 3.409, "step": 19220 }, { "epoch": 1.3062236716945237, "grad_norm": 2.175463914871216, "learning_rate": 0.0008367729990487838, "loss": 3.7716, "step": 19225 }, { "epoch": 1.3065633917651855, "grad_norm": 2.3264408111572266, "learning_rate": 0.0008367305340399511, "loss": 3.5884, "step": 19230 }, { "epoch": 1.3069031118358474, "grad_norm": 2.455965518951416, "learning_rate": 0.0008366880690311184, "loss": 3.3362, "step": 19235 }, { "epoch": 1.307242831906509, 
"grad_norm": 1.8409650325775146, "learning_rate": 0.0008366456040222856, "loss": 3.5255, "step": 19240 }, { "epoch": 1.3075825519771709, "grad_norm": 1.7732799053192139, "learning_rate": 0.000836603139013453, "loss": 3.8071, "step": 19245 }, { "epoch": 1.3079222720478325, "grad_norm": 2.238210678100586, "learning_rate": 0.0008365606740046202, "loss": 3.7801, "step": 19250 }, { "epoch": 1.3082619921184944, "grad_norm": 1.9418832063674927, "learning_rate": 0.0008365182089957874, "loss": 3.8564, "step": 19255 }, { "epoch": 1.308601712189156, "grad_norm": 2.779914617538452, "learning_rate": 0.0008364757439869548, "loss": 3.7084, "step": 19260 }, { "epoch": 1.3089414322598178, "grad_norm": 1.6171354055404663, "learning_rate": 0.000836433278978122, "loss": 3.1466, "step": 19265 }, { "epoch": 1.3092811523304797, "grad_norm": 2.863645553588867, "learning_rate": 0.0008363908139692893, "loss": 3.5473, "step": 19270 }, { "epoch": 1.3096208724011413, "grad_norm": 1.685624122619629, "learning_rate": 0.0008363483489604567, "loss": 3.392, "step": 19275 }, { "epoch": 1.3099605924718032, "grad_norm": 1.7928946018218994, "learning_rate": 0.0008363143769533904, "loss": 3.1434, "step": 19280 }, { "epoch": 1.310300312542465, "grad_norm": 1.8623627424240112, "learning_rate": 0.0008362719119445576, "loss": 3.5068, "step": 19285 }, { "epoch": 1.3106400326131267, "grad_norm": 1.530536413192749, "learning_rate": 0.000836229446935725, "loss": 3.4224, "step": 19290 }, { "epoch": 1.3109797526837885, "grad_norm": 1.748029112815857, "learning_rate": 0.0008361869819268923, "loss": 3.5822, "step": 19295 }, { "epoch": 1.3113194727544504, "grad_norm": 1.782485842704773, "learning_rate": 0.0008361445169180595, "loss": 3.6, "step": 19300 }, { "epoch": 1.311659192825112, "grad_norm": 1.7806373834609985, "learning_rate": 0.0008361020519092268, "loss": 3.6724, "step": 19305 }, { "epoch": 1.3119989128957739, "grad_norm": 1.836791753768921, "learning_rate": 0.0008360595869003941, "loss": 3.263, "step": 
19310 }, { "epoch": 1.3123386329664357, "grad_norm": 2.17862868309021, "learning_rate": 0.0008360171218915613, "loss": 3.4027, "step": 19315 }, { "epoch": 1.3126783530370973, "grad_norm": 1.4562073945999146, "learning_rate": 0.0008359746568827286, "loss": 3.4837, "step": 19320 }, { "epoch": 1.3130180731077592, "grad_norm": 1.7853091955184937, "learning_rate": 0.000835932191873896, "loss": 3.601, "step": 19325 }, { "epoch": 1.313357793178421, "grad_norm": 2.136294364929199, "learning_rate": 0.0008358897268650633, "loss": 3.6503, "step": 19330 }, { "epoch": 1.3136975132490827, "grad_norm": 2.2047417163848877, "learning_rate": 0.0008358472618562305, "loss": 3.4349, "step": 19335 }, { "epoch": 1.3140372333197445, "grad_norm": 2.0830230712890625, "learning_rate": 0.0008358047968473977, "loss": 3.6599, "step": 19340 }, { "epoch": 1.3143769533904064, "grad_norm": 1.9119350910186768, "learning_rate": 0.0008357623318385651, "loss": 3.587, "step": 19345 }, { "epoch": 1.314716673461068, "grad_norm": 2.3416640758514404, "learning_rate": 0.0008357198668297323, "loss": 3.5681, "step": 19350 }, { "epoch": 1.3150563935317299, "grad_norm": 2.811776638031006, "learning_rate": 0.0008356774018208995, "loss": 3.4247, "step": 19355 }, { "epoch": 1.3153961136023917, "grad_norm": 2.3713786602020264, "learning_rate": 0.000835634936812067, "loss": 3.436, "step": 19360 }, { "epoch": 1.3157358336730534, "grad_norm": 1.9042177200317383, "learning_rate": 0.0008355924718032342, "loss": 3.6682, "step": 19365 }, { "epoch": 1.3160755537437152, "grad_norm": 1.6880093812942505, "learning_rate": 0.0008355500067944014, "loss": 3.5859, "step": 19370 }, { "epoch": 1.316415273814377, "grad_norm": 1.5811434984207153, "learning_rate": 0.0008355075417855688, "loss": 3.8227, "step": 19375 }, { "epoch": 1.3167549938850387, "grad_norm": 1.8754618167877197, "learning_rate": 0.000835465076776736, "loss": 3.5627, "step": 19380 }, { "epoch": 1.3170947139557005, "grad_norm": 1.8651005029678345, "learning_rate": 
0.0008354226117679032, "loss": 3.6729, "step": 19385 }, { "epoch": 1.3174344340263624, "grad_norm": 2.2850558757781982, "learning_rate": 0.0008353801467590705, "loss": 3.7581, "step": 19390 }, { "epoch": 1.317774154097024, "grad_norm": 2.6417059898376465, "learning_rate": 0.0008353376817502379, "loss": 3.3302, "step": 19395 }, { "epoch": 1.3181138741676859, "grad_norm": 1.675385594367981, "learning_rate": 0.0008352952167414051, "loss": 3.6874, "step": 19400 }, { "epoch": 1.3184535942383477, "grad_norm": 1.9577597379684448, "learning_rate": 0.0008352527517325724, "loss": 3.4986, "step": 19405 }, { "epoch": 1.3187933143090094, "grad_norm": 2.0332067012786865, "learning_rate": 0.0008352102867237397, "loss": 3.6301, "step": 19410 }, { "epoch": 1.3191330343796712, "grad_norm": 3.785659074783325, "learning_rate": 0.0008351678217149069, "loss": 3.4409, "step": 19415 }, { "epoch": 1.3194727544503329, "grad_norm": 1.607729196548462, "learning_rate": 0.0008351253567060742, "loss": 3.8097, "step": 19420 }, { "epoch": 1.3198124745209947, "grad_norm": 1.5007296800613403, "learning_rate": 0.0008350828916972414, "loss": 3.4796, "step": 19425 }, { "epoch": 1.3201521945916566, "grad_norm": 1.8320077657699585, "learning_rate": 0.0008350404266884088, "loss": 3.5697, "step": 19430 }, { "epoch": 1.3204919146623182, "grad_norm": 1.7538210153579712, "learning_rate": 0.0008349979616795761, "loss": 3.5663, "step": 19435 }, { "epoch": 1.32083163473298, "grad_norm": 2.3731508255004883, "learning_rate": 0.0008349554966707433, "loss": 3.4905, "step": 19440 }, { "epoch": 1.3211713548036417, "grad_norm": 2.087120532989502, "learning_rate": 0.0008349130316619106, "loss": 3.5825, "step": 19445 }, { "epoch": 1.3215110748743035, "grad_norm": 2.264925956726074, "learning_rate": 0.0008348705666530779, "loss": 3.563, "step": 19450 }, { "epoch": 1.3218507949449654, "grad_norm": 1.8351346254348755, "learning_rate": 0.0008348281016442451, "loss": 3.7665, "step": 19455 }, { "epoch": 1.322190515015627, 
"grad_norm": 1.900141716003418, "learning_rate": 0.0008347856366354124, "loss": 3.791, "step": 19460 }, { "epoch": 1.3225302350862889, "grad_norm": 1.6336435079574585, "learning_rate": 0.0008347431716265798, "loss": 3.4203, "step": 19465 }, { "epoch": 1.3228699551569507, "grad_norm": 2.1229381561279297, "learning_rate": 0.000834700706617747, "loss": 3.5583, "step": 19470 }, { "epoch": 1.3232096752276123, "grad_norm": 2.0808122158050537, "learning_rate": 0.0008346582416089142, "loss": 3.5405, "step": 19475 }, { "epoch": 1.3235493952982742, "grad_norm": 1.9692496061325073, "learning_rate": 0.0008346157766000816, "loss": 3.5679, "step": 19480 }, { "epoch": 1.323889115368936, "grad_norm": 1.7377182245254517, "learning_rate": 0.0008345733115912488, "loss": 3.5739, "step": 19485 }, { "epoch": 1.3242288354395977, "grad_norm": 1.666560411453247, "learning_rate": 0.000834530846582416, "loss": 3.3442, "step": 19490 }, { "epoch": 1.3245685555102595, "grad_norm": 1.8868719339370728, "learning_rate": 0.0008344883815735834, "loss": 3.6545, "step": 19495 }, { "epoch": 1.3249082755809214, "grad_norm": 1.5637195110321045, "learning_rate": 0.0008344459165647507, "loss": 3.5723, "step": 19500 }, { "epoch": 1.325247995651583, "grad_norm": 2.4698596000671387, "learning_rate": 0.0008344034515559179, "loss": 3.4083, "step": 19505 }, { "epoch": 1.3255877157222449, "grad_norm": 2.6710383892059326, "learning_rate": 0.0008343609865470853, "loss": 3.5483, "step": 19510 }, { "epoch": 1.3259274357929067, "grad_norm": 1.7624460458755493, "learning_rate": 0.0008343185215382525, "loss": 3.6278, "step": 19515 }, { "epoch": 1.3262671558635684, "grad_norm": 1.9563546180725098, "learning_rate": 0.0008342760565294197, "loss": 3.4337, "step": 19520 }, { "epoch": 1.3266068759342302, "grad_norm": 1.796618938446045, "learning_rate": 0.000834233591520587, "loss": 3.4855, "step": 19525 }, { "epoch": 1.326946596004892, "grad_norm": 2.0781407356262207, "learning_rate": 0.0008341911265117543, "loss": 3.3136, 
"step": 19530 }, { "epoch": 1.3272863160755537, "grad_norm": 2.009997606277466, "learning_rate": 0.0008341486615029216, "loss": 3.6217, "step": 19535 }, { "epoch": 1.3276260361462155, "grad_norm": 2.275590658187866, "learning_rate": 0.0008341061964940889, "loss": 3.7047, "step": 19540 }, { "epoch": 1.3279657562168774, "grad_norm": 1.4038951396942139, "learning_rate": 0.0008340637314852562, "loss": 3.4797, "step": 19545 }, { "epoch": 1.328305476287539, "grad_norm": 1.6049015522003174, "learning_rate": 0.0008340212664764234, "loss": 3.5003, "step": 19550 }, { "epoch": 1.3286451963582009, "grad_norm": 2.305433511734009, "learning_rate": 0.0008339788014675907, "loss": 3.7212, "step": 19555 }, { "epoch": 1.3289849164288627, "grad_norm": 1.713694453239441, "learning_rate": 0.000833936336458758, "loss": 3.6084, "step": 19560 }, { "epoch": 1.3293246364995244, "grad_norm": 2.4636693000793457, "learning_rate": 0.0008338938714499252, "loss": 3.5101, "step": 19565 }, { "epoch": 1.3296643565701862, "grad_norm": 1.8470737934112549, "learning_rate": 0.0008338514064410926, "loss": 3.5201, "step": 19570 }, { "epoch": 1.330004076640848, "grad_norm": 1.8818084001541138, "learning_rate": 0.0008338089414322598, "loss": 3.6658, "step": 19575 }, { "epoch": 1.3303437967115097, "grad_norm": 1.8451824188232422, "learning_rate": 0.0008337664764234271, "loss": 3.466, "step": 19580 }, { "epoch": 1.3306835167821716, "grad_norm": 1.7799946069717407, "learning_rate": 0.0008337240114145944, "loss": 3.5547, "step": 19585 }, { "epoch": 1.3310232368528332, "grad_norm": 1.9309349060058594, "learning_rate": 0.0008336815464057616, "loss": 3.5004, "step": 19590 }, { "epoch": 1.331362956923495, "grad_norm": 1.8982113599777222, "learning_rate": 0.0008336390813969289, "loss": 3.6612, "step": 19595 }, { "epoch": 1.331702676994157, "grad_norm": 1.99607515335083, "learning_rate": 0.0008335966163880963, "loss": 3.6858, "step": 19600 }, { "epoch": 1.3320423970648185, "grad_norm": 1.8037559986114502, 
"learning_rate": 0.0008335541513792635, "loss": 3.5927, "step": 19605 }, { "epoch": 1.3323821171354804, "grad_norm": 2.3917880058288574, "learning_rate": 0.0008335116863704308, "loss": 3.4399, "step": 19610 }, { "epoch": 1.332721837206142, "grad_norm": 2.197458028793335, "learning_rate": 0.0008334692213615981, "loss": 3.3356, "step": 19615 }, { "epoch": 1.3330615572768039, "grad_norm": 1.9957889318466187, "learning_rate": 0.0008334267563527653, "loss": 3.9218, "step": 19620 }, { "epoch": 1.3334012773474657, "grad_norm": 2.1062397956848145, "learning_rate": 0.0008333842913439325, "loss": 3.5342, "step": 19625 }, { "epoch": 1.3337409974181273, "grad_norm": 2.464247226715088, "learning_rate": 0.0008333418263350999, "loss": 3.99, "step": 19630 }, { "epoch": 1.3340807174887892, "grad_norm": 1.9465113878250122, "learning_rate": 0.0008332993613262672, "loss": 3.5916, "step": 19635 }, { "epoch": 1.334420437559451, "grad_norm": 1.9974610805511475, "learning_rate": 0.0008332568963174344, "loss": 3.4732, "step": 19640 }, { "epoch": 1.3347601576301127, "grad_norm": 1.7438633441925049, "learning_rate": 0.0008332144313086018, "loss": 3.4376, "step": 19645 }, { "epoch": 1.3350998777007745, "grad_norm": 2.230818748474121, "learning_rate": 0.000833171966299769, "loss": 3.3045, "step": 19650 }, { "epoch": 1.3354395977714364, "grad_norm": 1.6516640186309814, "learning_rate": 0.0008331295012909362, "loss": 3.4065, "step": 19655 }, { "epoch": 1.335779317842098, "grad_norm": 1.9271934032440186, "learning_rate": 0.0008330870362821036, "loss": 3.6664, "step": 19660 }, { "epoch": 1.3361190379127599, "grad_norm": 2.0613174438476562, "learning_rate": 0.0008330445712732708, "loss": 3.7616, "step": 19665 }, { "epoch": 1.3364587579834217, "grad_norm": 1.8657575845718384, "learning_rate": 0.0008330021062644382, "loss": 3.6808, "step": 19670 }, { "epoch": 1.3367984780540834, "grad_norm": 1.998151183128357, "learning_rate": 0.0008329596412556055, "loss": 3.783, "step": 19675 }, { "epoch": 
1.3371381981247452, "grad_norm": 2.466794729232788, "learning_rate": 0.0008329171762467727, "loss": 3.6569, "step": 19680 }, { "epoch": 1.337477918195407, "grad_norm": 1.8471038341522217, "learning_rate": 0.00083287471123794, "loss": 3.6792, "step": 19685 }, { "epoch": 1.3378176382660687, "grad_norm": 2.25006103515625, "learning_rate": 0.0008328322462291072, "loss": 3.5432, "step": 19690 }, { "epoch": 1.3381573583367306, "grad_norm": 1.6828203201293945, "learning_rate": 0.0008327897812202745, "loss": 3.6176, "step": 19695 }, { "epoch": 1.3384970784073924, "grad_norm": 2.045933485031128, "learning_rate": 0.0008327473162114418, "loss": 3.4444, "step": 19700 }, { "epoch": 1.338836798478054, "grad_norm": 1.8460534811019897, "learning_rate": 0.0008327048512026091, "loss": 3.5982, "step": 19705 }, { "epoch": 1.3391765185487159, "grad_norm": 2.3332979679107666, "learning_rate": 0.0008326623861937764, "loss": 3.4567, "step": 19710 }, { "epoch": 1.3395162386193777, "grad_norm": 1.7199784517288208, "learning_rate": 0.0008326199211849437, "loss": 3.3645, "step": 19715 }, { "epoch": 1.3398559586900394, "grad_norm": 2.4664340019226074, "learning_rate": 0.0008325774561761109, "loss": 3.6508, "step": 19720 }, { "epoch": 1.3401956787607012, "grad_norm": 1.8253315687179565, "learning_rate": 0.0008325349911672781, "loss": 3.5392, "step": 19725 }, { "epoch": 1.340535398831363, "grad_norm": 1.9658236503601074, "learning_rate": 0.0008324925261584455, "loss": 3.5989, "step": 19730 }, { "epoch": 1.3408751189020247, "grad_norm": 1.6805843114852905, "learning_rate": 0.0008324500611496127, "loss": 3.5926, "step": 19735 }, { "epoch": 1.3412148389726866, "grad_norm": 1.8874200582504272, "learning_rate": 0.00083240759614078, "loss": 3.254, "step": 19740 }, { "epoch": 1.3415545590433484, "grad_norm": 1.647483468055725, "learning_rate": 0.0008323651311319474, "loss": 3.6065, "step": 19745 }, { "epoch": 1.34189427911401, "grad_norm": 2.221747398376465, "learning_rate": 0.0008323226661231146, 
"loss": 3.3787, "step": 19750 }, { "epoch": 1.342233999184672, "grad_norm": 1.9015133380889893, "learning_rate": 0.0008322802011142818, "loss": 3.6157, "step": 19755 }, { "epoch": 1.3425737192553335, "grad_norm": 1.9803816080093384, "learning_rate": 0.0008322377361054492, "loss": 3.4683, "step": 19760 }, { "epoch": 1.3429134393259954, "grad_norm": 2.347476005554199, "learning_rate": 0.0008321952710966164, "loss": 3.4267, "step": 19765 }, { "epoch": 1.3432531593966572, "grad_norm": 2.5384650230407715, "learning_rate": 0.0008321528060877836, "loss": 3.5414, "step": 19770 }, { "epoch": 1.3435928794673189, "grad_norm": 2.762211322784424, "learning_rate": 0.000832110341078951, "loss": 3.3746, "step": 19775 }, { "epoch": 1.3439325995379807, "grad_norm": 1.8134292364120483, "learning_rate": 0.0008320678760701183, "loss": 3.7242, "step": 19780 }, { "epoch": 1.3442723196086424, "grad_norm": 1.926315188407898, "learning_rate": 0.0008320254110612855, "loss": 3.5811, "step": 19785 }, { "epoch": 1.3446120396793042, "grad_norm": 1.6280776262283325, "learning_rate": 0.0008319829460524528, "loss": 3.6919, "step": 19790 }, { "epoch": 1.344951759749966, "grad_norm": 1.8620539903640747, "learning_rate": 0.0008319404810436201, "loss": 3.676, "step": 19795 }, { "epoch": 1.3452914798206277, "grad_norm": 2.4940009117126465, "learning_rate": 0.0008318980160347873, "loss": 3.566, "step": 19800 }, { "epoch": 1.3456311998912895, "grad_norm": 2.2660281658172607, "learning_rate": 0.0008318555510259546, "loss": 3.5478, "step": 19805 }, { "epoch": 1.3459709199619514, "grad_norm": 1.706393837928772, "learning_rate": 0.000831813086017122, "loss": 3.3269, "step": 19810 }, { "epoch": 1.346310640032613, "grad_norm": 2.0231740474700928, "learning_rate": 0.0008317706210082892, "loss": 3.5709, "step": 19815 }, { "epoch": 1.3466503601032749, "grad_norm": 1.88991117477417, "learning_rate": 0.0008317281559994565, "loss": 3.5064, "step": 19820 }, { "epoch": 1.3469900801739367, "grad_norm": 
1.6397074460983276, "learning_rate": 0.0008316856909906237, "loss": 3.5541, "step": 19825 }, { "epoch": 1.3473298002445984, "grad_norm": 1.5647306442260742, "learning_rate": 0.000831643225981791, "loss": 3.4991, "step": 19830 }, { "epoch": 1.3476695203152602, "grad_norm": 1.6012145280838013, "learning_rate": 0.0008316007609729583, "loss": 3.7071, "step": 19835 }, { "epoch": 1.348009240385922, "grad_norm": 1.7149651050567627, "learning_rate": 0.0008315582959641255, "loss": 3.547, "step": 19840 }, { "epoch": 1.3483489604565837, "grad_norm": 1.9215894937515259, "learning_rate": 0.0008315158309552929, "loss": 3.4387, "step": 19845 }, { "epoch": 1.3486886805272456, "grad_norm": 2.4376699924468994, "learning_rate": 0.0008314733659464602, "loss": 3.74, "step": 19850 }, { "epoch": 1.3490284005979074, "grad_norm": 1.4563629627227783, "learning_rate": 0.0008314309009376274, "loss": 3.5936, "step": 19855 }, { "epoch": 1.349368120668569, "grad_norm": 2.1557438373565674, "learning_rate": 0.0008313884359287947, "loss": 3.5047, "step": 19860 }, { "epoch": 1.349707840739231, "grad_norm": 2.2544121742248535, "learning_rate": 0.000831345970919962, "loss": 3.7385, "step": 19865 }, { "epoch": 1.3500475608098927, "grad_norm": 1.8389840126037598, "learning_rate": 0.0008313035059111292, "loss": 3.6306, "step": 19870 }, { "epoch": 1.3503872808805544, "grad_norm": 2.177144765853882, "learning_rate": 0.0008312610409022964, "loss": 3.8264, "step": 19875 }, { "epoch": 1.3507270009512162, "grad_norm": 1.911105990409851, "learning_rate": 0.0008312185758934639, "loss": 3.7342, "step": 19880 }, { "epoch": 1.351066721021878, "grad_norm": 1.7025624513626099, "learning_rate": 0.0008311761108846311, "loss": 3.6617, "step": 19885 }, { "epoch": 1.3514064410925397, "grad_norm": 2.118809461593628, "learning_rate": 0.0008311336458757983, "loss": 3.4795, "step": 19890 }, { "epoch": 1.3517461611632016, "grad_norm": 1.7850449085235596, "learning_rate": 0.0008310911808669657, "loss": 3.5721, "step": 19895 }, 
{ "epoch": 1.3520858812338634, "grad_norm": 2.097015142440796, "learning_rate": 0.0008310487158581329, "loss": 3.4921, "step": 19900 }, { "epoch": 1.352425601304525, "grad_norm": 2.1296799182891846, "learning_rate": 0.0008310062508493001, "loss": 3.5383, "step": 19905 }, { "epoch": 1.352765321375187, "grad_norm": 2.1836962699890137, "learning_rate": 0.0008309637858404675, "loss": 3.5654, "step": 19910 }, { "epoch": 1.3531050414458488, "grad_norm": 1.726462960243225, "learning_rate": 0.0008309213208316348, "loss": 3.6049, "step": 19915 }, { "epoch": 1.3534447615165104, "grad_norm": 1.928788185119629, "learning_rate": 0.000830878855822802, "loss": 3.603, "step": 19920 }, { "epoch": 1.3537844815871722, "grad_norm": 1.7606205940246582, "learning_rate": 0.0008308363908139693, "loss": 3.5648, "step": 19925 }, { "epoch": 1.3541242016578339, "grad_norm": 1.768642544746399, "learning_rate": 0.0008307939258051366, "loss": 3.7626, "step": 19930 }, { "epoch": 1.3544639217284957, "grad_norm": 1.7779419422149658, "learning_rate": 0.0008307514607963038, "loss": 3.6309, "step": 19935 }, { "epoch": 1.3548036417991576, "grad_norm": 1.9249917268753052, "learning_rate": 0.0008307089957874711, "loss": 3.6268, "step": 19940 }, { "epoch": 1.3551433618698192, "grad_norm": 1.7032184600830078, "learning_rate": 0.0008306665307786384, "loss": 3.6395, "step": 19945 }, { "epoch": 1.355483081940481, "grad_norm": 1.7320231199264526, "learning_rate": 0.0008306240657698057, "loss": 3.6857, "step": 19950 }, { "epoch": 1.3558228020111427, "grad_norm": 1.9526951313018799, "learning_rate": 0.000830581600760973, "loss": 3.671, "step": 19955 }, { "epoch": 1.3561625220818045, "grad_norm": 1.7940090894699097, "learning_rate": 0.0008305391357521403, "loss": 3.7516, "step": 19960 }, { "epoch": 1.3565022421524664, "grad_norm": 1.8987723588943481, "learning_rate": 0.0008304966707433075, "loss": 3.6839, "step": 19965 }, { "epoch": 1.356841962223128, "grad_norm": 1.93777596950531, "learning_rate": 
0.0008304542057344748, "loss": 3.5507, "step": 19970 }, { "epoch": 1.3571816822937899, "grad_norm": 1.6889129877090454, "learning_rate": 0.000830411740725642, "loss": 3.5123, "step": 19975 }, { "epoch": 1.3575214023644517, "grad_norm": 1.570347547531128, "learning_rate": 0.0008303692757168093, "loss": 3.5938, "step": 19980 }, { "epoch": 1.3578611224351134, "grad_norm": 1.656009554862976, "learning_rate": 0.0008303268107079767, "loss": 3.4868, "step": 19985 }, { "epoch": 1.3582008425057752, "grad_norm": 1.4532183408737183, "learning_rate": 0.0008302843456991439, "loss": 3.5848, "step": 19990 }, { "epoch": 1.358540562576437, "grad_norm": 1.9058010578155518, "learning_rate": 0.0008302418806903112, "loss": 3.4584, "step": 19995 }, { "epoch": 1.3588802826470987, "grad_norm": 1.7279140949249268, "learning_rate": 0.0008301994156814785, "loss": 3.7339, "step": 20000 }, { "epoch": 1.3592200027177606, "grad_norm": 1.8862743377685547, "learning_rate": 0.0008301569506726457, "loss": 3.8344, "step": 20005 }, { "epoch": 1.3595597227884224, "grad_norm": 2.241901397705078, "learning_rate": 0.000830114485663813, "loss": 3.7105, "step": 20010 }, { "epoch": 1.359899442859084, "grad_norm": 1.8344684839248657, "learning_rate": 0.0008300720206549803, "loss": 3.7975, "step": 20015 }, { "epoch": 1.360239162929746, "grad_norm": 1.5561294555664062, "learning_rate": 0.0008300295556461476, "loss": 3.6899, "step": 20020 }, { "epoch": 1.3605788830004077, "grad_norm": 2.2139899730682373, "learning_rate": 0.000829987090637315, "loss": 3.4797, "step": 20025 }, { "epoch": 1.3609186030710694, "grad_norm": 1.6071172952651978, "learning_rate": 0.0008299446256284822, "loss": 3.5863, "step": 20030 }, { "epoch": 1.3612583231417312, "grad_norm": 1.787934422492981, "learning_rate": 0.0008299021606196494, "loss": 3.7427, "step": 20035 }, { "epoch": 1.361598043212393, "grad_norm": 2.1028616428375244, "learning_rate": 0.0008298596956108167, "loss": 3.7718, "step": 20040 }, { "epoch": 1.3619377632830547, 
"grad_norm": 2.019482135772705, "learning_rate": 0.000829817230601984, "loss": 3.5585, "step": 20045 }, { "epoch": 1.3622774833537166, "grad_norm": 2.0988497734069824, "learning_rate": 0.0008297747655931512, "loss": 3.6205, "step": 20050 }, { "epoch": 1.3626172034243784, "grad_norm": 2.099580764770508, "learning_rate": 0.0008297323005843186, "loss": 3.6446, "step": 20055 }, { "epoch": 1.36295692349504, "grad_norm": 1.4301972389221191, "learning_rate": 0.0008296898355754859, "loss": 3.6552, "step": 20060 }, { "epoch": 1.363296643565702, "grad_norm": 1.818860650062561, "learning_rate": 0.0008296473705666531, "loss": 3.687, "step": 20065 }, { "epoch": 1.3636363636363638, "grad_norm": 2.042128086090088, "learning_rate": 0.0008296049055578204, "loss": 3.6045, "step": 20070 }, { "epoch": 1.3639760837070254, "grad_norm": 1.998295545578003, "learning_rate": 0.0008295624405489876, "loss": 3.5911, "step": 20075 }, { "epoch": 1.3643158037776872, "grad_norm": 1.7454036474227905, "learning_rate": 0.0008295199755401549, "loss": 3.4919, "step": 20080 }, { "epoch": 1.364655523848349, "grad_norm": 1.8976943492889404, "learning_rate": 0.0008294775105313222, "loss": 3.4243, "step": 20085 }, { "epoch": 1.3649952439190107, "grad_norm": 2.1171371936798096, "learning_rate": 0.0008294350455224895, "loss": 3.6051, "step": 20090 }, { "epoch": 1.3653349639896726, "grad_norm": 1.6005185842514038, "learning_rate": 0.0008293925805136568, "loss": 3.4328, "step": 20095 }, { "epoch": 1.3656746840603342, "grad_norm": 1.785158395767212, "learning_rate": 0.0008293501155048241, "loss": 3.5156, "step": 20100 }, { "epoch": 1.366014404130996, "grad_norm": 2.37711501121521, "learning_rate": 0.0008293076504959913, "loss": 3.6854, "step": 20105 }, { "epoch": 1.366354124201658, "grad_norm": 2.7966220378875732, "learning_rate": 0.0008292651854871585, "loss": 3.4549, "step": 20110 }, { "epoch": 1.3666938442723195, "grad_norm": 2.3135435581207275, "learning_rate": 0.0008292227204783259, "loss": 3.6971, "step": 
20115 }, { "epoch": 1.3670335643429814, "grad_norm": 1.8720405101776123, "learning_rate": 0.0008291802554694931, "loss": 3.0564, "step": 20120 }, { "epoch": 1.367373284413643, "grad_norm": 1.7200326919555664, "learning_rate": 0.0008291377904606604, "loss": 3.4121, "step": 20125 }, { "epoch": 1.3677130044843049, "grad_norm": 1.8643462657928467, "learning_rate": 0.0008290953254518278, "loss": 3.4928, "step": 20130 }, { "epoch": 1.3680527245549667, "grad_norm": 2.0498712062835693, "learning_rate": 0.000829052860442995, "loss": 3.6743, "step": 20135 }, { "epoch": 1.3683924446256284, "grad_norm": 1.7233493328094482, "learning_rate": 0.0008290103954341622, "loss": 3.2304, "step": 20140 }, { "epoch": 1.3687321646962902, "grad_norm": 2.3516058921813965, "learning_rate": 0.0008289679304253296, "loss": 3.5163, "step": 20145 }, { "epoch": 1.369071884766952, "grad_norm": 2.151078701019287, "learning_rate": 0.0008289254654164968, "loss": 3.3396, "step": 20150 }, { "epoch": 1.3694116048376137, "grad_norm": 2.497443437576294, "learning_rate": 0.000828883000407664, "loss": 3.7302, "step": 20155 }, { "epoch": 1.3697513249082756, "grad_norm": 2.111124277114868, "learning_rate": 0.0008288405353988315, "loss": 3.5953, "step": 20160 }, { "epoch": 1.3700910449789374, "grad_norm": 1.880816102027893, "learning_rate": 0.0008287980703899987, "loss": 3.4907, "step": 20165 }, { "epoch": 1.370430765049599, "grad_norm": 1.974303960800171, "learning_rate": 0.0008287556053811659, "loss": 3.439, "step": 20170 }, { "epoch": 1.370770485120261, "grad_norm": 1.5322948694229126, "learning_rate": 0.0008287131403723332, "loss": 3.3644, "step": 20175 }, { "epoch": 1.3711102051909227, "grad_norm": 1.8673878908157349, "learning_rate": 0.0008286706753635005, "loss": 3.6347, "step": 20180 }, { "epoch": 1.3714499252615844, "grad_norm": 1.5910282135009766, "learning_rate": 0.0008286282103546677, "loss": 3.2604, "step": 20185 }, { "epoch": 1.3717896453322462, "grad_norm": 2.2850711345672607, "learning_rate": 
0.0008285857453458351, "loss": 3.7184, "step": 20190 }, { "epoch": 1.372129365402908, "grad_norm": 2.376132011413574, "learning_rate": 0.0008285432803370024, "loss": 3.2894, "step": 20195 }, { "epoch": 1.3724690854735697, "grad_norm": 2.559811592102051, "learning_rate": 0.0008285008153281696, "loss": 3.349, "step": 20200 }, { "epoch": 1.3728088055442316, "grad_norm": 1.5651746988296509, "learning_rate": 0.0008284583503193369, "loss": 3.3679, "step": 20205 }, { "epoch": 1.3731485256148934, "grad_norm": 1.6085213422775269, "learning_rate": 0.0008284158853105041, "loss": 3.4783, "step": 20210 }, { "epoch": 1.373488245685555, "grad_norm": 2.0396790504455566, "learning_rate": 0.0008283734203016714, "loss": 3.5001, "step": 20215 }, { "epoch": 1.373827965756217, "grad_norm": 1.7586556673049927, "learning_rate": 0.0008283309552928387, "loss": 3.3126, "step": 20220 }, { "epoch": 1.3741676858268788, "grad_norm": 1.7840750217437744, "learning_rate": 0.000828288490284006, "loss": 3.5614, "step": 20225 }, { "epoch": 1.3745074058975404, "grad_norm": 1.4242470264434814, "learning_rate": 0.0008282460252751733, "loss": 3.196, "step": 20230 }, { "epoch": 1.3748471259682022, "grad_norm": 2.305985927581787, "learning_rate": 0.0008282035602663406, "loss": 3.582, "step": 20235 }, { "epoch": 1.375186846038864, "grad_norm": 1.8997113704681396, "learning_rate": 0.0008281610952575078, "loss": 3.417, "step": 20240 }, { "epoch": 1.3755265661095257, "grad_norm": 1.8020834922790527, "learning_rate": 0.0008281186302486751, "loss": 3.4403, "step": 20245 }, { "epoch": 1.3758662861801876, "grad_norm": 1.9188740253448486, "learning_rate": 0.0008280761652398424, "loss": 3.4243, "step": 20250 }, { "epoch": 1.3762060062508494, "grad_norm": 2.1932148933410645, "learning_rate": 0.0008280337002310096, "loss": 3.4632, "step": 20255 }, { "epoch": 1.376545726321511, "grad_norm": 1.9454857110977173, "learning_rate": 0.000827991235222177, "loss": 3.4186, "step": 20260 }, { "epoch": 1.376885446392173, 
"grad_norm": 2.833801031112671, "learning_rate": 0.0008279487702133443, "loss": 3.5474, "step": 20265 }, { "epoch": 1.3772251664628345, "grad_norm": 2.0390262603759766, "learning_rate": 0.0008279063052045115, "loss": 3.8805, "step": 20270 }, { "epoch": 1.3775648865334964, "grad_norm": 2.3034262657165527, "learning_rate": 0.0008278638401956787, "loss": 3.5749, "step": 20275 }, { "epoch": 1.3779046066041583, "grad_norm": 1.6638375520706177, "learning_rate": 0.0008278213751868461, "loss": 3.5338, "step": 20280 }, { "epoch": 1.3782443266748199, "grad_norm": 2.2904069423675537, "learning_rate": 0.0008277789101780133, "loss": 3.4233, "step": 20285 }, { "epoch": 1.3785840467454817, "grad_norm": 1.6308863162994385, "learning_rate": 0.0008277364451691805, "loss": 3.6067, "step": 20290 }, { "epoch": 1.3789237668161434, "grad_norm": 1.8929437398910522, "learning_rate": 0.000827693980160348, "loss": 3.7253, "step": 20295 }, { "epoch": 1.3792634868868052, "grad_norm": 2.6426291465759277, "learning_rate": 0.0008276515151515152, "loss": 3.6291, "step": 20300 }, { "epoch": 1.379603206957467, "grad_norm": 1.9365605115890503, "learning_rate": 0.0008276090501426824, "loss": 3.2912, "step": 20305 }, { "epoch": 1.3799429270281287, "grad_norm": 1.9584057331085205, "learning_rate": 0.0008275665851338498, "loss": 3.5275, "step": 20310 }, { "epoch": 1.3802826470987906, "grad_norm": 1.7274287939071655, "learning_rate": 0.000827524120125017, "loss": 3.6367, "step": 20315 }, { "epoch": 1.3806223671694524, "grad_norm": 2.314180850982666, "learning_rate": 0.0008274816551161842, "loss": 3.6175, "step": 20320 }, { "epoch": 1.380962087240114, "grad_norm": 2.7481937408447266, "learning_rate": 0.0008274391901073515, "loss": 3.4854, "step": 20325 }, { "epoch": 1.381301807310776, "grad_norm": 1.755794644355774, "learning_rate": 0.0008273967250985189, "loss": 3.6306, "step": 20330 }, { "epoch": 1.3816415273814378, "grad_norm": 1.7835807800292969, "learning_rate": 0.0008273542600896861, "loss": 3.4059, 
"step": 20335 }, { "epoch": 1.3819812474520994, "grad_norm": 2.096898317337036, "learning_rate": 0.0008273117950808534, "loss": 3.6063, "step": 20340 }, { "epoch": 1.3823209675227612, "grad_norm": 1.8414372205734253, "learning_rate": 0.0008272693300720207, "loss": 3.557, "step": 20345 }, { "epoch": 1.382660687593423, "grad_norm": 1.8207132816314697, "learning_rate": 0.000827226865063188, "loss": 3.5426, "step": 20350 }, { "epoch": 1.3830004076640847, "grad_norm": 1.8826719522476196, "learning_rate": 0.0008271844000543552, "loss": 3.5197, "step": 20355 }, { "epoch": 1.3833401277347466, "grad_norm": 1.9557772874832153, "learning_rate": 0.0008271419350455224, "loss": 3.4022, "step": 20360 }, { "epoch": 1.3836798478054084, "grad_norm": 1.8238818645477295, "learning_rate": 0.0008270994700366899, "loss": 3.7181, "step": 20365 }, { "epoch": 1.38401956787607, "grad_norm": 1.8025892972946167, "learning_rate": 0.0008270570050278571, "loss": 3.5591, "step": 20370 }, { "epoch": 1.384359287946732, "grad_norm": 1.7565847635269165, "learning_rate": 0.0008270145400190243, "loss": 3.5785, "step": 20375 }, { "epoch": 1.3846990080173938, "grad_norm": 1.809238076210022, "learning_rate": 0.0008269720750101917, "loss": 3.8315, "step": 20380 }, { "epoch": 1.3850387280880554, "grad_norm": 2.0102925300598145, "learning_rate": 0.0008269296100013589, "loss": 3.7647, "step": 20385 }, { "epoch": 1.3853784481587172, "grad_norm": 1.6261119842529297, "learning_rate": 0.0008268871449925261, "loss": 3.3652, "step": 20390 }, { "epoch": 1.385718168229379, "grad_norm": 1.8718252182006836, "learning_rate": 0.0008268446799836935, "loss": 3.575, "step": 20395 }, { "epoch": 1.3860578883000407, "grad_norm": 1.8014845848083496, "learning_rate": 0.0008268022149748608, "loss": 3.533, "step": 20400 }, { "epoch": 1.3863976083707026, "grad_norm": 2.324538469314575, "learning_rate": 0.000826759749966028, "loss": 3.4829, "step": 20405 }, { "epoch": 1.3867373284413644, "grad_norm": 1.742626667022705, 
"learning_rate": 0.0008267172849571954, "loss": 3.4503, "step": 20410 }, { "epoch": 1.387077048512026, "grad_norm": 2.208207130432129, "learning_rate": 0.0008266748199483626, "loss": 3.6384, "step": 20415 }, { "epoch": 1.387416768582688, "grad_norm": 1.5769950151443481, "learning_rate": 0.0008266323549395298, "loss": 3.6935, "step": 20420 }, { "epoch": 1.3877564886533498, "grad_norm": 1.71384596824646, "learning_rate": 0.0008265898899306971, "loss": 3.5341, "step": 20425 }, { "epoch": 1.3880962087240114, "grad_norm": 1.7690469026565552, "learning_rate": 0.0008265474249218644, "loss": 3.5139, "step": 20430 }, { "epoch": 1.3884359287946733, "grad_norm": 1.9111394882202148, "learning_rate": 0.0008265049599130317, "loss": 3.6672, "step": 20435 }, { "epoch": 1.3887756488653349, "grad_norm": 1.9258511066436768, "learning_rate": 0.000826462494904199, "loss": 3.5303, "step": 20440 }, { "epoch": 1.3891153689359967, "grad_norm": 1.8523157835006714, "learning_rate": 0.0008264200298953663, "loss": 3.783, "step": 20445 }, { "epoch": 1.3894550890066586, "grad_norm": 2.3791370391845703, "learning_rate": 0.0008263775648865335, "loss": 3.6623, "step": 20450 }, { "epoch": 1.3897948090773202, "grad_norm": 1.712935447692871, "learning_rate": 0.0008263350998777008, "loss": 3.3988, "step": 20455 }, { "epoch": 1.390134529147982, "grad_norm": 1.7519152164459229, "learning_rate": 0.000826292634868868, "loss": 3.7903, "step": 20460 }, { "epoch": 1.3904742492186437, "grad_norm": 1.7459465265274048, "learning_rate": 0.0008262501698600353, "loss": 3.5981, "step": 20465 }, { "epoch": 1.3908139692893056, "grad_norm": 2.054600715637207, "learning_rate": 0.0008262077048512027, "loss": 3.4036, "step": 20470 }, { "epoch": 1.3911536893599674, "grad_norm": 1.9884319305419922, "learning_rate": 0.0008261652398423699, "loss": 3.5675, "step": 20475 }, { "epoch": 1.391493409430629, "grad_norm": 2.006743907928467, "learning_rate": 0.0008261227748335372, "loss": 3.7049, "step": 20480 }, { "epoch": 
1.391833129501291, "grad_norm": 1.8970950841903687, "learning_rate": 0.0008260803098247045, "loss": 3.4058, "step": 20485 }, { "epoch": 1.3921728495719528, "grad_norm": 1.9553558826446533, "learning_rate": 0.0008260378448158717, "loss": 3.5783, "step": 20490 }, { "epoch": 1.3925125696426144, "grad_norm": 2.124760866165161, "learning_rate": 0.000825995379807039, "loss": 3.4063, "step": 20495 }, { "epoch": 1.3928522897132762, "grad_norm": 2.423436403274536, "learning_rate": 0.0008259529147982063, "loss": 3.5992, "step": 20500 }, { "epoch": 1.393192009783938, "grad_norm": 1.9312231540679932, "learning_rate": 0.0008259104497893736, "loss": 3.5574, "step": 20505 }, { "epoch": 1.3935317298545997, "grad_norm": 2.0648386478424072, "learning_rate": 0.0008258679847805408, "loss": 3.4502, "step": 20510 }, { "epoch": 1.3938714499252616, "grad_norm": 1.9824024438858032, "learning_rate": 0.0008258255197717082, "loss": 3.4843, "step": 20515 }, { "epoch": 1.3942111699959234, "grad_norm": 2.56807541847229, "learning_rate": 0.0008257830547628754, "loss": 3.6887, "step": 20520 }, { "epoch": 1.394550890066585, "grad_norm": 2.1300277709960938, "learning_rate": 0.0008257405897540426, "loss": 3.6084, "step": 20525 }, { "epoch": 1.394890610137247, "grad_norm": 2.253253698348999, "learning_rate": 0.00082569812474521, "loss": 3.4234, "step": 20530 }, { "epoch": 1.3952303302079088, "grad_norm": 1.8273035287857056, "learning_rate": 0.0008256556597363772, "loss": 3.62, "step": 20535 }, { "epoch": 1.3955700502785704, "grad_norm": 1.572648525238037, "learning_rate": 0.0008256131947275445, "loss": 3.7446, "step": 20540 }, { "epoch": 1.3959097703492322, "grad_norm": 1.7596142292022705, "learning_rate": 0.0008255707297187119, "loss": 3.5504, "step": 20545 }, { "epoch": 1.396249490419894, "grad_norm": 2.3391783237457275, "learning_rate": 0.0008255282647098791, "loss": 3.3822, "step": 20550 }, { "epoch": 1.3965892104905557, "grad_norm": 1.5084131956100464, "learning_rate": 0.0008254857997010463, 
"loss": 3.3516, "step": 20555 }, { "epoch": 1.3969289305612176, "grad_norm": 1.5234606266021729, "learning_rate": 0.0008254433346922136, "loss": 3.4121, "step": 20560 }, { "epoch": 1.3972686506318794, "grad_norm": 1.7459993362426758, "learning_rate": 0.0008254008696833809, "loss": 3.5869, "step": 20565 }, { "epoch": 1.397608370702541, "grad_norm": 1.805911660194397, "learning_rate": 0.0008253584046745481, "loss": 3.46, "step": 20570 }, { "epoch": 1.397948090773203, "grad_norm": 1.813320517539978, "learning_rate": 0.0008253159396657155, "loss": 3.1994, "step": 20575 }, { "epoch": 1.3982878108438648, "grad_norm": 2.1924545764923096, "learning_rate": 0.0008252734746568828, "loss": 3.5728, "step": 20580 }, { "epoch": 1.3986275309145264, "grad_norm": 2.1803767681121826, "learning_rate": 0.00082523100964805, "loss": 3.4489, "step": 20585 }, { "epoch": 1.3989672509851883, "grad_norm": 1.8406882286071777, "learning_rate": 0.0008251885446392173, "loss": 3.6148, "step": 20590 }, { "epoch": 1.3993069710558501, "grad_norm": 2.0551605224609375, "learning_rate": 0.0008251460796303846, "loss": 3.4389, "step": 20595 }, { "epoch": 1.3996466911265117, "grad_norm": 2.411487102508545, "learning_rate": 0.0008251036146215518, "loss": 3.5641, "step": 20600 }, { "epoch": 1.3999864111971736, "grad_norm": 1.9363917112350464, "learning_rate": 0.0008250611496127191, "loss": 3.7108, "step": 20605 }, { "epoch": 1.4003261312678352, "grad_norm": 2.0332870483398438, "learning_rate": 0.0008250186846038864, "loss": 3.6625, "step": 20610 }, { "epoch": 1.400665851338497, "grad_norm": 1.405763864517212, "learning_rate": 0.0008249762195950537, "loss": 3.6915, "step": 20615 }, { "epoch": 1.401005571409159, "grad_norm": 1.6673496961593628, "learning_rate": 0.000824933754586221, "loss": 3.4824, "step": 20620 }, { "epoch": 1.4013452914798206, "grad_norm": 2.19793963432312, "learning_rate": 0.0008248912895773882, "loss": 3.411, "step": 20625 }, { "epoch": 1.4016850115504824, "grad_norm": 1.2796305418014526, 
"learning_rate": 0.0008248488245685555, "loss": 3.516, "step": 20630 }, { "epoch": 1.402024731621144, "grad_norm": 1.965120792388916, "learning_rate": 0.0008248063595597228, "loss": 3.3671, "step": 20635 }, { "epoch": 1.402364451691806, "grad_norm": 1.7035589218139648, "learning_rate": 0.00082476389455089, "loss": 3.9111, "step": 20640 }, { "epoch": 1.4027041717624678, "grad_norm": 2.2678356170654297, "learning_rate": 0.0008247214295420574, "loss": 3.8655, "step": 20645 }, { "epoch": 1.4030438918331294, "grad_norm": 2.2734320163726807, "learning_rate": 0.0008246789645332247, "loss": 3.4971, "step": 20650 }, { "epoch": 1.4033836119037912, "grad_norm": 1.6919182538986206, "learning_rate": 0.0008246364995243919, "loss": 3.3202, "step": 20655 }, { "epoch": 1.403723331974453, "grad_norm": 1.8140507936477661, "learning_rate": 0.0008245940345155591, "loss": 3.5554, "step": 20660 }, { "epoch": 1.4040630520451147, "grad_norm": 1.8901423215866089, "learning_rate": 0.0008245515695067265, "loss": 3.7286, "step": 20665 }, { "epoch": 1.4044027721157766, "grad_norm": 1.7965500354766846, "learning_rate": 0.0008245091044978937, "loss": 3.5365, "step": 20670 }, { "epoch": 1.4047424921864384, "grad_norm": 1.7362486124038696, "learning_rate": 0.0008244666394890609, "loss": 3.3706, "step": 20675 }, { "epoch": 1.4050822122571, "grad_norm": 2.000021457672119, "learning_rate": 0.0008244241744802284, "loss": 3.6929, "step": 20680 }, { "epoch": 1.405421932327762, "grad_norm": 1.5991675853729248, "learning_rate": 0.0008243817094713956, "loss": 3.4991, "step": 20685 }, { "epoch": 1.4057616523984238, "grad_norm": 1.6136271953582764, "learning_rate": 0.0008243392444625629, "loss": 3.3243, "step": 20690 }, { "epoch": 1.4061013724690854, "grad_norm": 1.8487865924835205, "learning_rate": 0.0008242967794537302, "loss": 3.5285, "step": 20695 }, { "epoch": 1.4064410925397473, "grad_norm": 2.0552427768707275, "learning_rate": 0.0008242543144448974, "loss": 3.6701, "step": 20700 }, { "epoch": 
1.406780812610409, "grad_norm": 1.8557356595993042, "learning_rate": 0.0008242118494360647, "loss": 3.5656, "step": 20705 }, { "epoch": 1.4071205326810707, "grad_norm": 1.8759557008743286, "learning_rate": 0.000824169384427232, "loss": 3.8317, "step": 20710 }, { "epoch": 1.4074602527517326, "grad_norm": 1.8882561922073364, "learning_rate": 0.0008241269194183993, "loss": 3.484, "step": 20715 }, { "epoch": 1.4077999728223944, "grad_norm": 1.3785020112991333, "learning_rate": 0.0008240844544095666, "loss": 3.2479, "step": 20720 }, { "epoch": 1.408139692893056, "grad_norm": 1.7187740802764893, "learning_rate": 0.0008240419894007338, "loss": 3.3856, "step": 20725 }, { "epoch": 1.408479412963718, "grad_norm": 2.0054328441619873, "learning_rate": 0.0008239995243919011, "loss": 3.3714, "step": 20730 }, { "epoch": 1.4088191330343798, "grad_norm": 1.8660703897476196, "learning_rate": 0.0008239570593830684, "loss": 3.6936, "step": 20735 }, { "epoch": 1.4091588531050414, "grad_norm": 2.397674083709717, "learning_rate": 0.0008239145943742356, "loss": 3.3704, "step": 20740 }, { "epoch": 1.4094985731757033, "grad_norm": 1.755327582359314, "learning_rate": 0.000823872129365403, "loss": 3.4534, "step": 20745 }, { "epoch": 1.4098382932463651, "grad_norm": 2.723674774169922, "learning_rate": 0.0008238296643565703, "loss": 3.6165, "step": 20750 }, { "epoch": 1.4101780133170267, "grad_norm": 1.9854230880737305, "learning_rate": 0.0008237871993477375, "loss": 3.4658, "step": 20755 }, { "epoch": 1.4105177333876886, "grad_norm": 1.7600204944610596, "learning_rate": 0.0008237447343389047, "loss": 3.4499, "step": 20760 }, { "epoch": 1.4108574534583505, "grad_norm": 2.0289671421051025, "learning_rate": 0.0008237022693300721, "loss": 3.6096, "step": 20765 }, { "epoch": 1.411197173529012, "grad_norm": 1.7544114589691162, "learning_rate": 0.0008236598043212393, "loss": 3.6159, "step": 20770 }, { "epoch": 1.411536893599674, "grad_norm": 2.4109668731689453, "learning_rate": 0.0008236173393124065, 
"loss": 3.4193, "step": 20775 }, { "epoch": 1.4118766136703356, "grad_norm": 1.7033661603927612, "learning_rate": 0.000823574874303574, "loss": 3.7515, "step": 20780 }, { "epoch": 1.4122163337409974, "grad_norm": 2.1516990661621094, "learning_rate": 0.0008235324092947412, "loss": 3.5915, "step": 20785 }, { "epoch": 1.4125560538116593, "grad_norm": 2.481679916381836, "learning_rate": 0.0008234899442859084, "loss": 3.5462, "step": 20790 }, { "epoch": 1.412895773882321, "grad_norm": 1.5674717426300049, "learning_rate": 0.0008234474792770758, "loss": 3.5896, "step": 20795 }, { "epoch": 1.4132354939529828, "grad_norm": 2.113678455352783, "learning_rate": 0.000823405014268243, "loss": 3.5655, "step": 20800 }, { "epoch": 1.4135752140236444, "grad_norm": 1.9706592559814453, "learning_rate": 0.0008233625492594102, "loss": 3.5035, "step": 20805 }, { "epoch": 1.4139149340943062, "grad_norm": 1.6701948642730713, "learning_rate": 0.0008233200842505775, "loss": 3.3204, "step": 20810 }, { "epoch": 1.414254654164968, "grad_norm": 2.1162352561950684, "learning_rate": 0.0008232776192417449, "loss": 3.7422, "step": 20815 }, { "epoch": 1.4145943742356297, "grad_norm": 2.142275333404541, "learning_rate": 0.0008232351542329121, "loss": 3.6526, "step": 20820 }, { "epoch": 1.4149340943062916, "grad_norm": 1.7807503938674927, "learning_rate": 0.0008231926892240794, "loss": 3.6323, "step": 20825 }, { "epoch": 1.4152738143769534, "grad_norm": 2.066946506500244, "learning_rate": 0.0008231502242152467, "loss": 3.6259, "step": 20830 }, { "epoch": 1.415613534447615, "grad_norm": 1.589900016784668, "learning_rate": 0.0008231077592064139, "loss": 3.4356, "step": 20835 }, { "epoch": 1.415953254518277, "grad_norm": 1.6818242073059082, "learning_rate": 0.0008230652941975812, "loss": 3.6201, "step": 20840 }, { "epoch": 1.4162929745889388, "grad_norm": 2.5348472595214844, "learning_rate": 0.0008230228291887484, "loss": 3.7754, "step": 20845 }, { "epoch": 1.4166326946596004, "grad_norm": 
1.9312574863433838, "learning_rate": 0.0008229803641799158, "loss": 3.3813, "step": 20850 }, { "epoch": 1.4169724147302623, "grad_norm": 1.6732105016708374, "learning_rate": 0.0008229378991710831, "loss": 3.3638, "step": 20855 }, { "epoch": 1.417312134800924, "grad_norm": 1.8244004249572754, "learning_rate": 0.0008228954341622503, "loss": 3.4777, "step": 20860 }, { "epoch": 1.4176518548715857, "grad_norm": 2.246915102005005, "learning_rate": 0.0008228529691534176, "loss": 3.5261, "step": 20865 }, { "epoch": 1.4179915749422476, "grad_norm": 1.8550515174865723, "learning_rate": 0.0008228105041445849, "loss": 3.5688, "step": 20870 }, { "epoch": 1.4183312950129094, "grad_norm": 1.8187114000320435, "learning_rate": 0.0008227680391357521, "loss": 3.5599, "step": 20875 }, { "epoch": 1.418671015083571, "grad_norm": 1.9504708051681519, "learning_rate": 0.0008227255741269194, "loss": 3.6994, "step": 20880 }, { "epoch": 1.419010735154233, "grad_norm": 1.8152912855148315, "learning_rate": 0.0008226831091180868, "loss": 3.553, "step": 20885 }, { "epoch": 1.4193504552248948, "grad_norm": 1.9569236040115356, "learning_rate": 0.000822640644109254, "loss": 3.6208, "step": 20890 }, { "epoch": 1.4196901752955564, "grad_norm": 1.935279369354248, "learning_rate": 0.0008225981791004212, "loss": 3.7733, "step": 20895 }, { "epoch": 1.4200298953662183, "grad_norm": 1.7545326948165894, "learning_rate": 0.0008225557140915886, "loss": 3.5771, "step": 20900 }, { "epoch": 1.4203696154368801, "grad_norm": 2.3562846183776855, "learning_rate": 0.0008225132490827558, "loss": 3.3504, "step": 20905 }, { "epoch": 1.4207093355075417, "grad_norm": 2.330242395401001, "learning_rate": 0.000822470784073923, "loss": 3.832, "step": 20910 }, { "epoch": 1.4210490555782036, "grad_norm": 1.7336534261703491, "learning_rate": 0.0008224283190650904, "loss": 3.5296, "step": 20915 }, { "epoch": 1.4213887756488655, "grad_norm": 1.9733681678771973, "learning_rate": 0.0008223858540562577, "loss": 3.7599, "step": 20920 
}, { "epoch": 1.421728495719527, "grad_norm": 2.3039121627807617, "learning_rate": 0.0008223433890474249, "loss": 3.5726, "step": 20925 }, { "epoch": 1.422068215790189, "grad_norm": 1.6997582912445068, "learning_rate": 0.0008223009240385923, "loss": 3.4864, "step": 20930 }, { "epoch": 1.4224079358608508, "grad_norm": 2.063715934753418, "learning_rate": 0.0008222584590297595, "loss": 3.5982, "step": 20935 }, { "epoch": 1.4227476559315124, "grad_norm": 1.5952693223953247, "learning_rate": 0.0008222159940209267, "loss": 3.2765, "step": 20940 }, { "epoch": 1.4230873760021743, "grad_norm": 1.7435662746429443, "learning_rate": 0.000822173529012094, "loss": 3.4795, "step": 20945 }, { "epoch": 1.423427096072836, "grad_norm": 2.556755781173706, "learning_rate": 0.0008221310640032613, "loss": 3.3931, "step": 20950 }, { "epoch": 1.4237668161434978, "grad_norm": 1.7818217277526855, "learning_rate": 0.0008220885989944286, "loss": 3.7793, "step": 20955 }, { "epoch": 1.4241065362141596, "grad_norm": 1.9612815380096436, "learning_rate": 0.0008220461339855959, "loss": 3.632, "step": 20960 }, { "epoch": 1.4244462562848212, "grad_norm": 1.6057367324829102, "learning_rate": 0.0008220036689767632, "loss": 3.4116, "step": 20965 }, { "epoch": 1.424785976355483, "grad_norm": 2.0878074169158936, "learning_rate": 0.0008219612039679304, "loss": 3.6005, "step": 20970 }, { "epoch": 1.4251256964261447, "grad_norm": 2.068525791168213, "learning_rate": 0.0008219187389590977, "loss": 3.5329, "step": 20975 }, { "epoch": 1.4254654164968066, "grad_norm": 1.7708622217178345, "learning_rate": 0.000821876273950265, "loss": 3.7183, "step": 20980 }, { "epoch": 1.4258051365674684, "grad_norm": 1.8746306896209717, "learning_rate": 0.0008218338089414322, "loss": 3.5, "step": 20985 }, { "epoch": 1.42614485663813, "grad_norm": 2.2102177143096924, "learning_rate": 0.0008217913439325996, "loss": 3.3767, "step": 20990 }, { "epoch": 1.426484576708792, "grad_norm": 1.6163400411605835, "learning_rate": 
0.0008217488789237668, "loss": 3.2599, "step": 20995 }, { "epoch": 1.4268242967794538, "grad_norm": 1.7762906551361084, "learning_rate": 0.0008217064139149341, "loss": 3.4582, "step": 21000 }, { "epoch": 1.4271640168501154, "grad_norm": 1.9465874433517456, "learning_rate": 0.0008216639489061014, "loss": 3.7774, "step": 21005 }, { "epoch": 1.4275037369207773, "grad_norm": 2.1115899085998535, "learning_rate": 0.0008216214838972686, "loss": 3.6884, "step": 21010 }, { "epoch": 1.427843456991439, "grad_norm": 1.8787678480148315, "learning_rate": 0.0008215790188884359, "loss": 3.8131, "step": 21015 }, { "epoch": 1.4281831770621007, "grad_norm": 1.9283225536346436, "learning_rate": 0.0008215365538796032, "loss": 3.3845, "step": 21020 }, { "epoch": 1.4285228971327626, "grad_norm": 1.8761333227157593, "learning_rate": 0.0008214940888707705, "loss": 3.5896, "step": 21025 }, { "epoch": 1.4288626172034244, "grad_norm": 2.1877269744873047, "learning_rate": 0.0008214516238619379, "loss": 3.712, "step": 21030 }, { "epoch": 1.429202337274086, "grad_norm": 1.6467047929763794, "learning_rate": 0.0008214091588531051, "loss": 3.6636, "step": 21035 }, { "epoch": 1.429542057344748, "grad_norm": 1.5966676473617554, "learning_rate": 0.0008213666938442723, "loss": 3.7213, "step": 21040 }, { "epoch": 1.4298817774154098, "grad_norm": 1.437247633934021, "learning_rate": 0.0008213242288354397, "loss": 3.5579, "step": 21045 }, { "epoch": 1.4302214974860714, "grad_norm": 1.6508616209030151, "learning_rate": 0.0008212817638266069, "loss": 3.7262, "step": 21050 }, { "epoch": 1.4305612175567333, "grad_norm": 2.1411845684051514, "learning_rate": 0.0008212392988177741, "loss": 3.6938, "step": 21055 }, { "epoch": 1.4309009376273951, "grad_norm": 1.8890281915664673, "learning_rate": 0.0008211968338089415, "loss": 3.5585, "step": 21060 }, { "epoch": 1.4312406576980568, "grad_norm": 1.5512350797653198, "learning_rate": 0.0008211543688001088, "loss": 3.5912, "step": 21065 }, { "epoch": 1.4315803777687186, 
"grad_norm": 1.8866053819656372, "learning_rate": 0.000821111903791276, "loss": 3.4613, "step": 21070 }, { "epoch": 1.4319200978393805, "grad_norm": 1.7833759784698486, "learning_rate": 0.0008210694387824433, "loss": 3.427, "step": 21075 }, { "epoch": 1.432259817910042, "grad_norm": 2.040332317352295, "learning_rate": 0.0008210269737736106, "loss": 3.3882, "step": 21080 }, { "epoch": 1.432599537980704, "grad_norm": 2.3114938735961914, "learning_rate": 0.0008209845087647778, "loss": 3.7237, "step": 21085 }, { "epoch": 1.4329392580513658, "grad_norm": 1.9981578588485718, "learning_rate": 0.0008209420437559451, "loss": 3.5032, "step": 21090 }, { "epoch": 1.4332789781220274, "grad_norm": 1.812230110168457, "learning_rate": 0.0008208995787471125, "loss": 3.465, "step": 21095 }, { "epoch": 1.4336186981926893, "grad_norm": 2.1400647163391113, "learning_rate": 0.0008208571137382797, "loss": 3.151, "step": 21100 }, { "epoch": 1.4339584182633511, "grad_norm": 2.1303112506866455, "learning_rate": 0.000820814648729447, "loss": 3.4068, "step": 21105 }, { "epoch": 1.4342981383340128, "grad_norm": 1.724900722503662, "learning_rate": 0.0008207721837206142, "loss": 3.5972, "step": 21110 }, { "epoch": 1.4346378584046746, "grad_norm": 1.5438514947891235, "learning_rate": 0.0008207297187117815, "loss": 3.4441, "step": 21115 }, { "epoch": 1.4349775784753362, "grad_norm": 2.151691198348999, "learning_rate": 0.0008206872537029488, "loss": 3.6102, "step": 21120 }, { "epoch": 1.435317298545998, "grad_norm": 1.5222599506378174, "learning_rate": 0.000820644788694116, "loss": 3.5308, "step": 21125 }, { "epoch": 1.43565701861666, "grad_norm": 2.1043930053710938, "learning_rate": 0.0008206023236852834, "loss": 3.6736, "step": 21130 }, { "epoch": 1.4359967386873216, "grad_norm": 1.5238230228424072, "learning_rate": 0.0008205598586764507, "loss": 3.5868, "step": 21135 }, { "epoch": 1.4363364587579834, "grad_norm": 2.744475841522217, "learning_rate": 0.0008205173936676179, "loss": 3.5348, "step": 
21140 }, { "epoch": 1.436676178828645, "grad_norm": 2.043384075164795, "learning_rate": 0.0008204749286587851, "loss": 3.6, "step": 21145 }, { "epoch": 1.437015898899307, "grad_norm": 2.0762953758239746, "learning_rate": 0.0008204324636499525, "loss": 3.5776, "step": 21150 }, { "epoch": 1.4373556189699688, "grad_norm": 1.7319883108139038, "learning_rate": 0.0008203899986411197, "loss": 3.4287, "step": 21155 }, { "epoch": 1.4376953390406304, "grad_norm": 2.099475860595703, "learning_rate": 0.0008203475336322869, "loss": 3.3506, "step": 21160 }, { "epoch": 1.4380350591112923, "grad_norm": 2.344135284423828, "learning_rate": 0.0008203050686234544, "loss": 3.6266, "step": 21165 }, { "epoch": 1.438374779181954, "grad_norm": 1.8422867059707642, "learning_rate": 0.0008202626036146216, "loss": 3.8851, "step": 21170 }, { "epoch": 1.4387144992526157, "grad_norm": 2.33756422996521, "learning_rate": 0.0008202201386057888, "loss": 3.6082, "step": 21175 }, { "epoch": 1.4390542193232776, "grad_norm": 1.701798677444458, "learning_rate": 0.0008201776735969562, "loss": 3.4345, "step": 21180 }, { "epoch": 1.4393939393939394, "grad_norm": 1.5943142175674438, "learning_rate": 0.0008201352085881234, "loss": 3.7504, "step": 21185 }, { "epoch": 1.439733659464601, "grad_norm": 2.17305064201355, "learning_rate": 0.0008200927435792906, "loss": 3.4942, "step": 21190 }, { "epoch": 1.440073379535263, "grad_norm": 1.881743311882019, "learning_rate": 0.0008200502785704579, "loss": 3.5852, "step": 21195 }, { "epoch": 1.4404130996059248, "grad_norm": 2.29557466506958, "learning_rate": 0.0008200078135616253, "loss": 3.491, "step": 21200 }, { "epoch": 1.4407528196765864, "grad_norm": 2.1081085205078125, "learning_rate": 0.0008199653485527925, "loss": 3.9252, "step": 21205 }, { "epoch": 1.4410925397472483, "grad_norm": 1.7397750616073608, "learning_rate": 0.0008199228835439598, "loss": 3.5807, "step": 21210 }, { "epoch": 1.4414322598179101, "grad_norm": 1.8507689237594604, "learning_rate": 
0.0008198804185351271, "loss": 3.7233, "step": 21215 }, { "epoch": 1.4417719798885718, "grad_norm": 1.963744044303894, "learning_rate": 0.0008198379535262943, "loss": 3.5439, "step": 21220 }, { "epoch": 1.4421116999592336, "grad_norm": 2.838550090789795, "learning_rate": 0.0008197954885174616, "loss": 3.7826, "step": 21225 }, { "epoch": 1.4424514200298955, "grad_norm": 1.845935344696045, "learning_rate": 0.0008197530235086289, "loss": 3.5395, "step": 21230 }, { "epoch": 1.442791140100557, "grad_norm": 2.468733787536621, "learning_rate": 0.0008197105584997962, "loss": 3.602, "step": 21235 }, { "epoch": 1.443130860171219, "grad_norm": 1.688786506652832, "learning_rate": 0.0008196680934909635, "loss": 3.4503, "step": 21240 }, { "epoch": 1.4434705802418808, "grad_norm": 1.872418761253357, "learning_rate": 0.0008196256284821307, "loss": 3.6528, "step": 21245 }, { "epoch": 1.4438103003125424, "grad_norm": 1.8635536432266235, "learning_rate": 0.000819583163473298, "loss": 3.3485, "step": 21250 }, { "epoch": 1.4441500203832043, "grad_norm": 2.207932710647583, "learning_rate": 0.0008195406984644653, "loss": 3.6405, "step": 21255 }, { "epoch": 1.4444897404538661, "grad_norm": 1.993544578552246, "learning_rate": 0.0008194982334556325, "loss": 3.4109, "step": 21260 }, { "epoch": 1.4448294605245278, "grad_norm": 2.282886266708374, "learning_rate": 0.0008194557684467998, "loss": 3.3268, "step": 21265 }, { "epoch": 1.4451691805951896, "grad_norm": 1.6044907569885254, "learning_rate": 0.0008194133034379672, "loss": 3.5089, "step": 21270 }, { "epoch": 1.4455089006658515, "grad_norm": 3.2146170139312744, "learning_rate": 0.0008193708384291344, "loss": 3.3833, "step": 21275 }, { "epoch": 1.445848620736513, "grad_norm": 1.5499203205108643, "learning_rate": 0.0008193283734203017, "loss": 3.4371, "step": 21280 }, { "epoch": 1.446188340807175, "grad_norm": 1.9338873624801636, "learning_rate": 0.000819285908411469, "loss": 3.6319, "step": 21285 }, { "epoch": 1.4465280608778366, 
"grad_norm": 1.492163896560669, "learning_rate": 0.0008192434434026362, "loss": 3.8121, "step": 21290 }, { "epoch": 1.4468677809484984, "grad_norm": 2.311924457550049, "learning_rate": 0.0008192009783938034, "loss": 3.6342, "step": 21295 }, { "epoch": 1.4472075010191603, "grad_norm": 2.009265661239624, "learning_rate": 0.0008191585133849709, "loss": 3.6019, "step": 21300 }, { "epoch": 1.447547221089822, "grad_norm": 2.2912468910217285, "learning_rate": 0.0008191160483761381, "loss": 3.5598, "step": 21305 }, { "epoch": 1.4478869411604838, "grad_norm": 2.0142982006073, "learning_rate": 0.0008190735833673053, "loss": 3.5758, "step": 21310 }, { "epoch": 1.4482266612311454, "grad_norm": 2.4458324909210205, "learning_rate": 0.0008190311183584727, "loss": 3.5348, "step": 21315 }, { "epoch": 1.4485663813018073, "grad_norm": 1.917738914489746, "learning_rate": 0.0008189886533496399, "loss": 3.4825, "step": 21320 }, { "epoch": 1.4489061013724691, "grad_norm": 1.946356177330017, "learning_rate": 0.0008189461883408071, "loss": 3.6275, "step": 21325 }, { "epoch": 1.4492458214431307, "grad_norm": 2.1165192127227783, "learning_rate": 0.0008189037233319745, "loss": 3.5308, "step": 21330 }, { "epoch": 1.4495855415137926, "grad_norm": 1.7370351552963257, "learning_rate": 0.0008188612583231418, "loss": 3.7306, "step": 21335 }, { "epoch": 1.4499252615844545, "grad_norm": 2.1016592979431152, "learning_rate": 0.000818818793314309, "loss": 3.5028, "step": 21340 }, { "epoch": 1.450264981655116, "grad_norm": 1.7725964784622192, "learning_rate": 0.0008187763283054763, "loss": 3.7294, "step": 21345 }, { "epoch": 1.450604701725778, "grad_norm": 3.1294031143188477, "learning_rate": 0.0008187338632966436, "loss": 3.6378, "step": 21350 }, { "epoch": 1.4509444217964398, "grad_norm": 1.9446979761123657, "learning_rate": 0.0008186913982878108, "loss": 3.6937, "step": 21355 }, { "epoch": 1.4512841418671014, "grad_norm": 1.9382051229476929, "learning_rate": 0.0008186489332789781, "loss": 3.4766, 
"step": 21360 }, { "epoch": 1.4516238619377633, "grad_norm": 1.8010505437850952, "learning_rate": 0.0008186064682701454, "loss": 3.4407, "step": 21365 }, { "epoch": 1.4519635820084251, "grad_norm": 1.59963059425354, "learning_rate": 0.0008185640032613128, "loss": 3.8412, "step": 21370 }, { "epoch": 1.4523033020790868, "grad_norm": 1.7291884422302246, "learning_rate": 0.00081852153825248, "loss": 3.4838, "step": 21375 }, { "epoch": 1.4526430221497486, "grad_norm": 2.770934581756592, "learning_rate": 0.0008184790732436473, "loss": 3.3274, "step": 21380 }, { "epoch": 1.4529827422204105, "grad_norm": 1.9835455417633057, "learning_rate": 0.0008184366082348146, "loss": 3.7311, "step": 21385 }, { "epoch": 1.453322462291072, "grad_norm": 2.081803321838379, "learning_rate": 0.0008183941432259818, "loss": 3.6341, "step": 21390 }, { "epoch": 1.453662182361734, "grad_norm": 2.384094476699829, "learning_rate": 0.000818351678217149, "loss": 3.3111, "step": 21395 }, { "epoch": 1.4540019024323958, "grad_norm": 2.0259015560150146, "learning_rate": 0.0008183092132083164, "loss": 3.3839, "step": 21400 }, { "epoch": 1.4543416225030574, "grad_norm": 1.4861462116241455, "learning_rate": 0.0008182667481994837, "loss": 3.5632, "step": 21405 }, { "epoch": 1.4546813425737193, "grad_norm": 2.3625543117523193, "learning_rate": 0.0008182242831906509, "loss": 3.6652, "step": 21410 }, { "epoch": 1.4550210626443811, "grad_norm": 2.1594111919403076, "learning_rate": 0.0008181818181818183, "loss": 3.6399, "step": 21415 }, { "epoch": 1.4553607827150428, "grad_norm": 2.1392524242401123, "learning_rate": 0.0008181393531729855, "loss": 3.4178, "step": 21420 }, { "epoch": 1.4557005027857046, "grad_norm": 1.6064695119857788, "learning_rate": 0.0008180968881641527, "loss": 3.4982, "step": 21425 }, { "epoch": 1.4560402228563665, "grad_norm": 2.287001848220825, "learning_rate": 0.00081805442315532, "loss": 3.5669, "step": 21430 }, { "epoch": 1.456379942927028, "grad_norm": 1.7445263862609863, 
"learning_rate": 0.0008180119581464873, "loss": 3.4412, "step": 21435 }, { "epoch": 1.45671966299769, "grad_norm": 2.171492576599121, "learning_rate": 0.0008179694931376546, "loss": 3.6229, "step": 21440 }, { "epoch": 1.4570593830683518, "grad_norm": 1.9988853931427002, "learning_rate": 0.000817927028128822, "loss": 3.5219, "step": 21445 }, { "epoch": 1.4573991031390134, "grad_norm": 1.710379958152771, "learning_rate": 0.0008178845631199892, "loss": 3.8183, "step": 21450 }, { "epoch": 1.4577388232096753, "grad_norm": 1.7215532064437866, "learning_rate": 0.0008178420981111564, "loss": 3.5773, "step": 21455 }, { "epoch": 1.458078543280337, "grad_norm": 2.0131676197052, "learning_rate": 0.0008177996331023237, "loss": 3.351, "step": 21460 }, { "epoch": 1.4584182633509988, "grad_norm": 2.186452627182007, "learning_rate": 0.000817757168093491, "loss": 3.6947, "step": 21465 }, { "epoch": 1.4587579834216606, "grad_norm": 1.4303077459335327, "learning_rate": 0.0008177147030846582, "loss": 3.388, "step": 21470 }, { "epoch": 1.4590977034923223, "grad_norm": 2.0050175189971924, "learning_rate": 0.0008176722380758256, "loss": 3.462, "step": 21475 }, { "epoch": 1.4594374235629841, "grad_norm": 1.96678626537323, "learning_rate": 0.0008176297730669929, "loss": 3.729, "step": 21480 }, { "epoch": 1.4597771436336457, "grad_norm": 2.2215347290039062, "learning_rate": 0.0008175873080581601, "loss": 3.5722, "step": 21485 }, { "epoch": 1.4601168637043076, "grad_norm": 2.563248872756958, "learning_rate": 0.0008175448430493274, "loss": 3.9725, "step": 21490 }, { "epoch": 1.4604565837749695, "grad_norm": 1.8783595561981201, "learning_rate": 0.0008175023780404946, "loss": 3.7643, "step": 21495 }, { "epoch": 1.460796303845631, "grad_norm": 1.804848313331604, "learning_rate": 0.0008174599130316619, "loss": 3.33, "step": 21500 }, { "epoch": 1.461136023916293, "grad_norm": 2.1057116985321045, "learning_rate": 0.0008174174480228292, "loss": 3.5203, "step": 21505 }, { "epoch": 1.4614757439869548, 
"grad_norm": 1.7116907835006714, "learning_rate": 0.0008173749830139965, "loss": 3.7408, "step": 21510 }, { "epoch": 1.4618154640576164, "grad_norm": 2.234093189239502, "learning_rate": 0.0008173325180051638, "loss": 3.6633, "step": 21515 }, { "epoch": 1.4621551841282783, "grad_norm": 2.2290616035461426, "learning_rate": 0.0008172900529963311, "loss": 3.2924, "step": 21520 }, { "epoch": 1.4624949041989401, "grad_norm": 1.929660439491272, "learning_rate": 0.0008172475879874983, "loss": 3.7924, "step": 21525 }, { "epoch": 1.4628346242696018, "grad_norm": 1.9240871667861938, "learning_rate": 0.0008172051229786655, "loss": 3.4985, "step": 21530 }, { "epoch": 1.4631743443402636, "grad_norm": 2.152189254760742, "learning_rate": 0.0008171626579698329, "loss": 3.6686, "step": 21535 }, { "epoch": 1.4635140644109255, "grad_norm": 2.5755703449249268, "learning_rate": 0.0008171201929610001, "loss": 3.5757, "step": 21540 }, { "epoch": 1.463853784481587, "grad_norm": 1.8248814344406128, "learning_rate": 0.0008170777279521674, "loss": 3.3875, "step": 21545 }, { "epoch": 1.464193504552249, "grad_norm": 1.9473364353179932, "learning_rate": 0.0008170352629433348, "loss": 3.7034, "step": 21550 }, { "epoch": 1.4645332246229108, "grad_norm": 2.3959133625030518, "learning_rate": 0.000816992797934502, "loss": 3.52, "step": 21555 }, { "epoch": 1.4648729446935724, "grad_norm": 1.686409831047058, "learning_rate": 0.0008169503329256692, "loss": 3.4813, "step": 21560 }, { "epoch": 1.4652126647642343, "grad_norm": 1.5150794982910156, "learning_rate": 0.0008169078679168366, "loss": 3.754, "step": 21565 }, { "epoch": 1.4655523848348961, "grad_norm": 1.9634722471237183, "learning_rate": 0.0008168654029080038, "loss": 3.7012, "step": 21570 }, { "epoch": 1.4658921049055578, "grad_norm": 1.7065541744232178, "learning_rate": 0.000816822937899171, "loss": 3.6991, "step": 21575 }, { "epoch": 1.4662318249762196, "grad_norm": 2.0665438175201416, "learning_rate": 0.0008167804728903385, "loss": 3.8702, 
"step": 21580 }, { "epoch": 1.4665715450468815, "grad_norm": 1.8327040672302246, "learning_rate": 0.0008167380078815057, "loss": 3.4365, "step": 21585 }, { "epoch": 1.466911265117543, "grad_norm": 1.5339112281799316, "learning_rate": 0.0008166955428726729, "loss": 3.4267, "step": 21590 }, { "epoch": 1.467250985188205, "grad_norm": 2.014848470687866, "learning_rate": 0.0008166530778638402, "loss": 3.6032, "step": 21595 }, { "epoch": 1.4675907052588668, "grad_norm": 1.981432318687439, "learning_rate": 0.0008166106128550075, "loss": 3.6432, "step": 21600 }, { "epoch": 1.4679304253295284, "grad_norm": 1.7118514776229858, "learning_rate": 0.0008165681478461747, "loss": 3.6034, "step": 21605 }, { "epoch": 1.4682701454001903, "grad_norm": 2.0050599575042725, "learning_rate": 0.000816525682837342, "loss": 3.6235, "step": 21610 }, { "epoch": 1.4686098654708521, "grad_norm": 1.7234011888504028, "learning_rate": 0.0008164832178285094, "loss": 3.406, "step": 21615 }, { "epoch": 1.4689495855415138, "grad_norm": 2.5142982006073, "learning_rate": 0.0008164407528196766, "loss": 3.5457, "step": 21620 }, { "epoch": 1.4692893056121756, "grad_norm": 2.018045663833618, "learning_rate": 0.0008163982878108439, "loss": 3.2823, "step": 21625 }, { "epoch": 1.4696290256828373, "grad_norm": 1.857593059539795, "learning_rate": 0.0008163558228020111, "loss": 3.2695, "step": 21630 }, { "epoch": 1.4699687457534991, "grad_norm": 2.3206918239593506, "learning_rate": 0.0008163133577931784, "loss": 3.6665, "step": 21635 }, { "epoch": 1.470308465824161, "grad_norm": 2.165635108947754, "learning_rate": 0.0008162708927843457, "loss": 3.4056, "step": 21640 }, { "epoch": 1.4706481858948226, "grad_norm": 1.7854334115982056, "learning_rate": 0.0008162284277755129, "loss": 3.4587, "step": 21645 }, { "epoch": 1.4709879059654845, "grad_norm": 2.094360828399658, "learning_rate": 0.0008161859627666803, "loss": 3.5094, "step": 21650 }, { "epoch": 1.471327626036146, "grad_norm": 2.116670608520508, "learning_rate": 
0.0008161434977578476, "loss": 3.5061, "step": 21655 }, { "epoch": 1.471667346106808, "grad_norm": 1.7774393558502197, "learning_rate": 0.0008161010327490148, "loss": 3.3643, "step": 21660 }, { "epoch": 1.4720070661774698, "grad_norm": 2.3391337394714355, "learning_rate": 0.0008160585677401821, "loss": 3.665, "step": 21665 }, { "epoch": 1.4723467862481314, "grad_norm": 1.9790852069854736, "learning_rate": 0.0008160161027313494, "loss": 3.8241, "step": 21670 }, { "epoch": 1.4726865063187933, "grad_norm": null, "learning_rate": 0.0008159821307242832, "loss": 3.5523, "step": 21675 }, { "epoch": 1.4730262263894551, "grad_norm": 1.9034414291381836, "learning_rate": 0.0008159396657154505, "loss": 3.5342, "step": 21680 }, { "epoch": 1.4733659464601168, "grad_norm": 1.8535746335983276, "learning_rate": 0.0008158972007066178, "loss": 3.5238, "step": 21685 }, { "epoch": 1.4737056665307786, "grad_norm": 1.7122397422790527, "learning_rate": 0.000815854735697785, "loss": 3.5703, "step": 21690 }, { "epoch": 1.4740453866014405, "grad_norm": 2.0800673961639404, "learning_rate": 0.0008158122706889523, "loss": 3.824, "step": 21695 }, { "epoch": 1.474385106672102, "grad_norm": 1.7134912014007568, "learning_rate": 0.0008157698056801196, "loss": 3.4075, "step": 21700 }, { "epoch": 1.474724826742764, "grad_norm": 1.6536178588867188, "learning_rate": 0.0008157273406712869, "loss": 3.5053, "step": 21705 }, { "epoch": 1.4750645468134258, "grad_norm": 2.1085917949676514, "learning_rate": 0.0008156848756624542, "loss": 3.5383, "step": 21710 }, { "epoch": 1.4754042668840874, "grad_norm": 1.595596432685852, "learning_rate": 0.0008156424106536215, "loss": 3.6378, "step": 21715 }, { "epoch": 1.4757439869547493, "grad_norm": 2.044358253479004, "learning_rate": 0.0008155999456447887, "loss": 3.5483, "step": 21720 }, { "epoch": 1.4760837070254111, "grad_norm": 2.0392305850982666, "learning_rate": 0.000815557480635956, "loss": 3.6492, "step": 21725 }, { "epoch": 1.4764234270960728, "grad_norm": 
1.725498914718628, "learning_rate": 0.0008155150156271232, "loss": 3.5083, "step": 21730 }, { "epoch": 1.4767631471667346, "grad_norm": 1.691824197769165, "learning_rate": 0.0008154725506182905, "loss": 3.525, "step": 21735 }, { "epoch": 1.4771028672373965, "grad_norm": 2.14479398727417, "learning_rate": 0.0008154300856094579, "loss": 3.4875, "step": 21740 }, { "epoch": 1.477442587308058, "grad_norm": 2.752737522125244, "learning_rate": 0.0008153876206006251, "loss": 3.3346, "step": 21745 }, { "epoch": 1.47778230737872, "grad_norm": 1.7548120021820068, "learning_rate": 0.0008153451555917924, "loss": 3.446, "step": 21750 }, { "epoch": 1.4781220274493818, "grad_norm": 1.4530019760131836, "learning_rate": 0.0008153026905829597, "loss": 3.5656, "step": 21755 }, { "epoch": 1.4784617475200434, "grad_norm": 1.5607455968856812, "learning_rate": 0.0008152602255741269, "loss": 3.5797, "step": 21760 }, { "epoch": 1.4788014675907053, "grad_norm": 2.064924478530884, "learning_rate": 0.0008152177605652941, "loss": 3.833, "step": 21765 }, { "epoch": 1.4791411876613672, "grad_norm": 1.993611216545105, "learning_rate": 0.0008151752955564615, "loss": 3.4969, "step": 21770 }, { "epoch": 1.4794809077320288, "grad_norm": 1.9170632362365723, "learning_rate": 0.0008151328305476288, "loss": 3.5621, "step": 21775 }, { "epoch": 1.4798206278026906, "grad_norm": 1.7811414003372192, "learning_rate": 0.000815090365538796, "loss": 3.3728, "step": 21780 }, { "epoch": 1.4801603478733525, "grad_norm": 1.5098391771316528, "learning_rate": 0.0008150479005299634, "loss": 3.774, "step": 21785 }, { "epoch": 1.4805000679440141, "grad_norm": 1.9801771640777588, "learning_rate": 0.0008150054355211306, "loss": 3.6546, "step": 21790 }, { "epoch": 1.480839788014676, "grad_norm": 2.0720131397247314, "learning_rate": 0.0008149629705122978, "loss": 3.6997, "step": 21795 }, { "epoch": 1.4811795080853376, "grad_norm": 2.2321603298187256, "learning_rate": 0.0008149205055034652, "loss": 3.6182, "step": 21800 }, { 
"epoch": 1.4815192281559995, "grad_norm": 1.373401165008545, "learning_rate": 0.0008148780404946324, "loss": 3.7851, "step": 21805 }, { "epoch": 1.4818589482266613, "grad_norm": 1.4113408327102661, "learning_rate": 0.0008148355754857997, "loss": 3.5979, "step": 21810 }, { "epoch": 1.482198668297323, "grad_norm": 2.310774803161621, "learning_rate": 0.0008147931104769671, "loss": 3.758, "step": 21815 }, { "epoch": 1.4825383883679848, "grad_norm": 1.6175768375396729, "learning_rate": 0.0008147506454681343, "loss": 3.3196, "step": 21820 }, { "epoch": 1.4828781084386464, "grad_norm": 1.6821767091751099, "learning_rate": 0.0008147081804593015, "loss": 3.9147, "step": 21825 }, { "epoch": 1.4832178285093083, "grad_norm": 1.7885135412216187, "learning_rate": 0.0008146657154504688, "loss": 3.5506, "step": 21830 }, { "epoch": 1.4835575485799701, "grad_norm": 2.344935417175293, "learning_rate": 0.0008146232504416361, "loss": 3.7582, "step": 21835 }, { "epoch": 1.4838972686506318, "grad_norm": 2.052231550216675, "learning_rate": 0.0008145807854328033, "loss": 3.5354, "step": 21840 }, { "epoch": 1.4842369887212936, "grad_norm": 1.8413223028182983, "learning_rate": 0.0008145383204239707, "loss": 3.5697, "step": 21845 }, { "epoch": 1.4845767087919555, "grad_norm": 1.9265248775482178, "learning_rate": 0.000814495855415138, "loss": 3.5268, "step": 21850 }, { "epoch": 1.484916428862617, "grad_norm": 2.5093252658843994, "learning_rate": 0.0008144533904063052, "loss": 3.7859, "step": 21855 }, { "epoch": 1.485256148933279, "grad_norm": 2.0984692573547363, "learning_rate": 0.0008144109253974725, "loss": 3.5797, "step": 21860 }, { "epoch": 1.4855958690039408, "grad_norm": 1.839085340499878, "learning_rate": 0.0008143684603886397, "loss": 3.7147, "step": 21865 }, { "epoch": 1.4859355890746024, "grad_norm": 2.1969246864318848, "learning_rate": 0.000814325995379807, "loss": 3.4581, "step": 21870 }, { "epoch": 1.4862753091452643, "grad_norm": 1.814962387084961, "learning_rate": 
0.0008142835303709743, "loss": 3.4773, "step": 21875 }, { "epoch": 1.4866150292159261, "grad_norm": 1.9843443632125854, "learning_rate": 0.0008142410653621416, "loss": 3.613, "step": 21880 }, { "epoch": 1.4869547492865878, "grad_norm": 1.4679371118545532, "learning_rate": 0.0008141986003533089, "loss": 3.2548, "step": 21885 }, { "epoch": 1.4872944693572496, "grad_norm": 2.0806329250335693, "learning_rate": 0.0008141561353444762, "loss": 3.4406, "step": 21890 }, { "epoch": 1.4876341894279115, "grad_norm": 1.9348266124725342, "learning_rate": 0.0008141136703356434, "loss": 3.588, "step": 21895 }, { "epoch": 1.487973909498573, "grad_norm": 2.2532522678375244, "learning_rate": 0.0008140712053268107, "loss": 3.4911, "step": 21900 }, { "epoch": 1.488313629569235, "grad_norm": 2.999166250228882, "learning_rate": 0.000814028740317978, "loss": 3.674, "step": 21905 }, { "epoch": 1.4886533496398968, "grad_norm": 1.7550327777862549, "learning_rate": 0.0008139862753091452, "loss": 3.2666, "step": 21910 }, { "epoch": 1.4889930697105584, "grad_norm": 2.324232816696167, "learning_rate": 0.0008139438103003127, "loss": 3.4656, "step": 21915 }, { "epoch": 1.4893327897812203, "grad_norm": 2.321166515350342, "learning_rate": 0.0008139013452914799, "loss": 3.4271, "step": 21920 }, { "epoch": 1.4896725098518822, "grad_norm": 2.2608895301818848, "learning_rate": 0.0008138588802826471, "loss": 3.3646, "step": 21925 }, { "epoch": 1.4900122299225438, "grad_norm": 1.9438616037368774, "learning_rate": 0.0008138164152738144, "loss": 3.6873, "step": 21930 }, { "epoch": 1.4903519499932056, "grad_norm": 1.6181762218475342, "learning_rate": 0.0008137739502649817, "loss": 3.5325, "step": 21935 }, { "epoch": 1.4906916700638675, "grad_norm": 1.7751147747039795, "learning_rate": 0.0008137314852561489, "loss": 3.7, "step": 21940 }, { "epoch": 1.4910313901345291, "grad_norm": 2.1270577907562256, "learning_rate": 0.0008136890202473162, "loss": 3.593, "step": 21945 }, { "epoch": 1.491371110205191, 
"grad_norm": 2.1331870555877686, "learning_rate": 0.0008136465552384836, "loss": 3.6773, "step": 21950 }, { "epoch": 1.4917108302758528, "grad_norm": 1.7900360822677612, "learning_rate": 0.0008136040902296508, "loss": 3.5067, "step": 21955 }, { "epoch": 1.4920505503465145, "grad_norm": 1.6077015399932861, "learning_rate": 0.0008135616252208181, "loss": 3.5275, "step": 21960 }, { "epoch": 1.4923902704171763, "grad_norm": 2.020085573196411, "learning_rate": 0.0008135191602119853, "loss": 3.4159, "step": 21965 }, { "epoch": 1.492729990487838, "grad_norm": 2.116680860519409, "learning_rate": 0.0008134766952031526, "loss": 3.4018, "step": 21970 }, { "epoch": 1.4930697105584998, "grad_norm": 2.1681416034698486, "learning_rate": 0.0008134342301943199, "loss": 3.6733, "step": 21975 }, { "epoch": 1.4934094306291616, "grad_norm": 1.7474652528762817, "learning_rate": 0.0008133917651854871, "loss": 3.617, "step": 21980 }, { "epoch": 1.4937491506998233, "grad_norm": 2.538402557373047, "learning_rate": 0.0008133493001766545, "loss": 3.3582, "step": 21985 }, { "epoch": 1.4940888707704851, "grad_norm": 1.4879124164581299, "learning_rate": 0.0008133068351678218, "loss": 3.4833, "step": 21990 }, { "epoch": 1.4944285908411468, "grad_norm": 2.0278806686401367, "learning_rate": 0.000813264370158989, "loss": 3.5507, "step": 21995 }, { "epoch": 1.4947683109118086, "grad_norm": 1.5924593210220337, "learning_rate": 0.0008132219051501563, "loss": 3.6826, "step": 22000 }, { "epoch": 1.4951080309824705, "grad_norm": 2.263540029525757, "learning_rate": 0.0008131794401413236, "loss": 3.7531, "step": 22005 }, { "epoch": 1.495447751053132, "grad_norm": 1.968785047531128, "learning_rate": 0.0008131369751324908, "loss": 3.441, "step": 22010 }, { "epoch": 1.495787471123794, "grad_norm": 2.101916551589966, "learning_rate": 0.000813094510123658, "loss": 3.6159, "step": 22015 }, { "epoch": 1.4961271911944558, "grad_norm": 1.7991074323654175, "learning_rate": 0.0008130520451148255, "loss": 3.6162, 
"step": 22020 }, { "epoch": 1.4964669112651174, "grad_norm": 1.6815059185028076, "learning_rate": 0.0008130095801059927, "loss": 3.4129, "step": 22025 }, { "epoch": 1.4968066313357793, "grad_norm": 1.8529598712921143, "learning_rate": 0.0008129671150971599, "loss": 3.3379, "step": 22030 }, { "epoch": 1.4971463514064411, "grad_norm": 1.8720238208770752, "learning_rate": 0.0008129246500883273, "loss": 3.7303, "step": 22035 }, { "epoch": 1.4974860714771028, "grad_norm": 1.812760591506958, "learning_rate": 0.0008128821850794945, "loss": 3.4976, "step": 22040 }, { "epoch": 1.4978257915477646, "grad_norm": 1.760554552078247, "learning_rate": 0.0008128397200706617, "loss": 3.5071, "step": 22045 }, { "epoch": 1.4981655116184265, "grad_norm": 1.9054664373397827, "learning_rate": 0.0008127972550618292, "loss": 3.6065, "step": 22050 }, { "epoch": 1.4985052316890881, "grad_norm": 1.901766061782837, "learning_rate": 0.0008127547900529964, "loss": 3.4121, "step": 22055 }, { "epoch": 1.49884495175975, "grad_norm": 1.6979860067367554, "learning_rate": 0.0008127123250441636, "loss": 3.6853, "step": 22060 }, { "epoch": 1.4991846718304118, "grad_norm": 1.6335610151290894, "learning_rate": 0.000812669860035331, "loss": 3.8912, "step": 22065 }, { "epoch": 1.4995243919010735, "grad_norm": 1.7216668128967285, "learning_rate": 0.0008126273950264982, "loss": 3.4179, "step": 22070 }, { "epoch": 1.4998641119717353, "grad_norm": 1.8736950159072876, "learning_rate": 0.0008125849300176654, "loss": 3.6372, "step": 22075 }, { "epoch": 1.5002038320423972, "grad_norm": 1.8366526365280151, "learning_rate": 0.0008125424650088327, "loss": 3.6458, "step": 22080 }, { "epoch": 1.5005435521130588, "grad_norm": 1.7531993389129639, "learning_rate": 0.0008125000000000001, "loss": 3.3699, "step": 22085 }, { "epoch": 1.5008832721837206, "grad_norm": 2.1831488609313965, "learning_rate": 0.0008124575349911673, "loss": 3.4047, "step": 22090 }, { "epoch": 1.5012229922543825, "grad_norm": 2.415583848953247, 
"learning_rate": 0.0008124150699823346, "loss": 3.5614, "step": 22095 }, { "epoch": 1.5015627123250441, "grad_norm": 2.006632089614868, "learning_rate": 0.0008123726049735019, "loss": 3.3985, "step": 22100 }, { "epoch": 1.501902432395706, "grad_norm": 1.7940641641616821, "learning_rate": 0.0008123301399646691, "loss": 3.5808, "step": 22105 }, { "epoch": 1.5022421524663678, "grad_norm": 1.7732900381088257, "learning_rate": 0.0008122876749558364, "loss": 3.6895, "step": 22110 }, { "epoch": 1.5025818725370295, "grad_norm": 1.9048693180084229, "learning_rate": 0.0008122452099470036, "loss": 3.5715, "step": 22115 }, { "epoch": 1.5029215926076913, "grad_norm": 1.993883728981018, "learning_rate": 0.0008122027449381709, "loss": 3.4168, "step": 22120 }, { "epoch": 1.5032613126783532, "grad_norm": 1.7664330005645752, "learning_rate": 0.0008121602799293383, "loss": 3.5397, "step": 22125 }, { "epoch": 1.5036010327490148, "grad_norm": 1.7980175018310547, "learning_rate": 0.0008121178149205055, "loss": 3.4731, "step": 22130 }, { "epoch": 1.5039407528196764, "grad_norm": 3.057271718978882, "learning_rate": 0.0008120753499116728, "loss": 3.5531, "step": 22135 }, { "epoch": 1.5042804728903385, "grad_norm": 2.089822769165039, "learning_rate": 0.0008120328849028401, "loss": 3.5298, "step": 22140 }, { "epoch": 1.5046201929610001, "grad_norm": 1.8611682653427124, "learning_rate": 0.0008119904198940073, "loss": 3.6393, "step": 22145 }, { "epoch": 1.5049599130316618, "grad_norm": 2.425762414932251, "learning_rate": 0.0008119479548851745, "loss": 3.6859, "step": 22150 }, { "epoch": 1.5052996331023238, "grad_norm": 1.905336618423462, "learning_rate": 0.000811905489876342, "loss": 3.5409, "step": 22155 }, { "epoch": 1.5056393531729855, "grad_norm": 2.061791181564331, "learning_rate": 0.0008118630248675092, "loss": 3.676, "step": 22160 }, { "epoch": 1.505979073243647, "grad_norm": 2.249695062637329, "learning_rate": 0.0008118205598586764, "loss": 3.6697, "step": 22165 }, { "epoch": 
1.506318793314309, "grad_norm": 2.1144065856933594, "learning_rate": 0.0008117780948498438, "loss": 3.4758, "step": 22170 }, { "epoch": 1.5066585133849708, "grad_norm": 2.8637726306915283, "learning_rate": 0.000811735629841011, "loss": 3.4202, "step": 22175 }, { "epoch": 1.5069982334556324, "grad_norm": 1.8701188564300537, "learning_rate": 0.0008116931648321782, "loss": 3.524, "step": 22180 }, { "epoch": 1.5073379535262943, "grad_norm": 1.9423328638076782, "learning_rate": 0.0008116506998233456, "loss": 3.7584, "step": 22185 }, { "epoch": 1.5076776735969561, "grad_norm": 2.2046256065368652, "learning_rate": 0.0008116082348145129, "loss": 3.285, "step": 22190 }, { "epoch": 1.5080173936676178, "grad_norm": 1.9600319862365723, "learning_rate": 0.0008115657698056801, "loss": 3.6092, "step": 22195 }, { "epoch": 1.5083571137382796, "grad_norm": 1.5413305759429932, "learning_rate": 0.0008115233047968475, "loss": 3.557, "step": 22200 }, { "epoch": 1.5086968338089415, "grad_norm": 2.263040065765381, "learning_rate": 0.0008114808397880147, "loss": 3.7583, "step": 22205 }, { "epoch": 1.5090365538796031, "grad_norm": 2.123072862625122, "learning_rate": 0.0008114383747791819, "loss": 3.4726, "step": 22210 }, { "epoch": 1.509376273950265, "grad_norm": 1.5288366079330444, "learning_rate": 0.0008113959097703492, "loss": 3.6606, "step": 22215 }, { "epoch": 1.5097159940209268, "grad_norm": 2.0453927516937256, "learning_rate": 0.0008113534447615165, "loss": 3.415, "step": 22220 }, { "epoch": 1.5100557140915885, "grad_norm": 1.9949939250946045, "learning_rate": 0.0008113109797526838, "loss": 3.5155, "step": 22225 }, { "epoch": 1.5103954341622503, "grad_norm": 2.2232770919799805, "learning_rate": 0.0008112685147438511, "loss": 3.4322, "step": 22230 }, { "epoch": 1.5107351542329122, "grad_norm": 2.1504604816436768, "learning_rate": 0.0008112260497350184, "loss": 3.4224, "step": 22235 }, { "epoch": 1.5110748743035738, "grad_norm": 1.7511768341064453, "learning_rate": 
0.0008111835847261856, "loss": 3.5954, "step": 22240 }, { "epoch": 1.5114145943742356, "grad_norm": 1.6291862726211548, "learning_rate": 0.0008111411197173529, "loss": 3.3583, "step": 22245 }, { "epoch": 1.5117543144448975, "grad_norm": 1.8731166124343872, "learning_rate": 0.0008110986547085202, "loss": 3.6365, "step": 22250 }, { "epoch": 1.5120940345155591, "grad_norm": 2.010645627975464, "learning_rate": 0.0008110561896996874, "loss": 3.6996, "step": 22255 }, { "epoch": 1.512433754586221, "grad_norm": 1.739433765411377, "learning_rate": 0.0008110137246908548, "loss": 3.3316, "step": 22260 }, { "epoch": 1.5127734746568828, "grad_norm": 1.6362615823745728, "learning_rate": 0.000810971259682022, "loss": 3.499, "step": 22265 }, { "epoch": 1.5131131947275445, "grad_norm": 2.142382860183716, "learning_rate": 0.0008109287946731894, "loss": 3.4739, "step": 22270 }, { "epoch": 1.5134529147982063, "grad_norm": 1.8620973825454712, "learning_rate": 0.0008108863296643566, "loss": 3.3957, "step": 22275 }, { "epoch": 1.5137926348688682, "grad_norm": 2.80727219581604, "learning_rate": 0.0008108438646555238, "loss": 3.4349, "step": 22280 }, { "epoch": 1.5141323549395298, "grad_norm": 2.0356650352478027, "learning_rate": 0.0008108013996466912, "loss": 3.5902, "step": 22285 }, { "epoch": 1.5144720750101917, "grad_norm": 1.906355381011963, "learning_rate": 0.0008107589346378584, "loss": 3.6439, "step": 22290 }, { "epoch": 1.5148117950808535, "grad_norm": 2.376046657562256, "learning_rate": 0.0008107164696290257, "loss": 3.4925, "step": 22295 }, { "epoch": 1.5151515151515151, "grad_norm": 1.9598703384399414, "learning_rate": 0.0008106740046201931, "loss": 3.6682, "step": 22300 }, { "epoch": 1.5154912352221768, "grad_norm": 1.7499949932098389, "learning_rate": 0.0008106315396113603, "loss": 3.7712, "step": 22305 }, { "epoch": 1.5158309552928388, "grad_norm": 2.050539970397949, "learning_rate": 0.0008105890746025275, "loss": 3.5035, "step": 22310 }, { "epoch": 1.5161706753635005, 
"grad_norm": 1.8813397884368896, "learning_rate": 0.0008105466095936948, "loss": 3.6589, "step": 22315 }, { "epoch": 1.516510395434162, "grad_norm": 1.7713863849639893, "learning_rate": 0.0008105041445848621, "loss": 3.6044, "step": 22320 }, { "epoch": 1.5168501155048242, "grad_norm": 2.529733419418335, "learning_rate": 0.0008104616795760293, "loss": 3.4861, "step": 22325 }, { "epoch": 1.5171898355754858, "grad_norm": 2.244767665863037, "learning_rate": 0.0008104192145671967, "loss": 3.5657, "step": 22330 }, { "epoch": 1.5175295556461474, "grad_norm": 1.5805888175964355, "learning_rate": 0.000810376749558364, "loss": 3.5195, "step": 22335 }, { "epoch": 1.5178692757168093, "grad_norm": 2.0875790119171143, "learning_rate": 0.0008103342845495312, "loss": 3.344, "step": 22340 }, { "epoch": 1.5182089957874711, "grad_norm": 1.8490417003631592, "learning_rate": 0.0008102918195406985, "loss": 3.4107, "step": 22345 }, { "epoch": 1.5185487158581328, "grad_norm": 1.7992706298828125, "learning_rate": 0.0008102493545318658, "loss": 3.4698, "step": 22350 }, { "epoch": 1.5188884359287946, "grad_norm": 1.91929292678833, "learning_rate": 0.000810206889523033, "loss": 3.3806, "step": 22355 }, { "epoch": 1.5192281559994565, "grad_norm": 1.929587721824646, "learning_rate": 0.0008101644245142003, "loss": 3.6831, "step": 22360 }, { "epoch": 1.5195678760701181, "grad_norm": 2.19490122795105, "learning_rate": 0.0008101219595053676, "loss": 3.3706, "step": 22365 }, { "epoch": 1.51990759614078, "grad_norm": 1.6874310970306396, "learning_rate": 0.0008100794944965349, "loss": 3.349, "step": 22370 }, { "epoch": 1.5202473162114418, "grad_norm": 1.8229236602783203, "learning_rate": 0.0008100370294877022, "loss": 3.6575, "step": 22375 }, { "epoch": 1.5205870362821035, "grad_norm": 1.6561843156814575, "learning_rate": 0.0008099945644788694, "loss": 3.6852, "step": 22380 }, { "epoch": 1.5209267563527653, "grad_norm": 1.725564956665039, "learning_rate": 0.0008099520994700367, "loss": 3.6786, "step": 
22385 }, { "epoch": 1.5212664764234272, "grad_norm": 1.9937971830368042, "learning_rate": 0.000809909634461204, "loss": 3.5678, "step": 22390 }, { "epoch": 1.5216061964940888, "grad_norm": 2.399339199066162, "learning_rate": 0.0008098671694523712, "loss": 3.7188, "step": 22395 }, { "epoch": 1.5219459165647506, "grad_norm": 1.737396001815796, "learning_rate": 0.0008098247044435386, "loss": 3.7098, "step": 22400 }, { "epoch": 1.5222856366354125, "grad_norm": 2.229586362838745, "learning_rate": 0.0008097822394347059, "loss": 3.5875, "step": 22405 }, { "epoch": 1.5226253567060741, "grad_norm": 1.648237943649292, "learning_rate": 0.0008097397744258731, "loss": 3.948, "step": 22410 }, { "epoch": 1.522965076776736, "grad_norm": 1.5115975141525269, "learning_rate": 0.0008096973094170403, "loss": 3.6859, "step": 22415 }, { "epoch": 1.5233047968473978, "grad_norm": 2.3105862140655518, "learning_rate": 0.0008096548444082077, "loss": 3.5502, "step": 22420 }, { "epoch": 1.5236445169180595, "grad_norm": 1.4358019828796387, "learning_rate": 0.0008096123793993749, "loss": 3.6396, "step": 22425 }, { "epoch": 1.5239842369887213, "grad_norm": 1.9484699964523315, "learning_rate": 0.0008095699143905421, "loss": 3.3417, "step": 22430 }, { "epoch": 1.5243239570593832, "grad_norm": 2.0373873710632324, "learning_rate": 0.0008095274493817096, "loss": 3.4473, "step": 22435 }, { "epoch": 1.5246636771300448, "grad_norm": 2.1632535457611084, "learning_rate": 0.0008094849843728768, "loss": 3.5933, "step": 22440 }, { "epoch": 1.5250033972007067, "grad_norm": 2.027787923812866, "learning_rate": 0.000809442519364044, "loss": 3.7389, "step": 22445 }, { "epoch": 1.5253431172713685, "grad_norm": 2.4250235557556152, "learning_rate": 0.0008094000543552114, "loss": 3.4673, "step": 22450 }, { "epoch": 1.5256828373420301, "grad_norm": 1.7587496042251587, "learning_rate": 0.0008093575893463786, "loss": 3.5355, "step": 22455 }, { "epoch": 1.526022557412692, "grad_norm": 2.0473415851593018, "learning_rate": 
0.0008093151243375458, "loss": 3.6866, "step": 22460 }, { "epoch": 1.5263622774833538, "grad_norm": 1.7871379852294922, "learning_rate": 0.0008092726593287131, "loss": 3.6226, "step": 22465 }, { "epoch": 1.5267019975540155, "grad_norm": 1.7367722988128662, "learning_rate": 0.0008092301943198805, "loss": 3.4791, "step": 22470 }, { "epoch": 1.527041717624677, "grad_norm": 2.0475194454193115, "learning_rate": 0.0008091877293110477, "loss": 3.7739, "step": 22475 }, { "epoch": 1.5273814376953392, "grad_norm": 2.153505802154541, "learning_rate": 0.000809145264302215, "loss": 3.5219, "step": 22480 }, { "epoch": 1.5277211577660008, "grad_norm": 1.4816542863845825, "learning_rate": 0.0008091027992933823, "loss": 3.6481, "step": 22485 }, { "epoch": 1.5280608778366624, "grad_norm": 1.9317747354507446, "learning_rate": 0.0008090603342845495, "loss": 3.6049, "step": 22490 }, { "epoch": 1.5284005979073245, "grad_norm": 1.984548568725586, "learning_rate": 0.0008090178692757168, "loss": 3.6521, "step": 22495 }, { "epoch": 1.5287403179779862, "grad_norm": 2.3924145698547363, "learning_rate": 0.000808975404266884, "loss": 3.436, "step": 22500 }, { "epoch": 1.5290800380486478, "grad_norm": 1.9245356321334839, "learning_rate": 0.0008089329392580514, "loss": 3.3851, "step": 22505 }, { "epoch": 1.5294197581193096, "grad_norm": 1.8365975618362427, "learning_rate": 0.0008088904742492187, "loss": 3.5365, "step": 22510 }, { "epoch": 1.5297594781899715, "grad_norm": 11.516547203063965, "learning_rate": 0.0008088480092403859, "loss": 3.3691, "step": 22515 }, { "epoch": 1.5300991982606331, "grad_norm": 2.1957104206085205, "learning_rate": 0.0008088055442315532, "loss": 3.3277, "step": 22520 }, { "epoch": 1.530438918331295, "grad_norm": 1.8284063339233398, "learning_rate": 0.0008087630792227205, "loss": 3.6457, "step": 22525 }, { "epoch": 1.5307786384019568, "grad_norm": 1.9554513692855835, "learning_rate": 0.0008087206142138877, "loss": 3.4371, "step": 22530 }, { "epoch": 1.5311183584726185, 
"grad_norm": 1.6514474153518677, "learning_rate": 0.000808678149205055, "loss": 3.7177, "step": 22535 }, { "epoch": 1.5314580785432803, "grad_norm": 2.1120784282684326, "learning_rate": 0.0008086356841962224, "loss": 3.5928, "step": 22540 }, { "epoch": 1.5317977986139422, "grad_norm": 1.7289270162582397, "learning_rate": 0.0008085932191873896, "loss": 3.4843, "step": 22545 }, { "epoch": 1.5321375186846038, "grad_norm": 2.002979278564453, "learning_rate": 0.0008085507541785568, "loss": 3.5077, "step": 22550 }, { "epoch": 1.5324772387552656, "grad_norm": 2.102417230606079, "learning_rate": 0.0008085082891697242, "loss": 3.6582, "step": 22555 }, { "epoch": 1.5328169588259275, "grad_norm": 1.6226658821105957, "learning_rate": 0.0008084658241608914, "loss": 3.5369, "step": 22560 }, { "epoch": 1.5331566788965891, "grad_norm": 2.5215559005737305, "learning_rate": 0.0008084233591520586, "loss": 3.5193, "step": 22565 }, { "epoch": 1.533496398967251, "grad_norm": 1.844967246055603, "learning_rate": 0.000808380894143226, "loss": 3.648, "step": 22570 }, { "epoch": 1.5338361190379128, "grad_norm": 1.852358341217041, "learning_rate": 0.0008083384291343933, "loss": 3.4507, "step": 22575 }, { "epoch": 1.5341758391085745, "grad_norm": 3.1263175010681152, "learning_rate": 0.0008082959641255605, "loss": 3.6666, "step": 22580 }, { "epoch": 1.5345155591792363, "grad_norm": 2.4497199058532715, "learning_rate": 0.0008082534991167279, "loss": 3.7693, "step": 22585 }, { "epoch": 1.5348552792498982, "grad_norm": 2.059950351715088, "learning_rate": 0.0008082110341078951, "loss": 3.814, "step": 22590 }, { "epoch": 1.5351949993205598, "grad_norm": 1.955972671508789, "learning_rate": 0.0008081685690990623, "loss": 3.8113, "step": 22595 }, { "epoch": 1.5355347193912217, "grad_norm": 1.8931630849838257, "learning_rate": 0.0008081261040902296, "loss": 3.5529, "step": 22600 }, { "epoch": 1.5358744394618835, "grad_norm": 1.4493045806884766, "learning_rate": 0.0008080836390813969, "loss": 3.5259, 
"step": 22605 }, { "epoch": 1.5362141595325451, "grad_norm": 2.5044212341308594, "learning_rate": 0.0008080411740725643, "loss": 3.5912, "step": 22610 }, { "epoch": 1.536553879603207, "grad_norm": 1.5261307954788208, "learning_rate": 0.0008079987090637315, "loss": 3.8017, "step": 22615 }, { "epoch": 1.5368935996738688, "grad_norm": 1.8029478788375854, "learning_rate": 0.0008079562440548988, "loss": 3.5744, "step": 22620 }, { "epoch": 1.5372333197445305, "grad_norm": 1.73594331741333, "learning_rate": 0.0008079137790460661, "loss": 3.5573, "step": 22625 }, { "epoch": 1.5375730398151923, "grad_norm": 1.8628888130187988, "learning_rate": 0.0008078713140372333, "loss": 3.5657, "step": 22630 }, { "epoch": 1.5379127598858542, "grad_norm": 1.9247620105743408, "learning_rate": 0.0008078288490284006, "loss": 3.7165, "step": 22635 }, { "epoch": 1.5382524799565158, "grad_norm": 1.6094032526016235, "learning_rate": 0.000807786384019568, "loss": 3.5167, "step": 22640 }, { "epoch": 1.5385922000271774, "grad_norm": 1.7079049348831177, "learning_rate": 0.0008077439190107352, "loss": 3.5313, "step": 22645 }, { "epoch": 1.5389319200978395, "grad_norm": 1.7697583436965942, "learning_rate": 0.0008077014540019024, "loss": 3.5453, "step": 22650 }, { "epoch": 1.5392716401685012, "grad_norm": 1.8432189226150513, "learning_rate": 0.0008076589889930698, "loss": 3.489, "step": 22655 }, { "epoch": 1.5396113602391628, "grad_norm": 1.6566567420959473, "learning_rate": 0.000807616523984237, "loss": 3.5476, "step": 22660 }, { "epoch": 1.5399510803098249, "grad_norm": 2.0389111042022705, "learning_rate": 0.0008075740589754042, "loss": 3.4811, "step": 22665 }, { "epoch": 1.5402908003804865, "grad_norm": 1.7268434762954712, "learning_rate": 0.0008075315939665716, "loss": 3.5626, "step": 22670 }, { "epoch": 1.5406305204511481, "grad_norm": 1.7994575500488281, "learning_rate": 0.0008074891289577389, "loss": 3.5834, "step": 22675 }, { "epoch": 1.54097024052181, "grad_norm": 1.8993951082229614, 
"learning_rate": 0.0008074466639489061, "loss": 3.63, "step": 22680 }, { "epoch": 1.5413099605924718, "grad_norm": 1.7274539470672607, "learning_rate": 0.0008074041989400735, "loss": 3.6558, "step": 22685 }, { "epoch": 1.5416496806631335, "grad_norm": 1.7924760580062866, "learning_rate": 0.0008073617339312407, "loss": 3.3595, "step": 22690 }, { "epoch": 1.5419894007337953, "grad_norm": 1.8087273836135864, "learning_rate": 0.0008073192689224079, "loss": 3.6555, "step": 22695 }, { "epoch": 1.5423291208044572, "grad_norm": 2.280667781829834, "learning_rate": 0.0008072768039135752, "loss": 3.409, "step": 22700 }, { "epoch": 1.5426688408751188, "grad_norm": 1.3716049194335938, "learning_rate": 0.0008072343389047425, "loss": 3.5215, "step": 22705 }, { "epoch": 1.5430085609457806, "grad_norm": 2.308572769165039, "learning_rate": 0.0008071918738959098, "loss": 3.4608, "step": 22710 }, { "epoch": 1.5433482810164425, "grad_norm": 2.0151145458221436, "learning_rate": 0.0008071494088870771, "loss": 3.5353, "step": 22715 }, { "epoch": 1.5436880010871041, "grad_norm": 1.8135225772857666, "learning_rate": 0.0008071069438782444, "loss": 3.3511, "step": 22720 }, { "epoch": 1.544027721157766, "grad_norm": 1.7837281227111816, "learning_rate": 0.0008070644788694116, "loss": 3.5554, "step": 22725 }, { "epoch": 1.5443674412284278, "grad_norm": 2.3787899017333984, "learning_rate": 0.0008070220138605789, "loss": 3.7251, "step": 22730 }, { "epoch": 1.5447071612990895, "grad_norm": 1.55287504196167, "learning_rate": 0.0008069795488517462, "loss": 3.4635, "step": 22735 }, { "epoch": 1.5450468813697513, "grad_norm": 1.4853205680847168, "learning_rate": 0.0008069370838429134, "loss": 3.6064, "step": 22740 }, { "epoch": 1.5453866014404132, "grad_norm": 1.913398265838623, "learning_rate": 0.0008068946188340808, "loss": 3.4514, "step": 22745 }, { "epoch": 1.5457263215110748, "grad_norm": 1.7702900171279907, "learning_rate": 0.000806852153825248, "loss": 3.7858, "step": 22750 }, { "epoch": 
1.5460660415817367, "grad_norm": 1.8370060920715332, "learning_rate": 0.0008068096888164153, "loss": 3.5108, "step": 22755 }, { "epoch": 1.5464057616523985, "grad_norm": 2.1133346557617188, "learning_rate": 0.0008067672238075826, "loss": 3.3632, "step": 22760 }, { "epoch": 1.5467454817230601, "grad_norm": 1.6865262985229492, "learning_rate": 0.0008067247587987498, "loss": 3.4793, "step": 22765 }, { "epoch": 1.547085201793722, "grad_norm": 1.9007073640823364, "learning_rate": 0.0008066907867916837, "loss": 3.4668, "step": 22770 }, { "epoch": 1.5474249218643839, "grad_norm": 1.922041654586792, "learning_rate": 0.000806648321782851, "loss": 3.7698, "step": 22775 }, { "epoch": 1.5477646419350455, "grad_norm": 2.362251043319702, "learning_rate": 0.0008066058567740182, "loss": 3.4763, "step": 22780 }, { "epoch": 1.5481043620057073, "grad_norm": 2.0008738040924072, "learning_rate": 0.0008065633917651854, "loss": 3.6897, "step": 22785 }, { "epoch": 1.5484440820763692, "grad_norm": 2.183310031890869, "learning_rate": 0.0008065209267563528, "loss": 3.615, "step": 22790 }, { "epoch": 1.5487838021470308, "grad_norm": 1.9934557676315308, "learning_rate": 0.00080647846174752, "loss": 3.8156, "step": 22795 }, { "epoch": 1.5491235222176927, "grad_norm": 1.696584701538086, "learning_rate": 0.0008064359967386872, "loss": 3.279, "step": 22800 }, { "epoch": 1.5494632422883545, "grad_norm": 1.9075849056243896, "learning_rate": 0.0008063935317298547, "loss": 3.3285, "step": 22805 }, { "epoch": 1.5498029623590162, "grad_norm": 2.0296380519866943, "learning_rate": 0.0008063510667210219, "loss": 3.4125, "step": 22810 }, { "epoch": 1.5501426824296778, "grad_norm": 2.1858065128326416, "learning_rate": 0.0008063086017121892, "loss": 3.6718, "step": 22815 }, { "epoch": 1.5504824025003399, "grad_norm": 1.4800453186035156, "learning_rate": 0.0008062661367033565, "loss": 3.4609, "step": 22820 }, { "epoch": 1.5508221225710015, "grad_norm": 2.1184213161468506, "learning_rate": 
0.0008062236716945237, "loss": 3.5509, "step": 22825 }, { "epoch": 1.5511618426416631, "grad_norm": 2.038698434829712, "learning_rate": 0.000806181206685691, "loss": 3.6934, "step": 22830 }, { "epoch": 1.5515015627123252, "grad_norm": 2.0349106788635254, "learning_rate": 0.0008061387416768582, "loss": 3.6803, "step": 22835 }, { "epoch": 1.5518412827829868, "grad_norm": 2.1873018741607666, "learning_rate": 0.0008060962766680256, "loss": 3.5902, "step": 22840 }, { "epoch": 1.5521810028536485, "grad_norm": 1.8978259563446045, "learning_rate": 0.0008060538116591929, "loss": 3.6209, "step": 22845 }, { "epoch": 1.5525207229243103, "grad_norm": 1.914222002029419, "learning_rate": 0.0008060113466503601, "loss": 3.5809, "step": 22850 }, { "epoch": 1.5528604429949722, "grad_norm": 2.4129462242126465, "learning_rate": 0.0008059688816415274, "loss": 3.4004, "step": 22855 }, { "epoch": 1.5532001630656338, "grad_norm": 2.131826877593994, "learning_rate": 0.0008059264166326947, "loss": 3.5369, "step": 22860 }, { "epoch": 1.5535398831362957, "grad_norm": 1.7124607563018799, "learning_rate": 0.0008058839516238619, "loss": 3.6369, "step": 22865 }, { "epoch": 1.5538796032069575, "grad_norm": 2.50028657913208, "learning_rate": 0.0008058414866150292, "loss": 3.5169, "step": 22870 }, { "epoch": 1.5542193232776191, "grad_norm": 1.663404941558838, "learning_rate": 0.0008057990216061966, "loss": 3.4703, "step": 22875 }, { "epoch": 1.554559043348281, "grad_norm": 1.501482367515564, "learning_rate": 0.0008057565565973638, "loss": 3.539, "step": 22880 }, { "epoch": 1.5548987634189428, "grad_norm": 1.8135497570037842, "learning_rate": 0.000805714091588531, "loss": 3.5333, "step": 22885 }, { "epoch": 1.5552384834896045, "grad_norm": 2.5120131969451904, "learning_rate": 0.0008056716265796984, "loss": 3.6446, "step": 22890 }, { "epoch": 1.5555782035602663, "grad_norm": 1.8807826042175293, "learning_rate": 0.0008056291615708656, "loss": 3.6899, "step": 22895 }, { "epoch": 1.5559179236309282, 
"grad_norm": 2.1153831481933594, "learning_rate": 0.0008055866965620328, "loss": 3.5643, "step": 22900 }, { "epoch": 1.5562576437015898, "grad_norm": 1.5353294610977173, "learning_rate": 0.0008055442315532003, "loss": 3.4752, "step": 22905 }, { "epoch": 1.5565973637722517, "grad_norm": 1.8117833137512207, "learning_rate": 0.0008055017665443675, "loss": 3.58, "step": 22910 }, { "epoch": 1.5569370838429135, "grad_norm": 2.0306479930877686, "learning_rate": 0.0008054593015355347, "loss": 3.6815, "step": 22915 }, { "epoch": 1.5572768039135751, "grad_norm": 1.6302391290664673, "learning_rate": 0.0008054168365267021, "loss": 3.4656, "step": 22920 }, { "epoch": 1.557616523984237, "grad_norm": 1.524204969406128, "learning_rate": 0.0008053743715178693, "loss": 3.5851, "step": 22925 }, { "epoch": 1.5579562440548989, "grad_norm": 1.8259791135787964, "learning_rate": 0.0008053319065090365, "loss": 3.4678, "step": 22930 }, { "epoch": 1.5582959641255605, "grad_norm": 1.9731754064559937, "learning_rate": 0.0008052894415002038, "loss": 3.511, "step": 22935 }, { "epoch": 1.5586356841962223, "grad_norm": 1.5579873323440552, "learning_rate": 0.0008052469764913712, "loss": 3.6395, "step": 22940 }, { "epoch": 1.5589754042668842, "grad_norm": 2.0498604774475098, "learning_rate": 0.0008052045114825384, "loss": 3.3441, "step": 22945 }, { "epoch": 1.5593151243375458, "grad_norm": 1.776694893836975, "learning_rate": 0.0008051620464737057, "loss": 3.4905, "step": 22950 }, { "epoch": 1.5596548444082077, "grad_norm": 1.9566031694412231, "learning_rate": 0.000805119581464873, "loss": 3.557, "step": 22955 }, { "epoch": 1.5599945644788695, "grad_norm": 1.9837868213653564, "learning_rate": 0.0008050771164560402, "loss": 3.556, "step": 22960 }, { "epoch": 1.5603342845495312, "grad_norm": 2.474841356277466, "learning_rate": 0.0008050346514472075, "loss": 3.5526, "step": 22965 }, { "epoch": 1.560674004620193, "grad_norm": 1.9838335514068604, "learning_rate": 0.0008049921864383748, "loss": 3.7449, 
"step": 22970 }, { "epoch": 1.5610137246908549, "grad_norm": 1.6358883380889893, "learning_rate": 0.0008049497214295421, "loss": 3.3294, "step": 22975 }, { "epoch": 1.5613534447615165, "grad_norm": 2.178567886352539, "learning_rate": 0.0008049072564207094, "loss": 3.4152, "step": 22980 }, { "epoch": 1.5616931648321781, "grad_norm": 2.1167614459991455, "learning_rate": 0.0008048647914118766, "loss": 3.5389, "step": 22985 }, { "epoch": 1.5620328849028402, "grad_norm": 1.867225170135498, "learning_rate": 0.0008048223264030439, "loss": 3.829, "step": 22990 }, { "epoch": 1.5623726049735018, "grad_norm": 1.4936398267745972, "learning_rate": 0.0008047798613942112, "loss": 3.4636, "step": 22995 }, { "epoch": 1.5627123250441635, "grad_norm": 1.9328405857086182, "learning_rate": 0.0008047373963853784, "loss": 3.494, "step": 23000 }, { "epoch": 1.5630520451148255, "grad_norm": 2.0604333877563477, "learning_rate": 0.0008046949313765457, "loss": 3.5201, "step": 23005 }, { "epoch": 1.5633917651854872, "grad_norm": 1.5458109378814697, "learning_rate": 0.0008046524663677131, "loss": 3.4038, "step": 23010 }, { "epoch": 1.5637314852561488, "grad_norm": 2.0157699584960938, "learning_rate": 0.0008046100013588803, "loss": 3.9174, "step": 23015 }, { "epoch": 1.5640712053268107, "grad_norm": 1.8432953357696533, "learning_rate": 0.0008045675363500476, "loss": 3.7694, "step": 23020 }, { "epoch": 1.5644109253974725, "grad_norm": 1.9820671081542969, "learning_rate": 0.0008045250713412149, "loss": 3.4822, "step": 23025 }, { "epoch": 1.5647506454681341, "grad_norm": 2.4087331295013428, "learning_rate": 0.0008044826063323821, "loss": 3.382, "step": 23030 }, { "epoch": 1.565090365538796, "grad_norm": 1.9530588388442993, "learning_rate": 0.0008044401413235493, "loss": 3.8083, "step": 23035 }, { "epoch": 1.5654300856094578, "grad_norm": 1.7469260692596436, "learning_rate": 0.0008043976763147167, "loss": 3.5224, "step": 23040 }, { "epoch": 1.5657698056801195, "grad_norm": 1.8504178524017334, 
"learning_rate": 0.000804355211305884, "loss": 3.514, "step": 23045 }, { "epoch": 1.5661095257507813, "grad_norm": 1.5791294574737549, "learning_rate": 0.0008043127462970512, "loss": 3.5032, "step": 23050 }, { "epoch": 1.5664492458214432, "grad_norm": 2.6067421436309814, "learning_rate": 0.0008042702812882186, "loss": 3.6091, "step": 23055 }, { "epoch": 1.5667889658921048, "grad_norm": 1.4389196634292603, "learning_rate": 0.0008042278162793858, "loss": 3.7031, "step": 23060 }, { "epoch": 1.5671286859627667, "grad_norm": 2.2478995323181152, "learning_rate": 0.000804185351270553, "loss": 3.8419, "step": 23065 }, { "epoch": 1.5674684060334285, "grad_norm": 1.871673822402954, "learning_rate": 0.0008041428862617204, "loss": 3.8173, "step": 23070 }, { "epoch": 1.5678081261040901, "grad_norm": 2.0944323539733887, "learning_rate": 0.0008041004212528876, "loss": 3.4951, "step": 23075 }, { "epoch": 1.568147846174752, "grad_norm": 1.8361326456069946, "learning_rate": 0.0008040579562440549, "loss": 3.679, "step": 23080 }, { "epoch": 1.5684875662454139, "grad_norm": 1.8853049278259277, "learning_rate": 0.0008040154912352223, "loss": 3.5571, "step": 23085 }, { "epoch": 1.5688272863160755, "grad_norm": 1.9428473711013794, "learning_rate": 0.0008039730262263895, "loss": 3.5137, "step": 23090 }, { "epoch": 1.5691670063867373, "grad_norm": 2.253173828125, "learning_rate": 0.0008039305612175567, "loss": 3.4693, "step": 23095 }, { "epoch": 1.5695067264573992, "grad_norm": 1.799572467803955, "learning_rate": 0.000803888096208724, "loss": 3.547, "step": 23100 }, { "epoch": 1.5698464465280608, "grad_norm": 2.0673468112945557, "learning_rate": 0.0008038456311998913, "loss": 3.6353, "step": 23105 }, { "epoch": 1.5701861665987227, "grad_norm": 1.7627886533737183, "learning_rate": 0.0008038031661910585, "loss": 3.4709, "step": 23110 }, { "epoch": 1.5705258866693845, "grad_norm": 2.2748382091522217, "learning_rate": 0.0008037607011822259, "loss": 3.5865, "step": 23115 }, { "epoch": 
1.5708656067400462, "grad_norm": 1.9271080493927002, "learning_rate": 0.0008037182361733932, "loss": 3.5015, "step": 23120 }, { "epoch": 1.571205326810708, "grad_norm": 1.7752090692520142, "learning_rate": 0.0008036757711645604, "loss": 3.6043, "step": 23125 }, { "epoch": 1.5715450468813699, "grad_norm": 1.7268786430358887, "learning_rate": 0.0008036333061557277, "loss": 3.479, "step": 23130 }, { "epoch": 1.5718847669520315, "grad_norm": 1.6993887424468994, "learning_rate": 0.0008035908411468949, "loss": 3.685, "step": 23135 }, { "epoch": 1.5722244870226934, "grad_norm": 1.9937673807144165, "learning_rate": 0.0008035483761380622, "loss": 3.6283, "step": 23140 }, { "epoch": 1.5725642070933552, "grad_norm": 1.6865957975387573, "learning_rate": 0.0008035059111292295, "loss": 3.5674, "step": 23145 }, { "epoch": 1.5729039271640168, "grad_norm": 1.6450358629226685, "learning_rate": 0.0008034634461203968, "loss": 3.6061, "step": 23150 }, { "epoch": 1.5732436472346785, "grad_norm": 2.630002498626709, "learning_rate": 0.0008034209811115642, "loss": 3.6808, "step": 23155 }, { "epoch": 1.5735833673053405, "grad_norm": 2.173051595687866, "learning_rate": 0.0008033785161027314, "loss": 3.4069, "step": 23160 }, { "epoch": 1.5739230873760022, "grad_norm": 1.5916359424591064, "learning_rate": 0.0008033360510938986, "loss": 3.527, "step": 23165 }, { "epoch": 1.5742628074466638, "grad_norm": 2.5224034786224365, "learning_rate": 0.000803293586085066, "loss": 3.5901, "step": 23170 }, { "epoch": 1.5746025275173259, "grad_norm": 2.376255512237549, "learning_rate": 0.0008032511210762332, "loss": 3.629, "step": 23175 }, { "epoch": 1.5749422475879875, "grad_norm": 2.0783560276031494, "learning_rate": 0.0008032086560674004, "loss": 3.7468, "step": 23180 }, { "epoch": 1.5752819676586491, "grad_norm": 1.8058825731277466, "learning_rate": 0.0008031661910585679, "loss": 3.5976, "step": 23185 }, { "epoch": 1.575621687729311, "grad_norm": 2.2499966621398926, "learning_rate": 
0.0008031237260497351, "loss": 3.6748, "step": 23190 }, { "epoch": 1.5759614077999728, "grad_norm": 1.8455944061279297, "learning_rate": 0.0008030812610409023, "loss": 3.7688, "step": 23195 }, { "epoch": 1.5763011278706345, "grad_norm": 1.87844979763031, "learning_rate": 0.0008030387960320696, "loss": 3.3436, "step": 23200 }, { "epoch": 1.5766408479412963, "grad_norm": 2.138408660888672, "learning_rate": 0.0008029963310232369, "loss": 3.5335, "step": 23205 }, { "epoch": 1.5769805680119582, "grad_norm": 2.2479794025421143, "learning_rate": 0.0008029538660144041, "loss": 3.5006, "step": 23210 }, { "epoch": 1.5773202880826198, "grad_norm": 1.7349311113357544, "learning_rate": 0.0008029114010055714, "loss": 3.5164, "step": 23215 }, { "epoch": 1.5776600081532817, "grad_norm": 1.9988031387329102, "learning_rate": 0.0008028689359967388, "loss": 3.5801, "step": 23220 }, { "epoch": 1.5779997282239435, "grad_norm": 1.5729578733444214, "learning_rate": 0.000802826470987906, "loss": 3.655, "step": 23225 }, { "epoch": 1.5783394482946052, "grad_norm": 2.011669635772705, "learning_rate": 0.0008027840059790733, "loss": 3.3762, "step": 23230 }, { "epoch": 1.578679168365267, "grad_norm": 2.0340628623962402, "learning_rate": 0.0008027415409702405, "loss": 3.5282, "step": 23235 }, { "epoch": 1.5790188884359289, "grad_norm": 1.9511324167251587, "learning_rate": 0.0008026990759614078, "loss": 3.5338, "step": 23240 }, { "epoch": 1.5793586085065905, "grad_norm": 2.1159732341766357, "learning_rate": 0.0008026566109525751, "loss": 3.3344, "step": 23245 }, { "epoch": 1.5796983285772523, "grad_norm": 1.800499677658081, "learning_rate": 0.0008026141459437423, "loss": 3.5423, "step": 23250 }, { "epoch": 1.5800380486479142, "grad_norm": 2.136367082595825, "learning_rate": 0.0008025716809349097, "loss": 3.7215, "step": 23255 }, { "epoch": 1.5803777687185758, "grad_norm": 1.9528241157531738, "learning_rate": 0.000802529215926077, "loss": 3.6032, "step": 23260 }, { "epoch": 1.5807174887892377, 
"grad_norm": 2.34028959274292, "learning_rate": 0.0008024867509172442, "loss": 3.4619, "step": 23265 }, { "epoch": 1.5810572088598995, "grad_norm": 1.9729326963424683, "learning_rate": 0.0008024442859084115, "loss": 3.5693, "step": 23270 }, { "epoch": 1.5813969289305612, "grad_norm": 1.9112826585769653, "learning_rate": 0.0008024018208995788, "loss": 3.8857, "step": 23275 }, { "epoch": 1.581736649001223, "grad_norm": 2.082242965698242, "learning_rate": 0.000802359355890746, "loss": 3.7639, "step": 23280 }, { "epoch": 1.5820763690718849, "grad_norm": 1.8520334959030151, "learning_rate": 0.0008023168908819132, "loss": 3.5333, "step": 23285 }, { "epoch": 1.5824160891425465, "grad_norm": 2.745725154876709, "learning_rate": 0.0008022744258730807, "loss": 3.2156, "step": 23290 }, { "epoch": 1.5827558092132084, "grad_norm": 2.179616689682007, "learning_rate": 0.0008022319608642479, "loss": 3.4699, "step": 23295 }, { "epoch": 1.5830955292838702, "grad_norm": 1.958565354347229, "learning_rate": 0.0008021894958554151, "loss": 3.5623, "step": 23300 }, { "epoch": 1.5834352493545318, "grad_norm": 1.9957692623138428, "learning_rate": 0.0008021470308465825, "loss": 3.5931, "step": 23305 }, { "epoch": 1.5837749694251937, "grad_norm": 2.172163486480713, "learning_rate": 0.0008021045658377497, "loss": 3.4009, "step": 23310 }, { "epoch": 1.5841146894958555, "grad_norm": 1.9111034870147705, "learning_rate": 0.0008020621008289169, "loss": 3.2657, "step": 23315 }, { "epoch": 1.5844544095665172, "grad_norm": 1.7003166675567627, "learning_rate": 0.0008020196358200843, "loss": 3.3513, "step": 23320 }, { "epoch": 1.5847941296371788, "grad_norm": 1.8467625379562378, "learning_rate": 0.0008019771708112516, "loss": 3.5029, "step": 23325 }, { "epoch": 1.5851338497078409, "grad_norm": 1.8965120315551758, "learning_rate": 0.0008019347058024188, "loss": 3.53, "step": 23330 }, { "epoch": 1.5854735697785025, "grad_norm": 1.8880783319473267, "learning_rate": 0.0008018922407935861, "loss": 3.5729, 
"step": 23335 }, { "epoch": 1.5858132898491641, "grad_norm": 2.2547378540039062, "learning_rate": 0.0008018497757847534, "loss": 3.4293, "step": 23340 }, { "epoch": 1.5861530099198262, "grad_norm": 2.131545305252075, "learning_rate": 0.0008018073107759206, "loss": 3.5698, "step": 23345 }, { "epoch": 1.5864927299904878, "grad_norm": 1.743728756904602, "learning_rate": 0.0008017648457670879, "loss": 3.8722, "step": 23350 }, { "epoch": 1.5868324500611495, "grad_norm": 2.412135601043701, "learning_rate": 0.0008017223807582552, "loss": 3.6616, "step": 23355 }, { "epoch": 1.5871721701318113, "grad_norm": 2.1024255752563477, "learning_rate": 0.0008016799157494225, "loss": 3.4793, "step": 23360 }, { "epoch": 1.5875118902024732, "grad_norm": 1.677167296409607, "learning_rate": 0.0008016374507405898, "loss": 3.6709, "step": 23365 }, { "epoch": 1.5878516102731348, "grad_norm": 1.73008131980896, "learning_rate": 0.000801594985731757, "loss": 3.612, "step": 23370 }, { "epoch": 1.5881913303437967, "grad_norm": 1.6392993927001953, "learning_rate": 0.0008015525207229243, "loss": 3.5447, "step": 23375 }, { "epoch": 1.5885310504144585, "grad_norm": 1.739480972290039, "learning_rate": 0.0008015100557140916, "loss": 3.606, "step": 23380 }, { "epoch": 1.5888707704851202, "grad_norm": 2.100578784942627, "learning_rate": 0.0008014675907052588, "loss": 3.5895, "step": 23385 }, { "epoch": 1.589210490555782, "grad_norm": 2.384650707244873, "learning_rate": 0.0008014251256964261, "loss": 3.4294, "step": 23390 }, { "epoch": 1.5895502106264439, "grad_norm": 1.6761105060577393, "learning_rate": 0.0008013826606875935, "loss": 3.6594, "step": 23395 }, { "epoch": 1.5898899306971055, "grad_norm": 2.1853253841400146, "learning_rate": 0.0008013401956787607, "loss": 3.533, "step": 23400 }, { "epoch": 1.5902296507677673, "grad_norm": 1.8706094026565552, "learning_rate": 0.000801297730669928, "loss": 3.7803, "step": 23405 }, { "epoch": 1.5905693708384292, "grad_norm": 2.081413745880127, "learning_rate": 
0.0008012552656610953, "loss": 3.7848, "step": 23410 }, { "epoch": 1.5909090909090908, "grad_norm": 2.024430751800537, "learning_rate": 0.0008012128006522625, "loss": 3.5888, "step": 23415 }, { "epoch": 1.5912488109797527, "grad_norm": 2.4992477893829346, "learning_rate": 0.0008011703356434297, "loss": 3.3968, "step": 23420 }, { "epoch": 1.5915885310504145, "grad_norm": 2.2470648288726807, "learning_rate": 0.0008011278706345971, "loss": 3.741, "step": 23425 }, { "epoch": 1.5919282511210762, "grad_norm": 1.7047548294067383, "learning_rate": 0.0008010854056257644, "loss": 3.4306, "step": 23430 }, { "epoch": 1.592267971191738, "grad_norm": 2.4299874305725098, "learning_rate": 0.0008010429406169316, "loss": 3.2421, "step": 23435 }, { "epoch": 1.5926076912623999, "grad_norm": 2.7796568870544434, "learning_rate": 0.000801000475608099, "loss": 3.2849, "step": 23440 }, { "epoch": 1.5929474113330615, "grad_norm": 2.0544276237487793, "learning_rate": 0.0008009580105992662, "loss": 3.4738, "step": 23445 }, { "epoch": 1.5932871314037234, "grad_norm": 1.85711669921875, "learning_rate": 0.0008009155455904334, "loss": 3.8571, "step": 23450 }, { "epoch": 1.5936268514743852, "grad_norm": 2.1828761100769043, "learning_rate": 0.0008008730805816008, "loss": 3.3663, "step": 23455 }, { "epoch": 1.5939665715450468, "grad_norm": 1.715456247329712, "learning_rate": 0.000800830615572768, "loss": 3.7023, "step": 23460 }, { "epoch": 1.5943062916157087, "grad_norm": 1.5831234455108643, "learning_rate": 0.0008007881505639353, "loss": 3.4087, "step": 23465 }, { "epoch": 1.5946460116863705, "grad_norm": 2.3072264194488525, "learning_rate": 0.0008007456855551027, "loss": 3.784, "step": 23470 }, { "epoch": 1.5949857317570322, "grad_norm": 2.1365585327148438, "learning_rate": 0.0008007032205462699, "loss": 3.1338, "step": 23475 }, { "epoch": 1.595325451827694, "grad_norm": 1.9411497116088867, "learning_rate": 0.0008006607555374371, "loss": 3.5473, "step": 23480 }, { "epoch": 1.5956651718983559, 
"grad_norm": 2.0870699882507324, "learning_rate": 0.0008006182905286044, "loss": 3.4792, "step": 23485 }, { "epoch": 1.5960048919690175, "grad_norm": 1.8405263423919678, "learning_rate": 0.0008005758255197717, "loss": 3.44, "step": 23490 }, { "epoch": 1.5963446120396791, "grad_norm": 2.025926113128662, "learning_rate": 0.0008005333605109391, "loss": 3.3909, "step": 23495 }, { "epoch": 1.5966843321103412, "grad_norm": 2.4942188262939453, "learning_rate": 0.0008004908955021063, "loss": 3.667, "step": 23500 }, { "epoch": 1.5970240521810029, "grad_norm": 2.221907615661621, "learning_rate": 0.0008004484304932736, "loss": 3.362, "step": 23505 }, { "epoch": 1.5973637722516645, "grad_norm": 2.1792328357696533, "learning_rate": 0.0008004059654844409, "loss": 3.6228, "step": 23510 }, { "epoch": 1.5977034923223266, "grad_norm": 1.9709287881851196, "learning_rate": 0.0008003635004756081, "loss": 3.7179, "step": 23515 }, { "epoch": 1.5980432123929882, "grad_norm": 1.937830924987793, "learning_rate": 0.0008003210354667753, "loss": 3.4466, "step": 23520 }, { "epoch": 1.5983829324636498, "grad_norm": 1.9598108530044556, "learning_rate": 0.0008002785704579427, "loss": 3.8008, "step": 23525 }, { "epoch": 1.5987226525343117, "grad_norm": 1.8515477180480957, "learning_rate": 0.00080023610544911, "loss": 3.4512, "step": 23530 }, { "epoch": 1.5990623726049735, "grad_norm": 1.6960211992263794, "learning_rate": 0.0008001936404402772, "loss": 3.2794, "step": 23535 }, { "epoch": 1.5994020926756352, "grad_norm": 2.299978733062744, "learning_rate": 0.0008001511754314446, "loss": 3.8451, "step": 23540 }, { "epoch": 1.599741812746297, "grad_norm": 2.31001615524292, "learning_rate": 0.0008001087104226118, "loss": 3.4179, "step": 23545 }, { "epoch": 1.6000815328169589, "grad_norm": 1.7606394290924072, "learning_rate": 0.000800066245413779, "loss": 3.7693, "step": 23550 }, { "epoch": 1.6004212528876205, "grad_norm": 1.7334281206130981, "learning_rate": 0.0008000237804049464, "loss": 3.7089, 
"step": 23555 }, { "epoch": 1.6007609729582823, "grad_norm": 2.0957791805267334, "learning_rate": 0.0007999813153961136, "loss": 3.6603, "step": 23560 }, { "epoch": 1.6011006930289442, "grad_norm": 2.0864741802215576, "learning_rate": 0.0007999388503872809, "loss": 3.3766, "step": 23565 }, { "epoch": 1.6014404130996058, "grad_norm": 1.9425009489059448, "learning_rate": 0.0007998963853784483, "loss": 3.5616, "step": 23570 }, { "epoch": 1.6017801331702677, "grad_norm": 2.1247153282165527, "learning_rate": 0.0007998539203696155, "loss": 3.4789, "step": 23575 }, { "epoch": 1.6021198532409295, "grad_norm": 2.9673616886138916, "learning_rate": 0.0007998114553607827, "loss": 3.4188, "step": 23580 }, { "epoch": 1.6024595733115912, "grad_norm": 2.0995635986328125, "learning_rate": 0.00079976899035195, "loss": 3.8782, "step": 23585 }, { "epoch": 1.602799293382253, "grad_norm": 1.7121665477752686, "learning_rate": 0.0007997265253431173, "loss": 3.6354, "step": 23590 }, { "epoch": 1.6031390134529149, "grad_norm": 1.5815964937210083, "learning_rate": 0.0007996840603342845, "loss": 3.4029, "step": 23595 }, { "epoch": 1.6034787335235765, "grad_norm": 2.17334246635437, "learning_rate": 0.0007996415953254519, "loss": 3.7302, "step": 23600 }, { "epoch": 1.6038184535942384, "grad_norm": 1.8143824338912964, "learning_rate": 0.0007995991303166192, "loss": 3.7839, "step": 23605 }, { "epoch": 1.6041581736649002, "grad_norm": 1.8242380619049072, "learning_rate": 0.0007995566653077864, "loss": 3.5376, "step": 23610 }, { "epoch": 1.6044978937355618, "grad_norm": 2.972324848175049, "learning_rate": 0.0007995142002989537, "loss": 3.3157, "step": 23615 }, { "epoch": 1.6048376138062237, "grad_norm": 1.7236449718475342, "learning_rate": 0.000799471735290121, "loss": 3.6189, "step": 23620 }, { "epoch": 1.6051773338768855, "grad_norm": 1.6785613298416138, "learning_rate": 0.0007994292702812882, "loss": 3.6225, "step": 23625 }, { "epoch": 1.6055170539475472, "grad_norm": 1.5531799793243408, 
"learning_rate": 0.0007993868052724555, "loss": 3.5364, "step": 23630 }, { "epoch": 1.605856774018209, "grad_norm": 2.054192066192627, "learning_rate": 0.0007993443402636228, "loss": 3.4197, "step": 23635 }, { "epoch": 1.6061964940888709, "grad_norm": 1.979701042175293, "learning_rate": 0.0007993018752547901, "loss": 3.6865, "step": 23640 }, { "epoch": 1.6065362141595325, "grad_norm": 1.4676417112350464, "learning_rate": 0.0007992594102459574, "loss": 3.3933, "step": 23645 }, { "epoch": 1.6068759342301944, "grad_norm": 1.7577424049377441, "learning_rate": 0.0007992169452371246, "loss": 3.4906, "step": 23650 }, { "epoch": 1.6072156543008562, "grad_norm": 2.069469928741455, "learning_rate": 0.0007991744802282919, "loss": 3.5799, "step": 23655 }, { "epoch": 1.6075553743715179, "grad_norm": 1.5298595428466797, "learning_rate": 0.0007991320152194592, "loss": 3.5947, "step": 23660 }, { "epoch": 1.6078950944421795, "grad_norm": 1.74728262424469, "learning_rate": 0.0007990895502106264, "loss": 3.3599, "step": 23665 }, { "epoch": 1.6082348145128416, "grad_norm": 2.062626361846924, "learning_rate": 0.0007990470852017937, "loss": 3.4051, "step": 23670 }, { "epoch": 1.6085745345835032, "grad_norm": 1.8756132125854492, "learning_rate": 0.0007990046201929611, "loss": 3.4955, "step": 23675 }, { "epoch": 1.6089142546541648, "grad_norm": 1.8008344173431396, "learning_rate": 0.0007989621551841283, "loss": 3.5424, "step": 23680 }, { "epoch": 1.609253974724827, "grad_norm": 1.7549993991851807, "learning_rate": 0.0007989196901752955, "loss": 3.5571, "step": 23685 }, { "epoch": 1.6095936947954885, "grad_norm": 1.5963294506072998, "learning_rate": 0.0007988772251664629, "loss": 3.6538, "step": 23690 }, { "epoch": 1.6099334148661502, "grad_norm": 2.045198917388916, "learning_rate": 0.0007988347601576301, "loss": 3.2761, "step": 23695 }, { "epoch": 1.610273134936812, "grad_norm": 1.8980746269226074, "learning_rate": 0.0007987922951487973, "loss": 3.5386, "step": 23700 }, { "epoch": 
1.6106128550074739, "grad_norm": 1.4726983308792114, "learning_rate": 0.0007987498301399648, "loss": 3.5457, "step": 23705 }, { "epoch": 1.6109525750781355, "grad_norm": 1.8510695695877075, "learning_rate": 0.000798707365131132, "loss": 3.6483, "step": 23710 }, { "epoch": 1.6112922951487973, "grad_norm": 1.7956894636154175, "learning_rate": 0.0007986649001222992, "loss": 3.6089, "step": 23715 }, { "epoch": 1.6116320152194592, "grad_norm": 1.8844528198242188, "learning_rate": 0.0007986224351134665, "loss": 3.8474, "step": 23720 }, { "epoch": 1.6119717352901208, "grad_norm": 1.935056209564209, "learning_rate": 0.0007985799701046338, "loss": 3.5055, "step": 23725 }, { "epoch": 1.6123114553607827, "grad_norm": 2.1560847759246826, "learning_rate": 0.000798537505095801, "loss": 3.7515, "step": 23730 }, { "epoch": 1.6126511754314445, "grad_norm": 2.3472766876220703, "learning_rate": 0.0007984950400869683, "loss": 3.5905, "step": 23735 }, { "epoch": 1.6129908955021062, "grad_norm": 1.6776847839355469, "learning_rate": 0.0007984525750781357, "loss": 3.3107, "step": 23740 }, { "epoch": 1.613330615572768, "grad_norm": 1.9683531522750854, "learning_rate": 0.0007984101100693029, "loss": 3.5958, "step": 23745 }, { "epoch": 1.6136703356434299, "grad_norm": 1.768484115600586, "learning_rate": 0.0007983676450604702, "loss": 3.6266, "step": 23750 }, { "epoch": 1.6140100557140915, "grad_norm": 1.9138858318328857, "learning_rate": 0.0007983251800516375, "loss": 3.6772, "step": 23755 }, { "epoch": 1.6143497757847534, "grad_norm": 1.6443262100219727, "learning_rate": 0.0007982827150428047, "loss": 3.5648, "step": 23760 }, { "epoch": 1.6146894958554152, "grad_norm": 2.031132936477661, "learning_rate": 0.000798240250033972, "loss": 3.5095, "step": 23765 }, { "epoch": 1.6150292159260768, "grad_norm": 1.7517004013061523, "learning_rate": 0.0007981977850251392, "loss": 3.3095, "step": 23770 }, { "epoch": 1.6153689359967387, "grad_norm": 1.744955062866211, "learning_rate": 
0.0007981553200163066, "loss": 3.5689, "step": 23775 }, { "epoch": 1.6157086560674006, "grad_norm": 1.8973430395126343, "learning_rate": 0.0007981128550074739, "loss": 3.5772, "step": 23780 }, { "epoch": 1.6160483761380622, "grad_norm": 1.7230591773986816, "learning_rate": 0.0007980703899986411, "loss": 3.4426, "step": 23785 }, { "epoch": 1.616388096208724, "grad_norm": 1.752519965171814, "learning_rate": 0.0007980279249898084, "loss": 3.3815, "step": 23790 }, { "epoch": 1.6167278162793859, "grad_norm": 2.938122272491455, "learning_rate": 0.0007979854599809757, "loss": 3.4032, "step": 23795 }, { "epoch": 1.6170675363500475, "grad_norm": 1.882591724395752, "learning_rate": 0.0007979429949721429, "loss": 3.5633, "step": 23800 }, { "epoch": 1.6174072564207094, "grad_norm": 1.5749343633651733, "learning_rate": 0.0007979005299633101, "loss": 3.3494, "step": 23805 }, { "epoch": 1.6177469764913712, "grad_norm": 1.9502907991409302, "learning_rate": 0.0007978580649544776, "loss": 3.5785, "step": 23810 }, { "epoch": 1.6180866965620329, "grad_norm": 1.7399243116378784, "learning_rate": 0.0007978155999456448, "loss": 3.8549, "step": 23815 }, { "epoch": 1.6184264166326947, "grad_norm": 1.8170359134674072, "learning_rate": 0.000797773134936812, "loss": 3.4309, "step": 23820 }, { "epoch": 1.6187661367033566, "grad_norm": 2.401106595993042, "learning_rate": 0.0007977306699279794, "loss": 3.5841, "step": 23825 }, { "epoch": 1.6191058567740182, "grad_norm": 1.7103898525238037, "learning_rate": 0.0007976882049191466, "loss": 3.5376, "step": 23830 }, { "epoch": 1.6194455768446798, "grad_norm": 1.7955375909805298, "learning_rate": 0.0007976457399103139, "loss": 3.5196, "step": 23835 }, { "epoch": 1.619785296915342, "grad_norm": 1.631537675857544, "learning_rate": 0.0007976032749014812, "loss": 3.3187, "step": 23840 }, { "epoch": 1.6201250169860035, "grad_norm": 1.909715175628662, "learning_rate": 0.0007975608098926485, "loss": 3.255, "step": 23845 }, { "epoch": 1.6204647370566652, 
"grad_norm": 2.0205795764923096, "learning_rate": 0.0007975183448838158, "loss": 3.7029, "step": 23850 }, { "epoch": 1.6208044571273272, "grad_norm": 1.7752208709716797, "learning_rate": 0.0007974758798749831, "loss": 3.6462, "step": 23855 }, { "epoch": 1.6211441771979889, "grad_norm": 1.9004470109939575, "learning_rate": 0.0007974334148661503, "loss": 3.4217, "step": 23860 }, { "epoch": 1.6214838972686505, "grad_norm": 1.7186065912246704, "learning_rate": 0.0007973909498573176, "loss": 3.4356, "step": 23865 }, { "epoch": 1.6218236173393124, "grad_norm": 1.8769859075546265, "learning_rate": 0.0007973484848484848, "loss": 3.4471, "step": 23870 }, { "epoch": 1.6221633374099742, "grad_norm": 2.190946340560913, "learning_rate": 0.0007973060198396521, "loss": 3.4991, "step": 23875 }, { "epoch": 1.6225030574806358, "grad_norm": 1.8793553113937378, "learning_rate": 0.0007972635548308195, "loss": 3.6433, "step": 23880 }, { "epoch": 1.6228427775512977, "grad_norm": 1.7597250938415527, "learning_rate": 0.0007972210898219867, "loss": 3.5502, "step": 23885 }, { "epoch": 1.6231824976219595, "grad_norm": 1.9381603002548218, "learning_rate": 0.000797178624813154, "loss": 3.7202, "step": 23890 }, { "epoch": 1.6235222176926212, "grad_norm": 2.0988333225250244, "learning_rate": 0.0007971361598043213, "loss": 3.3691, "step": 23895 }, { "epoch": 1.623861937763283, "grad_norm": 1.7780425548553467, "learning_rate": 0.0007970936947954885, "loss": 3.5705, "step": 23900 }, { "epoch": 1.6242016578339449, "grad_norm": 1.9163907766342163, "learning_rate": 0.0007970512297866558, "loss": 3.5683, "step": 23905 }, { "epoch": 1.6245413779046065, "grad_norm": 2.125408172607422, "learning_rate": 0.0007970087647778231, "loss": 3.5745, "step": 23910 }, { "epoch": 1.6248810979752684, "grad_norm": 2.286273241043091, "learning_rate": 0.0007969662997689904, "loss": 3.525, "step": 23915 }, { "epoch": 1.6252208180459302, "grad_norm": 1.8307465314865112, "learning_rate": 0.0007969238347601576, "loss": 
3.5987, "step": 23920 }, { "epoch": 1.6255605381165918, "grad_norm": 1.5867305994033813, "learning_rate": 0.000796881369751325, "loss": 3.5278, "step": 23925 }, { "epoch": 1.6259002581872537, "grad_norm": 1.5513094663619995, "learning_rate": 0.0007968389047424922, "loss": 3.6089, "step": 23930 }, { "epoch": 1.6262399782579156, "grad_norm": 2.122361660003662, "learning_rate": 0.0007967964397336594, "loss": 3.3896, "step": 23935 }, { "epoch": 1.6265796983285772, "grad_norm": 1.4910447597503662, "learning_rate": 0.0007967539747248268, "loss": 3.5338, "step": 23940 }, { "epoch": 1.626919418399239, "grad_norm": 2.0623676776885986, "learning_rate": 0.000796711509715994, "loss": 3.5473, "step": 23945 }, { "epoch": 1.627259138469901, "grad_norm": 2.176236152648926, "learning_rate": 0.0007966690447071613, "loss": 3.2936, "step": 23950 }, { "epoch": 1.6275988585405625, "grad_norm": 2.1452128887176514, "learning_rate": 0.0007966265796983287, "loss": 3.5633, "step": 23955 }, { "epoch": 1.6279385786112244, "grad_norm": 1.7768951654434204, "learning_rate": 0.0007965841146894959, "loss": 3.7675, "step": 23960 }, { "epoch": 1.6282782986818862, "grad_norm": 2.217686891555786, "learning_rate": 0.0007965416496806631, "loss": 3.5474, "step": 23965 }, { "epoch": 1.6286180187525479, "grad_norm": 2.1516788005828857, "learning_rate": 0.0007964991846718304, "loss": 3.6386, "step": 23970 }, { "epoch": 1.6289577388232097, "grad_norm": 1.7785457372665405, "learning_rate": 0.0007964567196629977, "loss": 3.5773, "step": 23975 }, { "epoch": 1.6292974588938716, "grad_norm": 2.132992744445801, "learning_rate": 0.0007964142546541649, "loss": 3.5424, "step": 23980 }, { "epoch": 1.6296371789645332, "grad_norm": 1.8186427354812622, "learning_rate": 0.0007963717896453323, "loss": 3.7519, "step": 23985 }, { "epoch": 1.629976899035195, "grad_norm": 2.206990957260132, "learning_rate": 0.0007963293246364996, "loss": 3.6669, "step": 23990 }, { "epoch": 1.630316619105857, "grad_norm": 1.7358455657958984, 
"learning_rate": 0.0007962868596276668, "loss": 3.5765, "step": 23995 }, { "epoch": 1.6306563391765185, "grad_norm": 1.5739344358444214, "learning_rate": 0.0007962443946188341, "loss": 3.5751, "step": 24000 }, { "epoch": 1.6309960592471802, "grad_norm": 2.119497776031494, "learning_rate": 0.0007962019296100014, "loss": 3.4769, "step": 24005 }, { "epoch": 1.6313357793178422, "grad_norm": 2.036278247833252, "learning_rate": 0.0007961594646011686, "loss": 3.3886, "step": 24010 }, { "epoch": 1.6316754993885039, "grad_norm": 1.7117360830307007, "learning_rate": 0.000796116999592336, "loss": 3.3689, "step": 24015 }, { "epoch": 1.6320152194591655, "grad_norm": 1.796807885169983, "learning_rate": 0.0007960745345835032, "loss": 3.7024, "step": 24020 }, { "epoch": 1.6323549395298276, "grad_norm": 2.361604690551758, "learning_rate": 0.0007960320695746705, "loss": 3.6883, "step": 24025 }, { "epoch": 1.6326946596004892, "grad_norm": 1.6751177310943604, "learning_rate": 0.0007959896045658378, "loss": 3.6162, "step": 24030 }, { "epoch": 1.6330343796711508, "grad_norm": 1.637323260307312, "learning_rate": 0.000795947139557005, "loss": 3.6337, "step": 24035 }, { "epoch": 1.633374099741813, "grad_norm": 1.7787002325057983, "learning_rate": 0.0007959046745481723, "loss": 3.4792, "step": 24040 }, { "epoch": 1.6337138198124745, "grad_norm": 1.5686943531036377, "learning_rate": 0.0007958622095393396, "loss": 3.3624, "step": 24045 }, { "epoch": 1.6340535398831362, "grad_norm": 2.2679617404937744, "learning_rate": 0.0007958197445305069, "loss": 3.3666, "step": 24050 }, { "epoch": 1.634393259953798, "grad_norm": 1.9929540157318115, "learning_rate": 0.0007957772795216742, "loss": 3.2754, "step": 24055 }, { "epoch": 1.6347329800244599, "grad_norm": 1.8854026794433594, "learning_rate": 0.0007957348145128415, "loss": 3.5316, "step": 24060 }, { "epoch": 1.6350727000951215, "grad_norm": 1.700840950012207, "learning_rate": 0.0007956923495040087, "loss": 3.6668, "step": 24065 }, { "epoch": 
1.6354124201657834, "grad_norm": 2.2000632286071777, "learning_rate": 0.0007956498844951759, "loss": 3.6655, "step": 24070 }, { "epoch": 1.6357521402364452, "grad_norm": 1.6521892547607422, "learning_rate": 0.0007956074194863433, "loss": 3.5238, "step": 24075 }, { "epoch": 1.6360918603071068, "grad_norm": 1.7937004566192627, "learning_rate": 0.0007955649544775105, "loss": 3.7957, "step": 24080 }, { "epoch": 1.6364315803777687, "grad_norm": 2.1637673377990723, "learning_rate": 0.0007955224894686778, "loss": 3.8998, "step": 24085 }, { "epoch": 1.6367713004484306, "grad_norm": 2.790614366531372, "learning_rate": 0.0007954800244598452, "loss": 3.6169, "step": 24090 }, { "epoch": 1.6371110205190922, "grad_norm": 1.8715345859527588, "learning_rate": 0.0007954375594510124, "loss": 3.4705, "step": 24095 }, { "epoch": 1.637450740589754, "grad_norm": 2.1381049156188965, "learning_rate": 0.0007953950944421796, "loss": 3.5554, "step": 24100 }, { "epoch": 1.637790460660416, "grad_norm": 2.464179039001465, "learning_rate": 0.000795352629433347, "loss": 3.424, "step": 24105 }, { "epoch": 1.6381301807310775, "grad_norm": 2.21128249168396, "learning_rate": 0.0007953101644245142, "loss": 3.4191, "step": 24110 }, { "epoch": 1.6384699008017394, "grad_norm": 1.8977630138397217, "learning_rate": 0.0007952676994156814, "loss": 3.4054, "step": 24115 }, { "epoch": 1.6388096208724012, "grad_norm": 1.9519940614700317, "learning_rate": 0.0007952252344068488, "loss": 3.3936, "step": 24120 }, { "epoch": 1.6391493409430629, "grad_norm": 1.8180725574493408, "learning_rate": 0.0007951827693980161, "loss": 3.4204, "step": 24125 }, { "epoch": 1.6394890610137247, "grad_norm": 1.8408910036087036, "learning_rate": 0.0007951403043891833, "loss": 3.5509, "step": 24130 }, { "epoch": 1.6398287810843866, "grad_norm": 1.4652979373931885, "learning_rate": 0.0007950978393803506, "loss": 3.4552, "step": 24135 }, { "epoch": 1.6401685011550482, "grad_norm": 2.1417009830474854, "learning_rate": 
0.0007950553743715179, "loss": 3.6521, "step": 24140 }, { "epoch": 1.64050822122571, "grad_norm": 2.1331522464752197, "learning_rate": 0.0007950129093626851, "loss": 3.3375, "step": 24145 }, { "epoch": 1.640847941296372, "grad_norm": 1.7947235107421875, "learning_rate": 0.0007949704443538524, "loss": 3.7328, "step": 24150 }, { "epoch": 1.6411876613670335, "grad_norm": 2.140002727508545, "learning_rate": 0.0007949279793450198, "loss": 3.5007, "step": 24155 }, { "epoch": 1.6415273814376954, "grad_norm": 1.9798887968063354, "learning_rate": 0.000794885514336187, "loss": 3.2774, "step": 24160 }, { "epoch": 1.6418671015083572, "grad_norm": 1.9175218343734741, "learning_rate": 0.0007948430493273543, "loss": 3.745, "step": 24165 }, { "epoch": 1.6422068215790189, "grad_norm": 2.063779354095459, "learning_rate": 0.0007948005843185215, "loss": 3.2775, "step": 24170 }, { "epoch": 1.6425465416496805, "grad_norm": 2.0906331539154053, "learning_rate": 0.0007947581193096889, "loss": 3.5316, "step": 24175 }, { "epoch": 1.6428862617203426, "grad_norm": 1.911420464515686, "learning_rate": 0.0007947156543008561, "loss": 3.5098, "step": 24180 }, { "epoch": 1.6432259817910042, "grad_norm": 2.4627292156219482, "learning_rate": 0.0007946731892920233, "loss": 3.6636, "step": 24185 }, { "epoch": 1.6435657018616658, "grad_norm": 1.7302913665771484, "learning_rate": 0.0007946307242831908, "loss": 3.2829, "step": 24190 }, { "epoch": 1.643905421932328, "grad_norm": 1.7381397485733032, "learning_rate": 0.000794588259274358, "loss": 3.4994, "step": 24195 }, { "epoch": 1.6442451420029895, "grad_norm": 2.2457005977630615, "learning_rate": 0.0007945457942655252, "loss": 3.8267, "step": 24200 }, { "epoch": 1.6445848620736512, "grad_norm": 1.942236065864563, "learning_rate": 0.0007945033292566926, "loss": 3.5861, "step": 24205 }, { "epoch": 1.6449245821443133, "grad_norm": 1.41391921043396, "learning_rate": 0.0007944608642478598, "loss": 3.421, "step": 24210 }, { "epoch": 1.6452643022149749, 
"grad_norm": 2.034721612930298, "learning_rate": 0.000794418399239027, "loss": 3.5716, "step": 24215 }, { "epoch": 1.6456040222856365, "grad_norm": 1.8894801139831543, "learning_rate": 0.0007943759342301943, "loss": 3.4944, "step": 24220 }, { "epoch": 1.6459437423562984, "grad_norm": 1.654801845550537, "learning_rate": 0.0007943334692213617, "loss": 3.5025, "step": 24225 }, { "epoch": 1.6462834624269602, "grad_norm": 2.836221694946289, "learning_rate": 0.0007942910042125289, "loss": 3.5734, "step": 24230 }, { "epoch": 1.6466231824976219, "grad_norm": 1.9195380210876465, "learning_rate": 0.0007942485392036962, "loss": 3.4656, "step": 24235 }, { "epoch": 1.6469629025682837, "grad_norm": 1.671648383140564, "learning_rate": 0.0007942060741948635, "loss": 3.634, "step": 24240 }, { "epoch": 1.6473026226389456, "grad_norm": 1.7552051544189453, "learning_rate": 0.0007941636091860307, "loss": 3.4322, "step": 24245 }, { "epoch": 1.6476423427096072, "grad_norm": 1.724908709526062, "learning_rate": 0.000794121144177198, "loss": 3.6284, "step": 24250 }, { "epoch": 1.647982062780269, "grad_norm": 2.335911989212036, "learning_rate": 0.0007940786791683652, "loss": 3.4443, "step": 24255 }, { "epoch": 1.648321782850931, "grad_norm": 1.9961529970169067, "learning_rate": 0.0007940362141595326, "loss": 3.6047, "step": 24260 }, { "epoch": 1.6486615029215925, "grad_norm": 1.791506290435791, "learning_rate": 0.0007939937491506999, "loss": 3.668, "step": 24265 }, { "epoch": 1.6490012229922544, "grad_norm": 3.0941669940948486, "learning_rate": 0.0007939512841418671, "loss": 3.4534, "step": 24270 }, { "epoch": 1.6493409430629162, "grad_norm": 1.74546480178833, "learning_rate": 0.0007939088191330344, "loss": 3.4751, "step": 24275 }, { "epoch": 1.6496806631335779, "grad_norm": 2.487797737121582, "learning_rate": 0.0007938663541242017, "loss": 3.7081, "step": 24280 }, { "epoch": 1.6500203832042397, "grad_norm": 1.7761061191558838, "learning_rate": 0.0007938238891153689, "loss": 3.4514, "step": 
24285 }, { "epoch": 1.6503601032749016, "grad_norm": 2.6685264110565186, "learning_rate": 0.0007937814241065362, "loss": 3.4423, "step": 24290 }, { "epoch": 1.6506998233455632, "grad_norm": 2.289740562438965, "learning_rate": 0.0007937389590977036, "loss": 3.7028, "step": 24295 }, { "epoch": 1.651039543416225, "grad_norm": 1.6072702407836914, "learning_rate": 0.0007936964940888708, "loss": 3.3717, "step": 24300 }, { "epoch": 1.651379263486887, "grad_norm": 1.809860110282898, "learning_rate": 0.000793654029080038, "loss": 3.5571, "step": 24305 }, { "epoch": 1.6517189835575485, "grad_norm": 2.707441568374634, "learning_rate": 0.0007936115640712054, "loss": 3.5923, "step": 24310 }, { "epoch": 1.6520587036282104, "grad_norm": 1.7409400939941406, "learning_rate": 0.0007935690990623726, "loss": 3.6357, "step": 24315 }, { "epoch": 1.6523984236988722, "grad_norm": 2.5246174335479736, "learning_rate": 0.0007935266340535398, "loss": 3.4986, "step": 24320 }, { "epoch": 1.6527381437695339, "grad_norm": 1.8603237867355347, "learning_rate": 0.0007934841690447072, "loss": 3.4534, "step": 24325 }, { "epoch": 1.6530778638401957, "grad_norm": 2.552454948425293, "learning_rate": 0.0007934417040358745, "loss": 3.4253, "step": 24330 }, { "epoch": 1.6534175839108576, "grad_norm": 1.8499362468719482, "learning_rate": 0.0007933992390270417, "loss": 3.5883, "step": 24335 }, { "epoch": 1.6537573039815192, "grad_norm": 1.7460224628448486, "learning_rate": 0.0007933567740182091, "loss": 3.3086, "step": 24340 }, { "epoch": 1.6540970240521808, "grad_norm": 1.6763783693313599, "learning_rate": 0.0007933143090093763, "loss": 3.5998, "step": 24345 }, { "epoch": 1.654436744122843, "grad_norm": 2.042801856994629, "learning_rate": 0.0007932718440005435, "loss": 3.5604, "step": 24350 }, { "epoch": 1.6547764641935045, "grad_norm": 2.0807955265045166, "learning_rate": 0.0007932293789917108, "loss": 3.5582, "step": 24355 }, { "epoch": 1.6551161842641662, "grad_norm": 1.8108375072479248, "learning_rate": 
0.0007931869139828781, "loss": 3.5767, "step": 24360 }, { "epoch": 1.6554559043348283, "grad_norm": 1.8867918252944946, "learning_rate": 0.0007931444489740454, "loss": 3.5688, "step": 24365 }, { "epoch": 1.6557956244054899, "grad_norm": 1.8727976083755493, "learning_rate": 0.0007931019839652127, "loss": 3.5848, "step": 24370 }, { "epoch": 1.6561353444761515, "grad_norm": 1.7848118543624878, "learning_rate": 0.00079305951895638, "loss": 3.4475, "step": 24375 }, { "epoch": 1.6564750645468136, "grad_norm": 1.5621070861816406, "learning_rate": 0.0007930170539475472, "loss": 3.3207, "step": 24380 }, { "epoch": 1.6568147846174752, "grad_norm": 1.9529811143875122, "learning_rate": 0.0007929745889387145, "loss": 3.7832, "step": 24385 }, { "epoch": 1.6571545046881369, "grad_norm": 2.144897937774658, "learning_rate": 0.0007929321239298818, "loss": 3.4107, "step": 24390 }, { "epoch": 1.6574942247587987, "grad_norm": 1.8998234272003174, "learning_rate": 0.000792889658921049, "loss": 3.6618, "step": 24395 }, { "epoch": 1.6578339448294606, "grad_norm": 1.6615227460861206, "learning_rate": 0.0007928471939122164, "loss": 3.6616, "step": 24400 }, { "epoch": 1.6581736649001222, "grad_norm": 2.0341134071350098, "learning_rate": 0.0007928047289033836, "loss": 3.4637, "step": 24405 }, { "epoch": 1.658513384970784, "grad_norm": 2.073718547821045, "learning_rate": 0.0007927622638945509, "loss": 3.5943, "step": 24410 }, { "epoch": 1.658853105041446, "grad_norm": 2.0046095848083496, "learning_rate": 0.0007927197988857182, "loss": 3.6108, "step": 24415 }, { "epoch": 1.6591928251121075, "grad_norm": 2.0357518196105957, "learning_rate": 0.0007926773338768854, "loss": 3.4598, "step": 24420 }, { "epoch": 1.6595325451827694, "grad_norm": 1.923946738243103, "learning_rate": 0.0007926348688680527, "loss": 3.5184, "step": 24425 }, { "epoch": 1.6598722652534312, "grad_norm": 1.7628453969955444, "learning_rate": 0.00079259240385922, "loss": 3.6897, "step": 24430 }, { "epoch": 1.6602119853240929, 
"grad_norm": 1.8592790365219116, "learning_rate": 0.0007925499388503873, "loss": 3.5368, "step": 24435 }, { "epoch": 1.6605517053947547, "grad_norm": 2.820798397064209, "learning_rate": 0.0007925074738415546, "loss": 3.6015, "step": 24440 }, { "epoch": 1.6608914254654166, "grad_norm": 1.8616455793380737, "learning_rate": 0.0007924650088327219, "loss": 3.5024, "step": 24445 }, { "epoch": 1.6612311455360782, "grad_norm": 1.9562115669250488, "learning_rate": 0.0007924225438238891, "loss": 3.6776, "step": 24450 }, { "epoch": 1.66157086560674, "grad_norm": 2.390681743621826, "learning_rate": 0.0007923800788150563, "loss": 3.483, "step": 24455 }, { "epoch": 1.661910585677402, "grad_norm": 1.6927886009216309, "learning_rate": 0.0007923376138062237, "loss": 3.5708, "step": 24460 }, { "epoch": 1.6622503057480635, "grad_norm": 2.31612229347229, "learning_rate": 0.0007922951487973909, "loss": 3.6122, "step": 24465 }, { "epoch": 1.6625900258187254, "grad_norm": 2.663978099822998, "learning_rate": 0.0007922526837885582, "loss": 3.7576, "step": 24470 }, { "epoch": 1.6629297458893872, "grad_norm": 2.0080225467681885, "learning_rate": 0.0007922102187797256, "loss": 3.6917, "step": 24475 }, { "epoch": 1.6632694659600489, "grad_norm": 1.6618870496749878, "learning_rate": 0.0007921677537708928, "loss": 3.6915, "step": 24480 }, { "epoch": 1.6636091860307107, "grad_norm": 2.3623738288879395, "learning_rate": 0.00079212528876206, "loss": 3.4281, "step": 24485 }, { "epoch": 1.6639489061013726, "grad_norm": 1.9915447235107422, "learning_rate": 0.0007920828237532274, "loss": 3.617, "step": 24490 }, { "epoch": 1.6642886261720342, "grad_norm": 3.414003849029541, "learning_rate": 0.0007920403587443946, "loss": 3.5548, "step": 24495 }, { "epoch": 1.664628346242696, "grad_norm": 1.9254553318023682, "learning_rate": 0.0007919978937355618, "loss": 3.7334, "step": 24500 }, { "epoch": 1.664968066313358, "grad_norm": 1.9747116565704346, "learning_rate": 0.0007919554287267293, "loss": 3.4158, "step": 
24505 }, { "epoch": 1.6653077863840196, "grad_norm": 1.85152006149292, "learning_rate": 0.0007919129637178965, "loss": 3.5419, "step": 24510 }, { "epoch": 1.6656475064546812, "grad_norm": 1.5717148780822754, "learning_rate": 0.0007918704987090638, "loss": 3.3009, "step": 24515 }, { "epoch": 1.6659872265253433, "grad_norm": 1.9058703184127808, "learning_rate": 0.000791828033700231, "loss": 3.4409, "step": 24520 }, { "epoch": 1.6663269465960049, "grad_norm": 1.5167299509048462, "learning_rate": 0.0007917855686913983, "loss": 3.592, "step": 24525 }, { "epoch": 1.6666666666666665, "grad_norm": 1.8441784381866455, "learning_rate": 0.0007917431036825656, "loss": 3.6336, "step": 24530 }, { "epoch": 1.6670063867373286, "grad_norm": 2.0084733963012695, "learning_rate": 0.0007917006386737328, "loss": 3.6807, "step": 24535 }, { "epoch": 1.6673461068079902, "grad_norm": 1.8997167348861694, "learning_rate": 0.0007916581736649002, "loss": 3.5083, "step": 24540 }, { "epoch": 1.6676858268786519, "grad_norm": 2.1355061531066895, "learning_rate": 0.0007916157086560675, "loss": 3.5605, "step": 24545 }, { "epoch": 1.668025546949314, "grad_norm": 2.4605422019958496, "learning_rate": 0.0007915732436472347, "loss": 3.5899, "step": 24550 }, { "epoch": 1.6683652670199756, "grad_norm": 2.845499277114868, "learning_rate": 0.0007915307786384019, "loss": 3.7893, "step": 24555 }, { "epoch": 1.6687049870906372, "grad_norm": 1.6742901802062988, "learning_rate": 0.0007914883136295693, "loss": 3.4952, "step": 24560 }, { "epoch": 1.669044707161299, "grad_norm": 1.5092793703079224, "learning_rate": 0.0007914458486207365, "loss": 3.6694, "step": 24565 }, { "epoch": 1.669384427231961, "grad_norm": 1.9634472131729126, "learning_rate": 0.0007914033836119037, "loss": 3.8153, "step": 24570 }, { "epoch": 1.6697241473026225, "grad_norm": 1.7643916606903076, "learning_rate": 0.0007913609186030712, "loss": 3.4507, "step": 24575 }, { "epoch": 1.6700638673732844, "grad_norm": 2.162959575653076, "learning_rate": 
0.0007913184535942384, "loss": 3.5561, "step": 24580 }, { "epoch": 1.6704035874439462, "grad_norm": 2.108365535736084, "learning_rate": 0.0007912759885854056, "loss": 3.4126, "step": 24585 }, { "epoch": 1.6707433075146079, "grad_norm": 1.8806301355361938, "learning_rate": 0.000791233523576573, "loss": 3.3181, "step": 24590 }, { "epoch": 1.6710830275852697, "grad_norm": 1.5920456647872925, "learning_rate": 0.0007911910585677402, "loss": 3.5001, "step": 24595 }, { "epoch": 1.6714227476559316, "grad_norm": 1.767977237701416, "learning_rate": 0.0007911485935589074, "loss": 3.3786, "step": 24600 }, { "epoch": 1.6717624677265932, "grad_norm": 1.8872900009155273, "learning_rate": 0.0007911061285500749, "loss": 3.5618, "step": 24605 }, { "epoch": 1.672102187797255, "grad_norm": 2.091700792312622, "learning_rate": 0.0007910636635412421, "loss": 3.6653, "step": 24610 }, { "epoch": 1.672441907867917, "grad_norm": 2.3151180744171143, "learning_rate": 0.0007910211985324093, "loss": 3.7415, "step": 24615 }, { "epoch": 1.6727816279385785, "grad_norm": 2.341123580932617, "learning_rate": 0.0007909787335235766, "loss": 3.3518, "step": 24620 }, { "epoch": 1.6731213480092404, "grad_norm": 2.080110549926758, "learning_rate": 0.0007909362685147439, "loss": 3.5612, "step": 24625 }, { "epoch": 1.6734610680799022, "grad_norm": 1.9306949377059937, "learning_rate": 0.0007908938035059111, "loss": 3.6742, "step": 24630 }, { "epoch": 1.6738007881505639, "grad_norm": 1.6237024068832397, "learning_rate": 0.0007908513384970784, "loss": 3.5732, "step": 24635 }, { "epoch": 1.6741405082212257, "grad_norm": 1.8553367853164673, "learning_rate": 0.0007908088734882458, "loss": 3.5681, "step": 24640 }, { "epoch": 1.6744802282918876, "grad_norm": 1.6605676412582397, "learning_rate": 0.000790766408479413, "loss": 3.5821, "step": 24645 }, { "epoch": 1.6748199483625492, "grad_norm": 1.6132123470306396, "learning_rate": 0.0007907239434705803, "loss": 3.3921, "step": 24650 }, { "epoch": 1.675159668433211, 
"grad_norm": 2.027263641357422, "learning_rate": 0.0007906814784617475, "loss": 3.5534, "step": 24655 }, { "epoch": 1.675499388503873, "grad_norm": 1.6810462474822998, "learning_rate": 0.0007906390134529148, "loss": 3.456, "step": 24660 }, { "epoch": 1.6758391085745346, "grad_norm": 2.4495482444763184, "learning_rate": 0.0007905965484440821, "loss": 3.6558, "step": 24665 }, { "epoch": 1.6761788286451964, "grad_norm": 2.402320623397827, "learning_rate": 0.0007905540834352493, "loss": 3.5358, "step": 24670 }, { "epoch": 1.6765185487158583, "grad_norm": 1.758581280708313, "learning_rate": 0.0007905116184264167, "loss": 3.6227, "step": 24675 }, { "epoch": 1.67685826878652, "grad_norm": 1.7260793447494507, "learning_rate": 0.000790469153417584, "loss": 3.3962, "step": 24680 }, { "epoch": 1.6771979888571815, "grad_norm": 2.0938045978546143, "learning_rate": 0.0007904266884087512, "loss": 3.787, "step": 24685 }, { "epoch": 1.6775377089278436, "grad_norm": 1.966149091720581, "learning_rate": 0.0007903842233999185, "loss": 3.4797, "step": 24690 }, { "epoch": 1.6778774289985052, "grad_norm": 2.110272169113159, "learning_rate": 0.0007903417583910858, "loss": 3.6409, "step": 24695 }, { "epoch": 1.6782171490691669, "grad_norm": 1.9394716024398804, "learning_rate": 0.000790299293382253, "loss": 3.5192, "step": 24700 }, { "epoch": 1.678556869139829, "grad_norm": 2.020275592803955, "learning_rate": 0.0007902568283734202, "loss": 3.6065, "step": 24705 }, { "epoch": 1.6788965892104906, "grad_norm": 1.5819050073623657, "learning_rate": 0.0007902143633645877, "loss": 3.5152, "step": 24710 }, { "epoch": 1.6792363092811522, "grad_norm": 1.7392027378082275, "learning_rate": 0.0007901718983557549, "loss": 3.7739, "step": 24715 }, { "epoch": 1.6795760293518143, "grad_norm": 1.9121578931808472, "learning_rate": 0.0007901294333469221, "loss": 3.4567, "step": 24720 }, { "epoch": 1.679915749422476, "grad_norm": 1.8122159242630005, "learning_rate": 0.0007900869683380895, "loss": 3.4312, "step": 
24725 }, { "epoch": 1.6802554694931375, "grad_norm": 2.6061160564422607, "learning_rate": 0.0007900445033292567, "loss": 3.5321, "step": 24730 }, { "epoch": 1.6805951895637994, "grad_norm": 2.377042293548584, "learning_rate": 0.0007900020383204239, "loss": 3.5566, "step": 24735 }, { "epoch": 1.6809349096344612, "grad_norm": 1.526827096939087, "learning_rate": 0.0007899595733115913, "loss": 3.4965, "step": 24740 }, { "epoch": 1.6812746297051229, "grad_norm": 1.9434771537780762, "learning_rate": 0.0007899171083027586, "loss": 3.8035, "step": 24745 }, { "epoch": 1.6816143497757847, "grad_norm": 1.9310600757598877, "learning_rate": 0.0007898746432939258, "loss": 3.5549, "step": 24750 }, { "epoch": 1.6819540698464466, "grad_norm": 1.9575639963150024, "learning_rate": 0.0007898321782850931, "loss": 3.5462, "step": 24755 }, { "epoch": 1.6822937899171082, "grad_norm": 2.1132142543792725, "learning_rate": 0.0007897897132762604, "loss": 3.377, "step": 24760 }, { "epoch": 1.68263350998777, "grad_norm": 1.8120906352996826, "learning_rate": 0.0007897472482674276, "loss": 3.1016, "step": 24765 }, { "epoch": 1.682973230058432, "grad_norm": 2.630066156387329, "learning_rate": 0.0007897047832585949, "loss": 3.4855, "step": 24770 }, { "epoch": 1.6833129501290935, "grad_norm": 2.9033005237579346, "learning_rate": 0.0007896623182497622, "loss": 3.519, "step": 24775 }, { "epoch": 1.6836526701997554, "grad_norm": 2.0753297805786133, "learning_rate": 0.0007896198532409295, "loss": 3.4329, "step": 24780 }, { "epoch": 1.6839923902704172, "grad_norm": 2.387497901916504, "learning_rate": 0.0007895773882320968, "loss": 3.4268, "step": 24785 }, { "epoch": 1.6843321103410789, "grad_norm": 1.6955839395523071, "learning_rate": 0.000789534923223264, "loss": 3.4452, "step": 24790 }, { "epoch": 1.6846718304117407, "grad_norm": 1.6345068216323853, "learning_rate": 0.0007894924582144313, "loss": 3.6781, "step": 24795 }, { "epoch": 1.6850115504824026, "grad_norm": 1.761690616607666, "learning_rate": 
0.0007894499932055986, "loss": 3.4319, "step": 24800 }, { "epoch": 1.6853512705530642, "grad_norm": 2.6030492782592773, "learning_rate": 0.0007894075281967658, "loss": 3.3385, "step": 24805 }, { "epoch": 1.685690990623726, "grad_norm": 4.114711761474609, "learning_rate": 0.0007893650631879331, "loss": 3.2675, "step": 24810 }, { "epoch": 1.686030710694388, "grad_norm": 1.7702885866165161, "learning_rate": 0.0007893225981791005, "loss": 3.6495, "step": 24815 }, { "epoch": 1.6863704307650496, "grad_norm": 1.8051308393478394, "learning_rate": 0.0007892801331702677, "loss": 3.4586, "step": 24820 }, { "epoch": 1.6867101508357114, "grad_norm": 1.7422586679458618, "learning_rate": 0.000789237668161435, "loss": 3.6042, "step": 24825 }, { "epoch": 1.6870498709063733, "grad_norm": 2.0151662826538086, "learning_rate": 0.0007891952031526023, "loss": 3.7103, "step": 24830 }, { "epoch": 1.687389590977035, "grad_norm": 1.8751500844955444, "learning_rate": 0.0007891527381437695, "loss": 3.485, "step": 24835 }, { "epoch": 1.6877293110476967, "grad_norm": 1.7985957860946655, "learning_rate": 0.0007891102731349367, "loss": 3.6373, "step": 24840 }, { "epoch": 1.6880690311183586, "grad_norm": 2.0009896755218506, "learning_rate": 0.0007890678081261041, "loss": 3.6513, "step": 24845 }, { "epoch": 1.6884087511890202, "grad_norm": 2.5033867359161377, "learning_rate": 0.0007890253431172714, "loss": 3.4701, "step": 24850 }, { "epoch": 1.6887484712596819, "grad_norm": 1.70130455493927, "learning_rate": 0.0007889828781084387, "loss": 3.4759, "step": 24855 }, { "epoch": 1.689088191330344, "grad_norm": 1.8564890623092651, "learning_rate": 0.000788940413099606, "loss": 3.5779, "step": 24860 }, { "epoch": 1.6894279114010056, "grad_norm": 1.7642574310302734, "learning_rate": 0.0007888979480907732, "loss": 3.3719, "step": 24865 }, { "epoch": 1.6897676314716672, "grad_norm": 1.7733279466629028, "learning_rate": 0.0007888554830819405, "loss": 3.3651, "step": 24870 }, { "epoch": 1.6901073515423293, 
"grad_norm": 1.5517992973327637, "learning_rate": 0.0007888130180731078, "loss": 3.5646, "step": 24875 }, { "epoch": 1.690447071612991, "grad_norm": 1.444474697113037, "learning_rate": 0.000788770553064275, "loss": 3.419, "step": 24880 }, { "epoch": 1.6907867916836525, "grad_norm": 2.3581390380859375, "learning_rate": 0.0007887280880554424, "loss": 3.3678, "step": 24885 }, { "epoch": 1.6911265117543146, "grad_norm": 2.0662667751312256, "learning_rate": 0.0007886856230466097, "loss": 3.5355, "step": 24890 }, { "epoch": 1.6914662318249762, "grad_norm": 2.516414165496826, "learning_rate": 0.0007886431580377769, "loss": 3.6588, "step": 24895 }, { "epoch": 1.6918059518956379, "grad_norm": 1.8723740577697754, "learning_rate": 0.0007886006930289442, "loss": 3.3928, "step": 24900 }, { "epoch": 1.6921456719662997, "grad_norm": 1.9827500581741333, "learning_rate": 0.0007885582280201114, "loss": 3.416, "step": 24905 }, { "epoch": 1.6924853920369616, "grad_norm": 2.0018770694732666, "learning_rate": 0.0007885157630112787, "loss": 3.5849, "step": 24910 }, { "epoch": 1.6928251121076232, "grad_norm": 2.1644442081451416, "learning_rate": 0.000788473298002446, "loss": 3.5166, "step": 24915 }, { "epoch": 1.693164832178285, "grad_norm": 1.8138115406036377, "learning_rate": 0.0007884308329936133, "loss": 3.4525, "step": 24920 }, { "epoch": 1.693504552248947, "grad_norm": 1.8853263854980469, "learning_rate": 0.0007883883679847806, "loss": 3.8526, "step": 24925 }, { "epoch": 1.6938442723196085, "grad_norm": 2.2034666538238525, "learning_rate": 0.0007883459029759479, "loss": 3.1976, "step": 24930 }, { "epoch": 1.6941839923902704, "grad_norm": 1.53842294216156, "learning_rate": 0.0007883034379671151, "loss": 3.7219, "step": 24935 }, { "epoch": 1.6945237124609323, "grad_norm": 2.121513605117798, "learning_rate": 0.0007882609729582823, "loss": 3.4709, "step": 24940 }, { "epoch": 1.6948634325315939, "grad_norm": 1.781772494316101, "learning_rate": 0.0007882185079494497, "loss": 3.4556, 
"step": 24945 }, { "epoch": 1.6952031526022557, "grad_norm": 2.4430787563323975, "learning_rate": 0.0007881760429406169, "loss": 3.6194, "step": 24950 }, { "epoch": 1.6955428726729176, "grad_norm": 1.6281354427337646, "learning_rate": 0.0007881335779317842, "loss": 3.6101, "step": 24955 }, { "epoch": 1.6958825927435792, "grad_norm": 1.7196732759475708, "learning_rate": 0.0007880911129229516, "loss": 3.367, "step": 24960 }, { "epoch": 1.696222312814241, "grad_norm": 1.7428375482559204, "learning_rate": 0.0007880486479141188, "loss": 3.4754, "step": 24965 }, { "epoch": 1.696562032884903, "grad_norm": 1.602816104888916, "learning_rate": 0.000788006182905286, "loss": 3.6857, "step": 24970 }, { "epoch": 1.6969017529555646, "grad_norm": 1.546082615852356, "learning_rate": 0.0007879637178964534, "loss": 3.5368, "step": 24975 }, { "epoch": 1.6972414730262264, "grad_norm": 1.889907717704773, "learning_rate": 0.0007879212528876206, "loss": 3.555, "step": 24980 }, { "epoch": 1.6975811930968883, "grad_norm": 2.550780773162842, "learning_rate": 0.0007878787878787878, "loss": 3.3747, "step": 24985 }, { "epoch": 1.69792091316755, "grad_norm": 1.7854183912277222, "learning_rate": 0.0007878363228699553, "loss": 3.3643, "step": 24990 }, { "epoch": 1.6982606332382117, "grad_norm": 1.9522658586502075, "learning_rate": 0.0007877938578611225, "loss": 3.703, "step": 24995 }, { "epoch": 1.6986003533088736, "grad_norm": 1.7797818183898926, "learning_rate": 0.0007877513928522897, "loss": 3.6715, "step": 25000 }, { "epoch": 1.6989400733795352, "grad_norm": 1.7552387714385986, "learning_rate": 0.000787708927843457, "loss": 3.5119, "step": 25005 }, { "epoch": 1.699279793450197, "grad_norm": 2.267080068588257, "learning_rate": 0.0007876664628346243, "loss": 3.6922, "step": 25010 }, { "epoch": 1.699619513520859, "grad_norm": 2.1506741046905518, "learning_rate": 0.0007876239978257915, "loss": 3.2893, "step": 25015 }, { "epoch": 1.6999592335915206, "grad_norm": 2.13320255279541, "learning_rate": 
0.0007875815328169588, "loss": 3.686, "step": 25020 }, { "epoch": 1.7002989536621822, "grad_norm": 2.312440872192383, "learning_rate": 0.0007875390678081262, "loss": 3.5862, "step": 25025 }, { "epoch": 1.7006386737328443, "grad_norm": 1.7708652019500732, "learning_rate": 0.0007874966027992934, "loss": 3.4377, "step": 25030 }, { "epoch": 1.700978393803506, "grad_norm": 1.934239149093628, "learning_rate": 0.0007874541377904607, "loss": 3.2123, "step": 25035 }, { "epoch": 1.7013181138741675, "grad_norm": 2.106175422668457, "learning_rate": 0.000787411672781628, "loss": 3.6287, "step": 25040 }, { "epoch": 1.7016578339448296, "grad_norm": 1.9681979417800903, "learning_rate": 0.0007873692077727952, "loss": 3.4692, "step": 25045 }, { "epoch": 1.7019975540154912, "grad_norm": 2.1718332767486572, "learning_rate": 0.0007873267427639625, "loss": 3.5528, "step": 25050 }, { "epoch": 1.7023372740861529, "grad_norm": 2.265899658203125, "learning_rate": 0.0007872842777551297, "loss": 3.5393, "step": 25055 }, { "epoch": 1.702676994156815, "grad_norm": 1.8842217922210693, "learning_rate": 0.0007872418127462971, "loss": 3.3985, "step": 25060 }, { "epoch": 1.7030167142274766, "grad_norm": 1.5203450918197632, "learning_rate": 0.0007871993477374644, "loss": 3.2536, "step": 25065 }, { "epoch": 1.7033564342981382, "grad_norm": 1.830514669418335, "learning_rate": 0.0007871568827286316, "loss": 3.6342, "step": 25070 }, { "epoch": 1.7036961543688, "grad_norm": 1.6691879034042358, "learning_rate": 0.0007871144177197989, "loss": 3.3159, "step": 25075 }, { "epoch": 1.704035874439462, "grad_norm": 1.6761834621429443, "learning_rate": 0.0007870719527109662, "loss": 3.5346, "step": 25080 }, { "epoch": 1.7043755945101235, "grad_norm": 1.9526817798614502, "learning_rate": 0.0007870294877021334, "loss": 3.6643, "step": 25085 }, { "epoch": 1.7047153145807854, "grad_norm": 1.6223347187042236, "learning_rate": 0.0007869870226933006, "loss": 3.5721, "step": 25090 }, { "epoch": 1.7050550346514473, 
"grad_norm": 1.757399320602417, "learning_rate": 0.0007869445576844681, "loss": 3.6693, "step": 25095 }, { "epoch": 1.7053947547221089, "grad_norm": 1.6339486837387085, "learning_rate": 0.0007869020926756353, "loss": 3.5968, "step": 25100 }, { "epoch": 1.7057344747927707, "grad_norm": 1.9465932846069336, "learning_rate": 0.0007868596276668025, "loss": 3.5912, "step": 25105 }, { "epoch": 1.7060741948634326, "grad_norm": 1.3730475902557373, "learning_rate": 0.0007868171626579699, "loss": 3.4635, "step": 25110 }, { "epoch": 1.7064139149340942, "grad_norm": 1.6975542306900024, "learning_rate": 0.0007867746976491371, "loss": 3.4756, "step": 25115 }, { "epoch": 1.706753635004756, "grad_norm": 1.9520105123519897, "learning_rate": 0.0007867322326403043, "loss": 3.4539, "step": 25120 }, { "epoch": 1.707093355075418, "grad_norm": 2.3367114067077637, "learning_rate": 0.0007866897676314717, "loss": 3.5297, "step": 25125 }, { "epoch": 1.7074330751460796, "grad_norm": 1.7353836297988892, "learning_rate": 0.000786647302622639, "loss": 3.8593, "step": 25130 }, { "epoch": 1.7077727952167414, "grad_norm": 1.7009551525115967, "learning_rate": 0.0007866048376138062, "loss": 3.3832, "step": 25135 }, { "epoch": 1.7081125152874033, "grad_norm": 1.7684279680252075, "learning_rate": 0.0007865623726049735, "loss": 3.8142, "step": 25140 }, { "epoch": 1.708452235358065, "grad_norm": 3.0157828330993652, "learning_rate": 0.0007865199075961408, "loss": 3.4993, "step": 25145 }, { "epoch": 1.7087919554287267, "grad_norm": 1.965090274810791, "learning_rate": 0.000786477442587308, "loss": 3.9242, "step": 25150 }, { "epoch": 1.7091316754993886, "grad_norm": 1.8513412475585938, "learning_rate": 0.0007864349775784753, "loss": 3.6516, "step": 25155 }, { "epoch": 1.7094713955700502, "grad_norm": 2.263183355331421, "learning_rate": 0.0007863925125696426, "loss": 3.5351, "step": 25160 }, { "epoch": 1.709811115640712, "grad_norm": 2.0800936222076416, "learning_rate": 0.0007863500475608099, "loss": 3.4416, 
"step": 25165 }, { "epoch": 1.710150835711374, "grad_norm": 2.0024445056915283, "learning_rate": 0.0007863075825519772, "loss": 3.6425, "step": 25170 }, { "epoch": 1.7104905557820356, "grad_norm": 2.0068674087524414, "learning_rate": 0.0007862651175431445, "loss": 3.7091, "step": 25175 }, { "epoch": 1.7108302758526974, "grad_norm": 1.9899266958236694, "learning_rate": 0.0007862226525343117, "loss": 3.6073, "step": 25180 }, { "epoch": 1.7111699959233593, "grad_norm": 1.7915674448013306, "learning_rate": 0.000786180187525479, "loss": 3.681, "step": 25185 }, { "epoch": 1.711509715994021, "grad_norm": 2.9249379634857178, "learning_rate": 0.0007861377225166462, "loss": 3.6605, "step": 25190 }, { "epoch": 1.7118494360646825, "grad_norm": 2.3313612937927246, "learning_rate": 0.0007860952575078137, "loss": 3.7304, "step": 25195 }, { "epoch": 1.7121891561353446, "grad_norm": 1.790425181388855, "learning_rate": 0.0007860527924989809, "loss": 3.3996, "step": 25200 }, { "epoch": 1.7125288762060062, "grad_norm": 1.940064787864685, "learning_rate": 0.0007860103274901481, "loss": 3.4104, "step": 25205 }, { "epoch": 1.7128685962766679, "grad_norm": 1.9557806253433228, "learning_rate": 0.0007859678624813155, "loss": 3.5321, "step": 25210 }, { "epoch": 1.71320831634733, "grad_norm": 1.9356191158294678, "learning_rate": 0.0007859253974724827, "loss": 3.4822, "step": 25215 }, { "epoch": 1.7135480364179916, "grad_norm": 1.8120182752609253, "learning_rate": 0.0007858829324636499, "loss": 3.6163, "step": 25220 }, { "epoch": 1.7138877564886532, "grad_norm": 1.8195436000823975, "learning_rate": 0.0007858404674548173, "loss": 3.5747, "step": 25225 }, { "epoch": 1.7142274765593153, "grad_norm": 1.9227581024169922, "learning_rate": 0.0007857980024459846, "loss": 3.3513, "step": 25230 }, { "epoch": 1.714567196629977, "grad_norm": 2.1254990100860596, "learning_rate": 0.0007857555374371518, "loss": 3.4836, "step": 25235 }, { "epoch": 1.7149069167006386, "grad_norm": 2.134773015975952, 
"learning_rate": 0.0007857130724283192, "loss": 4.0254, "step": 25240 }, { "epoch": 1.7152466367713004, "grad_norm": 1.4946385622024536, "learning_rate": 0.0007856706074194864, "loss": 3.4013, "step": 25245 }, { "epoch": 1.7155863568419623, "grad_norm": 1.7153475284576416, "learning_rate": 0.0007856281424106536, "loss": 3.6083, "step": 25250 }, { "epoch": 1.7159260769126239, "grad_norm": 1.75645112991333, "learning_rate": 0.0007855856774018209, "loss": 3.4467, "step": 25255 }, { "epoch": 1.7162657969832857, "grad_norm": 1.9460816383361816, "learning_rate": 0.0007855432123929882, "loss": 3.4665, "step": 25260 }, { "epoch": 1.7166055170539476, "grad_norm": 1.9662479162216187, "learning_rate": 0.0007855007473841555, "loss": 3.7491, "step": 25265 }, { "epoch": 1.7169452371246092, "grad_norm": 1.618243932723999, "learning_rate": 0.0007854582823753228, "loss": 3.5767, "step": 25270 }, { "epoch": 1.717284957195271, "grad_norm": 1.591151475906372, "learning_rate": 0.0007854158173664901, "loss": 3.4218, "step": 25275 }, { "epoch": 1.717624677265933, "grad_norm": 2.2773561477661133, "learning_rate": 0.0007853733523576573, "loss": 3.6101, "step": 25280 }, { "epoch": 1.7179643973365946, "grad_norm": 1.7655794620513916, "learning_rate": 0.0007853308873488246, "loss": 3.6844, "step": 25285 }, { "epoch": 1.7183041174072564, "grad_norm": 1.7378642559051514, "learning_rate": 0.0007852884223399918, "loss": 3.6233, "step": 25290 }, { "epoch": 1.7186438374779183, "grad_norm": 2.4587812423706055, "learning_rate": 0.0007852459573311591, "loss": 3.3049, "step": 25295 }, { "epoch": 1.71898355754858, "grad_norm": 1.6623395681381226, "learning_rate": 0.0007852034923223265, "loss": 3.6057, "step": 25300 }, { "epoch": 1.7193232776192418, "grad_norm": 1.9597523212432861, "learning_rate": 0.0007851610273134937, "loss": 3.3708, "step": 25305 }, { "epoch": 1.7196629976899036, "grad_norm": 2.3194360733032227, "learning_rate": 0.000785118562304661, "loss": 3.6952, "step": 25310 }, { "epoch": 
1.7200027177605652, "grad_norm": 2.1995649337768555, "learning_rate": 0.0007850760972958283, "loss": 3.5802, "step": 25315 }, { "epoch": 1.720342437831227, "grad_norm": 1.8823150396347046, "learning_rate": 0.0007850336322869955, "loss": 3.5621, "step": 25320 }, { "epoch": 1.720682157901889, "grad_norm": 2.0211594104766846, "learning_rate": 0.0007849911672781627, "loss": 3.4994, "step": 25325 }, { "epoch": 1.7210218779725506, "grad_norm": 1.676804542541504, "learning_rate": 0.0007849487022693301, "loss": 3.6075, "step": 25330 }, { "epoch": 1.7213615980432124, "grad_norm": 2.368696451187134, "learning_rate": 0.0007849062372604974, "loss": 3.3961, "step": 25335 }, { "epoch": 1.7217013181138743, "grad_norm": 1.5973131656646729, "learning_rate": 0.0007848637722516646, "loss": 3.5565, "step": 25340 }, { "epoch": 1.722041038184536, "grad_norm": 1.737319827079773, "learning_rate": 0.000784821307242832, "loss": 3.3788, "step": 25345 }, { "epoch": 1.7223807582551978, "grad_norm": 1.8910108804702759, "learning_rate": 0.0007847788422339992, "loss": 3.3553, "step": 25350 }, { "epoch": 1.7227204783258596, "grad_norm": 2.235173225402832, "learning_rate": 0.0007847363772251664, "loss": 3.4724, "step": 25355 }, { "epoch": 1.7230601983965212, "grad_norm": 1.7656561136245728, "learning_rate": 0.0007846939122163338, "loss": 3.3418, "step": 25360 }, { "epoch": 1.7233999184671829, "grad_norm": 1.9429280757904053, "learning_rate": 0.000784651447207501, "loss": 3.4002, "step": 25365 }, { "epoch": 1.723739638537845, "grad_norm": 1.9624143838882446, "learning_rate": 0.0007846089821986683, "loss": 3.7743, "step": 25370 }, { "epoch": 1.7240793586085066, "grad_norm": 1.8956007957458496, "learning_rate": 0.0007845665171898357, "loss": 3.4756, "step": 25375 }, { "epoch": 1.7244190786791682, "grad_norm": 2.188648223876953, "learning_rate": 0.0007845240521810029, "loss": 3.466, "step": 25380 }, { "epoch": 1.7247587987498303, "grad_norm": 2.0461673736572266, "learning_rate": 0.0007844815871721701, 
"loss": 3.6426, "step": 25385 }, { "epoch": 1.725098518820492, "grad_norm": 1.6510043144226074, "learning_rate": 0.0007844391221633374, "loss": 3.6918, "step": 25390 }, { "epoch": 1.7254382388911536, "grad_norm": 2.3761658668518066, "learning_rate": 0.0007843966571545047, "loss": 3.5146, "step": 25395 }, { "epoch": 1.7257779589618156, "grad_norm": 1.9541124105453491, "learning_rate": 0.0007843541921456719, "loss": 3.4509, "step": 25400 }, { "epoch": 1.7261176790324773, "grad_norm": 1.8634281158447266, "learning_rate": 0.0007843117271368393, "loss": 3.4961, "step": 25405 }, { "epoch": 1.726457399103139, "grad_norm": 1.8503111600875854, "learning_rate": 0.0007842692621280066, "loss": 3.6271, "step": 25410 }, { "epoch": 1.7267971191738007, "grad_norm": 1.9612778425216675, "learning_rate": 0.0007842267971191738, "loss": 3.6481, "step": 25415 }, { "epoch": 1.7271368392444626, "grad_norm": 1.793723464012146, "learning_rate": 0.0007841843321103411, "loss": 3.7623, "step": 25420 }, { "epoch": 1.7274765593151242, "grad_norm": 1.931287169456482, "learning_rate": 0.0007841418671015084, "loss": 3.485, "step": 25425 }, { "epoch": 1.727816279385786, "grad_norm": 2.0357425212860107, "learning_rate": 0.0007840994020926756, "loss": 3.4829, "step": 25430 }, { "epoch": 1.728155999456448, "grad_norm": 2.4043264389038086, "learning_rate": 0.0007840569370838429, "loss": 3.5639, "step": 25435 }, { "epoch": 1.7284957195271096, "grad_norm": 1.6736863851547241, "learning_rate": 0.0007840144720750102, "loss": 3.6272, "step": 25440 }, { "epoch": 1.7288354395977714, "grad_norm": 1.8700617551803589, "learning_rate": 0.0007839720070661775, "loss": 3.4511, "step": 25445 }, { "epoch": 1.7291751596684333, "grad_norm": 2.2608344554901123, "learning_rate": 0.0007839295420573448, "loss": 3.5491, "step": 25450 }, { "epoch": 1.729514879739095, "grad_norm": 2.2654941082000732, "learning_rate": 0.000783887077048512, "loss": 3.4948, "step": 25455 }, { "epoch": 1.7298545998097568, "grad_norm": 
1.8225078582763672, "learning_rate": 0.0007838446120396793, "loss": 3.7395, "step": 25460 }, { "epoch": 1.7301943198804186, "grad_norm": 1.851405143737793, "learning_rate": 0.0007838021470308466, "loss": 3.6313, "step": 25465 }, { "epoch": 1.7305340399510802, "grad_norm": 2.1945688724517822, "learning_rate": 0.0007837596820220138, "loss": 3.4238, "step": 25470 }, { "epoch": 1.730873760021742, "grad_norm": 1.8736850023269653, "learning_rate": 0.0007837172170131812, "loss": 3.6004, "step": 25475 }, { "epoch": 1.731213480092404, "grad_norm": 2.167818307876587, "learning_rate": 0.0007836747520043485, "loss": 3.2767, "step": 25480 }, { "epoch": 1.7315532001630656, "grad_norm": 2.914031982421875, "learning_rate": 0.0007836322869955157, "loss": 3.5537, "step": 25485 }, { "epoch": 1.7318929202337274, "grad_norm": 2.1989898681640625, "learning_rate": 0.0007835898219866829, "loss": 3.5494, "step": 25490 }, { "epoch": 1.7322326403043893, "grad_norm": 2.1393380165100098, "learning_rate": 0.0007835473569778503, "loss": 3.3937, "step": 25495 }, { "epoch": 1.732572360375051, "grad_norm": 3.1115920543670654, "learning_rate": 0.0007835048919690175, "loss": 3.6222, "step": 25500 }, { "epoch": 1.7329120804457128, "grad_norm": 2.2429091930389404, "learning_rate": 0.0007834624269601847, "loss": 3.5757, "step": 25505 }, { "epoch": 1.7332518005163746, "grad_norm": 2.008220911026001, "learning_rate": 0.0007834199619513522, "loss": 3.7805, "step": 25510 }, { "epoch": 1.7335915205870362, "grad_norm": 1.928051471710205, "learning_rate": 0.0007833774969425194, "loss": 3.3877, "step": 25515 }, { "epoch": 1.733931240657698, "grad_norm": 1.7471927404403687, "learning_rate": 0.0007833350319336866, "loss": 3.5298, "step": 25520 }, { "epoch": 1.73427096072836, "grad_norm": 2.1367218494415283, "learning_rate": 0.000783292566924854, "loss": 3.2129, "step": 25525 }, { "epoch": 1.7346106807990216, "grad_norm": 1.966066598892212, "learning_rate": 0.0007832501019160212, "loss": 3.2567, "step": 25530 }, { 
"epoch": 1.7349504008696832, "grad_norm": 1.6953784227371216, "learning_rate": 0.0007832076369071885, "loss": 3.4367, "step": 25535 }, { "epoch": 1.7352901209403453, "grad_norm": 1.796836495399475, "learning_rate": 0.0007831651718983557, "loss": 3.4661, "step": 25540 }, { "epoch": 1.735629841011007, "grad_norm": 2.274252414703369, "learning_rate": 0.0007831227068895231, "loss": 3.532, "step": 25545 }, { "epoch": 1.7359695610816686, "grad_norm": 2.299431085586548, "learning_rate": 0.0007830802418806904, "loss": 3.6383, "step": 25550 }, { "epoch": 1.7363092811523306, "grad_norm": 1.8431810140609741, "learning_rate": 0.0007830377768718576, "loss": 3.6841, "step": 25555 }, { "epoch": 1.7366490012229923, "grad_norm": 1.6388750076293945, "learning_rate": 0.0007829953118630249, "loss": 3.4649, "step": 25560 }, { "epoch": 1.736988721293654, "grad_norm": 2.432389736175537, "learning_rate": 0.0007829528468541922, "loss": 3.4392, "step": 25565 }, { "epoch": 1.737328441364316, "grad_norm": 2.1356630325317383, "learning_rate": 0.0007829103818453594, "loss": 3.2056, "step": 25570 }, { "epoch": 1.7376681614349776, "grad_norm": 1.9192836284637451, "learning_rate": 0.0007828679168365266, "loss": 3.4762, "step": 25575 }, { "epoch": 1.7380078815056392, "grad_norm": 2.1273932456970215, "learning_rate": 0.0007828254518276941, "loss": 3.635, "step": 25580 }, { "epoch": 1.738347601576301, "grad_norm": 1.868888258934021, "learning_rate": 0.0007827829868188613, "loss": 3.4496, "step": 25585 }, { "epoch": 1.738687321646963, "grad_norm": 1.9353479146957397, "learning_rate": 0.0007827405218100285, "loss": 3.5143, "step": 25590 }, { "epoch": 1.7390270417176246, "grad_norm": 2.1653013229370117, "learning_rate": 0.0007826980568011959, "loss": 3.6641, "step": 25595 }, { "epoch": 1.7393667617882864, "grad_norm": 2.212287425994873, "learning_rate": 0.0007826555917923631, "loss": 3.7958, "step": 25600 }, { "epoch": 1.7397064818589483, "grad_norm": 1.992061972618103, "learning_rate": 
0.0007826131267835303, "loss": 3.5322, "step": 25605 }, { "epoch": 1.74004620192961, "grad_norm": 1.903251051902771, "learning_rate": 0.0007825706617746977, "loss": 3.5498, "step": 25610 }, { "epoch": 1.7403859220002718, "grad_norm": 1.9877099990844727, "learning_rate": 0.000782528196765865, "loss": 3.7965, "step": 25615 }, { "epoch": 1.7407256420709336, "grad_norm": 2.3960161209106445, "learning_rate": 0.0007824857317570322, "loss": 3.4532, "step": 25620 }, { "epoch": 1.7410653621415952, "grad_norm": 2.0241284370422363, "learning_rate": 0.0007824432667481996, "loss": 3.5276, "step": 25625 }, { "epoch": 1.741405082212257, "grad_norm": 2.133754014968872, "learning_rate": 0.0007824008017393668, "loss": 3.5771, "step": 25630 }, { "epoch": 1.741744802282919, "grad_norm": 2.113647699356079, "learning_rate": 0.000782358336730534, "loss": 3.5775, "step": 25635 }, { "epoch": 1.7420845223535806, "grad_norm": 1.6571323871612549, "learning_rate": 0.0007823158717217013, "loss": 3.3498, "step": 25640 }, { "epoch": 1.7424242424242424, "grad_norm": 2.231170892715454, "learning_rate": 0.0007822734067128686, "loss": 3.4531, "step": 25645 }, { "epoch": 1.7427639624949043, "grad_norm": 2.2761807441711426, "learning_rate": 0.0007822309417040359, "loss": 3.3804, "step": 25650 }, { "epoch": 1.743103682565566, "grad_norm": 1.63253653049469, "learning_rate": 0.0007821884766952032, "loss": 3.5354, "step": 25655 }, { "epoch": 1.7434434026362278, "grad_norm": 1.8877935409545898, "learning_rate": 0.0007821460116863705, "loss": 3.3539, "step": 25660 }, { "epoch": 1.7437831227068896, "grad_norm": 2.2918519973754883, "learning_rate": 0.0007821035466775377, "loss": 3.491, "step": 25665 }, { "epoch": 1.7441228427775513, "grad_norm": 1.714491844177246, "learning_rate": 0.000782061081668705, "loss": 3.8387, "step": 25670 }, { "epoch": 1.744462562848213, "grad_norm": 1.8996104001998901, "learning_rate": 0.0007820186166598722, "loss": 3.3833, "step": 25675 }, { "epoch": 1.744802282918875, "grad_norm": 
1.6972473859786987, "learning_rate": 0.0007819761516510395, "loss": 3.6234, "step": 25680 }, { "epoch": 1.7451420029895366, "grad_norm": 2.3331010341644287, "learning_rate": 0.0007819336866422069, "loss": 3.4606, "step": 25685 }, { "epoch": 1.7454817230601984, "grad_norm": 2.0116379261016846, "learning_rate": 0.0007818912216333741, "loss": 3.654, "step": 25690 }, { "epoch": 1.7458214431308603, "grad_norm": 1.7095600366592407, "learning_rate": 0.0007818487566245414, "loss": 3.6668, "step": 25695 }, { "epoch": 1.746161163201522, "grad_norm": 1.6772890090942383, "learning_rate": 0.0007818062916157087, "loss": 3.4141, "step": 25700 }, { "epoch": 1.7465008832721836, "grad_norm": 2.1473822593688965, "learning_rate": 0.0007817638266068759, "loss": 3.7294, "step": 25705 }, { "epoch": 1.7468406033428456, "grad_norm": 2.1958463191986084, "learning_rate": 0.0007817213615980432, "loss": 3.4553, "step": 25710 }, { "epoch": 1.7471803234135073, "grad_norm": 2.4068734645843506, "learning_rate": 0.0007816788965892106, "loss": 3.4905, "step": 25715 }, { "epoch": 1.747520043484169, "grad_norm": 1.959209680557251, "learning_rate": 0.0007816364315803778, "loss": 3.5467, "step": 25720 }, { "epoch": 1.747859763554831, "grad_norm": 1.9582140445709229, "learning_rate": 0.000781593966571545, "loss": 3.4382, "step": 25725 }, { "epoch": 1.7481994836254926, "grad_norm": 1.621286153793335, "learning_rate": 0.0007815515015627124, "loss": 3.5212, "step": 25730 }, { "epoch": 1.7485392036961542, "grad_norm": 1.8204723596572876, "learning_rate": 0.0007815090365538796, "loss": 3.2364, "step": 25735 }, { "epoch": 1.7488789237668163, "grad_norm": 1.9599645137786865, "learning_rate": 0.0007814665715450468, "loss": 3.6029, "step": 25740 }, { "epoch": 1.749218643837478, "grad_norm": 1.5326048135757446, "learning_rate": 0.0007814241065362142, "loss": 3.2478, "step": 25745 }, { "epoch": 1.7495583639081396, "grad_norm": 1.9724456071853638, "learning_rate": 0.0007813816415273815, "loss": 3.6717, "step": 25750 
}, { "epoch": 1.7498980839788014, "grad_norm": 2.90053391456604, "learning_rate": 0.0007813391765185487, "loss": 3.5302, "step": 25755 }, { "epoch": 1.7502378040494633, "grad_norm": 2.3355486392974854, "learning_rate": 0.0007812967115097161, "loss": 3.558, "step": 25760 }, { "epoch": 1.750577524120125, "grad_norm": 1.882770299911499, "learning_rate": 0.0007812542465008833, "loss": 3.4978, "step": 25765 }, { "epoch": 1.7509172441907868, "grad_norm": 1.4346344470977783, "learning_rate": 0.0007812117814920505, "loss": 3.2968, "step": 25770 }, { "epoch": 1.7512569642614486, "grad_norm": 1.8124969005584717, "learning_rate": 0.0007811693164832178, "loss": 3.6681, "step": 25775 }, { "epoch": 1.7515966843321102, "grad_norm": 1.832862377166748, "learning_rate": 0.0007811268514743851, "loss": 3.5579, "step": 25780 }, { "epoch": 1.751936404402772, "grad_norm": 1.9102516174316406, "learning_rate": 0.0007810843864655524, "loss": 3.3461, "step": 25785 }, { "epoch": 1.752276124473434, "grad_norm": 1.977845311164856, "learning_rate": 0.0007810419214567197, "loss": 3.7906, "step": 25790 }, { "epoch": 1.7526158445440956, "grad_norm": 1.5668460130691528, "learning_rate": 0.000780999456447887, "loss": 3.4458, "step": 25795 }, { "epoch": 1.7529555646147574, "grad_norm": 1.8130799531936646, "learning_rate": 0.0007809569914390542, "loss": 3.6091, "step": 25800 }, { "epoch": 1.7532952846854193, "grad_norm": 2.3669228553771973, "learning_rate": 0.0007809145264302215, "loss": 3.5355, "step": 25805 }, { "epoch": 1.753635004756081, "grad_norm": 1.5958282947540283, "learning_rate": 0.0007808720614213888, "loss": 3.7039, "step": 25810 }, { "epoch": 1.7539747248267428, "grad_norm": 2.017608404159546, "learning_rate": 0.000780829596412556, "loss": 3.7826, "step": 25815 }, { "epoch": 1.7543144448974046, "grad_norm": 2.6123108863830566, "learning_rate": 0.0007807871314037234, "loss": 3.8271, "step": 25820 }, { "epoch": 1.7546541649680663, "grad_norm": 2.059497833251953, "learning_rate": 
0.0007807446663948906, "loss": 3.4355, "step": 25825 }, { "epoch": 1.754993885038728, "grad_norm": 2.9077625274658203, "learning_rate": 0.0007807022013860579, "loss": 3.7177, "step": 25830 }, { "epoch": 1.75533360510939, "grad_norm": 1.9994817972183228, "learning_rate": 0.0007806597363772252, "loss": 3.6594, "step": 25835 }, { "epoch": 1.7556733251800516, "grad_norm": 1.6748301982879639, "learning_rate": 0.0007806172713683924, "loss": 3.5557, "step": 25840 }, { "epoch": 1.7560130452507134, "grad_norm": 1.9798150062561035, "learning_rate": 0.0007805748063595597, "loss": 3.633, "step": 25845 }, { "epoch": 1.7563527653213753, "grad_norm": 2.341554880142212, "learning_rate": 0.000780532341350727, "loss": 3.6424, "step": 25850 }, { "epoch": 1.756692485392037, "grad_norm": 1.7893519401550293, "learning_rate": 0.0007804898763418943, "loss": 3.52, "step": 25855 }, { "epoch": 1.7570322054626988, "grad_norm": 2.3055810928344727, "learning_rate": 0.0007804474113330616, "loss": 3.5798, "step": 25860 }, { "epoch": 1.7573719255333606, "grad_norm": 2.245218276977539, "learning_rate": 0.0007804049463242289, "loss": 3.5844, "step": 25865 }, { "epoch": 1.7577116456040223, "grad_norm": 1.5873669385910034, "learning_rate": 0.0007803624813153961, "loss": 3.3839, "step": 25870 }, { "epoch": 1.758051365674684, "grad_norm": 2.09385347366333, "learning_rate": 0.0007803200163065634, "loss": 3.2864, "step": 25875 }, { "epoch": 1.758391085745346, "grad_norm": 2.083247661590576, "learning_rate": 0.0007802775512977307, "loss": 3.3248, "step": 25880 }, { "epoch": 1.7587308058160076, "grad_norm": 1.694003701210022, "learning_rate": 0.0007802350862888979, "loss": 3.5136, "step": 25885 }, { "epoch": 1.7590705258866692, "grad_norm": 1.9032186269760132, "learning_rate": 0.0007801926212800653, "loss": 3.3935, "step": 25890 }, { "epoch": 1.7594102459573313, "grad_norm": 1.6765103340148926, "learning_rate": 0.0007801501562712326, "loss": 3.4802, "step": 25895 }, { "epoch": 1.759749966027993, 
"grad_norm": 1.7590738534927368, "learning_rate": 0.0007801076912623998, "loss": 3.1942, "step": 25900 }, { "epoch": 1.7600896860986546, "grad_norm": 2.2843563556671143, "learning_rate": 0.0007800652262535671, "loss": 3.4824, "step": 25905 }, { "epoch": 1.7604294061693166, "grad_norm": 2.006793737411499, "learning_rate": 0.0007800227612447344, "loss": 3.65, "step": 25910 }, { "epoch": 1.7607691262399783, "grad_norm": 1.646658182144165, "learning_rate": 0.0007799802962359016, "loss": 3.5256, "step": 25915 }, { "epoch": 1.76110884631064, "grad_norm": 1.7792363166809082, "learning_rate": 0.0007799378312270689, "loss": 3.4128, "step": 25920 }, { "epoch": 1.7614485663813018, "grad_norm": 2.0181546211242676, "learning_rate": 0.0007798953662182363, "loss": 3.5076, "step": 25925 }, { "epoch": 1.7617882864519636, "grad_norm": 1.6662319898605347, "learning_rate": 0.0007798529012094035, "loss": 3.5571, "step": 25930 }, { "epoch": 1.7621280065226252, "grad_norm": 2.436514139175415, "learning_rate": 0.0007798104362005708, "loss": 3.6465, "step": 25935 }, { "epoch": 1.762467726593287, "grad_norm": 2.037813663482666, "learning_rate": 0.000779767971191738, "loss": 3.3604, "step": 25940 }, { "epoch": 1.762807446663949, "grad_norm": 1.5696594715118408, "learning_rate": 0.0007797255061829053, "loss": 3.7701, "step": 25945 }, { "epoch": 1.7631471667346106, "grad_norm": 2.2144079208374023, "learning_rate": 0.0007796830411740726, "loss": 3.4619, "step": 25950 }, { "epoch": 1.7634868868052724, "grad_norm": 1.9070181846618652, "learning_rate": 0.0007796405761652398, "loss": 3.6776, "step": 25955 }, { "epoch": 1.7638266068759343, "grad_norm": 1.796687364578247, "learning_rate": 0.0007795981111564072, "loss": 3.711, "step": 25960 }, { "epoch": 1.764166326946596, "grad_norm": 1.8547630310058594, "learning_rate": 0.0007795556461475745, "loss": 3.4748, "step": 25965 }, { "epoch": 1.7645060470172578, "grad_norm": 1.7843934297561646, "learning_rate": 0.0007795131811387417, "loss": 3.8197, 
"step": 25970 }, { "epoch": 1.7648457670879196, "grad_norm": 1.3248651027679443, "learning_rate": 0.0007794707161299089, "loss": 3.5117, "step": 25975 }, { "epoch": 1.7651854871585813, "grad_norm": 1.4313092231750488, "learning_rate": 0.0007794282511210763, "loss": 3.416, "step": 25980 }, { "epoch": 1.765525207229243, "grad_norm": 2.1390833854675293, "learning_rate": 0.0007793857861122435, "loss": 3.4391, "step": 25985 }, { "epoch": 1.765864927299905, "grad_norm": 1.987778902053833, "learning_rate": 0.0007793433211034107, "loss": 3.5098, "step": 25990 }, { "epoch": 1.7662046473705666, "grad_norm": 1.9447896480560303, "learning_rate": 0.0007793008560945782, "loss": 3.9004, "step": 25995 }, { "epoch": 1.7665443674412284, "grad_norm": 1.6585066318511963, "learning_rate": 0.0007792583910857454, "loss": 3.6729, "step": 26000 }, { "epoch": 1.7668840875118903, "grad_norm": 1.5302056074142456, "learning_rate": 0.0007792159260769126, "loss": 3.5048, "step": 26005 }, { "epoch": 1.767223807582552, "grad_norm": 2.117276906967163, "learning_rate": 0.00077917346106808, "loss": 3.7243, "step": 26010 }, { "epoch": 1.7675635276532138, "grad_norm": 1.614738941192627, "learning_rate": 0.0007791309960592472, "loss": 3.562, "step": 26015 }, { "epoch": 1.7679032477238756, "grad_norm": 2.2171671390533447, "learning_rate": 0.0007790885310504144, "loss": 3.4648, "step": 26020 }, { "epoch": 1.7682429677945373, "grad_norm": 1.777828574180603, "learning_rate": 0.0007790460660415817, "loss": 3.2768, "step": 26025 }, { "epoch": 1.7685826878651991, "grad_norm": 1.7922108173370361, "learning_rate": 0.0007790036010327491, "loss": 3.4386, "step": 26030 }, { "epoch": 1.768922407935861, "grad_norm": 1.7416952848434448, "learning_rate": 0.0007789611360239163, "loss": 3.6017, "step": 26035 }, { "epoch": 1.7692621280065226, "grad_norm": 1.856008529663086, "learning_rate": 0.0007789186710150836, "loss": 3.6002, "step": 26040 }, { "epoch": 1.7696018480771842, "grad_norm": 2.002056121826172, 
"learning_rate": 0.0007788762060062509, "loss": 3.5503, "step": 26045 }, { "epoch": 1.7699415681478463, "grad_norm": 2.296607494354248, "learning_rate": 0.0007788337409974181, "loss": 3.6925, "step": 26050 }, { "epoch": 1.770281288218508, "grad_norm": 2.4911296367645264, "learning_rate": 0.0007787912759885854, "loss": 3.404, "step": 26055 }, { "epoch": 1.7706210082891696, "grad_norm": 1.8859704732894897, "learning_rate": 0.0007787488109797527, "loss": 3.4539, "step": 26060 }, { "epoch": 1.7709607283598316, "grad_norm": 1.8989832401275635, "learning_rate": 0.00077870634597092, "loss": 3.5642, "step": 26065 }, { "epoch": 1.7713004484304933, "grad_norm": 2.201676845550537, "learning_rate": 0.0007786638809620873, "loss": 3.3602, "step": 26070 }, { "epoch": 1.771640168501155, "grad_norm": 2.14866304397583, "learning_rate": 0.0007786214159532545, "loss": 3.3392, "step": 26075 }, { "epoch": 1.771979888571817, "grad_norm": 2.031550168991089, "learning_rate": 0.0007785789509444218, "loss": 3.6964, "step": 26080 }, { "epoch": 1.7723196086424786, "grad_norm": 1.7708381414413452, "learning_rate": 0.0007785364859355891, "loss": 3.3906, "step": 26085 }, { "epoch": 1.7726593287131402, "grad_norm": 1.7788397073745728, "learning_rate": 0.0007784940209267563, "loss": 3.5912, "step": 26090 }, { "epoch": 1.772999048783802, "grad_norm": 2.3534505367279053, "learning_rate": 0.0007784515559179236, "loss": 3.3309, "step": 26095 }, { "epoch": 1.773338768854464, "grad_norm": 1.9255259037017822, "learning_rate": 0.000778409090909091, "loss": 3.4061, "step": 26100 }, { "epoch": 1.7736784889251256, "grad_norm": 1.3033028841018677, "learning_rate": 0.0007783666259002582, "loss": 3.4967, "step": 26105 }, { "epoch": 1.7740182089957874, "grad_norm": 2.081023931503296, "learning_rate": 0.0007783241608914255, "loss": 3.1995, "step": 26110 }, { "epoch": 1.7743579290664493, "grad_norm": 1.933728814125061, "learning_rate": 0.0007782816958825928, "loss": 3.5534, "step": 26115 }, { "epoch": 
1.774697649137111, "grad_norm": 2.305975914001465, "learning_rate": 0.00077823923087376, "loss": 3.6738, "step": 26120 }, { "epoch": 1.7750373692077728, "grad_norm": 1.8201874494552612, "learning_rate": 0.0007781967658649272, "loss": 3.5642, "step": 26125 }, { "epoch": 1.7753770892784346, "grad_norm": 1.7766954898834229, "learning_rate": 0.0007781543008560946, "loss": 3.4999, "step": 26130 }, { "epoch": 1.7757168093490963, "grad_norm": 2.0850508213043213, "learning_rate": 0.0007781118358472619, "loss": 3.5128, "step": 26135 }, { "epoch": 1.7760565294197581, "grad_norm": 1.6362967491149902, "learning_rate": 0.0007780693708384291, "loss": 3.4987, "step": 26140 }, { "epoch": 1.77639624949042, "grad_norm": 2.1081793308258057, "learning_rate": 0.0007780269058295965, "loss": 3.4759, "step": 26145 }, { "epoch": 1.7767359695610816, "grad_norm": 2.033809185028076, "learning_rate": 0.0007779844408207637, "loss": 3.3623, "step": 26150 }, { "epoch": 1.7770756896317434, "grad_norm": 1.9711390733718872, "learning_rate": 0.0007779419758119309, "loss": 3.2985, "step": 26155 }, { "epoch": 1.7774154097024053, "grad_norm": 1.7803947925567627, "learning_rate": 0.0007778995108030983, "loss": 3.4882, "step": 26160 }, { "epoch": 1.777755129773067, "grad_norm": 2.1239137649536133, "learning_rate": 0.0007778570457942655, "loss": 3.6458, "step": 26165 }, { "epoch": 1.7780948498437288, "grad_norm": 2.366281270980835, "learning_rate": 0.0007778145807854328, "loss": 3.5326, "step": 26170 }, { "epoch": 1.7784345699143906, "grad_norm": 2.0073184967041016, "learning_rate": 0.0007777721157766001, "loss": 3.3365, "step": 26175 }, { "epoch": 1.7787742899850523, "grad_norm": 1.8074142932891846, "learning_rate": 0.0007777296507677674, "loss": 3.7443, "step": 26180 }, { "epoch": 1.7791140100557141, "grad_norm": 1.6049145460128784, "learning_rate": 0.0007776871857589346, "loss": 3.5255, "step": 26185 }, { "epoch": 1.779453730126376, "grad_norm": 2.109724521636963, "learning_rate": 0.0007776447207501019, 
"loss": 3.4039, "step": 26190 }, { "epoch": 1.7797934501970376, "grad_norm": 2.2791078090667725, "learning_rate": 0.0007776022557412692, "loss": 3.2783, "step": 26195 }, { "epoch": 1.7801331702676995, "grad_norm": 2.0947980880737305, "learning_rate": 0.0007775597907324364, "loss": 3.3481, "step": 26200 }, { "epoch": 1.7804728903383613, "grad_norm": 1.9203946590423584, "learning_rate": 0.0007775173257236038, "loss": 3.4335, "step": 26205 }, { "epoch": 1.780812610409023, "grad_norm": 2.2875802516937256, "learning_rate": 0.000777474860714771, "loss": 3.4889, "step": 26210 }, { "epoch": 1.7811523304796846, "grad_norm": 2.071453332901001, "learning_rate": 0.0007774323957059384, "loss": 3.6901, "step": 26215 }, { "epoch": 1.7814920505503467, "grad_norm": 1.9195566177368164, "learning_rate": 0.0007773899306971056, "loss": 3.5477, "step": 26220 }, { "epoch": 1.7818317706210083, "grad_norm": 1.3706783056259155, "learning_rate": 0.0007773474656882728, "loss": 3.6231, "step": 26225 }, { "epoch": 1.78217149069167, "grad_norm": 1.715039610862732, "learning_rate": 0.0007773050006794402, "loss": 3.6284, "step": 26230 }, { "epoch": 1.782511210762332, "grad_norm": 1.4535348415374756, "learning_rate": 0.0007772625356706074, "loss": 3.5467, "step": 26235 }, { "epoch": 1.7828509308329936, "grad_norm": 1.818393349647522, "learning_rate": 0.0007772200706617747, "loss": 3.5232, "step": 26240 }, { "epoch": 1.7831906509036552, "grad_norm": 1.7817379236221313, "learning_rate": 0.0007771776056529421, "loss": 3.599, "step": 26245 }, { "epoch": 1.7835303709743173, "grad_norm": 2.17407488822937, "learning_rate": 0.0007771351406441093, "loss": 3.7612, "step": 26250 }, { "epoch": 1.783870091044979, "grad_norm": 1.3803008794784546, "learning_rate": 0.0007770926756352765, "loss": 3.4077, "step": 26255 }, { "epoch": 1.7842098111156406, "grad_norm": 2.3012895584106445, "learning_rate": 0.0007770502106264439, "loss": 3.4445, "step": 26260 }, { "epoch": 1.7845495311863024, "grad_norm": 
1.8446768522262573, "learning_rate": 0.0007770077456176111, "loss": 3.4721, "step": 26265 }, { "epoch": 1.7848892512569643, "grad_norm": 1.6296966075897217, "learning_rate": 0.0007769652806087783, "loss": 3.5265, "step": 26270 }, { "epoch": 1.785228971327626, "grad_norm": 2.5465967655181885, "learning_rate": 0.0007769228155999457, "loss": 3.5778, "step": 26275 }, { "epoch": 1.7855686913982878, "grad_norm": 1.5342360734939575, "learning_rate": 0.000776880350591113, "loss": 3.6076, "step": 26280 }, { "epoch": 1.7859084114689496, "grad_norm": 2.004396438598633, "learning_rate": 0.0007768378855822802, "loss": 3.6085, "step": 26285 }, { "epoch": 1.7862481315396113, "grad_norm": 1.8106803894042969, "learning_rate": 0.0007767954205734475, "loss": 3.5708, "step": 26290 }, { "epoch": 1.7865878516102731, "grad_norm": 2.05560302734375, "learning_rate": 0.0007767529555646148, "loss": 3.5788, "step": 26295 }, { "epoch": 1.786927571680935, "grad_norm": 1.5746197700500488, "learning_rate": 0.000776710490555782, "loss": 3.3965, "step": 26300 }, { "epoch": 1.7872672917515966, "grad_norm": 1.6396528482437134, "learning_rate": 0.0007766680255469494, "loss": 3.4849, "step": 26305 }, { "epoch": 1.7876070118222585, "grad_norm": 2.1277952194213867, "learning_rate": 0.0007766255605381167, "loss": 3.5041, "step": 26310 }, { "epoch": 1.7879467318929203, "grad_norm": 1.8718596696853638, "learning_rate": 0.0007765830955292839, "loss": 3.632, "step": 26315 }, { "epoch": 1.788286451963582, "grad_norm": 1.4451439380645752, "learning_rate": 0.0007765406305204512, "loss": 3.3382, "step": 26320 }, { "epoch": 1.7886261720342438, "grad_norm": 1.8743160963058472, "learning_rate": 0.0007764981655116184, "loss": 3.3823, "step": 26325 }, { "epoch": 1.7889658921049056, "grad_norm": 1.9527188539505005, "learning_rate": 0.0007764557005027857, "loss": 3.5709, "step": 26330 }, { "epoch": 1.7893056121755673, "grad_norm": 2.186070203781128, "learning_rate": 0.000776413235493953, "loss": 3.4077, "step": 26335 }, 
{ "epoch": 1.7896453322462291, "grad_norm": 1.9306191205978394, "learning_rate": 0.0007763707704851203, "loss": 3.6463, "step": 26340 }, { "epoch": 1.789985052316891, "grad_norm": 1.926594853401184, "learning_rate": 0.0007763283054762876, "loss": 3.5451, "step": 26345 }, { "epoch": 1.7903247723875526, "grad_norm": 1.9427152872085571, "learning_rate": 0.0007762858404674549, "loss": 3.868, "step": 26350 }, { "epoch": 1.7906644924582145, "grad_norm": 1.810490608215332, "learning_rate": 0.0007762433754586221, "loss": 3.7064, "step": 26355 }, { "epoch": 1.7910042125288763, "grad_norm": 1.6941251754760742, "learning_rate": 0.0007762009104497893, "loss": 3.5289, "step": 26360 }, { "epoch": 1.791343932599538, "grad_norm": 1.8621190786361694, "learning_rate": 0.0007761584454409567, "loss": 3.5866, "step": 26365 }, { "epoch": 1.7916836526701998, "grad_norm": 1.9297999143600464, "learning_rate": 0.0007761159804321239, "loss": 3.6725, "step": 26370 }, { "epoch": 1.7920233727408617, "grad_norm": 1.7857892513275146, "learning_rate": 0.0007760735154232912, "loss": 3.5505, "step": 26375 }, { "epoch": 1.7923630928115233, "grad_norm": 1.6882864236831665, "learning_rate": 0.0007760310504144586, "loss": 3.7169, "step": 26380 }, { "epoch": 1.792702812882185, "grad_norm": 1.6995550394058228, "learning_rate": 0.0007759885854056258, "loss": 3.6306, "step": 26385 }, { "epoch": 1.793042532952847, "grad_norm": 1.8320118188858032, "learning_rate": 0.000775946120396793, "loss": 3.5314, "step": 26390 }, { "epoch": 1.7933822530235086, "grad_norm": 1.57142174243927, "learning_rate": 0.0007759036553879604, "loss": 3.7086, "step": 26395 }, { "epoch": 1.7937219730941703, "grad_norm": 2.1337037086486816, "learning_rate": 0.0007758611903791276, "loss": 3.5066, "step": 26400 }, { "epoch": 1.7940616931648323, "grad_norm": 1.6611911058425903, "learning_rate": 0.0007758187253702948, "loss": 3.5346, "step": 26405 }, { "epoch": 1.794401413235494, "grad_norm": 1.6819777488708496, "learning_rate": 
0.0007757762603614623, "loss": 3.4613, "step": 26410 }, { "epoch": 1.7947411333061556, "grad_norm": 1.3920419216156006, "learning_rate": 0.0007757337953526295, "loss": 3.5279, "step": 26415 }, { "epoch": 1.7950808533768177, "grad_norm": 1.6677374839782715, "learning_rate": 0.0007756913303437967, "loss": 3.3249, "step": 26420 }, { "epoch": 1.7954205734474793, "grad_norm": 2.0486531257629395, "learning_rate": 0.000775648865334964, "loss": 3.3866, "step": 26425 }, { "epoch": 1.795760293518141, "grad_norm": 2.1470608711242676, "learning_rate": 0.0007756064003261313, "loss": 3.5129, "step": 26430 }, { "epoch": 1.7961000135888028, "grad_norm": 2.235893726348877, "learning_rate": 0.0007755639353172985, "loss": 3.3054, "step": 26435 }, { "epoch": 1.7964397336594646, "grad_norm": 2.702421188354492, "learning_rate": 0.0007755214703084658, "loss": 3.7306, "step": 26440 }, { "epoch": 1.7967794537301263, "grad_norm": 1.6117935180664062, "learning_rate": 0.0007754790052996332, "loss": 3.3723, "step": 26445 }, { "epoch": 1.7971191738007881, "grad_norm": 1.9187041521072388, "learning_rate": 0.0007754365402908004, "loss": 3.2744, "step": 26450 }, { "epoch": 1.79745889387145, "grad_norm": 2.251390218734741, "learning_rate": 0.0007753940752819677, "loss": 3.5432, "step": 26455 }, { "epoch": 1.7977986139421116, "grad_norm": 2.1004278659820557, "learning_rate": 0.000775351610273135, "loss": 3.6269, "step": 26460 }, { "epoch": 1.7981383340127735, "grad_norm": 1.7042800188064575, "learning_rate": 0.0007753091452643022, "loss": 3.461, "step": 26465 }, { "epoch": 1.7984780540834353, "grad_norm": 1.9231048822402954, "learning_rate": 0.0007752666802554695, "loss": 3.5466, "step": 26470 }, { "epoch": 1.798817774154097, "grad_norm": 3.208801746368408, "learning_rate": 0.0007752242152466367, "loss": 3.6233, "step": 26475 }, { "epoch": 1.7991574942247588, "grad_norm": 1.7549165487289429, "learning_rate": 0.0007751817502378041, "loss": 3.6438, "step": 26480 }, { "epoch": 1.7994972142954206, 
"grad_norm": 2.1838574409484863, "learning_rate": 0.0007751392852289714, "loss": 3.4786, "step": 26485 }, { "epoch": 1.7998369343660823, "grad_norm": 2.0714142322540283, "learning_rate": 0.0007750968202201386, "loss": 3.5041, "step": 26490 }, { "epoch": 1.8001766544367441, "grad_norm": 1.6896475553512573, "learning_rate": 0.0007750543552113059, "loss": 3.6129, "step": 26495 }, { "epoch": 1.800516374507406, "grad_norm": 1.8202804327011108, "learning_rate": 0.0007750118902024732, "loss": 3.4947, "step": 26500 }, { "epoch": 1.8008560945780676, "grad_norm": 2.323246955871582, "learning_rate": 0.0007749694251936404, "loss": 3.4235, "step": 26505 }, { "epoch": 1.8011958146487295, "grad_norm": 1.666033387184143, "learning_rate": 0.0007749269601848076, "loss": 3.7983, "step": 26510 }, { "epoch": 1.8015355347193913, "grad_norm": 2.1222338676452637, "learning_rate": 0.0007748844951759751, "loss": 3.6093, "step": 26515 }, { "epoch": 1.801875254790053, "grad_norm": 1.7342725992202759, "learning_rate": 0.0007748420301671423, "loss": 3.236, "step": 26520 }, { "epoch": 1.8022149748607148, "grad_norm": 2.0654959678649902, "learning_rate": 0.0007747995651583095, "loss": 3.7321, "step": 26525 }, { "epoch": 1.8025546949313767, "grad_norm": 1.93532133102417, "learning_rate": 0.0007747571001494769, "loss": 3.3886, "step": 26530 }, { "epoch": 1.8028944150020383, "grad_norm": 2.178227424621582, "learning_rate": 0.0007747146351406441, "loss": 3.7122, "step": 26535 }, { "epoch": 1.8032341350727001, "grad_norm": 2.1261932849884033, "learning_rate": 0.0007746721701318113, "loss": 3.626, "step": 26540 }, { "epoch": 1.803573855143362, "grad_norm": 1.9917123317718506, "learning_rate": 0.0007746297051229787, "loss": 3.56, "step": 26545 }, { "epoch": 1.8039135752140236, "grad_norm": 2.209240436553955, "learning_rate": 0.000774587240114146, "loss": 3.469, "step": 26550 }, { "epoch": 1.8042532952846853, "grad_norm": 2.109031915664673, "learning_rate": 0.0007745447751053133, "loss": 3.7758, "step": 
26555 }, { "epoch": 1.8045930153553473, "grad_norm": 2.3730239868164062, "learning_rate": 0.0007745023100964805, "loss": 3.8338, "step": 26560 }, { "epoch": 1.804932735426009, "grad_norm": 1.6057615280151367, "learning_rate": 0.0007744598450876478, "loss": 3.54, "step": 26565 }, { "epoch": 1.8052724554966706, "grad_norm": 1.8959780931472778, "learning_rate": 0.0007744173800788151, "loss": 3.582, "step": 26570 }, { "epoch": 1.8056121755673327, "grad_norm": 2.061457395553589, "learning_rate": 0.0007743749150699823, "loss": 3.3837, "step": 26575 }, { "epoch": 1.8059518956379943, "grad_norm": 1.9488922357559204, "learning_rate": 0.0007743324500611496, "loss": 3.4481, "step": 26580 }, { "epoch": 1.806291615708656, "grad_norm": 1.7485398054122925, "learning_rate": 0.000774289985052317, "loss": 3.4822, "step": 26585 }, { "epoch": 1.806631335779318, "grad_norm": 1.889772891998291, "learning_rate": 0.0007742475200434842, "loss": 3.5219, "step": 26590 }, { "epoch": 1.8069710558499796, "grad_norm": 1.8565921783447266, "learning_rate": 0.0007742050550346515, "loss": 3.6213, "step": 26595 }, { "epoch": 1.8073107759206413, "grad_norm": 2.1520273685455322, "learning_rate": 0.0007741625900258188, "loss": 3.6688, "step": 26600 }, { "epoch": 1.8076504959913031, "grad_norm": 1.8356894254684448, "learning_rate": 0.000774120125016986, "loss": 3.467, "step": 26605 }, { "epoch": 1.807990216061965, "grad_norm": 1.6533726453781128, "learning_rate": 0.0007740776600081532, "loss": 3.5683, "step": 26610 }, { "epoch": 1.8083299361326266, "grad_norm": 1.593624234199524, "learning_rate": 0.0007740351949993206, "loss": 3.4917, "step": 26615 }, { "epoch": 1.8086696562032885, "grad_norm": 1.5948582887649536, "learning_rate": 0.0007739927299904879, "loss": 3.4873, "step": 26620 }, { "epoch": 1.8090093762739503, "grad_norm": 1.8928743600845337, "learning_rate": 0.0007739502649816551, "loss": 3.5114, "step": 26625 }, { "epoch": 1.809349096344612, "grad_norm": 1.555973768234253, "learning_rate": 
0.0007739077999728225, "loss": 3.6019, "step": 26630 }, { "epoch": 1.8096888164152738, "grad_norm": 1.5726549625396729, "learning_rate": 0.0007738653349639897, "loss": 3.8041, "step": 26635 }, { "epoch": 1.8100285364859356, "grad_norm": 2.0627849102020264, "learning_rate": 0.0007738228699551569, "loss": 3.5544, "step": 26640 }, { "epoch": 1.8103682565565973, "grad_norm": 1.5492722988128662, "learning_rate": 0.0007737804049463243, "loss": 3.596, "step": 26645 }, { "epoch": 1.8107079766272591, "grad_norm": 1.6264946460723877, "learning_rate": 0.0007737379399374915, "loss": 3.638, "step": 26650 }, { "epoch": 1.811047696697921, "grad_norm": 1.8243403434753418, "learning_rate": 0.0007736954749286588, "loss": 3.6578, "step": 26655 }, { "epoch": 1.8113874167685826, "grad_norm": 1.7621837854385376, "learning_rate": 0.0007736530099198262, "loss": 3.5161, "step": 26660 }, { "epoch": 1.8117271368392445, "grad_norm": 1.5406440496444702, "learning_rate": 0.0007736105449109934, "loss": 3.4607, "step": 26665 }, { "epoch": 1.8120668569099063, "grad_norm": 1.8778982162475586, "learning_rate": 0.0007735680799021606, "loss": 3.485, "step": 26670 }, { "epoch": 1.812406576980568, "grad_norm": 1.9278556108474731, "learning_rate": 0.0007735256148933279, "loss": 3.502, "step": 26675 }, { "epoch": 1.8127462970512298, "grad_norm": 1.7743650674819946, "learning_rate": 0.0007734831498844952, "loss": 3.4334, "step": 26680 }, { "epoch": 1.8130860171218917, "grad_norm": 1.939711093902588, "learning_rate": 0.0007734406848756624, "loss": 3.5874, "step": 26685 }, { "epoch": 1.8134257371925533, "grad_norm": 1.8697450160980225, "learning_rate": 0.0007733982198668298, "loss": 3.458, "step": 26690 }, { "epoch": 1.8137654572632151, "grad_norm": 2.144545078277588, "learning_rate": 0.0007733557548579971, "loss": 3.8412, "step": 26695 }, { "epoch": 1.814105177333877, "grad_norm": 1.558310866355896, "learning_rate": 0.0007733132898491643, "loss": 3.599, "step": 26700 }, { "epoch": 1.8144448974045386, 
"grad_norm": 1.8286315202713013, "learning_rate": 0.0007732708248403316, "loss": 3.3242, "step": 26705 }, { "epoch": 1.8147846174752005, "grad_norm": 1.577196717262268, "learning_rate": 0.0007732283598314988, "loss": 3.6551, "step": 26710 }, { "epoch": 1.8151243375458623, "grad_norm": 2.415374279022217, "learning_rate": 0.0007731858948226661, "loss": 3.5156, "step": 26715 }, { "epoch": 1.815464057616524, "grad_norm": 2.117063045501709, "learning_rate": 0.0007731434298138334, "loss": 3.5081, "step": 26720 }, { "epoch": 1.8158037776871856, "grad_norm": 1.595170497894287, "learning_rate": 0.0007731009648050007, "loss": 3.4615, "step": 26725 }, { "epoch": 1.8161434977578477, "grad_norm": 2.2694334983825684, "learning_rate": 0.000773058499796168, "loss": 3.7211, "step": 26730 }, { "epoch": 1.8164832178285093, "grad_norm": 1.9274650812149048, "learning_rate": 0.0007730160347873353, "loss": 3.6827, "step": 26735 }, { "epoch": 1.816822937899171, "grad_norm": 1.690150499343872, "learning_rate": 0.0007729735697785025, "loss": 3.2437, "step": 26740 }, { "epoch": 1.817162657969833, "grad_norm": 1.6247020959854126, "learning_rate": 0.0007729311047696697, "loss": 3.6651, "step": 26745 }, { "epoch": 1.8175023780404946, "grad_norm": 2.8355937004089355, "learning_rate": 0.0007728886397608371, "loss": 3.7681, "step": 26750 }, { "epoch": 1.8178420981111563, "grad_norm": 1.4600155353546143, "learning_rate": 0.0007728461747520043, "loss": 3.5339, "step": 26755 }, { "epoch": 1.8181818181818183, "grad_norm": 2.6141157150268555, "learning_rate": 0.0007728037097431716, "loss": 3.5209, "step": 26760 }, { "epoch": 1.81852153825248, "grad_norm": 1.8344131708145142, "learning_rate": 0.000772761244734339, "loss": 3.3567, "step": 26765 }, { "epoch": 1.8188612583231416, "grad_norm": 1.8038644790649414, "learning_rate": 0.0007727187797255062, "loss": 3.5773, "step": 26770 }, { "epoch": 1.8192009783938035, "grad_norm": 2.1059176921844482, "learning_rate": 0.0007726763147166734, "loss": 3.6757, 
"step": 26775 }, { "epoch": 1.8195406984644653, "grad_norm": 2.3672187328338623, "learning_rate": 0.0007726338497078408, "loss": 3.4105, "step": 26780 }, { "epoch": 1.819880418535127, "grad_norm": 1.7452830076217651, "learning_rate": 0.000772591384699008, "loss": 3.5207, "step": 26785 }, { "epoch": 1.8202201386057888, "grad_norm": 2.3837318420410156, "learning_rate": 0.0007725489196901752, "loss": 3.4523, "step": 26790 }, { "epoch": 1.8205598586764506, "grad_norm": 1.9456344842910767, "learning_rate": 0.0007725064546813427, "loss": 3.4718, "step": 26795 }, { "epoch": 1.8208995787471123, "grad_norm": 1.826697587966919, "learning_rate": 0.0007724639896725099, "loss": 3.5901, "step": 26800 }, { "epoch": 1.8212392988177741, "grad_norm": 1.964856743812561, "learning_rate": 0.0007724215246636771, "loss": 3.6645, "step": 26805 }, { "epoch": 1.821579018888436, "grad_norm": 2.5050413608551025, "learning_rate": 0.0007723790596548444, "loss": 3.5724, "step": 26810 }, { "epoch": 1.8219187389590976, "grad_norm": 1.8032875061035156, "learning_rate": 0.0007723365946460117, "loss": 3.4427, "step": 26815 }, { "epoch": 1.8222584590297595, "grad_norm": 1.7304234504699707, "learning_rate": 0.0007722941296371789, "loss": 3.3911, "step": 26820 }, { "epoch": 1.8225981791004213, "grad_norm": 1.9181687831878662, "learning_rate": 0.0007722516646283462, "loss": 3.3649, "step": 26825 }, { "epoch": 1.822937899171083, "grad_norm": 1.9087944030761719, "learning_rate": 0.0007722091996195136, "loss": 3.4599, "step": 26830 }, { "epoch": 1.8232776192417448, "grad_norm": 1.7339497804641724, "learning_rate": 0.0007721667346106808, "loss": 3.4587, "step": 26835 }, { "epoch": 1.8236173393124067, "grad_norm": 1.767340898513794, "learning_rate": 0.0007721242696018481, "loss": 3.6065, "step": 26840 }, { "epoch": 1.8239570593830683, "grad_norm": 2.1116795539855957, "learning_rate": 0.0007720818045930154, "loss": 3.593, "step": 26845 }, { "epoch": 1.8242967794537301, "grad_norm": 1.7421995401382446, 
"learning_rate": 0.0007720393395841826, "loss": 3.6734, "step": 26850 }, { "epoch": 1.824636499524392, "grad_norm": 1.9062492847442627, "learning_rate": 0.0007719968745753499, "loss": 3.6005, "step": 26855 }, { "epoch": 1.8249762195950536, "grad_norm": 2.3230090141296387, "learning_rate": 0.0007719544095665171, "loss": 3.3051, "step": 26860 }, { "epoch": 1.8253159396657155, "grad_norm": 1.7609935998916626, "learning_rate": 0.0007719119445576845, "loss": 3.5506, "step": 26865 }, { "epoch": 1.8256556597363773, "grad_norm": 2.199185371398926, "learning_rate": 0.0007718694795488518, "loss": 3.6717, "step": 26870 }, { "epoch": 1.825995379807039, "grad_norm": 2.3476672172546387, "learning_rate": 0.000771827014540019, "loss": 3.9484, "step": 26875 }, { "epoch": 1.8263350998777008, "grad_norm": 1.866787314414978, "learning_rate": 0.0007717845495311863, "loss": 3.4983, "step": 26880 }, { "epoch": 1.8266748199483627, "grad_norm": 1.6162313222885132, "learning_rate": 0.0007717420845223536, "loss": 3.465, "step": 26885 }, { "epoch": 1.8270145400190243, "grad_norm": 2.8821396827697754, "learning_rate": 0.0007716996195135208, "loss": 3.5198, "step": 26890 }, { "epoch": 1.827354260089686, "grad_norm": 1.5973892211914062, "learning_rate": 0.0007716571545046883, "loss": 3.535, "step": 26895 }, { "epoch": 1.827693980160348, "grad_norm": 2.1340951919555664, "learning_rate": 0.0007716146894958555, "loss": 3.3446, "step": 26900 }, { "epoch": 1.8280337002310096, "grad_norm": 2.2978219985961914, "learning_rate": 0.0007715722244870227, "loss": 3.4937, "step": 26905 }, { "epoch": 1.8283734203016713, "grad_norm": 1.7594600915908813, "learning_rate": 0.00077152975947819, "loss": 3.4106, "step": 26910 }, { "epoch": 1.8287131403723333, "grad_norm": 2.117316246032715, "learning_rate": 0.0007714957874711238, "loss": 3.6057, "step": 26915 }, { "epoch": 1.829052860442995, "grad_norm": 2.0065412521362305, "learning_rate": 0.0007714533224622911, "loss": 3.4468, "step": 26920 }, { "epoch": 
1.8293925805136566, "grad_norm": 2.853571653366089, "learning_rate": 0.0007714108574534584, "loss": 3.4748, "step": 26925 }, { "epoch": 1.8297323005843187, "grad_norm": 1.8639742136001587, "learning_rate": 0.0007713683924446257, "loss": 3.5697, "step": 26930 }, { "epoch": 1.8300720206549803, "grad_norm": 1.482786774635315, "learning_rate": 0.0007713259274357929, "loss": 3.6117, "step": 26935 }, { "epoch": 1.830411740725642, "grad_norm": 1.8280349969863892, "learning_rate": 0.0007712834624269602, "loss": 3.5586, "step": 26940 }, { "epoch": 1.8307514607963038, "grad_norm": 1.6716251373291016, "learning_rate": 0.0007712409974181274, "loss": 3.3958, "step": 26945 }, { "epoch": 1.8310911808669657, "grad_norm": 2.0517184734344482, "learning_rate": 0.0007711985324092947, "loss": 3.2395, "step": 26950 }, { "epoch": 1.8314309009376273, "grad_norm": 2.9257493019104004, "learning_rate": 0.0007711560674004621, "loss": 3.5069, "step": 26955 }, { "epoch": 1.8317706210082891, "grad_norm": 1.498694896697998, "learning_rate": 0.0007711136023916293, "loss": 3.5131, "step": 26960 }, { "epoch": 1.832110341078951, "grad_norm": 1.7080377340316772, "learning_rate": 0.0007710711373827966, "loss": 3.4685, "step": 26965 }, { "epoch": 1.8324500611496126, "grad_norm": 2.0075206756591797, "learning_rate": 0.0007710286723739639, "loss": 3.5476, "step": 26970 }, { "epoch": 1.8327897812202745, "grad_norm": 2.0638346672058105, "learning_rate": 0.0007709862073651311, "loss": 3.7717, "step": 26975 }, { "epoch": 1.8331295012909363, "grad_norm": 1.5499616861343384, "learning_rate": 0.0007709437423562983, "loss": 3.5321, "step": 26980 }, { "epoch": 1.833469221361598, "grad_norm": 2.2633414268493652, "learning_rate": 0.0007709012773474657, "loss": 3.5172, "step": 26985 }, { "epoch": 1.8338089414322598, "grad_norm": 1.8225057125091553, "learning_rate": 0.000770858812338633, "loss": 3.4364, "step": 26990 }, { "epoch": 1.8341486615029217, "grad_norm": 1.9598350524902344, "learning_rate": 
0.0007708163473298002, "loss": 3.2683, "step": 26995 }, { "epoch": 1.8344883815735833, "grad_norm": 1.7784428596496582, "learning_rate": 0.0007707738823209676, "loss": 3.6072, "step": 27000 }, { "epoch": 1.8348281016442451, "grad_norm": 2.23134183883667, "learning_rate": 0.0007707314173121348, "loss": 3.375, "step": 27005 }, { "epoch": 1.835167821714907, "grad_norm": 2.134064197540283, "learning_rate": 0.000770688952303302, "loss": 3.3515, "step": 27010 }, { "epoch": 1.8355075417855686, "grad_norm": 1.7798010110855103, "learning_rate": 0.0007706464872944694, "loss": 3.4951, "step": 27015 }, { "epoch": 1.8358472618562305, "grad_norm": 2.1624510288238525, "learning_rate": 0.0007706040222856366, "loss": 3.631, "step": 27020 }, { "epoch": 1.8361869819268923, "grad_norm": 2.2364139556884766, "learning_rate": 0.0007705615572768039, "loss": 3.6913, "step": 27025 }, { "epoch": 1.836526701997554, "grad_norm": 2.2580649852752686, "learning_rate": 0.0007705190922679713, "loss": 3.3775, "step": 27030 }, { "epoch": 1.8368664220682158, "grad_norm": 1.4275676012039185, "learning_rate": 0.0007704766272591385, "loss": 3.5024, "step": 27035 }, { "epoch": 1.8372061421388777, "grad_norm": 1.6519391536712646, "learning_rate": 0.0007704341622503057, "loss": 3.234, "step": 27040 }, { "epoch": 1.8375458622095393, "grad_norm": 2.4590089321136475, "learning_rate": 0.000770391697241473, "loss": 3.4161, "step": 27045 }, { "epoch": 1.8378855822802012, "grad_norm": 2.5243263244628906, "learning_rate": 0.0007703492322326403, "loss": 3.6514, "step": 27050 }, { "epoch": 1.838225302350863, "grad_norm": 2.0099828243255615, "learning_rate": 0.0007703067672238075, "loss": 3.6329, "step": 27055 }, { "epoch": 1.8385650224215246, "grad_norm": 1.8592894077301025, "learning_rate": 0.0007702643022149749, "loss": 3.505, "step": 27060 }, { "epoch": 1.8389047424921863, "grad_norm": 2.091883897781372, "learning_rate": 0.0007702218372061422, "loss": 3.6449, "step": 27065 }, { "epoch": 1.8392444625628483, 
"grad_norm": 1.6776138544082642, "learning_rate": 0.0007701793721973094, "loss": 3.5137, "step": 27070 }, { "epoch": 1.83958418263351, "grad_norm": 1.8702689409255981, "learning_rate": 0.0007701369071884767, "loss": 3.5065, "step": 27075 }, { "epoch": 1.8399239027041716, "grad_norm": 1.9890397787094116, "learning_rate": 0.000770094442179644, "loss": 3.5037, "step": 27080 }, { "epoch": 1.8402636227748337, "grad_norm": 2.578583002090454, "learning_rate": 0.0007700519771708112, "loss": 3.5812, "step": 27085 }, { "epoch": 1.8406033428454953, "grad_norm": 2.2139298915863037, "learning_rate": 0.0007700095121619785, "loss": 3.5267, "step": 27090 }, { "epoch": 1.840943062916157, "grad_norm": 1.976183533668518, "learning_rate": 0.0007699670471531458, "loss": 3.3096, "step": 27095 }, { "epoch": 1.841282782986819, "grad_norm": 2.3416874408721924, "learning_rate": 0.0007699245821443132, "loss": 3.5475, "step": 27100 }, { "epoch": 1.8416225030574807, "grad_norm": 2.208660364151001, "learning_rate": 0.0007698821171354804, "loss": 3.5264, "step": 27105 }, { "epoch": 1.8419622231281423, "grad_norm": 1.6106668710708618, "learning_rate": 0.0007698396521266476, "loss": 3.5628, "step": 27110 }, { "epoch": 1.8423019431988041, "grad_norm": 1.9991599321365356, "learning_rate": 0.000769797187117815, "loss": 3.3058, "step": 27115 }, { "epoch": 1.842641663269466, "grad_norm": 1.8685816526412964, "learning_rate": 0.0007697547221089822, "loss": 3.5995, "step": 27120 }, { "epoch": 1.8429813833401276, "grad_norm": 2.134671449661255, "learning_rate": 0.0007697122571001494, "loss": 3.6457, "step": 27125 }, { "epoch": 1.8433211034107895, "grad_norm": 1.8678562641143799, "learning_rate": 0.0007696697920913169, "loss": 3.5509, "step": 27130 }, { "epoch": 1.8436608234814513, "grad_norm": 1.6613407135009766, "learning_rate": 0.0007696273270824841, "loss": 3.5331, "step": 27135 }, { "epoch": 1.844000543552113, "grad_norm": 1.5810447931289673, "learning_rate": 0.0007695848620736513, "loss": 3.5869, 
"step": 27140 }, { "epoch": 1.8443402636227748, "grad_norm": 1.5824031829833984, "learning_rate": 0.0007695423970648186, "loss": 3.3559, "step": 27145 }, { "epoch": 1.8446799836934367, "grad_norm": 2.130042791366577, "learning_rate": 0.0007694999320559859, "loss": 3.6128, "step": 27150 }, { "epoch": 1.8450197037640983, "grad_norm": 1.7170734405517578, "learning_rate": 0.0007694574670471531, "loss": 3.5976, "step": 27155 }, { "epoch": 1.8453594238347601, "grad_norm": 1.7640535831451416, "learning_rate": 0.0007694150020383205, "loss": 3.6547, "step": 27160 }, { "epoch": 1.845699143905422, "grad_norm": 2.3666582107543945, "learning_rate": 0.0007693725370294878, "loss": 3.2951, "step": 27165 }, { "epoch": 1.8460388639760836, "grad_norm": 1.9535681009292603, "learning_rate": 0.000769330072020655, "loss": 3.4557, "step": 27170 }, { "epoch": 1.8463785840467455, "grad_norm": 1.788480281829834, "learning_rate": 0.0007692876070118223, "loss": 3.6629, "step": 27175 }, { "epoch": 1.8467183041174073, "grad_norm": 2.346432685852051, "learning_rate": 0.0007692451420029896, "loss": 3.8818, "step": 27180 }, { "epoch": 1.847058024188069, "grad_norm": 2.190547227859497, "learning_rate": 0.0007692026769941568, "loss": 3.595, "step": 27185 }, { "epoch": 1.8473977442587308, "grad_norm": 1.9585672616958618, "learning_rate": 0.0007691602119853241, "loss": 3.7565, "step": 27190 }, { "epoch": 1.8477374643293927, "grad_norm": 1.5533322095870972, "learning_rate": 0.0007691177469764914, "loss": 3.766, "step": 27195 }, { "epoch": 1.8480771844000543, "grad_norm": 1.6618834733963013, "learning_rate": 0.0007690752819676587, "loss": 3.6035, "step": 27200 }, { "epoch": 1.8484169044707162, "grad_norm": 1.51637864112854, "learning_rate": 0.000769032816958826, "loss": 3.8316, "step": 27205 }, { "epoch": 1.848756624541378, "grad_norm": 1.6054595708847046, "learning_rate": 0.0007689903519499932, "loss": 3.6003, "step": 27210 }, { "epoch": 1.8490963446120396, "grad_norm": 1.4542235136032104, 
"learning_rate": 0.0007689478869411605, "loss": 3.3554, "step": 27215 }, { "epoch": 1.8494360646827015, "grad_norm": 1.7551956176757812, "learning_rate": 0.0007689054219323278, "loss": 3.6409, "step": 27220 }, { "epoch": 1.8497757847533634, "grad_norm": 2.3596744537353516, "learning_rate": 0.000768862956923495, "loss": 3.5488, "step": 27225 }, { "epoch": 1.850115504824025, "grad_norm": 2.2733259201049805, "learning_rate": 0.0007688204919146624, "loss": 3.4001, "step": 27230 }, { "epoch": 1.8504552248946866, "grad_norm": 1.6386045217514038, "learning_rate": 0.0007687780269058297, "loss": 3.6189, "step": 27235 }, { "epoch": 1.8507949449653487, "grad_norm": 2.066037654876709, "learning_rate": 0.0007687355618969969, "loss": 3.5174, "step": 27240 }, { "epoch": 1.8511346650360103, "grad_norm": 2.0306315422058105, "learning_rate": 0.0007686930968881641, "loss": 3.619, "step": 27245 }, { "epoch": 1.851474385106672, "grad_norm": 2.2708323001861572, "learning_rate": 0.0007686506318793315, "loss": 3.4032, "step": 27250 }, { "epoch": 1.851814105177334, "grad_norm": 1.9640482664108276, "learning_rate": 0.0007686081668704987, "loss": 3.5508, "step": 27255 }, { "epoch": 1.8521538252479957, "grad_norm": 1.7165420055389404, "learning_rate": 0.0007685657018616659, "loss": 3.6417, "step": 27260 }, { "epoch": 1.8524935453186573, "grad_norm": 2.052129030227661, "learning_rate": 0.0007685232368528334, "loss": 3.6782, "step": 27265 }, { "epoch": 1.8528332653893194, "grad_norm": 2.44720458984375, "learning_rate": 0.0007684807718440006, "loss": 3.6191, "step": 27270 }, { "epoch": 1.853172985459981, "grad_norm": 1.9562259912490845, "learning_rate": 0.0007684383068351678, "loss": 3.856, "step": 27275 }, { "epoch": 1.8535127055306426, "grad_norm": 2.1147701740264893, "learning_rate": 0.0007683958418263352, "loss": 3.6094, "step": 27280 }, { "epoch": 1.8538524256013045, "grad_norm": 2.2872605323791504, "learning_rate": 0.0007683533768175024, "loss": 3.5176, "step": 27285 }, { "epoch": 
1.8541921456719663, "grad_norm": 1.3896465301513672, "learning_rate": 0.0007683109118086696, "loss": 3.5364, "step": 27290 }, { "epoch": 1.854531865742628, "grad_norm": 2.3718926906585693, "learning_rate": 0.0007682684467998369, "loss": 3.5852, "step": 27295 }, { "epoch": 1.8548715858132898, "grad_norm": 2.608623743057251, "learning_rate": 0.0007682259817910043, "loss": 3.5976, "step": 27300 }, { "epoch": 1.8552113058839517, "grad_norm": 1.8527121543884277, "learning_rate": 0.0007681835167821715, "loss": 3.4354, "step": 27305 }, { "epoch": 1.8555510259546133, "grad_norm": 2.514662742614746, "learning_rate": 0.0007681410517733388, "loss": 3.4317, "step": 27310 }, { "epoch": 1.8558907460252752, "grad_norm": 2.498023748397827, "learning_rate": 0.0007680985867645061, "loss": 3.6838, "step": 27315 }, { "epoch": 1.856230466095937, "grad_norm": 1.830414056777954, "learning_rate": 0.0007680561217556733, "loss": 3.4478, "step": 27320 }, { "epoch": 1.8565701861665986, "grad_norm": 1.6993560791015625, "learning_rate": 0.0007680136567468406, "loss": 3.4038, "step": 27325 }, { "epoch": 1.8569099062372605, "grad_norm": 2.119870901107788, "learning_rate": 0.0007679711917380078, "loss": 3.4557, "step": 27330 }, { "epoch": 1.8572496263079223, "grad_norm": 1.8650189638137817, "learning_rate": 0.0007679287267291752, "loss": 3.4096, "step": 27335 }, { "epoch": 1.857589346378584, "grad_norm": 1.787619948387146, "learning_rate": 0.0007678862617203425, "loss": 3.4487, "step": 27340 }, { "epoch": 1.8579290664492458, "grad_norm": 2.0347330570220947, "learning_rate": 0.0007678437967115097, "loss": 3.3243, "step": 27345 }, { "epoch": 1.8582687865199077, "grad_norm": 2.1434130668640137, "learning_rate": 0.000767801331702677, "loss": 3.5385, "step": 27350 }, { "epoch": 1.8586085065905693, "grad_norm": 2.0268187522888184, "learning_rate": 0.0007677588666938443, "loss": 3.5958, "step": 27355 }, { "epoch": 1.8589482266612312, "grad_norm": 1.8145625591278076, "learning_rate": 
0.0007677164016850115, "loss": 3.4967, "step": 27360 }, { "epoch": 1.859287946731893, "grad_norm": 1.8708158731460571, "learning_rate": 0.0007676739366761788, "loss": 3.3521, "step": 27365 }, { "epoch": 1.8596276668025546, "grad_norm": 1.618387222290039, "learning_rate": 0.0007676314716673462, "loss": 3.4528, "step": 27370 }, { "epoch": 1.8599673868732165, "grad_norm": 2.438400983810425, "learning_rate": 0.0007675890066585134, "loss": 3.5217, "step": 27375 }, { "epoch": 1.8603071069438784, "grad_norm": 1.8039978742599487, "learning_rate": 0.0007675465416496806, "loss": 3.4239, "step": 27380 }, { "epoch": 1.86064682701454, "grad_norm": 1.9228705167770386, "learning_rate": 0.000767504076640848, "loss": 3.492, "step": 27385 }, { "epoch": 1.8609865470852018, "grad_norm": 2.217000722885132, "learning_rate": 0.0007674616116320152, "loss": 3.275, "step": 27390 }, { "epoch": 1.8613262671558637, "grad_norm": 1.5111249685287476, "learning_rate": 0.0007674191466231824, "loss": 3.6314, "step": 27395 }, { "epoch": 1.8616659872265253, "grad_norm": 2.2884361743927, "learning_rate": 0.0007673766816143498, "loss": 3.7622, "step": 27400 }, { "epoch": 1.862005707297187, "grad_norm": 2.349768877029419, "learning_rate": 0.0007673342166055171, "loss": 3.4501, "step": 27405 }, { "epoch": 1.862345427367849, "grad_norm": 2.1102418899536133, "learning_rate": 0.0007672917515966843, "loss": 3.7451, "step": 27410 }, { "epoch": 1.8626851474385107, "grad_norm": 2.222224473953247, "learning_rate": 0.0007672492865878517, "loss": 3.8179, "step": 27415 }, { "epoch": 1.8630248675091723, "grad_norm": 1.7359968423843384, "learning_rate": 0.0007672068215790189, "loss": 3.6004, "step": 27420 }, { "epoch": 1.8633645875798344, "grad_norm": 1.7627074718475342, "learning_rate": 0.0007671643565701861, "loss": 3.6544, "step": 27425 }, { "epoch": 1.863704307650496, "grad_norm": 1.9357272386550903, "learning_rate": 0.0007671218915613534, "loss": 3.6316, "step": 27430 }, { "epoch": 1.8640440277211576, 
"grad_norm": 1.5416173934936523, "learning_rate": 0.0007670794265525207, "loss": 3.5495, "step": 27435 }, { "epoch": 1.8643837477918197, "grad_norm": 2.2274749279022217, "learning_rate": 0.0007670369615436881, "loss": 3.7187, "step": 27440 }, { "epoch": 1.8647234678624813, "grad_norm": 2.0752954483032227, "learning_rate": 0.0007669944965348553, "loss": 3.5567, "step": 27445 }, { "epoch": 1.865063187933143, "grad_norm": 2.2084999084472656, "learning_rate": 0.0007669520315260226, "loss": 3.3606, "step": 27450 }, { "epoch": 1.8654029080038048, "grad_norm": 1.9341368675231934, "learning_rate": 0.0007669095665171899, "loss": 3.3485, "step": 27455 }, { "epoch": 1.8657426280744667, "grad_norm": 1.5074478387832642, "learning_rate": 0.0007668671015083571, "loss": 3.6369, "step": 27460 }, { "epoch": 1.8660823481451283, "grad_norm": 2.5060596466064453, "learning_rate": 0.0007668246364995244, "loss": 3.4182, "step": 27465 }, { "epoch": 1.8664220682157902, "grad_norm": 1.9425159692764282, "learning_rate": 0.0007667821714906917, "loss": 3.6313, "step": 27470 }, { "epoch": 1.866761788286452, "grad_norm": 1.6872293949127197, "learning_rate": 0.000766739706481859, "loss": 3.3403, "step": 27475 }, { "epoch": 1.8671015083571136, "grad_norm": 2.104139804840088, "learning_rate": 0.0007666972414730262, "loss": 3.7063, "step": 27480 }, { "epoch": 1.8674412284277755, "grad_norm": 1.7065932750701904, "learning_rate": 0.0007666547764641936, "loss": 3.3974, "step": 27485 }, { "epoch": 1.8677809484984373, "grad_norm": 1.9270676374435425, "learning_rate": 0.0007666123114553608, "loss": 3.4484, "step": 27490 }, { "epoch": 1.868120668569099, "grad_norm": 2.2957165241241455, "learning_rate": 0.000766569846446528, "loss": 3.7676, "step": 27495 }, { "epoch": 1.8684603886397608, "grad_norm": 2.077911376953125, "learning_rate": 0.0007665273814376954, "loss": 3.4741, "step": 27500 }, { "epoch": 1.8688001087104227, "grad_norm": 1.7935850620269775, "learning_rate": 0.0007664849164288626, "loss": 3.6217, 
"step": 27505 }, { "epoch": 1.8691398287810843, "grad_norm": 2.337973117828369, "learning_rate": 0.0007664424514200299, "loss": 3.3187, "step": 27510 }, { "epoch": 1.8694795488517462, "grad_norm": 2.0130910873413086, "learning_rate": 0.0007663999864111973, "loss": 3.6014, "step": 27515 }, { "epoch": 1.869819268922408, "grad_norm": 2.080923080444336, "learning_rate": 0.0007663575214023645, "loss": 3.3611, "step": 27520 }, { "epoch": 1.8701589889930696, "grad_norm": 1.6021780967712402, "learning_rate": 0.0007663150563935317, "loss": 3.4502, "step": 27525 }, { "epoch": 1.8704987090637315, "grad_norm": 1.4824120998382568, "learning_rate": 0.000766272591384699, "loss": 3.6494, "step": 27530 }, { "epoch": 1.8708384291343934, "grad_norm": 2.2836201190948486, "learning_rate": 0.0007662301263758663, "loss": 3.2248, "step": 27535 }, { "epoch": 1.871178149205055, "grad_norm": 1.9475901126861572, "learning_rate": 0.0007661876613670335, "loss": 3.8353, "step": 27540 }, { "epoch": 1.8715178692757168, "grad_norm": 1.6267551183700562, "learning_rate": 0.0007661451963582009, "loss": 3.4895, "step": 27545 }, { "epoch": 1.8718575893463787, "grad_norm": 1.6735697984695435, "learning_rate": 0.0007661027313493682, "loss": 3.756, "step": 27550 }, { "epoch": 1.8721973094170403, "grad_norm": 1.881759762763977, "learning_rate": 0.0007660602663405354, "loss": 3.5021, "step": 27555 }, { "epoch": 1.8725370294877022, "grad_norm": 1.991339087486267, "learning_rate": 0.0007660178013317027, "loss": 3.1498, "step": 27560 }, { "epoch": 1.872876749558364, "grad_norm": 1.9200971126556396, "learning_rate": 0.00076597533632287, "loss": 3.4844, "step": 27565 }, { "epoch": 1.8732164696290257, "grad_norm": 1.6802716255187988, "learning_rate": 0.0007659328713140372, "loss": 3.3453, "step": 27570 }, { "epoch": 1.8735561896996873, "grad_norm": 2.1411325931549072, "learning_rate": 0.0007658904063052045, "loss": 3.7183, "step": 27575 }, { "epoch": 1.8738959097703494, "grad_norm": 2.4501194953918457, 
"learning_rate": 0.0007658479412963718, "loss": 3.788, "step": 27580 }, { "epoch": 1.874235629841011, "grad_norm": 2.1204168796539307, "learning_rate": 0.0007658054762875391, "loss": 3.5367, "step": 27585 }, { "epoch": 1.8745753499116726, "grad_norm": 1.5756750106811523, "learning_rate": 0.0007657630112787064, "loss": 3.3179, "step": 27590 }, { "epoch": 1.8749150699823347, "grad_norm": 1.9086779356002808, "learning_rate": 0.0007657205462698736, "loss": 3.4265, "step": 27595 }, { "epoch": 1.8752547900529963, "grad_norm": 3.391066312789917, "learning_rate": 0.0007656780812610409, "loss": 3.477, "step": 27600 }, { "epoch": 1.875594510123658, "grad_norm": 2.1803879737854004, "learning_rate": 0.0007656356162522082, "loss": 3.6374, "step": 27605 }, { "epoch": 1.87593423019432, "grad_norm": 1.8322089910507202, "learning_rate": 0.0007655931512433754, "loss": 3.4736, "step": 27610 }, { "epoch": 1.8762739502649817, "grad_norm": 1.8605490922927856, "learning_rate": 0.0007655506862345428, "loss": 3.7012, "step": 27615 }, { "epoch": 1.8766136703356433, "grad_norm": 1.7132232189178467, "learning_rate": 0.0007655082212257101, "loss": 3.4324, "step": 27620 }, { "epoch": 1.8769533904063052, "grad_norm": 2.09242844581604, "learning_rate": 0.0007654657562168773, "loss": 3.7417, "step": 27625 }, { "epoch": 1.877293110476967, "grad_norm": 1.464892864227295, "learning_rate": 0.0007654232912080445, "loss": 3.5543, "step": 27630 }, { "epoch": 1.8776328305476286, "grad_norm": 1.9798725843429565, "learning_rate": 0.0007653808261992119, "loss": 3.2289, "step": 27635 }, { "epoch": 1.8779725506182905, "grad_norm": 1.42918062210083, "learning_rate": 0.0007653383611903791, "loss": 3.5641, "step": 27640 }, { "epoch": 1.8783122706889523, "grad_norm": 1.6288189888000488, "learning_rate": 0.0007652958961815463, "loss": 3.275, "step": 27645 }, { "epoch": 1.878651990759614, "grad_norm": 2.805915117263794, "learning_rate": 0.0007652534311727138, "loss": 3.4246, "step": 27650 }, { "epoch": 
1.8789917108302758, "grad_norm": 1.7315187454223633, "learning_rate": 0.000765210966163881, "loss": 3.487, "step": 27655 }, { "epoch": 1.8793314309009377, "grad_norm": 1.9297932386398315, "learning_rate": 0.0007651685011550482, "loss": 3.4719, "step": 27660 }, { "epoch": 1.8796711509715993, "grad_norm": 1.9264246225357056, "learning_rate": 0.0007651260361462156, "loss": 3.6169, "step": 27665 }, { "epoch": 1.8800108710422612, "grad_norm": 1.7767300605773926, "learning_rate": 0.0007650835711373828, "loss": 3.5024, "step": 27670 }, { "epoch": 1.880350591112923, "grad_norm": 2.0601871013641357, "learning_rate": 0.00076504110612855, "loss": 3.3928, "step": 27675 }, { "epoch": 1.8806903111835847, "grad_norm": 1.9747530221939087, "learning_rate": 0.0007649986411197175, "loss": 3.6233, "step": 27680 }, { "epoch": 1.8810300312542465, "grad_norm": 2.2197422981262207, "learning_rate": 0.0007649561761108847, "loss": 3.5871, "step": 27685 }, { "epoch": 1.8813697513249084, "grad_norm": 1.4547703266143799, "learning_rate": 0.0007649137111020519, "loss": 3.4974, "step": 27690 }, { "epoch": 1.88170947139557, "grad_norm": 1.85464608669281, "learning_rate": 0.0007648712460932192, "loss": 3.6547, "step": 27695 }, { "epoch": 1.8820491914662318, "grad_norm": 2.3354973793029785, "learning_rate": 0.0007648287810843865, "loss": 3.4248, "step": 27700 }, { "epoch": 1.8823889115368937, "grad_norm": 1.9363960027694702, "learning_rate": 0.0007647863160755537, "loss": 3.4439, "step": 27705 }, { "epoch": 1.8827286316075553, "grad_norm": 2.1306638717651367, "learning_rate": 0.000764743851066721, "loss": 3.5018, "step": 27710 }, { "epoch": 1.8830683516782172, "grad_norm": 1.5563091039657593, "learning_rate": 0.0007647013860578884, "loss": 3.5657, "step": 27715 }, { "epoch": 1.883408071748879, "grad_norm": 1.6503385305404663, "learning_rate": 0.0007646589210490556, "loss": 3.4182, "step": 27720 }, { "epoch": 1.8837477918195407, "grad_norm": 1.9152344465255737, "learning_rate": 0.0007646164560402229, 
"loss": 3.5722, "step": 27725 }, { "epoch": 1.8840875118902025, "grad_norm": 2.1330389976501465, "learning_rate": 0.0007645739910313901, "loss": 3.5757, "step": 27730 }, { "epoch": 1.8844272319608644, "grad_norm": 2.132085084915161, "learning_rate": 0.0007645315260225574, "loss": 3.6124, "step": 27735 }, { "epoch": 1.884766952031526, "grad_norm": 1.9649149179458618, "learning_rate": 0.0007644890610137247, "loss": 3.6149, "step": 27740 }, { "epoch": 1.8851066721021879, "grad_norm": 1.6321523189544678, "learning_rate": 0.0007644465960048919, "loss": 3.3434, "step": 27745 }, { "epoch": 1.8854463921728497, "grad_norm": 1.5805484056472778, "learning_rate": 0.0007644041309960593, "loss": 3.4039, "step": 27750 }, { "epoch": 1.8857861122435113, "grad_norm": 1.7296074628829956, "learning_rate": 0.0007643616659872266, "loss": 3.398, "step": 27755 }, { "epoch": 1.886125832314173, "grad_norm": 2.116931438446045, "learning_rate": 0.0007643192009783938, "loss": 3.6074, "step": 27760 }, { "epoch": 1.886465552384835, "grad_norm": 2.0501456260681152, "learning_rate": 0.000764276735969561, "loss": 3.5968, "step": 27765 }, { "epoch": 1.8868052724554967, "grad_norm": 2.0880563259124756, "learning_rate": 0.0007642342709607284, "loss": 3.6282, "step": 27770 }, { "epoch": 1.8871449925261583, "grad_norm": 1.931525468826294, "learning_rate": 0.0007641918059518956, "loss": 3.2721, "step": 27775 }, { "epoch": 1.8874847125968204, "grad_norm": 1.4629298448562622, "learning_rate": 0.0007641493409430629, "loss": 3.3643, "step": 27780 }, { "epoch": 1.887824432667482, "grad_norm": 2.765385866165161, "learning_rate": 0.0007641068759342303, "loss": 3.3614, "step": 27785 }, { "epoch": 1.8881641527381436, "grad_norm": 1.7487969398498535, "learning_rate": 0.0007640644109253975, "loss": 3.6919, "step": 27790 }, { "epoch": 1.8885038728088055, "grad_norm": 1.5541577339172363, "learning_rate": 0.0007640219459165648, "loss": 3.4471, "step": 27795 }, { "epoch": 1.8888435928794673, "grad_norm": 
2.022425413131714, "learning_rate": 0.0007639794809077321, "loss": 3.4456, "step": 27800 }, { "epoch": 1.889183312950129, "grad_norm": 1.8671795129776, "learning_rate": 0.0007639370158988993, "loss": 3.4811, "step": 27805 }, { "epoch": 1.8895230330207908, "grad_norm": 1.6874555349349976, "learning_rate": 0.0007638945508900666, "loss": 3.7008, "step": 27810 }, { "epoch": 1.8898627530914527, "grad_norm": 3.04571795463562, "learning_rate": 0.0007638520858812339, "loss": 3.5146, "step": 27815 }, { "epoch": 1.8902024731621143, "grad_norm": 1.8366355895996094, "learning_rate": 0.0007638096208724012, "loss": 3.4814, "step": 27820 }, { "epoch": 1.8905421932327762, "grad_norm": 1.5408546924591064, "learning_rate": 0.0007637671558635685, "loss": 3.4645, "step": 27825 }, { "epoch": 1.890881913303438, "grad_norm": 2.0795793533325195, "learning_rate": 0.0007637246908547357, "loss": 3.8048, "step": 27830 }, { "epoch": 1.8912216333740997, "grad_norm": 1.89678955078125, "learning_rate": 0.000763682225845903, "loss": 3.631, "step": 27835 }, { "epoch": 1.8915613534447615, "grad_norm": 1.7071114778518677, "learning_rate": 0.0007636397608370703, "loss": 3.3974, "step": 27840 }, { "epoch": 1.8919010735154234, "grad_norm": 1.7507277727127075, "learning_rate": 0.0007635972958282375, "loss": 3.34, "step": 27845 }, { "epoch": 1.892240793586085, "grad_norm": 2.4447312355041504, "learning_rate": 0.0007635548308194048, "loss": 3.4779, "step": 27850 }, { "epoch": 1.8925805136567468, "grad_norm": 1.86448335647583, "learning_rate": 0.0007635123658105722, "loss": 3.8595, "step": 27855 }, { "epoch": 1.8929202337274087, "grad_norm": 1.6165060997009277, "learning_rate": 0.0007634699008017394, "loss": 3.8077, "step": 27860 }, { "epoch": 1.8932599537980703, "grad_norm": 2.584043264389038, "learning_rate": 0.0007634274357929067, "loss": 3.3282, "step": 27865 }, { "epoch": 1.8935996738687322, "grad_norm": 1.8645957708358765, "learning_rate": 0.000763384970784074, "loss": 3.7736, "step": 27870 }, { 
"epoch": 1.893939393939394, "grad_norm": 2.110426187515259, "learning_rate": 0.0007633425057752412, "loss": 3.7028, "step": 27875 }, { "epoch": 1.8942791140100557, "grad_norm": 1.7116352319717407, "learning_rate": 0.0007633000407664084, "loss": 3.5934, "step": 27880 }, { "epoch": 1.8946188340807175, "grad_norm": 2.0931789875030518, "learning_rate": 0.0007632575757575758, "loss": 3.3643, "step": 27885 }, { "epoch": 1.8949585541513794, "grad_norm": 1.4208035469055176, "learning_rate": 0.0007632151107487431, "loss": 3.4059, "step": 27890 }, { "epoch": 1.895298274222041, "grad_norm": 2.0944812297821045, "learning_rate": 0.0007631726457399103, "loss": 3.568, "step": 27895 }, { "epoch": 1.8956379942927029, "grad_norm": 2.161086082458496, "learning_rate": 0.0007631301807310777, "loss": 3.6291, "step": 27900 }, { "epoch": 1.8959777143633647, "grad_norm": 2.3614728450775146, "learning_rate": 0.0007630877157222449, "loss": 3.3966, "step": 27905 }, { "epoch": 1.8963174344340263, "grad_norm": 1.6953296661376953, "learning_rate": 0.0007630452507134121, "loss": 3.6248, "step": 27910 }, { "epoch": 1.8966571545046882, "grad_norm": 1.7463232278823853, "learning_rate": 0.0007630027857045795, "loss": 3.5164, "step": 27915 }, { "epoch": 1.89699687457535, "grad_norm": 1.943616271018982, "learning_rate": 0.0007629603206957467, "loss": 3.5923, "step": 27920 }, { "epoch": 1.8973365946460117, "grad_norm": 1.9371066093444824, "learning_rate": 0.000762917855686914, "loss": 3.5568, "step": 27925 }, { "epoch": 1.8976763147166733, "grad_norm": 1.7540066242218018, "learning_rate": 0.0007628753906780813, "loss": 3.6007, "step": 27930 }, { "epoch": 1.8980160347873354, "grad_norm": 2.2755703926086426, "learning_rate": 0.0007628329256692486, "loss": 3.5451, "step": 27935 }, { "epoch": 1.898355754857997, "grad_norm": 1.9106473922729492, "learning_rate": 0.0007627904606604158, "loss": 3.5522, "step": 27940 }, { "epoch": 1.8986954749286586, "grad_norm": 1.9178193807601929, "learning_rate": 
0.0007627479956515831, "loss": 3.4102, "step": 27945 }, { "epoch": 1.8990351949993207, "grad_norm": 1.6012076139450073, "learning_rate": 0.0007627055306427504, "loss": 3.4316, "step": 27950 }, { "epoch": 1.8993749150699824, "grad_norm": 1.873356819152832, "learning_rate": 0.0007626630656339176, "loss": 3.2777, "step": 27955 }, { "epoch": 1.899714635140644, "grad_norm": 1.6986892223358154, "learning_rate": 0.000762620600625085, "loss": 3.4237, "step": 27960 }, { "epoch": 1.9000543552113058, "grad_norm": 1.9919183254241943, "learning_rate": 0.0007625781356162523, "loss": 3.6318, "step": 27965 }, { "epoch": 1.9003940752819677, "grad_norm": 1.6032383441925049, "learning_rate": 0.0007625356706074195, "loss": 3.7143, "step": 27970 }, { "epoch": 1.9007337953526293, "grad_norm": 1.6457756757736206, "learning_rate": 0.0007624932055985868, "loss": 3.5314, "step": 27975 }, { "epoch": 1.9010735154232912, "grad_norm": 1.4646779298782349, "learning_rate": 0.000762450740589754, "loss": 3.633, "step": 27980 }, { "epoch": 1.901413235493953, "grad_norm": 1.751785159111023, "learning_rate": 0.0007624082755809213, "loss": 3.9071, "step": 27985 }, { "epoch": 1.9017529555646147, "grad_norm": 3.3479223251342773, "learning_rate": 0.0007623658105720886, "loss": 3.5568, "step": 27990 }, { "epoch": 1.9020926756352765, "grad_norm": 2.483795404434204, "learning_rate": 0.0007623233455632559, "loss": 3.4899, "step": 27995 }, { "epoch": 1.9024323957059384, "grad_norm": 1.604198932647705, "learning_rate": 0.0007622808805544232, "loss": 3.4132, "step": 28000 }, { "epoch": 1.9027721157766, "grad_norm": 1.7448023557662964, "learning_rate": 0.0007622384155455905, "loss": 3.5578, "step": 28005 }, { "epoch": 1.9031118358472618, "grad_norm": 1.9264135360717773, "learning_rate": 0.0007621959505367577, "loss": 3.7029, "step": 28010 }, { "epoch": 1.9034515559179237, "grad_norm": 2.4374618530273438, "learning_rate": 0.0007621534855279249, "loss": 3.2869, "step": 28015 }, { "epoch": 1.9037912759885853, 
"grad_norm": 1.9254652261734009, "learning_rate": 0.0007621110205190923, "loss": 3.5583, "step": 28020 }, { "epoch": 1.9041309960592472, "grad_norm": 1.8234705924987793, "learning_rate": 0.0007620685555102595, "loss": 3.4874, "step": 28025 }, { "epoch": 1.904470716129909, "grad_norm": 1.5106357336044312, "learning_rate": 0.0007620260905014268, "loss": 3.499, "step": 28030 }, { "epoch": 1.9048104362005707, "grad_norm": 2.046135663986206, "learning_rate": 0.0007619836254925942, "loss": 3.5804, "step": 28035 }, { "epoch": 1.9051501562712325, "grad_norm": 2.367741584777832, "learning_rate": 0.0007619411604837614, "loss": 3.6505, "step": 28040 }, { "epoch": 1.9054898763418944, "grad_norm": 1.7085851430892944, "learning_rate": 0.0007618986954749286, "loss": 3.741, "step": 28045 }, { "epoch": 1.905829596412556, "grad_norm": 1.8679485321044922, "learning_rate": 0.000761856230466096, "loss": 3.3835, "step": 28050 }, { "epoch": 1.9061693164832179, "grad_norm": 2.247694253921509, "learning_rate": 0.0007618137654572632, "loss": 3.4863, "step": 28055 }, { "epoch": 1.9065090365538797, "grad_norm": 1.5807750225067139, "learning_rate": 0.0007617713004484304, "loss": 3.6126, "step": 28060 }, { "epoch": 1.9068487566245413, "grad_norm": 1.8321971893310547, "learning_rate": 0.0007617288354395979, "loss": 3.3364, "step": 28065 }, { "epoch": 1.9071884766952032, "grad_norm": 1.7839730978012085, "learning_rate": 0.0007616863704307651, "loss": 3.4742, "step": 28070 }, { "epoch": 1.907528196765865, "grad_norm": 1.9119974374771118, "learning_rate": 0.0007616439054219323, "loss": 3.64, "step": 28075 }, { "epoch": 1.9078679168365267, "grad_norm": 1.4788174629211426, "learning_rate": 0.0007616014404130996, "loss": 3.492, "step": 28080 }, { "epoch": 1.9082076369071885, "grad_norm": 2.093651533126831, "learning_rate": 0.0007615589754042669, "loss": 3.4982, "step": 28085 }, { "epoch": 1.9085473569778504, "grad_norm": 1.636441707611084, "learning_rate": 0.0007615165103954341, "loss": 3.6386, 
"step": 28090 }, { "epoch": 1.908887077048512, "grad_norm": 1.8356808423995972, "learning_rate": 0.0007614740453866014, "loss": 3.387, "step": 28095 }, { "epoch": 1.9092267971191736, "grad_norm": 2.1096115112304688, "learning_rate": 0.0007614315803777688, "loss": 3.5235, "step": 28100 }, { "epoch": 1.9095665171898357, "grad_norm": 1.8349194526672363, "learning_rate": 0.000761389115368936, "loss": 3.4315, "step": 28105 }, { "epoch": 1.9099062372604974, "grad_norm": 2.334705352783203, "learning_rate": 0.0007613466503601033, "loss": 3.5486, "step": 28110 }, { "epoch": 1.910245957331159, "grad_norm": 1.88735830783844, "learning_rate": 0.0007613041853512705, "loss": 3.4876, "step": 28115 }, { "epoch": 1.910585677401821, "grad_norm": 1.9708702564239502, "learning_rate": 0.0007612617203424379, "loss": 3.7457, "step": 28120 }, { "epoch": 1.9109253974724827, "grad_norm": 1.837418556213379, "learning_rate": 0.0007612192553336051, "loss": 3.8013, "step": 28125 }, { "epoch": 1.9112651175431443, "grad_norm": 2.112957000732422, "learning_rate": 0.0007611767903247723, "loss": 3.5412, "step": 28130 }, { "epoch": 1.9116048376138062, "grad_norm": 2.3838350772857666, "learning_rate": 0.0007611343253159398, "loss": 3.3621, "step": 28135 }, { "epoch": 1.911944557684468, "grad_norm": 1.7998567819595337, "learning_rate": 0.000761091860307107, "loss": 3.861, "step": 28140 }, { "epoch": 1.9122842777551297, "grad_norm": 1.747855544090271, "learning_rate": 0.0007610493952982742, "loss": 3.6448, "step": 28145 }, { "epoch": 1.9126239978257915, "grad_norm": 1.9601367712020874, "learning_rate": 0.0007610069302894416, "loss": 3.5738, "step": 28150 }, { "epoch": 1.9129637178964534, "grad_norm": 1.7412604093551636, "learning_rate": 0.0007609644652806088, "loss": 3.578, "step": 28155 }, { "epoch": 1.913303437967115, "grad_norm": 2.1478636264801025, "learning_rate": 0.000760922000271776, "loss": 3.6592, "step": 28160 }, { "epoch": 1.9136431580377768, "grad_norm": 2.0235440731048584, "learning_rate": 
0.0007608795352629433, "loss": 3.7137, "step": 28165 }, { "epoch": 1.9139828781084387, "grad_norm": 2.1423239707946777, "learning_rate": 0.0007608370702541107, "loss": 3.4651, "step": 28170 }, { "epoch": 1.9143225981791003, "grad_norm": 2.184175968170166, "learning_rate": 0.0007607946052452779, "loss": 3.3112, "step": 28175 }, { "epoch": 1.9146623182497622, "grad_norm": 2.106250524520874, "learning_rate": 0.0007607521402364452, "loss": 3.6832, "step": 28180 }, { "epoch": 1.915002038320424, "grad_norm": 2.109384775161743, "learning_rate": 0.0007607096752276125, "loss": 3.5376, "step": 28185 }, { "epoch": 1.9153417583910857, "grad_norm": 1.8278833627700806, "learning_rate": 0.0007606672102187797, "loss": 3.454, "step": 28190 }, { "epoch": 1.9156814784617475, "grad_norm": 1.6486248970031738, "learning_rate": 0.000760624745209947, "loss": 3.3988, "step": 28195 }, { "epoch": 1.9160211985324094, "grad_norm": 1.8332490921020508, "learning_rate": 0.0007605822802011143, "loss": 3.8749, "step": 28200 }, { "epoch": 1.916360918603071, "grad_norm": 2.126307487487793, "learning_rate": 0.0007605398151922816, "loss": 3.611, "step": 28205 }, { "epoch": 1.9167006386737329, "grad_norm": 1.4921903610229492, "learning_rate": 0.0007604973501834489, "loss": 3.4248, "step": 28210 }, { "epoch": 1.9170403587443947, "grad_norm": 1.6121530532836914, "learning_rate": 0.0007604548851746161, "loss": 3.4253, "step": 28215 }, { "epoch": 1.9173800788150563, "grad_norm": 1.926805853843689, "learning_rate": 0.0007604124201657834, "loss": 3.5252, "step": 28220 }, { "epoch": 1.9177197988857182, "grad_norm": 1.9338845014572144, "learning_rate": 0.0007603699551569507, "loss": 3.377, "step": 28225 }, { "epoch": 1.91805951895638, "grad_norm": 2.2454264163970947, "learning_rate": 0.0007603274901481179, "loss": 3.1378, "step": 28230 }, { "epoch": 1.9183992390270417, "grad_norm": 1.7605195045471191, "learning_rate": 0.0007602850251392852, "loss": 3.4675, "step": 28235 }, { "epoch": 1.9187389590977035, 
"grad_norm": 1.550045132637024, "learning_rate": 0.0007602425601304526, "loss": 3.5583, "step": 28240 }, { "epoch": 1.9190786791683654, "grad_norm": 2.1527130603790283, "learning_rate": 0.0007602000951216198, "loss": 3.5364, "step": 28245 }, { "epoch": 1.919418399239027, "grad_norm": 2.344910144805908, "learning_rate": 0.0007601576301127871, "loss": 3.522, "step": 28250 }, { "epoch": 1.9197581193096889, "grad_norm": 2.154589891433716, "learning_rate": 0.0007601151651039544, "loss": 3.6445, "step": 28255 }, { "epoch": 1.9200978393803507, "grad_norm": 1.8258370161056519, "learning_rate": 0.0007600727000951216, "loss": 3.6341, "step": 28260 }, { "epoch": 1.9204375594510124, "grad_norm": 2.4775326251983643, "learning_rate": 0.0007600302350862888, "loss": 3.7845, "step": 28265 }, { "epoch": 1.920777279521674, "grad_norm": 2.12113356590271, "learning_rate": 0.0007599877700774563, "loss": 3.7073, "step": 28270 }, { "epoch": 1.921116999592336, "grad_norm": 1.8314423561096191, "learning_rate": 0.0007599453050686235, "loss": 3.614, "step": 28275 }, { "epoch": 1.9214567196629977, "grad_norm": 1.6748114824295044, "learning_rate": 0.0007599028400597907, "loss": 3.4161, "step": 28280 }, { "epoch": 1.9217964397336593, "grad_norm": 2.1523444652557373, "learning_rate": 0.0007598603750509581, "loss": 3.6553, "step": 28285 }, { "epoch": 1.9221361598043214, "grad_norm": 1.4952075481414795, "learning_rate": 0.0007598179100421253, "loss": 3.7, "step": 28290 }, { "epoch": 1.922475879874983, "grad_norm": 1.8015819787979126, "learning_rate": 0.0007597754450332925, "loss": 3.479, "step": 28295 }, { "epoch": 1.9228155999456447, "grad_norm": 1.847423791885376, "learning_rate": 0.0007597329800244599, "loss": 3.7572, "step": 28300 }, { "epoch": 1.9231553200163065, "grad_norm": 1.4192098379135132, "learning_rate": 0.0007596905150156272, "loss": 3.6658, "step": 28305 }, { "epoch": 1.9234950400869684, "grad_norm": 2.14261531829834, "learning_rate": 0.0007596480500067944, "loss": 3.6089, "step": 
28310 }, { "epoch": 1.92383476015763, "grad_norm": 1.5160212516784668, "learning_rate": 0.0007596055849979618, "loss": 3.8181, "step": 28315 }, { "epoch": 1.9241744802282919, "grad_norm": 1.8384560346603394, "learning_rate": 0.000759563119989129, "loss": 3.5951, "step": 28320 }, { "epoch": 1.9245142002989537, "grad_norm": 1.851015329360962, "learning_rate": 0.0007595206549802962, "loss": 3.5664, "step": 28325 }, { "epoch": 1.9248539203696153, "grad_norm": 2.1799726486206055, "learning_rate": 0.0007594781899714635, "loss": 3.4986, "step": 28330 }, { "epoch": 1.9251936404402772, "grad_norm": 1.905552864074707, "learning_rate": 0.0007594357249626308, "loss": 3.595, "step": 28335 }, { "epoch": 1.925533360510939, "grad_norm": 2.3157501220703125, "learning_rate": 0.0007593932599537981, "loss": 3.6446, "step": 28340 }, { "epoch": 1.9258730805816007, "grad_norm": 2.3993630409240723, "learning_rate": 0.0007593507949449654, "loss": 3.4938, "step": 28345 }, { "epoch": 1.9262128006522625, "grad_norm": 1.8637268543243408, "learning_rate": 0.0007593083299361327, "loss": 3.5385, "step": 28350 }, { "epoch": 1.9265525207229244, "grad_norm": 1.9077672958374023, "learning_rate": 0.0007592658649272999, "loss": 3.3377, "step": 28355 }, { "epoch": 1.926892240793586, "grad_norm": 2.0901923179626465, "learning_rate": 0.0007592233999184672, "loss": 3.4485, "step": 28360 }, { "epoch": 1.9272319608642479, "grad_norm": 1.774741291999817, "learning_rate": 0.0007591809349096344, "loss": 3.5201, "step": 28365 }, { "epoch": 1.9275716809349097, "grad_norm": 1.516284465789795, "learning_rate": 0.0007591384699008017, "loss": 3.4041, "step": 28370 }, { "epoch": 1.9279114010055713, "grad_norm": 1.827627182006836, "learning_rate": 0.0007590960048919691, "loss": 3.4638, "step": 28375 }, { "epoch": 1.9282511210762332, "grad_norm": 2.27081298828125, "learning_rate": 0.0007590535398831363, "loss": 3.5166, "step": 28380 }, { "epoch": 1.928590841146895, "grad_norm": 1.5627328157424927, "learning_rate": 
0.0007590110748743036, "loss": 3.3328, "step": 28385 }, { "epoch": 1.9289305612175567, "grad_norm": 2.2403078079223633, "learning_rate": 0.0007589686098654709, "loss": 3.6912, "step": 28390 }, { "epoch": 1.9292702812882185, "grad_norm": 1.8961644172668457, "learning_rate": 0.0007589261448566381, "loss": 3.5592, "step": 28395 }, { "epoch": 1.9296100013588804, "grad_norm": 2.175323009490967, "learning_rate": 0.0007588836798478053, "loss": 3.5682, "step": 28400 }, { "epoch": 1.929949721429542, "grad_norm": 1.873952031135559, "learning_rate": 0.0007588412148389727, "loss": 3.5682, "step": 28405 }, { "epoch": 1.9302894415002039, "grad_norm": 1.6054152250289917, "learning_rate": 0.00075879874983014, "loss": 3.4919, "step": 28410 }, { "epoch": 1.9306291615708657, "grad_norm": 2.051041841506958, "learning_rate": 0.0007587562848213072, "loss": 3.2253, "step": 28415 }, { "epoch": 1.9309688816415274, "grad_norm": 1.7479276657104492, "learning_rate": 0.0007587138198124746, "loss": 3.6674, "step": 28420 }, { "epoch": 1.9313086017121892, "grad_norm": 1.8047852516174316, "learning_rate": 0.0007586713548036418, "loss": 3.5146, "step": 28425 }, { "epoch": 1.931648321782851, "grad_norm": 2.0750033855438232, "learning_rate": 0.000758628889794809, "loss": 3.5138, "step": 28430 }, { "epoch": 1.9319880418535127, "grad_norm": 2.378443956375122, "learning_rate": 0.0007585864247859764, "loss": 3.5464, "step": 28435 }, { "epoch": 1.9323277619241743, "grad_norm": 2.2160730361938477, "learning_rate": 0.0007585439597771436, "loss": 3.5508, "step": 28440 }, { "epoch": 1.9326674819948364, "grad_norm": 1.8968526124954224, "learning_rate": 0.0007585014947683109, "loss": 3.5578, "step": 28445 }, { "epoch": 1.933007202065498, "grad_norm": 2.1503114700317383, "learning_rate": 0.0007584590297594783, "loss": 3.6001, "step": 28450 }, { "epoch": 1.9333469221361597, "grad_norm": 1.7196650505065918, "learning_rate": 0.0007584165647506455, "loss": 3.5613, "step": 28455 }, { "epoch": 1.9336866422068217, 
"grad_norm": 1.421899676322937, "learning_rate": 0.0007583740997418128, "loss": 3.6464, "step": 28460 }, { "epoch": 1.9340263622774834, "grad_norm": 1.99983811378479, "learning_rate": 0.00075833163473298, "loss": 3.646, "step": 28465 }, { "epoch": 1.934366082348145, "grad_norm": 1.6598037481307983, "learning_rate": 0.0007582891697241473, "loss": 3.322, "step": 28470 }, { "epoch": 1.9347058024188069, "grad_norm": 2.020644187927246, "learning_rate": 0.0007582467047153146, "loss": 3.7728, "step": 28475 }, { "epoch": 1.9350455224894687, "grad_norm": 1.4881832599639893, "learning_rate": 0.0007582042397064819, "loss": 3.6252, "step": 28480 }, { "epoch": 1.9353852425601303, "grad_norm": 2.073995351791382, "learning_rate": 0.0007581617746976492, "loss": 3.5419, "step": 28485 }, { "epoch": 1.9357249626307922, "grad_norm": 1.458837866783142, "learning_rate": 0.0007581193096888165, "loss": 3.3504, "step": 28490 }, { "epoch": 1.936064682701454, "grad_norm": 2.249512195587158, "learning_rate": 0.0007580768446799837, "loss": 3.5502, "step": 28495 }, { "epoch": 1.9364044027721157, "grad_norm": 1.9204119443893433, "learning_rate": 0.000758034379671151, "loss": 3.5526, "step": 28500 }, { "epoch": 1.9367441228427775, "grad_norm": 1.7445834875106812, "learning_rate": 0.0007579919146623183, "loss": 3.7304, "step": 28505 }, { "epoch": 1.9370838429134394, "grad_norm": 1.7977787256240845, "learning_rate": 0.0007579494496534855, "loss": 3.4463, "step": 28510 }, { "epoch": 1.937423562984101, "grad_norm": 2.1395652294158936, "learning_rate": 0.0007579069846446528, "loss": 3.5544, "step": 28515 }, { "epoch": 1.9377632830547629, "grad_norm": 2.1537935733795166, "learning_rate": 0.0007578645196358202, "loss": 3.6015, "step": 28520 }, { "epoch": 1.9381030031254247, "grad_norm": 1.7668956518173218, "learning_rate": 0.0007578220546269874, "loss": 3.5944, "step": 28525 }, { "epoch": 1.9384427231960863, "grad_norm": 1.655014991760254, "learning_rate": 0.0007577795896181546, "loss": 3.5555, "step": 
28530 }, { "epoch": 1.9387824432667482, "grad_norm": 1.7464497089385986, "learning_rate": 0.000757737124609322, "loss": 3.5065, "step": 28535 }, { "epoch": 1.93912216333741, "grad_norm": 2.788832902908325, "learning_rate": 0.0007576946596004892, "loss": 3.5511, "step": 28540 }, { "epoch": 1.9394618834080717, "grad_norm": 1.9988064765930176, "learning_rate": 0.0007576521945916564, "loss": 3.5881, "step": 28545 }, { "epoch": 1.9398016034787335, "grad_norm": 1.9149843454360962, "learning_rate": 0.0007576097295828239, "loss": 3.6324, "step": 28550 }, { "epoch": 1.9401413235493954, "grad_norm": 1.7770277261734009, "learning_rate": 0.0007575672645739911, "loss": 3.4248, "step": 28555 }, { "epoch": 1.940481043620057, "grad_norm": 1.9440799951553345, "learning_rate": 0.0007575247995651583, "loss": 3.6937, "step": 28560 }, { "epoch": 1.9408207636907189, "grad_norm": 1.553069829940796, "learning_rate": 0.0007574823345563256, "loss": 3.4266, "step": 28565 }, { "epoch": 1.9411604837613807, "grad_norm": 2.0355639457702637, "learning_rate": 0.0007574398695474929, "loss": 3.5902, "step": 28570 }, { "epoch": 1.9415002038320424, "grad_norm": 1.9888957738876343, "learning_rate": 0.0007573974045386601, "loss": 3.2683, "step": 28575 }, { "epoch": 1.9418399239027042, "grad_norm": 1.9671660661697388, "learning_rate": 0.0007573549395298274, "loss": 3.6206, "step": 28580 }, { "epoch": 1.942179643973366, "grad_norm": 1.5865730047225952, "learning_rate": 0.0007573124745209948, "loss": 3.8099, "step": 28585 }, { "epoch": 1.9425193640440277, "grad_norm": 2.07572078704834, "learning_rate": 0.000757270009512162, "loss": 3.5342, "step": 28590 }, { "epoch": 1.9428590841146895, "grad_norm": 1.8833776712417603, "learning_rate": 0.0007572275445033293, "loss": 3.4821, "step": 28595 }, { "epoch": 1.9431988041853514, "grad_norm": 2.3456475734710693, "learning_rate": 0.0007571850794944966, "loss": 3.2746, "step": 28600 }, { "epoch": 1.943538524256013, "grad_norm": 1.9327313899993896, "learning_rate": 
0.0007571426144856638, "loss": 3.4617, "step": 28605 }, { "epoch": 1.9438782443266747, "grad_norm": 2.032075881958008, "learning_rate": 0.0007571001494768311, "loss": 3.4481, "step": 28610 }, { "epoch": 1.9442179643973367, "grad_norm": 1.7196701765060425, "learning_rate": 0.0007570576844679983, "loss": 3.4893, "step": 28615 }, { "epoch": 1.9445576844679984, "grad_norm": 2.5035388469696045, "learning_rate": 0.0007570152194591657, "loss": 3.1272, "step": 28620 }, { "epoch": 1.94489740453866, "grad_norm": 2.0326919555664062, "learning_rate": 0.000756972754450333, "loss": 3.5014, "step": 28625 }, { "epoch": 1.945237124609322, "grad_norm": 1.7259987592697144, "learning_rate": 0.0007569302894415002, "loss": 3.5067, "step": 28630 }, { "epoch": 1.9455768446799837, "grad_norm": 1.8719161748886108, "learning_rate": 0.0007568878244326675, "loss": 3.4449, "step": 28635 }, { "epoch": 1.9459165647506453, "grad_norm": 1.7607271671295166, "learning_rate": 0.0007568453594238348, "loss": 3.4163, "step": 28640 }, { "epoch": 1.9462562848213072, "grad_norm": 2.055516242980957, "learning_rate": 0.000756802894415002, "loss": 3.5532, "step": 28645 }, { "epoch": 1.946596004891969, "grad_norm": 1.8951704502105713, "learning_rate": 0.0007567604294061692, "loss": 3.2273, "step": 28650 }, { "epoch": 1.9469357249626307, "grad_norm": 2.303652763366699, "learning_rate": 0.0007567179643973367, "loss": 3.6645, "step": 28655 }, { "epoch": 1.9472754450332925, "grad_norm": 2.305184841156006, "learning_rate": 0.0007566754993885039, "loss": 3.7632, "step": 28660 }, { "epoch": 1.9476151651039544, "grad_norm": 2.2297067642211914, "learning_rate": 0.0007566330343796711, "loss": 3.369, "step": 28665 }, { "epoch": 1.947954885174616, "grad_norm": 2.3588955402374268, "learning_rate": 0.0007565905693708385, "loss": 3.4572, "step": 28670 }, { "epoch": 1.9482946052452779, "grad_norm": 2.294879198074341, "learning_rate": 0.0007565481043620057, "loss": 3.2797, "step": 28675 }, { "epoch": 1.9486343253159397, 
"grad_norm": 1.7897660732269287, "learning_rate": 0.0007565056393531729, "loss": 3.7153, "step": 28680 }, { "epoch": 1.9489740453866014, "grad_norm": 1.95235013961792, "learning_rate": 0.0007564631743443403, "loss": 3.6701, "step": 28685 }, { "epoch": 1.9493137654572632, "grad_norm": 1.6529935598373413, "learning_rate": 0.0007564207093355076, "loss": 3.5324, "step": 28690 }, { "epoch": 1.949653485527925, "grad_norm": 1.7084274291992188, "learning_rate": 0.0007563782443266748, "loss": 3.5804, "step": 28695 }, { "epoch": 1.9499932055985867, "grad_norm": 2.6649410724639893, "learning_rate": 0.0007563357793178422, "loss": 3.5501, "step": 28700 }, { "epoch": 1.9503329256692485, "grad_norm": 1.6774508953094482, "learning_rate": 0.0007562933143090094, "loss": 3.4365, "step": 28705 }, { "epoch": 1.9506726457399104, "grad_norm": 2.3270039558410645, "learning_rate": 0.0007562508493001766, "loss": 3.7982, "step": 28710 }, { "epoch": 1.951012365810572, "grad_norm": 2.022951126098633, "learning_rate": 0.0007562083842913439, "loss": 3.6423, "step": 28715 }, { "epoch": 1.9513520858812339, "grad_norm": 1.731388807296753, "learning_rate": 0.0007561659192825112, "loss": 3.5857, "step": 28720 }, { "epoch": 1.9516918059518957, "grad_norm": 1.8960299491882324, "learning_rate": 0.0007561234542736785, "loss": 3.5687, "step": 28725 }, { "epoch": 1.9520315260225574, "grad_norm": 2.4986581802368164, "learning_rate": 0.0007560809892648458, "loss": 3.6629, "step": 28730 }, { "epoch": 1.9523712460932192, "grad_norm": 1.861708164215088, "learning_rate": 0.0007560385242560131, "loss": 3.7079, "step": 28735 }, { "epoch": 1.952710966163881, "grad_norm": 1.7683384418487549, "learning_rate": 0.0007559960592471803, "loss": 3.6523, "step": 28740 }, { "epoch": 1.9530506862345427, "grad_norm": 1.8964989185333252, "learning_rate": 0.0007559535942383476, "loss": 3.7326, "step": 28745 }, { "epoch": 1.9533904063052046, "grad_norm": 1.8964247703552246, "learning_rate": 0.0007559111292295148, "loss": 3.4772, 
"step": 28750 }, { "epoch": 1.9537301263758664, "grad_norm": 1.8667237758636475, "learning_rate": 0.0007558686642206821, "loss": 3.6133, "step": 28755 }, { "epoch": 1.954069846446528, "grad_norm": 1.7939597368240356, "learning_rate": 0.0007558261992118495, "loss": 3.6426, "step": 28760 }, { "epoch": 1.95440956651719, "grad_norm": 2.463562250137329, "learning_rate": 0.0007557837342030167, "loss": 3.6356, "step": 28765 }, { "epoch": 1.9547492865878517, "grad_norm": 2.17333984375, "learning_rate": 0.000755741269194184, "loss": 3.6152, "step": 28770 }, { "epoch": 1.9550890066585134, "grad_norm": 2.124601125717163, "learning_rate": 0.0007556988041853513, "loss": 3.4875, "step": 28775 }, { "epoch": 1.955428726729175, "grad_norm": 1.7632100582122803, "learning_rate": 0.0007556563391765185, "loss": 3.6744, "step": 28780 }, { "epoch": 1.955768446799837, "grad_norm": 2.292250394821167, "learning_rate": 0.0007556138741676858, "loss": 3.4933, "step": 28785 }, { "epoch": 1.9561081668704987, "grad_norm": 1.723986029624939, "learning_rate": 0.0007555714091588531, "loss": 3.6932, "step": 28790 }, { "epoch": 1.9564478869411603, "grad_norm": 2.0136196613311768, "learning_rate": 0.0007555289441500204, "loss": 3.5297, "step": 28795 }, { "epoch": 1.9567876070118224, "grad_norm": 1.5911043882369995, "learning_rate": 0.0007554864791411878, "loss": 3.4815, "step": 28800 }, { "epoch": 1.957127327082484, "grad_norm": 1.9275718927383423, "learning_rate": 0.000755444014132355, "loss": 3.2694, "step": 28805 }, { "epoch": 1.9574670471531457, "grad_norm": 2.158365249633789, "learning_rate": 0.0007554015491235222, "loss": 3.5819, "step": 28810 }, { "epoch": 1.9578067672238075, "grad_norm": 2.0875864028930664, "learning_rate": 0.0007553590841146895, "loss": 3.456, "step": 28815 }, { "epoch": 1.9581464872944694, "grad_norm": 1.8298521041870117, "learning_rate": 0.0007553166191058568, "loss": 3.5296, "step": 28820 }, { "epoch": 1.958486207365131, "grad_norm": 1.8438900709152222, "learning_rate": 
0.000755274154097024, "loss": 3.4845, "step": 28825 }, { "epoch": 1.9588259274357929, "grad_norm": 2.5920352935791016, "learning_rate": 0.0007552316890881914, "loss": 3.4675, "step": 28830 }, { "epoch": 1.9591656475064547, "grad_norm": 2.399197816848755, "learning_rate": 0.0007551892240793587, "loss": 3.4275, "step": 28835 }, { "epoch": 1.9595053675771164, "grad_norm": 1.7226322889328003, "learning_rate": 0.0007551467590705259, "loss": 3.6601, "step": 28840 }, { "epoch": 1.9598450876477782, "grad_norm": 1.7449640035629272, "learning_rate": 0.0007551042940616932, "loss": 3.6092, "step": 28845 }, { "epoch": 1.96018480771844, "grad_norm": 2.5219874382019043, "learning_rate": 0.0007550618290528604, "loss": 3.5632, "step": 28850 }, { "epoch": 1.9605245277891017, "grad_norm": 1.5778591632843018, "learning_rate": 0.0007550193640440277, "loss": 3.5693, "step": 28855 }, { "epoch": 1.9608642478597635, "grad_norm": 1.9485149383544922, "learning_rate": 0.0007549768990351951, "loss": 3.6425, "step": 28860 }, { "epoch": 1.9612039679304254, "grad_norm": 1.5361815690994263, "learning_rate": 0.0007549344340263623, "loss": 3.5838, "step": 28865 }, { "epoch": 1.961543688001087, "grad_norm": 1.8980302810668945, "learning_rate": 0.0007548919690175296, "loss": 3.5268, "step": 28870 }, { "epoch": 1.9618834080717489, "grad_norm": 1.9017845392227173, "learning_rate": 0.0007548495040086969, "loss": 3.7575, "step": 28875 }, { "epoch": 1.9622231281424107, "grad_norm": 1.3959194421768188, "learning_rate": 0.0007548070389998641, "loss": 3.5146, "step": 28880 }, { "epoch": 1.9625628482130724, "grad_norm": 2.007405996322632, "learning_rate": 0.0007547645739910314, "loss": 3.3596, "step": 28885 }, { "epoch": 1.9629025682837342, "grad_norm": 1.7025660276412964, "learning_rate": 0.0007547221089821987, "loss": 3.4521, "step": 28890 }, { "epoch": 1.963242288354396, "grad_norm": 2.258234977722168, "learning_rate": 0.000754679643973366, "loss": 3.4873, "step": 28895 }, { "epoch": 1.9635820084250577, 
"grad_norm": 2.017670154571533, "learning_rate": 0.0007546371789645332, "loss": 3.4467, "step": 28900 }, { "epoch": 1.9639217284957196, "grad_norm": 2.6123709678649902, "learning_rate": 0.0007545947139557006, "loss": 3.785, "step": 28905 }, { "epoch": 1.9642614485663814, "grad_norm": 1.9019464254379272, "learning_rate": 0.0007545522489468678, "loss": 3.3439, "step": 28910 }, { "epoch": 1.964601168637043, "grad_norm": 1.9193705320358276, "learning_rate": 0.000754509783938035, "loss": 3.3862, "step": 28915 }, { "epoch": 1.964940888707705, "grad_norm": 1.6800028085708618, "learning_rate": 0.0007544673189292024, "loss": 3.7504, "step": 28920 }, { "epoch": 1.9652806087783667, "grad_norm": 1.7145472764968872, "learning_rate": 0.0007544248539203696, "loss": 3.4736, "step": 28925 }, { "epoch": 1.9656203288490284, "grad_norm": 2.188082695007324, "learning_rate": 0.0007543823889115369, "loss": 3.5295, "step": 28930 }, { "epoch": 1.9659600489196902, "grad_norm": 1.818285584449768, "learning_rate": 0.0007543399239027043, "loss": 3.4442, "step": 28935 }, { "epoch": 1.966299768990352, "grad_norm": 1.7168283462524414, "learning_rate": 0.0007542974588938715, "loss": 3.5333, "step": 28940 }, { "epoch": 1.9666394890610137, "grad_norm": 1.56576406955719, "learning_rate": 0.0007542549938850387, "loss": 3.6783, "step": 28945 }, { "epoch": 1.9669792091316753, "grad_norm": 2.633837938308716, "learning_rate": 0.000754212528876206, "loss": 3.7586, "step": 28950 }, { "epoch": 1.9673189292023374, "grad_norm": 2.4145548343658447, "learning_rate": 0.0007541700638673733, "loss": 3.4998, "step": 28955 }, { "epoch": 1.967658649272999, "grad_norm": 2.0024096965789795, "learning_rate": 0.0007541275988585405, "loss": 3.3117, "step": 28960 }, { "epoch": 1.9679983693436607, "grad_norm": 2.274745464324951, "learning_rate": 0.0007540851338497079, "loss": 3.7961, "step": 28965 }, { "epoch": 1.9683380894143228, "grad_norm": 2.3911356925964355, "learning_rate": 0.0007540426688408752, "loss": 3.6142, 
"step": 28970 }, { "epoch": 1.9686778094849844, "grad_norm": 2.2035274505615234, "learning_rate": 0.0007540002038320424, "loss": 3.293, "step": 28975 }, { "epoch": 1.969017529555646, "grad_norm": 1.8141746520996094, "learning_rate": 0.0007539577388232097, "loss": 3.3568, "step": 28980 }, { "epoch": 1.9693572496263079, "grad_norm": 2.431227445602417, "learning_rate": 0.000753915273814377, "loss": 3.4656, "step": 28985 }, { "epoch": 1.9696969696969697, "grad_norm": 1.917757272720337, "learning_rate": 0.0007538728088055442, "loss": 3.4849, "step": 28990 }, { "epoch": 1.9700366897676314, "grad_norm": 1.791429042816162, "learning_rate": 0.0007538303437967115, "loss": 3.3242, "step": 28995 }, { "epoch": 1.9703764098382932, "grad_norm": 1.8620473146438599, "learning_rate": 0.0007537878787878788, "loss": 3.5807, "step": 29000 }, { "epoch": 1.970716129908955, "grad_norm": 1.6148217916488647, "learning_rate": 0.0007537454137790461, "loss": 3.5006, "step": 29005 }, { "epoch": 1.9710558499796167, "grad_norm": 1.6159456968307495, "learning_rate": 0.0007537029487702134, "loss": 3.5477, "step": 29010 }, { "epoch": 1.9713955700502785, "grad_norm": 1.8861982822418213, "learning_rate": 0.0007536604837613806, "loss": 3.6466, "step": 29015 }, { "epoch": 1.9717352901209404, "grad_norm": 2.0305590629577637, "learning_rate": 0.0007536180187525479, "loss": 3.6395, "step": 29020 }, { "epoch": 1.972075010191602, "grad_norm": 1.8751572370529175, "learning_rate": 0.0007535755537437152, "loss": 3.6263, "step": 29025 }, { "epoch": 1.9724147302622639, "grad_norm": 2.5341989994049072, "learning_rate": 0.0007535330887348824, "loss": 3.7539, "step": 29030 }, { "epoch": 1.9727544503329257, "grad_norm": 2.24562931060791, "learning_rate": 0.0007534906237260498, "loss": 3.618, "step": 29035 }, { "epoch": 1.9730941704035874, "grad_norm": 1.8629150390625, "learning_rate": 0.0007534481587172171, "loss": 3.5527, "step": 29040 }, { "epoch": 1.9734338904742492, "grad_norm": 2.1426773071289062, 
"learning_rate": 0.0007534056937083843, "loss": 3.4526, "step": 29045 }, { "epoch": 1.973773610544911, "grad_norm": 2.4190216064453125, "learning_rate": 0.0007533632286995515, "loss": 3.5843, "step": 29050 }, { "epoch": 1.9741133306155727, "grad_norm": 2.365102529525757, "learning_rate": 0.0007533207636907189, "loss": 3.7991, "step": 29055 }, { "epoch": 1.9744530506862346, "grad_norm": 1.9239652156829834, "learning_rate": 0.0007532782986818861, "loss": 3.5544, "step": 29060 }, { "epoch": 1.9747927707568964, "grad_norm": 2.2655372619628906, "learning_rate": 0.0007532358336730533, "loss": 3.8223, "step": 29065 }, { "epoch": 1.975132490827558, "grad_norm": 1.9502642154693604, "learning_rate": 0.0007531933686642208, "loss": 3.5376, "step": 29070 }, { "epoch": 1.97547221089822, "grad_norm": 1.9787517786026, "learning_rate": 0.000753150903655388, "loss": 3.4854, "step": 29075 }, { "epoch": 1.9758119309688817, "grad_norm": 2.047438383102417, "learning_rate": 0.0007531084386465552, "loss": 3.4157, "step": 29080 }, { "epoch": 1.9761516510395434, "grad_norm": 1.7394310235977173, "learning_rate": 0.0007530659736377226, "loss": 3.5927, "step": 29085 }, { "epoch": 1.9764913711102052, "grad_norm": 2.0294618606567383, "learning_rate": 0.0007530235086288898, "loss": 3.5234, "step": 29090 }, { "epoch": 1.976831091180867, "grad_norm": 1.8053058385849, "learning_rate": 0.000752981043620057, "loss": 3.5837, "step": 29095 }, { "epoch": 1.9771708112515287, "grad_norm": 1.7532650232315063, "learning_rate": 0.0007529385786112243, "loss": 3.3219, "step": 29100 }, { "epoch": 1.9775105313221906, "grad_norm": 1.4941080808639526, "learning_rate": 0.0007528961136023917, "loss": 3.7626, "step": 29105 }, { "epoch": 1.9778502513928524, "grad_norm": 2.2670509815216064, "learning_rate": 0.0007528536485935589, "loss": 3.353, "step": 29110 }, { "epoch": 1.978189971463514, "grad_norm": 1.5637143850326538, "learning_rate": 0.0007528111835847262, "loss": 3.5123, "step": 29115 }, { "epoch": 
1.9785296915341757, "grad_norm": 2.128967523574829, "learning_rate": 0.0007527687185758935, "loss": 3.617, "step": 29120 }, { "epoch": 1.9788694116048378, "grad_norm": 1.931474208831787, "learning_rate": 0.0007527262535670607, "loss": 3.7643, "step": 29125 }, { "epoch": 1.9792091316754994, "grad_norm": 2.0021188259124756, "learning_rate": 0.000752683788558228, "loss": 3.6429, "step": 29130 }, { "epoch": 1.979548851746161, "grad_norm": 2.0161354541778564, "learning_rate": 0.0007526413235493952, "loss": 3.6609, "step": 29135 }, { "epoch": 1.979888571816823, "grad_norm": 2.3659608364105225, "learning_rate": 0.0007525988585405627, "loss": 3.3591, "step": 29140 }, { "epoch": 1.9802282918874847, "grad_norm": 1.5043543577194214, "learning_rate": 0.0007525563935317299, "loss": 3.5788, "step": 29145 }, { "epoch": 1.9805680119581464, "grad_norm": 1.9235931634902954, "learning_rate": 0.0007525139285228971, "loss": 3.5646, "step": 29150 }, { "epoch": 1.9809077320288082, "grad_norm": 2.0343010425567627, "learning_rate": 0.0007524714635140645, "loss": 3.4408, "step": 29155 }, { "epoch": 1.98124745209947, "grad_norm": 2.431312084197998, "learning_rate": 0.0007524289985052317, "loss": 3.5626, "step": 29160 }, { "epoch": 1.9815871721701317, "grad_norm": 2.248112916946411, "learning_rate": 0.0007523950264981655, "loss": 3.3483, "step": 29165 }, { "epoch": 1.9819268922407935, "grad_norm": 2.2379894256591797, "learning_rate": 0.0007523525614893329, "loss": 3.7009, "step": 29170 }, { "epoch": 1.9822666123114554, "grad_norm": 1.5641663074493408, "learning_rate": 0.0007523100964805001, "loss": 3.7592, "step": 29175 }, { "epoch": 1.982606332382117, "grad_norm": 1.902764916419983, "learning_rate": 0.0007522676314716673, "loss": 3.4273, "step": 29180 }, { "epoch": 1.9829460524527789, "grad_norm": 1.7651537656784058, "learning_rate": 0.0007522251664628346, "loss": 3.6427, "step": 29185 }, { "epoch": 1.9832857725234407, "grad_norm": 2.3310766220092773, "learning_rate": 0.0007521827014540019, 
"loss": 3.7154, "step": 29190 }, { "epoch": 1.9836254925941024, "grad_norm": 2.1901087760925293, "learning_rate": 0.0007521402364451692, "loss": 3.6827, "step": 29195 }, { "epoch": 1.9839652126647642, "grad_norm": 1.980005145072937, "learning_rate": 0.0007520977714363365, "loss": 3.6978, "step": 29200 }, { "epoch": 1.984304932735426, "grad_norm": 1.707291841506958, "learning_rate": 0.0007520553064275038, "loss": 3.5326, "step": 29205 }, { "epoch": 1.9846446528060877, "grad_norm": 1.8333523273468018, "learning_rate": 0.000752012841418671, "loss": 3.2974, "step": 29210 }, { "epoch": 1.9849843728767496, "grad_norm": 2.0814261436462402, "learning_rate": 0.0007519703764098383, "loss": 3.6697, "step": 29215 }, { "epoch": 1.9853240929474114, "grad_norm": 2.5821640491485596, "learning_rate": 0.0007519279114010056, "loss": 3.3336, "step": 29220 }, { "epoch": 1.985663813018073, "grad_norm": 2.109795331954956, "learning_rate": 0.0007518854463921728, "loss": 3.6146, "step": 29225 }, { "epoch": 1.986003533088735, "grad_norm": 2.0932857990264893, "learning_rate": 0.0007518429813833402, "loss": 3.6056, "step": 29230 }, { "epoch": 1.9863432531593967, "grad_norm": 1.6833833456039429, "learning_rate": 0.0007518005163745074, "loss": 3.4467, "step": 29235 }, { "epoch": 1.9866829732300584, "grad_norm": 2.5543699264526367, "learning_rate": 0.0007517580513656747, "loss": 3.3901, "step": 29240 }, { "epoch": 1.9870226933007202, "grad_norm": 2.062884569168091, "learning_rate": 0.000751715586356842, "loss": 3.6753, "step": 29245 }, { "epoch": 1.987362413371382, "grad_norm": 1.8369117975234985, "learning_rate": 0.0007516731213480092, "loss": 3.6053, "step": 29250 }, { "epoch": 1.9877021334420437, "grad_norm": 2.355513572692871, "learning_rate": 0.0007516306563391765, "loss": 3.4892, "step": 29255 }, { "epoch": 1.9880418535127056, "grad_norm": 1.8547275066375732, "learning_rate": 0.0007515881913303438, "loss": 3.672, "step": 29260 }, { "epoch": 1.9883815735833674, "grad_norm": 
2.121372938156128, "learning_rate": 0.0007515457263215111, "loss": 3.3587, "step": 29265 }, { "epoch": 1.988721293654029, "grad_norm": 1.6615222692489624, "learning_rate": 0.0007515032613126784, "loss": 3.5121, "step": 29270 }, { "epoch": 1.989061013724691, "grad_norm": 1.902091145515442, "learning_rate": 0.0007514607963038457, "loss": 3.3307, "step": 29275 }, { "epoch": 1.9894007337953528, "grad_norm": 1.568825364112854, "learning_rate": 0.0007514183312950129, "loss": 3.4552, "step": 29280 }, { "epoch": 1.9897404538660144, "grad_norm": 2.5182738304138184, "learning_rate": 0.0007513758662861801, "loss": 3.5882, "step": 29285 }, { "epoch": 1.990080173936676, "grad_norm": 1.5656875371932983, "learning_rate": 0.0007513334012773475, "loss": 3.4812, "step": 29290 }, { "epoch": 1.990419894007338, "grad_norm": 1.5409862995147705, "learning_rate": 0.0007512909362685147, "loss": 3.5452, "step": 29295 }, { "epoch": 1.9907596140779997, "grad_norm": 1.8573204278945923, "learning_rate": 0.000751248471259682, "loss": 3.7327, "step": 29300 }, { "epoch": 1.9910993341486614, "grad_norm": 1.679760217666626, "learning_rate": 0.0007512060062508494, "loss": 3.4503, "step": 29305 }, { "epoch": 1.9914390542193234, "grad_norm": 1.466003656387329, "learning_rate": 0.0007511635412420166, "loss": 3.5572, "step": 29310 }, { "epoch": 1.991778774289985, "grad_norm": 1.6113601922988892, "learning_rate": 0.0007511210762331838, "loss": 3.3862, "step": 29315 }, { "epoch": 1.9921184943606467, "grad_norm": 1.458174467086792, "learning_rate": 0.0007510786112243512, "loss": 3.5036, "step": 29320 }, { "epoch": 1.9924582144313085, "grad_norm": 2.1704609394073486, "learning_rate": 0.0007510361462155184, "loss": 3.6621, "step": 29325 }, { "epoch": 1.9927979345019704, "grad_norm": 1.6312795877456665, "learning_rate": 0.0007509936812066856, "loss": 3.2935, "step": 29330 }, { "epoch": 1.993137654572632, "grad_norm": 2.043511152267456, "learning_rate": 0.000750951216197853, "loss": 3.5122, "step": 29335 }, { 
"epoch": 1.9934773746432939, "grad_norm": 1.8705493211746216, "learning_rate": 0.0007509087511890203, "loss": 3.7759, "step": 29340 }, { "epoch": 1.9938170947139557, "grad_norm": 2.0898756980895996, "learning_rate": 0.0007508662861801876, "loss": 3.2773, "step": 29345 }, { "epoch": 1.9941568147846174, "grad_norm": 1.6628000736236572, "learning_rate": 0.0007508238211713548, "loss": 3.7648, "step": 29350 }, { "epoch": 1.9944965348552792, "grad_norm": 1.906299352645874, "learning_rate": 0.0007507813561625221, "loss": 3.4073, "step": 29355 }, { "epoch": 1.994836254925941, "grad_norm": 3.0363376140594482, "learning_rate": 0.0007507388911536894, "loss": 3.7363, "step": 29360 }, { "epoch": 1.9951759749966027, "grad_norm": 1.8027880191802979, "learning_rate": 0.0007506964261448566, "loss": 3.5643, "step": 29365 }, { "epoch": 1.9955156950672646, "grad_norm": 1.9639673233032227, "learning_rate": 0.000750653961136024, "loss": 3.6097, "step": 29370 }, { "epoch": 1.9958554151379264, "grad_norm": 1.9833250045776367, "learning_rate": 0.0007506114961271913, "loss": 3.5809, "step": 29375 }, { "epoch": 1.996195135208588, "grad_norm": 1.4519531726837158, "learning_rate": 0.0007505690311183585, "loss": 3.6537, "step": 29380 }, { "epoch": 1.99653485527925, "grad_norm": 2.5220232009887695, "learning_rate": 0.0007505265661095257, "loss": 3.5642, "step": 29385 }, { "epoch": 1.9968745753499118, "grad_norm": 1.9467740058898926, "learning_rate": 0.0007504841011006931, "loss": 3.4298, "step": 29390 }, { "epoch": 1.9972142954205734, "grad_norm": 1.4719994068145752, "learning_rate": 0.0007504416360918603, "loss": 3.6378, "step": 29395 }, { "epoch": 1.9975540154912352, "grad_norm": 2.037175178527832, "learning_rate": 0.0007503991710830275, "loss": 3.7117, "step": 29400 }, { "epoch": 1.997893735561897, "grad_norm": 1.8172155618667603, "learning_rate": 0.000750356706074195, "loss": 3.619, "step": 29405 }, { "epoch": 1.9982334556325587, "grad_norm": 1.849860429763794, "learning_rate": 
0.0007503142410653622, "loss": 3.537, "step": 29410 }, { "epoch": 1.9985731757032206, "grad_norm": 2.2396528720855713, "learning_rate": 0.0007502717760565294, "loss": 3.4417, "step": 29415 }, { "epoch": 1.9989128957738824, "grad_norm": 2.1252148151397705, "learning_rate": 0.0007502293110476968, "loss": 3.6664, "step": 29420 }, { "epoch": 1.999252615844544, "grad_norm": 1.6592305898666382, "learning_rate": 0.000750186846038864, "loss": 3.5278, "step": 29425 }, { "epoch": 1.999592335915206, "grad_norm": 1.792149305343628, "learning_rate": 0.0007501443810300312, "loss": 3.5551, "step": 29430 }, { "epoch": 1.9999320559858678, "grad_norm": 1.665282130241394, "learning_rate": 0.0007501019160211985, "loss": 3.5893, "step": 29435 }, { "epoch": 2.0, "eval_bertscore": { "f1": 0.8269799677065748, "precision": 0.8278645145152588, "recall": 0.8296164700037426 }, "eval_bleu_4": 0.003442206200937784, "eval_exact_match": 0.0, "eval_loss": 3.48945689201355, "eval_meteor": 0.07679834377204181, "eval_rouge": { "rouge1": 0.11364371460054923, "rouge2": 0.012708733404032332, "rougeL": 0.09638231625886695, "rougeLsum": 0.09637940811843684 }, "eval_runtime": 1966.8601, "eval_samples_per_second": 5.246, "eval_steps_per_second": 0.656, "step": 29436 } ], "logging_steps": 5, "max_steps": 117744, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.381645213984358e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }