{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.37733333333333335, "eval_steps": 500, "global_step": 150000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1e-05, "grad_norm": 4.676956653594971, "learning_rate": 0.0, "loss": 11.414031982421875, "step": 1 }, { "epoch": 5e-05, "grad_norm": 10.332587242126465, "learning_rate": 2e-08, "loss": 11.403215408325195, "step": 5 }, { "epoch": 0.0001, "grad_norm": 7.231582164764404, "learning_rate": 4.5e-08, "loss": 11.417165374755859, "step": 10 }, { "epoch": 0.00015, "grad_norm": 4.355790138244629, "learning_rate": 7e-08, "loss": 11.399943542480468, "step": 15 }, { "epoch": 0.0002, "grad_norm": 4.519984245300293, "learning_rate": 9.5e-08, "loss": 11.40735321044922, "step": 20 }, { "epoch": 0.00025, "grad_norm": 4.442804336547852, "learning_rate": 1.2000000000000002e-07, "loss": 11.400213623046875, "step": 25 }, { "epoch": 0.0003, "grad_norm": 8.635957717895508, "learning_rate": 1.4500000000000001e-07, "loss": 11.401361083984375, "step": 30 }, { "epoch": 0.00035, "grad_norm": 4.658578395843506, "learning_rate": 1.7000000000000001e-07, "loss": 11.403047943115235, "step": 35 }, { "epoch": 0.0004, "grad_norm": 4.7170820236206055, "learning_rate": 1.95e-07, "loss": 11.405581665039062, "step": 40 }, { "epoch": 0.00045, "grad_norm": 4.315395355224609, "learning_rate": 2.2e-07, "loss": 11.401987457275391, "step": 45 }, { "epoch": 0.0005, "grad_norm": 4.307101726531982, "learning_rate": 2.4500000000000004e-07, "loss": 11.401129150390625, "step": 50 }, { "epoch": 0.00055, "grad_norm": 4.62723445892334, "learning_rate": 2.7e-07, "loss": 11.401227569580078, "step": 55 }, { "epoch": 0.0006, "grad_norm": 4.4634857177734375, "learning_rate": 2.9500000000000003e-07, "loss": 11.399066162109374, "step": 60 }, { "epoch": 0.00065, "grad_norm": 5.220989227294922, "learning_rate": 3.2e-07, "loss": 11.39878158569336, "step": 65 }, { "epoch": 0.0007, "grad_norm": 4.540184020996094, "learning_rate": 3.4500000000000003e-07, "loss": 11.40390625, "step": 70 }, { "epoch": 0.00075, "grad_norm": 4.261510848999023, "learning_rate": 3.7e-07, "loss": 11.390472412109375, "step": 75 }, { "epoch": 0.0008, "grad_norm": 4.405035972595215, "learning_rate": 3.9500000000000003e-07, "loss": 11.383260345458984, "step": 80 }, { "epoch": 0.00085, "grad_norm": 4.522156238555908, "learning_rate": 4.2000000000000006e-07, "loss": 11.392791748046875, "step": 85 }, { "epoch": 0.0009, "grad_norm": 4.81120491027832, "learning_rate": 4.4500000000000003e-07, "loss": 11.392074584960938, "step": 90 }, { "epoch": 0.00095, "grad_norm": 4.380856990814209, "learning_rate": 4.7000000000000005e-07, "loss": 11.390422821044922, "step": 95 }, { "epoch": 0.001, "grad_norm": 4.641089916229248, "learning_rate": 4.95e-07, "loss": 11.393937683105468, "step": 100 }, { "epoch": 0.00105, "grad_norm": 4.656015396118164, "learning_rate": 5.2e-07, "loss": 11.375308990478516, "step": 105 }, { "epoch": 0.0011, "grad_norm": 4.197414398193359, "learning_rate": 5.450000000000001e-07, "loss": 11.383442687988282, "step": 110 }, { "epoch": 0.00115, "grad_norm": 4.3266706466674805, "learning_rate": 5.7e-07, "loss": 11.371492004394531, "step": 115 }, { "epoch": 0.0012, "grad_norm": 4.769295692443848, "learning_rate": 5.95e-07, "loss": 11.372700500488282, "step": 120 }, { "epoch": 0.00125, "grad_norm": 4.18520975112915, "learning_rate": 6.200000000000001e-07, "loss": 11.3720947265625, "step": 125 }, { "epoch": 0.0013, "grad_norm": 4.398457050323486, "learning_rate": 6.450000000000001e-07, "loss": 11.370771789550782, "step": 130 }, { "epoch": 0.00135, "grad_norm": 4.747100353240967, "learning_rate": 6.7e-07, "loss": 11.357752990722656, "step": 135 }, { "epoch": 0.0014, "grad_norm": 4.509190559387207, "learning_rate": 6.950000000000001e-07, "loss": 11.364111328125, "step": 140 }, { "epoch": 0.00145, "grad_norm": 4.6881585121154785, "learning_rate": 7.2e-07, "loss": 11.366980743408202, "step": 145 }, { "epoch": 0.0015, "grad_norm": 4.881180763244629, "learning_rate": 7.450000000000001e-07, "loss": 11.363412475585937, "step": 150 }, { "epoch": 0.00155, "grad_norm": 4.727649688720703, "learning_rate": 7.7e-07, "loss": 11.352542114257812, "step": 155 }, { "epoch": 0.0016, "grad_norm": 12.317937850952148, "learning_rate": 7.950000000000001e-07, "loss": 11.34375228881836, "step": 160 }, { "epoch": 0.00165, "grad_norm": 4.48068380355835, "learning_rate": 8.200000000000001e-07, "loss": 11.34930877685547, "step": 165 }, { "epoch": 0.0017, "grad_norm": 5.245968341827393, "learning_rate": 8.450000000000002e-07, "loss": 11.34669189453125, "step": 170 }, { "epoch": 0.00175, "grad_norm": 4.364780902862549, "learning_rate": 8.7e-07, "loss": 11.34139404296875, "step": 175 }, { "epoch": 0.0018, "grad_norm": 4.651112079620361, "learning_rate": 8.95e-07, "loss": 11.33032684326172, "step": 180 }, { "epoch": 0.00185, "grad_norm": 4.255319595336914, "learning_rate": 9.200000000000001e-07, "loss": 11.325601196289062, "step": 185 }, { "epoch": 0.0019, "grad_norm": 4.345580101013184, "learning_rate": 9.450000000000001e-07, "loss": 11.318833160400391, "step": 190 }, { "epoch": 0.00195, "grad_norm": 8.251481056213379, "learning_rate": 9.7e-07, "loss": 11.305097198486328, "step": 195 }, { "epoch": 0.002, "grad_norm": 5.441329479217529, "learning_rate": 9.950000000000002e-07, "loss": 11.3010498046875, "step": 200 }, { "epoch": 0.00205, "grad_norm": 4.740893363952637, "learning_rate": 1.02e-06, "loss": 11.303424072265624, "step": 205 }, { "epoch": 0.0021, "grad_norm": 4.759396553039551, "learning_rate": 1.045e-06, "loss": 11.29638900756836, "step": 210 }, { "epoch": 0.00215, "grad_norm": 4.8090291023254395, "learning_rate": 1.0700000000000001e-06, "loss": 11.297976684570312, "step": 215 }, { "epoch": 0.0022, "grad_norm": 4.619208812713623, "learning_rate": 1.095e-06, "loss": 11.30591278076172, "step": 220 }, { "epoch": 0.00225, "grad_norm": 7.215763092041016, "learning_rate": 1.12e-06, "loss": 11.271207427978515, "step": 225 }, { "epoch": 0.0023, "grad_norm": 4.4626688957214355, "learning_rate": 1.145e-06, "loss": 11.283354187011719, "step": 230 }, { "epoch": 0.00235, "grad_norm": 5.522148609161377, "learning_rate": 1.1700000000000002e-06, "loss": 11.276052093505859, "step": 235 }, { "epoch": 0.0024, "grad_norm": 5.718818187713623, "learning_rate": 1.195e-06, "loss": 11.265927124023438, "step": 240 }, { "epoch": 0.00245, "grad_norm": 4.384155750274658, "learning_rate": 1.2200000000000002e-06, "loss": 11.270028686523437, "step": 245 }, { "epoch": 0.0025, "grad_norm": 4.693068027496338, "learning_rate": 1.2450000000000002e-06, "loss": 11.256238555908203, "step": 250 }, { "epoch": 0.00255, "grad_norm": 4.629441261291504, "learning_rate": 1.2700000000000001e-06, "loss": 11.241466522216797, "step": 255 }, { "epoch": 0.0026, "grad_norm": 4.69270133972168, "learning_rate": 1.295e-06, "loss": 11.24886245727539, "step": 260 }, { "epoch": 0.00265, "grad_norm": 4.940598964691162, "learning_rate": 1.32e-06, "loss": 11.239512634277343, "step": 265 }, { "epoch": 0.0027, "grad_norm": 4.755590915679932, "learning_rate": 1.3450000000000003e-06, "loss": 11.232038879394532, "step": 270 }, { "epoch": 0.00275, "grad_norm": 4.594224452972412, "learning_rate": 1.3700000000000002e-06, "loss": 11.22227783203125, "step": 275 }, { "epoch": 0.0028, "grad_norm": 4.521237373352051, "learning_rate": 1.3950000000000002e-06, "loss": 11.230762481689453, "step": 280 }, { "epoch": 0.00285, "grad_norm": 4.605922698974609, "learning_rate": 1.42e-06, "loss": 11.216458129882813, "step": 285 }, { "epoch": 0.0029, "grad_norm": 4.5475873947143555, "learning_rate": 1.445e-06, "loss": 11.196221923828125, "step": 290 }, { "epoch": 0.00295, "grad_norm": 4.879011631011963, "learning_rate": 1.4700000000000001e-06, "loss": 11.179991912841796, "step": 295 }, { "epoch": 0.003, "grad_norm": 4.8475661277771, "learning_rate": 1.495e-06, "loss": 11.187455749511718, "step": 300 }, { "epoch": 0.00305, "grad_norm": 4.775590419769287, "learning_rate": 1.52e-06, "loss": 11.185417175292969, "step": 305 }, { "epoch": 0.0031, "grad_norm": 5.093811511993408, "learning_rate": 1.545e-06, "loss": 11.17120590209961, "step": 310 }, { "epoch": 0.00315, "grad_norm": 5.057686805725098, "learning_rate": 1.5700000000000002e-06, "loss": 11.157087707519532, "step": 315 }, { "epoch": 0.0032, "grad_norm": 7.531545162200928, "learning_rate": 1.5950000000000002e-06, "loss": 11.172886657714844, "step": 320 }, { "epoch": 0.00325, "grad_norm": 4.64385461807251, "learning_rate": 1.6200000000000002e-06, "loss": 11.122666931152343, "step": 325 }, { "epoch": 0.0033, "grad_norm": 4.897433280944824, "learning_rate": 1.6450000000000001e-06, "loss": 11.130364990234375, "step": 330 }, { "epoch": 0.00335, "grad_norm": 4.999792098999023, "learning_rate": 1.6700000000000003e-06, "loss": 11.133758544921875, "step": 335 }, { "epoch": 0.0034, "grad_norm": 4.8241190910339355, "learning_rate": 1.6950000000000003e-06, "loss": 11.120867919921874, "step": 340 }, { "epoch": 0.00345, "grad_norm": 5.0612311363220215, "learning_rate": 1.72e-06, "loss": 11.110643768310547, "step": 345 }, { "epoch": 0.0035, "grad_norm": 4.9558916091918945, "learning_rate": 1.745e-06, "loss": 11.094322204589844, "step": 350 }, { "epoch": 0.00355, "grad_norm": 5.024559497833252, "learning_rate": 1.77e-06, "loss": 11.085276794433593, "step": 355 }, { "epoch": 0.0036, "grad_norm": 4.878489017486572, "learning_rate": 1.7950000000000002e-06, "loss": 11.085975646972656, "step": 360 }, { "epoch": 0.00365, "grad_norm": 4.913398742675781, "learning_rate": 1.8200000000000002e-06, "loss": 11.061337280273438, "step": 365 }, { "epoch": 0.0037, "grad_norm": 4.793575286865234, "learning_rate": 1.8450000000000001e-06, "loss": 11.044109344482422, "step": 370 }, { "epoch": 0.00375, "grad_norm": 5.193029880523682, "learning_rate": 1.87e-06, "loss": 11.037064361572266, "step": 375 }, { "epoch": 0.0038, "grad_norm": 5.144454479217529, "learning_rate": 1.895e-06, "loss": 11.01653823852539, "step": 380 }, { "epoch": 0.00385, "grad_norm": 5.089953422546387, "learning_rate": 1.9200000000000003e-06, "loss": 11.017008209228516, "step": 385 }, { "epoch": 0.0039, "grad_norm": 5.064090251922607, "learning_rate": 1.945e-06, "loss": 11.004759979248046, "step": 390 }, { "epoch": 0.00395, "grad_norm": 5.31633186340332, "learning_rate": 1.97e-06, "loss": 10.998065185546874, "step": 395 }, { "epoch": 0.004, "grad_norm": 5.426773548126221, "learning_rate": 1.9950000000000004e-06, "loss": 10.97863311767578, "step": 400 }, { "epoch": 0.00405, "grad_norm": 5.769895076751709, "learning_rate": 2.02e-06, "loss": 11.002171325683594, "step": 405 }, { "epoch": 0.0041, "grad_norm": 5.271365642547607, "learning_rate": 2.045e-06, "loss": 10.941307067871094, "step": 410 }, { "epoch": 0.00415, "grad_norm": 5.61900520324707, "learning_rate": 2.07e-06, "loss": 10.91864471435547, "step": 415 }, { "epoch": 0.0042, "grad_norm": 5.293263912200928, "learning_rate": 2.0950000000000003e-06, "loss": 10.908339691162109, "step": 420 }, { "epoch": 0.00425, "grad_norm": 5.301192760467529, "learning_rate": 2.12e-06, "loss": 10.903659057617187, "step": 425 }, { "epoch": 0.0043, "grad_norm": 5.45012092590332, "learning_rate": 2.1450000000000002e-06, "loss": 10.882176971435547, "step": 430 }, { "epoch": 0.00435, "grad_norm": 5.297408103942871, "learning_rate": 2.17e-06, "loss": 10.886808776855469, "step": 435 }, { "epoch": 0.0044, "grad_norm": 5.473090648651123, "learning_rate": 2.195e-06, "loss": 10.851908874511718, "step": 440 }, { "epoch": 0.00445, "grad_norm": 5.314564228057861, "learning_rate": 2.2200000000000003e-06, "loss": 10.840705871582031, "step": 445 }, { "epoch": 0.0045, "grad_norm": 5.945340156555176, "learning_rate": 2.245e-06, "loss": 10.824274444580078, "step": 450 }, { "epoch": 0.00455, "grad_norm": 5.417142391204834, "learning_rate": 2.2700000000000003e-06, "loss": 10.797765350341797, "step": 455 }, { "epoch": 0.0046, "grad_norm": 5.716139316558838, "learning_rate": 2.2950000000000005e-06, "loss": 10.765834045410156, "step": 460 }, { "epoch": 0.00465, "grad_norm": 6.437534809112549, "learning_rate": 2.3200000000000002e-06, "loss": 10.756231689453125, "step": 465 }, { "epoch": 0.0047, "grad_norm": 5.495182514190674, "learning_rate": 2.345e-06, "loss": 10.733113098144532, "step": 470 }, { "epoch": 0.00475, "grad_norm": 5.419945240020752, "learning_rate": 2.37e-06, "loss": 10.732737731933593, "step": 475 }, { "epoch": 0.0048, "grad_norm": 5.564905643463135, "learning_rate": 2.395e-06, "loss": 10.691459655761719, "step": 480 }, { "epoch": 0.00485, "grad_norm": 6.678794860839844, "learning_rate": 2.42e-06, "loss": 10.691938018798828, "step": 485 }, { "epoch": 0.0049, "grad_norm": 6.339458465576172, "learning_rate": 2.4450000000000003e-06, "loss": 10.660790252685548, "step": 490 }, { "epoch": 0.00495, "grad_norm": 5.781903266906738, "learning_rate": 2.47e-06, "loss": 10.624126434326172, "step": 495 }, { "epoch": 0.005, "grad_norm": 5.440398216247559, "learning_rate": 2.4950000000000003e-06, "loss": 10.589701843261718, "step": 500 }, { "epoch": 0.00505, "grad_norm": 5.540033340454102, "learning_rate": 2.52e-06, "loss": 10.57830352783203, "step": 505 }, { "epoch": 0.0051, "grad_norm": 5.747284412384033, "learning_rate": 2.545e-06, "loss": 10.555665588378906, "step": 510 }, { "epoch": 0.00515, "grad_norm": 6.939126491546631, "learning_rate": 2.5700000000000004e-06, "loss": 10.73156967163086, "step": 515 }, { "epoch": 0.0052, "grad_norm": 5.709467887878418, "learning_rate": 2.595e-06, "loss": 10.511280822753907, "step": 520 }, { "epoch": 0.00525, "grad_norm": 5.473364353179932, "learning_rate": 2.6200000000000003e-06, "loss": 10.478748321533203, "step": 525 }, { "epoch": 0.0053, "grad_norm": 5.8979668617248535, "learning_rate": 2.6450000000000005e-06, "loss": 10.464718627929688, "step": 530 }, { "epoch": 0.00535, "grad_norm": 5.645345687866211, "learning_rate": 2.6700000000000003e-06, "loss": 10.445115661621093, "step": 535 }, { "epoch": 0.0054, "grad_norm": 5.631922721862793, "learning_rate": 2.6950000000000005e-06, "loss": 10.427560424804687, "step": 540 }, { "epoch": 0.00545, "grad_norm": 5.477216720581055, "learning_rate": 2.7200000000000002e-06, "loss": 10.402005004882813, "step": 545 }, { "epoch": 0.0055, "grad_norm": 6.0502142906188965, "learning_rate": 2.7450000000000004e-06, "loss": 10.38473129272461, "step": 550 }, { "epoch": 0.00555, "grad_norm": 5.447207450866699, "learning_rate": 2.7700000000000006e-06, "loss": 10.337748718261718, "step": 555 }, { "epoch": 0.0056, "grad_norm": 5.643229007720947, "learning_rate": 2.7950000000000003e-06, "loss": 10.345030975341796, "step": 560 }, { "epoch": 0.00565, "grad_norm": 5.683911323547363, "learning_rate": 2.82e-06, "loss": 10.273091888427734, "step": 565 }, { "epoch": 0.0057, "grad_norm": 5.915828704833984, "learning_rate": 2.845e-06, "loss": 10.261133575439453, "step": 570 }, { "epoch": 0.00575, "grad_norm": 5.684077739715576, "learning_rate": 2.87e-06, "loss": 10.23939208984375, "step": 575 }, { "epoch": 0.0058, "grad_norm": 5.337005138397217, "learning_rate": 2.8950000000000002e-06, "loss": 10.22220687866211, "step": 580 }, { "epoch": 0.00585, "grad_norm": 5.290330410003662, "learning_rate": 2.92e-06, "loss": 10.167778015136719, "step": 585 }, { "epoch": 0.0059, "grad_norm": 5.386977195739746, "learning_rate": 2.945e-06, "loss": 10.156690979003907, "step": 590 }, { "epoch": 0.00595, "grad_norm": 5.265517234802246, "learning_rate": 2.97e-06, "loss": 10.104603576660157, "step": 595 }, { "epoch": 0.006, "grad_norm": 5.228604793548584, "learning_rate": 2.995e-06, "loss": 10.105325317382812, "step": 600 }, { "epoch": 0.00605, "grad_norm": 5.296448707580566, "learning_rate": 3.0200000000000003e-06, "loss": 10.060831451416016, "step": 605 }, { "epoch": 0.0061, "grad_norm": 4.951342582702637, "learning_rate": 3.045e-06, "loss": 10.031975555419923, "step": 610 }, { "epoch": 0.00615, "grad_norm": 5.207087993621826, "learning_rate": 3.0700000000000003e-06, "loss": 9.999942779541016, "step": 615 }, { "epoch": 0.0062, "grad_norm": 5.053549289703369, "learning_rate": 3.0950000000000004e-06, "loss": 9.995388793945313, "step": 620 }, { "epoch": 0.00625, "grad_norm": 5.550538539886475, "learning_rate": 3.12e-06, "loss": 9.951909637451172, "step": 625 }, { "epoch": 0.0063, "grad_norm": 4.984603404998779, "learning_rate": 3.1450000000000004e-06, "loss": 9.862808990478516, "step": 630 }, { "epoch": 0.00635, "grad_norm": 7.60646915435791, "learning_rate": 3.17e-06, "loss": 9.843303680419922, "step": 635 }, { "epoch": 0.0064, "grad_norm": 4.427210330963135, "learning_rate": 3.1950000000000003e-06, "loss": 9.847752380371094, "step": 640 }, { "epoch": 0.00645, "grad_norm": 4.919750690460205, "learning_rate": 3.2200000000000005e-06, "loss": 9.77484130859375, "step": 645 }, { "epoch": 0.0065, "grad_norm": 4.5155839920043945, "learning_rate": 3.2450000000000003e-06, "loss": 9.778485107421876, "step": 650 }, { "epoch": 0.00655, "grad_norm": 4.460862636566162, "learning_rate": 3.2700000000000005e-06, "loss": 9.770641326904297, "step": 655 }, { "epoch": 0.0066, "grad_norm": 10.43223762512207, "learning_rate": 3.2950000000000002e-06, "loss": 9.763922119140625, "step": 660 }, { "epoch": 0.00665, "grad_norm": 4.754331588745117, "learning_rate": 3.3200000000000004e-06, "loss": 9.727837371826173, "step": 665 }, { "epoch": 0.0067, "grad_norm": 4.076197624206543, "learning_rate": 3.3450000000000006e-06, "loss": 9.735386657714844, "step": 670 }, { "epoch": 0.00675, "grad_norm": 4.477645397186279, "learning_rate": 3.3700000000000003e-06, "loss": 9.641921997070312, "step": 675 }, { "epoch": 0.0068, "grad_norm": 4.307309627532959, "learning_rate": 3.3950000000000005e-06, "loss": 9.635581970214844, "step": 680 }, { "epoch": 0.00685, "grad_norm": 6.096124172210693, "learning_rate": 3.4200000000000007e-06, "loss": 9.605391693115234, "step": 685 }, { "epoch": 0.0069, "grad_norm": 3.967247724533081, "learning_rate": 3.445e-06, "loss": 9.544625854492187, "step": 690 }, { "epoch": 0.00695, "grad_norm": 3.9932079315185547, "learning_rate": 3.4700000000000002e-06, "loss": 9.510562133789062, "step": 695 }, { "epoch": 0.007, "grad_norm": 4.364710807800293, "learning_rate": 3.495e-06, "loss": 9.504785919189453, "step": 700 }, { "epoch": 0.00705, "grad_norm": 3.816746473312378, "learning_rate": 3.52e-06, "loss": 9.480088043212891, "step": 705 }, { "epoch": 0.0071, "grad_norm": 3.6354165077209473, "learning_rate": 3.545e-06, "loss": 9.415628814697266, "step": 710 }, { "epoch": 0.00715, "grad_norm": 3.682079553604126, "learning_rate": 3.57e-06, "loss": 9.418021392822265, "step": 715 }, { "epoch": 0.0072, "grad_norm": 3.706894874572754, "learning_rate": 3.5950000000000003e-06, "loss": 9.412801361083984, "step": 720 }, { "epoch": 0.00725, "grad_norm": 3.8119115829467773, "learning_rate": 3.62e-06, "loss": 9.358140563964843, "step": 725 }, { "epoch": 0.0073, "grad_norm": 3.4215471744537354, "learning_rate": 3.6450000000000003e-06, "loss": 9.317784881591797, "step": 730 }, { "epoch": 0.00735, "grad_norm": 3.706967353820801, "learning_rate": 3.6700000000000004e-06, "loss": 9.306209564208984, "step": 735 }, { "epoch": 0.0074, "grad_norm": 3.479414939880371, "learning_rate": 3.695e-06, "loss": 9.294970703125, "step": 740 }, { "epoch": 0.00745, "grad_norm": 3.5228655338287354, "learning_rate": 3.7200000000000004e-06, "loss": 9.236659240722656, "step": 745 }, { "epoch": 0.0075, "grad_norm": 3.159956216812134, "learning_rate": 3.745e-06, "loss": 9.229638671875, "step": 750 }, { "epoch": 0.00755, "grad_norm": 3.3315820693969727, "learning_rate": 3.7700000000000003e-06, "loss": 9.265616607666015, "step": 755 }, { "epoch": 0.0076, "grad_norm": 3.3987746238708496, "learning_rate": 3.7950000000000005e-06, "loss": 9.200483703613282, "step": 760 }, { "epoch": 0.00765, "grad_norm": 3.2334847450256348, "learning_rate": 3.820000000000001e-06, "loss": 9.15868682861328, "step": 765 }, { "epoch": 0.0077, "grad_norm": 3.0223312377929688, "learning_rate": 3.8450000000000005e-06, "loss": 9.138261413574218, "step": 770 }, { "epoch": 0.00775, "grad_norm": 3.094388246536255, "learning_rate": 3.87e-06, "loss": 9.107332611083985, "step": 775 }, { "epoch": 0.0078, "grad_norm": 2.978381395339966, "learning_rate": 3.895000000000001e-06, "loss": 9.111074829101563, "step": 780 }, { "epoch": 0.00785, "grad_norm": 3.2139904499053955, "learning_rate": 3.920000000000001e-06, "loss": 9.064675903320312, "step": 785 }, { "epoch": 0.0079, "grad_norm": 3.1312804222106934, "learning_rate": 3.945e-06, "loss": 9.033372497558593, "step": 790 }, { "epoch": 0.00795, "grad_norm": 2.888423204421997, "learning_rate": 3.97e-06, "loss": 9.024428558349609, "step": 795 }, { "epoch": 0.008, "grad_norm": 3.2791998386383057, "learning_rate": 3.995000000000001e-06, "loss": 9.001249694824219, "step": 800 }, { "epoch": 0.00805, "grad_norm": 6.945764541625977, "learning_rate": 4.0200000000000005e-06, "loss": 9.062470245361329, "step": 805 }, { "epoch": 0.0081, "grad_norm": 2.9852473735809326, "learning_rate": 4.045e-06, "loss": 8.994219207763672, "step": 810 }, { "epoch": 0.00815, "grad_norm": 3.3515257835388184, "learning_rate": 4.07e-06, "loss": 8.921066284179688, "step": 815 }, { "epoch": 0.0082, "grad_norm": 3.1530566215515137, "learning_rate": 4.095e-06, "loss": 9.04370346069336, "step": 820 }, { "epoch": 0.00825, "grad_norm": 2.6221582889556885, "learning_rate": 4.12e-06, "loss": 8.895455932617187, "step": 825 }, { "epoch": 0.0083, "grad_norm": 2.511815071105957, "learning_rate": 4.145e-06, "loss": 8.86104507446289, "step": 830 }, { "epoch": 0.00835, "grad_norm": 2.502098560333252, "learning_rate": 4.17e-06, "loss": 8.794476318359376, "step": 835 }, { "epoch": 0.0084, "grad_norm": 2.589139461517334, "learning_rate": 4.1950000000000005e-06, "loss": 8.840362548828125, "step": 840 }, { "epoch": 0.00845, "grad_norm": 2.3018338680267334, "learning_rate": 4.22e-06, "loss": 8.811112976074218, "step": 845 }, { "epoch": 0.0085, "grad_norm": 2.4454731941223145, "learning_rate": 4.245e-06, "loss": 8.858855438232421, "step": 850 }, { "epoch": 0.00855, "grad_norm": 2.5547730922698975, "learning_rate": 4.270000000000001e-06, "loss": 8.768284606933594, "step": 855 }, { "epoch": 0.0086, "grad_norm": 2.480674982070923, "learning_rate": 4.295e-06, "loss": 8.701139831542969, "step": 860 }, { "epoch": 0.00865, "grad_norm": 2.4156012535095215, "learning_rate": 4.32e-06, "loss": 8.764662933349609, "step": 865 }, { "epoch": 0.0087, "grad_norm": 2.280367851257324, "learning_rate": 4.345000000000001e-06, "loss": 8.722775268554688, "step": 870 }, { "epoch": 0.00875, "grad_norm": 2.808201313018799, "learning_rate": 4.3700000000000005e-06, "loss": 8.717276763916015, "step": 875 }, { "epoch": 0.0088, "grad_norm": 2.246891736984253, "learning_rate": 4.395e-06, "loss": 8.67289047241211, "step": 880 }, { "epoch": 0.00885, "grad_norm": 2.4555671215057373, "learning_rate": 4.42e-06, "loss": 8.661814117431641, "step": 885 }, { "epoch": 0.0089, "grad_norm": 1.9921596050262451, "learning_rate": 4.445000000000001e-06, "loss": 8.645142364501954, "step": 890 }, { "epoch": 0.00895, "grad_norm": 1.919730305671692, "learning_rate": 4.47e-06, "loss": 8.640831756591798, "step": 895 }, { "epoch": 0.009, "grad_norm": 1.8581984043121338, "learning_rate": 4.495e-06, "loss": 8.641734313964843, "step": 900 }, { "epoch": 0.00905, "grad_norm": 1.8872401714324951, "learning_rate": 4.520000000000001e-06, "loss": 8.6050048828125, "step": 905 }, { "epoch": 0.0091, "grad_norm": 2.2412946224212646, "learning_rate": 4.5450000000000005e-06, "loss": 8.586141204833984, "step": 910 }, { "epoch": 0.00915, "grad_norm": 1.865574836730957, "learning_rate": 4.57e-06, "loss": 8.594630432128906, "step": 915 }, { "epoch": 0.0092, "grad_norm": 1.8206228017807007, "learning_rate": 4.595000000000001e-06, "loss": 8.573612213134766, "step": 920 }, { "epoch": 0.00925, "grad_norm": 1.799764633178711, "learning_rate": 4.620000000000001e-06, "loss": 8.560906982421875, "step": 925 }, { "epoch": 0.0093, "grad_norm": 1.602264642715454, "learning_rate": 4.645e-06, "loss": 8.535279083251954, "step": 930 }, { "epoch": 0.00935, "grad_norm": 2.0852370262145996, "learning_rate": 4.670000000000001e-06, "loss": 8.463192749023438, "step": 935 }, { "epoch": 0.0094, "grad_norm": 1.6677930355072021, "learning_rate": 4.695e-06, "loss": 8.598103332519532, "step": 940 }, { "epoch": 0.00945, "grad_norm": 1.5061208009719849, "learning_rate": 4.7200000000000005e-06, "loss": 8.520038604736328, "step": 945 }, { "epoch": 0.0095, "grad_norm": 1.5706422328948975, "learning_rate": 4.745e-06, "loss": 8.51326904296875, "step": 950 }, { "epoch": 0.00955, "grad_norm": 1.6344046592712402, "learning_rate": 4.77e-06, "loss": 8.496923828125, "step": 955 }, { "epoch": 0.0096, "grad_norm": 1.7965465784072876, "learning_rate": 4.795e-06, "loss": 8.496626281738282, "step": 960 }, { "epoch": 0.00965, "grad_norm": 1.3700742721557617, "learning_rate": 4.8200000000000004e-06, "loss": 8.495254516601562, "step": 965 }, { "epoch": 0.0097, "grad_norm": 1.9607622623443604, "learning_rate": 4.845e-06, "loss": 8.435150909423829, "step": 970 }, { "epoch": 0.00975, "grad_norm": 1.677876591682434, "learning_rate": 4.87e-06, "loss": 8.460844421386719, "step": 975 }, { "epoch": 0.0098, "grad_norm": 1.5053876638412476, "learning_rate": 4.8950000000000006e-06, "loss": 8.346241760253907, "step": 980 }, { "epoch": 0.00985, "grad_norm": 1.5707275867462158, "learning_rate": 4.92e-06, "loss": 8.436741638183594, "step": 985 }, { "epoch": 0.0099, "grad_norm": 2.479970693588257, "learning_rate": 4.945e-06, "loss": 8.436065673828125, "step": 990 }, { "epoch": 0.00995, "grad_norm": 1.4045437574386597, "learning_rate": 4.970000000000001e-06, "loss": 8.4346923828125, "step": 995 }, { "epoch": 0.01, "grad_norm": 1.0770483016967773, "learning_rate": 4.9950000000000005e-06, "loss": 8.442188262939453, "step": 1000 }, { "epoch": 0.01005, "grad_norm": 1.4825741052627563, "learning_rate": 4.99979797979798e-06, "loss": 8.436553955078125, "step": 1005 }, { "epoch": 0.0101, "grad_norm": 1.169847846031189, "learning_rate": 4.9995454545454546e-06, "loss": 8.397321319580078, "step": 1010 }, { "epoch": 0.01015, "grad_norm": 1.318166971206665, "learning_rate": 4.99929292929293e-06, "loss": 8.369618988037109, "step": 1015 }, { "epoch": 0.0102, "grad_norm": 1.6131330728530884, "learning_rate": 4.999040404040405e-06, "loss": 8.368437957763671, "step": 1020 }, { "epoch": 0.01025, "grad_norm": 1.4233115911483765, "learning_rate": 4.998787878787879e-06, "loss": 8.343531036376953, "step": 1025 }, { "epoch": 0.0103, "grad_norm": 1.1530827283859253, "learning_rate": 4.998535353535354e-06, "loss": 8.427300262451173, "step": 1030 }, { "epoch": 0.01035, "grad_norm": 2.184964656829834, "learning_rate": 4.9982828282828286e-06, "loss": 8.355789184570312, "step": 1035 }, { "epoch": 0.0104, "grad_norm": 1.1493545770645142, "learning_rate": 4.998030303030303e-06, "loss": 8.739952850341798, "step": 1040 }, { "epoch": 0.01045, "grad_norm": 3.762376308441162, "learning_rate": 4.997777777777778e-06, "loss": 8.376821136474609, "step": 1045 }, { "epoch": 0.0105, "grad_norm": 1.2931392192840576, "learning_rate": 4.9975252525252525e-06, "loss": 8.38915786743164, "step": 1050 }, { "epoch": 0.01055, "grad_norm": 1.0293680429458618, "learning_rate": 4.997272727272728e-06, "loss": 8.409986114501953, "step": 1055 }, { "epoch": 0.0106, "grad_norm": 1.488648772239685, "learning_rate": 4.9970202020202026e-06, "loss": 8.383646392822266, "step": 1060 }, { "epoch": 0.01065, "grad_norm": 1.538806438446045, "learning_rate": 4.996767676767677e-06, "loss": 8.384919738769531, "step": 1065 }, { "epoch": 0.0107, "grad_norm": 1.0452971458435059, "learning_rate": 4.996515151515152e-06, "loss": 8.335368347167968, "step": 1070 }, { "epoch": 0.01075, "grad_norm": 1.1162161827087402, "learning_rate": 4.9962626262626265e-06, "loss": 8.351953125, "step": 1075 }, { "epoch": 0.0108, "grad_norm": 1.0097211599349976, "learning_rate": 4.996010101010101e-06, "loss": 8.36413803100586, "step": 1080 }, { "epoch": 0.01085, "grad_norm": 1.7642394304275513, "learning_rate": 4.995757575757576e-06, "loss": 8.332969665527344, "step": 1085 }, { "epoch": 0.0109, "grad_norm": 1.237863540649414, "learning_rate": 4.995505050505051e-06, "loss": 8.341726684570313, "step": 1090 }, { "epoch": 0.01095, "grad_norm": 1.0263097286224365, "learning_rate": 4.995252525252526e-06, "loss": 8.328056335449219, "step": 1095 }, { "epoch": 0.011, "grad_norm": 1.1123780012130737, "learning_rate": 4.9950000000000005e-06, "loss": 8.23682098388672, "step": 1100 }, { "epoch": 0.01105, "grad_norm": 1.0373117923736572, "learning_rate": 4.994747474747475e-06, "loss": 8.368669128417968, "step": 1105 }, { "epoch": 0.0111, "grad_norm": 0.9184291958808899, "learning_rate": 4.9944949494949506e-06, "loss": 8.34291763305664, "step": 1110 }, { "epoch": 0.01115, "grad_norm": 0.9140387177467346, "learning_rate": 4.994242424242425e-06, "loss": 8.37460708618164, "step": 1115 }, { "epoch": 0.0112, "grad_norm": 1.2094776630401611, "learning_rate": 4.993989898989899e-06, "loss": 8.383152770996094, "step": 1120 }, { "epoch": 0.01125, "grad_norm": 4.631429672241211, "learning_rate": 4.993737373737374e-06, "loss": 8.334056091308593, "step": 1125 }, { "epoch": 0.0113, "grad_norm": 1.4649916887283325, "learning_rate": 4.993484848484849e-06, "loss": 8.296734619140626, "step": 1130 }, { "epoch": 0.01135, "grad_norm": 0.9813880324363708, "learning_rate": 4.993232323232324e-06, "loss": 8.341432189941406, "step": 1135 }, { "epoch": 0.0114, "grad_norm": 1.2152888774871826, "learning_rate": 4.992979797979798e-06, "loss": 8.343881225585937, "step": 1140 }, { "epoch": 0.01145, "grad_norm": 1.2691558599472046, "learning_rate": 4.992727272727273e-06, "loss": 8.350188446044921, "step": 1145 }, { "epoch": 0.0115, "grad_norm": 1.2595213651657104, "learning_rate": 4.9924747474747485e-06, "loss": 8.384588623046875, "step": 1150 }, { "epoch": 0.01155, "grad_norm": 1.326491355895996, "learning_rate": 4.992222222222223e-06, "loss": 8.233026123046875, "step": 1155 }, { "epoch": 0.0116, "grad_norm": 1.1023447513580322, "learning_rate": 4.991969696969698e-06, "loss": 8.323655700683593, "step": 1160 }, { "epoch": 0.01165, "grad_norm": 1.0880986452102661, "learning_rate": 4.991717171717172e-06, "loss": 8.350698852539063, "step": 1165 }, { "epoch": 0.0117, "grad_norm": 0.9525489807128906, "learning_rate": 4.991464646464647e-06, "loss": 8.385467529296875, "step": 1170 }, { "epoch": 0.01175, "grad_norm": 1.2980769872665405, "learning_rate": 4.991212121212122e-06, "loss": 8.30013198852539, "step": 1175 }, { "epoch": 0.0118, "grad_norm": 0.8786501288414001, "learning_rate": 4.990959595959596e-06, "loss": 8.295884704589843, "step": 1180 }, { "epoch": 0.01185, "grad_norm": 2.9689643383026123, "learning_rate": 4.990707070707071e-06, "loss": 8.389551544189453, "step": 1185 }, { "epoch": 0.0119, "grad_norm": 1.0820271968841553, "learning_rate": 4.990454545454546e-06, "loss": 8.233956909179687, "step": 1190 }, { "epoch": 0.01195, "grad_norm": 1.6440669298171997, "learning_rate": 4.990202020202021e-06, "loss": 8.34623031616211, "step": 1195 }, { "epoch": 0.012, "grad_norm": 1.1372045278549194, "learning_rate": 4.989949494949496e-06, "loss": 8.282433319091798, "step": 1200 }, { "epoch": 0.01205, "grad_norm": 1.4206640720367432, "learning_rate": 4.98969696969697e-06, "loss": 8.280522918701172, "step": 1205 }, { "epoch": 0.0121, "grad_norm": 1.087672472000122, "learning_rate": 4.989444444444445e-06, "loss": 8.324800109863281, "step": 1210 }, { "epoch": 0.01215, "grad_norm": 1.224624514579773, "learning_rate": 4.9891919191919195e-06, "loss": 8.31344223022461, "step": 1215 }, { "epoch": 0.0122, "grad_norm": 1.49359130859375, "learning_rate": 4.988939393939394e-06, "loss": 8.351203918457031, "step": 1220 }, { "epoch": 0.01225, "grad_norm": 1.5071840286254883, "learning_rate": 4.988686868686869e-06, "loss": 8.358255767822266, "step": 1225 }, { "epoch": 0.0123, "grad_norm": 0.9653416275978088, "learning_rate": 4.988434343434344e-06, "loss": 8.356095123291016, "step": 1230 }, { "epoch": 0.01235, "grad_norm": 3.9678640365600586, "learning_rate": 4.988181818181819e-06, "loss": 8.356867218017578, "step": 1235 }, { "epoch": 0.0124, "grad_norm": 1.0885186195373535, "learning_rate": 4.9879292929292935e-06, "loss": 8.315809631347657, "step": 1240 }, { "epoch": 0.01245, "grad_norm": 1.2559725046157837, "learning_rate": 4.987676767676768e-06, "loss": 8.334637451171876, "step": 1245 }, { "epoch": 0.0125, "grad_norm": 1.6730226278305054, "learning_rate": 4.987424242424243e-06, "loss": 8.255744934082031, "step": 1250 }, { "epoch": 0.01255, "grad_norm": 0.9193032383918762, "learning_rate": 4.987171717171717e-06, "loss": 8.322821807861327, "step": 1255 }, { "epoch": 0.0126, "grad_norm": 1.4625719785690308, "learning_rate": 4.986919191919192e-06, "loss": 8.404915618896485, "step": 1260 }, { "epoch": 0.01265, "grad_norm": 1.6066993474960327, "learning_rate": 4.986666666666667e-06, "loss": 8.289643096923829, "step": 1265 }, { "epoch": 0.0127, "grad_norm": 1.2661678791046143, "learning_rate": 4.986414141414142e-06, "loss": 8.324212646484375, "step": 1270 }, { "epoch": 0.01275, "grad_norm": 1.1655652523040771, "learning_rate": 4.986161616161617e-06, "loss": 8.273467254638671, "step": 1275 }, { "epoch": 0.0128, "grad_norm": 1.2104438543319702, "learning_rate": 4.985909090909091e-06, "loss": 8.323265838623048, "step": 1280 }, { "epoch": 0.01285, "grad_norm": 2.3040058612823486, "learning_rate": 4.985656565656566e-06, "loss": 8.300347137451173, "step": 1285 }, { "epoch": 0.0129, "grad_norm": 1.1858747005462646, "learning_rate": 4.985404040404041e-06, "loss": 8.232485961914062, "step": 1290 }, { "epoch": 0.01295, "grad_norm": 1.0296400785446167, "learning_rate": 4.985151515151515e-06, "loss": 8.266015625, "step": 1295 }, { "epoch": 0.013, "grad_norm": 1.1126850843429565, "learning_rate": 4.98489898989899e-06, "loss": 8.301464080810547, "step": 1300 }, { "epoch": 0.01305, "grad_norm": 11.48436450958252, "learning_rate": 4.9846464646464645e-06, "loss": 8.282581329345703, "step": 1305 }, { "epoch": 0.0131, "grad_norm": 2.132294178009033, "learning_rate": 4.98439393939394e-06, "loss": 8.259134674072266, "step": 1310 }, { "epoch": 0.01315, "grad_norm": 1.302921175956726, "learning_rate": 4.984141414141415e-06, "loss": 8.292393493652344, "step": 1315 }, { "epoch": 0.0132, "grad_norm": 1.0039948225021362, "learning_rate": 4.983888888888889e-06, "loss": 8.319845581054688, "step": 1320 }, { "epoch": 0.01325, "grad_norm": 1.3267877101898193, "learning_rate": 4.983636363636364e-06, "loss": 8.22216339111328, "step": 1325 }, { "epoch": 0.0133, "grad_norm": 1.763733148574829, "learning_rate": 4.983383838383839e-06, "loss": 8.319412994384766, "step": 1330 }, { "epoch": 0.01335, "grad_norm": 1.1584218740463257, "learning_rate": 4.983131313131314e-06, "loss": 8.271452331542969, "step": 1335 }, { "epoch": 0.0134, "grad_norm": 3.1952457427978516, "learning_rate": 4.982878787878788e-06, "loss": 8.158982849121093, "step": 1340 }, { "epoch": 0.01345, "grad_norm": 1.6762995719909668, "learning_rate": 4.982626262626262e-06, "loss": 8.305218505859376, "step": 1345 }, { "epoch": 0.0135, "grad_norm": 1.278686761856079, "learning_rate": 4.982373737373738e-06, "loss": 8.268582153320313, "step": 1350 }, { "epoch": 0.01355, "grad_norm": 1.3549070358276367, "learning_rate": 4.9821212121212125e-06, "loss": 8.244148254394531, "step": 1355 }, { "epoch": 0.0136, "grad_norm": 1.3042219877243042, "learning_rate": 4.981868686868687e-06, "loss": 8.29886245727539, "step": 1360 }, { "epoch": 0.01365, "grad_norm": 1.8016526699066162, "learning_rate": 4.981616161616162e-06, "loss": 8.295376586914063, "step": 1365 }, { "epoch": 0.0137, "grad_norm": 1.8723101615905762, "learning_rate": 4.981363636363637e-06, "loss": 8.270258331298828, "step": 1370 }, { "epoch": 0.01375, "grad_norm": 1.7364376783370972, "learning_rate": 4.981111111111112e-06, "loss": 8.238687133789062, "step": 1375 }, { "epoch": 0.0138, "grad_norm": 1.3971294164657593, "learning_rate": 4.9808585858585865e-06, "loss": 8.189012145996093, "step": 1380 }, { "epoch": 0.01385, "grad_norm": 1.3947763442993164, "learning_rate": 4.980606060606061e-06, "loss": 8.272319793701172, "step": 1385 }, { "epoch": 0.0139, "grad_norm": 1.2999768257141113, "learning_rate": 4.980353535353536e-06, "loss": 8.312237548828126, "step": 1390 }, { "epoch": 0.01395, "grad_norm": 1.1922824382781982, "learning_rate": 4.98010101010101e-06, "loss": 8.265843200683594, "step": 1395 }, { "epoch": 0.014, "grad_norm": 1.6726585626602173, "learning_rate": 4.979848484848485e-06, "loss": 8.325080871582031, "step": 1400 }, { "epoch": 0.01405, "grad_norm": 1.3737518787384033, "learning_rate": 4.97959595959596e-06, "loss": 8.261061859130859, "step": 1405 }, { "epoch": 0.0141, "grad_norm": 1.5159094333648682, "learning_rate": 4.979343434343435e-06, "loss": 8.244552612304688, "step": 1410 }, { "epoch": 0.01415, "grad_norm": 1.6030014753341675, "learning_rate": 4.97909090909091e-06, "loss": 8.31212158203125, "step": 1415 }, { "epoch": 0.0142, "grad_norm": 1.867902159690857, "learning_rate": 4.978838383838384e-06, "loss": 8.281008911132812, "step": 1420 }, { "epoch": 0.01425, "grad_norm": 1.9161847829818726, "learning_rate": 4.978585858585859e-06, "loss": 8.23026351928711, "step": 1425 }, { "epoch": 0.0143, "grad_norm": 1.7140098810195923, "learning_rate": 4.978333333333334e-06, "loss": 8.162289428710938, "step": 1430 }, { "epoch": 0.01435, "grad_norm": 1.3801689147949219, "learning_rate": 4.978080808080808e-06, "loss": 8.208426666259765, "step": 1435 }, { "epoch": 0.0144, "grad_norm": 2.036461591720581, "learning_rate": 4.977828282828283e-06, "loss": 8.216313934326172, "step": 1440 }, { "epoch": 0.01445, "grad_norm": 1.6685024499893188, "learning_rate": 4.9775757575757576e-06, "loss": 8.277519226074219, "step": 1445 }, { "epoch": 0.0145, "grad_norm": 1.6854643821716309, "learning_rate": 4.977323232323233e-06, "loss": 8.279377746582032, "step": 1450 }, { "epoch": 0.01455, "grad_norm": 1.5399439334869385, "learning_rate": 4.977070707070708e-06, "loss": 8.23476791381836, "step": 1455 }, { "epoch": 0.0146, "grad_norm": 1.4562209844589233, "learning_rate": 4.976818181818182e-06, "loss": 8.242851257324219, "step": 1460 }, { "epoch": 0.01465, "grad_norm": 1.4651610851287842, "learning_rate": 4.976565656565657e-06, "loss": 8.255519104003906, "step": 1465 }, { "epoch": 0.0147, "grad_norm": 2.307976484298706, "learning_rate": 4.9763131313131316e-06, "loss": 8.249298095703125, "step": 1470 }, { "epoch": 0.01475, "grad_norm": 1.7262852191925049, "learning_rate": 4.976060606060606e-06, "loss": 8.182123565673828, "step": 1475 }, { "epoch": 0.0148, "grad_norm": 1.709336757659912, "learning_rate": 4.975808080808081e-06, "loss": 8.241169738769532, "step": 1480 }, { "epoch": 0.01485, "grad_norm": 1.8760100603103638, "learning_rate": 4.9755555555555554e-06, "loss": 8.270980834960938, "step": 1485 }, { "epoch": 0.0149, "grad_norm": 2.741426944732666, "learning_rate": 4.975303030303031e-06, "loss": 8.22678451538086, "step": 1490 }, { "epoch": 0.01495, "grad_norm": 2.2993693351745605, "learning_rate": 4.9750505050505056e-06, "loss": 8.23228759765625, "step": 1495 }, { "epoch": 0.015, "grad_norm": 1.5586732625961304, "learning_rate": 4.97479797979798e-06, "loss": 8.23129653930664, "step": 1500 }, { "epoch": 0.01505, "grad_norm": 1.794472336769104, "learning_rate": 4.974545454545455e-06, "loss": 8.245462036132812, "step": 1505 }, { "epoch": 0.0151, "grad_norm": 1.5695534944534302, "learning_rate": 4.9742929292929294e-06, "loss": 8.248947143554688, "step": 1510 }, { "epoch": 0.01515, "grad_norm": 4.60466194152832, "learning_rate": 4.974040404040404e-06, "loss": 8.123861694335938, "step": 1515 }, { "epoch": 0.0152, "grad_norm": 2.188272476196289, "learning_rate": 4.973787878787879e-06, "loss": 8.14218978881836, "step": 1520 }, { "epoch": 0.01525, "grad_norm": 1.847472071647644, "learning_rate": 4.973535353535354e-06, "loss": 8.244377899169923, "step": 1525 }, { "epoch": 0.0153, "grad_norm": 2.6713147163391113, "learning_rate": 4.973282828282829e-06, "loss": 8.238223266601562, "step": 1530 }, { "epoch": 0.01535, "grad_norm": 1.853307843208313, "learning_rate": 4.9730303030303034e-06, "loss": 8.246791076660156, "step": 1535 }, { "epoch": 0.0154, "grad_norm": 4.513730525970459, "learning_rate": 4.972777777777778e-06, "loss": 8.196895599365234, "step": 1540 }, { "epoch": 0.01545, "grad_norm": 2.098773241043091, "learning_rate": 4.9725252525252535e-06, "loss": 8.245899963378907, "step": 1545 }, { "epoch": 0.0155, "grad_norm": 2.297443151473999, "learning_rate": 4.972272727272728e-06, "loss": 8.204507446289062, "step": 1550 }, { "epoch": 0.01555, "grad_norm": 2.2420904636383057, "learning_rate": 4.972020202020203e-06, "loss": 8.204434204101563, "step": 1555 }, { "epoch": 0.0156, "grad_norm": 2.2537333965301514, "learning_rate": 4.9717676767676774e-06, "loss": 8.087725830078124, "step": 1560 }, { "epoch": 0.01565, "grad_norm": 2.0685620307922363, "learning_rate": 4.971515151515152e-06, "loss": 8.217706298828125, "step": 1565 }, { "epoch": 0.0157, "grad_norm": 2.1148107051849365, "learning_rate": 4.971262626262627e-06, "loss": 8.211723327636719, "step": 1570 }, { "epoch": 0.01575, "grad_norm": 2.5793955326080322, "learning_rate": 4.971010101010101e-06, "loss": 8.20735855102539, "step": 1575 }, { "epoch": 0.0158, "grad_norm": 2.0584473609924316, "learning_rate": 4.970757575757576e-06, "loss": 8.058718872070312, "step": 1580 }, { "epoch": 0.01585, "grad_norm": 3.901336431503296, "learning_rate": 4.9705050505050514e-06, "loss": 8.289500427246093, "step": 1585 }, { "epoch": 0.0159, "grad_norm": 2.5759079456329346, "learning_rate": 4.970252525252526e-06, "loss": 8.212875366210938, "step": 1590 }, { "epoch": 0.01595, "grad_norm": 1.9839305877685547, "learning_rate": 4.970000000000001e-06, "loss": 8.300537872314454, "step": 1595 }, { "epoch": 0.016, "grad_norm": 1.9357192516326904, "learning_rate": 4.969747474747475e-06, "loss": 8.246839904785157, "step": 1600 }, { "epoch": 0.01605, "grad_norm": 3.190471649169922, "learning_rate": 4.96949494949495e-06, "loss": 8.204385375976562, "step": 1605 }, { "epoch": 0.0161, "grad_norm": 2.9330544471740723, "learning_rate": 4.969242424242425e-06, "loss": 8.233213806152344, "step": 1610 }, { "epoch": 0.01615, "grad_norm": 2.462989568710327, "learning_rate": 4.968989898989899e-06, "loss": 8.218523406982422, "step": 1615 }, { "epoch": 0.0162, "grad_norm": 3.291127920150757, "learning_rate": 4.968737373737374e-06, "loss": 8.209732055664062, "step": 1620 }, { "epoch": 0.01625, "grad_norm": 2.2086427211761475, "learning_rate": 4.968484848484849e-06, "loss": 8.242097473144531, "step": 1625 }, { "epoch": 0.0163, "grad_norm": 2.1802282333374023, "learning_rate": 4.968232323232324e-06, "loss": 8.20031280517578, "step": 1630 }, { "epoch": 0.01635, "grad_norm": 5.150691032409668, "learning_rate": 4.967979797979799e-06, "loss": 8.0207763671875, "step": 1635 }, { "epoch": 0.0164, "grad_norm": 2.3476431369781494, "learning_rate": 4.967727272727273e-06, "loss": 8.181890869140625, "step": 1640 }, { "epoch": 0.01645, "grad_norm": 3.2155909538269043, "learning_rate": 4.967474747474748e-06, "loss": 8.183082580566406, "step": 1645 }, { "epoch": 0.0165, "grad_norm": 2.1364448070526123, "learning_rate": 4.9672222222222225e-06, "loss": 8.185452270507813, "step": 1650 }, { "epoch": 0.01655, "grad_norm": 5.2213134765625, "learning_rate": 4.966969696969697e-06, "loss": 8.198963165283203, "step": 1655 }, { "epoch": 0.0166, "grad_norm": 2.034255266189575, "learning_rate": 4.966717171717172e-06, "loss": 8.136032104492188, "step": 1660 }, { "epoch": 0.01665, "grad_norm": 2.1195871829986572, "learning_rate": 4.966464646464647e-06, "loss": 8.166607666015626, "step": 1665 }, { "epoch": 0.0167, "grad_norm": 1.8533639907836914, "learning_rate": 4.966212121212122e-06, "loss": 8.183841705322266, "step": 1670 }, { "epoch": 0.01675, "grad_norm": 2.154772996902466, "learning_rate": 4.9659595959595965e-06, "loss": 8.168988037109376, "step": 1675 }, { "epoch": 0.0168, "grad_norm": 2.774803400039673, "learning_rate": 4.965707070707071e-06, "loss": 8.151445770263672, "step": 1680 }, { "epoch": 0.01685, "grad_norm": 2.98892879486084, "learning_rate": 4.965454545454546e-06, "loss": 8.142127227783202, "step": 1685 }, { "epoch": 0.0169, "grad_norm": 2.1599631309509277, "learning_rate": 4.96520202020202e-06, "loss": 8.061070251464844, "step": 1690 }, { "epoch": 0.01695, "grad_norm": 3.964365243911743, "learning_rate": 4.964949494949495e-06, "loss": 8.14443359375, "step": 1695 }, { "epoch": 0.017, "grad_norm": 3.2876358032226562, "learning_rate": 4.96469696969697e-06, "loss": 8.151109313964843, "step": 1700 }, { "epoch": 0.01705, "grad_norm": 2.347782850265503, "learning_rate": 4.964444444444445e-06, "loss": 8.152350616455077, "step": 1705 }, { "epoch": 0.0171, "grad_norm": 3.300475597381592, "learning_rate": 4.96419191919192e-06, "loss": 8.193429565429687, "step": 1710 }, { "epoch": 0.01715, "grad_norm": 3.056680679321289, "learning_rate": 4.963939393939394e-06, "loss": 8.149285888671875, "step": 1715 }, { "epoch": 0.0172, "grad_norm": 3.2774817943573, "learning_rate": 4.963686868686869e-06, "loss": 8.1973876953125, "step": 1720 }, { "epoch": 0.01725, "grad_norm": 2.7714900970458984, "learning_rate": 4.9634343434343445e-06, "loss": 8.127362060546876, "step": 1725 }, { "epoch": 0.0173, "grad_norm": 2.7771408557891846, "learning_rate": 4.963181818181818e-06, "loss": 8.206829833984376, "step": 1730 }, { "epoch": 0.01735, "grad_norm": 3.5485599040985107, "learning_rate": 4.962929292929293e-06, "loss": 8.19534912109375, "step": 1735 }, { "epoch": 0.0174, "grad_norm": 1.8435674905776978, "learning_rate": 4.9626767676767675e-06, "loss": 8.124722290039063, "step": 1740 }, { "epoch": 0.01745, "grad_norm": 4.529754161834717, "learning_rate": 4.962424242424243e-06, "loss": 8.174091339111328, "step": 1745 }, { "epoch": 0.0175, "grad_norm": 2.947138786315918, "learning_rate": 4.962171717171718e-06, "loss": 8.167623138427734, "step": 1750 }, { "epoch": 0.01755, "grad_norm": 2.6368660926818848, "learning_rate": 4.961919191919192e-06, "loss": 8.04675064086914, "step": 1755 }, { "epoch": 0.0176, "grad_norm": 3.2398524284362793, "learning_rate": 4.961666666666667e-06, "loss": 8.208754730224609, "step": 1760 }, { "epoch": 0.01765, "grad_norm": 3.359651803970337, "learning_rate": 4.961414141414142e-06, "loss": 8.151076507568359, "step": 1765 }, { "epoch": 0.0177, "grad_norm": 2.2264723777770996, "learning_rate": 4.961161616161617e-06, "loss": 8.173772430419922, "step": 1770 }, { "epoch": 0.01775, "grad_norm": 2.2416670322418213, "learning_rate": 4.960909090909092e-06, "loss": 8.156338500976563, "step": 1775 }, { "epoch": 0.0178, "grad_norm": 4.545805931091309, "learning_rate": 4.960656565656566e-06, "loss": 8.006956481933594, "step": 1780 }, { "epoch": 0.01785, "grad_norm": 3.801161289215088, "learning_rate": 4.960404040404041e-06, "loss": 8.150926208496093, "step": 1785 }, { "epoch": 0.0179, "grad_norm": 3.7516281604766846, "learning_rate": 4.9601515151515155e-06, "loss": 8.203780364990234, "step": 1790 }, { "epoch": 0.01795, "grad_norm": 2.7432525157928467, "learning_rate": 4.95989898989899e-06, "loss": 8.10575180053711, "step": 1795 }, { "epoch": 0.018, "grad_norm": 3.5147500038146973, "learning_rate": 4.959646464646465e-06, "loss": 8.136005401611328, "step": 1800 }, { "epoch": 0.01805, "grad_norm": 2.278280258178711, "learning_rate": 4.95939393939394e-06, "loss": 8.149613189697266, "step": 1805 }, { "epoch": 0.0181, "grad_norm": 2.4351775646209717, "learning_rate": 4.959141414141415e-06, "loss": 8.175203704833985, "step": 1810 }, { "epoch": 0.01815, "grad_norm": 3.244508743286133, "learning_rate": 4.9588888888888895e-06, "loss": 8.155046081542968, "step": 1815 }, { "epoch": 0.0182, "grad_norm": 3.4676265716552734, "learning_rate": 4.958636363636364e-06, "loss": 8.126185607910156, "step": 1820 }, { "epoch": 0.01825, "grad_norm": 2.132542848587036, "learning_rate": 4.958383838383839e-06, "loss": 8.163625335693359, "step": 1825 }, { "epoch": 0.0183, "grad_norm": 2.23634672164917, "learning_rate": 4.958131313131313e-06, "loss": 8.157920837402344, "step": 1830 }, { "epoch": 0.01835, "grad_norm": 3.5908403396606445, "learning_rate": 4.957878787878788e-06, "loss": 8.149976348876953, "step": 1835 }, { "epoch": 0.0184, "grad_norm": 4.151937484741211, "learning_rate": 4.957626262626263e-06, "loss": 8.092743682861329, "step": 1840 }, { "epoch": 0.01845, "grad_norm": 3.0511488914489746, "learning_rate": 4.957373737373738e-06, "loss": 7.971682739257813, "step": 1845 }, { "epoch": 0.0185, "grad_norm": 3.2392444610595703, "learning_rate": 4.957121212121213e-06, "loss": 8.141278839111328, "step": 1850 }, { "epoch": 0.01855, "grad_norm": 3.98538875579834, "learning_rate": 4.956868686868687e-06, "loss": 8.137117004394531, "step": 1855 }, { "epoch": 0.0186, "grad_norm": 3.7867796421051025, "learning_rate": 4.956616161616162e-06, "loss": 8.163440704345703, "step": 1860 }, { "epoch": 0.01865, "grad_norm": 3.0009243488311768, "learning_rate": 4.956363636363637e-06, "loss": 8.146736907958985, "step": 1865 }, { "epoch": 0.0187, "grad_norm": 3.323636293411255, "learning_rate": 4.956111111111111e-06, "loss": 8.136614990234374, "step": 1870 }, { "epoch": 0.01875, "grad_norm": 3.658799886703491, "learning_rate": 4.955858585858586e-06, "loss": 8.1032470703125, "step": 1875 }, { "epoch": 0.0188, "grad_norm": 3.793306350708008, "learning_rate": 4.9556060606060605e-06, "loss": 8.142462921142577, "step": 1880 }, { "epoch": 0.01885, "grad_norm": 4.059318542480469, "learning_rate": 4.955353535353536e-06, "loss": 8.102430725097657, "step": 1885 }, { "epoch": 0.0189, "grad_norm": 3.8674840927124023, "learning_rate": 4.955101010101011e-06, "loss": 8.140837097167969, "step": 1890 }, { "epoch": 0.01895, "grad_norm": 3.5070888996124268, "learning_rate": 4.954848484848485e-06, "loss": 8.174871826171875, "step": 1895 }, { "epoch": 0.019, "grad_norm": 4.296961784362793, "learning_rate": 4.95459595959596e-06, "loss": 8.14097900390625, "step": 1900 }, { "epoch": 0.01905, "grad_norm": 2.8373847007751465, "learning_rate": 4.9543434343434345e-06, "loss": 7.948394775390625, "step": 1905 }, { "epoch": 0.0191, "grad_norm": 3.476529121398926, "learning_rate": 4.954090909090909e-06, "loss": 8.136420440673827, "step": 1910 }, { "epoch": 0.01915, "grad_norm": 3.5385100841522217, "learning_rate": 4.953838383838384e-06, "loss": 7.72955322265625, "step": 1915 }, { "epoch": 0.0192, "grad_norm": 3.5107741355895996, "learning_rate": 4.9535858585858584e-06, "loss": 7.953826141357422, "step": 1920 }, { "epoch": 0.01925, "grad_norm": 4.546454906463623, "learning_rate": 4.953333333333334e-06, "loss": 8.236188507080078, "step": 1925 }, { "epoch": 0.0193, "grad_norm": 4.274033069610596, "learning_rate": 4.9530808080808085e-06, "loss": 8.139183044433594, "step": 1930 }, { "epoch": 0.01935, "grad_norm": 4.179872035980225, "learning_rate": 4.952828282828283e-06, "loss": 8.03671875, "step": 1935 }, { "epoch": 0.0194, "grad_norm": 2.947314739227295, "learning_rate": 4.952575757575759e-06, "loss": 8.058915710449218, "step": 1940 }, { "epoch": 0.01945, "grad_norm": 4.584015369415283, "learning_rate": 4.952323232323233e-06, "loss": 8.143842315673828, "step": 1945 }, { "epoch": 0.0195, "grad_norm": 4.209578514099121, "learning_rate": 4.952070707070707e-06, "loss": 8.122565460205077, "step": 1950 }, { "epoch": 0.01955, "grad_norm": 3.676715612411499, "learning_rate": 4.951818181818182e-06, "loss": 8.11377410888672, "step": 1955 }, { "epoch": 0.0196, "grad_norm": 3.392622232437134, "learning_rate": 4.951565656565657e-06, "loss": 8.147087097167969, "step": 1960 }, { "epoch": 0.01965, "grad_norm": 4.783401012420654, "learning_rate": 4.951313131313132e-06, "loss": 8.171436309814453, "step": 1965 }, { "epoch": 0.0197, "grad_norm": 7.393720626831055, "learning_rate": 4.9510606060606064e-06, "loss": 8.0789306640625, "step": 1970 }, { "epoch": 0.01975, "grad_norm": 3.860348701477051, "learning_rate": 4.950808080808081e-06, "loss": 8.099192047119141, "step": 1975 }, { "epoch": 0.0198, "grad_norm": 4.815975189208984, "learning_rate": 4.9505555555555565e-06, "loss": 8.116313171386718, "step": 1980 }, { "epoch": 0.01985, "grad_norm": 5.488703727722168, "learning_rate": 4.950303030303031e-06, "loss": 8.131668853759766, "step": 1985 }, { "epoch": 0.0199, "grad_norm": 3.7611827850341797, "learning_rate": 4.950050505050506e-06, "loss": 8.047259521484374, "step": 1990 }, { "epoch": 0.01995, "grad_norm": 4.414346218109131, "learning_rate": 4.94979797979798e-06, "loss": 8.146755981445313, "step": 1995 }, { "epoch": 0.02, "grad_norm": 3.402703285217285, "learning_rate": 4.949545454545455e-06, "loss": 8.124602508544921, "step": 2000 }, { "epoch": 0.02005, "grad_norm": 4.558883190155029, "learning_rate": 4.94929292929293e-06, "loss": 8.436622619628906, "step": 2005 }, { "epoch": 0.0201, "grad_norm": 4.240245819091797, "learning_rate": 4.949040404040404e-06, "loss": 8.050159454345703, "step": 2010 }, { "epoch": 0.02015, "grad_norm": 4.767601013183594, "learning_rate": 4.948787878787879e-06, "loss": 8.230625915527344, "step": 2015 }, { "epoch": 0.0202, "grad_norm": 4.285569190979004, "learning_rate": 4.948535353535354e-06, "loss": 8.195152282714844, "step": 2020 }, { "epoch": 0.02025, "grad_norm": 3.8273346424102783, "learning_rate": 4.948282828282829e-06, "loss": 8.145726013183594, "step": 2025 }, { "epoch": 0.0203, "grad_norm": 4.112914562225342, "learning_rate": 4.948030303030304e-06, "loss": 8.114585876464844, "step": 2030 }, { "epoch": 0.02035, "grad_norm": 4.19883394241333, "learning_rate": 4.947777777777778e-06, "loss": 8.107263946533203, "step": 2035 }, { "epoch": 0.0204, "grad_norm": 4.548821449279785, "learning_rate": 4.947525252525253e-06, "loss": 8.17945556640625, "step": 2040 }, { "epoch": 0.02045, "grad_norm": 6.7020463943481445, "learning_rate": 4.9472727272727276e-06, "loss": 7.99320068359375, "step": 2045 }, { "epoch": 0.0205, "grad_norm": 3.062143325805664, "learning_rate": 4.947020202020202e-06, "loss": 8.131001281738282, "step": 2050 }, { "epoch": 0.02055, "grad_norm": 4.944661617279053, "learning_rate": 4.946767676767677e-06, "loss": 8.119766235351562, "step": 2055 }, { "epoch": 0.0206, "grad_norm": 5.674378871917725, "learning_rate": 4.946515151515152e-06, "loss": 8.064532470703124, "step": 2060 }, { "epoch": 0.02065, "grad_norm": 4.104469299316406, "learning_rate": 4.946262626262627e-06, "loss": 8.15120620727539, "step": 2065 }, { "epoch": 0.0207, "grad_norm": 4.435698986053467, "learning_rate": 4.9460101010101016e-06, "loss": 8.12084732055664, "step": 2070 }, { "epoch": 0.02075, "grad_norm": 3.825582265853882, "learning_rate": 4.945757575757576e-06, "loss": 8.003488922119141, "step": 2075 }, { "epoch": 0.0208, "grad_norm": 4.849913597106934, "learning_rate": 4.945505050505051e-06, "loss": 8.076895141601563, "step": 2080 }, { "epoch": 0.02085, "grad_norm": 10.164915084838867, "learning_rate": 4.9452525252525255e-06, "loss": 8.01006851196289, "step": 2085 }, { "epoch": 0.0209, "grad_norm": 4.137729644775391, "learning_rate": 4.945e-06, "loss": 7.7697021484375, "step": 2090 }, { "epoch": 0.02095, "grad_norm": 6.361881256103516, "learning_rate": 4.944747474747475e-06, "loss": 8.000723266601563, "step": 2095 }, { "epoch": 0.021, "grad_norm": 3.5998408794403076, "learning_rate": 4.94449494949495e-06, "loss": 8.129924774169922, "step": 2100 }, { "epoch": 0.02105, "grad_norm": 5.961328029632568, "learning_rate": 4.944242424242425e-06, "loss": 8.079069519042969, "step": 2105 }, { "epoch": 0.0211, "grad_norm": 5.388092517852783, "learning_rate": 4.9439898989898995e-06, "loss": 8.065097045898437, "step": 2110 }, { "epoch": 0.02115, "grad_norm": 3.7849602699279785, "learning_rate": 4.943737373737374e-06, "loss": 8.195255279541016, "step": 2115 }, { "epoch": 0.0212, "grad_norm": 4.172582149505615, "learning_rate": 4.943484848484849e-06, "loss": 8.083509063720703, "step": 2120 }, { "epoch": 0.02125, "grad_norm": 3.7657487392425537, "learning_rate": 4.943232323232323e-06, "loss": 8.090748596191407, "step": 2125 }, { "epoch": 0.0213, "grad_norm": 3.168128490447998, "learning_rate": 4.942979797979798e-06, "loss": 8.040937805175782, "step": 2130 }, { "epoch": 0.02135, "grad_norm": 5.141269683837891, "learning_rate": 4.942727272727273e-06, "loss": 8.065060424804688, "step": 2135 }, { "epoch": 0.0214, "grad_norm": 5.394471645355225, "learning_rate": 4.942474747474748e-06, "loss": 8.099494171142577, "step": 2140 }, { "epoch": 0.02145, "grad_norm": 6.429492473602295, "learning_rate": 4.942222222222223e-06, "loss": 8.062808227539062, "step": 2145 }, { "epoch": 0.0215, "grad_norm": 5.63588809967041, "learning_rate": 4.941969696969697e-06, "loss": 8.26043472290039, "step": 2150 }, { "epoch": 0.02155, "grad_norm": 3.661998987197876, "learning_rate": 4.941717171717172e-06, "loss": 8.102381134033203, "step": 2155 }, { "epoch": 0.0216, "grad_norm": 5.674729824066162, "learning_rate": 4.9414646464646475e-06, "loss": 7.86041259765625, "step": 2160 }, { "epoch": 0.02165, "grad_norm": 5.142356872558594, "learning_rate": 4.941212121212122e-06, "loss": 8.079032897949219, "step": 2165 }, { "epoch": 0.0217, "grad_norm": 7.163041114807129, "learning_rate": 4.940959595959597e-06, "loss": 8.09962158203125, "step": 2170 }, { "epoch": 0.02175, "grad_norm": 5.5377960205078125, "learning_rate": 4.9407070707070705e-06, "loss": 8.033216094970703, "step": 2175 }, { "epoch": 0.0218, "grad_norm": 5.31917142868042, "learning_rate": 4.940454545454546e-06, "loss": 8.07091827392578, "step": 2180 }, { "epoch": 0.02185, "grad_norm": 6.305392265319824, "learning_rate": 4.940202020202021e-06, "loss": 8.0578125, "step": 2185 }, { "epoch": 0.0219, "grad_norm": 6.275941371917725, "learning_rate": 4.939949494949495e-06, "loss": 8.107493591308593, "step": 2190 }, { "epoch": 0.02195, "grad_norm": 5.42586088180542, "learning_rate": 4.93969696969697e-06, "loss": 8.012085723876954, "step": 2195 }, { "epoch": 0.022, "grad_norm": 6.528047561645508, "learning_rate": 4.939444444444445e-06, "loss": 8.079855346679688, "step": 2200 }, { "epoch": 0.02205, "grad_norm": 5.952470302581787, "learning_rate": 4.93919191919192e-06, "loss": 8.036585998535156, "step": 2205 }, { "epoch": 0.0221, "grad_norm": 8.109021186828613, "learning_rate": 4.938939393939395e-06, "loss": 8.097537994384766, "step": 2210 }, { "epoch": 0.02215, "grad_norm": 5.504205703735352, "learning_rate": 4.938686868686869e-06, "loss": 8.087356567382812, "step": 2215 }, { "epoch": 0.0222, "grad_norm": 4.862380027770996, "learning_rate": 4.938434343434344e-06, "loss": 8.14871826171875, "step": 2220 }, { "epoch": 0.02225, "grad_norm": 6.553999423980713, "learning_rate": 4.9381818181818185e-06, "loss": 8.10479278564453, "step": 2225 }, { "epoch": 0.0223, "grad_norm": 4.806943416595459, "learning_rate": 4.937929292929293e-06, "loss": 8.0728271484375, "step": 2230 }, { "epoch": 0.02235, "grad_norm": 5.197163105010986, "learning_rate": 4.937676767676768e-06, "loss": 8.084114074707031, "step": 2235 }, { "epoch": 0.0224, "grad_norm": 5.497702121734619, "learning_rate": 4.937424242424243e-06, "loss": 8.068180084228516, "step": 2240 }, { "epoch": 0.02245, "grad_norm": 9.232624053955078, "learning_rate": 4.937171717171718e-06, "loss": 8.192984771728515, "step": 2245 }, { "epoch": 0.0225, "grad_norm": 6.503833293914795, "learning_rate": 4.9369191919191925e-06, "loss": 8.080185699462891, "step": 2250 }, { "epoch": 0.02255, "grad_norm": 4.67305850982666, "learning_rate": 4.936666666666667e-06, "loss": 8.047933959960938, "step": 2255 }, { "epoch": 0.0226, "grad_norm": 6.938578128814697, "learning_rate": 4.936414141414142e-06, "loss": 8.16378173828125, "step": 2260 }, { "epoch": 0.02265, "grad_norm": 5.685107231140137, "learning_rate": 4.936161616161616e-06, "loss": 8.037014770507813, "step": 2265 }, { "epoch": 0.0227, "grad_norm": 5.197331428527832, "learning_rate": 4.935909090909091e-06, "loss": 8.067678833007813, "step": 2270 }, { "epoch": 0.02275, "grad_norm": 4.970640659332275, "learning_rate": 4.935656565656566e-06, "loss": 8.045899963378906, "step": 2275 }, { "epoch": 0.0228, "grad_norm": 5.287182807922363, "learning_rate": 4.935404040404041e-06, "loss": 8.066535949707031, "step": 2280 }, { "epoch": 0.02285, "grad_norm": 8.051145553588867, "learning_rate": 4.935151515151516e-06, "loss": 8.10162811279297, "step": 2285 }, { "epoch": 0.0229, "grad_norm": 7.445028305053711, "learning_rate": 4.93489898989899e-06, "loss": 8.092851257324218, "step": 2290 }, { "epoch": 0.02295, "grad_norm": 7.598878860473633, "learning_rate": 4.934646464646465e-06, "loss": 8.033217620849609, "step": 2295 }, { "epoch": 0.023, "grad_norm": 5.209986686706543, "learning_rate": 4.93439393939394e-06, "loss": 8.017487335205079, "step": 2300 }, { "epoch": 0.02305, "grad_norm": 6.6041259765625, "learning_rate": 4.934141414141414e-06, "loss": 8.076496124267578, "step": 2305 }, { "epoch": 0.0231, "grad_norm": 6.017110347747803, "learning_rate": 4.933888888888889e-06, "loss": 8.03545684814453, "step": 2310 }, { "epoch": 0.02315, "grad_norm": 5.089906692504883, "learning_rate": 4.9336363636363635e-06, "loss": 8.035746002197266, "step": 2315 }, { "epoch": 0.0232, "grad_norm": 5.970499515533447, "learning_rate": 4.933383838383839e-06, "loss": 8.065373992919922, "step": 2320 }, { "epoch": 0.02325, "grad_norm": 6.824329376220703, "learning_rate": 4.933131313131314e-06, "loss": 8.065229797363282, "step": 2325 }, { "epoch": 0.0233, "grad_norm": 6.626498699188232, "learning_rate": 4.932878787878788e-06, "loss": 7.949836730957031, "step": 2330 }, { "epoch": 0.02335, "grad_norm": 5.37431526184082, "learning_rate": 4.932626262626263e-06, "loss": 8.07468032836914, "step": 2335 }, { "epoch": 0.0234, "grad_norm": 5.490968704223633, "learning_rate": 4.9323737373737375e-06, "loss": 7.988227844238281, "step": 2340 }, { "epoch": 0.02345, "grad_norm": 6.844717979431152, "learning_rate": 4.932121212121212e-06, "loss": 8.041460418701172, "step": 2345 }, { "epoch": 0.0235, "grad_norm": 7.442416667938232, "learning_rate": 4.931868686868687e-06, "loss": 8.04156951904297, "step": 2350 }, { "epoch": 0.02355, "grad_norm": 5.549331188201904, "learning_rate": 4.931616161616161e-06, "loss": 7.958740234375, "step": 2355 }, { "epoch": 0.0236, "grad_norm": 6.274140357971191, "learning_rate": 4.931363636363637e-06, "loss": 8.044733428955078, "step": 2360 }, { "epoch": 0.02365, "grad_norm": 6.433976173400879, "learning_rate": 4.9311111111111115e-06, "loss": 8.084085845947266, "step": 2365 }, { "epoch": 0.0237, "grad_norm": 6.738949298858643, "learning_rate": 4.930858585858586e-06, "loss": 7.934294128417969, "step": 2370 }, { "epoch": 0.02375, "grad_norm": 6.82579231262207, "learning_rate": 4.930606060606062e-06, "loss": 8.111377716064453, "step": 2375 }, { "epoch": 0.0238, "grad_norm": 7.230797290802002, "learning_rate": 4.930353535353536e-06, "loss": 8.003812408447265, "step": 2380 }, { "epoch": 0.02385, "grad_norm": 8.453668594360352, "learning_rate": 4.930101010101011e-06, "loss": 7.9839332580566404, "step": 2385 }, { "epoch": 0.0239, "grad_norm": 5.416663646697998, "learning_rate": 4.9298484848484855e-06, "loss": 7.968634796142578, "step": 2390 }, { "epoch": 0.02395, "grad_norm": 7.59089469909668, "learning_rate": 4.92959595959596e-06, "loss": 7.9937744140625, "step": 2395 }, { "epoch": 0.024, "grad_norm": 5.495401859283447, "learning_rate": 4.929343434343435e-06, "loss": 7.990375518798828, "step": 2400 }, { "epoch": 0.02405, "grad_norm": 7.65642786026001, "learning_rate": 4.929090909090909e-06, "loss": 8.064468383789062, "step": 2405 }, { "epoch": 0.0241, "grad_norm": 8.040745735168457, "learning_rate": 4.928838383838384e-06, "loss": 8.00512924194336, "step": 2410 }, { "epoch": 0.02415, "grad_norm": 8.093311309814453, "learning_rate": 4.9285858585858595e-06, "loss": 8.033771514892578, "step": 2415 }, { "epoch": 0.0242, "grad_norm": 6.817811489105225, "learning_rate": 4.928333333333334e-06, "loss": 8.010509490966797, "step": 2420 }, { "epoch": 0.02425, "grad_norm": 9.66916275024414, "learning_rate": 4.928080808080809e-06, "loss": 8.025614929199218, "step": 2425 }, { "epoch": 0.0243, "grad_norm": 7.142340660095215, "learning_rate": 4.927828282828283e-06, "loss": 8.056129455566406, "step": 2430 }, { "epoch": 0.02435, "grad_norm": 6.461586952209473, "learning_rate": 4.927575757575758e-06, "loss": 7.995268249511719, "step": 2435 }, { "epoch": 0.0244, "grad_norm": 4.535359859466553, "learning_rate": 4.927323232323233e-06, "loss": 7.990110778808594, "step": 2440 }, { "epoch": 0.02445, "grad_norm": 7.250387191772461, "learning_rate": 4.927070707070707e-06, "loss": 8.015898895263671, "step": 2445 }, { "epoch": 0.0245, "grad_norm": 9.70677661895752, "learning_rate": 4.926818181818182e-06, "loss": 8.054452514648437, "step": 2450 }, { "epoch": 0.02455, "grad_norm": 9.1210298538208, "learning_rate": 4.926565656565657e-06, "loss": 7.917488861083984, "step": 2455 }, { "epoch": 0.0246, "grad_norm": 9.08738899230957, "learning_rate": 4.926313131313132e-06, "loss": 8.037545776367187, "step": 2460 }, { "epoch": 0.02465, "grad_norm": 6.994068145751953, "learning_rate": 4.926060606060607e-06, "loss": 8.030938720703125, "step": 2465 }, { "epoch": 0.0247, "grad_norm": 5.324690341949463, "learning_rate": 4.925808080808081e-06, "loss": 8.021089172363281, "step": 2470 }, { "epoch": 0.02475, "grad_norm": 6.795166015625, "learning_rate": 4.925555555555556e-06, "loss": 8.030506134033203, "step": 2475 }, { "epoch": 0.0248, "grad_norm": 5.970880031585693, "learning_rate": 4.9253030303030306e-06, "loss": 8.040354156494141, "step": 2480 }, { "epoch": 0.02485, "grad_norm": 6.029690742492676, "learning_rate": 4.925050505050505e-06, "loss": 8.04490966796875, "step": 2485 }, { "epoch": 0.0249, "grad_norm": 5.285550117492676, "learning_rate": 4.92479797979798e-06, "loss": 8.029734802246093, "step": 2490 }, { "epoch": 0.02495, "grad_norm": 7.552892208099365, "learning_rate": 4.924545454545455e-06, "loss": 8.002020263671875, "step": 2495 }, { "epoch": 0.025, "grad_norm": 6.6681060791015625, "learning_rate": 4.92429292929293e-06, "loss": 7.9726104736328125, "step": 2500 }, { "epoch": 0.02505, "grad_norm": 6.780698299407959, "learning_rate": 4.9240404040404046e-06, "loss": 8.02074966430664, "step": 2505 }, { "epoch": 0.0251, "grad_norm": 6.418056488037109, "learning_rate": 4.923787878787879e-06, "loss": 7.974566650390625, "step": 2510 }, { "epoch": 0.02515, "grad_norm": 7.972861289978027, "learning_rate": 4.923535353535354e-06, "loss": 8.010226440429687, "step": 2515 }, { "epoch": 0.0252, "grad_norm": 8.138399124145508, "learning_rate": 4.9232828282828284e-06, "loss": 7.941558837890625, "step": 2520 }, { "epoch": 0.02525, "grad_norm": 9.1495943069458, "learning_rate": 4.923030303030303e-06, "loss": 7.997895812988281, "step": 2525 }, { "epoch": 0.0253, "grad_norm": 8.501659393310547, "learning_rate": 4.922777777777778e-06, "loss": 7.9309333801269535, "step": 2530 }, { "epoch": 0.02535, "grad_norm": 6.139895439147949, "learning_rate": 4.922525252525253e-06, "loss": 7.815684509277344, "step": 2535 }, { "epoch": 0.0254, "grad_norm": 8.120899200439453, "learning_rate": 4.922272727272728e-06, "loss": 8.073867797851562, "step": 2540 }, { "epoch": 0.02545, "grad_norm": 8.050490379333496, "learning_rate": 4.9220202020202024e-06, "loss": 8.003661346435546, "step": 2545 }, { "epoch": 0.0255, "grad_norm": 5.850531578063965, "learning_rate": 4.921767676767677e-06, "loss": 7.973641204833984, "step": 2550 }, { "epoch": 0.02555, "grad_norm": 8.398384094238281, "learning_rate": 4.9215151515151525e-06, "loss": 7.994668579101562, "step": 2555 }, { "epoch": 0.0256, "grad_norm": 6.711146831512451, "learning_rate": 4.921262626262626e-06, "loss": 7.976986694335937, "step": 2560 }, { "epoch": 0.02565, "grad_norm": 8.427578926086426, "learning_rate": 4.921010101010101e-06, "loss": 7.953701019287109, "step": 2565 }, { "epoch": 0.0257, "grad_norm": 10.573576927185059, "learning_rate": 4.920757575757576e-06, "loss": 8.051031494140625, "step": 2570 }, { "epoch": 0.02575, "grad_norm": 9.25417709350586, "learning_rate": 4.920505050505051e-06, "loss": 8.050728607177735, "step": 2575 }, { "epoch": 0.0258, "grad_norm": 9.895792961120605, "learning_rate": 4.920252525252526e-06, "loss": 7.974163055419922, "step": 2580 }, { "epoch": 0.02585, "grad_norm": 10.633347511291504, "learning_rate": 4.92e-06, "loss": 7.978343200683594, "step": 2585 }, { "epoch": 0.0259, "grad_norm": 6.666189670562744, "learning_rate": 4.919747474747475e-06, "loss": 7.923368072509765, "step": 2590 }, { "epoch": 0.02595, "grad_norm": 6.58552360534668, "learning_rate": 4.9194949494949504e-06, "loss": 7.935690307617188, "step": 2595 }, { "epoch": 0.026, "grad_norm": 10.140469551086426, "learning_rate": 4.919242424242425e-06, "loss": 7.962259674072266, "step": 2600 }, { "epoch": 0.02605, "grad_norm": 7.739034652709961, "learning_rate": 4.9189898989899e-06, "loss": 7.971367645263672, "step": 2605 }, { "epoch": 0.0261, "grad_norm": 11.827165603637695, "learning_rate": 4.918737373737374e-06, "loss": 8.027304077148438, "step": 2610 }, { "epoch": 0.02615, "grad_norm": 6.215206146240234, "learning_rate": 4.918484848484849e-06, "loss": 7.96094970703125, "step": 2615 }, { "epoch": 0.0262, "grad_norm": 9.400468826293945, "learning_rate": 4.918232323232324e-06, "loss": 7.980947875976563, "step": 2620 }, { "epoch": 0.02625, "grad_norm": 10.95596694946289, "learning_rate": 4.917979797979798e-06, "loss": 7.796273040771484, "step": 2625 }, { "epoch": 0.0263, "grad_norm": 5.988711357116699, "learning_rate": 4.917727272727273e-06, "loss": 7.997755432128907, "step": 2630 }, { "epoch": 0.02635, "grad_norm": 8.493010520935059, "learning_rate": 4.917474747474748e-06, "loss": 7.957850646972656, "step": 2635 }, { "epoch": 0.0264, "grad_norm": 8.662044525146484, "learning_rate": 4.917222222222223e-06, "loss": 7.934742736816406, "step": 2640 }, { "epoch": 0.02645, "grad_norm": 8.78908920288086, "learning_rate": 4.916969696969698e-06, "loss": 7.969111633300781, "step": 2645 }, { "epoch": 0.0265, "grad_norm": 9.42647933959961, "learning_rate": 4.916717171717172e-06, "loss": 7.922297668457031, "step": 2650 }, { "epoch": 0.02655, "grad_norm": 6.514276027679443, "learning_rate": 4.916464646464647e-06, "loss": 7.990278625488282, "step": 2655 }, { "epoch": 0.0266, "grad_norm": 11.466804504394531, "learning_rate": 4.9162121212121215e-06, "loss": 7.898329162597657, "step": 2660 }, { "epoch": 0.02665, "grad_norm": 8.086190223693848, "learning_rate": 4.915959595959596e-06, "loss": 7.999431610107422, "step": 2665 }, { "epoch": 0.0267, "grad_norm": 10.377226829528809, "learning_rate": 4.915707070707071e-06, "loss": 7.841584777832031, "step": 2670 }, { "epoch": 0.02675, "grad_norm": 10.032018661499023, "learning_rate": 4.915454545454546e-06, "loss": 7.929267883300781, "step": 2675 }, { "epoch": 0.0268, "grad_norm": 10.496599197387695, "learning_rate": 4.915202020202021e-06, "loss": 7.9510986328125, "step": 2680 }, { "epoch": 0.02685, "grad_norm": 9.127882957458496, "learning_rate": 4.9149494949494955e-06, "loss": 7.903672790527343, "step": 2685 }, { "epoch": 0.0269, "grad_norm": 10.187844276428223, "learning_rate": 4.91469696969697e-06, "loss": 7.946096801757813, "step": 2690 }, { "epoch": 0.02695, "grad_norm": 6.688825607299805, "learning_rate": 4.914444444444445e-06, "loss": 7.877186584472656, "step": 2695 }, { "epoch": 0.027, "grad_norm": 11.667146682739258, "learning_rate": 4.914191919191919e-06, "loss": 7.966148376464844, "step": 2700 }, { "epoch": 0.02705, "grad_norm": 8.144512176513672, "learning_rate": 4.913939393939394e-06, "loss": 7.914895629882812, "step": 2705 }, { "epoch": 0.0271, "grad_norm": 9.607114791870117, "learning_rate": 4.913686868686869e-06, "loss": 7.946595764160156, "step": 2710 }, { "epoch": 0.02715, "grad_norm": 10.659721374511719, "learning_rate": 4.913434343434344e-06, "loss": 7.916932678222656, "step": 2715 }, { "epoch": 0.0272, "grad_norm": 7.175497055053711, "learning_rate": 4.913181818181819e-06, "loss": 8.014002990722656, "step": 2720 }, { "epoch": 0.02725, "grad_norm": 8.887285232543945, "learning_rate": 4.912929292929293e-06, "loss": 7.975077819824219, "step": 2725 }, { "epoch": 0.0273, "grad_norm": 12.670340538024902, "learning_rate": 4.912676767676768e-06, "loss": 7.895428466796875, "step": 2730 }, { "epoch": 0.02735, "grad_norm": 7.938830375671387, "learning_rate": 4.912424242424243e-06, "loss": 7.87036361694336, "step": 2735 }, { "epoch": 0.0274, "grad_norm": 9.395071983337402, "learning_rate": 4.912171717171717e-06, "loss": 7.878598022460937, "step": 2740 }, { "epoch": 0.02745, "grad_norm": 8.282781600952148, "learning_rate": 4.911919191919192e-06, "loss": 7.9313507080078125, "step": 2745 }, { "epoch": 0.0275, "grad_norm": 9.37558364868164, "learning_rate": 4.9116666666666665e-06, "loss": 7.942018127441406, "step": 2750 }, { "epoch": 0.02755, "grad_norm": 8.028214454650879, "learning_rate": 4.911414141414142e-06, "loss": 7.937666320800782, "step": 2755 }, { "epoch": 0.0276, "grad_norm": 10.93342113494873, "learning_rate": 4.911161616161617e-06, "loss": 7.900523376464844, "step": 2760 }, { "epoch": 0.02765, "grad_norm": 8.307437896728516, "learning_rate": 4.910909090909091e-06, "loss": 7.877653503417969, "step": 2765 }, { "epoch": 0.0277, "grad_norm": 9.448577880859375, "learning_rate": 4.910656565656566e-06, "loss": 7.928202819824219, "step": 2770 }, { "epoch": 0.02775, "grad_norm": 9.391576766967773, "learning_rate": 4.910404040404041e-06, "loss": 7.874234771728515, "step": 2775 }, { "epoch": 0.0278, "grad_norm": 10.955571174621582, "learning_rate": 4.910151515151515e-06, "loss": 7.928572082519532, "step": 2780 }, { "epoch": 0.02785, "grad_norm": 6.9731059074401855, "learning_rate": 4.90989898989899e-06, "loss": 7.952867889404297, "step": 2785 }, { "epoch": 0.0279, "grad_norm": 12.694716453552246, "learning_rate": 4.909646464646464e-06, "loss": 7.904873657226562, "step": 2790 }, { "epoch": 0.02795, "grad_norm": 11.881571769714355, "learning_rate": 4.90939393939394e-06, "loss": 7.741000366210938, "step": 2795 }, { "epoch": 0.028, "grad_norm": 7.595656394958496, "learning_rate": 4.9091414141414145e-06, "loss": 7.935591125488282, "step": 2800 }, { "epoch": 0.02805, "grad_norm": 10.601820945739746, "learning_rate": 4.908888888888889e-06, "loss": 7.7391204833984375, "step": 2805 }, { "epoch": 0.0281, "grad_norm": 11.143033027648926, "learning_rate": 4.908636363636365e-06, "loss": 7.914286804199219, "step": 2810 }, { "epoch": 0.02815, "grad_norm": 8.2409086227417, "learning_rate": 4.908383838383839e-06, "loss": 7.9224494934082035, "step": 2815 }, { "epoch": 0.0282, "grad_norm": 7.917463302612305, "learning_rate": 4.908131313131314e-06, "loss": 7.907286071777344, "step": 2820 }, { "epoch": 0.02825, "grad_norm": 10.662066459655762, "learning_rate": 4.9078787878787885e-06, "loss": 7.8909049987792965, "step": 2825 }, { "epoch": 0.0283, "grad_norm": 10.457559585571289, "learning_rate": 4.907626262626263e-06, "loss": 7.929391479492187, "step": 2830 }, { "epoch": 0.02835, "grad_norm": 7.949315071105957, "learning_rate": 4.907373737373738e-06, "loss": 7.959058380126953, "step": 2835 }, { "epoch": 0.0284, "grad_norm": 6.844080448150635, "learning_rate": 4.907121212121212e-06, "loss": 7.7963714599609375, "step": 2840 }, { "epoch": 0.02845, "grad_norm": 8.774474143981934, "learning_rate": 4.906868686868687e-06, "loss": 7.892610931396485, "step": 2845 }, { "epoch": 0.0285, "grad_norm": 7.159884929656982, "learning_rate": 4.9066161616161625e-06, "loss": 7.884919738769531, "step": 2850 }, { "epoch": 0.02855, "grad_norm": 12.522878646850586, "learning_rate": 4.906363636363637e-06, "loss": 7.867255401611328, "step": 2855 }, { "epoch": 0.0286, "grad_norm": 11.305131912231445, "learning_rate": 4.906111111111112e-06, "loss": 7.904405212402343, "step": 2860 }, { "epoch": 0.02865, "grad_norm": 10.203548431396484, "learning_rate": 4.905858585858586e-06, "loss": 7.980509948730469, "step": 2865 }, { "epoch": 0.0287, "grad_norm": 12.34007453918457, "learning_rate": 4.905606060606061e-06, "loss": 7.892252349853516, "step": 2870 }, { "epoch": 0.02875, "grad_norm": 11.29151725769043, "learning_rate": 4.905353535353536e-06, "loss": 7.9203041076660154, "step": 2875 }, { "epoch": 0.0288, "grad_norm": 11.179515838623047, "learning_rate": 4.90510101010101e-06, "loss": 7.847711181640625, "step": 2880 }, { "epoch": 0.02885, "grad_norm": 10.366399765014648, "learning_rate": 4.904848484848485e-06, "loss": 7.845014953613282, "step": 2885 }, { "epoch": 0.0289, "grad_norm": 15.523353576660156, "learning_rate": 4.90459595959596e-06, "loss": 7.868599700927734, "step": 2890 }, { "epoch": 0.02895, "grad_norm": 9.0343599319458, "learning_rate": 4.904343434343435e-06, "loss": 7.899134826660156, "step": 2895 }, { "epoch": 0.029, "grad_norm": 11.841954231262207, "learning_rate": 4.90409090909091e-06, "loss": 7.867620849609375, "step": 2900 }, { "epoch": 0.02905, "grad_norm": 9.042365074157715, "learning_rate": 4.903838383838384e-06, "loss": 7.835514831542969, "step": 2905 }, { "epoch": 0.0291, "grad_norm": 8.674809455871582, "learning_rate": 4.903585858585859e-06, "loss": 7.838459777832031, "step": 2910 }, { "epoch": 0.02915, "grad_norm": 11.006317138671875, "learning_rate": 4.9033333333333335e-06, "loss": 7.876612854003906, "step": 2915 }, { "epoch": 0.0292, "grad_norm": 9.22298812866211, "learning_rate": 4.903080808080808e-06, "loss": 7.823573303222656, "step": 2920 }, { "epoch": 0.02925, "grad_norm": 13.188190460205078, "learning_rate": 4.902828282828283e-06, "loss": 7.854862213134766, "step": 2925 }, { "epoch": 0.0293, "grad_norm": 9.189652442932129, "learning_rate": 4.902575757575758e-06, "loss": 7.88726577758789, "step": 2930 }, { "epoch": 0.02935, "grad_norm": 11.630542755126953, "learning_rate": 4.902323232323233e-06, "loss": 7.84576416015625, "step": 2935 }, { "epoch": 0.0294, "grad_norm": 21.05582046508789, "learning_rate": 4.9020707070707075e-06, "loss": 7.9663246154785154, "step": 2940 }, { "epoch": 0.02945, "grad_norm": 10.875149726867676, "learning_rate": 4.901818181818182e-06, "loss": 7.875367736816406, "step": 2945 }, { "epoch": 0.0295, "grad_norm": 11.986106872558594, "learning_rate": 4.901565656565657e-06, "loss": 7.928326416015625, "step": 2950 }, { "epoch": 0.02955, "grad_norm": 10.485430717468262, "learning_rate": 4.9013131313131314e-06, "loss": 7.969045257568359, "step": 2955 }, { "epoch": 0.0296, "grad_norm": 10.710868835449219, "learning_rate": 4.901060606060606e-06, "loss": 7.916181182861328, "step": 2960 }, { "epoch": 0.02965, "grad_norm": 7.628809928894043, "learning_rate": 4.900808080808081e-06, "loss": 7.860700225830078, "step": 2965 }, { "epoch": 0.0297, "grad_norm": 13.407437324523926, "learning_rate": 4.900555555555556e-06, "loss": 7.857862854003907, "step": 2970 }, { "epoch": 0.02975, "grad_norm": 9.821551322937012, "learning_rate": 4.900303030303031e-06, "loss": 7.998422241210937, "step": 2975 }, { "epoch": 0.0298, "grad_norm": 16.291223526000977, "learning_rate": 4.9000505050505054e-06, "loss": 7.767679595947266, "step": 2980 }, { "epoch": 0.02985, "grad_norm": 9.692864418029785, "learning_rate": 4.89979797979798e-06, "loss": 7.7779685974121096, "step": 2985 }, { "epoch": 0.0299, "grad_norm": 8.559731483459473, "learning_rate": 4.8995454545454555e-06, "loss": 7.768988037109375, "step": 2990 }, { "epoch": 0.02995, "grad_norm": 11.138997077941895, "learning_rate": 4.89929292929293e-06, "loss": 7.858936309814453, "step": 2995 }, { "epoch": 0.03, "grad_norm": 14.760675430297852, "learning_rate": 4.899040404040405e-06, "loss": 7.702169036865234, "step": 3000 }, { "epoch": 0.03005, "grad_norm": 11.880956649780273, "learning_rate": 4.8987878787878786e-06, "loss": 7.877500915527344, "step": 3005 }, { "epoch": 0.0301, "grad_norm": 9.274894714355469, "learning_rate": 4.898535353535354e-06, "loss": 7.837750244140625, "step": 3010 }, { "epoch": 0.03015, "grad_norm": 11.026609420776367, "learning_rate": 4.898282828282829e-06, "loss": 7.882962799072265, "step": 3015 }, { "epoch": 0.0302, "grad_norm": 9.467840194702148, "learning_rate": 4.898030303030303e-06, "loss": 7.864433288574219, "step": 3020 }, { "epoch": 0.03025, "grad_norm": 10.039671897888184, "learning_rate": 4.897777777777778e-06, "loss": 7.764442443847656, "step": 3025 }, { "epoch": 0.0303, "grad_norm": 14.373766899108887, "learning_rate": 4.897525252525253e-06, "loss": 7.7815399169921875, "step": 3030 }, { "epoch": 0.03035, "grad_norm": 10.865833282470703, "learning_rate": 4.897272727272728e-06, "loss": 7.799806213378906, "step": 3035 }, { "epoch": 0.0304, "grad_norm": 10.74267292022705, "learning_rate": 4.897020202020203e-06, "loss": 7.863565063476562, "step": 3040 }, { "epoch": 0.03045, "grad_norm": 15.908251762390137, "learning_rate": 4.896767676767677e-06, "loss": 7.915567016601562, "step": 3045 }, { "epoch": 0.0305, "grad_norm": 11.165153503417969, "learning_rate": 4.896515151515152e-06, "loss": 7.824169158935547, "step": 3050 }, { "epoch": 0.03055, "grad_norm": 10.920608520507812, "learning_rate": 4.8962626262626266e-06, "loss": 7.876708984375, "step": 3055 }, { "epoch": 0.0306, "grad_norm": 12.577484130859375, "learning_rate": 4.896010101010101e-06, "loss": 7.826139068603515, "step": 3060 }, { "epoch": 0.03065, "grad_norm": 8.74162769317627, "learning_rate": 4.895757575757576e-06, "loss": 7.811336517333984, "step": 3065 }, { "epoch": 0.0307, "grad_norm": 13.383956909179688, "learning_rate": 4.895505050505051e-06, "loss": 7.799156951904297, "step": 3070 }, { "epoch": 0.03075, "grad_norm": 12.37629508972168, "learning_rate": 4.895252525252526e-06, "loss": 7.838725280761719, "step": 3075 }, { "epoch": 0.0308, "grad_norm": 12.448808670043945, "learning_rate": 4.8950000000000006e-06, "loss": 7.899265289306641, "step": 3080 }, { "epoch": 0.03085, "grad_norm": 11.424176216125488, "learning_rate": 4.894747474747475e-06, "loss": 7.849867248535157, "step": 3085 }, { "epoch": 0.0309, "grad_norm": 10.070093154907227, "learning_rate": 4.89449494949495e-06, "loss": 7.8531547546386715, "step": 3090 }, { "epoch": 0.03095, "grad_norm": 11.844435691833496, "learning_rate": 4.8942424242424245e-06, "loss": 7.892112731933594, "step": 3095 }, { "epoch": 0.031, "grad_norm": 14.5014066696167, "learning_rate": 4.893989898989899e-06, "loss": 7.995967102050781, "step": 3100 }, { "epoch": 0.03105, "grad_norm": 10.123559951782227, "learning_rate": 4.893737373737374e-06, "loss": 7.7989662170410154, "step": 3105 }, { "epoch": 0.0311, "grad_norm": 6.547076225280762, "learning_rate": 4.893484848484849e-06, "loss": 7.9394691467285154, "step": 3110 }, { "epoch": 0.03115, "grad_norm": 8.729823112487793, "learning_rate": 4.893232323232324e-06, "loss": 7.886512756347656, "step": 3115 }, { "epoch": 0.0312, "grad_norm": 11.189919471740723, "learning_rate": 4.8929797979797985e-06, "loss": 7.83263931274414, "step": 3120 }, { "epoch": 0.03125, "grad_norm": 9.90947437286377, "learning_rate": 4.892727272727273e-06, "loss": 7.826506042480469, "step": 3125 }, { "epoch": 0.0313, "grad_norm": 10.01137924194336, "learning_rate": 4.892474747474748e-06, "loss": 7.817379760742187, "step": 3130 }, { "epoch": 0.03135, "grad_norm": 14.131281852722168, "learning_rate": 4.892222222222222e-06, "loss": 7.8085884094238285, "step": 3135 }, { "epoch": 0.0314, "grad_norm": 17.40117835998535, "learning_rate": 4.891969696969697e-06, "loss": 7.877810668945313, "step": 3140 }, { "epoch": 0.03145, "grad_norm": 9.756576538085938, "learning_rate": 4.891717171717172e-06, "loss": 7.851835632324219, "step": 3145 }, { "epoch": 0.0315, "grad_norm": 13.252785682678223, "learning_rate": 4.891464646464647e-06, "loss": 7.835446166992187, "step": 3150 }, { "epoch": 0.03155, "grad_norm": 17.378522872924805, "learning_rate": 4.891212121212122e-06, "loss": 7.894921112060547, "step": 3155 }, { "epoch": 0.0316, "grad_norm": 13.091304779052734, "learning_rate": 4.890959595959596e-06, "loss": 7.817993927001953, "step": 3160 }, { "epoch": 0.03165, "grad_norm": 11.820381164550781, "learning_rate": 4.890707070707071e-06, "loss": 7.849030303955078, "step": 3165 }, { "epoch": 0.0317, "grad_norm": 8.794419288635254, "learning_rate": 4.890454545454546e-06, "loss": 7.851338195800781, "step": 3170 }, { "epoch": 0.03175, "grad_norm": 13.124003410339355, "learning_rate": 4.89020202020202e-06, "loss": 7.814474487304688, "step": 3175 }, { "epoch": 0.0318, "grad_norm": 26.210655212402344, "learning_rate": 4.889949494949495e-06, "loss": 7.9348876953125, "step": 3180 }, { "epoch": 0.03185, "grad_norm": 7.255050182342529, "learning_rate": 4.8896969696969695e-06, "loss": 7.86275634765625, "step": 3185 }, { "epoch": 0.0319, "grad_norm": 12.721116065979004, "learning_rate": 4.889444444444445e-06, "loss": 7.847522735595703, "step": 3190 }, { "epoch": 0.03195, "grad_norm": 13.578813552856445, "learning_rate": 4.88919191919192e-06, "loss": 7.8977783203125, "step": 3195 }, { "epoch": 0.032, "grad_norm": 13.801925659179688, "learning_rate": 4.888939393939394e-06, "loss": 7.824278259277344, "step": 3200 }, { "epoch": 0.03205, "grad_norm": 13.033172607421875, "learning_rate": 4.888686868686869e-06, "loss": 7.8088737487792965, "step": 3205 }, { "epoch": 0.0321, "grad_norm": 12.746626853942871, "learning_rate": 4.888434343434344e-06, "loss": 7.7941162109375, "step": 3210 }, { "epoch": 0.03215, "grad_norm": 16.548858642578125, "learning_rate": 4.888181818181819e-06, "loss": 7.633403015136719, "step": 3215 }, { "epoch": 0.0322, "grad_norm": 14.139762878417969, "learning_rate": 4.887929292929294e-06, "loss": 7.902616882324219, "step": 3220 }, { "epoch": 0.03225, "grad_norm": 12.207056999206543, "learning_rate": 4.887676767676768e-06, "loss": 7.9230804443359375, "step": 3225 }, { "epoch": 0.0323, "grad_norm": 15.276846885681152, "learning_rate": 4.887424242424243e-06, "loss": 7.835971069335938, "step": 3230 }, { "epoch": 0.03235, "grad_norm": 10.172354698181152, "learning_rate": 4.8871717171717175e-06, "loss": 7.786913299560547, "step": 3235 }, { "epoch": 0.0324, "grad_norm": 10.403327941894531, "learning_rate": 4.886919191919192e-06, "loss": 7.803310394287109, "step": 3240 }, { "epoch": 0.03245, "grad_norm": 12.75370979309082, "learning_rate": 4.886666666666668e-06, "loss": 7.826334381103516, "step": 3245 }, { "epoch": 0.0325, "grad_norm": 9.607781410217285, "learning_rate": 4.886414141414142e-06, "loss": 7.865657806396484, "step": 3250 }, { "epoch": 0.03255, "grad_norm": 12.135005950927734, "learning_rate": 4.886161616161617e-06, "loss": 7.838246154785156, "step": 3255 }, { "epoch": 0.0326, "grad_norm": 11.085494041442871, "learning_rate": 4.8859090909090915e-06, "loss": 7.838145446777344, "step": 3260 }, { "epoch": 0.03265, "grad_norm": 7.834097862243652, "learning_rate": 4.885656565656566e-06, "loss": 7.820167541503906, "step": 3265 }, { "epoch": 0.0327, "grad_norm": 13.46898078918457, "learning_rate": 4.885404040404041e-06, "loss": 7.819593048095703, "step": 3270 }, { "epoch": 0.03275, "grad_norm": 9.230295181274414, "learning_rate": 4.885151515151515e-06, "loss": 7.825096130371094, "step": 3275 }, { "epoch": 0.0328, "grad_norm": 12.399385452270508, "learning_rate": 4.88489898989899e-06, "loss": 7.8096565246582035, "step": 3280 }, { "epoch": 0.03285, "grad_norm": 8.679828643798828, "learning_rate": 4.8846464646464655e-06, "loss": 7.786044311523438, "step": 3285 }, { "epoch": 0.0329, "grad_norm": 12.155028343200684, "learning_rate": 4.88439393939394e-06, "loss": 7.740892028808593, "step": 3290 }, { "epoch": 0.03295, "grad_norm": 9.502835273742676, "learning_rate": 4.884141414141415e-06, "loss": 7.781934356689453, "step": 3295 }, { "epoch": 0.033, "grad_norm": 11.309555053710938, "learning_rate": 4.883888888888889e-06, "loss": 7.623343658447266, "step": 3300 }, { "epoch": 0.03305, "grad_norm": 10.365216255187988, "learning_rate": 4.883636363636364e-06, "loss": 7.831349945068359, "step": 3305 }, { "epoch": 0.0331, "grad_norm": 14.26781177520752, "learning_rate": 4.883383838383839e-06, "loss": 7.744242858886719, "step": 3310 }, { "epoch": 0.03315, "grad_norm": 10.67682933807373, "learning_rate": 4.883131313131313e-06, "loss": 7.791787719726562, "step": 3315 }, { "epoch": 0.0332, "grad_norm": 12.412944793701172, "learning_rate": 4.882878787878788e-06, "loss": 7.76440658569336, "step": 3320 }, { "epoch": 0.03325, "grad_norm": 9.830862998962402, "learning_rate": 4.882626262626263e-06, "loss": 7.769853973388672, "step": 3325 }, { "epoch": 0.0333, "grad_norm": 16.250930786132812, "learning_rate": 4.882373737373738e-06, "loss": 7.786974334716797, "step": 3330 }, { "epoch": 0.03335, "grad_norm": 9.169273376464844, "learning_rate": 4.882121212121213e-06, "loss": 7.778826904296875, "step": 3335 }, { "epoch": 0.0334, "grad_norm": 13.367536544799805, "learning_rate": 4.881868686868687e-06, "loss": 7.774156951904297, "step": 3340 }, { "epoch": 0.03345, "grad_norm": 37.796852111816406, "learning_rate": 4.881616161616162e-06, "loss": 7.740109252929687, "step": 3345 }, { "epoch": 0.0335, "grad_norm": 16.945404052734375, "learning_rate": 4.8813636363636365e-06, "loss": 7.7668098449707035, "step": 3350 }, { "epoch": 0.03355, "grad_norm": 13.706332206726074, "learning_rate": 4.881111111111111e-06, "loss": 7.816128540039062, "step": 3355 }, { "epoch": 0.0336, "grad_norm": 7.511087417602539, "learning_rate": 4.880858585858586e-06, "loss": 7.75469970703125, "step": 3360 }, { "epoch": 0.03365, "grad_norm": 11.351165771484375, "learning_rate": 4.880606060606061e-06, "loss": 7.7419486999511715, "step": 3365 }, { "epoch": 0.0337, "grad_norm": 10.796441078186035, "learning_rate": 4.880353535353536e-06, "loss": 7.771745300292968, "step": 3370 }, { "epoch": 0.03375, "grad_norm": 16.676347732543945, "learning_rate": 4.8801010101010105e-06, "loss": 7.580793762207032, "step": 3375 }, { "epoch": 0.0338, "grad_norm": 15.298117637634277, "learning_rate": 4.879848484848485e-06, "loss": 7.8134208679199215, "step": 3380 }, { "epoch": 0.03385, "grad_norm": 12.955474853515625, "learning_rate": 4.879595959595961e-06, "loss": 7.789730834960937, "step": 3385 }, { "epoch": 0.0339, "grad_norm": 10.590466499328613, "learning_rate": 4.879343434343434e-06, "loss": 7.814054870605469, "step": 3390 }, { "epoch": 0.03395, "grad_norm": 9.768908500671387, "learning_rate": 4.879090909090909e-06, "loss": 7.757822418212891, "step": 3395 }, { "epoch": 0.034, "grad_norm": 14.224404335021973, "learning_rate": 4.878838383838384e-06, "loss": 7.782001495361328, "step": 3400 }, { "epoch": 0.03405, "grad_norm": 11.204718589782715, "learning_rate": 4.878585858585859e-06, "loss": 7.794731140136719, "step": 3405 }, { "epoch": 0.0341, "grad_norm": 15.120346069335938, "learning_rate": 4.878333333333334e-06, "loss": 7.779092407226562, "step": 3410 }, { "epoch": 0.03415, "grad_norm": 8.615245819091797, "learning_rate": 4.878080808080808e-06, "loss": 7.713128662109375, "step": 3415 }, { "epoch": 0.0342, "grad_norm": 11.683296203613281, "learning_rate": 4.877828282828283e-06, "loss": 7.730987548828125, "step": 3420 }, { "epoch": 0.03425, "grad_norm": 12.098353385925293, "learning_rate": 4.8775757575757585e-06, "loss": 7.7273681640625, "step": 3425 }, { "epoch": 0.0343, "grad_norm": 15.773181915283203, "learning_rate": 4.877323232323233e-06, "loss": 7.8121803283691404, "step": 3430 }, { "epoch": 0.03435, "grad_norm": 13.781766891479492, "learning_rate": 4.877070707070708e-06, "loss": 7.811567687988282, "step": 3435 }, { "epoch": 0.0344, "grad_norm": 13.988224983215332, "learning_rate": 4.876818181818182e-06, "loss": 7.798973083496094, "step": 3440 }, { "epoch": 0.03445, "grad_norm": 12.717635154724121, "learning_rate": 4.876565656565657e-06, "loss": 7.812104797363281, "step": 3445 }, { "epoch": 0.0345, "grad_norm": 12.864235877990723, "learning_rate": 4.876313131313132e-06, "loss": 7.818812561035156, "step": 3450 }, { "epoch": 0.03455, "grad_norm": 13.872276306152344, "learning_rate": 4.876060606060606e-06, "loss": 7.742041778564453, "step": 3455 }, { "epoch": 0.0346, "grad_norm": 44.00494384765625, "learning_rate": 4.875808080808081e-06, "loss": 7.885556030273437, "step": 3460 }, { "epoch": 0.03465, "grad_norm": 11.197190284729004, "learning_rate": 4.875555555555556e-06, "loss": 7.77191162109375, "step": 3465 }, { "epoch": 0.0347, "grad_norm": 9.688435554504395, "learning_rate": 4.875303030303031e-06, "loss": 7.770354461669922, "step": 3470 }, { "epoch": 0.03475, "grad_norm": 13.284727096557617, "learning_rate": 4.875050505050506e-06, "loss": 7.782114410400391, "step": 3475 }, { "epoch": 0.0348, "grad_norm": 9.348957061767578, "learning_rate": 4.87479797979798e-06, "loss": 7.729949951171875, "step": 3480 }, { "epoch": 0.03485, "grad_norm": 15.270503044128418, "learning_rate": 4.874545454545455e-06, "loss": 7.761311340332031, "step": 3485 }, { "epoch": 0.0349, "grad_norm": 11.449048042297363, "learning_rate": 4.8742929292929296e-06, "loss": 7.716648101806641, "step": 3490 }, { "epoch": 0.03495, "grad_norm": 13.489399909973145, "learning_rate": 4.874040404040404e-06, "loss": 7.776431274414063, "step": 3495 }, { "epoch": 0.035, "grad_norm": 13.304274559020996, "learning_rate": 4.873787878787879e-06, "loss": 7.819698333740234, "step": 3500 }, { "epoch": 0.03505, "grad_norm": 8.34121036529541, "learning_rate": 4.873535353535354e-06, "loss": 7.81788330078125, "step": 3505 }, { "epoch": 0.0351, "grad_norm": 16.415550231933594, "learning_rate": 4.873282828282829e-06, "loss": 7.787786865234375, "step": 3510 }, { "epoch": 0.03515, "grad_norm": 16.252601623535156, "learning_rate": 4.8730303030303036e-06, "loss": 7.6178230285644535, "step": 3515 }, { "epoch": 0.0352, "grad_norm": 19.282560348510742, "learning_rate": 4.872777777777778e-06, "loss": 7.788005828857422, "step": 3520 }, { "epoch": 0.03525, "grad_norm": 9.406903266906738, "learning_rate": 4.872525252525253e-06, "loss": 7.751939392089843, "step": 3525 }, { "epoch": 0.0353, "grad_norm": 12.081809997558594, "learning_rate": 4.8722727272727274e-06, "loss": 7.71480712890625, "step": 3530 }, { "epoch": 0.03535, "grad_norm": 14.101256370544434, "learning_rate": 4.872020202020202e-06, "loss": 7.75115966796875, "step": 3535 }, { "epoch": 0.0354, "grad_norm": 12.560755729675293, "learning_rate": 4.871767676767677e-06, "loss": 7.680768585205078, "step": 3540 }, { "epoch": 0.03545, "grad_norm": 11.515758514404297, "learning_rate": 4.871515151515152e-06, "loss": 7.749300384521485, "step": 3545 }, { "epoch": 0.0355, "grad_norm": 14.9647798538208, "learning_rate": 4.871262626262627e-06, "loss": 7.757780456542969, "step": 3550 }, { "epoch": 0.03555, "grad_norm": 9.771180152893066, "learning_rate": 4.8710101010101014e-06, "loss": 7.757294464111328, "step": 3555 }, { "epoch": 0.0356, "grad_norm": 13.884908676147461, "learning_rate": 4.870757575757576e-06, "loss": 7.759504699707032, "step": 3560 }, { "epoch": 0.03565, "grad_norm": 12.535131454467773, "learning_rate": 4.870505050505051e-06, "loss": 7.782355499267578, "step": 3565 }, { "epoch": 0.0357, "grad_norm": 12.51498031616211, "learning_rate": 4.870252525252525e-06, "loss": 7.738442993164062, "step": 3570 }, { "epoch": 0.03575, "grad_norm": 15.00146770477295, "learning_rate": 4.87e-06, "loss": 7.756126403808594, "step": 3575 }, { "epoch": 0.0358, "grad_norm": 15.180660247802734, "learning_rate": 4.869747474747475e-06, "loss": 7.742086791992188, "step": 3580 }, { "epoch": 0.03585, "grad_norm": 8.391596794128418, "learning_rate": 4.86949494949495e-06, "loss": 7.743172454833984, "step": 3585 }, { "epoch": 0.0359, "grad_norm": 11.850814819335938, "learning_rate": 4.869242424242425e-06, "loss": 7.737158966064453, "step": 3590 }, { "epoch": 0.03595, "grad_norm": 5.815839767456055, "learning_rate": 4.868989898989899e-06, "loss": 7.836625671386718, "step": 3595 }, { "epoch": 0.036, "grad_norm": 11.614660263061523, "learning_rate": 4.868737373737374e-06, "loss": 7.730132293701172, "step": 3600 }, { "epoch": 0.03605, "grad_norm": 14.573387145996094, "learning_rate": 4.8684848484848494e-06, "loss": 7.728646087646484, "step": 3605 }, { "epoch": 0.0361, "grad_norm": 13.125894546508789, "learning_rate": 4.868232323232324e-06, "loss": 7.785981750488281, "step": 3610 }, { "epoch": 0.03615, "grad_norm": 14.153606414794922, "learning_rate": 4.867979797979798e-06, "loss": 7.760801696777344, "step": 3615 }, { "epoch": 0.0362, "grad_norm": 12.918556213378906, "learning_rate": 4.8677272727272725e-06, "loss": 7.75940170288086, "step": 3620 }, { "epoch": 0.03625, "grad_norm": 10.163634300231934, "learning_rate": 4.867474747474748e-06, "loss": 7.8027702331542965, "step": 3625 }, { "epoch": 0.0363, "grad_norm": 15.58565902709961, "learning_rate": 4.867222222222223e-06, "loss": 7.707752227783203, "step": 3630 }, { "epoch": 0.03635, "grad_norm": 13.527798652648926, "learning_rate": 4.866969696969697e-06, "loss": 7.739234924316406, "step": 3635 }, { "epoch": 0.0364, "grad_norm": 35.88656234741211, "learning_rate": 4.866717171717172e-06, "loss": 7.563335418701172, "step": 3640 }, { "epoch": 0.03645, "grad_norm": 10.77745532989502, "learning_rate": 4.866464646464647e-06, "loss": 7.723351287841797, "step": 3645 }, { "epoch": 0.0365, "grad_norm": 17.415369033813477, "learning_rate": 4.866212121212122e-06, "loss": 7.711228942871093, "step": 3650 }, { "epoch": 0.03655, "grad_norm": 12.007569313049316, "learning_rate": 4.865959595959597e-06, "loss": 7.737602996826172, "step": 3655 }, { "epoch": 0.0366, "grad_norm": 18.992454528808594, "learning_rate": 4.865707070707071e-06, "loss": 7.739046478271485, "step": 3660 }, { "epoch": 0.03665, "grad_norm": 13.618474006652832, "learning_rate": 4.865454545454546e-06, "loss": 7.712057495117188, "step": 3665 }, { "epoch": 0.0367, "grad_norm": 10.757424354553223, "learning_rate": 4.8652020202020205e-06, "loss": 7.73437728881836, "step": 3670 }, { "epoch": 0.03675, "grad_norm": 12.903244018554688, "learning_rate": 4.864949494949495e-06, "loss": 7.683928680419922, "step": 3675 }, { "epoch": 0.0368, "grad_norm": 12.843277931213379, "learning_rate": 4.864696969696971e-06, "loss": 7.620243835449219, "step": 3680 }, { "epoch": 0.03685, "grad_norm": 17.190006256103516, "learning_rate": 4.864444444444445e-06, "loss": 7.738597106933594, "step": 3685 }, { "epoch": 0.0369, "grad_norm": 13.910325050354004, "learning_rate": 4.86419191919192e-06, "loss": 7.7253364562988285, "step": 3690 }, { "epoch": 0.03695, "grad_norm": 13.186540603637695, "learning_rate": 4.8639393939393945e-06, "loss": 7.701548004150391, "step": 3695 }, { "epoch": 0.037, "grad_norm": 13.836938858032227, "learning_rate": 4.863686868686869e-06, "loss": 7.756971740722657, "step": 3700 }, { "epoch": 0.03705, "grad_norm": 18.493896484375, "learning_rate": 4.863434343434344e-06, "loss": 7.649756622314453, "step": 3705 }, { "epoch": 0.0371, "grad_norm": 16.073001861572266, "learning_rate": 4.863181818181818e-06, "loss": 7.735836029052734, "step": 3710 }, { "epoch": 0.03715, "grad_norm": 12.871485710144043, "learning_rate": 4.862929292929293e-06, "loss": 7.784506988525391, "step": 3715 }, { "epoch": 0.0372, "grad_norm": 13.114919662475586, "learning_rate": 4.8626767676767685e-06, "loss": 7.7187034606933596, "step": 3720 }, { "epoch": 0.03725, "grad_norm": 11.471478462219238, "learning_rate": 4.862424242424243e-06, "loss": 7.707196044921875, "step": 3725 }, { "epoch": 0.0373, "grad_norm": 14.614140510559082, "learning_rate": 4.862171717171718e-06, "loss": 7.726932525634766, "step": 3730 }, { "epoch": 0.03735, "grad_norm": 13.066116333007812, "learning_rate": 4.861919191919192e-06, "loss": 7.75953369140625, "step": 3735 }, { "epoch": 0.0374, "grad_norm": 16.491609573364258, "learning_rate": 4.861666666666667e-06, "loss": 7.751536560058594, "step": 3740 }, { "epoch": 0.03745, "grad_norm": 14.286236763000488, "learning_rate": 4.861414141414142e-06, "loss": 7.713172912597656, "step": 3745 }, { "epoch": 0.0375, "grad_norm": 8.426702499389648, "learning_rate": 4.861161616161616e-06, "loss": 7.841518402099609, "step": 3750 }, { "epoch": 0.03755, "grad_norm": 18.59311294555664, "learning_rate": 4.860909090909091e-06, "loss": 7.849253845214844, "step": 3755 }, { "epoch": 0.0376, "grad_norm": 13.200291633605957, "learning_rate": 4.860656565656566e-06, "loss": 7.809975433349609, "step": 3760 }, { "epoch": 0.03765, "grad_norm": 12.900741577148438, "learning_rate": 4.860404040404041e-06, "loss": 7.745358276367187, "step": 3765 }, { "epoch": 0.0377, "grad_norm": 14.837626457214355, "learning_rate": 4.860151515151516e-06, "loss": 7.759149169921875, "step": 3770 }, { "epoch": 0.03775, "grad_norm": 11.645899772644043, "learning_rate": 4.85989898989899e-06, "loss": 7.754624938964843, "step": 3775 }, { "epoch": 0.0378, "grad_norm": 15.362348556518555, "learning_rate": 4.859646464646465e-06, "loss": 7.73228759765625, "step": 3780 }, { "epoch": 0.03785, "grad_norm": 17.280366897583008, "learning_rate": 4.8593939393939395e-06, "loss": 7.691263580322266, "step": 3785 }, { "epoch": 0.0379, "grad_norm": 14.246164321899414, "learning_rate": 4.859141414141414e-06, "loss": 7.673373413085938, "step": 3790 }, { "epoch": 0.03795, "grad_norm": 14.515605926513672, "learning_rate": 4.858888888888889e-06, "loss": 7.734326934814453, "step": 3795 }, { "epoch": 0.038, "grad_norm": 12.578495025634766, "learning_rate": 4.858636363636364e-06, "loss": 7.653805541992187, "step": 3800 }, { "epoch": 0.03805, "grad_norm": 15.745588302612305, "learning_rate": 4.858383838383839e-06, "loss": 7.675619506835938, "step": 3805 }, { "epoch": 0.0381, "grad_norm": 12.859699249267578, "learning_rate": 4.8581313131313135e-06, "loss": 7.747759246826172, "step": 3810 }, { "epoch": 0.03815, "grad_norm": 16.589061737060547, "learning_rate": 4.857878787878788e-06, "loss": 7.6911369323730465, "step": 3815 }, { "epoch": 0.0382, "grad_norm": 11.842117309570312, "learning_rate": 4.857626262626264e-06, "loss": 7.714280700683593, "step": 3820 }, { "epoch": 0.03825, "grad_norm": 12.489605903625488, "learning_rate": 4.857373737373738e-06, "loss": 7.73913803100586, "step": 3825 }, { "epoch": 0.0383, "grad_norm": 16.863588333129883, "learning_rate": 4.857121212121213e-06, "loss": 7.719682312011718, "step": 3830 }, { "epoch": 0.03835, "grad_norm": 13.52422046661377, "learning_rate": 4.856868686868687e-06, "loss": 7.680511474609375, "step": 3835 }, { "epoch": 0.0384, "grad_norm": 10.945335388183594, "learning_rate": 4.856616161616162e-06, "loss": 7.664241790771484, "step": 3840 }, { "epoch": 0.03845, "grad_norm": 15.795232772827148, "learning_rate": 4.856363636363637e-06, "loss": 7.6729286193847654, "step": 3845 }, { "epoch": 0.0385, "grad_norm": 13.654508590698242, "learning_rate": 4.856111111111111e-06, "loss": 7.665700531005859, "step": 3850 }, { "epoch": 0.03855, "grad_norm": 14.343942642211914, "learning_rate": 4.855858585858586e-06, "loss": 7.692675018310547, "step": 3855 }, { "epoch": 0.0386, "grad_norm": 13.14936351776123, "learning_rate": 4.8556060606060615e-06, "loss": 7.710245513916016, "step": 3860 }, { "epoch": 0.03865, "grad_norm": 16.306766510009766, "learning_rate": 4.855353535353536e-06, "loss": 7.531985473632813, "step": 3865 }, { "epoch": 0.0387, "grad_norm": 13.651342391967773, "learning_rate": 4.855101010101011e-06, "loss": 7.626345825195313, "step": 3870 }, { "epoch": 0.03875, "grad_norm": 11.634018898010254, "learning_rate": 4.854848484848485e-06, "loss": 7.65216293334961, "step": 3875 }, { "epoch": 0.0388, "grad_norm": 17.27486228942871, "learning_rate": 4.85459595959596e-06, "loss": 7.7066902160644535, "step": 3880 }, { "epoch": 0.03885, "grad_norm": 11.476853370666504, "learning_rate": 4.854343434343435e-06, "loss": 7.699725341796875, "step": 3885 }, { "epoch": 0.0389, "grad_norm": 17.949369430541992, "learning_rate": 4.854090909090909e-06, "loss": 7.683647155761719, "step": 3890 }, { "epoch": 0.03895, "grad_norm": 13.202509880065918, "learning_rate": 4.853838383838384e-06, "loss": 7.888057708740234, "step": 3895 }, { "epoch": 0.039, "grad_norm": 12.673317909240723, "learning_rate": 4.853585858585859e-06, "loss": 7.70223388671875, "step": 3900 }, { "epoch": 0.03905, "grad_norm": 15.716659545898438, "learning_rate": 4.853333333333334e-06, "loss": 7.728702545166016, "step": 3905 }, { "epoch": 0.0391, "grad_norm": 13.93894100189209, "learning_rate": 4.853080808080809e-06, "loss": 7.722405242919922, "step": 3910 }, { "epoch": 0.03915, "grad_norm": 14.372413635253906, "learning_rate": 4.852828282828283e-06, "loss": 7.714093017578125, "step": 3915 }, { "epoch": 0.0392, "grad_norm": 12.8680419921875, "learning_rate": 4.852575757575758e-06, "loss": 7.681906890869141, "step": 3920 }, { "epoch": 0.03925, "grad_norm": 19.018709182739258, "learning_rate": 4.8523232323232325e-06, "loss": 7.779640197753906, "step": 3925 }, { "epoch": 0.0393, "grad_norm": 10.585620880126953, "learning_rate": 4.852070707070707e-06, "loss": 7.666333770751953, "step": 3930 }, { "epoch": 0.03935, "grad_norm": 16.859939575195312, "learning_rate": 4.851818181818182e-06, "loss": 7.710064697265625, "step": 3935 }, { "epoch": 0.0394, "grad_norm": 11.206755638122559, "learning_rate": 4.851565656565657e-06, "loss": 7.592276000976563, "step": 3940 }, { "epoch": 0.03945, "grad_norm": 18.729412078857422, "learning_rate": 4.851313131313132e-06, "loss": 7.3603462219238285, "step": 3945 }, { "epoch": 0.0395, "grad_norm": 19.423795700073242, "learning_rate": 4.8510606060606065e-06, "loss": 7.782301330566407, "step": 3950 }, { "epoch": 0.03955, "grad_norm": 13.597569465637207, "learning_rate": 4.850808080808081e-06, "loss": 7.724127197265625, "step": 3955 }, { "epoch": 0.0396, "grad_norm": 15.545955657958984, "learning_rate": 4.850555555555556e-06, "loss": 7.724192810058594, "step": 3960 }, { "epoch": 0.03965, "grad_norm": 16.692977905273438, "learning_rate": 4.8503030303030304e-06, "loss": 7.781207275390625, "step": 3965 }, { "epoch": 0.0397, "grad_norm": 12.277082443237305, "learning_rate": 4.850050505050505e-06, "loss": 7.832563781738282, "step": 3970 }, { "epoch": 0.03975, "grad_norm": 14.98580265045166, "learning_rate": 4.84979797979798e-06, "loss": 7.665301513671875, "step": 3975 }, { "epoch": 0.0398, "grad_norm": 15.455510139465332, "learning_rate": 4.849545454545455e-06, "loss": 7.700946807861328, "step": 3980 }, { "epoch": 0.03985, "grad_norm": 14.547542572021484, "learning_rate": 4.84929292929293e-06, "loss": 7.641242218017578, "step": 3985 }, { "epoch": 0.0399, "grad_norm": 14.083468437194824, "learning_rate": 4.849040404040404e-06, "loss": 7.6455078125, "step": 3990 }, { "epoch": 0.03995, "grad_norm": 14.966310501098633, "learning_rate": 4.848787878787879e-06, "loss": 7.675653076171875, "step": 3995 }, { "epoch": 0.04, "grad_norm": 17.443546295166016, "learning_rate": 4.848535353535354e-06, "loss": 7.711181640625, "step": 4000 }, { "epoch": 0.04005, "grad_norm": 19.981847763061523, "learning_rate": 4.848282828282828e-06, "loss": 7.633578491210938, "step": 4005 }, { "epoch": 0.0401, "grad_norm": 17.87622833251953, "learning_rate": 4.848030303030303e-06, "loss": 7.610812377929688, "step": 4010 }, { "epoch": 0.04015, "grad_norm": 13.932934761047363, "learning_rate": 4.8477777777777776e-06, "loss": 7.638751220703125, "step": 4015 }, { "epoch": 0.0402, "grad_norm": 16.35774803161621, "learning_rate": 4.847525252525253e-06, "loss": 7.602130889892578, "step": 4020 }, { "epoch": 0.04025, "grad_norm": 27.973905563354492, "learning_rate": 4.847272727272728e-06, "loss": 7.621735382080078, "step": 4025 }, { "epoch": 0.0403, "grad_norm": 19.40078353881836, "learning_rate": 4.847020202020202e-06, "loss": 7.620541381835937, "step": 4030 }, { "epoch": 0.04035, "grad_norm": 16.58570671081543, "learning_rate": 4.846767676767677e-06, "loss": 7.641993713378906, "step": 4035 }, { "epoch": 0.0404, "grad_norm": 13.258164405822754, "learning_rate": 4.846515151515152e-06, "loss": 7.647190093994141, "step": 4040 }, { "epoch": 0.04045, "grad_norm": 11.842677116394043, "learning_rate": 4.846262626262627e-06, "loss": 7.694181823730469, "step": 4045 }, { "epoch": 0.0405, "grad_norm": 13.986248016357422, "learning_rate": 4.846010101010102e-06, "loss": 7.566976928710938, "step": 4050 }, { "epoch": 0.04055, "grad_norm": 17.84900665283203, "learning_rate": 4.8457575757575755e-06, "loss": 7.714181518554687, "step": 4055 }, { "epoch": 0.0406, "grad_norm": 10.624317169189453, "learning_rate": 4.845505050505051e-06, "loss": 7.6819923400878904, "step": 4060 }, { "epoch": 0.04065, "grad_norm": 18.679351806640625, "learning_rate": 4.8452525252525256e-06, "loss": 7.698751068115234, "step": 4065 }, { "epoch": 0.0407, "grad_norm": 15.299365997314453, "learning_rate": 4.845e-06, "loss": 7.615534973144531, "step": 4070 }, { "epoch": 0.04075, "grad_norm": 14.55405044555664, "learning_rate": 4.844747474747476e-06, "loss": 7.635582733154297, "step": 4075 }, { "epoch": 0.0408, "grad_norm": 13.300416946411133, "learning_rate": 4.84449494949495e-06, "loss": 7.6629280090332035, "step": 4080 }, { "epoch": 0.04085, "grad_norm": 13.400941848754883, "learning_rate": 4.844242424242425e-06, "loss": 7.692926025390625, "step": 4085 }, { "epoch": 0.0409, "grad_norm": 14.231049537658691, "learning_rate": 4.8439898989898996e-06, "loss": 7.654347229003906, "step": 4090 }, { "epoch": 0.04095, "grad_norm": 12.940564155578613, "learning_rate": 4.843737373737374e-06, "loss": 7.706210327148438, "step": 4095 }, { "epoch": 0.041, "grad_norm": 16.55497169494629, "learning_rate": 4.843484848484849e-06, "loss": 7.6326957702636715, "step": 4100 }, { "epoch": 0.04105, "grad_norm": 13.479339599609375, "learning_rate": 4.8432323232323235e-06, "loss": 7.657125091552734, "step": 4105 }, { "epoch": 0.0411, "grad_norm": 13.092320442199707, "learning_rate": 4.842979797979798e-06, "loss": 7.640754699707031, "step": 4110 }, { "epoch": 0.04115, "grad_norm": 13.751302719116211, "learning_rate": 4.8427272727272736e-06, "loss": 7.686354064941407, "step": 4115 }, { "epoch": 0.0412, "grad_norm": 15.210939407348633, "learning_rate": 4.842474747474748e-06, "loss": 7.665440368652344, "step": 4120 }, { "epoch": 0.04125, "grad_norm": 16.909770965576172, "learning_rate": 4.842222222222223e-06, "loss": 7.6381683349609375, "step": 4125 }, { "epoch": 0.0413, "grad_norm": 12.866313934326172, "learning_rate": 4.8419696969696975e-06, "loss": 7.716291809082032, "step": 4130 }, { "epoch": 0.04135, "grad_norm": 19.399578094482422, "learning_rate": 4.841717171717172e-06, "loss": 7.6649169921875, "step": 4135 }, { "epoch": 0.0414, "grad_norm": 12.714970588684082, "learning_rate": 4.841464646464647e-06, "loss": 7.613687133789062, "step": 4140 }, { "epoch": 0.04145, "grad_norm": 14.162364959716797, "learning_rate": 4.841212121212121e-06, "loss": 7.625340270996094, "step": 4145 }, { "epoch": 0.0415, "grad_norm": 10.602949142456055, "learning_rate": 4.840959595959596e-06, "loss": 7.680445861816406, "step": 4150 }, { "epoch": 0.04155, "grad_norm": 18.514949798583984, "learning_rate": 4.8407070707070715e-06, "loss": 7.652581024169922, "step": 4155 }, { "epoch": 0.0416, "grad_norm": 15.988984107971191, "learning_rate": 4.840454545454546e-06, "loss": 7.637830352783203, "step": 4160 }, { "epoch": 0.04165, "grad_norm": 15.996387481689453, "learning_rate": 4.840202020202021e-06, "loss": 7.652818298339843, "step": 4165 }, { "epoch": 0.0417, "grad_norm": 15.495753288269043, "learning_rate": 4.839949494949495e-06, "loss": 7.515275573730468, "step": 4170 }, { "epoch": 0.04175, "grad_norm": 19.722932815551758, "learning_rate": 4.83969696969697e-06, "loss": 7.660208129882813, "step": 4175 }, { "epoch": 0.0418, "grad_norm": 37.776729583740234, "learning_rate": 4.839444444444445e-06, "loss": 7.7129676818847654, "step": 4180 }, { "epoch": 0.04185, "grad_norm": 16.460308074951172, "learning_rate": 4.839191919191919e-06, "loss": 7.772220611572266, "step": 4185 }, { "epoch": 0.0419, "grad_norm": 9.483863830566406, "learning_rate": 4.838939393939394e-06, "loss": 7.748939514160156, "step": 4190 }, { "epoch": 0.04195, "grad_norm": 15.430987358093262, "learning_rate": 4.838686868686869e-06, "loss": 7.662649536132813, "step": 4195 }, { "epoch": 0.042, "grad_norm": 14.180061340332031, "learning_rate": 4.838434343434344e-06, "loss": 7.616654968261718, "step": 4200 }, { "epoch": 0.04205, "grad_norm": 18.337146759033203, "learning_rate": 4.838181818181819e-06, "loss": 7.617402648925781, "step": 4205 }, { "epoch": 0.0421, "grad_norm": 16.109094619750977, "learning_rate": 4.837929292929293e-06, "loss": 7.69122314453125, "step": 4210 }, { "epoch": 0.04215, "grad_norm": 10.394975662231445, "learning_rate": 4.837676767676769e-06, "loss": 7.663023376464844, "step": 4215 }, { "epoch": 0.0422, "grad_norm": 16.000280380249023, "learning_rate": 4.8374242424242425e-06, "loss": 7.65257568359375, "step": 4220 }, { "epoch": 0.04225, "grad_norm": 18.61859893798828, "learning_rate": 4.837171717171717e-06, "loss": 7.626732635498047, "step": 4225 }, { "epoch": 0.0423, "grad_norm": 11.536083221435547, "learning_rate": 4.836919191919192e-06, "loss": 7.665022277832032, "step": 4230 }, { "epoch": 0.04235, "grad_norm": 41.57855987548828, "learning_rate": 4.836666666666667e-06, "loss": 7.699269104003906, "step": 4235 }, { "epoch": 0.0424, "grad_norm": 14.417675018310547, "learning_rate": 4.836414141414142e-06, "loss": 7.644993591308594, "step": 4240 }, { "epoch": 0.04245, "grad_norm": 12.666250228881836, "learning_rate": 4.8361616161616165e-06, "loss": 7.627330017089844, "step": 4245 }, { "epoch": 0.0425, "grad_norm": 19.791791915893555, "learning_rate": 4.835909090909091e-06, "loss": 7.640000915527343, "step": 4250 }, { "epoch": 0.04255, "grad_norm": 15.094975471496582, "learning_rate": 4.835656565656567e-06, "loss": 7.6606597900390625, "step": 4255 }, { "epoch": 0.0426, "grad_norm": 18.30975341796875, "learning_rate": 4.835404040404041e-06, "loss": 7.660453033447266, "step": 4260 }, { "epoch": 0.04265, "grad_norm": 11.49875545501709, "learning_rate": 4.835151515151516e-06, "loss": 7.6065528869628904, "step": 4265 }, { "epoch": 0.0427, "grad_norm": 15.26554012298584, "learning_rate": 4.8348989898989905e-06, "loss": 7.63875732421875, "step": 4270 }, { "epoch": 0.04275, "grad_norm": 14.745803833007812, "learning_rate": 4.834646464646465e-06, "loss": 7.5786598205566404, "step": 4275 }, { "epoch": 0.0428, "grad_norm": 13.778339385986328, "learning_rate": 4.83439393939394e-06, "loss": 7.577809143066406, "step": 4280 }, { "epoch": 0.04285, "grad_norm": 71.23065948486328, "learning_rate": 4.834141414141414e-06, "loss": 7.838609313964843, "step": 4285 }, { "epoch": 0.0429, "grad_norm": 15.611302375793457, "learning_rate": 4.833888888888889e-06, "loss": 7.669642639160156, "step": 4290 }, { "epoch": 0.04295, "grad_norm": 27.88692855834961, "learning_rate": 4.8336363636363645e-06, "loss": 7.691693878173828, "step": 4295 }, { "epoch": 0.043, "grad_norm": 13.574361801147461, "learning_rate": 4.833383838383839e-06, "loss": 7.574779510498047, "step": 4300 }, { "epoch": 0.04305, "grad_norm": 16.315078735351562, "learning_rate": 4.833131313131314e-06, "loss": 7.610124206542968, "step": 4305 }, { "epoch": 0.0431, "grad_norm": 17.865846633911133, "learning_rate": 4.832878787878788e-06, "loss": 7.666054534912109, "step": 4310 }, { "epoch": 0.04315, "grad_norm": 15.589762687683105, "learning_rate": 4.832626262626263e-06, "loss": 7.654141235351562, "step": 4315 }, { "epoch": 0.0432, "grad_norm": 14.384224891662598, "learning_rate": 4.832373737373738e-06, "loss": 7.657331085205078, "step": 4320 }, { "epoch": 0.04325, "grad_norm": 18.870365142822266, "learning_rate": 4.832121212121212e-06, "loss": 7.663608551025391, "step": 4325 }, { "epoch": 0.0433, "grad_norm": 13.699721336364746, "learning_rate": 4.831868686868687e-06, "loss": 7.61583251953125, "step": 4330 }, { "epoch": 0.04335, "grad_norm": 10.430669784545898, "learning_rate": 4.831616161616162e-06, "loss": 7.440367126464844, "step": 4335 }, { "epoch": 0.0434, "grad_norm": 17.84463119506836, "learning_rate": 4.831363636363637e-06, "loss": 7.6256263732910154, "step": 4340 }, { "epoch": 0.04345, "grad_norm": 18.664295196533203, "learning_rate": 4.831111111111112e-06, "loss": 7.5646514892578125, "step": 4345 }, { "epoch": 0.0435, "grad_norm": 14.913771629333496, "learning_rate": 4.830858585858586e-06, "loss": 7.592693328857422, "step": 4350 }, { "epoch": 0.04355, "grad_norm": 18.53111457824707, "learning_rate": 4.830606060606061e-06, "loss": 7.609026336669922, "step": 4355 }, { "epoch": 0.0436, "grad_norm": 14.158620834350586, "learning_rate": 4.8303535353535355e-06, "loss": 7.6582801818847654, "step": 4360 }, { "epoch": 0.04365, "grad_norm": 11.471774101257324, "learning_rate": 4.83010101010101e-06, "loss": 7.5953857421875, "step": 4365 }, { "epoch": 0.0437, "grad_norm": 15.537967681884766, "learning_rate": 4.829848484848485e-06, "loss": 7.632429504394532, "step": 4370 }, { "epoch": 0.04375, "grad_norm": 12.672298431396484, "learning_rate": 4.82959595959596e-06, "loss": 7.557473754882812, "step": 4375 }, { "epoch": 0.0438, "grad_norm": 12.74938678741455, "learning_rate": 4.829343434343435e-06, "loss": 7.6074066162109375, "step": 4380 }, { "epoch": 0.04385, "grad_norm": 22.59604835510254, "learning_rate": 4.8290909090909095e-06, "loss": 7.599050903320313, "step": 4385 }, { "epoch": 0.0439, "grad_norm": 14.229994773864746, "learning_rate": 4.828838383838384e-06, "loss": 7.580418395996094, "step": 4390 }, { "epoch": 0.04395, "grad_norm": 14.12797737121582, "learning_rate": 4.828585858585859e-06, "loss": 7.601800537109375, "step": 4395 }, { "epoch": 0.044, "grad_norm": 15.110177040100098, "learning_rate": 4.828333333333333e-06, "loss": 7.57899169921875, "step": 4400 }, { "epoch": 0.04405, "grad_norm": 14.646195411682129, "learning_rate": 4.828080808080808e-06, "loss": 7.601806640625, "step": 4405 }, { "epoch": 0.0441, "grad_norm": 15.992776870727539, "learning_rate": 4.827828282828283e-06, "loss": 7.629190826416016, "step": 4410 }, { "epoch": 0.04415, "grad_norm": 15.082976341247559, "learning_rate": 4.827575757575758e-06, "loss": 7.578925323486328, "step": 4415 }, { "epoch": 0.0442, "grad_norm": 16.59264373779297, "learning_rate": 4.827323232323233e-06, "loss": 7.598355865478515, "step": 4420 }, { "epoch": 0.04425, "grad_norm": 17.09652328491211, "learning_rate": 4.827070707070707e-06, "loss": 7.630061340332031, "step": 4425 }, { "epoch": 0.0443, "grad_norm": 10.543344497680664, "learning_rate": 4.826818181818182e-06, "loss": 7.549812316894531, "step": 4430 }, { "epoch": 0.04435, "grad_norm": 15.059334754943848, "learning_rate": 4.8265656565656575e-06, "loss": 7.591337585449219, "step": 4435 }, { "epoch": 0.0444, "grad_norm": 42.26758575439453, "learning_rate": 4.826313131313132e-06, "loss": 7.545437622070312, "step": 4440 }, { "epoch": 0.04445, "grad_norm": 21.28137969970703, "learning_rate": 4.826060606060606e-06, "loss": 7.5824432373046875, "step": 4445 }, { "epoch": 0.0445, "grad_norm": 18.189760208129883, "learning_rate": 4.8258080808080806e-06, "loss": 7.641652679443359, "step": 4450 }, { "epoch": 0.04455, "grad_norm": 13.544581413269043, "learning_rate": 4.825555555555556e-06, "loss": 7.615351867675781, "step": 4455 }, { "epoch": 0.0446, "grad_norm": 18.1938533782959, "learning_rate": 4.825303030303031e-06, "loss": 7.605780029296875, "step": 4460 }, { "epoch": 0.04465, "grad_norm": 11.93684196472168, "learning_rate": 4.825050505050505e-06, "loss": 7.622013854980469, "step": 4465 }, { "epoch": 0.0447, "grad_norm": 15.508646965026855, "learning_rate": 4.82479797979798e-06, "loss": 7.598514556884766, "step": 4470 }, { "epoch": 0.04475, "grad_norm": 9.869802474975586, "learning_rate": 4.824545454545455e-06, "loss": 7.584848022460937, "step": 4475 }, { "epoch": 0.0448, "grad_norm": 21.632083892822266, "learning_rate": 4.82429292929293e-06, "loss": 7.568325805664062, "step": 4480 }, { "epoch": 0.04485, "grad_norm": 14.175422668457031, "learning_rate": 4.824040404040405e-06, "loss": 7.59146728515625, "step": 4485 }, { "epoch": 0.0449, "grad_norm": 17.476022720336914, "learning_rate": 4.823787878787879e-06, "loss": 7.551641845703125, "step": 4490 }, { "epoch": 0.04495, "grad_norm": 14.810221672058105, "learning_rate": 4.823535353535354e-06, "loss": 7.574581146240234, "step": 4495 }, { "epoch": 0.045, "grad_norm": 13.696442604064941, "learning_rate": 4.8232828282828286e-06, "loss": 7.586608123779297, "step": 4500 }, { "epoch": 0.04505, "grad_norm": 19.58176040649414, "learning_rate": 4.823030303030303e-06, "loss": 7.5743247985839846, "step": 4505 }, { "epoch": 0.0451, "grad_norm": 34.318687438964844, "learning_rate": 4.822777777777779e-06, "loss": 7.771149444580078, "step": 4510 }, { "epoch": 0.04515, "grad_norm": 11.742945671081543, "learning_rate": 4.822525252525253e-06, "loss": 7.63128890991211, "step": 4515 }, { "epoch": 0.0452, "grad_norm": 15.364375114440918, "learning_rate": 4.822272727272728e-06, "loss": 7.526148986816406, "step": 4520 }, { "epoch": 0.04525, "grad_norm": 15.04275131225586, "learning_rate": 4.8220202020202026e-06, "loss": 7.563510894775391, "step": 4525 }, { "epoch": 0.0453, "grad_norm": 18.64378547668457, "learning_rate": 4.821767676767677e-06, "loss": 7.586882781982422, "step": 4530 }, { "epoch": 0.04535, "grad_norm": 15.710166931152344, "learning_rate": 4.821515151515152e-06, "loss": 7.612484741210937, "step": 4535 }, { "epoch": 0.0454, "grad_norm": 16.124164581298828, "learning_rate": 4.8212626262626264e-06, "loss": 7.580674743652343, "step": 4540 }, { "epoch": 0.04545, "grad_norm": 12.570262908935547, "learning_rate": 4.821010101010101e-06, "loss": 7.37879638671875, "step": 4545 }, { "epoch": 0.0455, "grad_norm": 11.539961814880371, "learning_rate": 4.8207575757575765e-06, "loss": 7.5137886047363285, "step": 4550 }, { "epoch": 0.04555, "grad_norm": 18.446704864501953, "learning_rate": 4.820505050505051e-06, "loss": 7.493421936035157, "step": 4555 }, { "epoch": 0.0456, "grad_norm": 21.907268524169922, "learning_rate": 4.820252525252526e-06, "loss": 7.589166259765625, "step": 4560 }, { "epoch": 0.04565, "grad_norm": 17.77687644958496, "learning_rate": 4.8200000000000004e-06, "loss": 7.596076965332031, "step": 4565 }, { "epoch": 0.0457, "grad_norm": 19.436031341552734, "learning_rate": 4.819747474747475e-06, "loss": 7.584585571289063, "step": 4570 }, { "epoch": 0.04575, "grad_norm": 19.615520477294922, "learning_rate": 4.81949494949495e-06, "loss": 7.550484466552734, "step": 4575 }, { "epoch": 0.0458, "grad_norm": 13.078214645385742, "learning_rate": 4.819242424242424e-06, "loss": 7.630445098876953, "step": 4580 }, { "epoch": 0.04585, "grad_norm": 17.013944625854492, "learning_rate": 4.818989898989899e-06, "loss": 7.588507843017578, "step": 4585 }, { "epoch": 0.0459, "grad_norm": 16.1025333404541, "learning_rate": 4.8187373737373744e-06, "loss": 7.583362579345703, "step": 4590 }, { "epoch": 0.04595, "grad_norm": 16.102659225463867, "learning_rate": 4.818484848484849e-06, "loss": 7.343174743652344, "step": 4595 }, { "epoch": 0.046, "grad_norm": 15.440755844116211, "learning_rate": 4.818232323232324e-06, "loss": 7.604143524169922, "step": 4600 }, { "epoch": 0.04605, "grad_norm": 16.040618896484375, "learning_rate": 4.817979797979798e-06, "loss": 7.608893585205078, "step": 4605 }, { "epoch": 0.0461, "grad_norm": 20.063100814819336, "learning_rate": 4.817727272727273e-06, "loss": 7.608234405517578, "step": 4610 }, { "epoch": 0.04615, "grad_norm": 14.415709495544434, "learning_rate": 4.817474747474748e-06, "loss": 7.62647705078125, "step": 4615 }, { "epoch": 0.0462, "grad_norm": 16.893007278442383, "learning_rate": 4.817222222222222e-06, "loss": 7.648519134521484, "step": 4620 }, { "epoch": 0.04625, "grad_norm": 9.975411415100098, "learning_rate": 4.816969696969697e-06, "loss": 7.553386688232422, "step": 4625 }, { "epoch": 0.0463, "grad_norm": 14.31112289428711, "learning_rate": 4.816717171717172e-06, "loss": 7.556731414794922, "step": 4630 }, { "epoch": 0.04635, "grad_norm": 16.138113021850586, "learning_rate": 4.816464646464647e-06, "loss": 7.578101348876953, "step": 4635 }, { "epoch": 0.0464, "grad_norm": 12.48101806640625, "learning_rate": 4.816212121212122e-06, "loss": 7.721635437011718, "step": 4640 }, { "epoch": 0.04645, "grad_norm": 18.264278411865234, "learning_rate": 4.815959595959596e-06, "loss": 7.547030639648438, "step": 4645 }, { "epoch": 0.0465, "grad_norm": 14.309183120727539, "learning_rate": 4.815707070707072e-06, "loss": 7.534114074707031, "step": 4650 }, { "epoch": 0.04655, "grad_norm": 22.08705711364746, "learning_rate": 4.815454545454546e-06, "loss": 7.593316650390625, "step": 4655 }, { "epoch": 0.0466, "grad_norm": 18.096111297607422, "learning_rate": 4.815202020202021e-06, "loss": 7.591447448730468, "step": 4660 }, { "epoch": 0.04665, "grad_norm": 16.11399269104004, "learning_rate": 4.814949494949495e-06, "loss": 7.489380645751953, "step": 4665 }, { "epoch": 0.0467, "grad_norm": 14.18309497833252, "learning_rate": 4.81469696969697e-06, "loss": 7.5733489990234375, "step": 4670 }, { "epoch": 0.04675, "grad_norm": 17.680999755859375, "learning_rate": 4.814444444444445e-06, "loss": 7.596977233886719, "step": 4675 }, { "epoch": 0.0468, "grad_norm": 15.915877342224121, "learning_rate": 4.8141919191919195e-06, "loss": 7.519697570800782, "step": 4680 }, { "epoch": 0.04685, "grad_norm": 19.06841278076172, "learning_rate": 4.813939393939394e-06, "loss": 7.534217834472656, "step": 4685 }, { "epoch": 0.0469, "grad_norm": 17.933359146118164, "learning_rate": 4.81368686868687e-06, "loss": 7.566926574707031, "step": 4690 }, { "epoch": 0.04695, "grad_norm": 13.312339782714844, "learning_rate": 4.813434343434344e-06, "loss": 7.5886085510253904, "step": 4695 }, { "epoch": 0.047, "grad_norm": 20.210153579711914, "learning_rate": 4.813181818181819e-06, "loss": 7.641795349121094, "step": 4700 }, { "epoch": 0.04705, "grad_norm": 14.58133316040039, "learning_rate": 4.8129292929292935e-06, "loss": 7.751400756835937, "step": 4705 }, { "epoch": 0.0471, "grad_norm": 22.124670028686523, "learning_rate": 4.812676767676768e-06, "loss": 7.57174072265625, "step": 4710 }, { "epoch": 0.04715, "grad_norm": 15.9932222366333, "learning_rate": 4.812424242424243e-06, "loss": 7.5612060546875, "step": 4715 }, { "epoch": 0.0472, "grad_norm": 14.722535133361816, "learning_rate": 4.812171717171717e-06, "loss": 7.5255584716796875, "step": 4720 }, { "epoch": 0.04725, "grad_norm": 15.170860290527344, "learning_rate": 4.811919191919192e-06, "loss": 7.5550689697265625, "step": 4725 }, { "epoch": 0.0473, "grad_norm": 20.454242706298828, "learning_rate": 4.8116666666666675e-06, "loss": 7.587982940673828, "step": 4730 }, { "epoch": 0.04735, "grad_norm": 18.162342071533203, "learning_rate": 4.811414141414142e-06, "loss": 7.589356231689453, "step": 4735 }, { "epoch": 0.0474, "grad_norm": 17.458749771118164, "learning_rate": 4.811161616161617e-06, "loss": 7.7700035095214846, "step": 4740 }, { "epoch": 0.04745, "grad_norm": 15.818519592285156, "learning_rate": 4.810909090909091e-06, "loss": 7.659767150878906, "step": 4745 }, { "epoch": 0.0475, "grad_norm": 11.826310157775879, "learning_rate": 4.810656565656566e-06, "loss": 7.645790100097656, "step": 4750 }, { "epoch": 0.04755, "grad_norm": 16.453763961791992, "learning_rate": 4.810404040404041e-06, "loss": 7.629660797119141, "step": 4755 }, { "epoch": 0.0476, "grad_norm": 12.051210403442383, "learning_rate": 4.810151515151515e-06, "loss": 7.539124298095703, "step": 4760 }, { "epoch": 0.04765, "grad_norm": 21.523422241210938, "learning_rate": 4.80989898989899e-06, "loss": 7.548561859130859, "step": 4765 }, { "epoch": 0.0477, "grad_norm": 19.74825668334961, "learning_rate": 4.809646464646465e-06, "loss": 7.584028625488282, "step": 4770 }, { "epoch": 0.04775, "grad_norm": 12.196378707885742, "learning_rate": 4.80939393939394e-06, "loss": 7.508290863037109, "step": 4775 }, { "epoch": 0.0478, "grad_norm": 17.529130935668945, "learning_rate": 4.809141414141415e-06, "loss": 7.550864410400391, "step": 4780 }, { "epoch": 0.04785, "grad_norm": 15.10629940032959, "learning_rate": 4.808888888888889e-06, "loss": 7.556507873535156, "step": 4785 }, { "epoch": 0.0479, "grad_norm": 22.792617797851562, "learning_rate": 4.808636363636364e-06, "loss": 7.595049285888672, "step": 4790 }, { "epoch": 0.04795, "grad_norm": 21.179412841796875, "learning_rate": 4.8083838383838385e-06, "loss": 7.605289459228516, "step": 4795 }, { "epoch": 0.048, "grad_norm": 16.074243545532227, "learning_rate": 4.808131313131313e-06, "loss": 7.588247680664063, "step": 4800 }, { "epoch": 0.04805, "grad_norm": 17.73287010192871, "learning_rate": 4.807878787878788e-06, "loss": 7.501219177246094, "step": 4805 }, { "epoch": 0.0481, "grad_norm": 18.561107635498047, "learning_rate": 4.807626262626263e-06, "loss": 7.586344909667969, "step": 4810 }, { "epoch": 0.04815, "grad_norm": 25.415470123291016, "learning_rate": 4.807373737373738e-06, "loss": 7.525871276855469, "step": 4815 }, { "epoch": 0.0482, "grad_norm": 20.804353713989258, "learning_rate": 4.8071212121212125e-06, "loss": 7.51147232055664, "step": 4820 }, { "epoch": 0.04825, "grad_norm": 13.60634708404541, "learning_rate": 4.806868686868687e-06, "loss": 7.527156829833984, "step": 4825 }, { "epoch": 0.0483, "grad_norm": 21.619220733642578, "learning_rate": 4.806616161616162e-06, "loss": 7.626271057128906, "step": 4830 }, { "epoch": 0.04835, "grad_norm": 16.1898136138916, "learning_rate": 4.806363636363636e-06, "loss": 7.527126312255859, "step": 4835 }, { "epoch": 0.0484, "grad_norm": 15.244047164916992, "learning_rate": 4.806111111111111e-06, "loss": 7.544068145751953, "step": 4840 }, { "epoch": 0.04845, "grad_norm": 18.65515899658203, "learning_rate": 4.805858585858586e-06, "loss": 7.53946533203125, "step": 4845 }, { "epoch": 0.0485, "grad_norm": 22.728239059448242, "learning_rate": 4.805606060606061e-06, "loss": 7.54278793334961, "step": 4850 }, { "epoch": 0.04855, "grad_norm": 16.173503875732422, "learning_rate": 4.805353535353536e-06, "loss": 7.519659423828125, "step": 4855 }, { "epoch": 0.0486, "grad_norm": 21.88563346862793, "learning_rate": 4.80510101010101e-06, "loss": 7.544681549072266, "step": 4860 }, { "epoch": 0.04865, "grad_norm": 17.139995574951172, "learning_rate": 4.804848484848485e-06, "loss": 7.562866973876953, "step": 4865 }, { "epoch": 0.0487, "grad_norm": 13.342567443847656, "learning_rate": 4.8045959595959605e-06, "loss": 7.545689392089844, "step": 4870 }, { "epoch": 0.04875, "grad_norm": 19.979005813598633, "learning_rate": 4.804343434343435e-06, "loss": 7.578728485107422, "step": 4875 }, { "epoch": 0.0488, "grad_norm": 19.63646697998047, "learning_rate": 4.80409090909091e-06, "loss": 7.698130035400391, "step": 4880 }, { "epoch": 0.04885, "grad_norm": 15.644451141357422, "learning_rate": 4.803838383838384e-06, "loss": 7.596628570556641, "step": 4885 }, { "epoch": 0.0489, "grad_norm": 16.400794982910156, "learning_rate": 4.803585858585859e-06, "loss": 7.576779174804687, "step": 4890 }, { "epoch": 0.04895, "grad_norm": 17.221128463745117, "learning_rate": 4.803333333333334e-06, "loss": 7.566455841064453, "step": 4895 }, { "epoch": 0.049, "grad_norm": 18.944021224975586, "learning_rate": 4.803080808080808e-06, "loss": 7.49578857421875, "step": 4900 }, { "epoch": 0.04905, "grad_norm": 17.878643035888672, "learning_rate": 4.802828282828283e-06, "loss": 7.499090576171875, "step": 4905 }, { "epoch": 0.0491, "grad_norm": 27.95353889465332, "learning_rate": 4.802575757575758e-06, "loss": 7.5913749694824215, "step": 4910 }, { "epoch": 0.04915, "grad_norm": 14.443374633789062, "learning_rate": 4.802323232323233e-06, "loss": 7.548728179931641, "step": 4915 }, { "epoch": 0.0492, "grad_norm": 15.017359733581543, "learning_rate": 4.802070707070708e-06, "loss": 7.561008453369141, "step": 4920 }, { "epoch": 0.04925, "grad_norm": 23.801401138305664, "learning_rate": 4.801818181818182e-06, "loss": 7.655378723144532, "step": 4925 }, { "epoch": 0.0493, "grad_norm": 15.707382202148438, "learning_rate": 4.801565656565657e-06, "loss": 7.516139984130859, "step": 4930 }, { "epoch": 0.04935, "grad_norm": 21.225231170654297, "learning_rate": 4.8013131313131315e-06, "loss": 7.549354553222656, "step": 4935 }, { "epoch": 0.0494, "grad_norm": 13.392284393310547, "learning_rate": 4.801060606060606e-06, "loss": 7.507106781005859, "step": 4940 }, { "epoch": 0.04945, "grad_norm": 21.269744873046875, "learning_rate": 4.800808080808082e-06, "loss": 7.503388977050781, "step": 4945 }, { "epoch": 0.0495, "grad_norm": 12.102234840393066, "learning_rate": 4.800555555555556e-06, "loss": 7.577073669433593, "step": 4950 }, { "epoch": 0.04955, "grad_norm": 14.913641929626465, "learning_rate": 4.800303030303031e-06, "loss": 7.5271141052246096, "step": 4955 }, { "epoch": 0.0496, "grad_norm": 15.583343505859375, "learning_rate": 4.8000505050505055e-06, "loss": 7.509233093261718, "step": 4960 }, { "epoch": 0.04965, "grad_norm": 18.66119956970215, "learning_rate": 4.79979797979798e-06, "loss": 7.359549713134766, "step": 4965 }, { "epoch": 0.0497, "grad_norm": 12.998414993286133, "learning_rate": 4.799545454545455e-06, "loss": 7.490478515625, "step": 4970 }, { "epoch": 0.04975, "grad_norm": 20.66922950744629, "learning_rate": 4.7992929292929294e-06, "loss": 7.518321990966797, "step": 4975 }, { "epoch": 0.0498, "grad_norm": 17.732738494873047, "learning_rate": 4.799040404040404e-06, "loss": 7.524884796142578, "step": 4980 }, { "epoch": 0.04985, "grad_norm": 14.715062141418457, "learning_rate": 4.7987878787878795e-06, "loss": 7.502314758300781, "step": 4985 }, { "epoch": 0.0499, "grad_norm": 18.176855087280273, "learning_rate": 4.798535353535354e-06, "loss": 7.567788696289062, "step": 4990 }, { "epoch": 0.04995, "grad_norm": 17.261615753173828, "learning_rate": 4.798282828282829e-06, "loss": 7.556626892089843, "step": 4995 }, { "epoch": 0.05, "grad_norm": 14.33521556854248, "learning_rate": 4.798030303030303e-06, "loss": 7.559138488769531, "step": 5000 }, { "epoch": 0.05005, "grad_norm": 26.54514503479004, "learning_rate": 4.797777777777778e-06, "loss": 7.518259429931641, "step": 5005 }, { "epoch": 0.0501, "grad_norm": 13.362397193908691, "learning_rate": 4.797525252525253e-06, "loss": 7.508517456054688, "step": 5010 }, { "epoch": 0.05015, "grad_norm": 15.099980354309082, "learning_rate": 4.797272727272727e-06, "loss": 7.548574066162109, "step": 5015 }, { "epoch": 0.0502, "grad_norm": 16.685609817504883, "learning_rate": 4.797020202020202e-06, "loss": 7.466999053955078, "step": 5020 }, { "epoch": 0.05025, "grad_norm": 13.186056137084961, "learning_rate": 4.796767676767677e-06, "loss": 7.510874176025391, "step": 5025 }, { "epoch": 0.0503, "grad_norm": 20.400978088378906, "learning_rate": 4.796515151515152e-06, "loss": 7.572563934326172, "step": 5030 }, { "epoch": 0.05035, "grad_norm": 14.745682716369629, "learning_rate": 4.796262626262627e-06, "loss": 7.496928405761719, "step": 5035 }, { "epoch": 0.0504, "grad_norm": 30.0478515625, "learning_rate": 4.796010101010101e-06, "loss": 7.5281219482421875, "step": 5040 }, { "epoch": 0.05045, "grad_norm": 16.133399963378906, "learning_rate": 4.795757575757577e-06, "loss": 7.3615776062011715, "step": 5045 }, { "epoch": 0.0505, "grad_norm": 18.25689125061035, "learning_rate": 4.795505050505051e-06, "loss": 7.5327201843261715, "step": 5050 }, { "epoch": 0.05055, "grad_norm": 19.379413604736328, "learning_rate": 4.795252525252525e-06, "loss": 7.5313232421875, "step": 5055 }, { "epoch": 0.0506, "grad_norm": 23.21569061279297, "learning_rate": 4.795e-06, "loss": 7.495737457275391, "step": 5060 }, { "epoch": 0.05065, "grad_norm": 12.954337120056152, "learning_rate": 4.794747474747475e-06, "loss": 7.507353210449219, "step": 5065 }, { "epoch": 0.0507, "grad_norm": 16.900930404663086, "learning_rate": 4.79449494949495e-06, "loss": 7.504391479492187, "step": 5070 }, { "epoch": 0.05075, "grad_norm": 20.350893020629883, "learning_rate": 4.7942424242424246e-06, "loss": 7.526070404052734, "step": 5075 }, { "epoch": 0.0508, "grad_norm": 11.945466995239258, "learning_rate": 4.793989898989899e-06, "loss": 7.475864410400391, "step": 5080 }, { "epoch": 0.05085, "grad_norm": 20.322402954101562, "learning_rate": 4.793737373737375e-06, "loss": 7.530809783935547, "step": 5085 }, { "epoch": 0.0509, "grad_norm": 14.522372245788574, "learning_rate": 4.793484848484849e-06, "loss": 7.545920562744141, "step": 5090 }, { "epoch": 0.05095, "grad_norm": 18.42091178894043, "learning_rate": 4.793232323232324e-06, "loss": 7.466795349121094, "step": 5095 }, { "epoch": 0.051, "grad_norm": 18.778827667236328, "learning_rate": 4.7929797979797986e-06, "loss": 7.5226287841796875, "step": 5100 }, { "epoch": 0.05105, "grad_norm": 16.349294662475586, "learning_rate": 4.792727272727273e-06, "loss": 7.515128326416016, "step": 5105 }, { "epoch": 0.0511, "grad_norm": 13.0361967086792, "learning_rate": 4.792474747474748e-06, "loss": 7.5631591796875, "step": 5110 }, { "epoch": 0.05115, "grad_norm": 18.514728546142578, "learning_rate": 4.7922222222222225e-06, "loss": 7.484203338623047, "step": 5115 }, { "epoch": 0.0512, "grad_norm": 17.603557586669922, "learning_rate": 4.791969696969697e-06, "loss": 7.4693450927734375, "step": 5120 }, { "epoch": 0.05125, "grad_norm": 17.964557647705078, "learning_rate": 4.7917171717171726e-06, "loss": 7.538607788085938, "step": 5125 }, { "epoch": 0.0513, "grad_norm": 15.679680824279785, "learning_rate": 4.791464646464647e-06, "loss": 7.353537750244141, "step": 5130 }, { "epoch": 0.05135, "grad_norm": 13.421891212463379, "learning_rate": 4.791212121212122e-06, "loss": 7.4834739685058596, "step": 5135 }, { "epoch": 0.0514, "grad_norm": 20.7985897064209, "learning_rate": 4.7909595959595965e-06, "loss": 7.527686309814453, "step": 5140 }, { "epoch": 0.05145, "grad_norm": 18.19741439819336, "learning_rate": 4.790707070707071e-06, "loss": 7.496607971191406, "step": 5145 }, { "epoch": 0.0515, "grad_norm": 13.317227363586426, "learning_rate": 4.790454545454546e-06, "loss": 7.431857299804688, "step": 5150 }, { "epoch": 0.05155, "grad_norm": 18.611129760742188, "learning_rate": 4.79020202020202e-06, "loss": 7.542837524414063, "step": 5155 }, { "epoch": 0.0516, "grad_norm": 16.28496742248535, "learning_rate": 4.789949494949495e-06, "loss": 7.477743530273438, "step": 5160 }, { "epoch": 0.05165, "grad_norm": 13.240856170654297, "learning_rate": 4.7896969696969705e-06, "loss": 7.575761413574218, "step": 5165 }, { "epoch": 0.0517, "grad_norm": 25.75421905517578, "learning_rate": 4.789444444444445e-06, "loss": 7.474327087402344, "step": 5170 }, { "epoch": 0.05175, "grad_norm": 49.50589370727539, "learning_rate": 4.78919191919192e-06, "loss": 7.590592956542968, "step": 5175 }, { "epoch": 0.0518, "grad_norm": 16.417898178100586, "learning_rate": 4.788939393939394e-06, "loss": 7.5394752502441404, "step": 5180 }, { "epoch": 0.05185, "grad_norm": 15.303547859191895, "learning_rate": 4.788686868686869e-06, "loss": 7.531949615478515, "step": 5185 }, { "epoch": 0.0519, "grad_norm": 15.834310531616211, "learning_rate": 4.788434343434344e-06, "loss": 7.516173553466797, "step": 5190 }, { "epoch": 0.05195, "grad_norm": 23.687057495117188, "learning_rate": 4.788181818181818e-06, "loss": 7.565447998046875, "step": 5195 }, { "epoch": 0.052, "grad_norm": 17.73265266418457, "learning_rate": 4.787929292929293e-06, "loss": 7.516236877441406, "step": 5200 }, { "epoch": 0.05205, "grad_norm": 21.97943115234375, "learning_rate": 4.787676767676768e-06, "loss": 7.490366363525391, "step": 5205 }, { "epoch": 0.0521, "grad_norm": 14.607939720153809, "learning_rate": 4.787424242424243e-06, "loss": 7.483009338378906, "step": 5210 }, { "epoch": 0.05215, "grad_norm": 15.756275177001953, "learning_rate": 4.787171717171718e-06, "loss": 7.5189666748046875, "step": 5215 }, { "epoch": 0.0522, "grad_norm": 15.3902006149292, "learning_rate": 4.786919191919192e-06, "loss": 7.52350082397461, "step": 5220 }, { "epoch": 0.05225, "grad_norm": 16.54079246520996, "learning_rate": 4.786666666666667e-06, "loss": 7.476360321044922, "step": 5225 }, { "epoch": 0.0523, "grad_norm": 17.062969207763672, "learning_rate": 4.7864141414141415e-06, "loss": 7.478168487548828, "step": 5230 }, { "epoch": 0.05235, "grad_norm": 15.629810333251953, "learning_rate": 4.786161616161616e-06, "loss": 7.500523376464844, "step": 5235 }, { "epoch": 0.0524, "grad_norm": 18.422964096069336, "learning_rate": 4.785909090909091e-06, "loss": 7.442372894287109, "step": 5240 }, { "epoch": 0.05245, "grad_norm": 13.538203239440918, "learning_rate": 4.785656565656566e-06, "loss": 7.482205963134765, "step": 5245 }, { "epoch": 0.0525, "grad_norm": 16.7735652923584, "learning_rate": 4.785404040404041e-06, "loss": 7.457496643066406, "step": 5250 }, { "epoch": 0.05255, "grad_norm": 16.305166244506836, "learning_rate": 4.7851515151515155e-06, "loss": 7.4603736877441404, "step": 5255 }, { "epoch": 0.0526, "grad_norm": 18.291223526000977, "learning_rate": 4.78489898989899e-06, "loss": 7.552133178710937, "step": 5260 }, { "epoch": 0.05265, "grad_norm": 21.955812454223633, "learning_rate": 4.784646464646466e-06, "loss": 7.497503662109375, "step": 5265 }, { "epoch": 0.0527, "grad_norm": 17.777254104614258, "learning_rate": 4.78439393939394e-06, "loss": 7.460443115234375, "step": 5270 }, { "epoch": 0.05275, "grad_norm": 13.909947395324707, "learning_rate": 4.784141414141414e-06, "loss": 7.6993156433105465, "step": 5275 }, { "epoch": 0.0528, "grad_norm": 20.330089569091797, "learning_rate": 4.783888888888889e-06, "loss": 7.191036987304687, "step": 5280 }, { "epoch": 0.05285, "grad_norm": 20.374126434326172, "learning_rate": 4.783636363636364e-06, "loss": 7.568328857421875, "step": 5285 }, { "epoch": 0.0529, "grad_norm": 49.577213287353516, "learning_rate": 4.783383838383839e-06, "loss": 8.022931671142578, "step": 5290 }, { "epoch": 0.05295, "grad_norm": 18.86116600036621, "learning_rate": 4.783131313131313e-06, "loss": 7.488610076904297, "step": 5295 }, { "epoch": 0.053, "grad_norm": 23.066532135009766, "learning_rate": 4.782878787878788e-06, "loss": 7.5725250244140625, "step": 5300 }, { "epoch": 0.05305, "grad_norm": 16.35528564453125, "learning_rate": 4.7826262626262635e-06, "loss": 7.4015350341796875, "step": 5305 }, { "epoch": 0.0531, "grad_norm": 22.492420196533203, "learning_rate": 4.782373737373738e-06, "loss": 7.523082733154297, "step": 5310 }, { "epoch": 0.05315, "grad_norm": 17.719839096069336, "learning_rate": 4.782121212121213e-06, "loss": 7.547672271728516, "step": 5315 }, { "epoch": 0.0532, "grad_norm": 18.264446258544922, "learning_rate": 4.781868686868687e-06, "loss": 7.47720947265625, "step": 5320 }, { "epoch": 0.05325, "grad_norm": 13.545428276062012, "learning_rate": 4.781616161616162e-06, "loss": 7.468904113769531, "step": 5325 }, { "epoch": 0.0533, "grad_norm": 18.139869689941406, "learning_rate": 4.781363636363637e-06, "loss": 7.513977813720703, "step": 5330 }, { "epoch": 0.05335, "grad_norm": 33.880428314208984, "learning_rate": 4.781111111111111e-06, "loss": 7.5272064208984375, "step": 5335 }, { "epoch": 0.0534, "grad_norm": 23.774633407592773, "learning_rate": 4.780858585858586e-06, "loss": 7.473148345947266, "step": 5340 }, { "epoch": 0.05345, "grad_norm": 20.118289947509766, "learning_rate": 4.780606060606061e-06, "loss": 7.478847503662109, "step": 5345 }, { "epoch": 0.0535, "grad_norm": 19.948165893554688, "learning_rate": 4.780353535353536e-06, "loss": 7.473389434814453, "step": 5350 }, { "epoch": 0.05355, "grad_norm": 24.138887405395508, "learning_rate": 4.780101010101011e-06, "loss": 7.5852302551269535, "step": 5355 }, { "epoch": 0.0536, "grad_norm": 23.258432388305664, "learning_rate": 4.779848484848485e-06, "loss": 7.578569030761718, "step": 5360 }, { "epoch": 0.05365, "grad_norm": 14.515973091125488, "learning_rate": 4.77959595959596e-06, "loss": 7.512255096435547, "step": 5365 }, { "epoch": 0.0537, "grad_norm": 13.8118257522583, "learning_rate": 4.7793434343434345e-06, "loss": 7.4754280090332035, "step": 5370 }, { "epoch": 0.05375, "grad_norm": 24.481937408447266, "learning_rate": 4.779090909090909e-06, "loss": 7.43607177734375, "step": 5375 }, { "epoch": 0.0538, "grad_norm": 50.230194091796875, "learning_rate": 4.778838383838385e-06, "loss": 7.5603782653808596, "step": 5380 }, { "epoch": 0.05385, "grad_norm": 25.99995231628418, "learning_rate": 4.778585858585859e-06, "loss": 7.010266876220703, "step": 5385 }, { "epoch": 0.0539, "grad_norm": 22.664758682250977, "learning_rate": 4.778333333333334e-06, "loss": 7.597390747070312, "step": 5390 }, { "epoch": 0.05395, "grad_norm": 18.75278091430664, "learning_rate": 4.7780808080808085e-06, "loss": 7.5601036071777346, "step": 5395 }, { "epoch": 0.054, "grad_norm": 19.403547286987305, "learning_rate": 4.777828282828283e-06, "loss": 7.604866790771484, "step": 5400 }, { "epoch": 0.05405, "grad_norm": 19.46187400817871, "learning_rate": 4.777575757575758e-06, "loss": 7.472673034667968, "step": 5405 }, { "epoch": 0.0541, "grad_norm": 18.884601593017578, "learning_rate": 4.777323232323232e-06, "loss": 7.421488189697266, "step": 5410 }, { "epoch": 0.05415, "grad_norm": 12.1611967086792, "learning_rate": 4.777070707070707e-06, "loss": 7.479060363769531, "step": 5415 }, { "epoch": 0.0542, "grad_norm": 25.839569091796875, "learning_rate": 4.7768181818181825e-06, "loss": 7.458383178710937, "step": 5420 }, { "epoch": 0.05425, "grad_norm": 18.64609146118164, "learning_rate": 4.776565656565657e-06, "loss": 7.477471160888672, "step": 5425 }, { "epoch": 0.0543, "grad_norm": 21.13758087158203, "learning_rate": 4.776313131313132e-06, "loss": 7.474122619628906, "step": 5430 }, { "epoch": 0.05435, "grad_norm": 13.830058097839355, "learning_rate": 4.776060606060606e-06, "loss": 7.426272583007813, "step": 5435 }, { "epoch": 0.0544, "grad_norm": 18.458003997802734, "learning_rate": 4.775808080808081e-06, "loss": 7.548300170898438, "step": 5440 }, { "epoch": 0.05445, "grad_norm": 20.15646743774414, "learning_rate": 4.775555555555556e-06, "loss": 7.780398559570313, "step": 5445 }, { "epoch": 0.0545, "grad_norm": 14.22573471069336, "learning_rate": 4.77530303030303e-06, "loss": 7.4881431579589846, "step": 5450 }, { "epoch": 0.05455, "grad_norm": 17.285425186157227, "learning_rate": 4.775050505050505e-06, "loss": 7.508261108398438, "step": 5455 }, { "epoch": 0.0546, "grad_norm": 22.39250373840332, "learning_rate": 4.77479797979798e-06, "loss": 7.486381530761719, "step": 5460 }, { "epoch": 0.05465, "grad_norm": 17.744413375854492, "learning_rate": 4.774545454545455e-06, "loss": 7.4675750732421875, "step": 5465 }, { "epoch": 0.0547, "grad_norm": 24.95531463623047, "learning_rate": 4.77429292929293e-06, "loss": 7.439553070068359, "step": 5470 }, { "epoch": 0.05475, "grad_norm": 17.995616912841797, "learning_rate": 4.774040404040404e-06, "loss": 7.664738464355469, "step": 5475 }, { "epoch": 0.0548, "grad_norm": 18.295888900756836, "learning_rate": 4.77378787878788e-06, "loss": 7.508366394042969, "step": 5480 }, { "epoch": 0.05485, "grad_norm": 25.90740394592285, "learning_rate": 4.773535353535354e-06, "loss": 7.480747222900391, "step": 5485 }, { "epoch": 0.0549, "grad_norm": 14.703533172607422, "learning_rate": 4.773282828282829e-06, "loss": 7.433132171630859, "step": 5490 }, { "epoch": 0.05495, "grad_norm": 17.049379348754883, "learning_rate": 4.773030303030303e-06, "loss": 7.452099609375, "step": 5495 }, { "epoch": 0.055, "grad_norm": 16.568214416503906, "learning_rate": 4.772777777777778e-06, "loss": 7.5099029541015625, "step": 5500 }, { "epoch": 0.05505, "grad_norm": 17.528520584106445, "learning_rate": 4.772525252525253e-06, "loss": 7.372364044189453, "step": 5505 }, { "epoch": 0.0551, "grad_norm": 15.64583683013916, "learning_rate": 4.7722727272727276e-06, "loss": 7.384050750732422, "step": 5510 }, { "epoch": 0.05515, "grad_norm": 17.44878578186035, "learning_rate": 4.772020202020202e-06, "loss": 7.494329833984375, "step": 5515 }, { "epoch": 0.0552, "grad_norm": 24.4046630859375, "learning_rate": 4.771767676767678e-06, "loss": 7.46461181640625, "step": 5520 }, { "epoch": 0.05525, "grad_norm": 20.80097198486328, "learning_rate": 4.771515151515152e-06, "loss": 7.495587158203125, "step": 5525 }, { "epoch": 0.0553, "grad_norm": 15.278010368347168, "learning_rate": 4.771262626262627e-06, "loss": 7.4817054748535154, "step": 5530 }, { "epoch": 0.05535, "grad_norm": 18.904678344726562, "learning_rate": 4.7710101010101015e-06, "loss": 7.528675842285156, "step": 5535 }, { "epoch": 0.0554, "grad_norm": 18.53925895690918, "learning_rate": 4.770757575757576e-06, "loss": 7.446332550048828, "step": 5540 }, { "epoch": 0.05545, "grad_norm": 15.812921524047852, "learning_rate": 4.770505050505051e-06, "loss": 7.4189300537109375, "step": 5545 }, { "epoch": 0.0555, "grad_norm": 19.764219284057617, "learning_rate": 4.7702525252525254e-06, "loss": 7.466627502441407, "step": 5550 }, { "epoch": 0.05555, "grad_norm": 16.635623931884766, "learning_rate": 4.77e-06, "loss": 7.436837005615234, "step": 5555 }, { "epoch": 0.0556, "grad_norm": 21.88321876525879, "learning_rate": 4.7697474747474755e-06, "loss": 7.460847473144531, "step": 5560 }, { "epoch": 0.05565, "grad_norm": 10.644245147705078, "learning_rate": 4.76949494949495e-06, "loss": 7.412858581542968, "step": 5565 }, { "epoch": 0.0557, "grad_norm": 17.232799530029297, "learning_rate": 4.769242424242425e-06, "loss": 7.449269104003906, "step": 5570 }, { "epoch": 0.05575, "grad_norm": 19.90821075439453, "learning_rate": 4.7689898989898994e-06, "loss": 7.412100982666016, "step": 5575 }, { "epoch": 0.0558, "grad_norm": 17.362276077270508, "learning_rate": 4.768737373737374e-06, "loss": 7.4560081481933596, "step": 5580 }, { "epoch": 0.05585, "grad_norm": 21.034788131713867, "learning_rate": 4.768484848484849e-06, "loss": 7.414686584472657, "step": 5585 }, { "epoch": 0.0559, "grad_norm": 13.796605110168457, "learning_rate": 4.768232323232323e-06, "loss": 7.424214172363281, "step": 5590 }, { "epoch": 0.05595, "grad_norm": 14.169798851013184, "learning_rate": 4.767979797979798e-06, "loss": 7.424193572998047, "step": 5595 }, { "epoch": 0.056, "grad_norm": 28.761127471923828, "learning_rate": 4.7677272727272734e-06, "loss": 7.456349182128906, "step": 5600 }, { "epoch": 0.05605, "grad_norm": 13.71801471710205, "learning_rate": 4.767474747474748e-06, "loss": 7.412640380859375, "step": 5605 }, { "epoch": 0.0561, "grad_norm": 18.31907081604004, "learning_rate": 4.767222222222223e-06, "loss": 7.445587158203125, "step": 5610 }, { "epoch": 0.05615, "grad_norm": 13.993244171142578, "learning_rate": 4.766969696969697e-06, "loss": 7.474983978271484, "step": 5615 }, { "epoch": 0.0562, "grad_norm": 28.316524505615234, "learning_rate": 4.766717171717172e-06, "loss": 7.348838806152344, "step": 5620 }, { "epoch": 0.05625, "grad_norm": 14.15115737915039, "learning_rate": 4.766464646464647e-06, "loss": 7.479405212402344, "step": 5625 }, { "epoch": 0.0563, "grad_norm": 19.541553497314453, "learning_rate": 4.766212121212121e-06, "loss": 7.474996185302734, "step": 5630 }, { "epoch": 0.05635, "grad_norm": 14.739532470703125, "learning_rate": 4.765959595959596e-06, "loss": 7.4811347961425785, "step": 5635 }, { "epoch": 0.0564, "grad_norm": 25.074840545654297, "learning_rate": 4.765707070707071e-06, "loss": 7.459928131103515, "step": 5640 }, { "epoch": 0.05645, "grad_norm": 16.748214721679688, "learning_rate": 4.765454545454546e-06, "loss": 7.441943359375, "step": 5645 }, { "epoch": 0.0565, "grad_norm": 14.341911315917969, "learning_rate": 4.765202020202021e-06, "loss": 7.402958679199219, "step": 5650 }, { "epoch": 0.05655, "grad_norm": 18.087499618530273, "learning_rate": 4.764949494949495e-06, "loss": 7.448354339599609, "step": 5655 }, { "epoch": 0.0566, "grad_norm": 11.65689754486084, "learning_rate": 4.76469696969697e-06, "loss": 7.430799865722657, "step": 5660 }, { "epoch": 0.05665, "grad_norm": 18.495588302612305, "learning_rate": 4.7644444444444445e-06, "loss": 7.422891998291016, "step": 5665 }, { "epoch": 0.0567, "grad_norm": 17.07127571105957, "learning_rate": 4.764191919191919e-06, "loss": 7.538980865478516, "step": 5670 }, { "epoch": 0.05675, "grad_norm": 13.97375202178955, "learning_rate": 4.763939393939394e-06, "loss": 7.435960388183593, "step": 5675 }, { "epoch": 0.0568, "grad_norm": 16.4046630859375, "learning_rate": 4.763686868686869e-06, "loss": 7.449042510986328, "step": 5680 }, { "epoch": 0.05685, "grad_norm": 25.520517349243164, "learning_rate": 4.763434343434344e-06, "loss": 7.434669494628906, "step": 5685 }, { "epoch": 0.0569, "grad_norm": 12.56342887878418, "learning_rate": 4.7631818181818185e-06, "loss": 7.415824127197266, "step": 5690 }, { "epoch": 0.05695, "grad_norm": 23.469507217407227, "learning_rate": 4.762929292929293e-06, "loss": 6.7615509033203125, "step": 5695 }, { "epoch": 0.057, "grad_norm": 14.662023544311523, "learning_rate": 4.762676767676769e-06, "loss": 7.162270355224609, "step": 5700 }, { "epoch": 0.05705, "grad_norm": 18.740440368652344, "learning_rate": 4.762424242424243e-06, "loss": 7.4644828796386715, "step": 5705 }, { "epoch": 0.0571, "grad_norm": 16.590961456298828, "learning_rate": 4.762171717171718e-06, "loss": 7.493648529052734, "step": 5710 }, { "epoch": 0.05715, "grad_norm": 21.32218360900879, "learning_rate": 4.7619191919191925e-06, "loss": 7.4714599609375, "step": 5715 }, { "epoch": 0.0572, "grad_norm": 23.445404052734375, "learning_rate": 4.761666666666667e-06, "loss": 7.281668090820313, "step": 5720 }, { "epoch": 0.05725, "grad_norm": 17.04100227355957, "learning_rate": 4.761414141414142e-06, "loss": 7.4137428283691404, "step": 5725 }, { "epoch": 0.0573, "grad_norm": 15.200889587402344, "learning_rate": 4.761161616161616e-06, "loss": 7.411590576171875, "step": 5730 }, { "epoch": 0.05735, "grad_norm": 21.38682746887207, "learning_rate": 4.760909090909091e-06, "loss": 7.445545959472656, "step": 5735 }, { "epoch": 0.0574, "grad_norm": 16.31803321838379, "learning_rate": 4.7606565656565665e-06, "loss": 7.4399559020996096, "step": 5740 }, { "epoch": 0.05745, "grad_norm": 19.280553817749023, "learning_rate": 4.760404040404041e-06, "loss": 7.455693054199219, "step": 5745 }, { "epoch": 0.0575, "grad_norm": 16.625455856323242, "learning_rate": 4.760151515151516e-06, "loss": 7.403818511962891, "step": 5750 }, { "epoch": 0.05755, "grad_norm": 11.560348510742188, "learning_rate": 4.75989898989899e-06, "loss": 7.3734375, "step": 5755 }, { "epoch": 0.0576, "grad_norm": 19.700429916381836, "learning_rate": 4.759646464646465e-06, "loss": 7.44625244140625, "step": 5760 }, { "epoch": 0.05765, "grad_norm": 16.45919418334961, "learning_rate": 4.75939393939394e-06, "loss": 7.451748657226562, "step": 5765 }, { "epoch": 0.0577, "grad_norm": 23.0037899017334, "learning_rate": 4.759141414141414e-06, "loss": 7.435987854003907, "step": 5770 }, { "epoch": 0.05775, "grad_norm": 18.895755767822266, "learning_rate": 4.758888888888889e-06, "loss": 7.387342071533203, "step": 5775 }, { "epoch": 0.0578, "grad_norm": 12.486214637756348, "learning_rate": 4.758636363636364e-06, "loss": 7.410451507568359, "step": 5780 }, { "epoch": 0.05785, "grad_norm": 19.218772888183594, "learning_rate": 4.758383838383839e-06, "loss": 7.443010711669922, "step": 5785 }, { "epoch": 0.0579, "grad_norm": 12.704147338867188, "learning_rate": 4.758131313131314e-06, "loss": 7.405137634277343, "step": 5790 }, { "epoch": 0.05795, "grad_norm": 21.602676391601562, "learning_rate": 4.757878787878788e-06, "loss": 7.535107421875, "step": 5795 }, { "epoch": 0.058, "grad_norm": 17.215662002563477, "learning_rate": 4.757626262626263e-06, "loss": 7.363690948486328, "step": 5800 }, { "epoch": 0.05805, "grad_norm": 20.03301429748535, "learning_rate": 4.7573737373737375e-06, "loss": 7.365547180175781, "step": 5805 }, { "epoch": 0.0581, "grad_norm": 27.06032371520996, "learning_rate": 4.757121212121212e-06, "loss": 7.432145690917968, "step": 5810 }, { "epoch": 0.05815, "grad_norm": 11.073099136352539, "learning_rate": 4.756868686868688e-06, "loss": 7.408209991455078, "step": 5815 }, { "epoch": 0.0582, "grad_norm": 18.43425178527832, "learning_rate": 4.756616161616162e-06, "loss": 7.3963478088378904, "step": 5820 }, { "epoch": 0.05825, "grad_norm": 18.0744571685791, "learning_rate": 4.756363636363637e-06, "loss": 7.348499298095703, "step": 5825 }, { "epoch": 0.0583, "grad_norm": 17.661806106567383, "learning_rate": 4.7561111111111115e-06, "loss": 7.414032745361328, "step": 5830 }, { "epoch": 0.05835, "grad_norm": 17.43125343322754, "learning_rate": 4.755858585858586e-06, "loss": 7.3651878356933596, "step": 5835 }, { "epoch": 0.0584, "grad_norm": 20.05885887145996, "learning_rate": 4.755606060606061e-06, "loss": 7.46289291381836, "step": 5840 }, { "epoch": 0.05845, "grad_norm": 13.228622436523438, "learning_rate": 4.755353535353535e-06, "loss": 7.426290130615234, "step": 5845 }, { "epoch": 0.0585, "grad_norm": 16.293603897094727, "learning_rate": 4.75510101010101e-06, "loss": 7.423129272460938, "step": 5850 }, { "epoch": 0.05855, "grad_norm": 17.442737579345703, "learning_rate": 4.7548484848484855e-06, "loss": 7.355368804931641, "step": 5855 }, { "epoch": 0.0586, "grad_norm": 16.99224090576172, "learning_rate": 4.75459595959596e-06, "loss": 7.361419677734375, "step": 5860 }, { "epoch": 0.05865, "grad_norm": 22.993154525756836, "learning_rate": 4.754343434343435e-06, "loss": 7.391024780273438, "step": 5865 }, { "epoch": 0.0587, "grad_norm": 29.75162124633789, "learning_rate": 4.754090909090909e-06, "loss": 7.217912292480468, "step": 5870 }, { "epoch": 0.05875, "grad_norm": 18.345291137695312, "learning_rate": 4.753838383838385e-06, "loss": 7.426860809326172, "step": 5875 }, { "epoch": 0.0588, "grad_norm": 15.290863037109375, "learning_rate": 4.7535858585858595e-06, "loss": 7.3977607727050785, "step": 5880 }, { "epoch": 0.05885, "grad_norm": 17.886123657226562, "learning_rate": 4.753333333333333e-06, "loss": 7.325679016113281, "step": 5885 }, { "epoch": 0.0589, "grad_norm": 21.9522647857666, "learning_rate": 4.753080808080808e-06, "loss": 7.381395721435547, "step": 5890 }, { "epoch": 0.05895, "grad_norm": 12.989285469055176, "learning_rate": 4.752828282828283e-06, "loss": 7.365306091308594, "step": 5895 }, { "epoch": 0.059, "grad_norm": 20.4886531829834, "learning_rate": 4.752575757575758e-06, "loss": 7.3723907470703125, "step": 5900 }, { "epoch": 0.05905, "grad_norm": 43.66281509399414, "learning_rate": 4.752323232323233e-06, "loss": 7.589836120605469, "step": 5905 }, { "epoch": 0.0591, "grad_norm": 21.818506240844727, "learning_rate": 4.752070707070707e-06, "loss": 7.481194305419922, "step": 5910 }, { "epoch": 0.05915, "grad_norm": 18.65826416015625, "learning_rate": 4.751818181818183e-06, "loss": 7.426165771484375, "step": 5915 }, { "epoch": 0.0592, "grad_norm": 15.598796844482422, "learning_rate": 4.751565656565657e-06, "loss": 7.3957061767578125, "step": 5920 }, { "epoch": 0.05925, "grad_norm": 17.28043556213379, "learning_rate": 4.751313131313132e-06, "loss": 7.373983764648438, "step": 5925 }, { "epoch": 0.0593, "grad_norm": 19.29776954650879, "learning_rate": 4.751060606060607e-06, "loss": 7.460882568359375, "step": 5930 }, { "epoch": 0.05935, "grad_norm": 15.937277793884277, "learning_rate": 4.750808080808081e-06, "loss": 7.494353485107422, "step": 5935 }, { "epoch": 0.0594, "grad_norm": 20.630159378051758, "learning_rate": 4.750555555555556e-06, "loss": 7.516507720947265, "step": 5940 }, { "epoch": 0.05945, "grad_norm": 14.242840766906738, "learning_rate": 4.7503030303030305e-06, "loss": 7.405439758300782, "step": 5945 }, { "epoch": 0.0595, "grad_norm": 20.529651641845703, "learning_rate": 4.750050505050505e-06, "loss": 7.404287719726563, "step": 5950 }, { "epoch": 0.05955, "grad_norm": 22.474390029907227, "learning_rate": 4.749797979797981e-06, "loss": 7.3753204345703125, "step": 5955 }, { "epoch": 0.0596, "grad_norm": 15.492379188537598, "learning_rate": 4.749545454545455e-06, "loss": 7.312710571289062, "step": 5960 }, { "epoch": 0.05965, "grad_norm": 15.615510940551758, "learning_rate": 4.74929292929293e-06, "loss": 7.586792755126953, "step": 5965 }, { "epoch": 0.0597, "grad_norm": 24.81792449951172, "learning_rate": 4.7490404040404045e-06, "loss": 7.421467590332031, "step": 5970 }, { "epoch": 0.05975, "grad_norm": 21.100526809692383, "learning_rate": 4.748787878787879e-06, "loss": 7.423560333251953, "step": 5975 }, { "epoch": 0.0598, "grad_norm": 17.285804748535156, "learning_rate": 4.748535353535354e-06, "loss": 7.376319122314453, "step": 5980 }, { "epoch": 0.05985, "grad_norm": 30.1626033782959, "learning_rate": 4.7482828282828284e-06, "loss": 7.436541748046875, "step": 5985 }, { "epoch": 0.0599, "grad_norm": 10.938192367553711, "learning_rate": 4.748030303030303e-06, "loss": 7.410193634033203, "step": 5990 }, { "epoch": 0.05995, "grad_norm": 17.781036376953125, "learning_rate": 4.7477777777777785e-06, "loss": 7.324515533447266, "step": 5995 }, { "epoch": 0.06, "grad_norm": 12.970977783203125, "learning_rate": 4.747525252525253e-06, "loss": 7.388859558105469, "step": 6000 }, { "epoch": 0.06005, "grad_norm": 16.363014221191406, "learning_rate": 4.747272727272728e-06, "loss": 7.4362640380859375, "step": 6005 }, { "epoch": 0.0601, "grad_norm": 17.426462173461914, "learning_rate": 4.747020202020202e-06, "loss": 7.434485626220703, "step": 6010 }, { "epoch": 0.06015, "grad_norm": 21.50102996826172, "learning_rate": 4.746767676767677e-06, "loss": 7.4320838928222654, "step": 6015 }, { "epoch": 0.0602, "grad_norm": 17.60247039794922, "learning_rate": 4.746515151515152e-06, "loss": 7.4154411315917965, "step": 6020 }, { "epoch": 0.06025, "grad_norm": 11.022239685058594, "learning_rate": 4.746262626262626e-06, "loss": 7.354352569580078, "step": 6025 }, { "epoch": 0.0603, "grad_norm": 21.815902709960938, "learning_rate": 4.746010101010101e-06, "loss": 7.417121124267578, "step": 6030 }, { "epoch": 0.06035, "grad_norm": 14.970441818237305, "learning_rate": 4.745757575757576e-06, "loss": 7.431978607177735, "step": 6035 }, { "epoch": 0.0604, "grad_norm": 14.935155868530273, "learning_rate": 4.745505050505051e-06, "loss": 7.3781074523925785, "step": 6040 }, { "epoch": 0.06045, "grad_norm": 22.588817596435547, "learning_rate": 4.745252525252526e-06, "loss": 7.343415832519531, "step": 6045 }, { "epoch": 0.0605, "grad_norm": 21.562217712402344, "learning_rate": 4.745e-06, "loss": 7.612806701660157, "step": 6050 }, { "epoch": 0.06055, "grad_norm": 13.157464027404785, "learning_rate": 4.744747474747475e-06, "loss": 7.344144439697265, "step": 6055 }, { "epoch": 0.0606, "grad_norm": 19.955015182495117, "learning_rate": 4.7444949494949496e-06, "loss": 7.372591400146485, "step": 6060 }, { "epoch": 0.06065, "grad_norm": 13.40371322631836, "learning_rate": 4.744242424242424e-06, "loss": 7.287052917480469, "step": 6065 }, { "epoch": 0.0607, "grad_norm": 12.70802116394043, "learning_rate": 4.743989898989899e-06, "loss": 7.401102447509766, "step": 6070 }, { "epoch": 0.06075, "grad_norm": 23.34242820739746, "learning_rate": 4.743737373737374e-06, "loss": 7.388157653808594, "step": 6075 }, { "epoch": 0.0608, "grad_norm": 20.732418060302734, "learning_rate": 4.743484848484849e-06, "loss": 7.532572174072266, "step": 6080 }, { "epoch": 0.06085, "grad_norm": 19.82404136657715, "learning_rate": 4.7432323232323236e-06, "loss": 7.419532775878906, "step": 6085 }, { "epoch": 0.0609, "grad_norm": 19.236736297607422, "learning_rate": 4.742979797979798e-06, "loss": 7.358146667480469, "step": 6090 }, { "epoch": 0.06095, "grad_norm": 12.853151321411133, "learning_rate": 4.742727272727274e-06, "loss": 7.368955993652344, "step": 6095 }, { "epoch": 0.061, "grad_norm": 15.969011306762695, "learning_rate": 4.742474747474748e-06, "loss": 7.365327453613281, "step": 6100 }, { "epoch": 0.06105, "grad_norm": 21.67343521118164, "learning_rate": 4.742222222222222e-06, "loss": 7.420275115966797, "step": 6105 }, { "epoch": 0.0611, "grad_norm": 17.516590118408203, "learning_rate": 4.741969696969697e-06, "loss": 7.38909683227539, "step": 6110 }, { "epoch": 0.06115, "grad_norm": 20.0014591217041, "learning_rate": 4.741717171717172e-06, "loss": 7.404148101806641, "step": 6115 }, { "epoch": 0.0612, "grad_norm": 13.528839111328125, "learning_rate": 4.741464646464647e-06, "loss": 7.370179748535156, "step": 6120 }, { "epoch": 0.06125, "grad_norm": 21.39055061340332, "learning_rate": 4.7412121212121215e-06, "loss": 7.466165924072266, "step": 6125 }, { "epoch": 0.0613, "grad_norm": 17.44588851928711, "learning_rate": 4.740959595959596e-06, "loss": 7.420116424560547, "step": 6130 }, { "epoch": 0.06135, "grad_norm": 15.444082260131836, "learning_rate": 4.7407070707070716e-06, "loss": 7.370317077636718, "step": 6135 }, { "epoch": 0.0614, "grad_norm": 20.454729080200195, "learning_rate": 4.740454545454546e-06, "loss": 7.410682678222656, "step": 6140 }, { "epoch": 0.06145, "grad_norm": 13.47769546508789, "learning_rate": 4.740202020202021e-06, "loss": 7.53656005859375, "step": 6145 }, { "epoch": 0.0615, "grad_norm": 26.07635498046875, "learning_rate": 4.7399494949494955e-06, "loss": 7.4091957092285154, "step": 6150 }, { "epoch": 0.06155, "grad_norm": 22.732656478881836, "learning_rate": 4.73969696969697e-06, "loss": 7.333550262451172, "step": 6155 }, { "epoch": 0.0616, "grad_norm": 15.837180137634277, "learning_rate": 4.739444444444445e-06, "loss": 7.354871368408203, "step": 6160 }, { "epoch": 0.06165, "grad_norm": 14.899121284484863, "learning_rate": 4.739191919191919e-06, "loss": 7.312581634521484, "step": 6165 }, { "epoch": 0.0617, "grad_norm": 23.64531898498535, "learning_rate": 4.738939393939394e-06, "loss": 7.410173034667968, "step": 6170 }, { "epoch": 0.06175, "grad_norm": 18.29912757873535, "learning_rate": 4.7386868686868695e-06, "loss": 7.352548217773437, "step": 6175 }, { "epoch": 0.0618, "grad_norm": 14.630946159362793, "learning_rate": 4.738434343434344e-06, "loss": 7.401498413085937, "step": 6180 }, { "epoch": 0.06185, "grad_norm": 12.845257759094238, "learning_rate": 4.738181818181819e-06, "loss": 7.335762786865234, "step": 6185 }, { "epoch": 0.0619, "grad_norm": 21.218656539916992, "learning_rate": 4.737929292929293e-06, "loss": 7.358460998535156, "step": 6190 }, { "epoch": 0.06195, "grad_norm": 13.853650093078613, "learning_rate": 4.737676767676768e-06, "loss": 7.312032318115234, "step": 6195 }, { "epoch": 0.062, "grad_norm": 22.474580764770508, "learning_rate": 4.737424242424243e-06, "loss": 7.365679931640625, "step": 6200 }, { "epoch": 0.06205, "grad_norm": 17.139989852905273, "learning_rate": 4.737171717171717e-06, "loss": 7.375102996826172, "step": 6205 }, { "epoch": 0.0621, "grad_norm": 19.642553329467773, "learning_rate": 4.736919191919193e-06, "loss": 7.1638954162597654, "step": 6210 }, { "epoch": 0.06215, "grad_norm": 15.117340087890625, "learning_rate": 4.736666666666667e-06, "loss": 7.306954193115234, "step": 6215 }, { "epoch": 0.0622, "grad_norm": 16.355966567993164, "learning_rate": 4.736414141414142e-06, "loss": 7.327334594726563, "step": 6220 }, { "epoch": 0.06225, "grad_norm": 14.20102310180664, "learning_rate": 4.736161616161617e-06, "loss": 7.356663513183594, "step": 6225 }, { "epoch": 0.0623, "grad_norm": 25.963598251342773, "learning_rate": 4.735909090909091e-06, "loss": 7.352782440185547, "step": 6230 }, { "epoch": 0.06235, "grad_norm": 16.251602172851562, "learning_rate": 4.735656565656566e-06, "loss": 7.3658607482910154, "step": 6235 }, { "epoch": 0.0624, "grad_norm": 16.524738311767578, "learning_rate": 4.7354040404040405e-06, "loss": 7.355733489990234, "step": 6240 }, { "epoch": 0.06245, "grad_norm": 17.232032775878906, "learning_rate": 4.735151515151515e-06, "loss": 7.378041076660156, "step": 6245 }, { "epoch": 0.0625, "grad_norm": 20.492265701293945, "learning_rate": 4.734898989898991e-06, "loss": 7.378736114501953, "step": 6250 }, { "epoch": 0.06255, "grad_norm": 12.40377140045166, "learning_rate": 4.734646464646465e-06, "loss": 7.312510681152344, "step": 6255 }, { "epoch": 0.0626, "grad_norm": 21.939966201782227, "learning_rate": 4.73439393939394e-06, "loss": 7.345623779296875, "step": 6260 }, { "epoch": 0.06265, "grad_norm": 13.608049392700195, "learning_rate": 4.7341414141414145e-06, "loss": 7.391778564453125, "step": 6265 }, { "epoch": 0.0627, "grad_norm": 20.501012802124023, "learning_rate": 4.733888888888889e-06, "loss": 7.3935302734375, "step": 6270 }, { "epoch": 0.06275, "grad_norm": 19.37184715270996, "learning_rate": 4.733636363636364e-06, "loss": 7.306283569335937, "step": 6275 }, { "epoch": 0.0628, "grad_norm": 13.488133430480957, "learning_rate": 4.733383838383838e-06, "loss": 7.396861267089844, "step": 6280 }, { "epoch": 0.06285, "grad_norm": 19.10703468322754, "learning_rate": 4.733131313131313e-06, "loss": 7.358041381835937, "step": 6285 }, { "epoch": 0.0629, "grad_norm": 17.555648803710938, "learning_rate": 4.7328787878787885e-06, "loss": 7.356269073486328, "step": 6290 }, { "epoch": 0.06295, "grad_norm": 14.135422706604004, "learning_rate": 4.732626262626263e-06, "loss": 7.390798950195313, "step": 6295 }, { "epoch": 0.063, "grad_norm": 16.306180953979492, "learning_rate": 4.732373737373738e-06, "loss": 7.316192626953125, "step": 6300 }, { "epoch": 0.06305, "grad_norm": 19.998371124267578, "learning_rate": 4.732121212121212e-06, "loss": 7.359248352050781, "step": 6305 }, { "epoch": 0.0631, "grad_norm": 17.871797561645508, "learning_rate": 4.731868686868688e-06, "loss": 7.306805419921875, "step": 6310 }, { "epoch": 0.06315, "grad_norm": 11.288522720336914, "learning_rate": 4.7316161616161625e-06, "loss": 7.328477478027343, "step": 6315 }, { "epoch": 0.0632, "grad_norm": 22.13749122619629, "learning_rate": 4.731363636363637e-06, "loss": 7.459363555908203, "step": 6320 }, { "epoch": 0.06325, "grad_norm": 14.10296630859375, "learning_rate": 4.731111111111112e-06, "loss": 7.4331611633300785, "step": 6325 }, { "epoch": 0.0633, "grad_norm": 23.86881446838379, "learning_rate": 4.730858585858586e-06, "loss": 7.342826843261719, "step": 6330 }, { "epoch": 0.06335, "grad_norm": 23.636579513549805, "learning_rate": 4.730606060606061e-06, "loss": 7.37843017578125, "step": 6335 }, { "epoch": 0.0634, "grad_norm": 16.015174865722656, "learning_rate": 4.730353535353536e-06, "loss": 7.353832244873047, "step": 6340 }, { "epoch": 0.06345, "grad_norm": 20.537078857421875, "learning_rate": 4.73010101010101e-06, "loss": 7.316778564453125, "step": 6345 }, { "epoch": 0.0635, "grad_norm": 16.991735458374023, "learning_rate": 4.729848484848486e-06, "loss": 7.385282897949219, "step": 6350 }, { "epoch": 0.06355, "grad_norm": 11.62346076965332, "learning_rate": 4.72959595959596e-06, "loss": 7.33648681640625, "step": 6355 }, { "epoch": 0.0636, "grad_norm": 14.831382751464844, "learning_rate": 4.729343434343435e-06, "loss": 7.299131774902344, "step": 6360 }, { "epoch": 0.06365, "grad_norm": 13.414105415344238, "learning_rate": 4.72909090909091e-06, "loss": 7.257669830322266, "step": 6365 }, { "epoch": 0.0637, "grad_norm": 18.439241409301758, "learning_rate": 4.728838383838384e-06, "loss": 7.417701721191406, "step": 6370 }, { "epoch": 0.06375, "grad_norm": 18.860816955566406, "learning_rate": 4.728585858585859e-06, "loss": 7.369428253173828, "step": 6375 }, { "epoch": 0.0638, "grad_norm": 15.246565818786621, "learning_rate": 4.7283333333333335e-06, "loss": 7.373403167724609, "step": 6380 }, { "epoch": 0.06385, "grad_norm": 18.29256248474121, "learning_rate": 4.728080808080808e-06, "loss": 7.309664154052735, "step": 6385 }, { "epoch": 0.0639, "grad_norm": 21.724143981933594, "learning_rate": 4.727828282828284e-06, "loss": 7.331757354736328, "step": 6390 }, { "epoch": 0.06395, "grad_norm": 10.249874114990234, "learning_rate": 4.727575757575758e-06, "loss": 7.28857421875, "step": 6395 }, { "epoch": 0.064, "grad_norm": 22.463497161865234, "learning_rate": 4.727323232323233e-06, "loss": 7.329997253417969, "step": 6400 }, { "epoch": 0.06405, "grad_norm": 16.391769409179688, "learning_rate": 4.7270707070707075e-06, "loss": 7.312666320800782, "step": 6405 }, { "epoch": 0.0641, "grad_norm": 16.032358169555664, "learning_rate": 4.726818181818182e-06, "loss": 7.431202697753906, "step": 6410 }, { "epoch": 0.06415, "grad_norm": 13.866803169250488, "learning_rate": 4.726565656565657e-06, "loss": 7.3273262023925785, "step": 6415 }, { "epoch": 0.0642, "grad_norm": 23.178359985351562, "learning_rate": 4.726313131313131e-06, "loss": 7.303889465332031, "step": 6420 }, { "epoch": 0.06425, "grad_norm": 18.35276222229004, "learning_rate": 4.726060606060606e-06, "loss": 7.341036224365235, "step": 6425 }, { "epoch": 0.0643, "grad_norm": 14.11735725402832, "learning_rate": 4.7258080808080815e-06, "loss": 7.382530212402344, "step": 6430 }, { "epoch": 0.06435, "grad_norm": 17.06545639038086, "learning_rate": 4.725555555555556e-06, "loss": 7.311770629882813, "step": 6435 }, { "epoch": 0.0644, "grad_norm": 12.276899337768555, "learning_rate": 4.725303030303031e-06, "loss": 7.329534912109375, "step": 6440 }, { "epoch": 0.06445, "grad_norm": 19.888385772705078, "learning_rate": 4.725050505050505e-06, "loss": 7.334482574462891, "step": 6445 }, { "epoch": 0.0645, "grad_norm": 21.440914154052734, "learning_rate": 4.72479797979798e-06, "loss": 7.305474853515625, "step": 6450 }, { "epoch": 0.06455, "grad_norm": 16.971323013305664, "learning_rate": 4.724545454545455e-06, "loss": 7.328004455566406, "step": 6455 }, { "epoch": 0.0646, "grad_norm": 12.29317569732666, "learning_rate": 4.724292929292929e-06, "loss": 7.2856605529785154, "step": 6460 }, { "epoch": 0.06465, "grad_norm": 22.287874221801758, "learning_rate": 4.724040404040404e-06, "loss": 7.333892822265625, "step": 6465 }, { "epoch": 0.0647, "grad_norm": 16.47034454345703, "learning_rate": 4.723787878787879e-06, "loss": 7.3681884765625, "step": 6470 }, { "epoch": 0.06475, "grad_norm": 13.40788459777832, "learning_rate": 4.723535353535354e-06, "loss": 7.3021400451660154, "step": 6475 }, { "epoch": 0.0648, "grad_norm": 26.350791931152344, "learning_rate": 4.723282828282829e-06, "loss": 7.362093353271485, "step": 6480 }, { "epoch": 0.06485, "grad_norm": 12.530688285827637, "learning_rate": 4.723030303030303e-06, "loss": 7.333232879638672, "step": 6485 }, { "epoch": 0.0649, "grad_norm": 20.948619842529297, "learning_rate": 4.722777777777779e-06, "loss": 7.376651763916016, "step": 6490 }, { "epoch": 0.06495, "grad_norm": 24.572309494018555, "learning_rate": 4.7225252525252526e-06, "loss": 7.288954162597657, "step": 6495 }, { "epoch": 0.065, "grad_norm": 15.652830123901367, "learning_rate": 4.722272727272727e-06, "loss": 7.271328735351562, "step": 6500 }, { "epoch": 0.06505, "grad_norm": 13.813000679016113, "learning_rate": 4.722020202020202e-06, "loss": 7.340414428710938, "step": 6505 }, { "epoch": 0.0651, "grad_norm": 19.120975494384766, "learning_rate": 4.721767676767677e-06, "loss": 7.4265380859375, "step": 6510 }, { "epoch": 0.06515, "grad_norm": 19.44134521484375, "learning_rate": 4.721515151515152e-06, "loss": 7.286913299560547, "step": 6515 }, { "epoch": 0.0652, "grad_norm": 12.709969520568848, "learning_rate": 4.7212626262626266e-06, "loss": 7.287065124511718, "step": 6520 }, { "epoch": 0.06525, "grad_norm": 20.855287551879883, "learning_rate": 4.721010101010101e-06, "loss": 7.298835754394531, "step": 6525 }, { "epoch": 0.0653, "grad_norm": 13.956583976745605, "learning_rate": 4.720757575757577e-06, "loss": 7.324440765380859, "step": 6530 }, { "epoch": 0.06535, "grad_norm": 15.965331077575684, "learning_rate": 4.720505050505051e-06, "loss": 7.29022445678711, "step": 6535 }, { "epoch": 0.0654, "grad_norm": 22.82750129699707, "learning_rate": 4.720252525252526e-06, "loss": 7.31793441772461, "step": 6540 }, { "epoch": 0.06545, "grad_norm": 10.450855255126953, "learning_rate": 4.7200000000000005e-06, "loss": 7.300748443603515, "step": 6545 }, { "epoch": 0.0655, "grad_norm": 15.687492370605469, "learning_rate": 4.719747474747475e-06, "loss": 7.2619171142578125, "step": 6550 }, { "epoch": 0.06555, "grad_norm": 13.17013931274414, "learning_rate": 4.71949494949495e-06, "loss": 7.371894836425781, "step": 6555 }, { "epoch": 0.0656, "grad_norm": 20.62639045715332, "learning_rate": 4.7192424242424244e-06, "loss": 7.390414428710938, "step": 6560 }, { "epoch": 0.06565, "grad_norm": 22.231746673583984, "learning_rate": 4.718989898989899e-06, "loss": 7.291191101074219, "step": 6565 }, { "epoch": 0.0657, "grad_norm": 21.956789016723633, "learning_rate": 4.7187373737373745e-06, "loss": 7.307810211181641, "step": 6570 }, { "epoch": 0.06575, "grad_norm": 19.16622543334961, "learning_rate": 4.718484848484849e-06, "loss": 7.324440765380859, "step": 6575 }, { "epoch": 0.0658, "grad_norm": 18.338985443115234, "learning_rate": 4.718232323232324e-06, "loss": 7.281874084472657, "step": 6580 }, { "epoch": 0.06585, "grad_norm": 13.890073776245117, "learning_rate": 4.7179797979797984e-06, "loss": 7.351361846923828, "step": 6585 }, { "epoch": 0.0659, "grad_norm": 17.487232208251953, "learning_rate": 4.717727272727273e-06, "loss": 7.337972259521484, "step": 6590 }, { "epoch": 0.06595, "grad_norm": 13.280776977539062, "learning_rate": 4.717474747474748e-06, "loss": 7.3161460876464846, "step": 6595 }, { "epoch": 0.066, "grad_norm": 23.972814559936523, "learning_rate": 4.717222222222222e-06, "loss": 7.347524261474609, "step": 6600 }, { "epoch": 0.06605, "grad_norm": 15.621679306030273, "learning_rate": 4.716969696969697e-06, "loss": 7.298374938964844, "step": 6605 }, { "epoch": 0.0661, "grad_norm": 18.897226333618164, "learning_rate": 4.7167171717171724e-06, "loss": 7.268804931640625, "step": 6610 }, { "epoch": 0.06615, "grad_norm": 12.835030555725098, "learning_rate": 4.716464646464647e-06, "loss": 7.304584503173828, "step": 6615 }, { "epoch": 0.0662, "grad_norm": 12.50285816192627, "learning_rate": 4.716212121212122e-06, "loss": 7.3490234375, "step": 6620 }, { "epoch": 0.06625, "grad_norm": 15.55599594116211, "learning_rate": 4.715959595959596e-06, "loss": 7.278350067138672, "step": 6625 }, { "epoch": 0.0663, "grad_norm": 13.388211250305176, "learning_rate": 4.715707070707071e-06, "loss": 7.289142608642578, "step": 6630 }, { "epoch": 0.06635, "grad_norm": 28.649948120117188, "learning_rate": 4.715454545454546e-06, "loss": 7.2746940612792965, "step": 6635 }, { "epoch": 0.0664, "grad_norm": 17.62480354309082, "learning_rate": 4.71520202020202e-06, "loss": 7.408467102050781, "step": 6640 }, { "epoch": 0.06645, "grad_norm": 17.75600814819336, "learning_rate": 4.714949494949496e-06, "loss": 7.299745178222656, "step": 6645 }, { "epoch": 0.0665, "grad_norm": 10.599562644958496, "learning_rate": 4.71469696969697e-06, "loss": 7.262963104248047, "step": 6650 }, { "epoch": 0.06655, "grad_norm": 13.631043434143066, "learning_rate": 4.714444444444445e-06, "loss": 7.232206726074219, "step": 6655 }, { "epoch": 0.0666, "grad_norm": 19.355812072753906, "learning_rate": 4.71419191919192e-06, "loss": 7.2416847229003904, "step": 6660 }, { "epoch": 0.06665, "grad_norm": 17.202749252319336, "learning_rate": 4.713939393939394e-06, "loss": 7.272434997558594, "step": 6665 }, { "epoch": 0.0667, "grad_norm": 10.6626615524292, "learning_rate": 4.713686868686869e-06, "loss": 7.341461944580078, "step": 6670 }, { "epoch": 0.06675, "grad_norm": 18.189058303833008, "learning_rate": 4.7134343434343435e-06, "loss": 7.295185852050781, "step": 6675 }, { "epoch": 0.0668, "grad_norm": 13.035717964172363, "learning_rate": 4.713181818181818e-06, "loss": 7.28192367553711, "step": 6680 }, { "epoch": 0.06685, "grad_norm": 24.853967666625977, "learning_rate": 4.712929292929294e-06, "loss": 7.299000549316406, "step": 6685 }, { "epoch": 0.0669, "grad_norm": 17.72437858581543, "learning_rate": 4.712676767676768e-06, "loss": 7.289732360839844, "step": 6690 }, { "epoch": 0.06695, "grad_norm": 16.3730411529541, "learning_rate": 4.712424242424243e-06, "loss": 7.295805358886719, "step": 6695 }, { "epoch": 0.067, "grad_norm": 16.94779396057129, "learning_rate": 4.7121717171717175e-06, "loss": 7.249663543701172, "step": 6700 }, { "epoch": 0.06705, "grad_norm": 18.565229415893555, "learning_rate": 4.711919191919193e-06, "loss": 7.270211791992187, "step": 6705 }, { "epoch": 0.0671, "grad_norm": 13.987051963806152, "learning_rate": 4.711666666666668e-06, "loss": 7.2154182434082035, "step": 6710 }, { "epoch": 0.06715, "grad_norm": 13.697429656982422, "learning_rate": 4.711414141414141e-06, "loss": 7.586630249023438, "step": 6715 }, { "epoch": 0.0672, "grad_norm": 14.224781036376953, "learning_rate": 4.711161616161616e-06, "loss": 7.301143646240234, "step": 6720 }, { "epoch": 0.06725, "grad_norm": 21.061784744262695, "learning_rate": 4.7109090909090915e-06, "loss": 7.325705718994141, "step": 6725 }, { "epoch": 0.0673, "grad_norm": 18.457120895385742, "learning_rate": 4.710656565656566e-06, "loss": 7.290209197998047, "step": 6730 }, { "epoch": 0.06735, "grad_norm": 17.7507266998291, "learning_rate": 4.710404040404041e-06, "loss": 7.264765930175781, "step": 6735 }, { "epoch": 0.0674, "grad_norm": 17.166187286376953, "learning_rate": 4.710151515151515e-06, "loss": 7.258859252929687, "step": 6740 }, { "epoch": 0.06745, "grad_norm": 18.604665756225586, "learning_rate": 4.709898989898991e-06, "loss": 7.295572662353516, "step": 6745 }, { "epoch": 0.0675, "grad_norm": 19.921525955200195, "learning_rate": 4.7096464646464655e-06, "loss": 7.341793060302734, "step": 6750 }, { "epoch": 0.06755, "grad_norm": 11.9418363571167, "learning_rate": 4.70939393939394e-06, "loss": 7.224874877929688, "step": 6755 }, { "epoch": 0.0676, "grad_norm": 20.216550827026367, "learning_rate": 4.709141414141415e-06, "loss": 7.266896057128906, "step": 6760 }, { "epoch": 0.06765, "grad_norm": 13.853838920593262, "learning_rate": 4.708888888888889e-06, "loss": 7.250466918945312, "step": 6765 }, { "epoch": 0.0677, "grad_norm": 16.157703399658203, "learning_rate": 4.708636363636364e-06, "loss": 7.245932769775391, "step": 6770 }, { "epoch": 0.06775, "grad_norm": 17.650476455688477, "learning_rate": 4.708383838383839e-06, "loss": 7.282032775878906, "step": 6775 }, { "epoch": 0.0678, "grad_norm": 19.55583381652832, "learning_rate": 4.708131313131313e-06, "loss": 7.254768371582031, "step": 6780 }, { "epoch": 0.06785, "grad_norm": 13.151528358459473, "learning_rate": 4.707878787878789e-06, "loss": 7.2370460510253904, "step": 6785 }, { "epoch": 0.0679, "grad_norm": 21.066972732543945, "learning_rate": 4.707626262626263e-06, "loss": 7.244229125976562, "step": 6790 }, { "epoch": 0.06795, "grad_norm": 20.489511489868164, "learning_rate": 4.707373737373738e-06, "loss": 7.35509033203125, "step": 6795 }, { "epoch": 0.068, "grad_norm": 17.795164108276367, "learning_rate": 4.707121212121213e-06, "loss": 7.163232421875, "step": 6800 }, { "epoch": 0.06805, "grad_norm": 32.03147506713867, "learning_rate": 4.706868686868687e-06, "loss": 7.261328125, "step": 6805 }, { "epoch": 0.0681, "grad_norm": 16.134010314941406, "learning_rate": 4.706616161616162e-06, "loss": 7.186199188232422, "step": 6810 }, { "epoch": 0.06815, "grad_norm": 11.905620574951172, "learning_rate": 4.7063636363636365e-06, "loss": 7.251412963867187, "step": 6815 }, { "epoch": 0.0682, "grad_norm": 16.414987564086914, "learning_rate": 4.706111111111111e-06, "loss": 7.2604515075683596, "step": 6820 }, { "epoch": 0.06825, "grad_norm": 25.271652221679688, "learning_rate": 4.705858585858587e-06, "loss": 7.286151123046875, "step": 6825 }, { "epoch": 0.0683, "grad_norm": 18.41845703125, "learning_rate": 4.705606060606061e-06, "loss": 7.249119567871094, "step": 6830 }, { "epoch": 0.06835, "grad_norm": 11.497271537780762, "learning_rate": 4.705353535353536e-06, "loss": 7.231997680664063, "step": 6835 }, { "epoch": 0.0684, "grad_norm": 23.832277297973633, "learning_rate": 4.7051010101010105e-06, "loss": 7.287998199462891, "step": 6840 }, { "epoch": 0.06845, "grad_norm": 10.077827453613281, "learning_rate": 4.704848484848485e-06, "loss": 7.225074005126953, "step": 6845 }, { "epoch": 0.0685, "grad_norm": 19.457963943481445, "learning_rate": 4.70459595959596e-06, "loss": 7.1588279724121096, "step": 6850 }, { "epoch": 0.06855, "grad_norm": 11.325835227966309, "learning_rate": 4.704343434343434e-06, "loss": 7.279461669921875, "step": 6855 }, { "epoch": 0.0686, "grad_norm": 26.75511932373047, "learning_rate": 4.704090909090909e-06, "loss": 7.32629623413086, "step": 6860 }, { "epoch": 0.06865, "grad_norm": 14.392694473266602, "learning_rate": 4.7038383838383845e-06, "loss": 7.232565307617188, "step": 6865 }, { "epoch": 0.0687, "grad_norm": 22.007972717285156, "learning_rate": 4.703585858585859e-06, "loss": 7.286494445800781, "step": 6870 }, { "epoch": 0.06875, "grad_norm": 16.357524871826172, "learning_rate": 4.703333333333334e-06, "loss": 7.236724090576172, "step": 6875 }, { "epoch": 0.0688, "grad_norm": 17.820871353149414, "learning_rate": 4.703080808080808e-06, "loss": 7.165650177001953, "step": 6880 }, { "epoch": 0.06885, "grad_norm": 23.891380310058594, "learning_rate": 4.702828282828283e-06, "loss": 7.356695556640625, "step": 6885 }, { "epoch": 0.0689, "grad_norm": 13.085551261901855, "learning_rate": 4.702575757575758e-06, "loss": 7.252647399902344, "step": 6890 }, { "epoch": 0.06895, "grad_norm": 17.1274471282959, "learning_rate": 4.702323232323232e-06, "loss": 7.260833740234375, "step": 6895 }, { "epoch": 0.069, "grad_norm": 13.84726619720459, "learning_rate": 4.702070707070707e-06, "loss": 7.2034751892089846, "step": 6900 }, { "epoch": 0.06905, "grad_norm": 22.258691787719727, "learning_rate": 4.701818181818182e-06, "loss": 7.318419647216797, "step": 6905 }, { "epoch": 0.0691, "grad_norm": 11.701295852661133, "learning_rate": 4.701565656565657e-06, "loss": 7.31909408569336, "step": 6910 }, { "epoch": 0.06915, "grad_norm": 17.130931854248047, "learning_rate": 4.701313131313132e-06, "loss": 7.226158905029297, "step": 6915 }, { "epoch": 0.0692, "grad_norm": 17.088153839111328, "learning_rate": 4.701060606060606e-06, "loss": 7.226222991943359, "step": 6920 }, { "epoch": 0.06925, "grad_norm": 15.763093948364258, "learning_rate": 4.700808080808082e-06, "loss": 7.255010986328125, "step": 6925 }, { "epoch": 0.0693, "grad_norm": 16.385208129882812, "learning_rate": 4.700555555555556e-06, "loss": 7.245035552978516, "step": 6930 }, { "epoch": 0.06935, "grad_norm": 14.967244148254395, "learning_rate": 4.70030303030303e-06, "loss": 7.230848693847657, "step": 6935 }, { "epoch": 0.0694, "grad_norm": 12.815757751464844, "learning_rate": 4.700050505050505e-06, "loss": 7.303600311279297, "step": 6940 }, { "epoch": 0.06945, "grad_norm": 14.421931266784668, "learning_rate": 4.69979797979798e-06, "loss": 7.264089202880859, "step": 6945 }, { "epoch": 0.0695, "grad_norm": 20.66776466369629, "learning_rate": 4.699545454545455e-06, "loss": 7.245578002929688, "step": 6950 }, { "epoch": 0.06955, "grad_norm": 9.479636192321777, "learning_rate": 4.6992929292929295e-06, "loss": 7.260986328125, "step": 6955 }, { "epoch": 0.0696, "grad_norm": 34.23777389526367, "learning_rate": 4.699040404040404e-06, "loss": 7.279319763183594, "step": 6960 }, { "epoch": 0.06965, "grad_norm": 10.83349895477295, "learning_rate": 4.69878787878788e-06, "loss": 7.04869384765625, "step": 6965 }, { "epoch": 0.0697, "grad_norm": 19.926706314086914, "learning_rate": 4.698535353535354e-06, "loss": 7.311029052734375, "step": 6970 }, { "epoch": 0.06975, "grad_norm": 16.94400405883789, "learning_rate": 4.698282828282829e-06, "loss": 7.258890533447266, "step": 6975 }, { "epoch": 0.0698, "grad_norm": 12.457530975341797, "learning_rate": 4.6980303030303035e-06, "loss": 7.205348205566406, "step": 6980 }, { "epoch": 0.06985, "grad_norm": 16.392662048339844, "learning_rate": 4.697777777777778e-06, "loss": 7.217481994628907, "step": 6985 }, { "epoch": 0.0699, "grad_norm": 24.25126075744629, "learning_rate": 4.697525252525253e-06, "loss": 7.33740005493164, "step": 6990 }, { "epoch": 0.06995, "grad_norm": 18.634384155273438, "learning_rate": 4.697272727272727e-06, "loss": 7.2506462097167965, "step": 6995 }, { "epoch": 0.07, "grad_norm": 14.947834014892578, "learning_rate": 4.697020202020202e-06, "loss": 7.188893127441406, "step": 7000 }, { "epoch": 0.07005, "grad_norm": 14.509441375732422, "learning_rate": 4.6967676767676775e-06, "loss": 7.213644409179688, "step": 7005 }, { "epoch": 0.0701, "grad_norm": 21.309185028076172, "learning_rate": 4.696515151515152e-06, "loss": 7.27984619140625, "step": 7010 }, { "epoch": 0.07015, "grad_norm": 17.988183975219727, "learning_rate": 4.696262626262627e-06, "loss": 7.249726867675781, "step": 7015 }, { "epoch": 0.0702, "grad_norm": 14.676074028015137, "learning_rate": 4.696010101010101e-06, "loss": 7.271430206298828, "step": 7020 }, { "epoch": 0.07025, "grad_norm": 20.313596725463867, "learning_rate": 4.695757575757576e-06, "loss": 7.2612358093261715, "step": 7025 }, { "epoch": 0.0703, "grad_norm": 14.956355094909668, "learning_rate": 4.695505050505051e-06, "loss": 7.3380584716796875, "step": 7030 }, { "epoch": 0.07035, "grad_norm": 17.274581909179688, "learning_rate": 4.695252525252525e-06, "loss": 7.183248901367188, "step": 7035 }, { "epoch": 0.0704, "grad_norm": 16.541122436523438, "learning_rate": 4.695e-06, "loss": 7.187129211425781, "step": 7040 }, { "epoch": 0.07045, "grad_norm": 145.8016357421875, "learning_rate": 4.694747474747475e-06, "loss": 7.6001945495605465, "step": 7045 }, { "epoch": 0.0705, "grad_norm": 14.267823219299316, "learning_rate": 4.69449494949495e-06, "loss": 7.1746368408203125, "step": 7050 }, { "epoch": 0.07055, "grad_norm": 16.932870864868164, "learning_rate": 4.694242424242425e-06, "loss": 7.250926208496094, "step": 7055 }, { "epoch": 0.0706, "grad_norm": 13.7836275100708, "learning_rate": 4.693989898989899e-06, "loss": 7.135867309570313, "step": 7060 }, { "epoch": 0.07065, "grad_norm": 12.472376823425293, "learning_rate": 4.693737373737374e-06, "loss": 7.223270416259766, "step": 7065 }, { "epoch": 0.0707, "grad_norm": 18.159093856811523, "learning_rate": 4.6934848484848486e-06, "loss": 7.265229034423828, "step": 7070 }, { "epoch": 0.07075, "grad_norm": 12.178616523742676, "learning_rate": 4.693232323232323e-06, "loss": 7.144489288330078, "step": 7075 }, { "epoch": 0.0708, "grad_norm": 19.236019134521484, "learning_rate": 4.692979797979799e-06, "loss": 7.207997131347656, "step": 7080 }, { "epoch": 0.07085, "grad_norm": 17.932714462280273, "learning_rate": 4.692727272727273e-06, "loss": 7.26055908203125, "step": 7085 }, { "epoch": 0.0709, "grad_norm": 14.417783737182617, "learning_rate": 4.692474747474748e-06, "loss": 7.187083435058594, "step": 7090 }, { "epoch": 0.07095, "grad_norm": 17.155166625976562, "learning_rate": 4.6922222222222226e-06, "loss": 7.206239318847656, "step": 7095 }, { "epoch": 0.071, "grad_norm": 15.061664581298828, "learning_rate": 4.691969696969697e-06, "loss": 7.2268821716308596, "step": 7100 }, { "epoch": 0.07105, "grad_norm": 11.822611808776855, "learning_rate": 4.691717171717172e-06, "loss": 7.167335510253906, "step": 7105 }, { "epoch": 0.0711, "grad_norm": 17.461933135986328, "learning_rate": 4.6914646464646465e-06, "loss": 7.2065788269042965, "step": 7110 }, { "epoch": 0.07115, "grad_norm": 28.14124870300293, "learning_rate": 4.691212121212121e-06, "loss": 7.212700653076172, "step": 7115 }, { "epoch": 0.0712, "grad_norm": 10.756271362304688, "learning_rate": 4.6909595959595966e-06, "loss": 7.279926300048828, "step": 7120 }, { "epoch": 0.07125, "grad_norm": 17.512990951538086, "learning_rate": 4.690707070707071e-06, "loss": 7.233715057373047, "step": 7125 }, { "epoch": 0.0713, "grad_norm": 13.29555892944336, "learning_rate": 4.690454545454546e-06, "loss": 7.181877899169922, "step": 7130 }, { "epoch": 0.07135, "grad_norm": 14.677163124084473, "learning_rate": 4.6902020202020205e-06, "loss": 7.224195861816407, "step": 7135 }, { "epoch": 0.0714, "grad_norm": 38.25970458984375, "learning_rate": 4.689949494949496e-06, "loss": 7.291853332519532, "step": 7140 }, { "epoch": 0.07145, "grad_norm": 15.821635246276855, "learning_rate": 4.6896969696969706e-06, "loss": 7.189391326904297, "step": 7145 }, { "epoch": 0.0715, "grad_norm": 12.712769508361816, "learning_rate": 4.689444444444445e-06, "loss": 7.206454467773438, "step": 7150 }, { "epoch": 0.07155, "grad_norm": 18.818788528442383, "learning_rate": 4.68919191919192e-06, "loss": 7.195346832275391, "step": 7155 }, { "epoch": 0.0716, "grad_norm": 14.541913032531738, "learning_rate": 4.6889393939393945e-06, "loss": 7.301519775390625, "step": 7160 }, { "epoch": 0.07165, "grad_norm": 14.378938674926758, "learning_rate": 4.688686868686869e-06, "loss": 7.170602416992187, "step": 7165 }, { "epoch": 0.0717, "grad_norm": 17.976091384887695, "learning_rate": 4.688434343434344e-06, "loss": 7.22420654296875, "step": 7170 }, { "epoch": 0.07175, "grad_norm": 11.28448486328125, "learning_rate": 4.688181818181818e-06, "loss": 7.190703582763672, "step": 7175 }, { "epoch": 0.0718, "grad_norm": 16.151044845581055, "learning_rate": 4.687929292929294e-06, "loss": 7.1992942810058596, "step": 7180 }, { "epoch": 0.07185, "grad_norm": 12.705129623413086, "learning_rate": 4.6876767676767684e-06, "loss": 7.2491302490234375, "step": 7185 }, { "epoch": 0.0719, "grad_norm": 15.767026901245117, "learning_rate": 4.687424242424243e-06, "loss": 7.191873931884766, "step": 7190 }, { "epoch": 0.07195, "grad_norm": 16.44830322265625, "learning_rate": 4.687171717171718e-06, "loss": 7.1904052734375, "step": 7195 }, { "epoch": 0.072, "grad_norm": 11.954500198364258, "learning_rate": 4.686919191919192e-06, "loss": 7.149915313720703, "step": 7200 }, { "epoch": 0.07205, "grad_norm": 27.732746124267578, "learning_rate": 4.686666666666667e-06, "loss": 7.225939178466797, "step": 7205 }, { "epoch": 0.0721, "grad_norm": 13.856980323791504, "learning_rate": 4.686414141414142e-06, "loss": 7.278646087646484, "step": 7210 }, { "epoch": 0.07215, "grad_norm": 10.242147445678711, "learning_rate": 4.686161616161616e-06, "loss": 7.239305877685547, "step": 7215 }, { "epoch": 0.0722, "grad_norm": 19.304935455322266, "learning_rate": 4.685909090909092e-06, "loss": 7.192593383789062, "step": 7220 }, { "epoch": 0.07225, "grad_norm": 15.23874568939209, "learning_rate": 4.685656565656566e-06, "loss": 7.151276397705078, "step": 7225 }, { "epoch": 0.0723, "grad_norm": 14.224249839782715, "learning_rate": 4.685404040404041e-06, "loss": 7.164997100830078, "step": 7230 }, { "epoch": 0.07235, "grad_norm": 18.315486907958984, "learning_rate": 4.685151515151516e-06, "loss": 7.1989906311035154, "step": 7235 }, { "epoch": 0.0724, "grad_norm": 14.671064376831055, "learning_rate": 4.68489898989899e-06, "loss": 7.256375885009765, "step": 7240 }, { "epoch": 0.07245, "grad_norm": 9.044522285461426, "learning_rate": 4.684646464646465e-06, "loss": 7.163508605957031, "step": 7245 }, { "epoch": 0.0725, "grad_norm": 16.056352615356445, "learning_rate": 4.6843939393939395e-06, "loss": 7.2094062805175785, "step": 7250 }, { "epoch": 0.07255, "grad_norm": 18.901168823242188, "learning_rate": 4.684141414141414e-06, "loss": 7.167227172851563, "step": 7255 }, { "epoch": 0.0726, "grad_norm": 10.074485778808594, "learning_rate": 4.68388888888889e-06, "loss": 7.130014038085937, "step": 7260 }, { "epoch": 0.07265, "grad_norm": 21.598567962646484, "learning_rate": 4.683636363636364e-06, "loss": 7.203331756591797, "step": 7265 }, { "epoch": 0.0727, "grad_norm": 11.920461654663086, "learning_rate": 4.683383838383839e-06, "loss": 7.156430053710937, "step": 7270 }, { "epoch": 0.07275, "grad_norm": 20.37619972229004, "learning_rate": 4.6831313131313135e-06, "loss": 7.212739562988281, "step": 7275 }, { "epoch": 0.0728, "grad_norm": 12.022930145263672, "learning_rate": 4.682878787878788e-06, "loss": 7.147222900390625, "step": 7280 }, { "epoch": 0.07285, "grad_norm": 19.028615951538086, "learning_rate": 4.682626262626263e-06, "loss": 7.211968994140625, "step": 7285 }, { "epoch": 0.0729, "grad_norm": 10.775739669799805, "learning_rate": 4.682373737373737e-06, "loss": 7.172687530517578, "step": 7290 }, { "epoch": 0.07295, "grad_norm": 16.974082946777344, "learning_rate": 4.682121212121212e-06, "loss": 7.192015075683594, "step": 7295 }, { "epoch": 0.073, "grad_norm": 15.989989280700684, "learning_rate": 4.6818686868686875e-06, "loss": 7.168571472167969, "step": 7300 }, { "epoch": 0.07305, "grad_norm": 11.238309860229492, "learning_rate": 4.681616161616162e-06, "loss": 7.205010986328125, "step": 7305 }, { "epoch": 0.0731, "grad_norm": 12.807601928710938, "learning_rate": 4.681363636363637e-06, "loss": 7.223105621337891, "step": 7310 }, { "epoch": 0.07315, "grad_norm": 16.245805740356445, "learning_rate": 4.681111111111111e-06, "loss": 7.252875518798828, "step": 7315 }, { "epoch": 0.0732, "grad_norm": 14.222648620605469, "learning_rate": 4.680858585858587e-06, "loss": 7.176698303222656, "step": 7320 }, { "epoch": 0.07325, "grad_norm": 11.99246597290039, "learning_rate": 4.680606060606061e-06, "loss": 7.147579956054687, "step": 7325 }, { "epoch": 0.0733, "grad_norm": 16.405704498291016, "learning_rate": 4.680353535353535e-06, "loss": 7.150675964355469, "step": 7330 }, { "epoch": 0.07335, "grad_norm": 14.745780944824219, "learning_rate": 4.68010101010101e-06, "loss": 7.175132751464844, "step": 7335 }, { "epoch": 0.0734, "grad_norm": 11.090824127197266, "learning_rate": 4.679848484848485e-06, "loss": 7.143619537353516, "step": 7340 }, { "epoch": 0.07345, "grad_norm": 14.733095169067383, "learning_rate": 4.67959595959596e-06, "loss": 7.178958129882813, "step": 7345 }, { "epoch": 0.0735, "grad_norm": 14.5630464553833, "learning_rate": 4.679343434343435e-06, "loss": 7.201460266113282, "step": 7350 }, { "epoch": 0.07355, "grad_norm": 15.773290634155273, "learning_rate": 4.679090909090909e-06, "loss": 7.198712158203125, "step": 7355 }, { "epoch": 0.0736, "grad_norm": 15.687095642089844, "learning_rate": 4.678838383838385e-06, "loss": 7.2458343505859375, "step": 7360 }, { "epoch": 0.07365, "grad_norm": 14.286864280700684, "learning_rate": 4.678585858585859e-06, "loss": 7.1060951232910154, "step": 7365 }, { "epoch": 0.0737, "grad_norm": 13.308348655700684, "learning_rate": 4.678333333333334e-06, "loss": 7.205097961425781, "step": 7370 }, { "epoch": 0.07375, "grad_norm": 14.035758972167969, "learning_rate": 4.678080808080809e-06, "loss": 7.182550048828125, "step": 7375 }, { "epoch": 0.0738, "grad_norm": 13.458452224731445, "learning_rate": 4.677828282828283e-06, "loss": 7.1582176208496096, "step": 7380 }, { "epoch": 0.07385, "grad_norm": 13.81605052947998, "learning_rate": 4.677575757575758e-06, "loss": 7.145982360839843, "step": 7385 }, { "epoch": 0.0739, "grad_norm": 18.860471725463867, "learning_rate": 4.6773232323232325e-06, "loss": 7.1929878234863285, "step": 7390 }, { "epoch": 0.07395, "grad_norm": 10.516059875488281, "learning_rate": 4.677070707070707e-06, "loss": 7.172614288330078, "step": 7395 }, { "epoch": 0.074, "grad_norm": 16.022247314453125, "learning_rate": 4.676818181818183e-06, "loss": 7.194376373291016, "step": 7400 }, { "epoch": 0.07405, "grad_norm": 10.775367736816406, "learning_rate": 4.676565656565657e-06, "loss": 7.292423248291016, "step": 7405 }, { "epoch": 0.0741, "grad_norm": 9.246915817260742, "learning_rate": 4.676313131313132e-06, "loss": 7.2463539123535154, "step": 7410 }, { "epoch": 0.07415, "grad_norm": 15.759200096130371, "learning_rate": 4.6760606060606065e-06, "loss": 7.179792785644532, "step": 7415 }, { "epoch": 0.0742, "grad_norm": 12.837616920471191, "learning_rate": 4.675808080808081e-06, "loss": 7.094776916503906, "step": 7420 }, { "epoch": 0.07425, "grad_norm": 12.334062576293945, "learning_rate": 4.675555555555556e-06, "loss": 7.1465919494628904, "step": 7425 }, { "epoch": 0.0743, "grad_norm": 13.416414260864258, "learning_rate": 4.67530303030303e-06, "loss": 7.215462493896484, "step": 7430 }, { "epoch": 0.07435, "grad_norm": 14.021585464477539, "learning_rate": 4.675050505050505e-06, "loss": 7.188615417480468, "step": 7435 }, { "epoch": 0.0744, "grad_norm": 12.90938663482666, "learning_rate": 4.6747979797979805e-06, "loss": 7.124367523193359, "step": 7440 }, { "epoch": 0.07445, "grad_norm": 9.53298568725586, "learning_rate": 4.674545454545455e-06, "loss": 7.256126403808594, "step": 7445 }, { "epoch": 0.0745, "grad_norm": 12.647445678710938, "learning_rate": 4.67429292929293e-06, "loss": 7.194955444335937, "step": 7450 }, { "epoch": 0.07455, "grad_norm": 12.612387657165527, "learning_rate": 4.674040404040404e-06, "loss": 7.145745086669922, "step": 7455 }, { "epoch": 0.0746, "grad_norm": 16.620391845703125, "learning_rate": 4.673787878787879e-06, "loss": 7.137355804443359, "step": 7460 }, { "epoch": 0.07465, "grad_norm": 16.628915786743164, "learning_rate": 4.673535353535354e-06, "loss": 7.181729125976562, "step": 7465 }, { "epoch": 0.0747, "grad_norm": 9.955098152160645, "learning_rate": 4.673282828282828e-06, "loss": 7.1686866760253904, "step": 7470 }, { "epoch": 0.07475, "grad_norm": 14.952354431152344, "learning_rate": 4.673030303030303e-06, "loss": 7.253731536865234, "step": 7475 }, { "epoch": 0.0748, "grad_norm": 14.723752975463867, "learning_rate": 4.672777777777778e-06, "loss": 7.151538848876953, "step": 7480 }, { "epoch": 0.07485, "grad_norm": 16.6400089263916, "learning_rate": 4.672525252525253e-06, "loss": 7.186514282226563, "step": 7485 }, { "epoch": 0.0749, "grad_norm": 11.493280410766602, "learning_rate": 4.672272727272728e-06, "loss": 7.260272216796875, "step": 7490 }, { "epoch": 0.07495, "grad_norm": 10.865557670593262, "learning_rate": 4.672020202020202e-06, "loss": 7.1695198059082035, "step": 7495 }, { "epoch": 0.075, "grad_norm": 16.56884765625, "learning_rate": 4.671767676767677e-06, "loss": 7.194746398925782, "step": 7500 }, { "epoch": 0.07505, "grad_norm": 11.684234619140625, "learning_rate": 4.6715151515151516e-06, "loss": 7.115535736083984, "step": 7505 }, { "epoch": 0.0751, "grad_norm": 15.429750442504883, "learning_rate": 4.671262626262626e-06, "loss": 7.148614501953125, "step": 7510 }, { "epoch": 0.07515, "grad_norm": 19.932594299316406, "learning_rate": 4.671010101010102e-06, "loss": 7.151736450195313, "step": 7515 }, { "epoch": 0.0752, "grad_norm": 12.794001579284668, "learning_rate": 4.670757575757576e-06, "loss": 7.2143913269042965, "step": 7520 }, { "epoch": 0.07525, "grad_norm": 17.51900291442871, "learning_rate": 4.670505050505051e-06, "loss": 7.090669250488281, "step": 7525 }, { "epoch": 0.0753, "grad_norm": 13.913134574890137, "learning_rate": 4.6702525252525256e-06, "loss": 7.184165954589844, "step": 7530 }, { "epoch": 0.07535, "grad_norm": 12.449177742004395, "learning_rate": 4.670000000000001e-06, "loss": 7.177067565917969, "step": 7535 }, { "epoch": 0.0754, "grad_norm": 15.141054153442383, "learning_rate": 4.669747474747476e-06, "loss": 7.180377197265625, "step": 7540 }, { "epoch": 0.07545, "grad_norm": 10.056815147399902, "learning_rate": 4.6694949494949494e-06, "loss": 7.13543472290039, "step": 7545 }, { "epoch": 0.0755, "grad_norm": 18.899211883544922, "learning_rate": 4.669242424242424e-06, "loss": 7.1501708984375, "step": 7550 }, { "epoch": 0.07555, "grad_norm": 8.588541984558105, "learning_rate": 4.6689898989898995e-06, "loss": 7.158807373046875, "step": 7555 }, { "epoch": 0.0756, "grad_norm": 19.26127052307129, "learning_rate": 4.668737373737374e-06, "loss": 7.162855529785157, "step": 7560 }, { "epoch": 0.07565, "grad_norm": 14.681036949157715, "learning_rate": 4.668484848484849e-06, "loss": 7.144574737548828, "step": 7565 }, { "epoch": 0.0757, "grad_norm": 12.803994178771973, "learning_rate": 4.6682323232323234e-06, "loss": 7.16025390625, "step": 7570 }, { "epoch": 0.07575, "grad_norm": 14.650678634643555, "learning_rate": 4.667979797979799e-06, "loss": 7.180637359619141, "step": 7575 }, { "epoch": 0.0758, "grad_norm": 19.785337448120117, "learning_rate": 4.6677272727272735e-06, "loss": 7.161512756347657, "step": 7580 }, { "epoch": 0.07585, "grad_norm": 10.214943885803223, "learning_rate": 4.667474747474748e-06, "loss": 7.208462524414062, "step": 7585 }, { "epoch": 0.0759, "grad_norm": 13.401466369628906, "learning_rate": 4.667222222222223e-06, "loss": 7.151551055908203, "step": 7590 }, { "epoch": 0.07595, "grad_norm": 13.323257446289062, "learning_rate": 4.6669696969696974e-06, "loss": 7.104453277587891, "step": 7595 }, { "epoch": 0.076, "grad_norm": 8.73353099822998, "learning_rate": 4.666717171717172e-06, "loss": 7.130597686767578, "step": 7600 }, { "epoch": 0.07605, "grad_norm": 14.205131530761719, "learning_rate": 4.666464646464647e-06, "loss": 7.149769592285156, "step": 7605 }, { "epoch": 0.0761, "grad_norm": 13.18138313293457, "learning_rate": 4.666212121212121e-06, "loss": 7.157588195800781, "step": 7610 }, { "epoch": 0.07615, "grad_norm": 15.269906044006348, "learning_rate": 4.665959595959597e-06, "loss": 7.129432678222656, "step": 7615 }, { "epoch": 0.0762, "grad_norm": 15.373218536376953, "learning_rate": 4.6657070707070714e-06, "loss": 7.134799957275391, "step": 7620 }, { "epoch": 0.07625, "grad_norm": 15.53011417388916, "learning_rate": 4.665454545454546e-06, "loss": 7.1346923828125, "step": 7625 }, { "epoch": 0.0763, "grad_norm": 15.746675491333008, "learning_rate": 4.665202020202021e-06, "loss": 7.1609031677246096, "step": 7630 }, { "epoch": 0.07635, "grad_norm": 13.106550216674805, "learning_rate": 4.664949494949495e-06, "loss": 7.134526062011719, "step": 7635 }, { "epoch": 0.0764, "grad_norm": 14.241418838500977, "learning_rate": 4.66469696969697e-06, "loss": 7.096474456787109, "step": 7640 }, { "epoch": 0.07645, "grad_norm": 14.312854766845703, "learning_rate": 4.664444444444445e-06, "loss": 7.157828521728516, "step": 7645 }, { "epoch": 0.0765, "grad_norm": 13.353851318359375, "learning_rate": 4.664191919191919e-06, "loss": 7.1172332763671875, "step": 7650 }, { "epoch": 0.07655, "grad_norm": 14.366713523864746, "learning_rate": 4.663939393939395e-06, "loss": 7.146714782714843, "step": 7655 }, { "epoch": 0.0766, "grad_norm": 13.20458698272705, "learning_rate": 4.663686868686869e-06, "loss": 7.143280792236328, "step": 7660 }, { "epoch": 0.07665, "grad_norm": 12.329632759094238, "learning_rate": 4.663434343434344e-06, "loss": 7.184111785888672, "step": 7665 }, { "epoch": 0.0767, "grad_norm": 14.914095878601074, "learning_rate": 4.663181818181819e-06, "loss": 7.151043701171875, "step": 7670 }, { "epoch": 0.07675, "grad_norm": 13.259411811828613, "learning_rate": 4.662929292929293e-06, "loss": 7.109326171875, "step": 7675 }, { "epoch": 0.0768, "grad_norm": 15.127949714660645, "learning_rate": 4.662676767676768e-06, "loss": 7.155520629882813, "step": 7680 }, { "epoch": 0.07685, "grad_norm": 12.946768760681152, "learning_rate": 4.6624242424242425e-06, "loss": 7.141600036621094, "step": 7685 }, { "epoch": 0.0769, "grad_norm": 8.686307907104492, "learning_rate": 4.662171717171717e-06, "loss": 7.109571075439453, "step": 7690 }, { "epoch": 0.07695, "grad_norm": 13.677896499633789, "learning_rate": 4.661919191919193e-06, "loss": 7.119757080078125, "step": 7695 }, { "epoch": 0.077, "grad_norm": 14.438746452331543, "learning_rate": 4.661666666666667e-06, "loss": 7.110508728027344, "step": 7700 }, { "epoch": 0.07705, "grad_norm": 12.332862854003906, "learning_rate": 4.661414141414142e-06, "loss": 7.102262878417969, "step": 7705 }, { "epoch": 0.0771, "grad_norm": 13.872489929199219, "learning_rate": 4.6611616161616165e-06, "loss": 7.132161712646484, "step": 7710 }, { "epoch": 0.07715, "grad_norm": 15.19012451171875, "learning_rate": 4.660909090909091e-06, "loss": 7.148995208740234, "step": 7715 }, { "epoch": 0.0772, "grad_norm": 12.604557037353516, "learning_rate": 4.660656565656566e-06, "loss": 7.102450561523438, "step": 7720 }, { "epoch": 0.07725, "grad_norm": 12.488697052001953, "learning_rate": 4.66040404040404e-06, "loss": 7.168901824951172, "step": 7725 }, { "epoch": 0.0773, "grad_norm": 13.227538108825684, "learning_rate": 4.660151515151515e-06, "loss": 7.074463653564453, "step": 7730 }, { "epoch": 0.07735, "grad_norm": 15.450512886047363, "learning_rate": 4.6598989898989905e-06, "loss": 7.111332702636719, "step": 7735 }, { "epoch": 0.0774, "grad_norm": 14.068360328674316, "learning_rate": 4.659646464646465e-06, "loss": 7.130419921875, "step": 7740 }, { "epoch": 0.07745, "grad_norm": 13.30257511138916, "learning_rate": 4.65939393939394e-06, "loss": 7.113467407226563, "step": 7745 }, { "epoch": 0.0775, "grad_norm": 15.078557014465332, "learning_rate": 4.659141414141414e-06, "loss": 7.075376892089844, "step": 7750 }, { "epoch": 0.07755, "grad_norm": 15.024842262268066, "learning_rate": 4.65888888888889e-06, "loss": 7.1199699401855465, "step": 7755 }, { "epoch": 0.0776, "grad_norm": 14.53697395324707, "learning_rate": 4.6586363636363645e-06, "loss": 7.149957275390625, "step": 7760 }, { "epoch": 0.07765, "grad_norm": 10.921975135803223, "learning_rate": 4.658383838383839e-06, "loss": 7.174282836914062, "step": 7765 }, { "epoch": 0.0777, "grad_norm": 15.417535781860352, "learning_rate": 4.658131313131313e-06, "loss": 7.158491516113282, "step": 7770 }, { "epoch": 0.07775, "grad_norm": 12.464600563049316, "learning_rate": 4.657878787878788e-06, "loss": 7.124424743652344, "step": 7775 }, { "epoch": 0.0778, "grad_norm": 9.250466346740723, "learning_rate": 4.657626262626263e-06, "loss": 7.144343566894531, "step": 7780 }, { "epoch": 0.07785, "grad_norm": 13.965492248535156, "learning_rate": 4.657373737373738e-06, "loss": 7.101962280273438, "step": 7785 }, { "epoch": 0.0779, "grad_norm": 16.42563247680664, "learning_rate": 4.657121212121212e-06, "loss": 7.181169128417968, "step": 7790 }, { "epoch": 0.07795, "grad_norm": 11.249837875366211, "learning_rate": 4.656868686868688e-06, "loss": 7.108252716064453, "step": 7795 }, { "epoch": 0.078, "grad_norm": 13.15048599243164, "learning_rate": 4.656616161616162e-06, "loss": 7.3770393371582035, "step": 7800 }, { "epoch": 0.07805, "grad_norm": 12.964404106140137, "learning_rate": 4.656363636363637e-06, "loss": 7.160620880126953, "step": 7805 }, { "epoch": 0.0781, "grad_norm": 12.494027137756348, "learning_rate": 4.656111111111112e-06, "loss": 7.091893005371094, "step": 7810 }, { "epoch": 0.07815, "grad_norm": 9.275793075561523, "learning_rate": 4.655858585858586e-06, "loss": 7.136439514160156, "step": 7815 }, { "epoch": 0.0782, "grad_norm": 8.693899154663086, "learning_rate": 4.655606060606061e-06, "loss": 7.168768310546875, "step": 7820 }, { "epoch": 0.07825, "grad_norm": 17.57337760925293, "learning_rate": 4.6553535353535355e-06, "loss": 7.054325866699219, "step": 7825 }, { "epoch": 0.0783, "grad_norm": 13.529296875, "learning_rate": 4.65510101010101e-06, "loss": 7.141659545898437, "step": 7830 }, { "epoch": 0.07835, "grad_norm": 11.022156715393066, "learning_rate": 4.654848484848486e-06, "loss": 7.161221313476562, "step": 7835 }, { "epoch": 0.0784, "grad_norm": 13.002695083618164, "learning_rate": 4.65459595959596e-06, "loss": 7.13848876953125, "step": 7840 }, { "epoch": 0.07845, "grad_norm": 17.054367065429688, "learning_rate": 4.654343434343435e-06, "loss": 7.05651626586914, "step": 7845 }, { "epoch": 0.0785, "grad_norm": 10.648941993713379, "learning_rate": 4.6540909090909095e-06, "loss": 7.134423828125, "step": 7850 }, { "epoch": 0.07855, "grad_norm": 10.4478178024292, "learning_rate": 4.653838383838384e-06, "loss": 7.147296142578125, "step": 7855 }, { "epoch": 0.0786, "grad_norm": 16.388051986694336, "learning_rate": 4.653585858585859e-06, "loss": 7.1054634094238285, "step": 7860 }, { "epoch": 0.07865, "grad_norm": 13.608744621276855, "learning_rate": 4.653333333333333e-06, "loss": 7.131071472167969, "step": 7865 }, { "epoch": 0.0787, "grad_norm": 10.3717041015625, "learning_rate": 4.653080808080808e-06, "loss": 7.090081787109375, "step": 7870 }, { "epoch": 0.07875, "grad_norm": 14.938826560974121, "learning_rate": 4.6528282828282835e-06, "loss": 7.096900939941406, "step": 7875 }, { "epoch": 0.0788, "grad_norm": 10.912687301635742, "learning_rate": 4.652575757575758e-06, "loss": 7.062046813964844, "step": 7880 }, { "epoch": 0.07885, "grad_norm": 7.616029262542725, "learning_rate": 4.652323232323233e-06, "loss": 7.048287963867187, "step": 7885 }, { "epoch": 0.0789, "grad_norm": 9.438920021057129, "learning_rate": 4.652070707070707e-06, "loss": 7.105948638916016, "step": 7890 }, { "epoch": 0.07895, "grad_norm": 9.979214668273926, "learning_rate": 4.651818181818182e-06, "loss": 7.125117492675781, "step": 7895 }, { "epoch": 0.079, "grad_norm": 17.42769432067871, "learning_rate": 4.651565656565657e-06, "loss": 7.084651947021484, "step": 7900 }, { "epoch": 0.07905, "grad_norm": 13.459665298461914, "learning_rate": 4.651313131313131e-06, "loss": 7.136384582519531, "step": 7905 }, { "epoch": 0.0791, "grad_norm": 8.491365432739258, "learning_rate": 4.651060606060606e-06, "loss": 7.101498413085937, "step": 7910 }, { "epoch": 0.07915, "grad_norm": 14.3814058303833, "learning_rate": 4.650808080808081e-06, "loss": 7.11807861328125, "step": 7915 }, { "epoch": 0.0792, "grad_norm": 13.40147876739502, "learning_rate": 4.650555555555556e-06, "loss": 7.100657653808594, "step": 7920 }, { "epoch": 0.07925, "grad_norm": 11.681011199951172, "learning_rate": 4.650303030303031e-06, "loss": 7.203713989257812, "step": 7925 }, { "epoch": 0.0793, "grad_norm": 11.026641845703125, "learning_rate": 4.650050505050506e-06, "loss": 7.101087951660157, "step": 7930 }, { "epoch": 0.07935, "grad_norm": 10.357416152954102, "learning_rate": 4.64979797979798e-06, "loss": 7.124596405029297, "step": 7935 }, { "epoch": 0.0794, "grad_norm": 11.231729507446289, "learning_rate": 4.6495454545454545e-06, "loss": 7.1003578186035154, "step": 7940 }, { "epoch": 0.07945, "grad_norm": 15.133467674255371, "learning_rate": 4.649292929292929e-06, "loss": 7.060230255126953, "step": 7945 }, { "epoch": 0.0795, "grad_norm": 9.536593437194824, "learning_rate": 4.649040404040405e-06, "loss": 7.099029541015625, "step": 7950 }, { "epoch": 0.07955, "grad_norm": 14.419134140014648, "learning_rate": 4.648787878787879e-06, "loss": 7.063055419921875, "step": 7955 }, { "epoch": 0.0796, "grad_norm": 12.392709732055664, "learning_rate": 4.648535353535354e-06, "loss": 7.126834106445313, "step": 7960 }, { "epoch": 0.07965, "grad_norm": 9.54094409942627, "learning_rate": 4.6482828282828285e-06, "loss": 7.254203033447266, "step": 7965 }, { "epoch": 0.0797, "grad_norm": 16.68684196472168, "learning_rate": 4.648030303030304e-06, "loss": 7.121336364746094, "step": 7970 }, { "epoch": 0.07975, "grad_norm": 11.42245864868164, "learning_rate": 4.647777777777779e-06, "loss": 7.037115478515625, "step": 7975 }, { "epoch": 0.0798, "grad_norm": 13.26809310913086, "learning_rate": 4.647525252525253e-06, "loss": 7.074868011474609, "step": 7980 }, { "epoch": 0.07985, "grad_norm": 11.588258743286133, "learning_rate": 4.647272727272728e-06, "loss": 7.089334106445312, "step": 7985 }, { "epoch": 0.0799, "grad_norm": 10.668017387390137, "learning_rate": 4.6470202020202025e-06, "loss": 7.104509735107422, "step": 7990 }, { "epoch": 0.07995, "grad_norm": 12.000020027160645, "learning_rate": 4.646767676767677e-06, "loss": 7.081193542480468, "step": 7995 }, { "epoch": 0.08, "grad_norm": 11.83779239654541, "learning_rate": 4.646515151515152e-06, "loss": 7.043772125244141, "step": 8000 }, { "epoch": 0.08005, "grad_norm": 14.02678394317627, "learning_rate": 4.646262626262626e-06, "loss": 7.094363403320313, "step": 8005 }, { "epoch": 0.0801, "grad_norm": 24.539628982543945, "learning_rate": 4.646010101010102e-06, "loss": 8.424229431152344, "step": 8010 }, { "epoch": 0.08015, "grad_norm": 14.3515043258667, "learning_rate": 4.6457575757575765e-06, "loss": 7.0869697570800785, "step": 8015 }, { "epoch": 0.0802, "grad_norm": 12.146468162536621, "learning_rate": 4.645505050505051e-06, "loss": 7.100263977050782, "step": 8020 }, { "epoch": 0.08025, "grad_norm": 18.88066291809082, "learning_rate": 4.645252525252526e-06, "loss": 7.138927459716797, "step": 8025 }, { "epoch": 0.0803, "grad_norm": 12.326152801513672, "learning_rate": 4.645e-06, "loss": 7.106169891357422, "step": 8030 }, { "epoch": 0.08035, "grad_norm": 9.931388854980469, "learning_rate": 4.644747474747475e-06, "loss": 7.004976654052735, "step": 8035 }, { "epoch": 0.0804, "grad_norm": 16.568296432495117, "learning_rate": 4.64449494949495e-06, "loss": 7.0824951171875, "step": 8040 }, { "epoch": 0.08045, "grad_norm": 13.476337432861328, "learning_rate": 4.644242424242424e-06, "loss": 7.181253814697266, "step": 8045 }, { "epoch": 0.0805, "grad_norm": 12.9782133102417, "learning_rate": 4.6439898989899e-06, "loss": 7.055184936523437, "step": 8050 }, { "epoch": 0.08055, "grad_norm": 14.04794979095459, "learning_rate": 4.643737373737374e-06, "loss": 7.13084716796875, "step": 8055 }, { "epoch": 0.0806, "grad_norm": 9.37443733215332, "learning_rate": 4.643484848484849e-06, "loss": 6.871781921386718, "step": 8060 }, { "epoch": 0.08065, "grad_norm": 14.330850601196289, "learning_rate": 4.643232323232324e-06, "loss": 7.1192878723144535, "step": 8065 }, { "epoch": 0.0807, "grad_norm": 11.549955368041992, "learning_rate": 4.642979797979798e-06, "loss": 7.082984161376953, "step": 8070 }, { "epoch": 0.08075, "grad_norm": 11.674830436706543, "learning_rate": 4.642727272727273e-06, "loss": 7.123143005371094, "step": 8075 }, { "epoch": 0.0808, "grad_norm": 11.005705833435059, "learning_rate": 4.6424747474747476e-06, "loss": 7.055329132080078, "step": 8080 }, { "epoch": 0.08085, "grad_norm": 8.209931373596191, "learning_rate": 4.642222222222222e-06, "loss": 7.036091613769531, "step": 8085 }, { "epoch": 0.0809, "grad_norm": 21.43467903137207, "learning_rate": 4.641969696969698e-06, "loss": 7.09717025756836, "step": 8090 }, { "epoch": 0.08095, "grad_norm": 9.144308090209961, "learning_rate": 4.641717171717172e-06, "loss": 7.1012016296386715, "step": 8095 }, { "epoch": 0.081, "grad_norm": 13.185691833496094, "learning_rate": 4.641464646464647e-06, "loss": 7.128607177734375, "step": 8100 }, { "epoch": 0.08105, "grad_norm": 10.808123588562012, "learning_rate": 4.6412121212121216e-06, "loss": 7.0835113525390625, "step": 8105 }, { "epoch": 0.0811, "grad_norm": 10.47093677520752, "learning_rate": 4.640959595959596e-06, "loss": 7.1025390625, "step": 8110 }, { "epoch": 0.08115, "grad_norm": 15.302653312683105, "learning_rate": 4.640707070707071e-06, "loss": 7.0933174133300785, "step": 8115 }, { "epoch": 0.0812, "grad_norm": 11.155282020568848, "learning_rate": 4.6404545454545455e-06, "loss": 7.1060646057128904, "step": 8120 }, { "epoch": 0.08125, "grad_norm": 15.085137367248535, "learning_rate": 4.64020202020202e-06, "loss": 7.0690155029296875, "step": 8125 }, { "epoch": 0.0813, "grad_norm": 12.334951400756836, "learning_rate": 4.6399494949494956e-06, "loss": 7.1135414123535154, "step": 8130 }, { "epoch": 0.08135, "grad_norm": 10.982105255126953, "learning_rate": 4.63969696969697e-06, "loss": 7.105940246582032, "step": 8135 }, { "epoch": 0.0814, "grad_norm": 10.359132766723633, "learning_rate": 4.639444444444445e-06, "loss": 7.087940216064453, "step": 8140 }, { "epoch": 0.08145, "grad_norm": 17.37613868713379, "learning_rate": 4.6391919191919195e-06, "loss": 7.126576995849609, "step": 8145 }, { "epoch": 0.0815, "grad_norm": 9.434455871582031, "learning_rate": 4.638939393939395e-06, "loss": 7.20416259765625, "step": 8150 }, { "epoch": 0.08155, "grad_norm": 20.89838218688965, "learning_rate": 4.638686868686869e-06, "loss": 7.030848693847656, "step": 8155 }, { "epoch": 0.0816, "grad_norm": 9.592358589172363, "learning_rate": 4.638434343434343e-06, "loss": 6.986296081542969, "step": 8160 }, { "epoch": 0.08165, "grad_norm": 13.986207008361816, "learning_rate": 4.638181818181818e-06, "loss": 7.147360229492188, "step": 8165 }, { "epoch": 0.0817, "grad_norm": 21.99589729309082, "learning_rate": 4.6379292929292935e-06, "loss": 7.119601440429688, "step": 8170 }, { "epoch": 0.08175, "grad_norm": 11.296833038330078, "learning_rate": 4.637676767676768e-06, "loss": 7.092938995361328, "step": 8175 }, { "epoch": 0.0818, "grad_norm": 15.336142539978027, "learning_rate": 4.637424242424243e-06, "loss": 7.033964538574219, "step": 8180 }, { "epoch": 0.08185, "grad_norm": 16.62677574157715, "learning_rate": 4.637171717171717e-06, "loss": 7.1276802062988285, "step": 8185 }, { "epoch": 0.0819, "grad_norm": 9.059091567993164, "learning_rate": 4.636919191919193e-06, "loss": 7.172547912597656, "step": 8190 }, { "epoch": 0.08195, "grad_norm": 10.213953971862793, "learning_rate": 4.6366666666666674e-06, "loss": 7.069002532958985, "step": 8195 }, { "epoch": 0.082, "grad_norm": 14.678264617919922, "learning_rate": 4.636414141414142e-06, "loss": 7.077615356445312, "step": 8200 }, { "epoch": 0.08205, "grad_norm": 10.599089622497559, "learning_rate": 4.636161616161617e-06, "loss": 7.242488861083984, "step": 8205 }, { "epoch": 0.0821, "grad_norm": 15.295231819152832, "learning_rate": 4.635909090909091e-06, "loss": 7.056734466552735, "step": 8210 }, { "epoch": 0.08215, "grad_norm": 12.000826835632324, "learning_rate": 4.635656565656566e-06, "loss": 7.083193969726563, "step": 8215 }, { "epoch": 0.0822, "grad_norm": 7.757503509521484, "learning_rate": 4.635404040404041e-06, "loss": 7.031202697753907, "step": 8220 }, { "epoch": 0.08225, "grad_norm": 17.5812931060791, "learning_rate": 4.635151515151515e-06, "loss": 7.143338012695312, "step": 8225 }, { "epoch": 0.0823, "grad_norm": 16.34551239013672, "learning_rate": 4.634898989898991e-06, "loss": 7.045295715332031, "step": 8230 }, { "epoch": 0.08235, "grad_norm": 11.77586555480957, "learning_rate": 4.634646464646465e-06, "loss": 7.075539398193359, "step": 8235 }, { "epoch": 0.0824, "grad_norm": 13.300480842590332, "learning_rate": 4.63439393939394e-06, "loss": 7.063401794433593, "step": 8240 }, { "epoch": 0.08245, "grad_norm": 12.902743339538574, "learning_rate": 4.634141414141415e-06, "loss": 7.05254898071289, "step": 8245 }, { "epoch": 0.0825, "grad_norm": 9.859414100646973, "learning_rate": 4.633888888888889e-06, "loss": 7.032901000976563, "step": 8250 }, { "epoch": 0.08255, "grad_norm": 12.820615768432617, "learning_rate": 4.633636363636364e-06, "loss": 7.048063659667969, "step": 8255 }, { "epoch": 0.0826, "grad_norm": 9.728899002075195, "learning_rate": 4.6333838383838385e-06, "loss": 7.093550109863282, "step": 8260 }, { "epoch": 0.08265, "grad_norm": 9.958762168884277, "learning_rate": 4.633131313131313e-06, "loss": 7.067665863037109, "step": 8265 }, { "epoch": 0.0827, "grad_norm": 10.99690055847168, "learning_rate": 4.632878787878789e-06, "loss": 7.009818267822266, "step": 8270 }, { "epoch": 0.08275, "grad_norm": 11.927619934082031, "learning_rate": 4.632626262626263e-06, "loss": 7.041599273681641, "step": 8275 }, { "epoch": 0.0828, "grad_norm": 9.628263473510742, "learning_rate": 4.632373737373738e-06, "loss": 7.078289031982422, "step": 8280 }, { "epoch": 0.08285, "grad_norm": 14.6926851272583, "learning_rate": 4.6321212121212125e-06, "loss": 7.024065399169922, "step": 8285 }, { "epoch": 0.0829, "grad_norm": 12.629443168640137, "learning_rate": 4.631868686868687e-06, "loss": 7.053324127197266, "step": 8290 }, { "epoch": 0.08295, "grad_norm": 11.662714958190918, "learning_rate": 4.631616161616162e-06, "loss": 7.0044410705566404, "step": 8295 }, { "epoch": 0.083, "grad_norm": 7.714929580688477, "learning_rate": 4.631363636363636e-06, "loss": 6.870118713378906, "step": 8300 }, { "epoch": 0.08305, "grad_norm": 10.27112865447998, "learning_rate": 4.631111111111111e-06, "loss": 7.033052825927735, "step": 8305 }, { "epoch": 0.0831, "grad_norm": 9.160842895507812, "learning_rate": 4.6308585858585865e-06, "loss": 7.031269836425781, "step": 8310 }, { "epoch": 0.08315, "grad_norm": 15.955057144165039, "learning_rate": 4.630606060606061e-06, "loss": 7.070072937011719, "step": 8315 }, { "epoch": 0.0832, "grad_norm": 12.464604377746582, "learning_rate": 4.630353535353536e-06, "loss": 7.147128295898438, "step": 8320 }, { "epoch": 0.08325, "grad_norm": 12.384041786193848, "learning_rate": 4.63010101010101e-06, "loss": 7.00308837890625, "step": 8325 }, { "epoch": 0.0833, "grad_norm": 13.385063171386719, "learning_rate": 4.629848484848485e-06, "loss": 7.026906585693359, "step": 8330 }, { "epoch": 0.08335, "grad_norm": 14.736897468566895, "learning_rate": 4.62959595959596e-06, "loss": 7.082118225097656, "step": 8335 }, { "epoch": 0.0834, "grad_norm": 11.926923751831055, "learning_rate": 4.629343434343434e-06, "loss": 7.188260650634765, "step": 8340 }, { "epoch": 0.08345, "grad_norm": 9.965205192565918, "learning_rate": 4.62909090909091e-06, "loss": 7.025453186035156, "step": 8345 }, { "epoch": 0.0835, "grad_norm": 9.626023292541504, "learning_rate": 4.628838383838384e-06, "loss": 7.035075378417969, "step": 8350 }, { "epoch": 0.08355, "grad_norm": 8.821133613586426, "learning_rate": 4.628585858585859e-06, "loss": 7.155526733398437, "step": 8355 }, { "epoch": 0.0836, "grad_norm": 14.514681816101074, "learning_rate": 4.628333333333334e-06, "loss": 7.0042869567871096, "step": 8360 }, { "epoch": 0.08365, "grad_norm": 11.676492691040039, "learning_rate": 4.628080808080809e-06, "loss": 7.0094963073730465, "step": 8365 }, { "epoch": 0.0837, "grad_norm": 10.778614044189453, "learning_rate": 4.627828282828284e-06, "loss": 7.0627601623535154, "step": 8370 }, { "epoch": 0.08375, "grad_norm": 12.599152565002441, "learning_rate": 4.6275757575757575e-06, "loss": 7.020698547363281, "step": 8375 }, { "epoch": 0.0838, "grad_norm": 9.851097106933594, "learning_rate": 4.627323232323232e-06, "loss": 7.087709045410156, "step": 8380 }, { "epoch": 0.08385, "grad_norm": 14.637107849121094, "learning_rate": 4.627070707070708e-06, "loss": 7.035470581054687, "step": 8385 }, { "epoch": 0.0839, "grad_norm": 12.347566604614258, "learning_rate": 4.626818181818182e-06, "loss": 7.039939117431641, "step": 8390 }, { "epoch": 0.08395, "grad_norm": 10.541629791259766, "learning_rate": 4.626565656565657e-06, "loss": 7.028584289550781, "step": 8395 }, { "epoch": 0.084, "grad_norm": 13.734600067138672, "learning_rate": 4.6263131313131315e-06, "loss": 7.049537658691406, "step": 8400 }, { "epoch": 0.08405, "grad_norm": 11.209863662719727, "learning_rate": 4.626060606060607e-06, "loss": 7.05194320678711, "step": 8405 }, { "epoch": 0.0841, "grad_norm": 10.31779956817627, "learning_rate": 4.625808080808082e-06, "loss": 7.061079406738282, "step": 8410 }, { "epoch": 0.08415, "grad_norm": 15.373397827148438, "learning_rate": 4.625555555555556e-06, "loss": 6.995402526855469, "step": 8415 }, { "epoch": 0.0842, "grad_norm": 11.791592597961426, "learning_rate": 4.625303030303031e-06, "loss": 6.927073669433594, "step": 8420 }, { "epoch": 0.08425, "grad_norm": 10.075303077697754, "learning_rate": 4.6250505050505055e-06, "loss": 7.008555603027344, "step": 8425 }, { "epoch": 0.0843, "grad_norm": 12.547207832336426, "learning_rate": 4.62479797979798e-06, "loss": 7.027137756347656, "step": 8430 }, { "epoch": 0.08435, "grad_norm": 8.915721893310547, "learning_rate": 4.624545454545455e-06, "loss": 7.090331268310547, "step": 8435 }, { "epoch": 0.0844, "grad_norm": 16.654361724853516, "learning_rate": 4.624292929292929e-06, "loss": 7.087652587890625, "step": 8440 }, { "epoch": 0.08445, "grad_norm": 9.517629623413086, "learning_rate": 4.624040404040405e-06, "loss": 7.050588226318359, "step": 8445 }, { "epoch": 0.0845, "grad_norm": 11.81865119934082, "learning_rate": 4.6237878787878795e-06, "loss": 7.065787506103516, "step": 8450 }, { "epoch": 0.08455, "grad_norm": 13.40146255493164, "learning_rate": 4.623535353535354e-06, "loss": 7.019773864746094, "step": 8455 }, { "epoch": 0.0846, "grad_norm": 12.018403053283691, "learning_rate": 4.623282828282829e-06, "loss": 7.063872528076172, "step": 8460 }, { "epoch": 0.08465, "grad_norm": 12.831067085266113, "learning_rate": 4.623030303030303e-06, "loss": 7.096615600585937, "step": 8465 }, { "epoch": 0.0847, "grad_norm": 11.050119400024414, "learning_rate": 4.622777777777778e-06, "loss": 7.031575775146484, "step": 8470 }, { "epoch": 0.08475, "grad_norm": 9.614187240600586, "learning_rate": 4.622525252525253e-06, "loss": 6.996747589111328, "step": 8475 }, { "epoch": 0.0848, "grad_norm": 10.220989227294922, "learning_rate": 4.622272727272727e-06, "loss": 6.9593017578125, "step": 8480 }, { "epoch": 0.08485, "grad_norm": 10.00649642944336, "learning_rate": 4.622020202020203e-06, "loss": 7.0481422424316404, "step": 8485 }, { "epoch": 0.0849, "grad_norm": 20.84701919555664, "learning_rate": 4.621767676767677e-06, "loss": 7.049224853515625, "step": 8490 }, { "epoch": 0.08495, "grad_norm": 10.286824226379395, "learning_rate": 4.621515151515152e-06, "loss": 7.0858924865722654, "step": 8495 }, { "epoch": 0.085, "grad_norm": 13.741623878479004, "learning_rate": 4.621262626262627e-06, "loss": 7.028961181640625, "step": 8500 }, { "epoch": 0.08505, "grad_norm": 11.78243350982666, "learning_rate": 4.621010101010101e-06, "loss": 7.044227600097656, "step": 8505 }, { "epoch": 0.0851, "grad_norm": 13.78781509399414, "learning_rate": 4.620757575757576e-06, "loss": 7.0345306396484375, "step": 8510 }, { "epoch": 0.08515, "grad_norm": 12.163993835449219, "learning_rate": 4.6205050505050506e-06, "loss": 6.972857666015625, "step": 8515 }, { "epoch": 0.0852, "grad_norm": 12.0376615524292, "learning_rate": 4.620252525252525e-06, "loss": 6.994396209716797, "step": 8520 }, { "epoch": 0.08525, "grad_norm": 10.434892654418945, "learning_rate": 4.620000000000001e-06, "loss": 7.038103485107422, "step": 8525 }, { "epoch": 0.0853, "grad_norm": 10.116866111755371, "learning_rate": 4.619747474747475e-06, "loss": 6.94311752319336, "step": 8530 }, { "epoch": 0.08535, "grad_norm": 9.699699401855469, "learning_rate": 4.61949494949495e-06, "loss": 7.006007385253906, "step": 8535 }, { "epoch": 0.0854, "grad_norm": 9.601655006408691, "learning_rate": 4.6192424242424245e-06, "loss": 7.000652313232422, "step": 8540 }, { "epoch": 0.08545, "grad_norm": 12.33677864074707, "learning_rate": 4.618989898989899e-06, "loss": 6.891995239257812, "step": 8545 }, { "epoch": 0.0855, "grad_norm": 19.955005645751953, "learning_rate": 4.618737373737374e-06, "loss": 7.055172729492187, "step": 8550 }, { "epoch": 0.08555, "grad_norm": 13.735508918762207, "learning_rate": 4.6184848484848484e-06, "loss": 7.084560394287109, "step": 8555 }, { "epoch": 0.0856, "grad_norm": 10.539268493652344, "learning_rate": 4.618232323232323e-06, "loss": 6.977877807617188, "step": 8560 }, { "epoch": 0.08565, "grad_norm": 14.302754402160645, "learning_rate": 4.6179797979797985e-06, "loss": 7.095244598388672, "step": 8565 }, { "epoch": 0.0857, "grad_norm": 13.66472339630127, "learning_rate": 4.617727272727273e-06, "loss": 6.9892738342285154, "step": 8570 }, { "epoch": 0.08575, "grad_norm": 12.020188331604004, "learning_rate": 4.617474747474748e-06, "loss": 7.022763061523437, "step": 8575 }, { "epoch": 0.0858, "grad_norm": 11.358796119689941, "learning_rate": 4.6172222222222224e-06, "loss": 7.033963012695312, "step": 8580 }, { "epoch": 0.08585, "grad_norm": 7.480474948883057, "learning_rate": 4.616969696969698e-06, "loss": 7.015220642089844, "step": 8585 }, { "epoch": 0.0859, "grad_norm": 12.167500495910645, "learning_rate": 4.6167171717171725e-06, "loss": 7.014617919921875, "step": 8590 }, { "epoch": 0.08595, "grad_norm": 10.982778549194336, "learning_rate": 4.616464646464647e-06, "loss": 7.052307891845703, "step": 8595 }, { "epoch": 0.086, "grad_norm": 12.00369930267334, "learning_rate": 4.616212121212121e-06, "loss": 7.015129852294922, "step": 8600 }, { "epoch": 0.08605, "grad_norm": 12.80974006652832, "learning_rate": 4.6159595959595964e-06, "loss": 6.9491127014160154, "step": 8605 }, { "epoch": 0.0861, "grad_norm": 12.741839408874512, "learning_rate": 4.615707070707071e-06, "loss": 7.027971649169922, "step": 8610 }, { "epoch": 0.08615, "grad_norm": 9.436436653137207, "learning_rate": 4.615454545454546e-06, "loss": 6.988047027587891, "step": 8615 }, { "epoch": 0.0862, "grad_norm": 12.871213912963867, "learning_rate": 4.61520202020202e-06, "loss": 6.937876892089844, "step": 8620 }, { "epoch": 0.08625, "grad_norm": 17.28755760192871, "learning_rate": 4.614949494949496e-06, "loss": 7.053642272949219, "step": 8625 }, { "epoch": 0.0863, "grad_norm": 11.331725120544434, "learning_rate": 4.6146969696969704e-06, "loss": 6.957086181640625, "step": 8630 }, { "epoch": 0.08635, "grad_norm": 8.5512056350708, "learning_rate": 4.614444444444445e-06, "loss": 7.029003143310547, "step": 8635 }, { "epoch": 0.0864, "grad_norm": 11.16249942779541, "learning_rate": 4.61419191919192e-06, "loss": 7.063818359375, "step": 8640 }, { "epoch": 0.08645, "grad_norm": 16.802413940429688, "learning_rate": 4.613939393939394e-06, "loss": 6.833795166015625, "step": 8645 }, { "epoch": 0.0865, "grad_norm": 8.72449779510498, "learning_rate": 4.613686868686869e-06, "loss": 7.0414070129394535, "step": 8650 }, { "epoch": 0.08655, "grad_norm": 10.646709442138672, "learning_rate": 4.613434343434344e-06, "loss": 6.961878967285156, "step": 8655 }, { "epoch": 0.0866, "grad_norm": 10.272910118103027, "learning_rate": 4.613181818181818e-06, "loss": 6.999033355712891, "step": 8660 }, { "epoch": 0.08665, "grad_norm": 12.035758018493652, "learning_rate": 4.612929292929294e-06, "loss": 7.09505615234375, "step": 8665 }, { "epoch": 0.0867, "grad_norm": 11.226907730102539, "learning_rate": 4.612676767676768e-06, "loss": 7.1431427001953125, "step": 8670 }, { "epoch": 0.08675, "grad_norm": 12.158656120300293, "learning_rate": 4.612424242424243e-06, "loss": 7.011770629882813, "step": 8675 }, { "epoch": 0.0868, "grad_norm": 9.374982833862305, "learning_rate": 4.612171717171718e-06, "loss": 7.028767395019531, "step": 8680 }, { "epoch": 0.08685, "grad_norm": 14.920166969299316, "learning_rate": 4.611919191919192e-06, "loss": 7.011065673828125, "step": 8685 }, { "epoch": 0.0869, "grad_norm": 10.49083137512207, "learning_rate": 4.611666666666667e-06, "loss": 6.979369354248047, "step": 8690 }, { "epoch": 0.08695, "grad_norm": 19.360553741455078, "learning_rate": 4.6114141414141415e-06, "loss": 7.246497344970703, "step": 8695 }, { "epoch": 0.087, "grad_norm": 11.048300743103027, "learning_rate": 4.611161616161616e-06, "loss": 7.017113494873047, "step": 8700 }, { "epoch": 0.08705, "grad_norm": 8.130722999572754, "learning_rate": 4.610909090909092e-06, "loss": 6.965589904785157, "step": 8705 }, { "epoch": 0.0871, "grad_norm": 8.920684814453125, "learning_rate": 4.610656565656566e-06, "loss": 6.925331115722656, "step": 8710 }, { "epoch": 0.08715, "grad_norm": 11.635099411010742, "learning_rate": 4.610404040404041e-06, "loss": 6.987557983398437, "step": 8715 }, { "epoch": 0.0872, "grad_norm": 7.673483848571777, "learning_rate": 4.6101515151515155e-06, "loss": 6.932005310058594, "step": 8720 }, { "epoch": 0.08725, "grad_norm": 20.464824676513672, "learning_rate": 4.60989898989899e-06, "loss": 7.084622955322265, "step": 8725 }, { "epoch": 0.0873, "grad_norm": 14.617021560668945, "learning_rate": 4.609646464646465e-06, "loss": 6.958221435546875, "step": 8730 }, { "epoch": 0.08735, "grad_norm": 12.8867826461792, "learning_rate": 4.609393939393939e-06, "loss": 7.004589080810547, "step": 8735 }, { "epoch": 0.0874, "grad_norm": 11.662595748901367, "learning_rate": 4.609141414141414e-06, "loss": 6.996710968017578, "step": 8740 }, { "epoch": 0.08745, "grad_norm": 14.271039962768555, "learning_rate": 4.6088888888888895e-06, "loss": 7.0122825622558596, "step": 8745 }, { "epoch": 0.0875, "grad_norm": 11.641202926635742, "learning_rate": 4.608636363636364e-06, "loss": 7.097258758544922, "step": 8750 }, { "epoch": 0.08755, "grad_norm": 9.237883567810059, "learning_rate": 4.608383838383839e-06, "loss": 7.103736877441406, "step": 8755 }, { "epoch": 0.0876, "grad_norm": 8.978830337524414, "learning_rate": 4.608131313131313e-06, "loss": 6.994572448730469, "step": 8760 }, { "epoch": 0.08765, "grad_norm": 10.96242618560791, "learning_rate": 4.607878787878788e-06, "loss": 6.993846893310547, "step": 8765 }, { "epoch": 0.0877, "grad_norm": 11.615701675415039, "learning_rate": 4.607626262626263e-06, "loss": 7.036064910888672, "step": 8770 }, { "epoch": 0.08775, "grad_norm": 11.131321907043457, "learning_rate": 4.607373737373737e-06, "loss": 6.988642883300781, "step": 8775 }, { "epoch": 0.0878, "grad_norm": 9.342042922973633, "learning_rate": 4.607121212121213e-06, "loss": 7.0060371398925785, "step": 8780 }, { "epoch": 0.08785, "grad_norm": 11.590174674987793, "learning_rate": 4.606868686868687e-06, "loss": 6.9704345703125, "step": 8785 }, { "epoch": 0.0879, "grad_norm": 12.118947982788086, "learning_rate": 4.606616161616162e-06, "loss": 6.983943939208984, "step": 8790 }, { "epoch": 0.08795, "grad_norm": 9.74455738067627, "learning_rate": 4.606363636363637e-06, "loss": 6.968035125732422, "step": 8795 }, { "epoch": 0.088, "grad_norm": 9.964595794677734, "learning_rate": 4.606111111111112e-06, "loss": 6.9922607421875, "step": 8800 }, { "epoch": 0.08805, "grad_norm": 11.390090942382812, "learning_rate": 4.605858585858587e-06, "loss": 7.004647064208984, "step": 8805 }, { "epoch": 0.0881, "grad_norm": 11.85561752319336, "learning_rate": 4.605606060606061e-06, "loss": 6.978875732421875, "step": 8810 }, { "epoch": 0.08815, "grad_norm": 9.307329177856445, "learning_rate": 4.605353535353536e-06, "loss": 6.982460021972656, "step": 8815 }, { "epoch": 0.0882, "grad_norm": 11.975251197814941, "learning_rate": 4.605101010101011e-06, "loss": 6.924207305908203, "step": 8820 }, { "epoch": 0.08825, "grad_norm": 9.708422660827637, "learning_rate": 4.604848484848485e-06, "loss": 6.911203765869141, "step": 8825 }, { "epoch": 0.0883, "grad_norm": 11.741756439208984, "learning_rate": 4.60459595959596e-06, "loss": 6.991139984130859, "step": 8830 }, { "epoch": 0.08835, "grad_norm": 9.989776611328125, "learning_rate": 4.6043434343434345e-06, "loss": 7.0111236572265625, "step": 8835 }, { "epoch": 0.0884, "grad_norm": 12.123878479003906, "learning_rate": 4.60409090909091e-06, "loss": 6.962850189208984, "step": 8840 }, { "epoch": 0.08845, "grad_norm": 15.166815757751465, "learning_rate": 4.603838383838385e-06, "loss": 6.863700103759766, "step": 8845 }, { "epoch": 0.0885, "grad_norm": 11.956164360046387, "learning_rate": 4.603585858585859e-06, "loss": 7.100125885009765, "step": 8850 }, { "epoch": 0.08855, "grad_norm": 14.975172996520996, "learning_rate": 4.603333333333334e-06, "loss": 6.856311798095703, "step": 8855 }, { "epoch": 0.0886, "grad_norm": 11.481337547302246, "learning_rate": 4.6030808080808085e-06, "loss": 7.0222923278808596, "step": 8860 }, { "epoch": 0.08865, "grad_norm": 13.606314659118652, "learning_rate": 4.602828282828283e-06, "loss": 6.964949035644532, "step": 8865 }, { "epoch": 0.0887, "grad_norm": 11.051919937133789, "learning_rate": 4.602575757575758e-06, "loss": 6.992784118652343, "step": 8870 }, { "epoch": 0.08875, "grad_norm": 11.961569786071777, "learning_rate": 4.602323232323232e-06, "loss": 6.939424896240235, "step": 8875 }, { "epoch": 0.0888, "grad_norm": 12.348455429077148, "learning_rate": 4.602070707070708e-06, "loss": 6.963307952880859, "step": 8880 }, { "epoch": 0.08885, "grad_norm": 10.943476676940918, "learning_rate": 4.6018181818181825e-06, "loss": 7.011309051513672, "step": 8885 }, { "epoch": 0.0889, "grad_norm": 10.19846248626709, "learning_rate": 4.601565656565657e-06, "loss": 6.96124267578125, "step": 8890 }, { "epoch": 0.08895, "grad_norm": 9.726397514343262, "learning_rate": 4.601313131313132e-06, "loss": 7.024695587158203, "step": 8895 }, { "epoch": 0.089, "grad_norm": 7.165463447570801, "learning_rate": 4.601060606060606e-06, "loss": 7.011265563964844, "step": 8900 }, { "epoch": 0.08905, "grad_norm": 8.293192863464355, "learning_rate": 4.600808080808081e-06, "loss": 7.055661010742187, "step": 8905 }, { "epoch": 0.0891, "grad_norm": 12.67971134185791, "learning_rate": 4.600555555555556e-06, "loss": 7.008597564697266, "step": 8910 }, { "epoch": 0.08915, "grad_norm": 11.505000114440918, "learning_rate": 4.60030303030303e-06, "loss": 7.03277587890625, "step": 8915 }, { "epoch": 0.0892, "grad_norm": 13.057467460632324, "learning_rate": 4.600050505050506e-06, "loss": 7.0151115417480465, "step": 8920 }, { "epoch": 0.08925, "grad_norm": 8.910481452941895, "learning_rate": 4.59979797979798e-06, "loss": 7.027153015136719, "step": 8925 }, { "epoch": 0.0893, "grad_norm": 14.007189750671387, "learning_rate": 4.599545454545455e-06, "loss": 6.961451721191406, "step": 8930 }, { "epoch": 0.08935, "grad_norm": 9.628375053405762, "learning_rate": 4.59929292929293e-06, "loss": 6.9426826477050785, "step": 8935 }, { "epoch": 0.0894, "grad_norm": 13.70006275177002, "learning_rate": 4.599040404040404e-06, "loss": 6.968820953369141, "step": 8940 }, { "epoch": 0.08945, "grad_norm": 8.891744613647461, "learning_rate": 4.598787878787879e-06, "loss": 6.9783882141113285, "step": 8945 }, { "epoch": 0.0895, "grad_norm": 11.104719161987305, "learning_rate": 4.5985353535353535e-06, "loss": 6.952440643310547, "step": 8950 }, { "epoch": 0.08955, "grad_norm": 10.762900352478027, "learning_rate": 4.598282828282828e-06, "loss": 6.8633575439453125, "step": 8955 }, { "epoch": 0.0896, "grad_norm": 10.40408992767334, "learning_rate": 4.598030303030304e-06, "loss": 7.01196517944336, "step": 8960 }, { "epoch": 0.08965, "grad_norm": 10.966178894042969, "learning_rate": 4.597777777777778e-06, "loss": 6.940557861328125, "step": 8965 }, { "epoch": 0.0897, "grad_norm": 18.44215965270996, "learning_rate": 4.597525252525253e-06, "loss": 7.021847534179687, "step": 8970 }, { "epoch": 0.08975, "grad_norm": 12.66018009185791, "learning_rate": 4.5972727272727275e-06, "loss": 6.924337005615234, "step": 8975 }, { "epoch": 0.0898, "grad_norm": 11.37463665008545, "learning_rate": 4.597020202020203e-06, "loss": 6.976477813720703, "step": 8980 }, { "epoch": 0.08985, "grad_norm": 14.634991645812988, "learning_rate": 4.596767676767677e-06, "loss": 6.973858642578125, "step": 8985 }, { "epoch": 0.0899, "grad_norm": 9.247393608093262, "learning_rate": 4.5965151515151514e-06, "loss": 6.95599365234375, "step": 8990 }, { "epoch": 0.08995, "grad_norm": 10.670845985412598, "learning_rate": 4.596262626262626e-06, "loss": 6.918495941162109, "step": 8995 }, { "epoch": 0.09, "grad_norm": 7.9189772605896, "learning_rate": 4.5960101010101015e-06, "loss": 6.9922119140625, "step": 9000 }, { "epoch": 0.09005, "grad_norm": 8.94133186340332, "learning_rate": 4.595757575757576e-06, "loss": 7.0550285339355465, "step": 9005 }, { "epoch": 0.0901, "grad_norm": 9.91049575805664, "learning_rate": 4.595505050505051e-06, "loss": 6.917829895019532, "step": 9010 }, { "epoch": 0.09015, "grad_norm": 11.096470832824707, "learning_rate": 4.595252525252525e-06, "loss": 6.979194641113281, "step": 9015 }, { "epoch": 0.0902, "grad_norm": 9.518370628356934, "learning_rate": 4.595000000000001e-06, "loss": 6.965219116210937, "step": 9020 }, { "epoch": 0.09025, "grad_norm": 11.135085105895996, "learning_rate": 4.5947474747474755e-06, "loss": 6.952619934082032, "step": 9025 }, { "epoch": 0.0903, "grad_norm": 13.166329383850098, "learning_rate": 4.59449494949495e-06, "loss": 6.98123779296875, "step": 9030 }, { "epoch": 0.09035, "grad_norm": 10.624238014221191, "learning_rate": 4.594242424242425e-06, "loss": 6.928783416748047, "step": 9035 }, { "epoch": 0.0904, "grad_norm": 9.604555130004883, "learning_rate": 4.593989898989899e-06, "loss": 6.973194122314453, "step": 9040 }, { "epoch": 0.09045, "grad_norm": 8.17077922821045, "learning_rate": 4.593737373737374e-06, "loss": 6.997917938232422, "step": 9045 }, { "epoch": 0.0905, "grad_norm": 13.936361312866211, "learning_rate": 4.593484848484849e-06, "loss": 6.939244079589844, "step": 9050 }, { "epoch": 0.09055, "grad_norm": 10.350641250610352, "learning_rate": 4.593232323232323e-06, "loss": 6.972708129882813, "step": 9055 }, { "epoch": 0.0906, "grad_norm": 9.984773635864258, "learning_rate": 4.592979797979799e-06, "loss": 6.933958435058594, "step": 9060 }, { "epoch": 0.09065, "grad_norm": 10.395051002502441, "learning_rate": 4.592727272727273e-06, "loss": 6.982639312744141, "step": 9065 }, { "epoch": 0.0907, "grad_norm": 8.824694633483887, "learning_rate": 4.592474747474748e-06, "loss": 6.934629821777344, "step": 9070 }, { "epoch": 0.09075, "grad_norm": 11.539315223693848, "learning_rate": 4.592222222222223e-06, "loss": 6.957093811035156, "step": 9075 }, { "epoch": 0.0908, "grad_norm": 10.770848274230957, "learning_rate": 4.591969696969697e-06, "loss": 6.976052856445312, "step": 9080 }, { "epoch": 0.09085, "grad_norm": 11.850400924682617, "learning_rate": 4.591717171717172e-06, "loss": 6.942958068847656, "step": 9085 }, { "epoch": 0.0909, "grad_norm": 9.05173397064209, "learning_rate": 4.5914646464646466e-06, "loss": 7.011375427246094, "step": 9090 }, { "epoch": 0.09095, "grad_norm": 10.652250289916992, "learning_rate": 4.591212121212121e-06, "loss": 7.116246032714844, "step": 9095 }, { "epoch": 0.091, "grad_norm": 9.078658103942871, "learning_rate": 4.590959595959597e-06, "loss": 6.991673278808594, "step": 9100 }, { "epoch": 0.09105, "grad_norm": 10.937932014465332, "learning_rate": 4.590707070707071e-06, "loss": 6.984273529052734, "step": 9105 }, { "epoch": 0.0911, "grad_norm": 13.966568946838379, "learning_rate": 4.590454545454546e-06, "loss": 6.920789337158203, "step": 9110 }, { "epoch": 0.09115, "grad_norm": 11.54513168334961, "learning_rate": 4.5902020202020206e-06, "loss": 6.975831604003906, "step": 9115 }, { "epoch": 0.0912, "grad_norm": 8.723725318908691, "learning_rate": 4.589949494949495e-06, "loss": 7.014624786376953, "step": 9120 }, { "epoch": 0.09125, "grad_norm": 13.653387069702148, "learning_rate": 4.58969696969697e-06, "loss": 6.994258880615234, "step": 9125 }, { "epoch": 0.0913, "grad_norm": 11.781432151794434, "learning_rate": 4.5894444444444445e-06, "loss": 6.959329223632812, "step": 9130 }, { "epoch": 0.09135, "grad_norm": 10.753767967224121, "learning_rate": 4.589191919191919e-06, "loss": 6.955526733398438, "step": 9135 }, { "epoch": 0.0914, "grad_norm": 10.890110969543457, "learning_rate": 4.5889393939393946e-06, "loss": 6.910725402832031, "step": 9140 }, { "epoch": 0.09145, "grad_norm": 11.002923965454102, "learning_rate": 4.588686868686869e-06, "loss": 6.893228149414062, "step": 9145 }, { "epoch": 0.0915, "grad_norm": 10.365764617919922, "learning_rate": 4.588434343434344e-06, "loss": 7.003184509277344, "step": 9150 }, { "epoch": 0.09155, "grad_norm": 10.126511573791504, "learning_rate": 4.5881818181818185e-06, "loss": 6.934009552001953, "step": 9155 }, { "epoch": 0.0916, "grad_norm": 10.26491641998291, "learning_rate": 4.587929292929293e-06, "loss": 6.962069702148438, "step": 9160 }, { "epoch": 0.09165, "grad_norm": 10.374563217163086, "learning_rate": 4.587676767676768e-06, "loss": 6.962880706787109, "step": 9165 }, { "epoch": 0.0917, "grad_norm": 10.785993576049805, "learning_rate": 4.587424242424242e-06, "loss": 6.959246063232422, "step": 9170 }, { "epoch": 0.09175, "grad_norm": 9.757068634033203, "learning_rate": 4.587171717171717e-06, "loss": 7.039235687255859, "step": 9175 }, { "epoch": 0.0918, "grad_norm": 8.046100616455078, "learning_rate": 4.5869191919191925e-06, "loss": 6.943524169921875, "step": 9180 }, { "epoch": 0.09185, "grad_norm": 10.259284019470215, "learning_rate": 4.586666666666667e-06, "loss": 6.936454772949219, "step": 9185 }, { "epoch": 0.0919, "grad_norm": 11.514897346496582, "learning_rate": 4.586414141414142e-06, "loss": 6.928460693359375, "step": 9190 }, { "epoch": 0.09195, "grad_norm": 13.671236991882324, "learning_rate": 4.586161616161617e-06, "loss": 6.947331237792969, "step": 9195 }, { "epoch": 0.092, "grad_norm": 11.320274353027344, "learning_rate": 4.585909090909092e-06, "loss": 6.916675567626953, "step": 9200 }, { "epoch": 0.09205, "grad_norm": 10.631510734558105, "learning_rate": 4.5856565656565664e-06, "loss": 6.928887939453125, "step": 9205 }, { "epoch": 0.0921, "grad_norm": 10.629972457885742, "learning_rate": 4.58540404040404e-06, "loss": 6.923471069335937, "step": 9210 }, { "epoch": 0.09215, "grad_norm": 10.900455474853516, "learning_rate": 4.585151515151516e-06, "loss": 6.903469085693359, "step": 9215 }, { "epoch": 0.0922, "grad_norm": 10.217141151428223, "learning_rate": 4.58489898989899e-06, "loss": 7.020825958251953, "step": 9220 }, { "epoch": 0.09225, "grad_norm": 9.752537727355957, "learning_rate": 4.584646464646465e-06, "loss": 6.92403564453125, "step": 9225 }, { "epoch": 0.0923, "grad_norm": 13.075019836425781, "learning_rate": 4.58439393939394e-06, "loss": 6.928868103027344, "step": 9230 }, { "epoch": 0.09235, "grad_norm": 10.174188613891602, "learning_rate": 4.584141414141415e-06, "loss": 6.875286865234375, "step": 9235 }, { "epoch": 0.0924, "grad_norm": 9.03266429901123, "learning_rate": 4.58388888888889e-06, "loss": 6.9792022705078125, "step": 9240 }, { "epoch": 0.09245, "grad_norm": 9.093369483947754, "learning_rate": 4.583636363636364e-06, "loss": 7.003121185302734, "step": 9245 }, { "epoch": 0.0925, "grad_norm": 11.434791564941406, "learning_rate": 4.583383838383839e-06, "loss": 6.937904357910156, "step": 9250 }, { "epoch": 0.09255, "grad_norm": 9.684001922607422, "learning_rate": 4.583131313131314e-06, "loss": 6.935957336425782, "step": 9255 }, { "epoch": 0.0926, "grad_norm": 8.995768547058105, "learning_rate": 4.582878787878788e-06, "loss": 6.950570678710937, "step": 9260 }, { "epoch": 0.09265, "grad_norm": 8.236930847167969, "learning_rate": 4.582626262626263e-06, "loss": 6.948359680175781, "step": 9265 }, { "epoch": 0.0927, "grad_norm": 12.418370246887207, "learning_rate": 4.5823737373737375e-06, "loss": 6.944721221923828, "step": 9270 }, { "epoch": 0.09275, "grad_norm": 11.27679443359375, "learning_rate": 4.582121212121213e-06, "loss": 6.905613708496094, "step": 9275 }, { "epoch": 0.0928, "grad_norm": 10.900823593139648, "learning_rate": 4.581868686868688e-06, "loss": 6.949418640136718, "step": 9280 }, { "epoch": 0.09285, "grad_norm": 8.835074424743652, "learning_rate": 4.581616161616162e-06, "loss": 6.8901519775390625, "step": 9285 }, { "epoch": 0.0929, "grad_norm": 9.159908294677734, "learning_rate": 4.581363636363637e-06, "loss": 6.886199951171875, "step": 9290 }, { "epoch": 0.09295, "grad_norm": 10.648245811462402, "learning_rate": 4.5811111111111115e-06, "loss": 6.915488433837891, "step": 9295 }, { "epoch": 0.093, "grad_norm": 10.79227352142334, "learning_rate": 4.580858585858586e-06, "loss": 6.9617362976074215, "step": 9300 }, { "epoch": 0.09305, "grad_norm": 10.129059791564941, "learning_rate": 4.580606060606061e-06, "loss": 6.932711791992188, "step": 9305 }, { "epoch": 0.0931, "grad_norm": 10.591060638427734, "learning_rate": 4.580353535353535e-06, "loss": 7.020489501953125, "step": 9310 }, { "epoch": 0.09315, "grad_norm": 10.75668716430664, "learning_rate": 4.580101010101011e-06, "loss": 6.946543884277344, "step": 9315 }, { "epoch": 0.0932, "grad_norm": 8.404521942138672, "learning_rate": 4.5798484848484855e-06, "loss": 6.734163665771485, "step": 9320 }, { "epoch": 0.09325, "grad_norm": 12.331934928894043, "learning_rate": 4.57959595959596e-06, "loss": 6.9598640441894535, "step": 9325 }, { "epoch": 0.0933, "grad_norm": 10.616157531738281, "learning_rate": 4.579343434343435e-06, "loss": 6.981166839599609, "step": 9330 }, { "epoch": 0.09335, "grad_norm": 8.098773956298828, "learning_rate": 4.579090909090909e-06, "loss": 6.920321655273438, "step": 9335 }, { "epoch": 0.0934, "grad_norm": 8.003122329711914, "learning_rate": 4.578838383838384e-06, "loss": 6.926268005371094, "step": 9340 }, { "epoch": 0.09345, "grad_norm": 9.338400840759277, "learning_rate": 4.578585858585859e-06, "loss": 6.924126434326172, "step": 9345 }, { "epoch": 0.0935, "grad_norm": 9.561031341552734, "learning_rate": 4.578333333333333e-06, "loss": 6.9043830871582035, "step": 9350 }, { "epoch": 0.09355, "grad_norm": 8.847929000854492, "learning_rate": 4.578080808080809e-06, "loss": 6.934476470947265, "step": 9355 }, { "epoch": 0.0936, "grad_norm": 11.018107414245605, "learning_rate": 4.577828282828283e-06, "loss": 6.912567138671875, "step": 9360 }, { "epoch": 0.09365, "grad_norm": 12.286124229431152, "learning_rate": 4.577575757575758e-06, "loss": 6.90674057006836, "step": 9365 }, { "epoch": 0.0937, "grad_norm": 9.53593921661377, "learning_rate": 4.577323232323233e-06, "loss": 6.938062286376953, "step": 9370 }, { "epoch": 0.09375, "grad_norm": 13.270442962646484, "learning_rate": 4.577070707070707e-06, "loss": 6.963661956787109, "step": 9375 }, { "epoch": 0.0938, "grad_norm": 7.627886772155762, "learning_rate": 4.576818181818182e-06, "loss": 6.8824462890625, "step": 9380 }, { "epoch": 0.09385, "grad_norm": 10.083731651306152, "learning_rate": 4.5765656565656565e-06, "loss": 6.905934143066406, "step": 9385 }, { "epoch": 0.0939, "grad_norm": 9.277826309204102, "learning_rate": 4.576313131313131e-06, "loss": 6.882180023193359, "step": 9390 }, { "epoch": 0.09395, "grad_norm": 9.169386863708496, "learning_rate": 4.576060606060607e-06, "loss": 6.879779052734375, "step": 9395 }, { "epoch": 0.094, "grad_norm": 8.970600128173828, "learning_rate": 4.575808080808081e-06, "loss": 6.8739784240722654, "step": 9400 }, { "epoch": 0.09405, "grad_norm": 9.843295097351074, "learning_rate": 4.575555555555556e-06, "loss": 6.920269775390625, "step": 9405 }, { "epoch": 0.0941, "grad_norm": 10.052227973937988, "learning_rate": 4.5753030303030305e-06, "loss": 6.88603515625, "step": 9410 }, { "epoch": 0.09415, "grad_norm": 9.529752731323242, "learning_rate": 4.575050505050506e-06, "loss": 6.899557495117188, "step": 9415 }, { "epoch": 0.0942, "grad_norm": 9.85244083404541, "learning_rate": 4.574797979797981e-06, "loss": 6.889659881591797, "step": 9420 }, { "epoch": 0.09425, "grad_norm": 9.844796180725098, "learning_rate": 4.574545454545455e-06, "loss": 6.885385894775391, "step": 9425 }, { "epoch": 0.0943, "grad_norm": 19.329103469848633, "learning_rate": 4.574292929292929e-06, "loss": 7.0341644287109375, "step": 9430 }, { "epoch": 0.09435, "grad_norm": 6.379838943481445, "learning_rate": 4.5740404040404045e-06, "loss": 7.062014770507813, "step": 9435 }, { "epoch": 0.0944, "grad_norm": 10.539299964904785, "learning_rate": 4.573787878787879e-06, "loss": 7.116558074951172, "step": 9440 }, { "epoch": 0.09445, "grad_norm": 10.959689140319824, "learning_rate": 4.573535353535354e-06, "loss": 6.879178619384765, "step": 9445 }, { "epoch": 0.0945, "grad_norm": 8.679367065429688, "learning_rate": 4.573282828282828e-06, "loss": 6.847767639160156, "step": 9450 }, { "epoch": 0.09455, "grad_norm": 10.037178039550781, "learning_rate": 4.573030303030304e-06, "loss": 6.9302215576171875, "step": 9455 }, { "epoch": 0.0946, "grad_norm": 10.333447456359863, "learning_rate": 4.5727777777777785e-06, "loss": 6.91085205078125, "step": 9460 }, { "epoch": 0.09465, "grad_norm": 10.519474983215332, "learning_rate": 4.572525252525253e-06, "loss": 6.936357879638672, "step": 9465 }, { "epoch": 0.0947, "grad_norm": 8.598783493041992, "learning_rate": 4.572272727272728e-06, "loss": 6.864706420898438, "step": 9470 }, { "epoch": 0.09475, "grad_norm": 9.154701232910156, "learning_rate": 4.572020202020202e-06, "loss": 6.924003601074219, "step": 9475 }, { "epoch": 0.0948, "grad_norm": 9.662890434265137, "learning_rate": 4.571767676767677e-06, "loss": 6.868508911132812, "step": 9480 }, { "epoch": 0.09485, "grad_norm": 10.95689582824707, "learning_rate": 4.571515151515152e-06, "loss": 6.900152587890625, "step": 9485 }, { "epoch": 0.0949, "grad_norm": 11.14007568359375, "learning_rate": 4.571262626262626e-06, "loss": 7.045454406738282, "step": 9490 }, { "epoch": 0.09495, "grad_norm": 12.99349594116211, "learning_rate": 4.571010101010102e-06, "loss": 6.91607894897461, "step": 9495 }, { "epoch": 0.095, "grad_norm": 11.451969146728516, "learning_rate": 4.570757575757576e-06, "loss": 6.909648132324219, "step": 9500 }, { "epoch": 0.09505, "grad_norm": 9.418721199035645, "learning_rate": 4.570505050505051e-06, "loss": 6.9107810974121096, "step": 9505 }, { "epoch": 0.0951, "grad_norm": 8.88818359375, "learning_rate": 4.570252525252526e-06, "loss": 6.82100830078125, "step": 9510 }, { "epoch": 0.09515, "grad_norm": 8.626935005187988, "learning_rate": 4.57e-06, "loss": 6.906971740722656, "step": 9515 }, { "epoch": 0.0952, "grad_norm": 10.17044734954834, "learning_rate": 4.569747474747475e-06, "loss": 6.85956039428711, "step": 9520 }, { "epoch": 0.09525, "grad_norm": 9.495651245117188, "learning_rate": 4.5694949494949496e-06, "loss": 6.917705535888672, "step": 9525 }, { "epoch": 0.0953, "grad_norm": 12.615290641784668, "learning_rate": 4.569242424242424e-06, "loss": 7.044325256347657, "step": 9530 }, { "epoch": 0.09535, "grad_norm": 8.915230751037598, "learning_rate": 4.5689898989899e-06, "loss": 6.706251525878907, "step": 9535 }, { "epoch": 0.0954, "grad_norm": 10.297006607055664, "learning_rate": 4.568737373737374e-06, "loss": 6.885462951660156, "step": 9540 }, { "epoch": 0.09545, "grad_norm": 11.408024787902832, "learning_rate": 4.568484848484849e-06, "loss": 6.920161437988281, "step": 9545 }, { "epoch": 0.0955, "grad_norm": 7.775322437286377, "learning_rate": 4.5682323232323235e-06, "loss": 6.895552062988282, "step": 9550 }, { "epoch": 0.09555, "grad_norm": 12.66250228881836, "learning_rate": 4.567979797979798e-06, "loss": 6.8563385009765625, "step": 9555 }, { "epoch": 0.0956, "grad_norm": 8.786809921264648, "learning_rate": 4.567727272727273e-06, "loss": 6.949114990234375, "step": 9560 }, { "epoch": 0.09565, "grad_norm": 8.999879837036133, "learning_rate": 4.5674747474747474e-06, "loss": 6.850476837158203, "step": 9565 }, { "epoch": 0.0957, "grad_norm": 23.752296447753906, "learning_rate": 4.567222222222222e-06, "loss": 6.957427215576172, "step": 9570 }, { "epoch": 0.09575, "grad_norm": 10.494589805603027, "learning_rate": 4.5669696969696975e-06, "loss": 6.92352066040039, "step": 9575 }, { "epoch": 0.0958, "grad_norm": 10.032999992370605, "learning_rate": 4.566717171717172e-06, "loss": 6.927436828613281, "step": 9580 }, { "epoch": 0.09585, "grad_norm": 12.235912322998047, "learning_rate": 4.566464646464647e-06, "loss": 6.879893493652344, "step": 9585 }, { "epoch": 0.0959, "grad_norm": 8.25540828704834, "learning_rate": 4.5662121212121214e-06, "loss": 6.868061828613281, "step": 9590 }, { "epoch": 0.09595, "grad_norm": 7.315032482147217, "learning_rate": 4.565959595959596e-06, "loss": 6.866780090332031, "step": 9595 }, { "epoch": 0.096, "grad_norm": 10.719766616821289, "learning_rate": 4.565707070707071e-06, "loss": 6.861705780029297, "step": 9600 }, { "epoch": 0.09605, "grad_norm": 9.926324844360352, "learning_rate": 4.565454545454545e-06, "loss": 6.895835113525391, "step": 9605 }, { "epoch": 0.0961, "grad_norm": 9.650550842285156, "learning_rate": 4.56520202020202e-06, "loss": 6.881475830078125, "step": 9610 }, { "epoch": 0.09615, "grad_norm": 12.24812126159668, "learning_rate": 4.5649494949494954e-06, "loss": 6.87098388671875, "step": 9615 }, { "epoch": 0.0962, "grad_norm": 12.944478988647461, "learning_rate": 4.56469696969697e-06, "loss": 6.970864868164062, "step": 9620 }, { "epoch": 0.09625, "grad_norm": 9.495046615600586, "learning_rate": 4.564444444444445e-06, "loss": 6.873198699951172, "step": 9625 }, { "epoch": 0.0963, "grad_norm": 9.247722625732422, "learning_rate": 4.56419191919192e-06, "loss": 6.891322326660156, "step": 9630 }, { "epoch": 0.09635, "grad_norm": 7.756556034088135, "learning_rate": 4.563939393939395e-06, "loss": 6.904844665527344, "step": 9635 }, { "epoch": 0.0964, "grad_norm": 10.247211456298828, "learning_rate": 4.5636868686868694e-06, "loss": 6.933567810058594, "step": 9640 }, { "epoch": 0.09645, "grad_norm": 8.406776428222656, "learning_rate": 4.563434343434344e-06, "loss": 6.89685287475586, "step": 9645 }, { "epoch": 0.0965, "grad_norm": 10.155279159545898, "learning_rate": 4.563181818181819e-06, "loss": 6.858266448974609, "step": 9650 }, { "epoch": 0.09655, "grad_norm": 8.941664695739746, "learning_rate": 4.562929292929293e-06, "loss": 6.974774169921875, "step": 9655 }, { "epoch": 0.0966, "grad_norm": 9.457550048828125, "learning_rate": 4.562676767676768e-06, "loss": 6.886007690429688, "step": 9660 }, { "epoch": 0.09665, "grad_norm": 10.139677047729492, "learning_rate": 4.562424242424243e-06, "loss": 6.9136817932128904, "step": 9665 }, { "epoch": 0.0967, "grad_norm": 8.930248260498047, "learning_rate": 4.562171717171718e-06, "loss": 6.865504455566406, "step": 9670 }, { "epoch": 0.09675, "grad_norm": 8.662388801574707, "learning_rate": 4.561919191919193e-06, "loss": 6.865299987792969, "step": 9675 }, { "epoch": 0.0968, "grad_norm": 10.52776050567627, "learning_rate": 4.561666666666667e-06, "loss": 6.872999572753907, "step": 9680 }, { "epoch": 0.09685, "grad_norm": 8.371925354003906, "learning_rate": 4.561414141414142e-06, "loss": 6.8964790344238285, "step": 9685 }, { "epoch": 0.0969, "grad_norm": 14.037760734558105, "learning_rate": 4.561161616161617e-06, "loss": 6.859334564208984, "step": 9690 }, { "epoch": 0.09695, "grad_norm": 9.19921588897705, "learning_rate": 4.560909090909091e-06, "loss": 6.8322593688964846, "step": 9695 }, { "epoch": 0.097, "grad_norm": 8.956448554992676, "learning_rate": 4.560656565656566e-06, "loss": 6.868887329101563, "step": 9700 }, { "epoch": 0.09705, "grad_norm": 8.358606338500977, "learning_rate": 4.5604040404040405e-06, "loss": 6.889315032958985, "step": 9705 }, { "epoch": 0.0971, "grad_norm": 12.055523872375488, "learning_rate": 4.560151515151516e-06, "loss": 6.8727775573730465, "step": 9710 }, { "epoch": 0.09715, "grad_norm": 7.986931324005127, "learning_rate": 4.559898989898991e-06, "loss": 6.921642303466797, "step": 9715 }, { "epoch": 0.0972, "grad_norm": 9.531756401062012, "learning_rate": 4.559646464646465e-06, "loss": 6.854464721679688, "step": 9720 }, { "epoch": 0.09725, "grad_norm": 9.118194580078125, "learning_rate": 4.55939393939394e-06, "loss": 6.859737396240234, "step": 9725 }, { "epoch": 0.0973, "grad_norm": 8.401805877685547, "learning_rate": 4.5591414141414145e-06, "loss": 6.920757293701172, "step": 9730 }, { "epoch": 0.09735, "grad_norm": 9.299088478088379, "learning_rate": 4.558888888888889e-06, "loss": 6.9095619201660154, "step": 9735 }, { "epoch": 0.0974, "grad_norm": 12.430475234985352, "learning_rate": 4.558636363636364e-06, "loss": 6.851628112792969, "step": 9740 }, { "epoch": 0.09745, "grad_norm": 11.760705947875977, "learning_rate": 4.558383838383838e-06, "loss": 6.8301841735839846, "step": 9745 }, { "epoch": 0.0975, "grad_norm": 10.439096450805664, "learning_rate": 4.558131313131314e-06, "loss": 6.885912322998047, "step": 9750 }, { "epoch": 0.09755, "grad_norm": 11.126880645751953, "learning_rate": 4.5578787878787885e-06, "loss": 6.894627380371094, "step": 9755 }, { "epoch": 0.0976, "grad_norm": 7.515928268432617, "learning_rate": 4.557626262626263e-06, "loss": 6.8644966125488285, "step": 9760 }, { "epoch": 0.09765, "grad_norm": 8.86186408996582, "learning_rate": 4.557373737373738e-06, "loss": 6.863015747070312, "step": 9765 }, { "epoch": 0.0977, "grad_norm": 6.545344829559326, "learning_rate": 4.557121212121212e-06, "loss": 6.894357299804687, "step": 9770 }, { "epoch": 0.09775, "grad_norm": 12.361340522766113, "learning_rate": 4.556868686868687e-06, "loss": 6.91864013671875, "step": 9775 }, { "epoch": 0.0978, "grad_norm": 7.701848030090332, "learning_rate": 4.556616161616162e-06, "loss": 6.920528411865234, "step": 9780 }, { "epoch": 0.09785, "grad_norm": 14.436676979064941, "learning_rate": 4.556363636363636e-06, "loss": 6.8511810302734375, "step": 9785 }, { "epoch": 0.0979, "grad_norm": 6.7326202392578125, "learning_rate": 4.556111111111112e-06, "loss": 6.935173797607422, "step": 9790 }, { "epoch": 0.09795, "grad_norm": 10.577225685119629, "learning_rate": 4.555858585858586e-06, "loss": 6.885600280761719, "step": 9795 }, { "epoch": 0.098, "grad_norm": 10.066238403320312, "learning_rate": 4.555606060606061e-06, "loss": 8.671671295166016, "step": 9800 }, { "epoch": 0.09805, "grad_norm": 9.856620788574219, "learning_rate": 4.555353535353536e-06, "loss": 6.822401428222657, "step": 9805 }, { "epoch": 0.0981, "grad_norm": 12.700478553771973, "learning_rate": 4.555101010101011e-06, "loss": 6.898032379150391, "step": 9810 }, { "epoch": 0.09815, "grad_norm": 11.200740814208984, "learning_rate": 4.554848484848485e-06, "loss": 6.967012023925781, "step": 9815 }, { "epoch": 0.0982, "grad_norm": 11.26723575592041, "learning_rate": 4.5545959595959595e-06, "loss": 7.166909790039062, "step": 9820 }, { "epoch": 0.09825, "grad_norm": 8.22861385345459, "learning_rate": 4.554343434343434e-06, "loss": 6.907566070556641, "step": 9825 }, { "epoch": 0.0983, "grad_norm": 9.92597770690918, "learning_rate": 4.55409090909091e-06, "loss": 6.855133819580078, "step": 9830 }, { "epoch": 0.09835, "grad_norm": 11.476505279541016, "learning_rate": 4.553838383838384e-06, "loss": 6.869831848144531, "step": 9835 }, { "epoch": 0.0984, "grad_norm": 10.88169002532959, "learning_rate": 4.553585858585859e-06, "loss": 6.8975067138671875, "step": 9840 }, { "epoch": 0.09845, "grad_norm": 9.419955253601074, "learning_rate": 4.5533333333333335e-06, "loss": 6.888085174560547, "step": 9845 }, { "epoch": 0.0985, "grad_norm": 9.955551147460938, "learning_rate": 4.553080808080809e-06, "loss": 6.844129180908203, "step": 9850 }, { "epoch": 0.09855, "grad_norm": 8.792481422424316, "learning_rate": 4.552828282828284e-06, "loss": 6.820993804931641, "step": 9855 }, { "epoch": 0.0986, "grad_norm": 10.666019439697266, "learning_rate": 4.552575757575758e-06, "loss": 6.8444679260253904, "step": 9860 }, { "epoch": 0.09865, "grad_norm": 7.528670310974121, "learning_rate": 4.552323232323233e-06, "loss": 6.826388549804688, "step": 9865 }, { "epoch": 0.0987, "grad_norm": 10.20127010345459, "learning_rate": 4.5520707070707075e-06, "loss": 6.848408508300781, "step": 9870 }, { "epoch": 0.09875, "grad_norm": 6.641772747039795, "learning_rate": 4.551818181818182e-06, "loss": 6.847401428222656, "step": 9875 }, { "epoch": 0.0988, "grad_norm": 10.119627952575684, "learning_rate": 4.551565656565657e-06, "loss": 6.818115234375, "step": 9880 }, { "epoch": 0.09885, "grad_norm": 9.074414253234863, "learning_rate": 4.551313131313131e-06, "loss": 6.8143974304199215, "step": 9885 }, { "epoch": 0.0989, "grad_norm": 7.922760963439941, "learning_rate": 4.551060606060607e-06, "loss": 6.882887268066407, "step": 9890 }, { "epoch": 0.09895, "grad_norm": 12.476951599121094, "learning_rate": 4.5508080808080815e-06, "loss": 6.962390899658203, "step": 9895 }, { "epoch": 0.099, "grad_norm": 9.530937194824219, "learning_rate": 4.550555555555556e-06, "loss": 6.871343994140625, "step": 9900 }, { "epoch": 0.09905, "grad_norm": 8.332584381103516, "learning_rate": 4.550303030303031e-06, "loss": 6.860165405273437, "step": 9905 }, { "epoch": 0.0991, "grad_norm": 8.640633583068848, "learning_rate": 4.550050505050505e-06, "loss": 6.845024871826172, "step": 9910 }, { "epoch": 0.09915, "grad_norm": 8.826701164245605, "learning_rate": 4.54979797979798e-06, "loss": 6.860028839111328, "step": 9915 }, { "epoch": 0.0992, "grad_norm": 9.358051300048828, "learning_rate": 4.549545454545455e-06, "loss": 6.811455535888672, "step": 9920 }, { "epoch": 0.09925, "grad_norm": 8.784806251525879, "learning_rate": 4.549292929292929e-06, "loss": 6.855756378173828, "step": 9925 }, { "epoch": 0.0993, "grad_norm": 9.548070907592773, "learning_rate": 4.549040404040405e-06, "loss": 6.7995750427246096, "step": 9930 }, { "epoch": 0.09935, "grad_norm": 11.46341323852539, "learning_rate": 4.548787878787879e-06, "loss": 6.8029937744140625, "step": 9935 }, { "epoch": 0.0994, "grad_norm": 12.053924560546875, "learning_rate": 4.548535353535354e-06, "loss": 7.0472358703613285, "step": 9940 }, { "epoch": 0.09945, "grad_norm": 8.524255752563477, "learning_rate": 4.548282828282829e-06, "loss": 6.925016784667969, "step": 9945 }, { "epoch": 0.0995, "grad_norm": 7.0819478034973145, "learning_rate": 4.548030303030303e-06, "loss": 6.835140228271484, "step": 9950 }, { "epoch": 0.09955, "grad_norm": 9.256433486938477, "learning_rate": 4.547777777777778e-06, "loss": 6.823681640625, "step": 9955 }, { "epoch": 0.0996, "grad_norm": 10.68978214263916, "learning_rate": 4.5475252525252525e-06, "loss": 6.788855743408203, "step": 9960 }, { "epoch": 0.09965, "grad_norm": 9.126503944396973, "learning_rate": 4.547272727272727e-06, "loss": 6.838918304443359, "step": 9965 }, { "epoch": 0.0997, "grad_norm": 8.970675468444824, "learning_rate": 4.547020202020203e-06, "loss": 6.8585662841796875, "step": 9970 }, { "epoch": 0.09975, "grad_norm": 11.285412788391113, "learning_rate": 4.546767676767677e-06, "loss": 6.888837432861328, "step": 9975 }, { "epoch": 0.0998, "grad_norm": 11.419379234313965, "learning_rate": 4.546515151515152e-06, "loss": 6.8709465026855465, "step": 9980 }, { "epoch": 0.09985, "grad_norm": 10.288447380065918, "learning_rate": 4.5462626262626265e-06, "loss": 6.842796325683594, "step": 9985 }, { "epoch": 0.0999, "grad_norm": 8.651625633239746, "learning_rate": 4.546010101010101e-06, "loss": 6.834380340576172, "step": 9990 }, { "epoch": 0.09995, "grad_norm": 10.310647010803223, "learning_rate": 4.545757575757576e-06, "loss": 6.8765869140625, "step": 9995 }, { "epoch": 0.1, "grad_norm": 10.164254188537598, "learning_rate": 4.54550505050505e-06, "loss": 6.8682411193847654, "step": 10000 }, { "epoch": 0.10005, "grad_norm": 8.20759391784668, "learning_rate": 4.545252525252525e-06, "loss": 6.8823486328125, "step": 10005 }, { "epoch": 0.1001, "grad_norm": 7.979193210601807, "learning_rate": 4.5450000000000005e-06, "loss": 6.873066711425781, "step": 10010 }, { "epoch": 0.10015, "grad_norm": 11.778112411499023, "learning_rate": 4.544747474747475e-06, "loss": 7.219378662109375, "step": 10015 }, { "epoch": 0.1002, "grad_norm": 9.67769718170166, "learning_rate": 4.54449494949495e-06, "loss": 6.841419219970703, "step": 10020 }, { "epoch": 0.10025, "grad_norm": 7.152667045593262, "learning_rate": 4.544242424242424e-06, "loss": 6.632102203369141, "step": 10025 }, { "epoch": 0.1003, "grad_norm": 10.01180362701416, "learning_rate": 4.5439898989899e-06, "loss": 6.8050079345703125, "step": 10030 }, { "epoch": 0.10035, "grad_norm": 11.481300354003906, "learning_rate": 4.5437373737373745e-06, "loss": 6.903094482421875, "step": 10035 }, { "epoch": 0.1004, "grad_norm": 8.824755668640137, "learning_rate": 4.543484848484848e-06, "loss": 6.838265228271484, "step": 10040 }, { "epoch": 0.10045, "grad_norm": 8.62960147857666, "learning_rate": 4.543232323232323e-06, "loss": 6.78216323852539, "step": 10045 }, { "epoch": 0.1005, "grad_norm": 15.218633651733398, "learning_rate": 4.542979797979798e-06, "loss": 6.906192016601563, "step": 10050 }, { "epoch": 0.10055, "grad_norm": 10.21317195892334, "learning_rate": 4.542727272727273e-06, "loss": 6.922993469238281, "step": 10055 }, { "epoch": 0.1006, "grad_norm": 6.65881872177124, "learning_rate": 4.542474747474748e-06, "loss": 6.848235321044922, "step": 10060 }, { "epoch": 0.10065, "grad_norm": 8.290840148925781, "learning_rate": 4.542222222222223e-06, "loss": 6.841778564453125, "step": 10065 }, { "epoch": 0.1007, "grad_norm": 10.090519905090332, "learning_rate": 4.541969696969698e-06, "loss": 6.83782730102539, "step": 10070 }, { "epoch": 0.10075, "grad_norm": 8.216289520263672, "learning_rate": 4.541717171717172e-06, "loss": 6.8315483093261715, "step": 10075 }, { "epoch": 0.1008, "grad_norm": 8.74929428100586, "learning_rate": 4.541464646464647e-06, "loss": 6.8113555908203125, "step": 10080 }, { "epoch": 0.10085, "grad_norm": 8.636686325073242, "learning_rate": 4.541212121212122e-06, "loss": 6.838672637939453, "step": 10085 }, { "epoch": 0.1009, "grad_norm": 10.863844871520996, "learning_rate": 4.540959595959596e-06, "loss": 6.825121307373047, "step": 10090 }, { "epoch": 0.10095, "grad_norm": 8.985540390014648, "learning_rate": 4.540707070707071e-06, "loss": 6.800166320800781, "step": 10095 }, { "epoch": 0.101, "grad_norm": 7.546262264251709, "learning_rate": 4.5404545454545456e-06, "loss": 6.852406311035156, "step": 10100 }, { "epoch": 0.10105, "grad_norm": 7.637396335601807, "learning_rate": 4.540202020202021e-06, "loss": 6.767535400390625, "step": 10105 }, { "epoch": 0.1011, "grad_norm": 9.734448432922363, "learning_rate": 4.539949494949496e-06, "loss": 6.862636566162109, "step": 10110 }, { "epoch": 0.10115, "grad_norm": 9.72131061553955, "learning_rate": 4.53969696969697e-06, "loss": 6.822965240478515, "step": 10115 }, { "epoch": 0.1012, "grad_norm": 10.105347633361816, "learning_rate": 4.539444444444445e-06, "loss": 6.8082527160644535, "step": 10120 }, { "epoch": 0.10125, "grad_norm": 11.716263771057129, "learning_rate": 4.5391919191919196e-06, "loss": 6.774243927001953, "step": 10125 }, { "epoch": 0.1013, "grad_norm": 7.398629188537598, "learning_rate": 4.538939393939394e-06, "loss": 6.777813720703125, "step": 10130 }, { "epoch": 0.10135, "grad_norm": 10.922369003295898, "learning_rate": 4.538686868686869e-06, "loss": 7.022344970703125, "step": 10135 }, { "epoch": 0.1014, "grad_norm": 7.588442802429199, "learning_rate": 4.5384343434343435e-06, "loss": 6.841729736328125, "step": 10140 }, { "epoch": 0.10145, "grad_norm": 9.721165657043457, "learning_rate": 4.538181818181819e-06, "loss": 6.817334747314453, "step": 10145 }, { "epoch": 0.1015, "grad_norm": 9.496757507324219, "learning_rate": 4.5379292929292936e-06, "loss": 6.8150169372558596, "step": 10150 }, { "epoch": 0.10155, "grad_norm": 10.186479568481445, "learning_rate": 4.537676767676768e-06, "loss": 6.827941131591797, "step": 10155 }, { "epoch": 0.1016, "grad_norm": 6.250960826873779, "learning_rate": 4.537424242424243e-06, "loss": 6.821821594238282, "step": 10160 }, { "epoch": 0.10165, "grad_norm": 6.086466312408447, "learning_rate": 4.5371717171717175e-06, "loss": 7.0148979187011715, "step": 10165 }, { "epoch": 0.1017, "grad_norm": 9.325428009033203, "learning_rate": 4.536919191919192e-06, "loss": 6.814901733398438, "step": 10170 }, { "epoch": 0.10175, "grad_norm": 9.550406455993652, "learning_rate": 4.536666666666667e-06, "loss": 6.834889984130859, "step": 10175 }, { "epoch": 0.1018, "grad_norm": 9.295167922973633, "learning_rate": 4.536414141414141e-06, "loss": 6.838574981689453, "step": 10180 }, { "epoch": 0.10185, "grad_norm": 7.881017208099365, "learning_rate": 4.536161616161617e-06, "loss": 6.801853179931641, "step": 10185 }, { "epoch": 0.1019, "grad_norm": 9.531781196594238, "learning_rate": 4.5359090909090914e-06, "loss": 6.845233154296875, "step": 10190 }, { "epoch": 0.10195, "grad_norm": 8.697671890258789, "learning_rate": 4.535656565656566e-06, "loss": 6.84248046875, "step": 10195 }, { "epoch": 0.102, "grad_norm": 10.670230865478516, "learning_rate": 4.535404040404041e-06, "loss": 6.8018241882324215, "step": 10200 }, { "epoch": 0.10205, "grad_norm": 7.887033939361572, "learning_rate": 4.535151515151515e-06, "loss": 6.75720443725586, "step": 10205 }, { "epoch": 0.1021, "grad_norm": 11.630199432373047, "learning_rate": 4.53489898989899e-06, "loss": 6.815679931640625, "step": 10210 }, { "epoch": 0.10215, "grad_norm": 7.7320756912231445, "learning_rate": 4.534646464646465e-06, "loss": 6.886563110351562, "step": 10215 }, { "epoch": 0.1022, "grad_norm": 12.093297958374023, "learning_rate": 4.534393939393939e-06, "loss": 6.806790161132812, "step": 10220 }, { "epoch": 0.10225, "grad_norm": 10.053400039672852, "learning_rate": 4.534141414141415e-06, "loss": 6.8018333435058596, "step": 10225 }, { "epoch": 0.1023, "grad_norm": 10.040496826171875, "learning_rate": 4.533888888888889e-06, "loss": 6.8588005065917965, "step": 10230 }, { "epoch": 0.10235, "grad_norm": 9.86600399017334, "learning_rate": 4.533636363636364e-06, "loss": 6.708974456787109, "step": 10235 }, { "epoch": 0.1024, "grad_norm": 14.560491561889648, "learning_rate": 4.533383838383839e-06, "loss": 6.930031585693359, "step": 10240 }, { "epoch": 0.10245, "grad_norm": 7.954652786254883, "learning_rate": 4.533131313131314e-06, "loss": 6.979647064208985, "step": 10245 }, { "epoch": 0.1025, "grad_norm": 9.979862213134766, "learning_rate": 4.532878787878789e-06, "loss": 6.77722396850586, "step": 10250 }, { "epoch": 0.10255, "grad_norm": 7.011960506439209, "learning_rate": 4.532626262626263e-06, "loss": 6.847500610351562, "step": 10255 }, { "epoch": 0.1026, "grad_norm": 8.673044204711914, "learning_rate": 4.532373737373737e-06, "loss": 6.800447845458985, "step": 10260 }, { "epoch": 0.10265, "grad_norm": 9.20903205871582, "learning_rate": 4.532121212121213e-06, "loss": 6.821847534179687, "step": 10265 }, { "epoch": 0.1027, "grad_norm": 26.941720962524414, "learning_rate": 4.531868686868687e-06, "loss": 6.977058410644531, "step": 10270 }, { "epoch": 0.10275, "grad_norm": 9.725756645202637, "learning_rate": 4.531616161616162e-06, "loss": 6.815675354003906, "step": 10275 }, { "epoch": 0.1028, "grad_norm": 8.225324630737305, "learning_rate": 4.5313636363636365e-06, "loss": 6.820429992675781, "step": 10280 }, { "epoch": 0.10285, "grad_norm": 10.728477478027344, "learning_rate": 4.531111111111112e-06, "loss": 6.8285469055175785, "step": 10285 }, { "epoch": 0.1029, "grad_norm": 8.331260681152344, "learning_rate": 4.530858585858587e-06, "loss": 6.963614654541016, "step": 10290 }, { "epoch": 0.10295, "grad_norm": 10.605501174926758, "learning_rate": 4.530606060606061e-06, "loss": 6.832279968261719, "step": 10295 }, { "epoch": 0.103, "grad_norm": 10.63926887512207, "learning_rate": 4.530353535353536e-06, "loss": 6.778367614746093, "step": 10300 }, { "epoch": 0.10305, "grad_norm": 8.650009155273438, "learning_rate": 4.5301010101010105e-06, "loss": 6.814152526855469, "step": 10305 }, { "epoch": 0.1031, "grad_norm": 8.542475700378418, "learning_rate": 4.529848484848485e-06, "loss": 6.782415771484375, "step": 10310 }, { "epoch": 0.10315, "grad_norm": 9.568929672241211, "learning_rate": 4.52959595959596e-06, "loss": 6.882070922851563, "step": 10315 }, { "epoch": 0.1032, "grad_norm": 7.9086127281188965, "learning_rate": 4.529343434343434e-06, "loss": 6.748777770996094, "step": 10320 }, { "epoch": 0.10325, "grad_norm": 7.9676127433776855, "learning_rate": 4.52909090909091e-06, "loss": 6.748429870605468, "step": 10325 }, { "epoch": 0.1033, "grad_norm": 8.259550094604492, "learning_rate": 4.5288383838383845e-06, "loss": 6.801564025878906, "step": 10330 }, { "epoch": 0.10335, "grad_norm": 8.971419334411621, "learning_rate": 4.528585858585859e-06, "loss": 6.801553344726562, "step": 10335 }, { "epoch": 0.1034, "grad_norm": 8.85749626159668, "learning_rate": 4.528333333333334e-06, "loss": 6.8306938171386715, "step": 10340 }, { "epoch": 0.10345, "grad_norm": 6.997933864593506, "learning_rate": 4.528080808080808e-06, "loss": 6.901518249511719, "step": 10345 }, { "epoch": 0.1035, "grad_norm": 10.63986587524414, "learning_rate": 4.527828282828283e-06, "loss": 6.793247985839844, "step": 10350 }, { "epoch": 0.10355, "grad_norm": 9.295430183410645, "learning_rate": 4.527575757575758e-06, "loss": 6.793257141113282, "step": 10355 }, { "epoch": 0.1036, "grad_norm": 9.776360511779785, "learning_rate": 4.527323232323232e-06, "loss": 6.81164321899414, "step": 10360 }, { "epoch": 0.10365, "grad_norm": 10.217482566833496, "learning_rate": 4.527070707070708e-06, "loss": 6.858307647705078, "step": 10365 }, { "epoch": 0.1037, "grad_norm": 7.741351127624512, "learning_rate": 4.526818181818182e-06, "loss": 6.803752136230469, "step": 10370 }, { "epoch": 0.10375, "grad_norm": 11.02016544342041, "learning_rate": 4.526565656565657e-06, "loss": 6.86942138671875, "step": 10375 }, { "epoch": 0.1038, "grad_norm": 10.590338706970215, "learning_rate": 4.526313131313132e-06, "loss": 6.811348724365234, "step": 10380 }, { "epoch": 0.10385, "grad_norm": 8.158662796020508, "learning_rate": 4.526060606060606e-06, "loss": 6.764067077636719, "step": 10385 }, { "epoch": 0.1039, "grad_norm": 7.765247344970703, "learning_rate": 4.525808080808081e-06, "loss": 6.833324432373047, "step": 10390 }, { "epoch": 0.10395, "grad_norm": 8.552026748657227, "learning_rate": 4.5255555555555555e-06, "loss": 6.784595489501953, "step": 10395 }, { "epoch": 0.104, "grad_norm": 7.711726188659668, "learning_rate": 4.52530303030303e-06, "loss": 6.81518325805664, "step": 10400 }, { "epoch": 0.10405, "grad_norm": 8.508021354675293, "learning_rate": 4.525050505050506e-06, "loss": 6.806743621826172, "step": 10405 }, { "epoch": 0.1041, "grad_norm": 9.830904960632324, "learning_rate": 4.52479797979798e-06, "loss": 6.850068664550781, "step": 10410 }, { "epoch": 0.10415, "grad_norm": 9.13292121887207, "learning_rate": 4.524545454545455e-06, "loss": 6.780884552001953, "step": 10415 }, { "epoch": 0.1042, "grad_norm": 7.8246169090271, "learning_rate": 4.5242929292929295e-06, "loss": 6.811627197265625, "step": 10420 }, { "epoch": 0.10425, "grad_norm": 8.385489463806152, "learning_rate": 4.524040404040404e-06, "loss": 6.796039581298828, "step": 10425 }, { "epoch": 0.1043, "grad_norm": 11.130431175231934, "learning_rate": 4.523787878787879e-06, "loss": 6.704118347167968, "step": 10430 }, { "epoch": 0.10435, "grad_norm": 8.08370304107666, "learning_rate": 4.523535353535353e-06, "loss": 6.531423187255859, "step": 10435 }, { "epoch": 0.1044, "grad_norm": 8.021665573120117, "learning_rate": 4.523282828282828e-06, "loss": 6.798973846435547, "step": 10440 }, { "epoch": 0.10445, "grad_norm": 7.57808256149292, "learning_rate": 4.5230303030303035e-06, "loss": 6.76110610961914, "step": 10445 }, { "epoch": 0.1045, "grad_norm": 9.246503829956055, "learning_rate": 4.522777777777778e-06, "loss": 6.827373504638672, "step": 10450 }, { "epoch": 0.10455, "grad_norm": 8.783281326293945, "learning_rate": 4.522525252525253e-06, "loss": 6.799574279785157, "step": 10455 }, { "epoch": 0.1046, "grad_norm": 10.378202438354492, "learning_rate": 4.522272727272727e-06, "loss": 6.8349250793457035, "step": 10460 }, { "epoch": 0.10465, "grad_norm": 7.681434631347656, "learning_rate": 4.522020202020203e-06, "loss": 6.809269714355469, "step": 10465 }, { "epoch": 0.1047, "grad_norm": 7.874427318572998, "learning_rate": 4.5217676767676775e-06, "loss": 6.79495849609375, "step": 10470 }, { "epoch": 0.10475, "grad_norm": 9.04549789428711, "learning_rate": 4.521515151515152e-06, "loss": 6.730134582519531, "step": 10475 }, { "epoch": 0.1048, "grad_norm": 9.927407264709473, "learning_rate": 4.521262626262627e-06, "loss": 6.75667724609375, "step": 10480 }, { "epoch": 0.10485, "grad_norm": 10.67634105682373, "learning_rate": 4.521010101010101e-06, "loss": 6.763334655761719, "step": 10485 }, { "epoch": 0.1049, "grad_norm": 9.389852523803711, "learning_rate": 4.520757575757576e-06, "loss": 6.783106994628906, "step": 10490 }, { "epoch": 0.10495, "grad_norm": 12.468643188476562, "learning_rate": 4.520505050505051e-06, "loss": 6.82467041015625, "step": 10495 }, { "epoch": 0.105, "grad_norm": 9.92530632019043, "learning_rate": 4.520252525252526e-06, "loss": 6.78125228881836, "step": 10500 }, { "epoch": 0.10505, "grad_norm": 9.618744850158691, "learning_rate": 4.520000000000001e-06, "loss": 6.793302154541015, "step": 10505 }, { "epoch": 0.1051, "grad_norm": 9.825407981872559, "learning_rate": 4.519747474747475e-06, "loss": 6.788217926025391, "step": 10510 }, { "epoch": 0.10515, "grad_norm": 6.7132086753845215, "learning_rate": 4.51949494949495e-06, "loss": 6.7343803405761715, "step": 10515 }, { "epoch": 0.1052, "grad_norm": 8.860984802246094, "learning_rate": 4.519242424242425e-06, "loss": 6.818547058105469, "step": 10520 }, { "epoch": 0.10525, "grad_norm": 7.145396709442139, "learning_rate": 4.518989898989899e-06, "loss": 6.805262756347656, "step": 10525 }, { "epoch": 0.1053, "grad_norm": 11.210077285766602, "learning_rate": 4.518737373737374e-06, "loss": 6.789247131347656, "step": 10530 }, { "epoch": 0.10535, "grad_norm": 8.556869506835938, "learning_rate": 4.5184848484848486e-06, "loss": 6.77685775756836, "step": 10535 }, { "epoch": 0.1054, "grad_norm": 12.611933708190918, "learning_rate": 4.518232323232324e-06, "loss": 6.826222229003906, "step": 10540 }, { "epoch": 0.10545, "grad_norm": 6.495969772338867, "learning_rate": 4.517979797979799e-06, "loss": 6.792431640625, "step": 10545 }, { "epoch": 0.1055, "grad_norm": 9.135517120361328, "learning_rate": 4.517727272727273e-06, "loss": 6.8119384765625, "step": 10550 }, { "epoch": 0.10555, "grad_norm": 7.112025737762451, "learning_rate": 4.517474747474748e-06, "loss": 6.7536476135253904, "step": 10555 }, { "epoch": 0.1056, "grad_norm": 9.292410850524902, "learning_rate": 4.5172222222222225e-06, "loss": 7.047863006591797, "step": 10560 }, { "epoch": 0.10565, "grad_norm": 7.248304843902588, "learning_rate": 4.516969696969697e-06, "loss": 6.757450103759766, "step": 10565 }, { "epoch": 0.1057, "grad_norm": 7.4107441902160645, "learning_rate": 4.516717171717172e-06, "loss": 6.80086669921875, "step": 10570 }, { "epoch": 0.10575, "grad_norm": 8.414377212524414, "learning_rate": 4.5164646464646464e-06, "loss": 6.801142883300781, "step": 10575 }, { "epoch": 0.1058, "grad_norm": 8.207219123840332, "learning_rate": 4.516212121212122e-06, "loss": 6.766651153564453, "step": 10580 }, { "epoch": 0.10585, "grad_norm": 9.720832824707031, "learning_rate": 4.5159595959595965e-06, "loss": 6.783973693847656, "step": 10585 }, { "epoch": 0.1059, "grad_norm": 9.101816177368164, "learning_rate": 4.515707070707071e-06, "loss": 6.76546630859375, "step": 10590 }, { "epoch": 0.10595, "grad_norm": 7.374231338500977, "learning_rate": 4.515454545454546e-06, "loss": 6.740624237060547, "step": 10595 }, { "epoch": 0.106, "grad_norm": 7.708523273468018, "learning_rate": 4.5152020202020204e-06, "loss": 6.796148681640625, "step": 10600 }, { "epoch": 0.10605, "grad_norm": 7.782270431518555, "learning_rate": 4.514949494949495e-06, "loss": 6.674526214599609, "step": 10605 }, { "epoch": 0.1061, "grad_norm": 7.8321967124938965, "learning_rate": 4.51469696969697e-06, "loss": 6.76794662475586, "step": 10610 }, { "epoch": 0.10615, "grad_norm": 7.824036598205566, "learning_rate": 4.514444444444444e-06, "loss": 6.7337806701660154, "step": 10615 }, { "epoch": 0.1062, "grad_norm": 7.661462306976318, "learning_rate": 4.51419191919192e-06, "loss": 6.989064788818359, "step": 10620 }, { "epoch": 0.10625, "grad_norm": 17.30018424987793, "learning_rate": 4.5139393939393944e-06, "loss": 6.892916870117188, "step": 10625 }, { "epoch": 0.1063, "grad_norm": 9.594144821166992, "learning_rate": 4.513686868686869e-06, "loss": 6.813229370117187, "step": 10630 }, { "epoch": 0.10635, "grad_norm": 10.082630157470703, "learning_rate": 4.513434343434344e-06, "loss": 6.739459228515625, "step": 10635 }, { "epoch": 0.1064, "grad_norm": 7.233681678771973, "learning_rate": 4.513181818181819e-06, "loss": 6.742675018310547, "step": 10640 }, { "epoch": 0.10645, "grad_norm": 7.32993221282959, "learning_rate": 4.512929292929294e-06, "loss": 6.739006805419922, "step": 10645 }, { "epoch": 0.1065, "grad_norm": 9.604584693908691, "learning_rate": 4.512676767676768e-06, "loss": 6.770960998535156, "step": 10650 }, { "epoch": 0.10655, "grad_norm": 8.362753868103027, "learning_rate": 4.512424242424242e-06, "loss": 6.731631469726563, "step": 10655 }, { "epoch": 0.1066, "grad_norm": 9.047428131103516, "learning_rate": 4.512171717171718e-06, "loss": 6.794572448730468, "step": 10660 }, { "epoch": 0.10665, "grad_norm": 9.890609741210938, "learning_rate": 4.511919191919192e-06, "loss": 6.747506713867187, "step": 10665 }, { "epoch": 0.1067, "grad_norm": 9.081280708312988, "learning_rate": 4.511666666666667e-06, "loss": 6.738412475585937, "step": 10670 }, { "epoch": 0.10675, "grad_norm": 8.467997550964355, "learning_rate": 4.511414141414142e-06, "loss": 6.749243927001953, "step": 10675 }, { "epoch": 0.1068, "grad_norm": 6.500598430633545, "learning_rate": 4.511161616161617e-06, "loss": 6.7774200439453125, "step": 10680 }, { "epoch": 0.10685, "grad_norm": 10.23888874053955, "learning_rate": 4.510909090909092e-06, "loss": 6.843496704101563, "step": 10685 }, { "epoch": 0.1069, "grad_norm": 6.803131103515625, "learning_rate": 4.510656565656566e-06, "loss": 6.831559753417968, "step": 10690 }, { "epoch": 0.10695, "grad_norm": 7.9556965827941895, "learning_rate": 4.510404040404041e-06, "loss": 6.788623809814453, "step": 10695 }, { "epoch": 0.107, "grad_norm": 8.729307174682617, "learning_rate": 4.510151515151516e-06, "loss": 6.781925201416016, "step": 10700 }, { "epoch": 0.10705, "grad_norm": 7.6999688148498535, "learning_rate": 4.50989898989899e-06, "loss": 6.837645721435547, "step": 10705 }, { "epoch": 0.1071, "grad_norm": 8.138265609741211, "learning_rate": 4.509646464646465e-06, "loss": 6.781961822509766, "step": 10710 }, { "epoch": 0.10715, "grad_norm": 8.642674446105957, "learning_rate": 4.5093939393939395e-06, "loss": 6.731106567382812, "step": 10715 }, { "epoch": 0.1072, "grad_norm": 8.514413833618164, "learning_rate": 4.509141414141415e-06, "loss": 6.743327331542969, "step": 10720 }, { "epoch": 0.10725, "grad_norm": 8.892318725585938, "learning_rate": 4.50888888888889e-06, "loss": 6.747944641113281, "step": 10725 }, { "epoch": 0.1073, "grad_norm": 6.997361183166504, "learning_rate": 4.508636363636364e-06, "loss": 6.779306030273437, "step": 10730 }, { "epoch": 0.10735, "grad_norm": 10.444315910339355, "learning_rate": 4.508383838383839e-06, "loss": 6.872282409667969, "step": 10735 }, { "epoch": 0.1074, "grad_norm": 9.117467880249023, "learning_rate": 4.5081313131313135e-06, "loss": 6.7933502197265625, "step": 10740 }, { "epoch": 0.10745, "grad_norm": 6.413508415222168, "learning_rate": 4.507878787878788e-06, "loss": 6.736911010742188, "step": 10745 }, { "epoch": 0.1075, "grad_norm": 6.463379383087158, "learning_rate": 4.507626262626263e-06, "loss": 6.732374572753907, "step": 10750 }, { "epoch": 0.10755, "grad_norm": 8.804510116577148, "learning_rate": 4.507373737373737e-06, "loss": 6.744367980957032, "step": 10755 }, { "epoch": 0.1076, "grad_norm": 6.585135459899902, "learning_rate": 4.507121212121213e-06, "loss": 6.680055236816406, "step": 10760 }, { "epoch": 0.10765, "grad_norm": 7.2376837730407715, "learning_rate": 4.5068686868686875e-06, "loss": 6.749315643310547, "step": 10765 }, { "epoch": 0.1077, "grad_norm": 10.131718635559082, "learning_rate": 4.506616161616162e-06, "loss": 6.739723205566406, "step": 10770 }, { "epoch": 0.10775, "grad_norm": 9.172734260559082, "learning_rate": 4.506363636363637e-06, "loss": 6.7099159240722654, "step": 10775 }, { "epoch": 0.1078, "grad_norm": 6.226894378662109, "learning_rate": 4.506111111111111e-06, "loss": 6.7091423034667965, "step": 10780 }, { "epoch": 0.10785, "grad_norm": 8.517584800720215, "learning_rate": 4.505858585858586e-06, "loss": 6.690707397460938, "step": 10785 }, { "epoch": 0.1079, "grad_norm": 9.280488967895508, "learning_rate": 4.505606060606061e-06, "loss": 6.7915702819824215, "step": 10790 }, { "epoch": 0.10795, "grad_norm": 6.756470203399658, "learning_rate": 4.505353535353535e-06, "loss": 6.7418067932128904, "step": 10795 }, { "epoch": 0.108, "grad_norm": 9.961115837097168, "learning_rate": 4.505101010101011e-06, "loss": 6.721783447265625, "step": 10800 }, { "epoch": 0.10805, "grad_norm": 7.108649730682373, "learning_rate": 4.504848484848485e-06, "loss": 6.7423347473144535, "step": 10805 }, { "epoch": 0.1081, "grad_norm": 9.545676231384277, "learning_rate": 4.50459595959596e-06, "loss": 6.715361785888672, "step": 10810 }, { "epoch": 0.10815, "grad_norm": 9.266305923461914, "learning_rate": 4.504343434343435e-06, "loss": 6.709360504150391, "step": 10815 }, { "epoch": 0.1082, "grad_norm": 7.494429588317871, "learning_rate": 4.504090909090909e-06, "loss": 6.7161109924316404, "step": 10820 }, { "epoch": 0.10825, "grad_norm": 9.457154273986816, "learning_rate": 4.503838383838384e-06, "loss": 6.721696472167968, "step": 10825 }, { "epoch": 0.1083, "grad_norm": 8.692770957946777, "learning_rate": 4.5035858585858585e-06, "loss": 6.79005126953125, "step": 10830 }, { "epoch": 0.10835, "grad_norm": 8.719589233398438, "learning_rate": 4.503333333333333e-06, "loss": 7.250643920898438, "step": 10835 }, { "epoch": 0.1084, "grad_norm": 10.175653457641602, "learning_rate": 4.503080808080809e-06, "loss": 6.871045684814453, "step": 10840 }, { "epoch": 0.10845, "grad_norm": 6.755481719970703, "learning_rate": 4.502828282828283e-06, "loss": 6.767408752441407, "step": 10845 }, { "epoch": 0.1085, "grad_norm": 6.408545017242432, "learning_rate": 4.502575757575758e-06, "loss": 6.826924896240234, "step": 10850 }, { "epoch": 0.10855, "grad_norm": 6.611545562744141, "learning_rate": 4.5023232323232325e-06, "loss": 6.8118034362792965, "step": 10855 }, { "epoch": 0.1086, "grad_norm": 9.907986640930176, "learning_rate": 4.502070707070708e-06, "loss": 6.705509185791016, "step": 10860 }, { "epoch": 0.10865, "grad_norm": 9.282553672790527, "learning_rate": 4.501818181818183e-06, "loss": 6.785549926757812, "step": 10865 }, { "epoch": 0.1087, "grad_norm": 7.429384708404541, "learning_rate": 4.501565656565656e-06, "loss": 6.799143218994141, "step": 10870 }, { "epoch": 0.10875, "grad_norm": 8.071882247924805, "learning_rate": 4.501313131313131e-06, "loss": 6.756442260742188, "step": 10875 }, { "epoch": 0.1088, "grad_norm": 8.47547721862793, "learning_rate": 4.5010606060606065e-06, "loss": 6.711375427246094, "step": 10880 }, { "epoch": 0.10885, "grad_norm": 8.412904739379883, "learning_rate": 4.500808080808081e-06, "loss": 6.704298400878907, "step": 10885 }, { "epoch": 0.1089, "grad_norm": 7.7536797523498535, "learning_rate": 4.500555555555556e-06, "loss": 6.720106506347657, "step": 10890 }, { "epoch": 0.10895, "grad_norm": 7.646829605102539, "learning_rate": 4.50030303030303e-06, "loss": 6.750797271728516, "step": 10895 }, { "epoch": 0.109, "grad_norm": 9.310580253601074, "learning_rate": 4.500050505050506e-06, "loss": 6.821577453613282, "step": 10900 }, { "epoch": 0.10905, "grad_norm": 7.541567802429199, "learning_rate": 4.4997979797979805e-06, "loss": 6.753498077392578, "step": 10905 }, { "epoch": 0.1091, "grad_norm": 7.4607439041137695, "learning_rate": 4.499545454545455e-06, "loss": 6.8372352600097654, "step": 10910 }, { "epoch": 0.10915, "grad_norm": 5.868399143218994, "learning_rate": 4.49929292929293e-06, "loss": 6.703582763671875, "step": 10915 }, { "epoch": 0.1092, "grad_norm": 6.7235822677612305, "learning_rate": 4.499040404040404e-06, "loss": 6.710835266113281, "step": 10920 }, { "epoch": 0.10925, "grad_norm": 10.908285140991211, "learning_rate": 4.498787878787879e-06, "loss": 6.808950805664063, "step": 10925 }, { "epoch": 0.1093, "grad_norm": 5.785531997680664, "learning_rate": 4.498535353535354e-06, "loss": 6.807895660400391, "step": 10930 }, { "epoch": 0.10935, "grad_norm": 8.704164505004883, "learning_rate": 4.498282828282829e-06, "loss": 6.7197731018066404, "step": 10935 }, { "epoch": 0.1094, "grad_norm": 9.486784934997559, "learning_rate": 4.498030303030304e-06, "loss": 6.805103302001953, "step": 10940 }, { "epoch": 0.10945, "grad_norm": 9.621438026428223, "learning_rate": 4.497777777777778e-06, "loss": 6.7240760803222654, "step": 10945 }, { "epoch": 0.1095, "grad_norm": 8.090937614440918, "learning_rate": 4.497525252525253e-06, "loss": 6.805792236328125, "step": 10950 }, { "epoch": 0.10955, "grad_norm": 6.929759502410889, "learning_rate": 4.497272727272728e-06, "loss": 6.794142150878907, "step": 10955 }, { "epoch": 0.1096, "grad_norm": 7.955910682678223, "learning_rate": 4.497020202020202e-06, "loss": 6.7519691467285154, "step": 10960 }, { "epoch": 0.10965, "grad_norm": 8.469002723693848, "learning_rate": 4.496767676767677e-06, "loss": 6.741259765625, "step": 10965 }, { "epoch": 0.1097, "grad_norm": 8.495889663696289, "learning_rate": 4.4965151515151515e-06, "loss": 6.697187805175782, "step": 10970 }, { "epoch": 0.10975, "grad_norm": 7.8564534187316895, "learning_rate": 4.496262626262627e-06, "loss": 6.709109497070313, "step": 10975 }, { "epoch": 0.1098, "grad_norm": 7.697814464569092, "learning_rate": 4.496010101010102e-06, "loss": 6.836070251464844, "step": 10980 }, { "epoch": 0.10985, "grad_norm": 5.744918346405029, "learning_rate": 4.495757575757576e-06, "loss": 6.780767822265625, "step": 10985 }, { "epoch": 0.1099, "grad_norm": 8.755928993225098, "learning_rate": 4.495505050505051e-06, "loss": 6.737541198730469, "step": 10990 }, { "epoch": 0.10995, "grad_norm": 10.13265609741211, "learning_rate": 4.4952525252525255e-06, "loss": 6.855047607421875, "step": 10995 }, { "epoch": 0.11, "grad_norm": 8.116928100585938, "learning_rate": 4.495e-06, "loss": 6.798879241943359, "step": 11000 }, { "epoch": 0.11005, "grad_norm": 7.539651393890381, "learning_rate": 4.494747474747475e-06, "loss": 6.687651824951172, "step": 11005 }, { "epoch": 0.1101, "grad_norm": 11.257396697998047, "learning_rate": 4.494494949494949e-06, "loss": 6.772528839111328, "step": 11010 }, { "epoch": 0.11015, "grad_norm": 7.942442893981934, "learning_rate": 4.494242424242425e-06, "loss": 6.730797576904297, "step": 11015 }, { "epoch": 0.1102, "grad_norm": 7.521190643310547, "learning_rate": 4.4939898989898995e-06, "loss": 6.701113891601563, "step": 11020 }, { "epoch": 0.11025, "grad_norm": 6.87885856628418, "learning_rate": 4.493737373737374e-06, "loss": 6.720253753662109, "step": 11025 }, { "epoch": 0.1103, "grad_norm": 9.220795631408691, "learning_rate": 4.493484848484849e-06, "loss": 6.698004150390625, "step": 11030 }, { "epoch": 0.11035, "grad_norm": 6.997349262237549, "learning_rate": 4.493232323232323e-06, "loss": 6.7636268615722654, "step": 11035 }, { "epoch": 0.1104, "grad_norm": 7.85897159576416, "learning_rate": 4.492979797979798e-06, "loss": 6.71899642944336, "step": 11040 }, { "epoch": 0.11045, "grad_norm": 21.141061782836914, "learning_rate": 4.492727272727273e-06, "loss": 6.232658767700196, "step": 11045 }, { "epoch": 0.1105, "grad_norm": 22.92469024658203, "learning_rate": 4.492474747474747e-06, "loss": 5.92541275024414, "step": 11050 }, { "epoch": 0.11055, "grad_norm": 6.057345390319824, "learning_rate": 4.492222222222223e-06, "loss": 6.809288787841797, "step": 11055 }, { "epoch": 0.1106, "grad_norm": 9.18460464477539, "learning_rate": 4.491969696969697e-06, "loss": 6.75438232421875, "step": 11060 }, { "epoch": 0.11065, "grad_norm": 6.677265167236328, "learning_rate": 4.491717171717172e-06, "loss": 6.7246650695800785, "step": 11065 }, { "epoch": 0.1107, "grad_norm": 7.268076419830322, "learning_rate": 4.491464646464647e-06, "loss": 6.7602790832519535, "step": 11070 }, { "epoch": 0.11075, "grad_norm": 6.997748374938965, "learning_rate": 4.491212121212122e-06, "loss": 6.655445098876953, "step": 11075 }, { "epoch": 0.1108, "grad_norm": 7.6041259765625, "learning_rate": 4.490959595959597e-06, "loss": 6.736250305175782, "step": 11080 }, { "epoch": 0.11085, "grad_norm": 9.084789276123047, "learning_rate": 4.490707070707071e-06, "loss": 6.77698974609375, "step": 11085 }, { "epoch": 0.1109, "grad_norm": 6.969231128692627, "learning_rate": 4.490454545454546e-06, "loss": 6.740325927734375, "step": 11090 }, { "epoch": 0.11095, "grad_norm": 6.80157470703125, "learning_rate": 4.490202020202021e-06, "loss": 6.74328384399414, "step": 11095 }, { "epoch": 0.111, "grad_norm": 9.709576606750488, "learning_rate": 4.489949494949495e-06, "loss": 6.6937408447265625, "step": 11100 }, { "epoch": 0.11105, "grad_norm": 4.7287421226501465, "learning_rate": 4.48969696969697e-06, "loss": 6.773529052734375, "step": 11105 }, { "epoch": 0.1111, "grad_norm": 8.136981964111328, "learning_rate": 4.4894444444444446e-06, "loss": 6.731253814697266, "step": 11110 }, { "epoch": 0.11115, "grad_norm": 7.635490417480469, "learning_rate": 4.48919191919192e-06, "loss": 6.7756401062011715, "step": 11115 }, { "epoch": 0.1112, "grad_norm": 8.241446495056152, "learning_rate": 4.488939393939395e-06, "loss": 6.71661605834961, "step": 11120 }, { "epoch": 0.11125, "grad_norm": 6.649551868438721, "learning_rate": 4.488686868686869e-06, "loss": 6.728081512451172, "step": 11125 }, { "epoch": 0.1113, "grad_norm": 8.836507797241211, "learning_rate": 4.488434343434344e-06, "loss": 6.7904296875, "step": 11130 }, { "epoch": 0.11135, "grad_norm": 7.39402437210083, "learning_rate": 4.4881818181818186e-06, "loss": 6.727620697021484, "step": 11135 }, { "epoch": 0.1114, "grad_norm": 6.096280574798584, "learning_rate": 4.487929292929293e-06, "loss": 6.675344848632813, "step": 11140 }, { "epoch": 0.11145, "grad_norm": 9.598369598388672, "learning_rate": 4.487676767676768e-06, "loss": 6.932107543945312, "step": 11145 }, { "epoch": 0.1115, "grad_norm": 7.327721118927002, "learning_rate": 4.4874242424242425e-06, "loss": 6.844270324707031, "step": 11150 }, { "epoch": 0.11155, "grad_norm": 8.765827178955078, "learning_rate": 4.487171717171718e-06, "loss": 6.764921569824219, "step": 11155 }, { "epoch": 0.1116, "grad_norm": 7.266519546508789, "learning_rate": 4.4869191919191926e-06, "loss": 6.740020751953125, "step": 11160 }, { "epoch": 0.11165, "grad_norm": 6.997776985168457, "learning_rate": 4.486666666666667e-06, "loss": 6.740631866455078, "step": 11165 }, { "epoch": 0.1117, "grad_norm": 7.6577935218811035, "learning_rate": 4.486414141414142e-06, "loss": 6.6790412902832035, "step": 11170 }, { "epoch": 0.11175, "grad_norm": 9.849678993225098, "learning_rate": 4.4861616161616165e-06, "loss": 6.750882720947265, "step": 11175 }, { "epoch": 0.1118, "grad_norm": 8.67566967010498, "learning_rate": 4.485909090909091e-06, "loss": 6.727978515625, "step": 11180 }, { "epoch": 0.11185, "grad_norm": 7.9100341796875, "learning_rate": 4.485656565656566e-06, "loss": 6.667558288574218, "step": 11185 }, { "epoch": 0.1119, "grad_norm": 7.463539123535156, "learning_rate": 4.48540404040404e-06, "loss": 6.784141540527344, "step": 11190 }, { "epoch": 0.11195, "grad_norm": 7.002195835113525, "learning_rate": 4.485151515151516e-06, "loss": 6.7454170227050785, "step": 11195 }, { "epoch": 0.112, "grad_norm": 8.038938522338867, "learning_rate": 4.4848989898989904e-06, "loss": 6.7054901123046875, "step": 11200 }, { "epoch": 0.11205, "grad_norm": 6.37700080871582, "learning_rate": 4.484646464646465e-06, "loss": 6.708972930908203, "step": 11205 }, { "epoch": 0.1121, "grad_norm": 7.8927483558654785, "learning_rate": 4.48439393939394e-06, "loss": 6.7218780517578125, "step": 11210 }, { "epoch": 0.11215, "grad_norm": 6.519976615905762, "learning_rate": 4.484141414141414e-06, "loss": 6.702114868164062, "step": 11215 }, { "epoch": 0.1122, "grad_norm": 4.526371002197266, "learning_rate": 4.483888888888889e-06, "loss": 6.752705383300781, "step": 11220 }, { "epoch": 0.11225, "grad_norm": 10.73451042175293, "learning_rate": 4.483636363636364e-06, "loss": 6.745791625976563, "step": 11225 }, { "epoch": 0.1123, "grad_norm": 8.42340087890625, "learning_rate": 4.483383838383838e-06, "loss": 6.794837951660156, "step": 11230 }, { "epoch": 0.11235, "grad_norm": 6.101979732513428, "learning_rate": 4.483131313131314e-06, "loss": 6.89209213256836, "step": 11235 }, { "epoch": 0.1124, "grad_norm": 8.59909725189209, "learning_rate": 4.482878787878788e-06, "loss": 6.68564453125, "step": 11240 }, { "epoch": 0.11245, "grad_norm": 8.73400592803955, "learning_rate": 4.482626262626263e-06, "loss": 6.692226409912109, "step": 11245 }, { "epoch": 0.1125, "grad_norm": 7.014405727386475, "learning_rate": 4.482373737373738e-06, "loss": 6.748796081542968, "step": 11250 }, { "epoch": 0.11255, "grad_norm": 8.66714096069336, "learning_rate": 4.482121212121213e-06, "loss": 6.717597198486328, "step": 11255 }, { "epoch": 0.1126, "grad_norm": 9.031023979187012, "learning_rate": 4.481868686868687e-06, "loss": 6.709268188476562, "step": 11260 }, { "epoch": 0.11265, "grad_norm": 6.963812351226807, "learning_rate": 4.4816161616161615e-06, "loss": 6.751233673095703, "step": 11265 }, { "epoch": 0.1127, "grad_norm": 6.63762903213501, "learning_rate": 4.481363636363636e-06, "loss": 6.70263671875, "step": 11270 }, { "epoch": 0.11275, "grad_norm": 7.901537895202637, "learning_rate": 4.481111111111112e-06, "loss": 6.673733520507812, "step": 11275 }, { "epoch": 0.1128, "grad_norm": 7.541499614715576, "learning_rate": 4.480858585858586e-06, "loss": 6.725592041015625, "step": 11280 }, { "epoch": 0.11285, "grad_norm": 8.314555168151855, "learning_rate": 4.480606060606061e-06, "loss": 6.734666442871093, "step": 11285 }, { "epoch": 0.1129, "grad_norm": 7.230580806732178, "learning_rate": 4.4803535353535355e-06, "loss": 6.746113586425781, "step": 11290 }, { "epoch": 0.11295, "grad_norm": 6.362032413482666, "learning_rate": 4.480101010101011e-06, "loss": 6.692689514160156, "step": 11295 }, { "epoch": 0.113, "grad_norm": 10.312271118164062, "learning_rate": 4.479848484848486e-06, "loss": 6.682109069824219, "step": 11300 }, { "epoch": 0.11305, "grad_norm": 9.879929542541504, "learning_rate": 4.47959595959596e-06, "loss": 6.658822631835937, "step": 11305 }, { "epoch": 0.1131, "grad_norm": 7.774340629577637, "learning_rate": 4.479343434343435e-06, "loss": 6.665317535400391, "step": 11310 }, { "epoch": 0.11315, "grad_norm": 10.222126007080078, "learning_rate": 4.4790909090909095e-06, "loss": 6.708981323242187, "step": 11315 }, { "epoch": 0.1132, "grad_norm": 8.628718376159668, "learning_rate": 4.478838383838384e-06, "loss": 6.725852966308594, "step": 11320 }, { "epoch": 0.11325, "grad_norm": 7.848747730255127, "learning_rate": 4.478585858585859e-06, "loss": 6.725019836425782, "step": 11325 }, { "epoch": 0.1133, "grad_norm": 8.479503631591797, "learning_rate": 4.478333333333334e-06, "loss": 6.705209350585937, "step": 11330 }, { "epoch": 0.11335, "grad_norm": 7.807716369628906, "learning_rate": 4.478080808080809e-06, "loss": 6.62719955444336, "step": 11335 }, { "epoch": 0.1134, "grad_norm": 8.242722511291504, "learning_rate": 4.4778282828282835e-06, "loss": 6.726288604736328, "step": 11340 }, { "epoch": 0.11345, "grad_norm": 8.608518600463867, "learning_rate": 4.477575757575758e-06, "loss": 6.7047874450683596, "step": 11345 }, { "epoch": 0.1135, "grad_norm": 7.618953704833984, "learning_rate": 4.477323232323233e-06, "loss": 6.682763671875, "step": 11350 }, { "epoch": 0.11355, "grad_norm": 5.997776985168457, "learning_rate": 4.477070707070707e-06, "loss": 6.758094024658203, "step": 11355 }, { "epoch": 0.1136, "grad_norm": 5.493123531341553, "learning_rate": 4.476818181818182e-06, "loss": 6.714211273193359, "step": 11360 }, { "epoch": 0.11365, "grad_norm": 8.262296676635742, "learning_rate": 4.476565656565657e-06, "loss": 6.78668212890625, "step": 11365 }, { "epoch": 0.1137, "grad_norm": 7.427249431610107, "learning_rate": 4.476313131313132e-06, "loss": 6.64178695678711, "step": 11370 }, { "epoch": 0.11375, "grad_norm": 6.889303207397461, "learning_rate": 4.476060606060607e-06, "loss": 6.643212890625, "step": 11375 }, { "epoch": 0.1138, "grad_norm": 6.8834075927734375, "learning_rate": 4.475808080808081e-06, "loss": 6.699423217773438, "step": 11380 }, { "epoch": 0.11385, "grad_norm": 8.278688430786133, "learning_rate": 4.475555555555556e-06, "loss": 6.679457092285157, "step": 11385 }, { "epoch": 0.1139, "grad_norm": 7.574573993682861, "learning_rate": 4.475303030303031e-06, "loss": 6.698388671875, "step": 11390 }, { "epoch": 0.11395, "grad_norm": 7.504183769226074, "learning_rate": 4.475050505050505e-06, "loss": 6.670927429199219, "step": 11395 }, { "epoch": 0.114, "grad_norm": 7.981639862060547, "learning_rate": 4.47479797979798e-06, "loss": 6.735358428955078, "step": 11400 }, { "epoch": 0.11405, "grad_norm": 7.867335319519043, "learning_rate": 4.4745454545454545e-06, "loss": 6.712498474121094, "step": 11405 }, { "epoch": 0.1141, "grad_norm": 7.965330123901367, "learning_rate": 4.47429292929293e-06, "loss": 6.731381225585937, "step": 11410 }, { "epoch": 0.11415, "grad_norm": 4.916170120239258, "learning_rate": 4.474040404040405e-06, "loss": 6.70898666381836, "step": 11415 }, { "epoch": 0.1142, "grad_norm": 8.271017074584961, "learning_rate": 4.473787878787879e-06, "loss": 6.738390350341797, "step": 11420 }, { "epoch": 0.11425, "grad_norm": 6.844845771789551, "learning_rate": 4.473535353535354e-06, "loss": 6.706993103027344, "step": 11425 }, { "epoch": 0.1143, "grad_norm": 9.135414123535156, "learning_rate": 4.4732828282828285e-06, "loss": 6.681982421875, "step": 11430 }, { "epoch": 0.11435, "grad_norm": 7.757597923278809, "learning_rate": 4.473030303030303e-06, "loss": 6.736601257324219, "step": 11435 }, { "epoch": 0.1144, "grad_norm": 5.6090826988220215, "learning_rate": 4.472777777777778e-06, "loss": 6.654732513427734, "step": 11440 }, { "epoch": 0.11445, "grad_norm": 7.505268096923828, "learning_rate": 4.472525252525252e-06, "loss": 6.674571228027344, "step": 11445 }, { "epoch": 0.1145, "grad_norm": 7.081607341766357, "learning_rate": 4.472272727272728e-06, "loss": 6.692670440673828, "step": 11450 }, { "epoch": 0.11455, "grad_norm": 8.922418594360352, "learning_rate": 4.4720202020202025e-06, "loss": 6.743119812011718, "step": 11455 }, { "epoch": 0.1146, "grad_norm": 6.165041446685791, "learning_rate": 4.471767676767677e-06, "loss": 6.819284057617187, "step": 11460 }, { "epoch": 0.11465, "grad_norm": 8.778921127319336, "learning_rate": 4.471515151515152e-06, "loss": 6.644107818603516, "step": 11465 }, { "epoch": 0.1147, "grad_norm": 5.432400703430176, "learning_rate": 4.471262626262627e-06, "loss": 6.6879119873046875, "step": 11470 }, { "epoch": 0.11475, "grad_norm": 6.742037773132324, "learning_rate": 4.471010101010102e-06, "loss": 6.717813873291016, "step": 11475 }, { "epoch": 0.1148, "grad_norm": 6.924907684326172, "learning_rate": 4.470757575757576e-06, "loss": 6.645891571044922, "step": 11480 }, { "epoch": 0.11485, "grad_norm": 6.382384777069092, "learning_rate": 4.47050505050505e-06, "loss": 6.680551910400391, "step": 11485 }, { "epoch": 0.1149, "grad_norm": 8.071208000183105, "learning_rate": 4.470252525252526e-06, "loss": 6.690335845947265, "step": 11490 }, { "epoch": 0.11495, "grad_norm": 7.243064880371094, "learning_rate": 4.47e-06, "loss": 6.706063842773437, "step": 11495 }, { "epoch": 0.115, "grad_norm": 8.365403175354004, "learning_rate": 4.469747474747475e-06, "loss": 6.6935783386230465, "step": 11500 }, { "epoch": 0.11505, "grad_norm": 8.666587829589844, "learning_rate": 4.46949494949495e-06, "loss": 6.689654541015625, "step": 11505 }, { "epoch": 0.1151, "grad_norm": 8.05927848815918, "learning_rate": 4.469242424242425e-06, "loss": 6.6711265563964846, "step": 11510 }, { "epoch": 0.11515, "grad_norm": 6.215064525604248, "learning_rate": 4.4689898989899e-06, "loss": 6.703315734863281, "step": 11515 }, { "epoch": 0.1152, "grad_norm": 5.5478715896606445, "learning_rate": 4.468737373737374e-06, "loss": 6.701638031005859, "step": 11520 }, { "epoch": 0.11525, "grad_norm": 12.100967407226562, "learning_rate": 4.468484848484849e-06, "loss": 6.734468078613281, "step": 11525 }, { "epoch": 0.1153, "grad_norm": 7.502073764801025, "learning_rate": 4.468232323232324e-06, "loss": 6.670558166503906, "step": 11530 }, { "epoch": 0.11535, "grad_norm": 9.038009643554688, "learning_rate": 4.467979797979798e-06, "loss": 6.630006408691406, "step": 11535 }, { "epoch": 0.1154, "grad_norm": 6.378006935119629, "learning_rate": 4.467727272727273e-06, "loss": 6.72205810546875, "step": 11540 }, { "epoch": 0.11545, "grad_norm": 8.568671226501465, "learning_rate": 4.4674747474747475e-06, "loss": 6.673194122314453, "step": 11545 }, { "epoch": 0.1155, "grad_norm": 6.345792770385742, "learning_rate": 4.467222222222223e-06, "loss": 6.666522979736328, "step": 11550 }, { "epoch": 0.11555, "grad_norm": 13.264775276184082, "learning_rate": 4.466969696969698e-06, "loss": 6.751813507080078, "step": 11555 }, { "epoch": 0.1156, "grad_norm": 8.407709121704102, "learning_rate": 4.466717171717172e-06, "loss": 6.8270416259765625, "step": 11560 }, { "epoch": 0.11565, "grad_norm": 8.207230567932129, "learning_rate": 4.466464646464647e-06, "loss": 6.656624603271484, "step": 11565 }, { "epoch": 0.1157, "grad_norm": 7.467211723327637, "learning_rate": 4.4662121212121215e-06, "loss": 6.682273864746094, "step": 11570 }, { "epoch": 0.11575, "grad_norm": 9.120359420776367, "learning_rate": 4.465959595959596e-06, "loss": 6.729044342041016, "step": 11575 }, { "epoch": 0.1158, "grad_norm": 7.418501853942871, "learning_rate": 4.465707070707071e-06, "loss": 6.659566497802734, "step": 11580 }, { "epoch": 0.11585, "grad_norm": 7.646873474121094, "learning_rate": 4.4654545454545454e-06, "loss": 6.701144409179688, "step": 11585 }, { "epoch": 0.1159, "grad_norm": 5.415029525756836, "learning_rate": 4.465202020202021e-06, "loss": 6.700233459472656, "step": 11590 }, { "epoch": 0.11595, "grad_norm": 6.256311416625977, "learning_rate": 4.4649494949494955e-06, "loss": 6.705789184570312, "step": 11595 }, { "epoch": 0.116, "grad_norm": 10.637125015258789, "learning_rate": 4.46469696969697e-06, "loss": 6.645533752441406, "step": 11600 }, { "epoch": 0.11605, "grad_norm": 7.625982284545898, "learning_rate": 4.464444444444445e-06, "loss": 6.713024139404297, "step": 11605 }, { "epoch": 0.1161, "grad_norm": 7.428358554840088, "learning_rate": 4.4641919191919194e-06, "loss": 6.672904205322266, "step": 11610 }, { "epoch": 0.11615, "grad_norm": 7.514422416687012, "learning_rate": 4.463939393939394e-06, "loss": 6.66387710571289, "step": 11615 }, { "epoch": 0.1162, "grad_norm": 9.918282508850098, "learning_rate": 4.463686868686869e-06, "loss": 6.632412719726562, "step": 11620 }, { "epoch": 0.11625, "grad_norm": 7.652980804443359, "learning_rate": 4.463434343434343e-06, "loss": 6.608798980712891, "step": 11625 }, { "epoch": 0.1163, "grad_norm": 6.865577220916748, "learning_rate": 4.463181818181819e-06, "loss": 6.696461486816406, "step": 11630 }, { "epoch": 0.11635, "grad_norm": 9.791476249694824, "learning_rate": 4.4629292929292934e-06, "loss": 6.674513244628907, "step": 11635 }, { "epoch": 0.1164, "grad_norm": 7.629696846008301, "learning_rate": 4.462676767676768e-06, "loss": 6.689553070068359, "step": 11640 }, { "epoch": 0.11645, "grad_norm": 7.263654708862305, "learning_rate": 4.462424242424243e-06, "loss": 6.642758178710937, "step": 11645 }, { "epoch": 0.1165, "grad_norm": 8.054839134216309, "learning_rate": 4.462171717171717e-06, "loss": 6.745068359375, "step": 11650 }, { "epoch": 0.11655, "grad_norm": 4.872268199920654, "learning_rate": 4.461919191919192e-06, "loss": 6.779031372070312, "step": 11655 }, { "epoch": 0.1166, "grad_norm": 6.660367012023926, "learning_rate": 4.461666666666667e-06, "loss": 6.656312561035156, "step": 11660 }, { "epoch": 0.11665, "grad_norm": 7.92232084274292, "learning_rate": 4.461414141414141e-06, "loss": 6.671250915527343, "step": 11665 }, { "epoch": 0.1167, "grad_norm": 9.506084442138672, "learning_rate": 4.461161616161617e-06, "loss": 6.680210113525391, "step": 11670 }, { "epoch": 0.11675, "grad_norm": 7.788951873779297, "learning_rate": 4.460909090909091e-06, "loss": 6.682133483886719, "step": 11675 }, { "epoch": 0.1168, "grad_norm": 7.584954261779785, "learning_rate": 4.460656565656566e-06, "loss": 6.695178985595703, "step": 11680 }, { "epoch": 0.11685, "grad_norm": 9.551252365112305, "learning_rate": 4.460404040404041e-06, "loss": 6.658078002929687, "step": 11685 }, { "epoch": 0.1169, "grad_norm": 6.786887168884277, "learning_rate": 4.460151515151516e-06, "loss": 6.624845123291015, "step": 11690 }, { "epoch": 0.11695, "grad_norm": 8.554399490356445, "learning_rate": 4.459898989898991e-06, "loss": 6.676464080810547, "step": 11695 }, { "epoch": 0.117, "grad_norm": 3.9109599590301514, "learning_rate": 4.4596464646464645e-06, "loss": 6.581046295166016, "step": 11700 }, { "epoch": 0.11705, "grad_norm": 8.787460327148438, "learning_rate": 4.459393939393939e-06, "loss": 6.678755187988282, "step": 11705 }, { "epoch": 0.1171, "grad_norm": 6.879083633422852, "learning_rate": 4.459141414141415e-06, "loss": 6.735958099365234, "step": 11710 }, { "epoch": 0.11715, "grad_norm": 5.870671272277832, "learning_rate": 4.458888888888889e-06, "loss": 6.762865447998047, "step": 11715 }, { "epoch": 0.1172, "grad_norm": 8.176920890808105, "learning_rate": 4.458636363636364e-06, "loss": 6.6583984375, "step": 11720 }, { "epoch": 0.11725, "grad_norm": 7.126004695892334, "learning_rate": 4.4583838383838385e-06, "loss": 6.732073974609375, "step": 11725 }, { "epoch": 0.1173, "grad_norm": 5.918406963348389, "learning_rate": 4.458131313131314e-06, "loss": 6.664118957519531, "step": 11730 }, { "epoch": 0.11735, "grad_norm": 6.6982197761535645, "learning_rate": 4.4578787878787886e-06, "loss": 6.740029144287109, "step": 11735 }, { "epoch": 0.1174, "grad_norm": 9.963910102844238, "learning_rate": 4.457626262626263e-06, "loss": 6.642306518554688, "step": 11740 }, { "epoch": 0.11745, "grad_norm": 7.812318801879883, "learning_rate": 4.457373737373738e-06, "loss": 6.644652557373047, "step": 11745 }, { "epoch": 0.1175, "grad_norm": 6.962085247039795, "learning_rate": 4.4571212121212125e-06, "loss": 6.706044006347656, "step": 11750 }, { "epoch": 0.11755, "grad_norm": 6.053156852722168, "learning_rate": 4.456868686868687e-06, "loss": 6.867371368408203, "step": 11755 }, { "epoch": 0.1176, "grad_norm": 6.02919864654541, "learning_rate": 4.456616161616162e-06, "loss": 6.672666931152344, "step": 11760 }, { "epoch": 0.11765, "grad_norm": 8.394669532775879, "learning_rate": 4.456363636363637e-06, "loss": 6.635498809814453, "step": 11765 }, { "epoch": 0.1177, "grad_norm": 6.778385162353516, "learning_rate": 4.456111111111112e-06, "loss": 6.6498771667480465, "step": 11770 }, { "epoch": 0.11775, "grad_norm": 7.313399314880371, "learning_rate": 4.4558585858585865e-06, "loss": 6.659189605712891, "step": 11775 }, { "epoch": 0.1178, "grad_norm": 9.210928916931152, "learning_rate": 4.455606060606061e-06, "loss": 6.7036285400390625, "step": 11780 }, { "epoch": 0.11785, "grad_norm": 6.570183277130127, "learning_rate": 4.455353535353536e-06, "loss": 6.667906951904297, "step": 11785 }, { "epoch": 0.1179, "grad_norm": 6.3342061042785645, "learning_rate": 4.45510101010101e-06, "loss": 6.638532257080078, "step": 11790 }, { "epoch": 0.11795, "grad_norm": 6.82385778427124, "learning_rate": 4.454848484848485e-06, "loss": 6.535960388183594, "step": 11795 }, { "epoch": 0.118, "grad_norm": 8.59762191772461, "learning_rate": 4.45459595959596e-06, "loss": 6.675711822509766, "step": 11800 }, { "epoch": 0.11805, "grad_norm": 6.960986137390137, "learning_rate": 4.454343434343435e-06, "loss": 6.6502685546875, "step": 11805 }, { "epoch": 0.1181, "grad_norm": 7.035397052764893, "learning_rate": 4.45409090909091e-06, "loss": 6.622183990478516, "step": 11810 }, { "epoch": 0.11815, "grad_norm": 6.743802547454834, "learning_rate": 4.453838383838384e-06, "loss": 6.69317626953125, "step": 11815 }, { "epoch": 0.1182, "grad_norm": 7.679192543029785, "learning_rate": 4.453585858585859e-06, "loss": 6.6605674743652346, "step": 11820 }, { "epoch": 0.11825, "grad_norm": 6.481393814086914, "learning_rate": 4.453333333333334e-06, "loss": 6.670430755615234, "step": 11825 }, { "epoch": 0.1183, "grad_norm": 7.1831231117248535, "learning_rate": 4.453080808080808e-06, "loss": 6.683278656005859, "step": 11830 }, { "epoch": 0.11835, "grad_norm": 8.244776725769043, "learning_rate": 4.452828282828283e-06, "loss": 6.6725410461425785, "step": 11835 }, { "epoch": 0.1184, "grad_norm": 5.682984828948975, "learning_rate": 4.4525757575757575e-06, "loss": 6.664590454101562, "step": 11840 }, { "epoch": 0.11845, "grad_norm": 8.123457908630371, "learning_rate": 4.452323232323233e-06, "loss": 6.669721984863282, "step": 11845 }, { "epoch": 0.1185, "grad_norm": 6.573432922363281, "learning_rate": 4.452070707070708e-06, "loss": 6.654356384277344, "step": 11850 }, { "epoch": 0.11855, "grad_norm": 8.003045082092285, "learning_rate": 4.451818181818182e-06, "loss": 6.636386108398438, "step": 11855 }, { "epoch": 0.1186, "grad_norm": 6.324294567108154, "learning_rate": 4.451565656565657e-06, "loss": 6.640099334716797, "step": 11860 }, { "epoch": 0.11865, "grad_norm": 10.288304328918457, "learning_rate": 4.4513131313131315e-06, "loss": 6.587314605712891, "step": 11865 }, { "epoch": 0.1187, "grad_norm": 6.497466087341309, "learning_rate": 4.451060606060606e-06, "loss": 6.7092140197753904, "step": 11870 }, { "epoch": 0.11875, "grad_norm": 14.039261817932129, "learning_rate": 4.450808080808081e-06, "loss": 6.606791687011719, "step": 11875 }, { "epoch": 0.1188, "grad_norm": 7.966790676116943, "learning_rate": 4.450555555555555e-06, "loss": 6.621028137207031, "step": 11880 }, { "epoch": 0.11885, "grad_norm": 6.3787689208984375, "learning_rate": 4.450303030303031e-06, "loss": 6.643340301513672, "step": 11885 }, { "epoch": 0.1189, "grad_norm": 6.665733814239502, "learning_rate": 4.4500505050505055e-06, "loss": 6.609262084960937, "step": 11890 }, { "epoch": 0.11895, "grad_norm": 8.705538749694824, "learning_rate": 4.44979797979798e-06, "loss": 6.732475280761719, "step": 11895 }, { "epoch": 0.119, "grad_norm": 6.567404747009277, "learning_rate": 4.449545454545455e-06, "loss": 6.688424682617187, "step": 11900 }, { "epoch": 0.11905, "grad_norm": 12.193634986877441, "learning_rate": 4.44929292929293e-06, "loss": 6.970988464355469, "step": 11905 }, { "epoch": 0.1191, "grad_norm": 8.7346773147583, "learning_rate": 4.449040404040405e-06, "loss": 6.712151336669922, "step": 11910 }, { "epoch": 0.11915, "grad_norm": 7.14931058883667, "learning_rate": 4.4487878787878795e-06, "loss": 6.658013916015625, "step": 11915 }, { "epoch": 0.1192, "grad_norm": 6.501655578613281, "learning_rate": 4.448535353535354e-06, "loss": 6.640967559814453, "step": 11920 }, { "epoch": 0.11925, "grad_norm": 6.928194522857666, "learning_rate": 4.448282828282829e-06, "loss": 6.589490509033203, "step": 11925 }, { "epoch": 0.1193, "grad_norm": 6.944122314453125, "learning_rate": 4.448030303030303e-06, "loss": 6.62933578491211, "step": 11930 }, { "epoch": 0.11935, "grad_norm": 6.914945125579834, "learning_rate": 4.447777777777778e-06, "loss": 6.702698516845703, "step": 11935 }, { "epoch": 0.1194, "grad_norm": 7.620368003845215, "learning_rate": 4.447525252525253e-06, "loss": 6.603787231445312, "step": 11940 }, { "epoch": 0.11945, "grad_norm": 7.265963554382324, "learning_rate": 4.447272727272728e-06, "loss": 6.907044982910156, "step": 11945 }, { "epoch": 0.1195, "grad_norm": 7.310561656951904, "learning_rate": 4.447020202020203e-06, "loss": 6.605617523193359, "step": 11950 }, { "epoch": 0.11955, "grad_norm": 7.808162689208984, "learning_rate": 4.446767676767677e-06, "loss": 6.651712799072266, "step": 11955 }, { "epoch": 0.1196, "grad_norm": 7.8476738929748535, "learning_rate": 4.446515151515152e-06, "loss": 6.608367919921875, "step": 11960 }, { "epoch": 0.11965, "grad_norm": 5.923673629760742, "learning_rate": 4.446262626262627e-06, "loss": 6.7051025390625, "step": 11965 }, { "epoch": 0.1197, "grad_norm": 6.562834739685059, "learning_rate": 4.446010101010101e-06, "loss": 6.685700988769531, "step": 11970 }, { "epoch": 0.11975, "grad_norm": 7.240127086639404, "learning_rate": 4.445757575757576e-06, "loss": 6.6345672607421875, "step": 11975 }, { "epoch": 0.1198, "grad_norm": 6.063279151916504, "learning_rate": 4.4455050505050505e-06, "loss": 6.681693267822266, "step": 11980 }, { "epoch": 0.11985, "grad_norm": 7.512234210968018, "learning_rate": 4.445252525252526e-06, "loss": 6.641051483154297, "step": 11985 }, { "epoch": 0.1199, "grad_norm": 6.206341743469238, "learning_rate": 4.445000000000001e-06, "loss": 6.649131774902344, "step": 11990 }, { "epoch": 0.11995, "grad_norm": 7.6759772300720215, "learning_rate": 4.444747474747475e-06, "loss": 6.6482666015625, "step": 11995 }, { "epoch": 0.12, "grad_norm": 5.828083038330078, "learning_rate": 4.44449494949495e-06, "loss": 6.659750366210938, "step": 12000 }, { "epoch": 0.12005, "grad_norm": 10.237680435180664, "learning_rate": 4.4442424242424245e-06, "loss": 6.752980041503906, "step": 12005 }, { "epoch": 0.1201, "grad_norm": 6.018224239349365, "learning_rate": 4.443989898989899e-06, "loss": 6.684152984619141, "step": 12010 }, { "epoch": 0.12015, "grad_norm": 3.822580099105835, "learning_rate": 4.443737373737374e-06, "loss": 6.661377716064453, "step": 12015 }, { "epoch": 0.1202, "grad_norm": 4.100390434265137, "learning_rate": 4.443484848484848e-06, "loss": 6.60692138671875, "step": 12020 }, { "epoch": 0.12025, "grad_norm": 8.469488143920898, "learning_rate": 4.443232323232324e-06, "loss": 6.599698638916015, "step": 12025 }, { "epoch": 0.1203, "grad_norm": 7.663736343383789, "learning_rate": 4.4429797979797985e-06, "loss": 6.65811767578125, "step": 12030 }, { "epoch": 0.12035, "grad_norm": 7.562498092651367, "learning_rate": 4.442727272727273e-06, "loss": 6.6384635925292965, "step": 12035 }, { "epoch": 0.1204, "grad_norm": 7.32038688659668, "learning_rate": 4.442474747474748e-06, "loss": 6.6385650634765625, "step": 12040 }, { "epoch": 0.12045, "grad_norm": 6.187070846557617, "learning_rate": 4.442222222222222e-06, "loss": 6.576398468017578, "step": 12045 }, { "epoch": 0.1205, "grad_norm": 6.874589443206787, "learning_rate": 4.441969696969697e-06, "loss": 6.658609008789062, "step": 12050 }, { "epoch": 0.12055, "grad_norm": 7.506141662597656, "learning_rate": 4.441717171717172e-06, "loss": 6.571054840087891, "step": 12055 }, { "epoch": 0.1206, "grad_norm": 6.021216869354248, "learning_rate": 4.441464646464646e-06, "loss": 6.648954772949219, "step": 12060 }, { "epoch": 0.12065, "grad_norm": 9.796515464782715, "learning_rate": 4.441212121212122e-06, "loss": 6.743791198730468, "step": 12065 }, { "epoch": 0.1207, "grad_norm": 6.498764991760254, "learning_rate": 4.440959595959596e-06, "loss": 6.698743438720703, "step": 12070 }, { "epoch": 0.12075, "grad_norm": 7.1423726081848145, "learning_rate": 4.440707070707071e-06, "loss": 6.66296157836914, "step": 12075 }, { "epoch": 0.1208, "grad_norm": 5.8570027351379395, "learning_rate": 4.440454545454546e-06, "loss": 6.659297943115234, "step": 12080 }, { "epoch": 0.12085, "grad_norm": 6.949860572814941, "learning_rate": 4.440202020202021e-06, "loss": 6.620537567138672, "step": 12085 }, { "epoch": 0.1209, "grad_norm": 8.25634479522705, "learning_rate": 4.439949494949495e-06, "loss": 6.648139953613281, "step": 12090 }, { "epoch": 0.12095, "grad_norm": 5.866413116455078, "learning_rate": 4.4396969696969696e-06, "loss": 6.656912994384766, "step": 12095 }, { "epoch": 0.121, "grad_norm": 8.111223220825195, "learning_rate": 4.439444444444444e-06, "loss": 6.693880462646485, "step": 12100 }, { "epoch": 0.12105, "grad_norm": 9.190844535827637, "learning_rate": 4.43919191919192e-06, "loss": 6.510372924804687, "step": 12105 }, { "epoch": 0.1211, "grad_norm": 9.486174583435059, "learning_rate": 4.438939393939394e-06, "loss": 6.503070068359375, "step": 12110 }, { "epoch": 0.12115, "grad_norm": 8.201495170593262, "learning_rate": 4.438686868686869e-06, "loss": 6.673249816894531, "step": 12115 }, { "epoch": 0.1212, "grad_norm": 5.619077682495117, "learning_rate": 4.4384343434343436e-06, "loss": 6.624261474609375, "step": 12120 }, { "epoch": 0.12125, "grad_norm": 8.20879077911377, "learning_rate": 4.438181818181819e-06, "loss": 6.64574966430664, "step": 12125 }, { "epoch": 0.1213, "grad_norm": 7.750214099884033, "learning_rate": 4.437929292929294e-06, "loss": 6.645883178710937, "step": 12130 }, { "epoch": 0.12135, "grad_norm": 8.035916328430176, "learning_rate": 4.437676767676768e-06, "loss": 6.646780395507813, "step": 12135 }, { "epoch": 0.1214, "grad_norm": 6.070853233337402, "learning_rate": 4.437424242424243e-06, "loss": 6.615254974365234, "step": 12140 }, { "epoch": 0.12145, "grad_norm": 9.431777954101562, "learning_rate": 4.4371717171717176e-06, "loss": 6.672303771972656, "step": 12145 }, { "epoch": 0.1215, "grad_norm": 7.599648952484131, "learning_rate": 4.436919191919192e-06, "loss": 6.661110687255859, "step": 12150 }, { "epoch": 0.12155, "grad_norm": 7.767519474029541, "learning_rate": 4.436666666666667e-06, "loss": 6.676577758789063, "step": 12155 }, { "epoch": 0.1216, "grad_norm": 6.382675647735596, "learning_rate": 4.4364141414141415e-06, "loss": 6.645722961425781, "step": 12160 }, { "epoch": 0.12165, "grad_norm": 7.07301139831543, "learning_rate": 4.436161616161617e-06, "loss": 6.629798126220703, "step": 12165 }, { "epoch": 0.1217, "grad_norm": 5.670305252075195, "learning_rate": 4.4359090909090916e-06, "loss": 6.661373901367187, "step": 12170 }, { "epoch": 0.12175, "grad_norm": 7.340723991394043, "learning_rate": 4.435656565656566e-06, "loss": 6.6374259948730465, "step": 12175 }, { "epoch": 0.1218, "grad_norm": 6.339460372924805, "learning_rate": 4.435404040404041e-06, "loss": 6.648237609863282, "step": 12180 }, { "epoch": 0.12185, "grad_norm": 7.217440605163574, "learning_rate": 4.4351515151515155e-06, "loss": 6.628875732421875, "step": 12185 }, { "epoch": 0.1219, "grad_norm": 5.911125183105469, "learning_rate": 4.43489898989899e-06, "loss": 6.673748016357422, "step": 12190 }, { "epoch": 0.12195, "grad_norm": 8.736795425415039, "learning_rate": 4.434646464646465e-06, "loss": 6.678082275390625, "step": 12195 }, { "epoch": 0.122, "grad_norm": 6.857839107513428, "learning_rate": 4.43439393939394e-06, "loss": 6.585089874267578, "step": 12200 }, { "epoch": 0.12205, "grad_norm": 8.641898155212402, "learning_rate": 4.434141414141415e-06, "loss": 6.646121978759766, "step": 12205 }, { "epoch": 0.1221, "grad_norm": 5.431166172027588, "learning_rate": 4.4338888888888894e-06, "loss": 6.612884521484375, "step": 12210 }, { "epoch": 0.12215, "grad_norm": 6.868825435638428, "learning_rate": 4.433636363636364e-06, "loss": 6.6126152038574215, "step": 12215 }, { "epoch": 0.1222, "grad_norm": 6.38327169418335, "learning_rate": 4.433383838383839e-06, "loss": 6.592034912109375, "step": 12220 }, { "epoch": 0.12225, "grad_norm": 8.744927406311035, "learning_rate": 4.433131313131313e-06, "loss": 6.619538116455078, "step": 12225 }, { "epoch": 0.1223, "grad_norm": 7.746210098266602, "learning_rate": 4.432878787878788e-06, "loss": 6.618421936035157, "step": 12230 }, { "epoch": 0.12235, "grad_norm": 7.580576419830322, "learning_rate": 4.432626262626263e-06, "loss": 6.587223052978516, "step": 12235 }, { "epoch": 0.1224, "grad_norm": 9.400856971740723, "learning_rate": 4.432373737373738e-06, "loss": 6.710143280029297, "step": 12240 }, { "epoch": 0.12245, "grad_norm": 6.981375694274902, "learning_rate": 4.432121212121213e-06, "loss": 6.675993347167969, "step": 12245 }, { "epoch": 0.1225, "grad_norm": 8.844234466552734, "learning_rate": 4.431868686868687e-06, "loss": 6.5468284606933596, "step": 12250 }, { "epoch": 0.12255, "grad_norm": 8.11227035522461, "learning_rate": 4.431616161616162e-06, "loss": 6.632419586181641, "step": 12255 }, { "epoch": 0.1226, "grad_norm": 6.88632869720459, "learning_rate": 4.431363636363637e-06, "loss": 6.624640655517578, "step": 12260 }, { "epoch": 0.12265, "grad_norm": 7.080599784851074, "learning_rate": 4.431111111111111e-06, "loss": 6.6368865966796875, "step": 12265 }, { "epoch": 0.1227, "grad_norm": 7.134051322937012, "learning_rate": 4.430858585858586e-06, "loss": 6.661359405517578, "step": 12270 }, { "epoch": 0.12275, "grad_norm": 6.474223613739014, "learning_rate": 4.4306060606060605e-06, "loss": 6.767222595214844, "step": 12275 }, { "epoch": 0.1228, "grad_norm": 7.766777515411377, "learning_rate": 4.430353535353536e-06, "loss": 6.591732025146484, "step": 12280 }, { "epoch": 0.12285, "grad_norm": 7.16113805770874, "learning_rate": 4.430101010101011e-06, "loss": 6.656890106201172, "step": 12285 }, { "epoch": 0.1229, "grad_norm": 9.583966255187988, "learning_rate": 4.429848484848485e-06, "loss": 6.879824829101563, "step": 12290 }, { "epoch": 0.12295, "grad_norm": 9.517423629760742, "learning_rate": 4.42959595959596e-06, "loss": 6.602201843261719, "step": 12295 }, { "epoch": 0.123, "grad_norm": 5.747146129608154, "learning_rate": 4.429343434343435e-06, "loss": 6.620672607421875, "step": 12300 }, { "epoch": 0.12305, "grad_norm": 8.343412399291992, "learning_rate": 4.42909090909091e-06, "loss": 6.618527221679687, "step": 12305 }, { "epoch": 0.1231, "grad_norm": 5.71321964263916, "learning_rate": 4.428838383838384e-06, "loss": 6.663050842285156, "step": 12310 }, { "epoch": 0.12315, "grad_norm": 7.199941635131836, "learning_rate": 4.428585858585858e-06, "loss": 6.6380561828613285, "step": 12315 }, { "epoch": 0.1232, "grad_norm": 4.853968620300293, "learning_rate": 4.428333333333334e-06, "loss": 6.722941589355469, "step": 12320 }, { "epoch": 0.12325, "grad_norm": 6.74652624130249, "learning_rate": 4.4280808080808085e-06, "loss": 6.613443756103516, "step": 12325 }, { "epoch": 0.1233, "grad_norm": 7.306309700012207, "learning_rate": 4.427828282828283e-06, "loss": 6.663143920898437, "step": 12330 }, { "epoch": 0.12335, "grad_norm": 7.852407932281494, "learning_rate": 4.427575757575758e-06, "loss": 6.668875122070313, "step": 12335 }, { "epoch": 0.1234, "grad_norm": 9.298074722290039, "learning_rate": 4.427323232323233e-06, "loss": 6.625917053222656, "step": 12340 }, { "epoch": 0.12345, "grad_norm": 9.462701797485352, "learning_rate": 4.427070707070708e-06, "loss": 6.691188812255859, "step": 12345 }, { "epoch": 0.1235, "grad_norm": 9.513928413391113, "learning_rate": 4.4268181818181825e-06, "loss": 6.603678131103516, "step": 12350 }, { "epoch": 0.12355, "grad_norm": 5.003096103668213, "learning_rate": 4.426565656565657e-06, "loss": 6.619847106933594, "step": 12355 }, { "epoch": 0.1236, "grad_norm": 8.565526008605957, "learning_rate": 4.426313131313132e-06, "loss": 6.556051635742188, "step": 12360 }, { "epoch": 0.12365, "grad_norm": 5.429032325744629, "learning_rate": 4.426060606060606e-06, "loss": 6.689067077636719, "step": 12365 }, { "epoch": 0.1237, "grad_norm": 8.978614807128906, "learning_rate": 4.425808080808081e-06, "loss": 6.6162162780761715, "step": 12370 }, { "epoch": 0.12375, "grad_norm": 7.164863109588623, "learning_rate": 4.425555555555556e-06, "loss": 6.609568786621094, "step": 12375 }, { "epoch": 0.1238, "grad_norm": 6.471225738525391, "learning_rate": 4.425303030303031e-06, "loss": 6.673133850097656, "step": 12380 }, { "epoch": 0.12385, "grad_norm": 5.225468635559082, "learning_rate": 4.425050505050506e-06, "loss": 6.641944122314453, "step": 12385 }, { "epoch": 0.1239, "grad_norm": 6.548422336578369, "learning_rate": 4.42479797979798e-06, "loss": 6.622144317626953, "step": 12390 }, { "epoch": 0.12395, "grad_norm": 12.78561782836914, "learning_rate": 4.424545454545455e-06, "loss": 6.630912780761719, "step": 12395 }, { "epoch": 0.124, "grad_norm": 8.174867630004883, "learning_rate": 4.42429292929293e-06, "loss": 6.566593933105469, "step": 12400 }, { "epoch": 0.12405, "grad_norm": 6.9312214851379395, "learning_rate": 4.424040404040404e-06, "loss": 6.608587646484375, "step": 12405 }, { "epoch": 0.1241, "grad_norm": 7.813210964202881, "learning_rate": 4.423787878787879e-06, "loss": 6.631421661376953, "step": 12410 }, { "epoch": 0.12415, "grad_norm": 3.4682302474975586, "learning_rate": 4.4235353535353535e-06, "loss": 6.609188842773437, "step": 12415 }, { "epoch": 0.1242, "grad_norm": 7.337265491485596, "learning_rate": 4.423282828282829e-06, "loss": 6.617091369628906, "step": 12420 }, { "epoch": 0.12425, "grad_norm": 5.893402099609375, "learning_rate": 4.423030303030304e-06, "loss": 6.583802795410156, "step": 12425 }, { "epoch": 0.1243, "grad_norm": 7.616423606872559, "learning_rate": 4.422777777777778e-06, "loss": 6.605377197265625, "step": 12430 }, { "epoch": 0.12435, "grad_norm": 8.125720977783203, "learning_rate": 4.422525252525253e-06, "loss": 6.592890930175781, "step": 12435 }, { "epoch": 0.1244, "grad_norm": 8.013082504272461, "learning_rate": 4.4222727272727275e-06, "loss": 6.587590026855469, "step": 12440 }, { "epoch": 0.12445, "grad_norm": 6.299210071563721, "learning_rate": 4.422020202020202e-06, "loss": 6.584461975097656, "step": 12445 }, { "epoch": 0.1245, "grad_norm": 6.78801965713501, "learning_rate": 4.421767676767677e-06, "loss": 6.744377899169922, "step": 12450 }, { "epoch": 0.12455, "grad_norm": 7.512958526611328, "learning_rate": 4.421515151515151e-06, "loss": 6.508950805664062, "step": 12455 }, { "epoch": 0.1246, "grad_norm": 6.876133441925049, "learning_rate": 4.421262626262627e-06, "loss": 6.559793090820312, "step": 12460 }, { "epoch": 0.12465, "grad_norm": 7.147305011749268, "learning_rate": 4.4210101010101015e-06, "loss": 6.575061798095703, "step": 12465 }, { "epoch": 0.1247, "grad_norm": 4.269500732421875, "learning_rate": 4.420757575757576e-06, "loss": 6.578298187255859, "step": 12470 }, { "epoch": 0.12475, "grad_norm": 7.159173011779785, "learning_rate": 4.420505050505051e-06, "loss": 6.607083892822265, "step": 12475 }, { "epoch": 0.1248, "grad_norm": 10.041376113891602, "learning_rate": 4.420252525252525e-06, "loss": 6.557640075683594, "step": 12480 }, { "epoch": 0.12485, "grad_norm": 7.027968406677246, "learning_rate": 4.42e-06, "loss": 6.760285949707031, "step": 12485 }, { "epoch": 0.1249, "grad_norm": 7.417386054992676, "learning_rate": 4.419747474747475e-06, "loss": 6.6073768615722654, "step": 12490 }, { "epoch": 0.12495, "grad_norm": 5.643060207366943, "learning_rate": 4.419494949494949e-06, "loss": 6.626945495605469, "step": 12495 }, { "epoch": 0.125, "grad_norm": 5.586667060852051, "learning_rate": 4.419242424242425e-06, "loss": 6.586241912841797, "step": 12500 }, { "epoch": 0.12505, "grad_norm": 5.892370223999023, "learning_rate": 4.418989898989899e-06, "loss": 6.625193786621094, "step": 12505 }, { "epoch": 0.1251, "grad_norm": 6.440410614013672, "learning_rate": 4.418737373737374e-06, "loss": 6.6524711608886715, "step": 12510 }, { "epoch": 0.12515, "grad_norm": 5.779638290405273, "learning_rate": 4.418484848484849e-06, "loss": 6.596590423583985, "step": 12515 }, { "epoch": 0.1252, "grad_norm": 4.878487586975098, "learning_rate": 4.418232323232324e-06, "loss": 6.614450073242187, "step": 12520 }, { "epoch": 0.12525, "grad_norm": 10.206794738769531, "learning_rate": 4.417979797979799e-06, "loss": 6.653176116943359, "step": 12525 }, { "epoch": 0.1253, "grad_norm": 6.641039848327637, "learning_rate": 4.417727272727273e-06, "loss": 6.623502349853515, "step": 12530 }, { "epoch": 0.12535, "grad_norm": 4.900634288787842, "learning_rate": 4.417474747474747e-06, "loss": 6.589875793457031, "step": 12535 }, { "epoch": 0.1254, "grad_norm": 6.62321662902832, "learning_rate": 4.417222222222223e-06, "loss": 6.622923278808594, "step": 12540 }, { "epoch": 0.12545, "grad_norm": 6.860763072967529, "learning_rate": 4.416969696969697e-06, "loss": 6.550096130371093, "step": 12545 }, { "epoch": 0.1255, "grad_norm": 6.757787704467773, "learning_rate": 4.416717171717172e-06, "loss": 6.606890869140625, "step": 12550 }, { "epoch": 0.12555, "grad_norm": 5.761354923248291, "learning_rate": 4.4164646464646465e-06, "loss": 6.6196952819824215, "step": 12555 }, { "epoch": 0.1256, "grad_norm": 8.060144424438477, "learning_rate": 4.416212121212122e-06, "loss": 6.603024291992187, "step": 12560 }, { "epoch": 0.12565, "grad_norm": 4.574258327484131, "learning_rate": 4.415959595959597e-06, "loss": 6.577935791015625, "step": 12565 }, { "epoch": 0.1257, "grad_norm": 7.494865894317627, "learning_rate": 4.415707070707071e-06, "loss": 6.599676513671875, "step": 12570 }, { "epoch": 0.12575, "grad_norm": 7.7642502784729, "learning_rate": 4.415454545454546e-06, "loss": 6.575405120849609, "step": 12575 }, { "epoch": 0.1258, "grad_norm": 5.876504898071289, "learning_rate": 4.4152020202020205e-06, "loss": 6.620777893066406, "step": 12580 }, { "epoch": 0.12585, "grad_norm": 8.761871337890625, "learning_rate": 4.414949494949495e-06, "loss": 6.608722686767578, "step": 12585 }, { "epoch": 0.1259, "grad_norm": 6.383142471313477, "learning_rate": 4.41469696969697e-06, "loss": 6.654683685302734, "step": 12590 }, { "epoch": 0.12595, "grad_norm": 5.874724388122559, "learning_rate": 4.4144444444444444e-06, "loss": 6.5874580383300785, "step": 12595 }, { "epoch": 0.126, "grad_norm": 6.039971351623535, "learning_rate": 4.41419191919192e-06, "loss": 6.618856048583984, "step": 12600 }, { "epoch": 0.12605, "grad_norm": 5.681456089019775, "learning_rate": 4.4139393939393945e-06, "loss": 6.601901245117188, "step": 12605 }, { "epoch": 0.1261, "grad_norm": 6.7694993019104, "learning_rate": 4.413686868686869e-06, "loss": 6.564167785644531, "step": 12610 }, { "epoch": 0.12615, "grad_norm": 10.624802589416504, "learning_rate": 4.413434343434344e-06, "loss": 6.6367134094238285, "step": 12615 }, { "epoch": 0.1262, "grad_norm": 8.073686599731445, "learning_rate": 4.4131818181818184e-06, "loss": 6.515515899658203, "step": 12620 }, { "epoch": 0.12625, "grad_norm": 7.613341808319092, "learning_rate": 4.412929292929293e-06, "loss": 6.624018859863281, "step": 12625 }, { "epoch": 0.1263, "grad_norm": 6.098074913024902, "learning_rate": 4.412676767676768e-06, "loss": 6.598291015625, "step": 12630 }, { "epoch": 0.12635, "grad_norm": 7.839053630828857, "learning_rate": 4.412424242424243e-06, "loss": 6.589786529541016, "step": 12635 }, { "epoch": 0.1264, "grad_norm": 5.42978572845459, "learning_rate": 4.412171717171718e-06, "loss": 6.57183609008789, "step": 12640 }, { "epoch": 0.12645, "grad_norm": 7.431525707244873, "learning_rate": 4.4119191919191924e-06, "loss": 6.5600028991699215, "step": 12645 }, { "epoch": 0.1265, "grad_norm": 6.343501091003418, "learning_rate": 4.411666666666667e-06, "loss": 6.629693603515625, "step": 12650 }, { "epoch": 0.12655, "grad_norm": 6.990732669830322, "learning_rate": 4.411414141414142e-06, "loss": 6.664942932128906, "step": 12655 }, { "epoch": 0.1266, "grad_norm": 7.520793437957764, "learning_rate": 4.411161616161616e-06, "loss": 6.527961730957031, "step": 12660 }, { "epoch": 0.12665, "grad_norm": 7.496662139892578, "learning_rate": 4.410909090909091e-06, "loss": 6.802854919433594, "step": 12665 }, { "epoch": 0.1267, "grad_norm": 7.071206092834473, "learning_rate": 4.410656565656566e-06, "loss": 6.580714416503906, "step": 12670 }, { "epoch": 0.12675, "grad_norm": 6.568024635314941, "learning_rate": 4.410404040404041e-06, "loss": 6.624336242675781, "step": 12675 }, { "epoch": 0.1268, "grad_norm": 26.84260368347168, "learning_rate": 4.410151515151516e-06, "loss": 6.463529205322265, "step": 12680 }, { "epoch": 0.12685, "grad_norm": 5.697927474975586, "learning_rate": 4.40989898989899e-06, "loss": 6.549830627441406, "step": 12685 }, { "epoch": 0.1269, "grad_norm": 9.276164054870605, "learning_rate": 4.409646464646465e-06, "loss": 6.628563690185547, "step": 12690 }, { "epoch": 0.12695, "grad_norm": 4.908891201019287, "learning_rate": 4.4093939393939404e-06, "loss": 6.6826019287109375, "step": 12695 }, { "epoch": 0.127, "grad_norm": 7.830609321594238, "learning_rate": 4.409141414141414e-06, "loss": 6.596215057373047, "step": 12700 }, { "epoch": 0.12705, "grad_norm": 6.696679592132568, "learning_rate": 4.408888888888889e-06, "loss": 6.591298675537109, "step": 12705 }, { "epoch": 0.1271, "grad_norm": 7.296292781829834, "learning_rate": 4.4086363636363635e-06, "loss": 6.615792846679687, "step": 12710 }, { "epoch": 0.12715, "grad_norm": 7.801994323730469, "learning_rate": 4.408383838383839e-06, "loss": 6.610444641113281, "step": 12715 }, { "epoch": 0.1272, "grad_norm": 15.497446060180664, "learning_rate": 4.408131313131314e-06, "loss": 6.888043212890625, "step": 12720 }, { "epoch": 0.12725, "grad_norm": 6.213104724884033, "learning_rate": 4.407878787878788e-06, "loss": 6.599344635009766, "step": 12725 }, { "epoch": 0.1273, "grad_norm": 5.288925647735596, "learning_rate": 4.407626262626263e-06, "loss": 6.5864402770996096, "step": 12730 }, { "epoch": 0.12735, "grad_norm": 7.666492462158203, "learning_rate": 4.407373737373738e-06, "loss": 6.575246429443359, "step": 12735 }, { "epoch": 0.1274, "grad_norm": 6.724938869476318, "learning_rate": 4.407121212121213e-06, "loss": 6.5879058837890625, "step": 12740 }, { "epoch": 0.12745, "grad_norm": 6.928877830505371, "learning_rate": 4.4068686868686876e-06, "loss": 6.635107421875, "step": 12745 }, { "epoch": 0.1275, "grad_norm": 6.935092449188232, "learning_rate": 4.406616161616162e-06, "loss": 6.6074066162109375, "step": 12750 }, { "epoch": 0.12755, "grad_norm": 6.70556640625, "learning_rate": 4.406363636363637e-06, "loss": 6.6036323547363285, "step": 12755 }, { "epoch": 0.1276, "grad_norm": 7.489940643310547, "learning_rate": 4.4061111111111115e-06, "loss": 6.587263488769532, "step": 12760 }, { "epoch": 0.12765, "grad_norm": 4.819136619567871, "learning_rate": 4.405858585858586e-06, "loss": 6.581439208984375, "step": 12765 }, { "epoch": 0.1277, "grad_norm": 7.038593292236328, "learning_rate": 4.405606060606061e-06, "loss": 6.711682891845703, "step": 12770 }, { "epoch": 0.12775, "grad_norm": 7.531280517578125, "learning_rate": 4.405353535353536e-06, "loss": 6.627598571777344, "step": 12775 }, { "epoch": 0.1278, "grad_norm": 7.9522786140441895, "learning_rate": 4.405101010101011e-06, "loss": 6.623530578613281, "step": 12780 }, { "epoch": 0.12785, "grad_norm": 5.589216232299805, "learning_rate": 4.4048484848484855e-06, "loss": 6.635856628417969, "step": 12785 }, { "epoch": 0.1279, "grad_norm": 7.282034873962402, "learning_rate": 4.40459595959596e-06, "loss": 6.610883331298828, "step": 12790 }, { "epoch": 0.12795, "grad_norm": 10.175748825073242, "learning_rate": 4.404343434343435e-06, "loss": 6.667753601074219, "step": 12795 }, { "epoch": 0.128, "grad_norm": 6.393523693084717, "learning_rate": 4.404090909090909e-06, "loss": 6.633856201171875, "step": 12800 }, { "epoch": 0.12805, "grad_norm": 7.585989952087402, "learning_rate": 4.403838383838384e-06, "loss": 6.643668365478516, "step": 12805 }, { "epoch": 0.1281, "grad_norm": 6.162771701812744, "learning_rate": 4.403585858585859e-06, "loss": 6.5789939880371096, "step": 12810 }, { "epoch": 0.12815, "grad_norm": 4.587239742279053, "learning_rate": 4.403333333333334e-06, "loss": 6.527218627929687, "step": 12815 }, { "epoch": 0.1282, "grad_norm": 8.336813926696777, "learning_rate": 4.403080808080809e-06, "loss": 6.583148193359375, "step": 12820 }, { "epoch": 0.12825, "grad_norm": 7.570258140563965, "learning_rate": 4.402828282828283e-06, "loss": 6.553457641601563, "step": 12825 }, { "epoch": 0.1283, "grad_norm": 7.336277961730957, "learning_rate": 4.402575757575758e-06, "loss": 6.610113525390625, "step": 12830 }, { "epoch": 0.12835, "grad_norm": 7.090346336364746, "learning_rate": 4.402323232323233e-06, "loss": 6.573282623291016, "step": 12835 }, { "epoch": 0.1284, "grad_norm": 8.756439208984375, "learning_rate": 4.402070707070707e-06, "loss": 6.635145568847657, "step": 12840 }, { "epoch": 0.12845, "grad_norm": 5.032799243927002, "learning_rate": 4.401818181818182e-06, "loss": 6.5738883972167965, "step": 12845 }, { "epoch": 0.1285, "grad_norm": 5.437840938568115, "learning_rate": 4.4015656565656565e-06, "loss": 6.618701171875, "step": 12850 }, { "epoch": 0.12855, "grad_norm": 7.423827171325684, "learning_rate": 4.401313131313132e-06, "loss": 6.712397766113281, "step": 12855 }, { "epoch": 0.1286, "grad_norm": 4.9987287521362305, "learning_rate": 4.401060606060607e-06, "loss": 6.5731658935546875, "step": 12860 }, { "epoch": 0.12865, "grad_norm": 6.1990180015563965, "learning_rate": 4.400808080808081e-06, "loss": 6.565669250488281, "step": 12865 }, { "epoch": 0.1287, "grad_norm": 7.156524658203125, "learning_rate": 4.400555555555556e-06, "loss": 6.585256958007813, "step": 12870 }, { "epoch": 0.12875, "grad_norm": 5.346681118011475, "learning_rate": 4.4003030303030305e-06, "loss": 6.608418273925781, "step": 12875 }, { "epoch": 0.1288, "grad_norm": 8.012805938720703, "learning_rate": 4.400050505050505e-06, "loss": 6.546559906005859, "step": 12880 }, { "epoch": 0.12885, "grad_norm": 6.4220075607299805, "learning_rate": 4.39979797979798e-06, "loss": 6.568792724609375, "step": 12885 }, { "epoch": 0.1289, "grad_norm": 7.722383975982666, "learning_rate": 4.399545454545454e-06, "loss": 6.6194618225097654, "step": 12890 }, { "epoch": 0.12895, "grad_norm": 6.677108287811279, "learning_rate": 4.39929292929293e-06, "loss": 6.629985809326172, "step": 12895 }, { "epoch": 0.129, "grad_norm": 6.774127006530762, "learning_rate": 4.3990404040404045e-06, "loss": 6.546192169189453, "step": 12900 }, { "epoch": 0.12905, "grad_norm": 15.723640441894531, "learning_rate": 4.398787878787879e-06, "loss": 6.60009994506836, "step": 12905 }, { "epoch": 0.1291, "grad_norm": 7.306577682495117, "learning_rate": 4.398535353535354e-06, "loss": 6.585028076171875, "step": 12910 }, { "epoch": 0.12915, "grad_norm": 5.321139335632324, "learning_rate": 4.398282828282829e-06, "loss": 6.58838119506836, "step": 12915 }, { "epoch": 0.1292, "grad_norm": 5.884561061859131, "learning_rate": 4.398030303030303e-06, "loss": 6.575692749023437, "step": 12920 }, { "epoch": 0.12925, "grad_norm": 7.3779778480529785, "learning_rate": 4.397777777777778e-06, "loss": 6.769737243652344, "step": 12925 }, { "epoch": 0.1293, "grad_norm": 6.844476699829102, "learning_rate": 4.397525252525252e-06, "loss": 6.523384094238281, "step": 12930 }, { "epoch": 0.12935, "grad_norm": 5.474123954772949, "learning_rate": 4.397272727272728e-06, "loss": 6.542234802246094, "step": 12935 }, { "epoch": 0.1294, "grad_norm": 5.591811656951904, "learning_rate": 4.397020202020202e-06, "loss": 6.5532989501953125, "step": 12940 }, { "epoch": 0.12945, "grad_norm": 6.812725067138672, "learning_rate": 4.396767676767677e-06, "loss": 6.547698211669922, "step": 12945 }, { "epoch": 0.1295, "grad_norm": 5.4568657875061035, "learning_rate": 4.396515151515152e-06, "loss": 6.565674591064453, "step": 12950 }, { "epoch": 0.12955, "grad_norm": 7.566133499145508, "learning_rate": 4.396262626262627e-06, "loss": 6.575655364990235, "step": 12955 }, { "epoch": 0.1296, "grad_norm": 3.689892292022705, "learning_rate": 4.396010101010102e-06, "loss": 6.546958160400391, "step": 12960 }, { "epoch": 0.12965, "grad_norm": 6.145777225494385, "learning_rate": 4.395757575757576e-06, "loss": 6.625157165527344, "step": 12965 }, { "epoch": 0.1297, "grad_norm": 5.322461128234863, "learning_rate": 4.395505050505051e-06, "loss": 6.574761962890625, "step": 12970 }, { "epoch": 0.12975, "grad_norm": 4.9474711418151855, "learning_rate": 4.395252525252526e-06, "loss": 6.590449523925781, "step": 12975 }, { "epoch": 0.1298, "grad_norm": 9.711262702941895, "learning_rate": 4.395e-06, "loss": 6.580953979492188, "step": 12980 }, { "epoch": 0.12985, "grad_norm": 4.619517803192139, "learning_rate": 4.394747474747475e-06, "loss": 6.574144744873047, "step": 12985 }, { "epoch": 0.1299, "grad_norm": 6.642370223999023, "learning_rate": 4.3944949494949495e-06, "loss": 6.826264953613281, "step": 12990 }, { "epoch": 0.12995, "grad_norm": 6.686898231506348, "learning_rate": 4.394242424242425e-06, "loss": 6.681038665771484, "step": 12995 }, { "epoch": 0.13, "grad_norm": 4.450437545776367, "learning_rate": 4.3939898989899e-06, "loss": 6.555269622802735, "step": 13000 }, { "epoch": 0.13005, "grad_norm": 7.127375602722168, "learning_rate": 4.393737373737374e-06, "loss": 6.637094116210937, "step": 13005 }, { "epoch": 0.1301, "grad_norm": 5.599140167236328, "learning_rate": 4.393484848484849e-06, "loss": 6.57909927368164, "step": 13010 }, { "epoch": 0.13015, "grad_norm": 4.385603427886963, "learning_rate": 4.3932323232323235e-06, "loss": 6.583733367919922, "step": 13015 }, { "epoch": 0.1302, "grad_norm": 6.767740726470947, "learning_rate": 4.392979797979798e-06, "loss": 6.556077575683593, "step": 13020 }, { "epoch": 0.13025, "grad_norm": 5.749100208282471, "learning_rate": 4.392727272727273e-06, "loss": 6.562895202636719, "step": 13025 }, { "epoch": 0.1303, "grad_norm": 8.352856636047363, "learning_rate": 4.392474747474747e-06, "loss": 6.786083984375, "step": 13030 }, { "epoch": 0.13035, "grad_norm": 6.729434967041016, "learning_rate": 4.392222222222223e-06, "loss": 6.577222442626953, "step": 13035 }, { "epoch": 0.1304, "grad_norm": 12.371199607849121, "learning_rate": 4.3919696969696975e-06, "loss": 6.68060531616211, "step": 13040 }, { "epoch": 0.13045, "grad_norm": 7.77764892578125, "learning_rate": 4.391717171717172e-06, "loss": 6.6484527587890625, "step": 13045 }, { "epoch": 0.1305, "grad_norm": 7.185997486114502, "learning_rate": 4.391464646464647e-06, "loss": 6.610094451904297, "step": 13050 }, { "epoch": 0.13055, "grad_norm": 5.922632217407227, "learning_rate": 4.391212121212121e-06, "loss": 6.552619171142578, "step": 13055 }, { "epoch": 0.1306, "grad_norm": 5.4697489738464355, "learning_rate": 4.390959595959596e-06, "loss": 6.601950073242188, "step": 13060 }, { "epoch": 0.13065, "grad_norm": 7.527877330780029, "learning_rate": 4.390707070707071e-06, "loss": 6.530242919921875, "step": 13065 }, { "epoch": 0.1307, "grad_norm": 6.332054615020752, "learning_rate": 4.390454545454546e-06, "loss": 6.5843650817871096, "step": 13070 }, { "epoch": 0.13075, "grad_norm": 6.370695114135742, "learning_rate": 4.390202020202021e-06, "loss": 6.588865661621094, "step": 13075 }, { "epoch": 0.1308, "grad_norm": 4.335629940032959, "learning_rate": 4.389949494949495e-06, "loss": 6.467770385742187, "step": 13080 }, { "epoch": 0.13085, "grad_norm": 4.688568592071533, "learning_rate": 4.38969696969697e-06, "loss": 6.533481597900391, "step": 13085 }, { "epoch": 0.1309, "grad_norm": 5.448612689971924, "learning_rate": 4.389444444444445e-06, "loss": 6.604885864257812, "step": 13090 }, { "epoch": 0.13095, "grad_norm": 4.982358932495117, "learning_rate": 4.389191919191919e-06, "loss": 6.5724853515625, "step": 13095 }, { "epoch": 0.131, "grad_norm": 6.2866692543029785, "learning_rate": 4.388939393939394e-06, "loss": 6.552033996582031, "step": 13100 }, { "epoch": 0.13105, "grad_norm": 7.841899871826172, "learning_rate": 4.3886868686868686e-06, "loss": 6.601052856445312, "step": 13105 }, { "epoch": 0.1311, "grad_norm": 5.6587982177734375, "learning_rate": 4.388434343434344e-06, "loss": 6.555398559570312, "step": 13110 }, { "epoch": 0.13115, "grad_norm": 6.541658401489258, "learning_rate": 4.388181818181819e-06, "loss": 6.547983551025391, "step": 13115 }, { "epoch": 0.1312, "grad_norm": 6.143819332122803, "learning_rate": 4.387929292929293e-06, "loss": 6.615617370605468, "step": 13120 }, { "epoch": 0.13125, "grad_norm": 6.301352024078369, "learning_rate": 4.387676767676768e-06, "loss": 6.595268249511719, "step": 13125 }, { "epoch": 0.1313, "grad_norm": 4.854258060455322, "learning_rate": 4.387424242424243e-06, "loss": 6.524568176269531, "step": 13130 }, { "epoch": 0.13135, "grad_norm": 6.151346683502197, "learning_rate": 4.387171717171718e-06, "loss": 6.472438812255859, "step": 13135 }, { "epoch": 0.1314, "grad_norm": 4.431459903717041, "learning_rate": 4.386919191919192e-06, "loss": 6.597514343261719, "step": 13140 }, { "epoch": 0.13145, "grad_norm": 5.507728576660156, "learning_rate": 4.3866666666666665e-06, "loss": 6.638600158691406, "step": 13145 }, { "epoch": 0.1315, "grad_norm": 6.143698215484619, "learning_rate": 4.386414141414142e-06, "loss": 6.5630653381347654, "step": 13150 }, { "epoch": 0.13155, "grad_norm": 9.06602954864502, "learning_rate": 4.3861616161616166e-06, "loss": 6.575822448730468, "step": 13155 }, { "epoch": 0.1316, "grad_norm": 6.003904819488525, "learning_rate": 4.385909090909091e-06, "loss": 6.548851013183594, "step": 13160 }, { "epoch": 0.13165, "grad_norm": 6.66605806350708, "learning_rate": 4.385656565656566e-06, "loss": 6.556060028076172, "step": 13165 }, { "epoch": 0.1317, "grad_norm": 5.614581108093262, "learning_rate": 4.385404040404041e-06, "loss": 6.5536949157714846, "step": 13170 }, { "epoch": 0.13175, "grad_norm": 5.959263324737549, "learning_rate": 4.385151515151516e-06, "loss": 6.5476844787597654, "step": 13175 }, { "epoch": 0.1318, "grad_norm": 7.963070869445801, "learning_rate": 4.3848989898989906e-06, "loss": 6.583737182617187, "step": 13180 }, { "epoch": 0.13185, "grad_norm": 16.012292861938477, "learning_rate": 4.384646464646465e-06, "loss": 6.7764442443847654, "step": 13185 }, { "epoch": 0.1319, "grad_norm": 18.817781448364258, "learning_rate": 4.38439393939394e-06, "loss": 6.875830078125, "step": 13190 }, { "epoch": 0.13195, "grad_norm": 4.896108150482178, "learning_rate": 4.3841414141414144e-06, "loss": 6.605316162109375, "step": 13195 }, { "epoch": 0.132, "grad_norm": 5.910688877105713, "learning_rate": 4.383888888888889e-06, "loss": 6.563930511474609, "step": 13200 }, { "epoch": 0.13205, "grad_norm": 5.109065055847168, "learning_rate": 4.383636363636364e-06, "loss": 6.5531867980957035, "step": 13205 }, { "epoch": 0.1321, "grad_norm": 4.942147731781006, "learning_rate": 4.383383838383839e-06, "loss": 6.587897491455078, "step": 13210 }, { "epoch": 0.13215, "grad_norm": 6.3470916748046875, "learning_rate": 4.383131313131314e-06, "loss": 6.572882843017578, "step": 13215 }, { "epoch": 0.1322, "grad_norm": 6.6825852394104, "learning_rate": 4.3828787878787884e-06, "loss": 6.484651947021485, "step": 13220 }, { "epoch": 0.13225, "grad_norm": 5.534315586090088, "learning_rate": 4.382626262626263e-06, "loss": 6.635595703125, "step": 13225 }, { "epoch": 0.1323, "grad_norm": 48.43961715698242, "learning_rate": 4.382373737373738e-06, "loss": 7.513655090332032, "step": 13230 }, { "epoch": 0.13235, "grad_norm": 8.966623306274414, "learning_rate": 4.382121212121212e-06, "loss": 6.666837310791015, "step": 13235 }, { "epoch": 0.1324, "grad_norm": 5.94663143157959, "learning_rate": 4.381868686868687e-06, "loss": 6.598735809326172, "step": 13240 }, { "epoch": 0.13245, "grad_norm": 8.0265474319458, "learning_rate": 4.381616161616162e-06, "loss": 6.598406219482422, "step": 13245 }, { "epoch": 0.1325, "grad_norm": 7.211330413818359, "learning_rate": 4.381363636363637e-06, "loss": 6.5505523681640625, "step": 13250 }, { "epoch": 0.13255, "grad_norm": 5.78046989440918, "learning_rate": 4.381111111111112e-06, "loss": 6.562543487548828, "step": 13255 }, { "epoch": 0.1326, "grad_norm": 5.83360481262207, "learning_rate": 4.380858585858586e-06, "loss": 6.555998992919922, "step": 13260 }, { "epoch": 0.13265, "grad_norm": 6.59598445892334, "learning_rate": 4.380606060606061e-06, "loss": 6.564450073242187, "step": 13265 }, { "epoch": 0.1327, "grad_norm": 19.441951751708984, "learning_rate": 4.380353535353536e-06, "loss": 6.264900207519531, "step": 13270 }, { "epoch": 0.13275, "grad_norm": 9.619547843933105, "learning_rate": 4.38010101010101e-06, "loss": 5.6240486145019535, "step": 13275 }, { "epoch": 0.1328, "grad_norm": 11.320345878601074, "learning_rate": 4.379848484848485e-06, "loss": 5.4317577362060545, "step": 13280 }, { "epoch": 0.13285, "grad_norm": 10.32242488861084, "learning_rate": 4.3795959595959595e-06, "loss": 5.419979858398437, "step": 13285 }, { "epoch": 0.1329, "grad_norm": 12.549286842346191, "learning_rate": 4.379343434343435e-06, "loss": 5.459717559814453, "step": 13290 }, { "epoch": 0.13295, "grad_norm": 11.920552253723145, "learning_rate": 4.37909090909091e-06, "loss": 5.293944549560547, "step": 13295 }, { "epoch": 0.133, "grad_norm": 7.661688804626465, "learning_rate": 4.378838383838384e-06, "loss": 5.363082504272461, "step": 13300 }, { "epoch": 0.13305, "grad_norm": 9.667998313903809, "learning_rate": 4.378585858585859e-06, "loss": 5.3594818115234375, "step": 13305 }, { "epoch": 0.1331, "grad_norm": 11.059600830078125, "learning_rate": 4.3783333333333335e-06, "loss": 5.2897697448730465, "step": 13310 }, { "epoch": 0.13315, "grad_norm": 22.217756271362305, "learning_rate": 4.378080808080808e-06, "loss": 5.914235687255859, "step": 13315 }, { "epoch": 0.1332, "grad_norm": 6.101000785827637, "learning_rate": 4.377828282828283e-06, "loss": 6.808040618896484, "step": 13320 }, { "epoch": 0.13325, "grad_norm": 15.003532409667969, "learning_rate": 4.377575757575757e-06, "loss": 6.637843322753906, "step": 13325 }, { "epoch": 0.1333, "grad_norm": 6.24652099609375, "learning_rate": 4.377323232323233e-06, "loss": 6.570067596435547, "step": 13330 }, { "epoch": 0.13335, "grad_norm": 6.918397426605225, "learning_rate": 4.3770707070707075e-06, "loss": 6.603662109375, "step": 13335 }, { "epoch": 0.1334, "grad_norm": 10.585373878479004, "learning_rate": 4.376818181818182e-06, "loss": 6.606230926513672, "step": 13340 }, { "epoch": 0.13345, "grad_norm": 7.795679569244385, "learning_rate": 4.376565656565657e-06, "loss": 6.594889831542969, "step": 13345 }, { "epoch": 0.1335, "grad_norm": 6.871470928192139, "learning_rate": 4.376313131313132e-06, "loss": 6.606973266601562, "step": 13350 }, { "epoch": 0.13355, "grad_norm": 6.673295497894287, "learning_rate": 4.376060606060607e-06, "loss": 6.56099853515625, "step": 13355 }, { "epoch": 0.1336, "grad_norm": 4.907089710235596, "learning_rate": 4.3758080808080815e-06, "loss": 6.5595947265625, "step": 13360 }, { "epoch": 0.13365, "grad_norm": 13.015910148620605, "learning_rate": 4.375555555555555e-06, "loss": 6.668220520019531, "step": 13365 }, { "epoch": 0.1337, "grad_norm": 6.62224817276001, "learning_rate": 4.375303030303031e-06, "loss": 6.580960083007812, "step": 13370 }, { "epoch": 0.13375, "grad_norm": 5.373807430267334, "learning_rate": 4.375050505050505e-06, "loss": 6.649559783935547, "step": 13375 }, { "epoch": 0.1338, "grad_norm": 6.667301177978516, "learning_rate": 4.37479797979798e-06, "loss": 6.65179672241211, "step": 13380 }, { "epoch": 0.13385, "grad_norm": 7.2569475173950195, "learning_rate": 4.374545454545455e-06, "loss": 6.611940002441406, "step": 13385 }, { "epoch": 0.1339, "grad_norm": 4.814261436462402, "learning_rate": 4.37429292929293e-06, "loss": 6.521966552734375, "step": 13390 }, { "epoch": 0.13395, "grad_norm": 8.186121940612793, "learning_rate": 4.374040404040405e-06, "loss": 6.5525634765625, "step": 13395 }, { "epoch": 0.134, "grad_norm": 6.1510844230651855, "learning_rate": 4.373787878787879e-06, "loss": 6.566693115234375, "step": 13400 }, { "epoch": 0.13405, "grad_norm": 7.397584438323975, "learning_rate": 4.373535353535354e-06, "loss": 6.541569519042969, "step": 13405 }, { "epoch": 0.1341, "grad_norm": 6.555458068847656, "learning_rate": 4.373282828282829e-06, "loss": 6.522840118408203, "step": 13410 }, { "epoch": 0.13415, "grad_norm": 7.875580310821533, "learning_rate": 4.373030303030303e-06, "loss": 6.558406066894531, "step": 13415 }, { "epoch": 0.1342, "grad_norm": 7.2556939125061035, "learning_rate": 4.372777777777778e-06, "loss": 6.629817199707031, "step": 13420 }, { "epoch": 0.13425, "grad_norm": 6.456410884857178, "learning_rate": 4.3725252525252525e-06, "loss": 6.5772758483886715, "step": 13425 }, { "epoch": 0.1343, "grad_norm": 7.533398628234863, "learning_rate": 4.372272727272728e-06, "loss": 6.564890289306641, "step": 13430 }, { "epoch": 0.13435, "grad_norm": 5.017254829406738, "learning_rate": 4.372020202020203e-06, "loss": 6.579578399658203, "step": 13435 }, { "epoch": 0.1344, "grad_norm": 5.904133319854736, "learning_rate": 4.371767676767677e-06, "loss": 6.537528991699219, "step": 13440 }, { "epoch": 0.13445, "grad_norm": 6.857013702392578, "learning_rate": 4.371515151515152e-06, "loss": 6.626432037353515, "step": 13445 }, { "epoch": 0.1345, "grad_norm": 4.555907726287842, "learning_rate": 4.3712626262626265e-06, "loss": 6.557879638671875, "step": 13450 }, { "epoch": 0.13455, "grad_norm": 5.749830722808838, "learning_rate": 4.371010101010101e-06, "loss": 6.554874420166016, "step": 13455 }, { "epoch": 0.1346, "grad_norm": 7.085402488708496, "learning_rate": 4.370757575757576e-06, "loss": 6.618862152099609, "step": 13460 }, { "epoch": 0.13465, "grad_norm": 7.899674892425537, "learning_rate": 4.370505050505051e-06, "loss": 6.584581756591797, "step": 13465 }, { "epoch": 0.1347, "grad_norm": 5.793210029602051, "learning_rate": 4.370252525252526e-06, "loss": 6.544436645507813, "step": 13470 }, { "epoch": 0.13475, "grad_norm": 5.781255722045898, "learning_rate": 4.3700000000000005e-06, "loss": 6.542974853515625, "step": 13475 }, { "epoch": 0.1348, "grad_norm": 5.642959117889404, "learning_rate": 4.369747474747475e-06, "loss": 6.607247161865234, "step": 13480 }, { "epoch": 0.13485, "grad_norm": 5.0881242752075195, "learning_rate": 4.36949494949495e-06, "loss": 6.552019500732422, "step": 13485 }, { "epoch": 0.1349, "grad_norm": 5.804238796234131, "learning_rate": 4.369242424242424e-06, "loss": 6.604991912841797, "step": 13490 }, { "epoch": 0.13495, "grad_norm": 7.616212368011475, "learning_rate": 4.368989898989899e-06, "loss": 6.495879364013672, "step": 13495 }, { "epoch": 0.135, "grad_norm": 19.99734115600586, "learning_rate": 4.368737373737374e-06, "loss": 6.515738677978516, "step": 13500 }, { "epoch": 0.13505, "grad_norm": 6.203067302703857, "learning_rate": 4.368484848484849e-06, "loss": 6.515697479248047, "step": 13505 }, { "epoch": 0.1351, "grad_norm": 4.324533462524414, "learning_rate": 4.368232323232324e-06, "loss": 6.565006256103516, "step": 13510 }, { "epoch": 0.13515, "grad_norm": 8.419879913330078, "learning_rate": 4.367979797979798e-06, "loss": 6.5975196838378904, "step": 13515 }, { "epoch": 0.1352, "grad_norm": 5.7902140617370605, "learning_rate": 4.367727272727273e-06, "loss": 6.631572723388672, "step": 13520 }, { "epoch": 0.13525, "grad_norm": 6.758319854736328, "learning_rate": 4.3674747474747485e-06, "loss": 6.573725891113281, "step": 13525 }, { "epoch": 0.1353, "grad_norm": 5.251282691955566, "learning_rate": 4.367222222222222e-06, "loss": 6.5599723815917965, "step": 13530 }, { "epoch": 0.13535, "grad_norm": 5.6779632568359375, "learning_rate": 4.366969696969697e-06, "loss": 6.612477111816406, "step": 13535 }, { "epoch": 0.1354, "grad_norm": 3.4199399948120117, "learning_rate": 4.3667171717171716e-06, "loss": 6.575975036621093, "step": 13540 }, { "epoch": 0.13545, "grad_norm": 5.370236873626709, "learning_rate": 4.366464646464647e-06, "loss": 6.550260162353515, "step": 13545 }, { "epoch": 0.1355, "grad_norm": 5.245161056518555, "learning_rate": 4.366212121212122e-06, "loss": 6.619131469726563, "step": 13550 }, { "epoch": 0.13555, "grad_norm": 6.620304584503174, "learning_rate": 4.365959595959596e-06, "loss": 6.539714050292969, "step": 13555 }, { "epoch": 0.1356, "grad_norm": 4.4160871505737305, "learning_rate": 4.365707070707071e-06, "loss": 6.608586120605469, "step": 13560 }, { "epoch": 0.13565, "grad_norm": 5.510900020599365, "learning_rate": 4.365454545454546e-06, "loss": 6.583766937255859, "step": 13565 }, { "epoch": 0.1357, "grad_norm": 6.635898113250732, "learning_rate": 4.365202020202021e-06, "loss": 6.594246673583984, "step": 13570 }, { "epoch": 0.13575, "grad_norm": 4.3367438316345215, "learning_rate": 4.364949494949496e-06, "loss": 6.5484153747558596, "step": 13575 }, { "epoch": 0.1358, "grad_norm": 7.074277877807617, "learning_rate": 4.36469696969697e-06, "loss": 6.5746620178222654, "step": 13580 }, { "epoch": 0.13585, "grad_norm": 5.021775722503662, "learning_rate": 4.364444444444445e-06, "loss": 6.572523498535157, "step": 13585 }, { "epoch": 0.1359, "grad_norm": 5.077556133270264, "learning_rate": 4.3641919191919195e-06, "loss": 6.563087463378906, "step": 13590 }, { "epoch": 0.13595, "grad_norm": 6.487242698669434, "learning_rate": 4.363939393939394e-06, "loss": 6.565785217285156, "step": 13595 }, { "epoch": 0.136, "grad_norm": 4.688015460968018, "learning_rate": 4.363686868686869e-06, "loss": 6.537853240966797, "step": 13600 }, { "epoch": 0.13605, "grad_norm": 5.344944000244141, "learning_rate": 4.363434343434344e-06, "loss": 6.596098327636719, "step": 13605 }, { "epoch": 0.1361, "grad_norm": 6.854960918426514, "learning_rate": 4.363181818181819e-06, "loss": 6.526051330566406, "step": 13610 }, { "epoch": 0.13615, "grad_norm": 8.835715293884277, "learning_rate": 4.3629292929292935e-06, "loss": 6.582119750976562, "step": 13615 }, { "epoch": 0.1362, "grad_norm": 6.784730434417725, "learning_rate": 4.362676767676768e-06, "loss": 6.550890350341797, "step": 13620 }, { "epoch": 0.13625, "grad_norm": 14.88840103149414, "learning_rate": 4.362424242424243e-06, "loss": 6.64081039428711, "step": 13625 }, { "epoch": 0.1363, "grad_norm": 6.062664985656738, "learning_rate": 4.3621717171717174e-06, "loss": 6.587320709228516, "step": 13630 }, { "epoch": 0.13635, "grad_norm": 5.945765972137451, "learning_rate": 4.361919191919192e-06, "loss": 6.554399871826172, "step": 13635 }, { "epoch": 0.1364, "grad_norm": 6.985334396362305, "learning_rate": 4.361666666666667e-06, "loss": 6.515711975097656, "step": 13640 }, { "epoch": 0.13645, "grad_norm": 6.991008758544922, "learning_rate": 4.361414141414142e-06, "loss": 6.512911987304688, "step": 13645 }, { "epoch": 0.1365, "grad_norm": 6.314210414886475, "learning_rate": 4.361161616161617e-06, "loss": 6.515476226806641, "step": 13650 }, { "epoch": 0.13655, "grad_norm": 5.160733222961426, "learning_rate": 4.3609090909090914e-06, "loss": 6.569754028320313, "step": 13655 }, { "epoch": 0.1366, "grad_norm": 5.614889621734619, "learning_rate": 4.360656565656566e-06, "loss": 6.597878265380859, "step": 13660 }, { "epoch": 0.13665, "grad_norm": 7.286579132080078, "learning_rate": 4.360404040404041e-06, "loss": 6.598075103759766, "step": 13665 }, { "epoch": 0.1367, "grad_norm": 5.781999111175537, "learning_rate": 4.360151515151515e-06, "loss": 6.562397766113281, "step": 13670 }, { "epoch": 0.13675, "grad_norm": 4.151571750640869, "learning_rate": 4.35989898989899e-06, "loss": 6.546784210205078, "step": 13675 }, { "epoch": 0.1368, "grad_norm": 4.307172775268555, "learning_rate": 4.359646464646465e-06, "loss": 6.5684967041015625, "step": 13680 }, { "epoch": 0.13685, "grad_norm": 4.636131286621094, "learning_rate": 4.35939393939394e-06, "loss": 6.508258056640625, "step": 13685 }, { "epoch": 0.1369, "grad_norm": 5.407622337341309, "learning_rate": 4.359141414141415e-06, "loss": 6.531399536132812, "step": 13690 }, { "epoch": 0.13695, "grad_norm": 7.6680450439453125, "learning_rate": 4.358888888888889e-06, "loss": 6.560820007324219, "step": 13695 }, { "epoch": 0.137, "grad_norm": 11.78183364868164, "learning_rate": 4.358636363636364e-06, "loss": 6.413780975341797, "step": 13700 }, { "epoch": 0.13705, "grad_norm": 3.590447187423706, "learning_rate": 4.358383838383839e-06, "loss": 6.540304565429688, "step": 13705 }, { "epoch": 0.1371, "grad_norm": 7.167390823364258, "learning_rate": 4.358131313131313e-06, "loss": 6.577391815185547, "step": 13710 }, { "epoch": 0.13715, "grad_norm": 6.223818302154541, "learning_rate": 4.357878787878788e-06, "loss": 6.50611572265625, "step": 13715 }, { "epoch": 0.1372, "grad_norm": 7.719262599945068, "learning_rate": 4.3576262626262625e-06, "loss": 6.550630950927735, "step": 13720 }, { "epoch": 0.13725, "grad_norm": 6.653991222381592, "learning_rate": 4.357373737373738e-06, "loss": 6.5149482727050785, "step": 13725 }, { "epoch": 0.1373, "grad_norm": 5.933197498321533, "learning_rate": 4.357121212121213e-06, "loss": 6.540068817138672, "step": 13730 }, { "epoch": 0.13735, "grad_norm": 4.953078746795654, "learning_rate": 4.356868686868687e-06, "loss": 6.541503143310547, "step": 13735 }, { "epoch": 0.1374, "grad_norm": 7.403383255004883, "learning_rate": 4.356616161616162e-06, "loss": 6.515544891357422, "step": 13740 }, { "epoch": 0.13745, "grad_norm": 5.124805450439453, "learning_rate": 4.356363636363637e-06, "loss": 6.510807800292969, "step": 13745 }, { "epoch": 0.1375, "grad_norm": 5.604551315307617, "learning_rate": 4.356111111111111e-06, "loss": 6.536998748779297, "step": 13750 }, { "epoch": 0.13755, "grad_norm": 5.601884841918945, "learning_rate": 4.355858585858586e-06, "loss": 6.510617065429687, "step": 13755 }, { "epoch": 0.1376, "grad_norm": 7.024634838104248, "learning_rate": 4.35560606060606e-06, "loss": 6.476402282714844, "step": 13760 }, { "epoch": 0.13765, "grad_norm": 5.438458442687988, "learning_rate": 4.355353535353536e-06, "loss": 6.631826782226563, "step": 13765 }, { "epoch": 0.1377, "grad_norm": 5.7325568199157715, "learning_rate": 4.3551010101010105e-06, "loss": 6.7425071716308596, "step": 13770 }, { "epoch": 0.13775, "grad_norm": 5.936344623565674, "learning_rate": 4.354848484848485e-06, "loss": 6.528660583496094, "step": 13775 }, { "epoch": 0.1378, "grad_norm": 8.153071403503418, "learning_rate": 4.35459595959596e-06, "loss": 6.6352485656738285, "step": 13780 }, { "epoch": 0.13785, "grad_norm": 4.747334957122803, "learning_rate": 4.354343434343435e-06, "loss": 6.609022521972657, "step": 13785 }, { "epoch": 0.1379, "grad_norm": 5.052206039428711, "learning_rate": 4.35409090909091e-06, "loss": 6.659009552001953, "step": 13790 }, { "epoch": 0.13795, "grad_norm": 5.520045280456543, "learning_rate": 4.3538383838383845e-06, "loss": 6.514006805419922, "step": 13795 }, { "epoch": 0.138, "grad_norm": 9.580724716186523, "learning_rate": 4.353585858585859e-06, "loss": 6.588554382324219, "step": 13800 }, { "epoch": 0.13805, "grad_norm": 5.170382022857666, "learning_rate": 4.353333333333334e-06, "loss": 6.686502838134766, "step": 13805 }, { "epoch": 0.1381, "grad_norm": 3.7683098316192627, "learning_rate": 4.353080808080808e-06, "loss": 6.688575744628906, "step": 13810 }, { "epoch": 0.13815, "grad_norm": 7.113677978515625, "learning_rate": 4.352828282828283e-06, "loss": 6.544367980957031, "step": 13815 }, { "epoch": 0.1382, "grad_norm": 3.7242441177368164, "learning_rate": 4.352575757575758e-06, "loss": 6.5177459716796875, "step": 13820 }, { "epoch": 0.13825, "grad_norm": 6.222108364105225, "learning_rate": 4.352323232323233e-06, "loss": 6.529122161865234, "step": 13825 }, { "epoch": 0.1383, "grad_norm": 5.999891757965088, "learning_rate": 4.352070707070708e-06, "loss": 6.508694458007812, "step": 13830 }, { "epoch": 0.13835, "grad_norm": 3.7492382526397705, "learning_rate": 4.351818181818182e-06, "loss": 6.55543212890625, "step": 13835 }, { "epoch": 0.1384, "grad_norm": 3.442106008529663, "learning_rate": 4.351565656565657e-06, "loss": 6.860899353027344, "step": 13840 }, { "epoch": 0.13845, "grad_norm": 5.615467548370361, "learning_rate": 4.351313131313132e-06, "loss": 6.484190368652344, "step": 13845 }, { "epoch": 0.1385, "grad_norm": 7.101502418518066, "learning_rate": 4.351060606060606e-06, "loss": 6.746304321289062, "step": 13850 }, { "epoch": 0.13855, "grad_norm": 4.839536190032959, "learning_rate": 4.350808080808081e-06, "loss": 6.5345924377441404, "step": 13855 }, { "epoch": 0.1386, "grad_norm": 4.910141468048096, "learning_rate": 4.3505555555555555e-06, "loss": 6.541117095947266, "step": 13860 }, { "epoch": 0.13865, "grad_norm": 5.360110759735107, "learning_rate": 4.350303030303031e-06, "loss": 6.494764709472657, "step": 13865 }, { "epoch": 0.1387, "grad_norm": 6.496982574462891, "learning_rate": 4.350050505050506e-06, "loss": 6.550455474853516, "step": 13870 }, { "epoch": 0.13875, "grad_norm": 5.692515850067139, "learning_rate": 4.34979797979798e-06, "loss": 6.497555541992187, "step": 13875 }, { "epoch": 0.1388, "grad_norm": 6.436381816864014, "learning_rate": 4.349545454545455e-06, "loss": 6.541709899902344, "step": 13880 }, { "epoch": 0.13885, "grad_norm": 11.637542724609375, "learning_rate": 4.3492929292929295e-06, "loss": 6.546212005615234, "step": 13885 }, { "epoch": 0.1389, "grad_norm": 7.243457317352295, "learning_rate": 4.349040404040404e-06, "loss": 6.525433349609375, "step": 13890 }, { "epoch": 0.13895, "grad_norm": 4.819112777709961, "learning_rate": 4.348787878787879e-06, "loss": 6.531202697753907, "step": 13895 }, { "epoch": 0.139, "grad_norm": 20.725271224975586, "learning_rate": 4.348535353535354e-06, "loss": 6.424302673339843, "step": 13900 }, { "epoch": 0.13905, "grad_norm": 10.850876808166504, "learning_rate": 4.348282828282829e-06, "loss": 6.549897003173828, "step": 13905 }, { "epoch": 0.1391, "grad_norm": 6.797266483306885, "learning_rate": 4.3480303030303035e-06, "loss": 6.547071075439453, "step": 13910 }, { "epoch": 0.13915, "grad_norm": 5.088925838470459, "learning_rate": 4.347777777777778e-06, "loss": 6.507669830322266, "step": 13915 }, { "epoch": 0.1392, "grad_norm": 5.894556999206543, "learning_rate": 4.347525252525253e-06, "loss": 6.509986877441406, "step": 13920 }, { "epoch": 0.13925, "grad_norm": 3.964097738265991, "learning_rate": 4.347272727272727e-06, "loss": 6.553738403320312, "step": 13925 }, { "epoch": 0.1393, "grad_norm": 5.403038024902344, "learning_rate": 4.347020202020202e-06, "loss": 6.585074615478516, "step": 13930 }, { "epoch": 0.13935, "grad_norm": 13.526225090026855, "learning_rate": 4.346767676767677e-06, "loss": 6.461221313476562, "step": 13935 }, { "epoch": 0.1394, "grad_norm": 6.232922077178955, "learning_rate": 4.346515151515152e-06, "loss": 6.48297119140625, "step": 13940 }, { "epoch": 0.13945, "grad_norm": 7.176338195800781, "learning_rate": 4.346262626262627e-06, "loss": 6.52057113647461, "step": 13945 }, { "epoch": 0.1395, "grad_norm": 4.167044639587402, "learning_rate": 4.346010101010101e-06, "loss": 6.5397705078125, "step": 13950 }, { "epoch": 0.13955, "grad_norm": 6.495669841766357, "learning_rate": 4.345757575757576e-06, "loss": 6.540348052978516, "step": 13955 }, { "epoch": 0.1396, "grad_norm": 5.539823055267334, "learning_rate": 4.3455050505050515e-06, "loss": 6.526655578613282, "step": 13960 }, { "epoch": 0.13965, "grad_norm": 3.297114610671997, "learning_rate": 4.345252525252526e-06, "loss": 6.511965942382813, "step": 13965 }, { "epoch": 0.1397, "grad_norm": 6.075845241546631, "learning_rate": 4.345000000000001e-06, "loss": 6.533074951171875, "step": 13970 }, { "epoch": 0.13975, "grad_norm": 4.201687812805176, "learning_rate": 4.3447474747474745e-06, "loss": 6.4918678283691404, "step": 13975 }, { "epoch": 0.1398, "grad_norm": 5.503535747528076, "learning_rate": 4.34449494949495e-06, "loss": 6.53587646484375, "step": 13980 }, { "epoch": 0.13985, "grad_norm": 6.805941581726074, "learning_rate": 4.344242424242425e-06, "loss": 6.528433990478516, "step": 13985 }, { "epoch": 0.1399, "grad_norm": 5.673381805419922, "learning_rate": 4.343989898989899e-06, "loss": 6.478689575195313, "step": 13990 }, { "epoch": 0.13995, "grad_norm": 4.338888168334961, "learning_rate": 4.343737373737374e-06, "loss": 6.580543518066406, "step": 13995 }, { "epoch": 0.14, "grad_norm": 6.228000164031982, "learning_rate": 4.343484848484849e-06, "loss": 6.5130149841308596, "step": 14000 }, { "epoch": 0.14005, "grad_norm": 3.834697961807251, "learning_rate": 4.343232323232324e-06, "loss": 6.514757537841797, "step": 14005 }, { "epoch": 0.1401, "grad_norm": 5.73401403427124, "learning_rate": 4.342979797979799e-06, "loss": 6.605332946777343, "step": 14010 }, { "epoch": 0.14015, "grad_norm": 4.545821189880371, "learning_rate": 4.342727272727273e-06, "loss": 6.638435363769531, "step": 14015 }, { "epoch": 0.1402, "grad_norm": 3.9365694522857666, "learning_rate": 4.342474747474748e-06, "loss": 6.495378112792968, "step": 14020 }, { "epoch": 0.14025, "grad_norm": 4.974645137786865, "learning_rate": 4.3422222222222225e-06, "loss": 6.568733978271484, "step": 14025 }, { "epoch": 0.1403, "grad_norm": 8.039318084716797, "learning_rate": 4.341969696969697e-06, "loss": 6.543017578125, "step": 14030 }, { "epoch": 0.14035, "grad_norm": 5.573063373565674, "learning_rate": 4.341717171717172e-06, "loss": 6.561911010742188, "step": 14035 }, { "epoch": 0.1404, "grad_norm": 3.6700947284698486, "learning_rate": 4.341464646464647e-06, "loss": 6.519844818115234, "step": 14040 }, { "epoch": 0.14045, "grad_norm": 4.803610324859619, "learning_rate": 4.341212121212122e-06, "loss": 6.56133804321289, "step": 14045 }, { "epoch": 0.1405, "grad_norm": 3.6127679347991943, "learning_rate": 4.3409595959595965e-06, "loss": 6.529145050048828, "step": 14050 }, { "epoch": 0.14055, "grad_norm": 5.550541877746582, "learning_rate": 4.340707070707071e-06, "loss": 6.542337799072266, "step": 14055 }, { "epoch": 0.1406, "grad_norm": 6.881302356719971, "learning_rate": 4.340454545454546e-06, "loss": 6.58592529296875, "step": 14060 }, { "epoch": 0.14065, "grad_norm": 7.844677925109863, "learning_rate": 4.34020202020202e-06, "loss": 6.490559387207031, "step": 14065 }, { "epoch": 0.1407, "grad_norm": 6.483325004577637, "learning_rate": 4.339949494949495e-06, "loss": 6.576725006103516, "step": 14070 }, { "epoch": 0.14075, "grad_norm": 5.2703375816345215, "learning_rate": 4.33969696969697e-06, "loss": 6.503639221191406, "step": 14075 }, { "epoch": 0.1408, "grad_norm": 18.594127655029297, "learning_rate": 4.339444444444445e-06, "loss": 6.421221923828125, "step": 14080 }, { "epoch": 0.14085, "grad_norm": 3.941756248474121, "learning_rate": 4.33919191919192e-06, "loss": 6.484239196777343, "step": 14085 }, { "epoch": 0.1409, "grad_norm": 6.215319633483887, "learning_rate": 4.338939393939394e-06, "loss": 6.603050231933594, "step": 14090 }, { "epoch": 0.14095, "grad_norm": 6.176536560058594, "learning_rate": 4.338686868686869e-06, "loss": 6.507418060302735, "step": 14095 }, { "epoch": 0.141, "grad_norm": 6.382437229156494, "learning_rate": 4.338434343434344e-06, "loss": 6.4269775390625, "step": 14100 }, { "epoch": 0.14105, "grad_norm": 5.784091472625732, "learning_rate": 4.338181818181818e-06, "loss": 6.546530151367188, "step": 14105 }, { "epoch": 0.1411, "grad_norm": 6.278915882110596, "learning_rate": 4.337929292929293e-06, "loss": 6.436186218261719, "step": 14110 }, { "epoch": 0.14115, "grad_norm": 5.590086460113525, "learning_rate": 4.3376767676767676e-06, "loss": 6.3966716766357425, "step": 14115 }, { "epoch": 0.1412, "grad_norm": 4.662004470825195, "learning_rate": 4.337424242424243e-06, "loss": 6.557894897460938, "step": 14120 }, { "epoch": 0.14125, "grad_norm": 4.196485996246338, "learning_rate": 4.337171717171718e-06, "loss": 6.550306701660157, "step": 14125 }, { "epoch": 0.1413, "grad_norm": 6.539920806884766, "learning_rate": 4.336919191919192e-06, "loss": 6.534381103515625, "step": 14130 }, { "epoch": 0.14135, "grad_norm": 3.892888307571411, "learning_rate": 4.336666666666667e-06, "loss": 6.527234649658203, "step": 14135 }, { "epoch": 0.1414, "grad_norm": 11.26467227935791, "learning_rate": 4.3364141414141416e-06, "loss": 6.421456146240234, "step": 14140 }, { "epoch": 0.14145, "grad_norm": 6.143599987030029, "learning_rate": 4.336161616161616e-06, "loss": 6.534821319580078, "step": 14145 }, { "epoch": 0.1415, "grad_norm": 16.419330596923828, "learning_rate": 4.335909090909091e-06, "loss": 6.537339782714843, "step": 14150 }, { "epoch": 0.14155, "grad_norm": 6.697803974151611, "learning_rate": 4.3356565656565655e-06, "loss": 6.5183662414550785, "step": 14155 }, { "epoch": 0.1416, "grad_norm": 4.582880020141602, "learning_rate": 4.335404040404041e-06, "loss": 6.456946563720703, "step": 14160 }, { "epoch": 0.14165, "grad_norm": 4.281856536865234, "learning_rate": 4.3351515151515156e-06, "loss": 6.494428253173828, "step": 14165 }, { "epoch": 0.1417, "grad_norm": 6.076306343078613, "learning_rate": 4.33489898989899e-06, "loss": 6.528401184082031, "step": 14170 }, { "epoch": 0.14175, "grad_norm": 6.657958030700684, "learning_rate": 4.334646464646465e-06, "loss": 6.558966064453125, "step": 14175 }, { "epoch": 0.1418, "grad_norm": 4.873692035675049, "learning_rate": 4.33439393939394e-06, "loss": 6.59327392578125, "step": 14180 }, { "epoch": 0.14185, "grad_norm": 2.7678329944610596, "learning_rate": 4.334141414141415e-06, "loss": 6.620573425292969, "step": 14185 }, { "epoch": 0.1419, "grad_norm": 4.473451614379883, "learning_rate": 4.3338888888888896e-06, "loss": 6.516474914550781, "step": 14190 }, { "epoch": 0.14195, "grad_norm": 5.579326629638672, "learning_rate": 4.333636363636363e-06, "loss": 6.552039337158203, "step": 14195 }, { "epoch": 0.142, "grad_norm": 5.514776706695557, "learning_rate": 4.333383838383839e-06, "loss": 6.510921478271484, "step": 14200 }, { "epoch": 0.14205, "grad_norm": 3.8424384593963623, "learning_rate": 4.3331313131313134e-06, "loss": 6.53594970703125, "step": 14205 }, { "epoch": 0.1421, "grad_norm": 4.838391304016113, "learning_rate": 4.332878787878788e-06, "loss": 6.492235565185547, "step": 14210 }, { "epoch": 0.14215, "grad_norm": 4.3304924964904785, "learning_rate": 4.332626262626263e-06, "loss": 6.595969390869141, "step": 14215 }, { "epoch": 0.1422, "grad_norm": 5.4820451736450195, "learning_rate": 4.332373737373738e-06, "loss": 6.551347351074218, "step": 14220 }, { "epoch": 0.14225, "grad_norm": 5.222568035125732, "learning_rate": 4.332121212121213e-06, "loss": 6.486341857910157, "step": 14225 }, { "epoch": 0.1423, "grad_norm": 4.984682083129883, "learning_rate": 4.3318686868686874e-06, "loss": 6.467498016357422, "step": 14230 }, { "epoch": 0.14235, "grad_norm": 6.749375820159912, "learning_rate": 4.331616161616162e-06, "loss": 6.585597991943359, "step": 14235 }, { "epoch": 0.1424, "grad_norm": 3.3506500720977783, "learning_rate": 4.331363636363637e-06, "loss": 6.506601715087891, "step": 14240 }, { "epoch": 0.14245, "grad_norm": 12.950323104858398, "learning_rate": 4.331111111111111e-06, "loss": 6.593412780761719, "step": 14245 }, { "epoch": 0.1425, "grad_norm": 4.347170352935791, "learning_rate": 4.330858585858586e-06, "loss": 6.551225280761718, "step": 14250 }, { "epoch": 0.14255, "grad_norm": 5.522425651550293, "learning_rate": 4.330606060606061e-06, "loss": 6.461836242675782, "step": 14255 }, { "epoch": 0.1426, "grad_norm": 4.25492525100708, "learning_rate": 4.330353535353536e-06, "loss": 6.8859405517578125, "step": 14260 }, { "epoch": 0.14265, "grad_norm": 3.856323480606079, "learning_rate": 4.330101010101011e-06, "loss": 6.534323120117188, "step": 14265 }, { "epoch": 0.1427, "grad_norm": 4.004523754119873, "learning_rate": 4.329848484848485e-06, "loss": 6.623683166503906, "step": 14270 }, { "epoch": 0.14275, "grad_norm": 5.171566486358643, "learning_rate": 4.32959595959596e-06, "loss": 6.52197494506836, "step": 14275 }, { "epoch": 0.1428, "grad_norm": 5.911258697509766, "learning_rate": 4.329343434343435e-06, "loss": 6.562123870849609, "step": 14280 }, { "epoch": 0.14285, "grad_norm": 7.42061710357666, "learning_rate": 4.329090909090909e-06, "loss": 6.523275756835938, "step": 14285 }, { "epoch": 0.1429, "grad_norm": 9.739375114440918, "learning_rate": 4.328838383838384e-06, "loss": 6.4745124816894535, "step": 14290 }, { "epoch": 0.14295, "grad_norm": 7.004310131072998, "learning_rate": 4.3285858585858585e-06, "loss": 6.57169189453125, "step": 14295 }, { "epoch": 0.143, "grad_norm": 4.4141926765441895, "learning_rate": 4.328333333333334e-06, "loss": 6.5318351745605465, "step": 14300 }, { "epoch": 0.14305, "grad_norm": 5.736510276794434, "learning_rate": 4.328080808080809e-06, "loss": 6.5494132995605465, "step": 14305 }, { "epoch": 0.1431, "grad_norm": 13.325483322143555, "learning_rate": 4.327828282828283e-06, "loss": 6.858155822753906, "step": 14310 }, { "epoch": 0.14315, "grad_norm": 10.436591148376465, "learning_rate": 4.327575757575758e-06, "loss": 6.465720367431641, "step": 14315 }, { "epoch": 0.1432, "grad_norm": 4.234917640686035, "learning_rate": 4.3273232323232325e-06, "loss": 6.534442901611328, "step": 14320 }, { "epoch": 0.14325, "grad_norm": 4.1409430503845215, "learning_rate": 4.327070707070707e-06, "loss": 6.514961242675781, "step": 14325 }, { "epoch": 0.1433, "grad_norm": 7.228165626525879, "learning_rate": 4.326818181818182e-06, "loss": 6.476844024658203, "step": 14330 }, { "epoch": 0.14335, "grad_norm": 3.754058599472046, "learning_rate": 4.326565656565657e-06, "loss": 6.480076599121094, "step": 14335 }, { "epoch": 0.1434, "grad_norm": 7.077367305755615, "learning_rate": 4.326313131313132e-06, "loss": 6.665241241455078, "step": 14340 }, { "epoch": 0.14345, "grad_norm": 2.789890766143799, "learning_rate": 4.3260606060606065e-06, "loss": 6.511434936523438, "step": 14345 }, { "epoch": 0.1435, "grad_norm": 4.183691501617432, "learning_rate": 4.325808080808081e-06, "loss": 6.506459045410156, "step": 14350 }, { "epoch": 0.14355, "grad_norm": 5.679547309875488, "learning_rate": 4.325555555555557e-06, "loss": 6.5519561767578125, "step": 14355 }, { "epoch": 0.1436, "grad_norm": 6.831256866455078, "learning_rate": 4.32530303030303e-06, "loss": 6.459384918212891, "step": 14360 }, { "epoch": 0.14365, "grad_norm": 7.045140266418457, "learning_rate": 4.325050505050505e-06, "loss": 6.478128814697266, "step": 14365 }, { "epoch": 0.1437, "grad_norm": 4.254992961883545, "learning_rate": 4.32479797979798e-06, "loss": 6.489494323730469, "step": 14370 }, { "epoch": 0.14375, "grad_norm": 4.402240753173828, "learning_rate": 4.324545454545455e-06, "loss": 6.50782470703125, "step": 14375 }, { "epoch": 0.1438, "grad_norm": 5.494905471801758, "learning_rate": 4.32429292929293e-06, "loss": 6.4835655212402346, "step": 14380 }, { "epoch": 0.14385, "grad_norm": 4.614975929260254, "learning_rate": 4.324040404040404e-06, "loss": 6.483982086181641, "step": 14385 }, { "epoch": 0.1439, "grad_norm": 5.7779388427734375, "learning_rate": 4.323787878787879e-06, "loss": 6.582749938964843, "step": 14390 }, { "epoch": 0.14395, "grad_norm": 6.095193862915039, "learning_rate": 4.3235353535353545e-06, "loss": 6.501593017578125, "step": 14395 }, { "epoch": 0.144, "grad_norm": 5.558094501495361, "learning_rate": 4.323282828282829e-06, "loss": 6.5114601135253904, "step": 14400 }, { "epoch": 0.14405, "grad_norm": 6.089951515197754, "learning_rate": 4.323030303030304e-06, "loss": 6.549974822998047, "step": 14405 }, { "epoch": 0.1441, "grad_norm": 5.017982482910156, "learning_rate": 4.322777777777778e-06, "loss": 6.490240478515625, "step": 14410 }, { "epoch": 0.14415, "grad_norm": 5.1229071617126465, "learning_rate": 4.322525252525253e-06, "loss": 6.502989196777344, "step": 14415 }, { "epoch": 0.1442, "grad_norm": 8.210447311401367, "learning_rate": 4.322272727272728e-06, "loss": 6.4963623046875, "step": 14420 }, { "epoch": 0.14425, "grad_norm": 3.8306381702423096, "learning_rate": 4.322020202020202e-06, "loss": 6.506784057617187, "step": 14425 }, { "epoch": 0.1443, "grad_norm": 7.546472072601318, "learning_rate": 4.321767676767677e-06, "loss": 6.521236419677734, "step": 14430 }, { "epoch": 0.14435, "grad_norm": 3.3358654975891113, "learning_rate": 4.321515151515152e-06, "loss": 6.473580932617187, "step": 14435 }, { "epoch": 0.1444, "grad_norm": 4.303221702575684, "learning_rate": 4.321262626262627e-06, "loss": 6.490567779541015, "step": 14440 }, { "epoch": 0.14445, "grad_norm": 5.313779830932617, "learning_rate": 4.321010101010102e-06, "loss": 6.5432373046875, "step": 14445 }, { "epoch": 0.1445, "grad_norm": 8.019837379455566, "learning_rate": 4.320757575757576e-06, "loss": 6.469643402099609, "step": 14450 }, { "epoch": 0.14455, "grad_norm": 4.934937477111816, "learning_rate": 4.320505050505051e-06, "loss": 6.479988861083984, "step": 14455 }, { "epoch": 0.1446, "grad_norm": 5.1939544677734375, "learning_rate": 4.3202525252525255e-06, "loss": 6.526238250732422, "step": 14460 }, { "epoch": 0.14465, "grad_norm": 3.4489359855651855, "learning_rate": 4.32e-06, "loss": 6.488211822509766, "step": 14465 }, { "epoch": 0.1447, "grad_norm": 7.205262660980225, "learning_rate": 4.319747474747475e-06, "loss": 6.536859130859375, "step": 14470 }, { "epoch": 0.14475, "grad_norm": 4.711112976074219, "learning_rate": 4.31949494949495e-06, "loss": 6.513069152832031, "step": 14475 }, { "epoch": 0.1448, "grad_norm": 3.2250239849090576, "learning_rate": 4.319242424242425e-06, "loss": 6.497682189941406, "step": 14480 }, { "epoch": 0.14485, "grad_norm": 10.501614570617676, "learning_rate": 4.3189898989898995e-06, "loss": 6.678610229492188, "step": 14485 }, { "epoch": 0.1449, "grad_norm": 3.614701747894287, "learning_rate": 4.318737373737374e-06, "loss": 6.515364074707032, "step": 14490 }, { "epoch": 0.14495, "grad_norm": 5.2057013511657715, "learning_rate": 4.318484848484849e-06, "loss": 6.535688781738282, "step": 14495 }, { "epoch": 0.145, "grad_norm": 4.747753620147705, "learning_rate": 4.318232323232323e-06, "loss": 6.516139984130859, "step": 14500 }, { "epoch": 0.14505, "grad_norm": 4.500584125518799, "learning_rate": 4.317979797979798e-06, "loss": 6.449169921875, "step": 14505 }, { "epoch": 0.1451, "grad_norm": 2.604335308074951, "learning_rate": 4.317727272727273e-06, "loss": 6.472538757324219, "step": 14510 }, { "epoch": 0.14515, "grad_norm": 5.561988353729248, "learning_rate": 4.317474747474748e-06, "loss": 6.455146026611328, "step": 14515 }, { "epoch": 0.1452, "grad_norm": 5.776001930236816, "learning_rate": 4.317222222222223e-06, "loss": 6.418576049804687, "step": 14520 }, { "epoch": 0.14525, "grad_norm": 4.265985488891602, "learning_rate": 4.316969696969697e-06, "loss": 6.496917724609375, "step": 14525 }, { "epoch": 0.1453, "grad_norm": 4.061425685882568, "learning_rate": 4.316717171717172e-06, "loss": 6.508099365234375, "step": 14530 }, { "epoch": 0.14535, "grad_norm": 4.93050479888916, "learning_rate": 4.316464646464647e-06, "loss": 6.4641357421875, "step": 14535 }, { "epoch": 0.1454, "grad_norm": 4.784628391265869, "learning_rate": 4.316212121212121e-06, "loss": 6.592555236816406, "step": 14540 }, { "epoch": 0.14545, "grad_norm": 4.695350170135498, "learning_rate": 4.315959595959596e-06, "loss": 6.4828125, "step": 14545 }, { "epoch": 0.1455, "grad_norm": 5.348476886749268, "learning_rate": 4.3157070707070705e-06, "loss": 6.478262329101563, "step": 14550 }, { "epoch": 0.14555, "grad_norm": 5.309117794036865, "learning_rate": 4.315454545454546e-06, "loss": 6.470433044433594, "step": 14555 }, { "epoch": 0.1456, "grad_norm": 4.549057960510254, "learning_rate": 4.315202020202021e-06, "loss": 6.963652038574219, "step": 14560 }, { "epoch": 0.14565, "grad_norm": 4.2072434425354, "learning_rate": 4.314949494949495e-06, "loss": 6.5826873779296875, "step": 14565 }, { "epoch": 0.1457, "grad_norm": 5.458916664123535, "learning_rate": 4.31469696969697e-06, "loss": 6.463063049316406, "step": 14570 }, { "epoch": 0.14575, "grad_norm": 4.661675453186035, "learning_rate": 4.314444444444445e-06, "loss": 6.494541931152344, "step": 14575 }, { "epoch": 0.1458, "grad_norm": 5.717794895172119, "learning_rate": 4.314191919191919e-06, "loss": 6.517290496826172, "step": 14580 }, { "epoch": 0.14585, "grad_norm": 3.911789655685425, "learning_rate": 4.313939393939394e-06, "loss": 6.532636260986328, "step": 14585 }, { "epoch": 0.1459, "grad_norm": 4.7801313400268555, "learning_rate": 4.3136868686868684e-06, "loss": 6.485527038574219, "step": 14590 }, { "epoch": 0.14595, "grad_norm": 5.400557518005371, "learning_rate": 4.313434343434344e-06, "loss": 6.53970947265625, "step": 14595 }, { "epoch": 0.146, "grad_norm": 5.87947940826416, "learning_rate": 4.3131818181818185e-06, "loss": 6.52135009765625, "step": 14600 }, { "epoch": 0.14605, "grad_norm": 3.1970720291137695, "learning_rate": 4.312929292929293e-06, "loss": 6.4888359069824215, "step": 14605 }, { "epoch": 0.1461, "grad_norm": 4.866981506347656, "learning_rate": 4.312676767676768e-06, "loss": 6.5334007263183596, "step": 14610 }, { "epoch": 0.14615, "grad_norm": 8.984414100646973, "learning_rate": 4.312424242424243e-06, "loss": 6.557095336914062, "step": 14615 }, { "epoch": 0.1462, "grad_norm": 3.7784862518310547, "learning_rate": 4.312171717171718e-06, "loss": 6.498558807373047, "step": 14620 }, { "epoch": 0.14625, "grad_norm": 7.6340789794921875, "learning_rate": 4.3119191919191925e-06, "loss": 6.523674011230469, "step": 14625 }, { "epoch": 0.1463, "grad_norm": 3.902782440185547, "learning_rate": 4.311666666666667e-06, "loss": 6.476576232910157, "step": 14630 }, { "epoch": 0.14635, "grad_norm": 4.196352958679199, "learning_rate": 4.311414141414142e-06, "loss": 6.485125732421875, "step": 14635 }, { "epoch": 0.1464, "grad_norm": 4.741200923919678, "learning_rate": 4.3111616161616164e-06, "loss": 6.554374694824219, "step": 14640 }, { "epoch": 0.14645, "grad_norm": 10.72707748413086, "learning_rate": 4.310909090909091e-06, "loss": 6.461141967773438, "step": 14645 }, { "epoch": 0.1465, "grad_norm": 5.616623401641846, "learning_rate": 4.310656565656566e-06, "loss": 6.443881225585938, "step": 14650 }, { "epoch": 0.14655, "grad_norm": 7.175601005554199, "learning_rate": 4.310404040404041e-06, "loss": 6.532682800292969, "step": 14655 }, { "epoch": 0.1466, "grad_norm": 5.958141326904297, "learning_rate": 4.310151515151516e-06, "loss": 6.511766052246093, "step": 14660 }, { "epoch": 0.14665, "grad_norm": 5.862859725952148, "learning_rate": 4.3098989898989904e-06, "loss": 6.5302574157714846, "step": 14665 }, { "epoch": 0.1467, "grad_norm": 4.616973400115967, "learning_rate": 4.309646464646465e-06, "loss": 6.586998748779297, "step": 14670 }, { "epoch": 0.14675, "grad_norm": 6.046182155609131, "learning_rate": 4.30939393939394e-06, "loss": 6.474101257324219, "step": 14675 }, { "epoch": 0.1468, "grad_norm": 4.557863235473633, "learning_rate": 4.309141414141414e-06, "loss": 6.49437255859375, "step": 14680 }, { "epoch": 0.14685, "grad_norm": 5.643327236175537, "learning_rate": 4.308888888888889e-06, "loss": 6.500736999511719, "step": 14685 }, { "epoch": 0.1469, "grad_norm": 3.4382882118225098, "learning_rate": 4.308636363636364e-06, "loss": 6.498165893554687, "step": 14690 }, { "epoch": 0.14695, "grad_norm": 4.999553680419922, "learning_rate": 4.308383838383839e-06, "loss": 6.4668434143066404, "step": 14695 }, { "epoch": 0.147, "grad_norm": 4.659677982330322, "learning_rate": 4.308131313131314e-06, "loss": 6.479248046875, "step": 14700 }, { "epoch": 0.14705, "grad_norm": 4.9387288093566895, "learning_rate": 4.307878787878788e-06, "loss": 6.488901519775391, "step": 14705 }, { "epoch": 0.1471, "grad_norm": 5.101020812988281, "learning_rate": 4.307626262626263e-06, "loss": 6.4221336364746096, "step": 14710 }, { "epoch": 0.14715, "grad_norm": 4.61722993850708, "learning_rate": 4.307373737373738e-06, "loss": 6.5040443420410154, "step": 14715 }, { "epoch": 0.1472, "grad_norm": 7.156398773193359, "learning_rate": 4.307121212121212e-06, "loss": 6.476431274414063, "step": 14720 }, { "epoch": 0.14725, "grad_norm": 6.127005577087402, "learning_rate": 4.306868686868687e-06, "loss": 6.486894226074218, "step": 14725 }, { "epoch": 0.1473, "grad_norm": 7.027503490447998, "learning_rate": 4.3066161616161615e-06, "loss": 6.475852966308594, "step": 14730 }, { "epoch": 0.14735, "grad_norm": 3.895451784133911, "learning_rate": 4.306363636363637e-06, "loss": 6.456636810302735, "step": 14735 }, { "epoch": 0.1474, "grad_norm": 9.537215232849121, "learning_rate": 4.3061111111111116e-06, "loss": 6.543526458740234, "step": 14740 }, { "epoch": 0.14745, "grad_norm": 4.5582499504089355, "learning_rate": 4.305858585858586e-06, "loss": 6.545819091796875, "step": 14745 }, { "epoch": 0.1475, "grad_norm": 10.131399154663086, "learning_rate": 4.305606060606061e-06, "loss": 6.397181701660156, "step": 14750 }, { "epoch": 0.14755, "grad_norm": 5.107814311981201, "learning_rate": 4.3053535353535355e-06, "loss": 6.481792449951172, "step": 14755 }, { "epoch": 0.1476, "grad_norm": 6.956871032714844, "learning_rate": 4.30510101010101e-06, "loss": 6.492296600341797, "step": 14760 }, { "epoch": 0.14765, "grad_norm": 4.397143840789795, "learning_rate": 4.304848484848485e-06, "loss": 6.5448150634765625, "step": 14765 }, { "epoch": 0.1477, "grad_norm": 3.2974228858947754, "learning_rate": 4.30459595959596e-06, "loss": 6.468733215332032, "step": 14770 }, { "epoch": 0.14775, "grad_norm": 5.759087085723877, "learning_rate": 4.304343434343435e-06, "loss": 6.496035766601563, "step": 14775 }, { "epoch": 0.1478, "grad_norm": 5.014657020568848, "learning_rate": 4.3040909090909095e-06, "loss": 6.641156768798828, "step": 14780 }, { "epoch": 0.14785, "grad_norm": 4.510643005371094, "learning_rate": 4.303838383838384e-06, "loss": 6.473520660400391, "step": 14785 }, { "epoch": 0.1479, "grad_norm": 5.0355963706970215, "learning_rate": 4.3035858585858596e-06, "loss": 6.483433532714844, "step": 14790 }, { "epoch": 0.14795, "grad_norm": 13.209962844848633, "learning_rate": 4.303333333333334e-06, "loss": 6.452657318115234, "step": 14795 }, { "epoch": 0.148, "grad_norm": 4.418889999389648, "learning_rate": 4.303080808080809e-06, "loss": 6.536647033691406, "step": 14800 }, { "epoch": 0.14805, "grad_norm": 4.153820991516113, "learning_rate": 4.302828282828283e-06, "loss": 6.560569763183594, "step": 14805 }, { "epoch": 0.1481, "grad_norm": 4.985516548156738, "learning_rate": 4.302575757575758e-06, "loss": 6.457383728027343, "step": 14810 }, { "epoch": 0.14815, "grad_norm": 6.6732306480407715, "learning_rate": 4.302323232323233e-06, "loss": 6.513744354248047, "step": 14815 }, { "epoch": 0.1482, "grad_norm": 5.008304119110107, "learning_rate": 4.302070707070707e-06, "loss": 6.492566680908203, "step": 14820 }, { "epoch": 0.14825, "grad_norm": 2.884697675704956, "learning_rate": 4.301818181818182e-06, "loss": 6.49169692993164, "step": 14825 }, { "epoch": 0.1483, "grad_norm": 3.100182294845581, "learning_rate": 4.3015656565656575e-06, "loss": 6.497740936279297, "step": 14830 }, { "epoch": 0.14835, "grad_norm": 4.723951816558838, "learning_rate": 4.301313131313132e-06, "loss": 6.491586303710937, "step": 14835 }, { "epoch": 0.1484, "grad_norm": 4.806491374969482, "learning_rate": 4.301060606060607e-06, "loss": 6.537146759033203, "step": 14840 }, { "epoch": 0.14845, "grad_norm": 4.38887357711792, "learning_rate": 4.300808080808081e-06, "loss": 6.472910308837891, "step": 14845 }, { "epoch": 0.1485, "grad_norm": 4.043336391448975, "learning_rate": 4.300555555555556e-06, "loss": 6.454359436035157, "step": 14850 }, { "epoch": 0.14855, "grad_norm": 4.632874011993408, "learning_rate": 4.300303030303031e-06, "loss": 6.556086730957031, "step": 14855 }, { "epoch": 0.1486, "grad_norm": 4.47702693939209, "learning_rate": 4.300050505050505e-06, "loss": 6.428596496582031, "step": 14860 }, { "epoch": 0.14865, "grad_norm": 4.245589733123779, "learning_rate": 4.29979797979798e-06, "loss": 6.468850708007812, "step": 14865 }, { "epoch": 0.1487, "grad_norm": 5.9859089851379395, "learning_rate": 4.299545454545455e-06, "loss": 6.450080108642578, "step": 14870 }, { "epoch": 0.14875, "grad_norm": 5.922009468078613, "learning_rate": 4.29929292929293e-06, "loss": 6.4823448181152346, "step": 14875 }, { "epoch": 0.1488, "grad_norm": 5.425019264221191, "learning_rate": 4.299040404040405e-06, "loss": 6.496971893310547, "step": 14880 }, { "epoch": 0.14885, "grad_norm": 4.5376200675964355, "learning_rate": 4.298787878787879e-06, "loss": 6.492557525634766, "step": 14885 }, { "epoch": 0.1489, "grad_norm": 5.699253082275391, "learning_rate": 4.298535353535354e-06, "loss": 6.4660179138183596, "step": 14890 }, { "epoch": 0.14895, "grad_norm": 8.569064140319824, "learning_rate": 4.2982828282828285e-06, "loss": 6.515065002441406, "step": 14895 }, { "epoch": 0.149, "grad_norm": 5.352481365203857, "learning_rate": 4.298030303030303e-06, "loss": 6.427183532714844, "step": 14900 }, { "epoch": 0.14905, "grad_norm": 5.747870922088623, "learning_rate": 4.297777777777778e-06, "loss": 6.459695434570312, "step": 14905 }, { "epoch": 0.1491, "grad_norm": 5.506577491760254, "learning_rate": 4.297525252525253e-06, "loss": 6.484357452392578, "step": 14910 }, { "epoch": 0.14915, "grad_norm": 3.5765957832336426, "learning_rate": 4.297272727272728e-06, "loss": 6.4624275207519535, "step": 14915 }, { "epoch": 0.1492, "grad_norm": 6.133081912994385, "learning_rate": 4.2970202020202025e-06, "loss": 6.46981201171875, "step": 14920 }, { "epoch": 0.14925, "grad_norm": 4.246498107910156, "learning_rate": 4.296767676767677e-06, "loss": 6.3881980895996096, "step": 14925 }, { "epoch": 0.1493, "grad_norm": 4.6936798095703125, "learning_rate": 4.296515151515152e-06, "loss": 6.489932250976563, "step": 14930 }, { "epoch": 0.14935, "grad_norm": 4.877340316772461, "learning_rate": 4.296262626262626e-06, "loss": 6.526812744140625, "step": 14935 }, { "epoch": 0.1494, "grad_norm": 3.394186496734619, "learning_rate": 4.296010101010101e-06, "loss": 6.492457580566406, "step": 14940 }, { "epoch": 0.14945, "grad_norm": 4.982087135314941, "learning_rate": 4.295757575757576e-06, "loss": 6.8960418701171875, "step": 14945 }, { "epoch": 0.1495, "grad_norm": 5.543653964996338, "learning_rate": 4.295505050505051e-06, "loss": 6.493752288818359, "step": 14950 }, { "epoch": 0.14955, "grad_norm": 9.777607917785645, "learning_rate": 4.295252525252526e-06, "loss": 6.4962409973144535, "step": 14955 }, { "epoch": 0.1496, "grad_norm": 4.880670070648193, "learning_rate": 4.295e-06, "loss": 6.519473266601563, "step": 14960 }, { "epoch": 0.14965, "grad_norm": 5.841375350952148, "learning_rate": 4.294747474747475e-06, "loss": 6.467085266113282, "step": 14965 }, { "epoch": 0.1497, "grad_norm": 5.128256320953369, "learning_rate": 4.29449494949495e-06, "loss": 6.493290710449219, "step": 14970 }, { "epoch": 0.14975, "grad_norm": 5.132215976715088, "learning_rate": 4.294242424242424e-06, "loss": 6.461683654785157, "step": 14975 }, { "epoch": 0.1498, "grad_norm": 5.222896575927734, "learning_rate": 4.293989898989899e-06, "loss": 6.462319183349609, "step": 14980 }, { "epoch": 0.14985, "grad_norm": 4.722568988800049, "learning_rate": 4.2937373737373735e-06, "loss": 6.516603088378906, "step": 14985 }, { "epoch": 0.1499, "grad_norm": 5.505730628967285, "learning_rate": 4.293484848484849e-06, "loss": 6.455241394042969, "step": 14990 }, { "epoch": 0.14995, "grad_norm": 5.882155418395996, "learning_rate": 4.293232323232324e-06, "loss": 6.437882995605468, "step": 14995 }, { "epoch": 0.15, "grad_norm": 2.874830961227417, "learning_rate": 4.292979797979798e-06, "loss": 6.56455078125, "step": 15000 }, { "epoch": 0.15005, "grad_norm": 5.374883651733398, "learning_rate": 4.292727272727273e-06, "loss": 6.41693115234375, "step": 15005 }, { "epoch": 0.1501, "grad_norm": 6.426100730895996, "learning_rate": 4.292474747474748e-06, "loss": 6.540182495117188, "step": 15010 }, { "epoch": 0.15015, "grad_norm": 4.927196979522705, "learning_rate": 4.292222222222223e-06, "loss": 6.503851318359375, "step": 15015 }, { "epoch": 0.1502, "grad_norm": 3.9625937938690186, "learning_rate": 4.291969696969698e-06, "loss": 6.476905822753906, "step": 15020 }, { "epoch": 0.15025, "grad_norm": 5.241052150726318, "learning_rate": 4.291717171717171e-06, "loss": 6.482189178466797, "step": 15025 }, { "epoch": 0.1503, "grad_norm": 10.917927742004395, "learning_rate": 4.291464646464647e-06, "loss": 6.606055450439453, "step": 15030 }, { "epoch": 0.15035, "grad_norm": 3.4210097789764404, "learning_rate": 4.2912121212121215e-06, "loss": 6.551166534423828, "step": 15035 }, { "epoch": 0.1504, "grad_norm": 4.141200542449951, "learning_rate": 4.290959595959596e-06, "loss": 6.485906982421875, "step": 15040 }, { "epoch": 0.15045, "grad_norm": 4.687851905822754, "learning_rate": 4.290707070707071e-06, "loss": 6.47657470703125, "step": 15045 }, { "epoch": 0.1505, "grad_norm": 7.292572498321533, "learning_rate": 4.290454545454546e-06, "loss": 6.466509246826172, "step": 15050 }, { "epoch": 0.15055, "grad_norm": 3.882255792617798, "learning_rate": 4.290202020202021e-06, "loss": 6.457622528076172, "step": 15055 }, { "epoch": 0.1506, "grad_norm": 4.3983917236328125, "learning_rate": 4.2899494949494955e-06, "loss": 6.488330078125, "step": 15060 }, { "epoch": 0.15065, "grad_norm": 5.001917362213135, "learning_rate": 4.28969696969697e-06, "loss": 6.448454284667969, "step": 15065 }, { "epoch": 0.1507, "grad_norm": 4.295877456665039, "learning_rate": 4.289444444444445e-06, "loss": 6.4445030212402346, "step": 15070 }, { "epoch": 0.15075, "grad_norm": 4.518013000488281, "learning_rate": 4.289191919191919e-06, "loss": 6.49169921875, "step": 15075 }, { "epoch": 0.1508, "grad_norm": 3.9882376194000244, "learning_rate": 4.288939393939394e-06, "loss": 6.540575408935547, "step": 15080 }, { "epoch": 0.15085, "grad_norm": 6.651825428009033, "learning_rate": 4.288686868686869e-06, "loss": 6.600589752197266, "step": 15085 }, { "epoch": 0.1509, "grad_norm": 4.089620590209961, "learning_rate": 4.288434343434344e-06, "loss": 6.454471588134766, "step": 15090 }, { "epoch": 0.15095, "grad_norm": 14.069843292236328, "learning_rate": 4.288181818181819e-06, "loss": 6.381552124023438, "step": 15095 }, { "epoch": 0.151, "grad_norm": 4.626638889312744, "learning_rate": 4.287929292929293e-06, "loss": 6.426313781738282, "step": 15100 }, { "epoch": 0.15105, "grad_norm": 3.8909311294555664, "learning_rate": 4.287676767676768e-06, "loss": 6.460169982910156, "step": 15105 }, { "epoch": 0.1511, "grad_norm": 5.135706424713135, "learning_rate": 4.287424242424243e-06, "loss": 6.479164123535156, "step": 15110 }, { "epoch": 0.15115, "grad_norm": 4.316091537475586, "learning_rate": 4.287171717171717e-06, "loss": 6.462512969970703, "step": 15115 }, { "epoch": 0.1512, "grad_norm": 6.174134731292725, "learning_rate": 4.286919191919192e-06, "loss": 6.475363159179688, "step": 15120 }, { "epoch": 0.15125, "grad_norm": 2.9341185092926025, "learning_rate": 4.2866666666666666e-06, "loss": 6.482684326171875, "step": 15125 }, { "epoch": 0.1513, "grad_norm": 4.788311958312988, "learning_rate": 4.286414141414142e-06, "loss": 6.483296203613281, "step": 15130 }, { "epoch": 0.15135, "grad_norm": 11.157282829284668, "learning_rate": 4.286161616161617e-06, "loss": 6.597923278808594, "step": 15135 }, { "epoch": 0.1514, "grad_norm": 5.697007179260254, "learning_rate": 4.285909090909091e-06, "loss": 6.557782745361328, "step": 15140 }, { "epoch": 0.15145, "grad_norm": 6.109874725341797, "learning_rate": 4.285656565656566e-06, "loss": 6.511138916015625, "step": 15145 }, { "epoch": 0.1515, "grad_norm": 11.675722122192383, "learning_rate": 4.2854040404040406e-06, "loss": 6.6112617492675785, "step": 15150 }, { "epoch": 0.15155, "grad_norm": 4.847055912017822, "learning_rate": 4.285151515151515e-06, "loss": 6.460076141357422, "step": 15155 }, { "epoch": 0.1516, "grad_norm": 4.370905876159668, "learning_rate": 4.28489898989899e-06, "loss": 6.465380859375, "step": 15160 }, { "epoch": 0.15165, "grad_norm": 5.00922155380249, "learning_rate": 4.2846464646464645e-06, "loss": 6.477693176269531, "step": 15165 }, { "epoch": 0.1517, "grad_norm": 6.926941871643066, "learning_rate": 4.28439393939394e-06, "loss": 6.494999694824219, "step": 15170 }, { "epoch": 0.15175, "grad_norm": 7.067318439483643, "learning_rate": 4.2841414141414146e-06, "loss": 6.454267120361328, "step": 15175 }, { "epoch": 0.1518, "grad_norm": 5.276823997497559, "learning_rate": 4.283888888888889e-06, "loss": 6.490467834472656, "step": 15180 }, { "epoch": 0.15185, "grad_norm": 4.030696868896484, "learning_rate": 4.283636363636365e-06, "loss": 6.449317169189453, "step": 15185 }, { "epoch": 0.1519, "grad_norm": 5.973870754241943, "learning_rate": 4.2833838383838384e-06, "loss": 6.456842041015625, "step": 15190 }, { "epoch": 0.15195, "grad_norm": 4.584961414337158, "learning_rate": 4.283131313131313e-06, "loss": 6.5231376647949215, "step": 15195 }, { "epoch": 0.152, "grad_norm": 7.10871696472168, "learning_rate": 4.282878787878788e-06, "loss": 6.4605224609375, "step": 15200 }, { "epoch": 0.15205, "grad_norm": 3.936967134475708, "learning_rate": 4.282626262626263e-06, "loss": 6.4440559387207035, "step": 15205 }, { "epoch": 0.1521, "grad_norm": 5.9034223556518555, "learning_rate": 4.282373737373738e-06, "loss": 6.515449523925781, "step": 15210 }, { "epoch": 0.15215, "grad_norm": 5.04292631149292, "learning_rate": 4.2821212121212124e-06, "loss": 6.488088226318359, "step": 15215 }, { "epoch": 0.1522, "grad_norm": 2.7983410358428955, "learning_rate": 4.281868686868687e-06, "loss": 6.503152465820312, "step": 15220 }, { "epoch": 0.15225, "grad_norm": 4.151852607727051, "learning_rate": 4.2816161616161626e-06, "loss": 6.520710754394531, "step": 15225 }, { "epoch": 0.1523, "grad_norm": 2.711380958557129, "learning_rate": 4.281363636363637e-06, "loss": 6.507637786865234, "step": 15230 }, { "epoch": 0.15235, "grad_norm": 3.676347494125366, "learning_rate": 4.281111111111112e-06, "loss": 6.627845764160156, "step": 15235 }, { "epoch": 0.1524, "grad_norm": 7.82301139831543, "learning_rate": 4.2808585858585864e-06, "loss": 6.4846038818359375, "step": 15240 }, { "epoch": 0.15245, "grad_norm": 16.551183700561523, "learning_rate": 4.280606060606061e-06, "loss": 6.516114044189453, "step": 15245 }, { "epoch": 0.1525, "grad_norm": 6.4076457023620605, "learning_rate": 4.280353535353536e-06, "loss": 6.470185089111328, "step": 15250 }, { "epoch": 0.15255, "grad_norm": 7.124907493591309, "learning_rate": 4.28010101010101e-06, "loss": 6.547100830078125, "step": 15255 }, { "epoch": 0.1526, "grad_norm": 3.8710622787475586, "learning_rate": 4.279848484848485e-06, "loss": 6.496997833251953, "step": 15260 }, { "epoch": 0.15265, "grad_norm": 3.8036274909973145, "learning_rate": 4.2795959595959604e-06, "loss": 6.475382232666016, "step": 15265 }, { "epoch": 0.1527, "grad_norm": 3.911621570587158, "learning_rate": 4.279343434343435e-06, "loss": 6.449348449707031, "step": 15270 }, { "epoch": 0.15275, "grad_norm": 4.880951404571533, "learning_rate": 4.27909090909091e-06, "loss": 6.475069427490235, "step": 15275 }, { "epoch": 0.1528, "grad_norm": 4.6760735511779785, "learning_rate": 4.278838383838384e-06, "loss": 6.485789489746094, "step": 15280 }, { "epoch": 0.15285, "grad_norm": 5.210073947906494, "learning_rate": 4.278585858585859e-06, "loss": 6.442451477050781, "step": 15285 }, { "epoch": 0.1529, "grad_norm": 7.644658088684082, "learning_rate": 4.278333333333334e-06, "loss": 6.429612731933593, "step": 15290 }, { "epoch": 0.15295, "grad_norm": 9.424300193786621, "learning_rate": 4.278080808080808e-06, "loss": 6.242014694213867, "step": 15295 }, { "epoch": 0.153, "grad_norm": 5.6265482902526855, "learning_rate": 4.277828282828283e-06, "loss": 6.485637664794922, "step": 15300 }, { "epoch": 0.15305, "grad_norm": 3.479794502258301, "learning_rate": 4.277575757575758e-06, "loss": 6.459632873535156, "step": 15305 }, { "epoch": 0.1531, "grad_norm": 6.739943981170654, "learning_rate": 4.277323232323233e-06, "loss": 6.423279571533203, "step": 15310 }, { "epoch": 0.15315, "grad_norm": 5.397625923156738, "learning_rate": 4.277070707070708e-06, "loss": 6.39183349609375, "step": 15315 }, { "epoch": 0.1532, "grad_norm": 4.833497524261475, "learning_rate": 4.276818181818182e-06, "loss": 6.482825469970703, "step": 15320 }, { "epoch": 0.15325, "grad_norm": 4.2414350509643555, "learning_rate": 4.276565656565657e-06, "loss": 6.431367492675781, "step": 15325 }, { "epoch": 0.1533, "grad_norm": 5.612492084503174, "learning_rate": 4.2763131313131315e-06, "loss": 6.538777160644531, "step": 15330 }, { "epoch": 0.15335, "grad_norm": 3.536933660507202, "learning_rate": 4.276060606060606e-06, "loss": 6.455058288574219, "step": 15335 }, { "epoch": 0.1534, "grad_norm": 4.763200283050537, "learning_rate": 4.275808080808081e-06, "loss": 6.467333221435547, "step": 15340 }, { "epoch": 0.15345, "grad_norm": 3.4811580181121826, "learning_rate": 4.275555555555556e-06, "loss": 6.456903076171875, "step": 15345 }, { "epoch": 0.1535, "grad_norm": 3.8126108646392822, "learning_rate": 4.275303030303031e-06, "loss": 6.481922149658203, "step": 15350 }, { "epoch": 0.15355, "grad_norm": 4.745048522949219, "learning_rate": 4.2750505050505055e-06, "loss": 6.415636444091797, "step": 15355 }, { "epoch": 0.1536, "grad_norm": 3.74552321434021, "learning_rate": 4.27479797979798e-06, "loss": 6.447415924072265, "step": 15360 }, { "epoch": 0.15365, "grad_norm": 3.551398515701294, "learning_rate": 4.274545454545455e-06, "loss": 6.47808609008789, "step": 15365 }, { "epoch": 0.1537, "grad_norm": 5.395285606384277, "learning_rate": 4.274292929292929e-06, "loss": 6.53711166381836, "step": 15370 }, { "epoch": 0.15375, "grad_norm": 4.588326930999756, "learning_rate": 4.274040404040404e-06, "loss": 6.501445007324219, "step": 15375 }, { "epoch": 0.1538, "grad_norm": 5.968113899230957, "learning_rate": 4.273787878787879e-06, "loss": 6.423214721679687, "step": 15380 }, { "epoch": 0.15385, "grad_norm": 4.7754716873168945, "learning_rate": 4.273535353535354e-06, "loss": 6.4765777587890625, "step": 15385 }, { "epoch": 0.1539, "grad_norm": 4.135946750640869, "learning_rate": 4.273282828282829e-06, "loss": 6.456243896484375, "step": 15390 }, { "epoch": 0.15395, "grad_norm": 6.109796047210693, "learning_rate": 4.273030303030303e-06, "loss": 6.487821960449219, "step": 15395 }, { "epoch": 0.154, "grad_norm": 3.726837158203125, "learning_rate": 4.272777777777778e-06, "loss": 6.4888450622558596, "step": 15400 }, { "epoch": 0.15405, "grad_norm": 6.013373851776123, "learning_rate": 4.2725252525252535e-06, "loss": 6.419358825683593, "step": 15405 }, { "epoch": 0.1541, "grad_norm": 4.755584716796875, "learning_rate": 4.272272727272728e-06, "loss": 6.391104888916016, "step": 15410 }, { "epoch": 0.15415, "grad_norm": 8.34873104095459, "learning_rate": 4.272020202020202e-06, "loss": 6.522925567626953, "step": 15415 }, { "epoch": 0.1542, "grad_norm": 4.9861345291137695, "learning_rate": 4.2717676767676765e-06, "loss": 6.448655700683593, "step": 15420 }, { "epoch": 0.15425, "grad_norm": 3.004512071609497, "learning_rate": 4.271515151515152e-06, "loss": 6.487810516357422, "step": 15425 }, { "epoch": 0.1543, "grad_norm": 7.083635330200195, "learning_rate": 4.271262626262627e-06, "loss": 6.430406188964843, "step": 15430 }, { "epoch": 0.15435, "grad_norm": 6.485072135925293, "learning_rate": 4.271010101010101e-06, "loss": 6.438578796386719, "step": 15435 }, { "epoch": 0.1544, "grad_norm": 3.134366989135742, "learning_rate": 4.270757575757576e-06, "loss": 6.4809715270996096, "step": 15440 }, { "epoch": 0.15445, "grad_norm": 6.267679691314697, "learning_rate": 4.270505050505051e-06, "loss": 6.476234436035156, "step": 15445 }, { "epoch": 0.1545, "grad_norm": 6.279084205627441, "learning_rate": 4.270252525252526e-06, "loss": 6.377063751220703, "step": 15450 }, { "epoch": 0.15455, "grad_norm": 5.905559062957764, "learning_rate": 4.270000000000001e-06, "loss": 6.439605712890625, "step": 15455 }, { "epoch": 0.1546, "grad_norm": 4.622612476348877, "learning_rate": 4.269747474747475e-06, "loss": 6.4858848571777346, "step": 15460 }, { "epoch": 0.15465, "grad_norm": 4.669487476348877, "learning_rate": 4.26949494949495e-06, "loss": 6.452729797363281, "step": 15465 }, { "epoch": 0.1547, "grad_norm": 4.418135166168213, "learning_rate": 4.2692424242424245e-06, "loss": 6.426240539550781, "step": 15470 }, { "epoch": 0.15475, "grad_norm": 3.560588836669922, "learning_rate": 4.268989898989899e-06, "loss": 6.4837890625, "step": 15475 }, { "epoch": 0.1548, "grad_norm": 4.369415283203125, "learning_rate": 4.268737373737374e-06, "loss": 6.432186889648437, "step": 15480 }, { "epoch": 0.15485, "grad_norm": 3.06827712059021, "learning_rate": 4.268484848484849e-06, "loss": 6.425032043457032, "step": 15485 }, { "epoch": 0.1549, "grad_norm": 2.942209482192993, "learning_rate": 4.268232323232324e-06, "loss": 6.43695297241211, "step": 15490 }, { "epoch": 0.15495, "grad_norm": 3.718376636505127, "learning_rate": 4.2679797979797985e-06, "loss": 6.4039451599121096, "step": 15495 }, { "epoch": 0.155, "grad_norm": 2.4150173664093018, "learning_rate": 4.267727272727273e-06, "loss": 6.434722900390625, "step": 15500 }, { "epoch": 0.15505, "grad_norm": 3.2223780155181885, "learning_rate": 4.267474747474748e-06, "loss": 6.430596160888672, "step": 15505 }, { "epoch": 0.1551, "grad_norm": 4.392515659332275, "learning_rate": 4.267222222222222e-06, "loss": 6.474813079833984, "step": 15510 }, { "epoch": 0.15515, "grad_norm": 4.735413074493408, "learning_rate": 4.266969696969697e-06, "loss": 6.471153259277344, "step": 15515 }, { "epoch": 0.1552, "grad_norm": 5.082212924957275, "learning_rate": 4.266717171717172e-06, "loss": 6.5118560791015625, "step": 15520 }, { "epoch": 0.15525, "grad_norm": 3.3092105388641357, "learning_rate": 4.266464646464647e-06, "loss": 6.461073303222657, "step": 15525 }, { "epoch": 0.1553, "grad_norm": 3.698885202407837, "learning_rate": 4.266212121212122e-06, "loss": 6.488520812988281, "step": 15530 }, { "epoch": 0.15535, "grad_norm": 4.972824573516846, "learning_rate": 4.265959595959596e-06, "loss": 6.503556823730468, "step": 15535 }, { "epoch": 0.1554, "grad_norm": 11.466278076171875, "learning_rate": 4.265707070707071e-06, "loss": 6.56033935546875, "step": 15540 }, { "epoch": 0.15545, "grad_norm": 3.650855779647827, "learning_rate": 4.265454545454546e-06, "loss": 6.436418914794922, "step": 15545 }, { "epoch": 0.1555, "grad_norm": 6.978936195373535, "learning_rate": 4.26520202020202e-06, "loss": 6.483848571777344, "step": 15550 }, { "epoch": 0.15555, "grad_norm": 3.0578649044036865, "learning_rate": 4.264949494949495e-06, "loss": 6.448268127441406, "step": 15555 }, { "epoch": 0.1556, "grad_norm": 4.982611656188965, "learning_rate": 4.2646969696969695e-06, "loss": 6.4537498474121096, "step": 15560 }, { "epoch": 0.15565, "grad_norm": 3.5222654342651367, "learning_rate": 4.264444444444445e-06, "loss": 6.394025039672852, "step": 15565 }, { "epoch": 0.1557, "grad_norm": 3.464869260787964, "learning_rate": 4.26419191919192e-06, "loss": 6.476780700683594, "step": 15570 }, { "epoch": 0.15575, "grad_norm": 6.591947078704834, "learning_rate": 4.263939393939394e-06, "loss": 6.462862396240235, "step": 15575 }, { "epoch": 0.1558, "grad_norm": 5.201397895812988, "learning_rate": 4.263686868686869e-06, "loss": 6.475666809082031, "step": 15580 }, { "epoch": 0.15585, "grad_norm": 3.8624680042266846, "learning_rate": 4.2634343434343435e-06, "loss": 6.459147644042969, "step": 15585 }, { "epoch": 0.1559, "grad_norm": 5.241911888122559, "learning_rate": 4.263181818181818e-06, "loss": 6.546476745605469, "step": 15590 }, { "epoch": 0.15595, "grad_norm": 4.291407108306885, "learning_rate": 4.262929292929293e-06, "loss": 6.45372314453125, "step": 15595 }, { "epoch": 0.156, "grad_norm": 5.526819705963135, "learning_rate": 4.262676767676768e-06, "loss": 6.457973480224609, "step": 15600 }, { "epoch": 0.15605, "grad_norm": 6.068036079406738, "learning_rate": 4.262424242424243e-06, "loss": 6.456462097167969, "step": 15605 }, { "epoch": 0.1561, "grad_norm": 3.9966280460357666, "learning_rate": 4.2621717171717175e-06, "loss": 6.523426055908203, "step": 15610 }, { "epoch": 0.15615, "grad_norm": 4.546053409576416, "learning_rate": 4.261919191919192e-06, "loss": 6.510003662109375, "step": 15615 }, { "epoch": 0.1562, "grad_norm": 4.027541637420654, "learning_rate": 4.261666666666668e-06, "loss": 6.423981475830078, "step": 15620 }, { "epoch": 0.15625, "grad_norm": 4.187471866607666, "learning_rate": 4.261414141414142e-06, "loss": 6.4303535461425785, "step": 15625 }, { "epoch": 0.1563, "grad_norm": 3.792581558227539, "learning_rate": 4.261161616161617e-06, "loss": 6.47076416015625, "step": 15630 }, { "epoch": 0.15635, "grad_norm": 2.9656736850738525, "learning_rate": 4.260909090909091e-06, "loss": 6.456990814208984, "step": 15635 }, { "epoch": 0.1564, "grad_norm": 6.446865081787109, "learning_rate": 4.260656565656566e-06, "loss": 6.459697723388672, "step": 15640 }, { "epoch": 0.15645, "grad_norm": 6.295422077178955, "learning_rate": 4.260404040404041e-06, "loss": 6.442035675048828, "step": 15645 }, { "epoch": 0.1565, "grad_norm": 4.920889854431152, "learning_rate": 4.2601515151515154e-06, "loss": 6.4969482421875, "step": 15650 }, { "epoch": 0.15655, "grad_norm": 5.755523681640625, "learning_rate": 4.25989898989899e-06, "loss": 6.4463653564453125, "step": 15655 }, { "epoch": 0.1566, "grad_norm": 3.9147746562957764, "learning_rate": 4.2596464646464655e-06, "loss": 6.42156982421875, "step": 15660 }, { "epoch": 0.15665, "grad_norm": 3.0561859607696533, "learning_rate": 4.25939393939394e-06, "loss": 6.472958374023437, "step": 15665 }, { "epoch": 0.1567, "grad_norm": 4.439169406890869, "learning_rate": 4.259141414141415e-06, "loss": 6.426939392089844, "step": 15670 }, { "epoch": 0.15675, "grad_norm": 5.381335735321045, "learning_rate": 4.2588888888888894e-06, "loss": 6.46109619140625, "step": 15675 }, { "epoch": 0.1568, "grad_norm": 3.7233333587646484, "learning_rate": 4.258636363636364e-06, "loss": 6.452667999267578, "step": 15680 }, { "epoch": 0.15685, "grad_norm": 3.594089984893799, "learning_rate": 4.258383838383839e-06, "loss": 6.462760925292969, "step": 15685 }, { "epoch": 0.1569, "grad_norm": 8.306628227233887, "learning_rate": 4.258131313131313e-06, "loss": 6.450215148925781, "step": 15690 }, { "epoch": 0.15695, "grad_norm": 3.309375762939453, "learning_rate": 4.257878787878788e-06, "loss": 6.490792846679687, "step": 15695 }, { "epoch": 0.157, "grad_norm": 6.406224727630615, "learning_rate": 4.2576262626262634e-06, "loss": 6.388658905029297, "step": 15700 }, { "epoch": 0.15705, "grad_norm": 3.6580753326416016, "learning_rate": 4.257373737373738e-06, "loss": 6.43719253540039, "step": 15705 }, { "epoch": 0.1571, "grad_norm": 4.949578285217285, "learning_rate": 4.257121212121213e-06, "loss": 6.4233856201171875, "step": 15710 }, { "epoch": 0.15715, "grad_norm": 3.7107763290405273, "learning_rate": 4.256868686868687e-06, "loss": 6.479306030273437, "step": 15715 }, { "epoch": 0.1572, "grad_norm": 3.848506450653076, "learning_rate": 4.256616161616162e-06, "loss": 6.4586952209472654, "step": 15720 }, { "epoch": 0.15725, "grad_norm": 6.629461765289307, "learning_rate": 4.256363636363637e-06, "loss": 6.454522705078125, "step": 15725 }, { "epoch": 0.1573, "grad_norm": 2.8838393688201904, "learning_rate": 4.256111111111111e-06, "loss": 6.425444030761719, "step": 15730 }, { "epoch": 0.15735, "grad_norm": 3.326627492904663, "learning_rate": 4.255858585858586e-06, "loss": 6.343922424316406, "step": 15735 }, { "epoch": 0.1574, "grad_norm": 4.511828899383545, "learning_rate": 4.255606060606061e-06, "loss": 6.442816925048828, "step": 15740 }, { "epoch": 0.15745, "grad_norm": 5.042054176330566, "learning_rate": 4.255353535353536e-06, "loss": 6.480522155761719, "step": 15745 }, { "epoch": 0.1575, "grad_norm": 3.0742592811584473, "learning_rate": 4.2551010101010106e-06, "loss": 6.439981079101562, "step": 15750 }, { "epoch": 0.15755, "grad_norm": 5.707451820373535, "learning_rate": 4.254848484848485e-06, "loss": 6.4064888000488285, "step": 15755 }, { "epoch": 0.1576, "grad_norm": 2.4448063373565674, "learning_rate": 4.25459595959596e-06, "loss": 6.451398468017578, "step": 15760 }, { "epoch": 0.15765, "grad_norm": 4.62144136428833, "learning_rate": 4.2543434343434345e-06, "loss": 6.470231628417968, "step": 15765 }, { "epoch": 0.1577, "grad_norm": 4.919021129608154, "learning_rate": 4.254090909090909e-06, "loss": 6.481451416015625, "step": 15770 }, { "epoch": 0.15775, "grad_norm": 6.0084967613220215, "learning_rate": 4.253838383838384e-06, "loss": 6.450239562988282, "step": 15775 }, { "epoch": 0.1578, "grad_norm": 3.159843921661377, "learning_rate": 4.253585858585859e-06, "loss": 6.491239166259765, "step": 15780 }, { "epoch": 0.15785, "grad_norm": 6.70779275894165, "learning_rate": 4.253333333333334e-06, "loss": 6.557952117919922, "step": 15785 }, { "epoch": 0.1579, "grad_norm": 4.855912685394287, "learning_rate": 4.2530808080808085e-06, "loss": 6.498250579833984, "step": 15790 }, { "epoch": 0.15795, "grad_norm": 4.173107624053955, "learning_rate": 4.252828282828283e-06, "loss": 6.474813079833984, "step": 15795 }, { "epoch": 0.158, "grad_norm": 3.8840222358703613, "learning_rate": 4.252575757575758e-06, "loss": 6.458590698242188, "step": 15800 }, { "epoch": 0.15805, "grad_norm": 4.655211925506592, "learning_rate": 4.252323232323232e-06, "loss": 6.4545234680175785, "step": 15805 }, { "epoch": 0.1581, "grad_norm": 4.692041873931885, "learning_rate": 4.252070707070707e-06, "loss": 6.57176513671875, "step": 15810 }, { "epoch": 0.15815, "grad_norm": 4.872548580169678, "learning_rate": 4.251818181818182e-06, "loss": 6.473286437988281, "step": 15815 }, { "epoch": 0.1582, "grad_norm": 5.402608394622803, "learning_rate": 4.251565656565657e-06, "loss": 6.470040893554687, "step": 15820 }, { "epoch": 0.15825, "grad_norm": 3.5323402881622314, "learning_rate": 4.251313131313132e-06, "loss": 6.431044006347657, "step": 15825 }, { "epoch": 0.1583, "grad_norm": 4.865192890167236, "learning_rate": 4.251060606060606e-06, "loss": 6.433296203613281, "step": 15830 }, { "epoch": 0.15835, "grad_norm": 3.996983766555786, "learning_rate": 4.250808080808081e-06, "loss": 6.40709228515625, "step": 15835 }, { "epoch": 0.1584, "grad_norm": 6.394336223602295, "learning_rate": 4.2505555555555565e-06, "loss": 6.473453521728516, "step": 15840 }, { "epoch": 0.15845, "grad_norm": 4.792330741882324, "learning_rate": 4.250303030303031e-06, "loss": 6.519275665283203, "step": 15845 }, { "epoch": 0.1585, "grad_norm": 3.551593542098999, "learning_rate": 4.250050505050506e-06, "loss": 6.4656929016113285, "step": 15850 }, { "epoch": 0.15855, "grad_norm": 4.062755584716797, "learning_rate": 4.2497979797979795e-06, "loss": 6.459420776367187, "step": 15855 }, { "epoch": 0.1586, "grad_norm": 4.40252685546875, "learning_rate": 4.249545454545455e-06, "loss": 6.4105064392089846, "step": 15860 }, { "epoch": 0.15865, "grad_norm": 4.899846076965332, "learning_rate": 4.24929292929293e-06, "loss": 6.432331848144531, "step": 15865 }, { "epoch": 0.1587, "grad_norm": 5.257704257965088, "learning_rate": 4.249040404040404e-06, "loss": 6.46602783203125, "step": 15870 }, { "epoch": 0.15875, "grad_norm": 7.245583534240723, "learning_rate": 4.248787878787879e-06, "loss": 6.464693450927735, "step": 15875 }, { "epoch": 0.1588, "grad_norm": 3.1377813816070557, "learning_rate": 4.248535353535354e-06, "loss": 6.447336578369141, "step": 15880 }, { "epoch": 0.15885, "grad_norm": 7.206132888793945, "learning_rate": 4.248282828282829e-06, "loss": 6.431194305419922, "step": 15885 }, { "epoch": 0.1589, "grad_norm": 4.869760990142822, "learning_rate": 4.248030303030304e-06, "loss": 6.476543426513672, "step": 15890 }, { "epoch": 0.15895, "grad_norm": 3.3923707008361816, "learning_rate": 4.247777777777778e-06, "loss": 6.49322509765625, "step": 15895 }, { "epoch": 0.159, "grad_norm": 5.229952812194824, "learning_rate": 4.247525252525253e-06, "loss": 6.600070190429688, "step": 15900 }, { "epoch": 0.15905, "grad_norm": 5.926819324493408, "learning_rate": 4.2472727272727275e-06, "loss": 6.485419464111328, "step": 15905 }, { "epoch": 0.1591, "grad_norm": 4.104551792144775, "learning_rate": 4.247020202020202e-06, "loss": 6.436927795410156, "step": 15910 }, { "epoch": 0.15915, "grad_norm": 6.449350357055664, "learning_rate": 4.246767676767677e-06, "loss": 6.429209136962891, "step": 15915 }, { "epoch": 0.1592, "grad_norm": 4.858819961547852, "learning_rate": 4.246515151515152e-06, "loss": 6.492255401611328, "step": 15920 }, { "epoch": 0.15925, "grad_norm": 6.511256694793701, "learning_rate": 4.246262626262627e-06, "loss": 6.427235412597656, "step": 15925 }, { "epoch": 0.1593, "grad_norm": 11.22827434539795, "learning_rate": 4.2460101010101015e-06, "loss": 6.5309288024902346, "step": 15930 }, { "epoch": 0.15935, "grad_norm": 2.253542423248291, "learning_rate": 4.245757575757576e-06, "loss": 6.505329895019531, "step": 15935 }, { "epoch": 0.1594, "grad_norm": 3.4731669425964355, "learning_rate": 4.245505050505051e-06, "loss": 6.468534088134765, "step": 15940 }, { "epoch": 0.15945, "grad_norm": 7.399446487426758, "learning_rate": 4.245252525252525e-06, "loss": 6.4223175048828125, "step": 15945 }, { "epoch": 0.1595, "grad_norm": 5.531643867492676, "learning_rate": 4.245e-06, "loss": 6.480675506591797, "step": 15950 }, { "epoch": 0.15955, "grad_norm": 3.969433069229126, "learning_rate": 4.244747474747475e-06, "loss": 6.410305023193359, "step": 15955 }, { "epoch": 0.1596, "grad_norm": 5.107474327087402, "learning_rate": 4.24449494949495e-06, "loss": 6.411351013183594, "step": 15960 }, { "epoch": 0.15965, "grad_norm": 3.7712581157684326, "learning_rate": 4.244242424242425e-06, "loss": 6.4707489013671875, "step": 15965 }, { "epoch": 0.1597, "grad_norm": 4.941935062408447, "learning_rate": 4.243989898989899e-06, "loss": 6.427676391601563, "step": 15970 }, { "epoch": 0.15975, "grad_norm": 4.40817403793335, "learning_rate": 4.243737373737374e-06, "loss": 6.438856506347657, "step": 15975 }, { "epoch": 0.1598, "grad_norm": 3.7497217655181885, "learning_rate": 4.243484848484849e-06, "loss": 6.443362426757813, "step": 15980 }, { "epoch": 0.15985, "grad_norm": 3.2543506622314453, "learning_rate": 4.243232323232323e-06, "loss": 6.457855987548828, "step": 15985 }, { "epoch": 0.1599, "grad_norm": 6.414821147918701, "learning_rate": 4.242979797979798e-06, "loss": 6.4006591796875, "step": 15990 }, { "epoch": 0.15995, "grad_norm": 4.551130771636963, "learning_rate": 4.2427272727272725e-06, "loss": 6.426502990722656, "step": 15995 }, { "epoch": 0.16, "grad_norm": 5.783714771270752, "learning_rate": 4.242474747474748e-06, "loss": 6.468351745605469, "step": 16000 }, { "epoch": 0.16005, "grad_norm": 2.406008243560791, "learning_rate": 4.242222222222223e-06, "loss": 6.443050384521484, "step": 16005 }, { "epoch": 0.1601, "grad_norm": 10.063053131103516, "learning_rate": 4.241969696969697e-06, "loss": 6.442817687988281, "step": 16010 }, { "epoch": 0.16015, "grad_norm": 3.2604401111602783, "learning_rate": 4.241717171717172e-06, "loss": 6.50952377319336, "step": 16015 }, { "epoch": 0.1602, "grad_norm": 5.585784912109375, "learning_rate": 4.2414646464646465e-06, "loss": 6.365631103515625, "step": 16020 }, { "epoch": 0.16025, "grad_norm": 4.35003662109375, "learning_rate": 4.241212121212121e-06, "loss": 6.465089416503906, "step": 16025 }, { "epoch": 0.1603, "grad_norm": 2.479652166366577, "learning_rate": 4.240959595959596e-06, "loss": 6.496056365966797, "step": 16030 }, { "epoch": 0.16035, "grad_norm": 4.349809646606445, "learning_rate": 4.240707070707071e-06, "loss": 6.424103546142578, "step": 16035 }, { "epoch": 0.1604, "grad_norm": 5.36475944519043, "learning_rate": 4.240454545454546e-06, "loss": 6.509464263916016, "step": 16040 }, { "epoch": 0.16045, "grad_norm": 4.1829915046691895, "learning_rate": 4.2402020202020205e-06, "loss": 6.462553405761719, "step": 16045 }, { "epoch": 0.1605, "grad_norm": 7.2911481857299805, "learning_rate": 4.239949494949495e-06, "loss": 6.483542633056641, "step": 16050 }, { "epoch": 0.16055, "grad_norm": 3.8825504779815674, "learning_rate": 4.239696969696971e-06, "loss": 6.458918762207031, "step": 16055 }, { "epoch": 0.1606, "grad_norm": 3.251694679260254, "learning_rate": 4.239444444444445e-06, "loss": 6.447150421142578, "step": 16060 }, { "epoch": 0.16065, "grad_norm": 3.755464792251587, "learning_rate": 4.23919191919192e-06, "loss": 6.4382484436035154, "step": 16065 }, { "epoch": 0.1607, "grad_norm": 4.762530326843262, "learning_rate": 4.2389393939393945e-06, "loss": 6.448199462890625, "step": 16070 }, { "epoch": 0.16075, "grad_norm": 2.7810277938842773, "learning_rate": 4.238686868686869e-06, "loss": 6.489947509765625, "step": 16075 }, { "epoch": 0.1608, "grad_norm": 4.165729522705078, "learning_rate": 4.238434343434344e-06, "loss": 6.441806793212891, "step": 16080 }, { "epoch": 0.16085, "grad_norm": 3.935276985168457, "learning_rate": 4.238181818181818e-06, "loss": 6.470269775390625, "step": 16085 }, { "epoch": 0.1609, "grad_norm": 4.503995418548584, "learning_rate": 4.237929292929293e-06, "loss": 6.4163330078125, "step": 16090 }, { "epoch": 0.16095, "grad_norm": 3.801670789718628, "learning_rate": 4.2376767676767685e-06, "loss": 6.395263290405273, "step": 16095 }, { "epoch": 0.161, "grad_norm": 7.7292914390563965, "learning_rate": 4.237424242424243e-06, "loss": 6.516740417480468, "step": 16100 }, { "epoch": 0.16105, "grad_norm": 6.423474311828613, "learning_rate": 4.237171717171718e-06, "loss": 6.4338432312011715, "step": 16105 }, { "epoch": 0.1611, "grad_norm": 5.955942153930664, "learning_rate": 4.236919191919192e-06, "loss": 6.4390716552734375, "step": 16110 }, { "epoch": 0.16115, "grad_norm": 4.293849945068359, "learning_rate": 4.236666666666667e-06, "loss": 6.424281311035156, "step": 16115 }, { "epoch": 0.1612, "grad_norm": 2.4379286766052246, "learning_rate": 4.236414141414142e-06, "loss": 6.511492919921875, "step": 16120 }, { "epoch": 0.16125, "grad_norm": 4.320854187011719, "learning_rate": 4.236161616161616e-06, "loss": 6.437481689453125, "step": 16125 }, { "epoch": 0.1613, "grad_norm": 4.887234210968018, "learning_rate": 4.235909090909091e-06, "loss": 6.466165161132812, "step": 16130 }, { "epoch": 0.16135, "grad_norm": 3.7911219596862793, "learning_rate": 4.235656565656566e-06, "loss": 6.4869636535644535, "step": 16135 }, { "epoch": 0.1614, "grad_norm": 2.759038209915161, "learning_rate": 4.235404040404041e-06, "loss": 6.481079864501953, "step": 16140 }, { "epoch": 0.16145, "grad_norm": 4.109643936157227, "learning_rate": 4.235151515151516e-06, "loss": 6.435280609130859, "step": 16145 }, { "epoch": 0.1615, "grad_norm": 4.310450077056885, "learning_rate": 4.23489898989899e-06, "loss": 6.439903259277344, "step": 16150 }, { "epoch": 0.16155, "grad_norm": 3.872220277786255, "learning_rate": 4.234646464646465e-06, "loss": 6.4138938903808596, "step": 16155 }, { "epoch": 0.1616, "grad_norm": 5.182963848114014, "learning_rate": 4.2343939393939396e-06, "loss": 6.544090270996094, "step": 16160 }, { "epoch": 0.16165, "grad_norm": 3.5009608268737793, "learning_rate": 4.234141414141414e-06, "loss": 6.46972885131836, "step": 16165 }, { "epoch": 0.1617, "grad_norm": 4.864871501922607, "learning_rate": 4.233888888888889e-06, "loss": 6.4555107116699215, "step": 16170 }, { "epoch": 0.16175, "grad_norm": 3.036008596420288, "learning_rate": 4.233636363636364e-06, "loss": 6.42877197265625, "step": 16175 }, { "epoch": 0.1618, "grad_norm": 11.388215065002441, "learning_rate": 4.233383838383839e-06, "loss": 6.619580078125, "step": 16180 }, { "epoch": 0.16185, "grad_norm": 4.7118048667907715, "learning_rate": 4.2331313131313136e-06, "loss": 6.42541275024414, "step": 16185 }, { "epoch": 0.1619, "grad_norm": 23.974830627441406, "learning_rate": 4.232878787878788e-06, "loss": 6.428492736816406, "step": 16190 }, { "epoch": 0.16195, "grad_norm": 14.154603958129883, "learning_rate": 4.232626262626263e-06, "loss": 5.733763885498047, "step": 16195 }, { "epoch": 0.162, "grad_norm": 19.878480911254883, "learning_rate": 4.2323737373737374e-06, "loss": 5.483457946777344, "step": 16200 }, { "epoch": 0.16205, "grad_norm": 7.669607162475586, "learning_rate": 4.232121212121212e-06, "loss": 6.450257110595703, "step": 16205 }, { "epoch": 0.1621, "grad_norm": 7.47354793548584, "learning_rate": 4.231868686868687e-06, "loss": 6.644809722900391, "step": 16210 }, { "epoch": 0.16215, "grad_norm": 6.442617416381836, "learning_rate": 4.231616161616162e-06, "loss": 6.458718109130859, "step": 16215 }, { "epoch": 0.1622, "grad_norm": 5.833910942077637, "learning_rate": 4.231363636363637e-06, "loss": 6.422599792480469, "step": 16220 }, { "epoch": 0.16225, "grad_norm": 7.300108909606934, "learning_rate": 4.2311111111111114e-06, "loss": 6.402127075195312, "step": 16225 }, { "epoch": 0.1623, "grad_norm": 3.6949117183685303, "learning_rate": 4.230858585858586e-06, "loss": 6.414455413818359, "step": 16230 }, { "epoch": 0.16235, "grad_norm": 19.971410751342773, "learning_rate": 4.2306060606060616e-06, "loss": 6.355553436279297, "step": 16235 }, { "epoch": 0.1624, "grad_norm": 6.709630489349365, "learning_rate": 4.230353535353536e-06, "loss": 6.310438919067383, "step": 16240 }, { "epoch": 0.16245, "grad_norm": 4.709168434143066, "learning_rate": 4.23010101010101e-06, "loss": 6.455739593505859, "step": 16245 }, { "epoch": 0.1625, "grad_norm": 4.906622409820557, "learning_rate": 4.229848484848485e-06, "loss": 6.443818664550781, "step": 16250 }, { "epoch": 0.16255, "grad_norm": 6.273735523223877, "learning_rate": 4.22959595959596e-06, "loss": 6.441227722167969, "step": 16255 }, { "epoch": 0.1626, "grad_norm": 4.77756929397583, "learning_rate": 4.229343434343435e-06, "loss": 6.433401489257813, "step": 16260 }, { "epoch": 0.16265, "grad_norm": 6.260764122009277, "learning_rate": 4.229090909090909e-06, "loss": 6.4640869140625, "step": 16265 }, { "epoch": 0.1627, "grad_norm": 5.687631607055664, "learning_rate": 4.228838383838384e-06, "loss": 6.535980224609375, "step": 16270 }, { "epoch": 0.16275, "grad_norm": 10.759283065795898, "learning_rate": 4.2285858585858594e-06, "loss": 6.4888359069824215, "step": 16275 }, { "epoch": 0.1628, "grad_norm": 9.458077430725098, "learning_rate": 4.228333333333334e-06, "loss": 6.507443237304687, "step": 16280 }, { "epoch": 0.16285, "grad_norm": 5.979526042938232, "learning_rate": 4.228080808080809e-06, "loss": 6.38446044921875, "step": 16285 }, { "epoch": 0.1629, "grad_norm": 18.028772354125977, "learning_rate": 4.227828282828283e-06, "loss": 6.341520309448242, "step": 16290 }, { "epoch": 0.16295, "grad_norm": 6.105372905731201, "learning_rate": 4.227575757575758e-06, "loss": 6.422767639160156, "step": 16295 }, { "epoch": 0.163, "grad_norm": 3.7926185131073, "learning_rate": 4.227323232323233e-06, "loss": 6.500867462158203, "step": 16300 }, { "epoch": 0.16305, "grad_norm": 5.025500774383545, "learning_rate": 4.227070707070707e-06, "loss": 6.442127990722656, "step": 16305 }, { "epoch": 0.1631, "grad_norm": 7.424211502075195, "learning_rate": 4.226818181818182e-06, "loss": 6.457170867919922, "step": 16310 }, { "epoch": 0.16315, "grad_norm": 4.195115566253662, "learning_rate": 4.226565656565657e-06, "loss": 6.412262725830078, "step": 16315 }, { "epoch": 0.1632, "grad_norm": 4.0624494552612305, "learning_rate": 4.226313131313132e-06, "loss": 6.422985076904297, "step": 16320 }, { "epoch": 0.16325, "grad_norm": 6.469831466674805, "learning_rate": 4.226060606060607e-06, "loss": 6.4789482116699215, "step": 16325 }, { "epoch": 0.1633, "grad_norm": 6.715437412261963, "learning_rate": 4.225808080808081e-06, "loss": 6.409690856933594, "step": 16330 }, { "epoch": 0.16335, "grad_norm": 2.799893379211426, "learning_rate": 4.225555555555556e-06, "loss": 6.52038345336914, "step": 16335 }, { "epoch": 0.1634, "grad_norm": 4.6705241203308105, "learning_rate": 4.2253030303030305e-06, "loss": 6.38519515991211, "step": 16340 }, { "epoch": 0.16345, "grad_norm": 3.5847480297088623, "learning_rate": 4.225050505050505e-06, "loss": 6.457364654541015, "step": 16345 }, { "epoch": 0.1635, "grad_norm": 4.613053798675537, "learning_rate": 4.22479797979798e-06, "loss": 6.430731201171875, "step": 16350 }, { "epoch": 0.16355, "grad_norm": 3.464993476867676, "learning_rate": 4.224545454545455e-06, "loss": 6.417494201660157, "step": 16355 }, { "epoch": 0.1636, "grad_norm": 5.135258197784424, "learning_rate": 4.22429292929293e-06, "loss": 6.408103179931641, "step": 16360 }, { "epoch": 0.16365, "grad_norm": 4.050636291503906, "learning_rate": 4.2240404040404045e-06, "loss": 6.46990966796875, "step": 16365 }, { "epoch": 0.1637, "grad_norm": 6.1001667976379395, "learning_rate": 4.223787878787879e-06, "loss": 6.467813873291016, "step": 16370 }, { "epoch": 0.16375, "grad_norm": 16.962297439575195, "learning_rate": 4.223535353535354e-06, "loss": 6.569458770751953, "step": 16375 }, { "epoch": 0.1638, "grad_norm": 6.115970611572266, "learning_rate": 4.223282828282828e-06, "loss": 6.472386169433594, "step": 16380 }, { "epoch": 0.16385, "grad_norm": 4.759397983551025, "learning_rate": 4.223030303030303e-06, "loss": 6.567878723144531, "step": 16385 }, { "epoch": 0.1639, "grad_norm": 6.329344749450684, "learning_rate": 4.222777777777778e-06, "loss": 6.429273223876953, "step": 16390 }, { "epoch": 0.16395, "grad_norm": 3.9045960903167725, "learning_rate": 4.222525252525253e-06, "loss": 6.453604888916016, "step": 16395 }, { "epoch": 0.164, "grad_norm": 5.703576564788818, "learning_rate": 4.222272727272728e-06, "loss": 6.397611236572265, "step": 16400 }, { "epoch": 0.16405, "grad_norm": 6.27435827255249, "learning_rate": 4.222020202020202e-06, "loss": 6.600962829589844, "step": 16405 }, { "epoch": 0.1641, "grad_norm": 3.1268818378448486, "learning_rate": 4.221767676767677e-06, "loss": 6.422604370117187, "step": 16410 }, { "epoch": 0.16415, "grad_norm": 7.610786437988281, "learning_rate": 4.221515151515152e-06, "loss": 6.395438766479492, "step": 16415 }, { "epoch": 0.1642, "grad_norm": 5.743587493896484, "learning_rate": 4.221262626262626e-06, "loss": 6.492610931396484, "step": 16420 }, { "epoch": 0.16425, "grad_norm": 3.787879467010498, "learning_rate": 4.221010101010101e-06, "loss": 6.3909423828125, "step": 16425 }, { "epoch": 0.1643, "grad_norm": 3.1085855960845947, "learning_rate": 4.2207575757575755e-06, "loss": 6.434197998046875, "step": 16430 }, { "epoch": 0.16435, "grad_norm": 5.400134563446045, "learning_rate": 4.220505050505051e-06, "loss": 6.434619903564453, "step": 16435 }, { "epoch": 0.1644, "grad_norm": 5.58909273147583, "learning_rate": 4.220252525252526e-06, "loss": 6.423196411132812, "step": 16440 }, { "epoch": 0.16445, "grad_norm": 3.8947064876556396, "learning_rate": 4.22e-06, "loss": 6.438726806640625, "step": 16445 }, { "epoch": 0.1645, "grad_norm": 6.359738349914551, "learning_rate": 4.219747474747476e-06, "loss": 6.455271911621094, "step": 16450 }, { "epoch": 0.16455, "grad_norm": 3.528735637664795, "learning_rate": 4.21949494949495e-06, "loss": 6.377635955810547, "step": 16455 }, { "epoch": 0.1646, "grad_norm": 5.543984889984131, "learning_rate": 4.219242424242425e-06, "loss": 6.423641204833984, "step": 16460 }, { "epoch": 0.16465, "grad_norm": 3.6377551555633545, "learning_rate": 4.218989898989899e-06, "loss": 6.395376968383789, "step": 16465 }, { "epoch": 0.1647, "grad_norm": 5.678615093231201, "learning_rate": 4.218737373737374e-06, "loss": 6.425778198242187, "step": 16470 }, { "epoch": 0.16475, "grad_norm": 5.990975856781006, "learning_rate": 4.218484848484849e-06, "loss": 6.44583740234375, "step": 16475 }, { "epoch": 0.1648, "grad_norm": 4.143665790557861, "learning_rate": 4.2182323232323235e-06, "loss": 6.431034088134766, "step": 16480 }, { "epoch": 0.16485, "grad_norm": 4.261204719543457, "learning_rate": 4.217979797979798e-06, "loss": 6.540504455566406, "step": 16485 }, { "epoch": 0.1649, "grad_norm": 7.385230541229248, "learning_rate": 4.217727272727274e-06, "loss": 6.517402648925781, "step": 16490 }, { "epoch": 0.16495, "grad_norm": 2.9505698680877686, "learning_rate": 4.217474747474748e-06, "loss": 6.397237396240234, "step": 16495 }, { "epoch": 0.165, "grad_norm": 6.0776848793029785, "learning_rate": 4.217222222222223e-06, "loss": 6.448817443847656, "step": 16500 }, { "epoch": 0.16505, "grad_norm": 3.2803962230682373, "learning_rate": 4.2169696969696975e-06, "loss": 6.691989135742188, "step": 16505 }, { "epoch": 0.1651, "grad_norm": 6.6952738761901855, "learning_rate": 4.216717171717172e-06, "loss": 6.415699768066406, "step": 16510 }, { "epoch": 0.16515, "grad_norm": 4.204198837280273, "learning_rate": 4.216464646464647e-06, "loss": 6.425327301025391, "step": 16515 }, { "epoch": 0.1652, "grad_norm": 2.6166863441467285, "learning_rate": 4.216212121212121e-06, "loss": 6.437786865234375, "step": 16520 }, { "epoch": 0.16525, "grad_norm": 5.264849662780762, "learning_rate": 4.215959595959596e-06, "loss": 6.4240478515625, "step": 16525 }, { "epoch": 0.1653, "grad_norm": 5.0223541259765625, "learning_rate": 4.2157070707070715e-06, "loss": 6.4648796081542965, "step": 16530 }, { "epoch": 0.16535, "grad_norm": 3.013516902923584, "learning_rate": 4.215454545454546e-06, "loss": 6.4239501953125, "step": 16535 }, { "epoch": 0.1654, "grad_norm": 3.243837833404541, "learning_rate": 4.215202020202021e-06, "loss": 6.460826873779297, "step": 16540 }, { "epoch": 0.16545, "grad_norm": 3.9929494857788086, "learning_rate": 4.214949494949495e-06, "loss": 6.456593322753906, "step": 16545 }, { "epoch": 0.1655, "grad_norm": 4.792909622192383, "learning_rate": 4.21469696969697e-06, "loss": 6.432769012451172, "step": 16550 }, { "epoch": 0.16555, "grad_norm": 2.9042487144470215, "learning_rate": 4.214444444444445e-06, "loss": 6.412268829345703, "step": 16555 }, { "epoch": 0.1656, "grad_norm": 4.825240612030029, "learning_rate": 4.214191919191919e-06, "loss": 6.461263275146484, "step": 16560 }, { "epoch": 0.16565, "grad_norm": 4.655351161956787, "learning_rate": 4.213939393939394e-06, "loss": 6.40477294921875, "step": 16565 }, { "epoch": 0.1657, "grad_norm": 5.673925399780273, "learning_rate": 4.213686868686869e-06, "loss": 6.397513961791992, "step": 16570 }, { "epoch": 0.16575, "grad_norm": 4.848138809204102, "learning_rate": 4.213434343434344e-06, "loss": 6.403958129882812, "step": 16575 }, { "epoch": 0.1658, "grad_norm": 3.40531325340271, "learning_rate": 4.213181818181819e-06, "loss": 6.418567657470703, "step": 16580 }, { "epoch": 0.16585, "grad_norm": 5.7261176109313965, "learning_rate": 4.212929292929293e-06, "loss": 6.472645568847656, "step": 16585 }, { "epoch": 0.1659, "grad_norm": 4.942929267883301, "learning_rate": 4.212676767676768e-06, "loss": 6.542838287353516, "step": 16590 }, { "epoch": 0.16595, "grad_norm": 7.8102030754089355, "learning_rate": 4.2124242424242425e-06, "loss": 6.411373901367187, "step": 16595 }, { "epoch": 0.166, "grad_norm": 5.693139553070068, "learning_rate": 4.212171717171717e-06, "loss": 6.39751091003418, "step": 16600 }, { "epoch": 0.16605, "grad_norm": 4.0060296058654785, "learning_rate": 4.211919191919192e-06, "loss": 6.429615783691406, "step": 16605 }, { "epoch": 0.1661, "grad_norm": 3.6315958499908447, "learning_rate": 4.211666666666667e-06, "loss": 6.361412429809571, "step": 16610 }, { "epoch": 0.16615, "grad_norm": 4.41163444519043, "learning_rate": 4.211414141414142e-06, "loss": 6.422952270507812, "step": 16615 }, { "epoch": 0.1662, "grad_norm": 3.4001805782318115, "learning_rate": 4.2111616161616165e-06, "loss": 6.464498901367188, "step": 16620 }, { "epoch": 0.16625, "grad_norm": 6.489120960235596, "learning_rate": 4.210909090909091e-06, "loss": 6.366706085205078, "step": 16625 }, { "epoch": 0.1663, "grad_norm": 3.466364622116089, "learning_rate": 4.210656565656566e-06, "loss": 6.416133117675781, "step": 16630 }, { "epoch": 0.16635, "grad_norm": 3.7080860137939453, "learning_rate": 4.2104040404040404e-06, "loss": 6.393110656738282, "step": 16635 }, { "epoch": 0.1664, "grad_norm": 2.685086727142334, "learning_rate": 4.210151515151515e-06, "loss": 6.409326171875, "step": 16640 }, { "epoch": 0.16645, "grad_norm": 6.166691780090332, "learning_rate": 4.20989898989899e-06, "loss": 6.438764190673828, "step": 16645 }, { "epoch": 0.1665, "grad_norm": 2.764755964279175, "learning_rate": 4.209646464646465e-06, "loss": 6.452267456054687, "step": 16650 }, { "epoch": 0.16655, "grad_norm": 6.233506202697754, "learning_rate": 4.20939393939394e-06, "loss": 6.4062744140625, "step": 16655 }, { "epoch": 0.1666, "grad_norm": 22.86174201965332, "learning_rate": 4.2091414141414144e-06, "loss": 6.593110656738281, "step": 16660 }, { "epoch": 0.16665, "grad_norm": 4.848666191101074, "learning_rate": 4.208888888888889e-06, "loss": 6.4826301574707035, "step": 16665 }, { "epoch": 0.1667, "grad_norm": 4.253280162811279, "learning_rate": 4.2086363636363645e-06, "loss": 6.425471496582031, "step": 16670 }, { "epoch": 0.16675, "grad_norm": 7.824203968048096, "learning_rate": 4.208383838383839e-06, "loss": 6.325553131103516, "step": 16675 }, { "epoch": 0.1668, "grad_norm": 3.3523967266082764, "learning_rate": 4.208131313131314e-06, "loss": 6.446279907226563, "step": 16680 }, { "epoch": 0.16685, "grad_norm": 3.283317804336548, "learning_rate": 4.2078787878787884e-06, "loss": 6.469338989257812, "step": 16685 }, { "epoch": 0.1669, "grad_norm": 2.6600000858306885, "learning_rate": 4.207626262626263e-06, "loss": 6.399596405029297, "step": 16690 }, { "epoch": 0.16695, "grad_norm": 8.969189643859863, "learning_rate": 4.207373737373738e-06, "loss": 6.5745361328125, "step": 16695 }, { "epoch": 0.167, "grad_norm": 2.7679190635681152, "learning_rate": 4.207121212121212e-06, "loss": 6.4103271484375, "step": 16700 }, { "epoch": 0.16705, "grad_norm": 4.022441864013672, "learning_rate": 4.206868686868687e-06, "loss": 6.421254730224609, "step": 16705 }, { "epoch": 0.1671, "grad_norm": 2.6387112140655518, "learning_rate": 4.2066161616161624e-06, "loss": 6.412324523925781, "step": 16710 }, { "epoch": 0.16715, "grad_norm": 5.649288654327393, "learning_rate": 4.206363636363637e-06, "loss": 6.423363494873047, "step": 16715 }, { "epoch": 0.1672, "grad_norm": 11.844217300415039, "learning_rate": 4.206111111111112e-06, "loss": 6.355509567260742, "step": 16720 }, { "epoch": 0.16725, "grad_norm": 18.722537994384766, "learning_rate": 4.205858585858586e-06, "loss": 6.233999633789063, "step": 16725 }, { "epoch": 0.1673, "grad_norm": 3.016089916229248, "learning_rate": 4.205606060606061e-06, "loss": 6.463859558105469, "step": 16730 }, { "epoch": 0.16735, "grad_norm": 3.5590295791625977, "learning_rate": 4.2053535353535356e-06, "loss": 6.446290588378906, "step": 16735 }, { "epoch": 0.1674, "grad_norm": 4.0839338302612305, "learning_rate": 4.20510101010101e-06, "loss": 6.398403167724609, "step": 16740 }, { "epoch": 0.16745, "grad_norm": 4.5346856117248535, "learning_rate": 4.204848484848485e-06, "loss": 6.474373626708984, "step": 16745 }, { "epoch": 0.1675, "grad_norm": 6.564062118530273, "learning_rate": 4.20459595959596e-06, "loss": 6.395368576049805, "step": 16750 }, { "epoch": 0.16755, "grad_norm": 8.376795768737793, "learning_rate": 4.204343434343435e-06, "loss": 6.430104064941406, "step": 16755 }, { "epoch": 0.1676, "grad_norm": 22.471263885498047, "learning_rate": 4.2040909090909096e-06, "loss": 6.042660522460937, "step": 16760 }, { "epoch": 0.16765, "grad_norm": 14.265641212463379, "learning_rate": 4.203838383838384e-06, "loss": 5.460973739624023, "step": 16765 }, { "epoch": 0.1677, "grad_norm": 16.555116653442383, "learning_rate": 4.203585858585859e-06, "loss": 5.206225204467773, "step": 16770 }, { "epoch": 0.16775, "grad_norm": 12.322586059570312, "learning_rate": 4.2033333333333335e-06, "loss": 5.089409637451172, "step": 16775 }, { "epoch": 0.1678, "grad_norm": 14.999155044555664, "learning_rate": 4.203080808080808e-06, "loss": 5.152135848999023, "step": 16780 }, { "epoch": 0.16785, "grad_norm": 13.250843048095703, "learning_rate": 4.202828282828283e-06, "loss": 5.19085464477539, "step": 16785 }, { "epoch": 0.1679, "grad_norm": 11.851985931396484, "learning_rate": 4.202575757575758e-06, "loss": 5.205342864990234, "step": 16790 }, { "epoch": 0.16795, "grad_norm": 17.398193359375, "learning_rate": 4.202323232323233e-06, "loss": 5.029646301269532, "step": 16795 }, { "epoch": 0.168, "grad_norm": 11.01009464263916, "learning_rate": 4.2020707070707075e-06, "loss": 4.7829338073730465, "step": 16800 }, { "epoch": 0.16805, "grad_norm": 17.41663932800293, "learning_rate": 4.201818181818182e-06, "loss": 5.041512680053711, "step": 16805 }, { "epoch": 0.1681, "grad_norm": 7.868896007537842, "learning_rate": 4.201565656565657e-06, "loss": 5.04722900390625, "step": 16810 }, { "epoch": 0.16815, "grad_norm": 11.509933471679688, "learning_rate": 4.201313131313131e-06, "loss": 4.91136474609375, "step": 16815 }, { "epoch": 0.1682, "grad_norm": 13.429520606994629, "learning_rate": 4.201060606060606e-06, "loss": 5.026221466064453, "step": 16820 }, { "epoch": 0.16825, "grad_norm": 10.728781700134277, "learning_rate": 4.200808080808081e-06, "loss": 5.033795166015625, "step": 16825 }, { "epoch": 0.1683, "grad_norm": 14.340149879455566, "learning_rate": 4.200555555555556e-06, "loss": 5.0122325897216795, "step": 16830 }, { "epoch": 0.16835, "grad_norm": 9.439867973327637, "learning_rate": 4.200303030303031e-06, "loss": 4.954117202758789, "step": 16835 }, { "epoch": 0.1684, "grad_norm": 14.598928451538086, "learning_rate": 4.200050505050505e-06, "loss": 5.096096038818359, "step": 16840 }, { "epoch": 0.16845, "grad_norm": 7.051349639892578, "learning_rate": 4.19979797979798e-06, "loss": 5.180331039428711, "step": 16845 }, { "epoch": 0.1685, "grad_norm": 11.347814559936523, "learning_rate": 4.1995454545454555e-06, "loss": 5.158349609375, "step": 16850 }, { "epoch": 0.16855, "grad_norm": 11.40089225769043, "learning_rate": 4.199292929292929e-06, "loss": 4.920751571655273, "step": 16855 }, { "epoch": 0.1686, "grad_norm": 9.11288070678711, "learning_rate": 4.199040404040404e-06, "loss": 4.822760009765625, "step": 16860 }, { "epoch": 0.16865, "grad_norm": 12.327704429626465, "learning_rate": 4.1987878787878785e-06, "loss": 4.742352676391602, "step": 16865 }, { "epoch": 0.1687, "grad_norm": 11.233525276184082, "learning_rate": 4.198535353535354e-06, "loss": 4.822747039794922, "step": 16870 }, { "epoch": 0.16875, "grad_norm": 11.971772193908691, "learning_rate": 4.198282828282829e-06, "loss": 4.834860992431641, "step": 16875 }, { "epoch": 0.1688, "grad_norm": 9.623886108398438, "learning_rate": 4.198030303030303e-06, "loss": 4.91375732421875, "step": 16880 }, { "epoch": 0.16885, "grad_norm": 10.706768989562988, "learning_rate": 4.197777777777779e-06, "loss": 4.750580596923828, "step": 16885 }, { "epoch": 0.1689, "grad_norm": 20.55947494506836, "learning_rate": 4.197525252525253e-06, "loss": 5.914939880371094, "step": 16890 }, { "epoch": 0.16895, "grad_norm": 12.224813461303711, "learning_rate": 4.197272727272728e-06, "loss": 6.822359466552735, "step": 16895 }, { "epoch": 0.169, "grad_norm": 10.106796264648438, "learning_rate": 4.197020202020203e-06, "loss": 6.5930328369140625, "step": 16900 }, { "epoch": 0.16905, "grad_norm": 11.854436874389648, "learning_rate": 4.196767676767677e-06, "loss": 6.874118804931641, "step": 16905 }, { "epoch": 0.1691, "grad_norm": 12.526688575744629, "learning_rate": 4.196515151515152e-06, "loss": 6.562670135498047, "step": 16910 }, { "epoch": 0.16915, "grad_norm": 8.327079772949219, "learning_rate": 4.1962626262626265e-06, "loss": 6.549639129638672, "step": 16915 }, { "epoch": 0.1692, "grad_norm": 15.654070854187012, "learning_rate": 4.196010101010101e-06, "loss": 6.556614685058594, "step": 16920 }, { "epoch": 0.16925, "grad_norm": 14.302448272705078, "learning_rate": 4.195757575757577e-06, "loss": 6.5026802062988285, "step": 16925 }, { "epoch": 0.1693, "grad_norm": 11.121437072753906, "learning_rate": 4.195505050505051e-06, "loss": 6.508833312988282, "step": 16930 }, { "epoch": 0.16935, "grad_norm": 12.440268516540527, "learning_rate": 4.195252525252526e-06, "loss": 6.535239410400391, "step": 16935 }, { "epoch": 0.1694, "grad_norm": 11.622078895568848, "learning_rate": 4.1950000000000005e-06, "loss": 6.572379302978516, "step": 16940 }, { "epoch": 0.16945, "grad_norm": 9.849069595336914, "learning_rate": 4.194747474747475e-06, "loss": 6.552913665771484, "step": 16945 }, { "epoch": 0.1695, "grad_norm": 9.711047172546387, "learning_rate": 4.19449494949495e-06, "loss": 6.705776214599609, "step": 16950 }, { "epoch": 0.16955, "grad_norm": 8.60631275177002, "learning_rate": 4.194242424242424e-06, "loss": 6.5051429748535154, "step": 16955 }, { "epoch": 0.1696, "grad_norm": 10.695279121398926, "learning_rate": 4.193989898989899e-06, "loss": 6.710808563232422, "step": 16960 }, { "epoch": 0.16965, "grad_norm": 5.619162082672119, "learning_rate": 4.1937373737373745e-06, "loss": 6.575418090820312, "step": 16965 }, { "epoch": 0.1697, "grad_norm": 10.238823890686035, "learning_rate": 4.193484848484849e-06, "loss": 6.506493377685547, "step": 16970 }, { "epoch": 0.16975, "grad_norm": 12.84684944152832, "learning_rate": 4.193232323232324e-06, "loss": 6.530406188964844, "step": 16975 }, { "epoch": 0.1698, "grad_norm": 9.53205394744873, "learning_rate": 4.192979797979798e-06, "loss": 6.5736236572265625, "step": 16980 }, { "epoch": 0.16985, "grad_norm": 11.560309410095215, "learning_rate": 4.192727272727273e-06, "loss": 6.483158874511719, "step": 16985 }, { "epoch": 0.1699, "grad_norm": 10.62267780303955, "learning_rate": 4.192474747474748e-06, "loss": 6.490876770019531, "step": 16990 }, { "epoch": 0.16995, "grad_norm": 8.452971458435059, "learning_rate": 4.192222222222222e-06, "loss": 6.468659973144531, "step": 16995 }, { "epoch": 0.17, "grad_norm": 10.7777738571167, "learning_rate": 4.191969696969697e-06, "loss": 6.47882080078125, "step": 17000 }, { "epoch": 0.17005, "grad_norm": 9.507896423339844, "learning_rate": 4.191717171717172e-06, "loss": 6.477640533447266, "step": 17005 }, { "epoch": 0.1701, "grad_norm": 10.258687973022461, "learning_rate": 4.191464646464647e-06, "loss": 6.466783142089843, "step": 17010 }, { "epoch": 0.17015, "grad_norm": 7.577059745788574, "learning_rate": 4.191212121212122e-06, "loss": 6.554931640625, "step": 17015 }, { "epoch": 0.1702, "grad_norm": 10.8995943069458, "learning_rate": 4.190959595959596e-06, "loss": 6.473652648925781, "step": 17020 }, { "epoch": 0.17025, "grad_norm": 8.471739768981934, "learning_rate": 4.190707070707071e-06, "loss": 6.4854682922363285, "step": 17025 }, { "epoch": 0.1703, "grad_norm": 10.117073059082031, "learning_rate": 4.1904545454545455e-06, "loss": 6.458444213867187, "step": 17030 }, { "epoch": 0.17035, "grad_norm": 8.907591819763184, "learning_rate": 4.19020202020202e-06, "loss": 6.444873046875, "step": 17035 }, { "epoch": 0.1704, "grad_norm": 9.194348335266113, "learning_rate": 4.189949494949495e-06, "loss": 6.454792022705078, "step": 17040 }, { "epoch": 0.17045, "grad_norm": 8.759921073913574, "learning_rate": 4.18969696969697e-06, "loss": 6.51410140991211, "step": 17045 }, { "epoch": 0.1705, "grad_norm": 9.898579597473145, "learning_rate": 4.189444444444445e-06, "loss": 6.442343139648438, "step": 17050 }, { "epoch": 0.17055, "grad_norm": 8.211372375488281, "learning_rate": 4.1891919191919195e-06, "loss": 6.438838195800781, "step": 17055 }, { "epoch": 0.1706, "grad_norm": 9.545480728149414, "learning_rate": 4.188939393939394e-06, "loss": 6.448329925537109, "step": 17060 }, { "epoch": 0.17065, "grad_norm": 7.313986301422119, "learning_rate": 4.18868686868687e-06, "loss": 6.488162994384766, "step": 17065 }, { "epoch": 0.1707, "grad_norm": 10.340070724487305, "learning_rate": 4.188434343434344e-06, "loss": 6.453190612792969, "step": 17070 }, { "epoch": 0.17075, "grad_norm": 7.9209184646606445, "learning_rate": 4.188181818181818e-06, "loss": 6.419523620605469, "step": 17075 }, { "epoch": 0.1708, "grad_norm": 4.9438605308532715, "learning_rate": 4.187929292929293e-06, "loss": 6.7833610534667965, "step": 17080 }, { "epoch": 0.17085, "grad_norm": 9.614295959472656, "learning_rate": 4.187676767676768e-06, "loss": 6.460714721679688, "step": 17085 }, { "epoch": 0.1709, "grad_norm": 6.835304260253906, "learning_rate": 4.187424242424243e-06, "loss": 6.426715087890625, "step": 17090 }, { "epoch": 0.17095, "grad_norm": 7.65523099899292, "learning_rate": 4.187171717171717e-06, "loss": 6.445516204833984, "step": 17095 }, { "epoch": 0.171, "grad_norm": 7.406911849975586, "learning_rate": 4.186919191919192e-06, "loss": 6.425837707519531, "step": 17100 }, { "epoch": 0.17105, "grad_norm": 8.357217788696289, "learning_rate": 4.1866666666666675e-06, "loss": 6.390534210205078, "step": 17105 }, { "epoch": 0.1711, "grad_norm": 7.785171985626221, "learning_rate": 4.186414141414142e-06, "loss": 6.419249725341797, "step": 17110 }, { "epoch": 0.17115, "grad_norm": 9.406428337097168, "learning_rate": 4.186161616161617e-06, "loss": 6.485873413085938, "step": 17115 }, { "epoch": 0.1712, "grad_norm": 7.082335948944092, "learning_rate": 4.185909090909091e-06, "loss": 6.437287139892578, "step": 17120 }, { "epoch": 0.17125, "grad_norm": 6.526735305786133, "learning_rate": 4.185656565656566e-06, "loss": 6.41693344116211, "step": 17125 }, { "epoch": 0.1713, "grad_norm": 5.555368423461914, "learning_rate": 4.185404040404041e-06, "loss": 6.420165252685547, "step": 17130 }, { "epoch": 0.17135, "grad_norm": 7.253785133361816, "learning_rate": 4.185151515151515e-06, "loss": 6.431817626953125, "step": 17135 }, { "epoch": 0.1714, "grad_norm": 13.742109298706055, "learning_rate": 4.18489898989899e-06, "loss": 6.399580001831055, "step": 17140 }, { "epoch": 0.17145, "grad_norm": 6.071108818054199, "learning_rate": 4.184646464646465e-06, "loss": 6.417243957519531, "step": 17145 }, { "epoch": 0.1715, "grad_norm": 5.235020160675049, "learning_rate": 4.18439393939394e-06, "loss": 6.481988525390625, "step": 17150 }, { "epoch": 0.17155, "grad_norm": 5.20255184173584, "learning_rate": 4.184141414141415e-06, "loss": 6.4199981689453125, "step": 17155 }, { "epoch": 0.1716, "grad_norm": 6.821967124938965, "learning_rate": 4.183888888888889e-06, "loss": 6.417259216308594, "step": 17160 }, { "epoch": 0.17165, "grad_norm": 7.748119831085205, "learning_rate": 4.183636363636364e-06, "loss": 6.447384643554687, "step": 17165 }, { "epoch": 0.1717, "grad_norm": 5.931854248046875, "learning_rate": 4.1833838383838386e-06, "loss": 6.413048553466797, "step": 17170 }, { "epoch": 0.17175, "grad_norm": 8.45263385772705, "learning_rate": 4.183131313131313e-06, "loss": 6.531688690185547, "step": 17175 }, { "epoch": 0.1718, "grad_norm": 5.919859886169434, "learning_rate": 4.182878787878788e-06, "loss": 6.547940063476562, "step": 17180 }, { "epoch": 0.17185, "grad_norm": 6.635117530822754, "learning_rate": 4.182626262626263e-06, "loss": 6.45544662475586, "step": 17185 }, { "epoch": 0.1719, "grad_norm": 18.976661682128906, "learning_rate": 4.182373737373738e-06, "loss": 6.5513458251953125, "step": 17190 }, { "epoch": 0.17195, "grad_norm": 7.768197536468506, "learning_rate": 4.1821212121212126e-06, "loss": 6.418101501464844, "step": 17195 }, { "epoch": 0.172, "grad_norm": 6.595417022705078, "learning_rate": 4.181868686868687e-06, "loss": 6.416110992431641, "step": 17200 }, { "epoch": 0.17205, "grad_norm": 6.638600826263428, "learning_rate": 4.181616161616162e-06, "loss": 6.491832733154297, "step": 17205 }, { "epoch": 0.1721, "grad_norm": 7.196567058563232, "learning_rate": 4.1813636363636364e-06, "loss": 6.394417190551758, "step": 17210 }, { "epoch": 0.17215, "grad_norm": 6.881023406982422, "learning_rate": 4.181111111111111e-06, "loss": 6.457846069335938, "step": 17215 }, { "epoch": 0.1722, "grad_norm": 7.562849998474121, "learning_rate": 4.180858585858586e-06, "loss": 6.410661315917968, "step": 17220 }, { "epoch": 0.17225, "grad_norm": 6.282824516296387, "learning_rate": 4.180606060606061e-06, "loss": 6.466946411132812, "step": 17225 }, { "epoch": 0.1723, "grad_norm": 6.726131916046143, "learning_rate": 4.180353535353536e-06, "loss": 6.439227294921875, "step": 17230 }, { "epoch": 0.17235, "grad_norm": 4.565032482147217, "learning_rate": 4.1801010101010104e-06, "loss": 6.402475738525391, "step": 17235 }, { "epoch": 0.1724, "grad_norm": 6.987288475036621, "learning_rate": 4.179848484848485e-06, "loss": 6.500509643554688, "step": 17240 }, { "epoch": 0.17245, "grad_norm": 13.380884170532227, "learning_rate": 4.17959595959596e-06, "loss": 6.488036346435547, "step": 17245 }, { "epoch": 0.1725, "grad_norm": 11.736968040466309, "learning_rate": 4.179343434343434e-06, "loss": 6.508195495605468, "step": 17250 }, { "epoch": 0.17255, "grad_norm": 6.783361911773682, "learning_rate": 4.179090909090909e-06, "loss": 6.436824035644531, "step": 17255 }, { "epoch": 0.1726, "grad_norm": 6.1072258949279785, "learning_rate": 4.178838383838384e-06, "loss": 6.416410827636719, "step": 17260 }, { "epoch": 0.17265, "grad_norm": 5.337968826293945, "learning_rate": 4.178585858585859e-06, "loss": 6.511647033691406, "step": 17265 }, { "epoch": 0.1727, "grad_norm": 5.16908073425293, "learning_rate": 4.178333333333334e-06, "loss": 6.449606323242188, "step": 17270 }, { "epoch": 0.17275, "grad_norm": 6.056143283843994, "learning_rate": 4.178080808080808e-06, "loss": 6.4072021484375, "step": 17275 }, { "epoch": 0.1728, "grad_norm": 10.612218856811523, "learning_rate": 4.177828282828283e-06, "loss": 6.558159637451172, "step": 17280 }, { "epoch": 0.17285, "grad_norm": 6.515764236450195, "learning_rate": 4.1775757575757584e-06, "loss": 6.424083709716797, "step": 17285 }, { "epoch": 0.1729, "grad_norm": 6.095462322235107, "learning_rate": 4.177323232323233e-06, "loss": 6.457191467285156, "step": 17290 }, { "epoch": 0.17295, "grad_norm": 4.993344783782959, "learning_rate": 4.177070707070707e-06, "loss": 6.403738403320313, "step": 17295 }, { "epoch": 0.173, "grad_norm": 5.78537130355835, "learning_rate": 4.1768181818181815e-06, "loss": 6.4168548583984375, "step": 17300 }, { "epoch": 0.17305, "grad_norm": 6.331363677978516, "learning_rate": 4.176565656565657e-06, "loss": 6.429273223876953, "step": 17305 }, { "epoch": 0.1731, "grad_norm": 5.943604469299316, "learning_rate": 4.176313131313132e-06, "loss": 6.447319030761719, "step": 17310 }, { "epoch": 0.17315, "grad_norm": 4.52032995223999, "learning_rate": 4.176060606060606e-06, "loss": 6.385718917846679, "step": 17315 }, { "epoch": 0.1732, "grad_norm": 7.164947032928467, "learning_rate": 4.175808080808082e-06, "loss": 6.49749984741211, "step": 17320 }, { "epoch": 0.17325, "grad_norm": 6.533723831176758, "learning_rate": 4.175555555555556e-06, "loss": 6.4166206359863285, "step": 17325 }, { "epoch": 0.1733, "grad_norm": 5.742626667022705, "learning_rate": 4.175303030303031e-06, "loss": 6.401493072509766, "step": 17330 }, { "epoch": 0.17335, "grad_norm": 6.5434417724609375, "learning_rate": 4.175050505050506e-06, "loss": 6.40130386352539, "step": 17335 }, { "epoch": 0.1734, "grad_norm": 6.1866936683654785, "learning_rate": 4.17479797979798e-06, "loss": 6.423633575439453, "step": 17340 }, { "epoch": 0.17345, "grad_norm": 6.386356830596924, "learning_rate": 4.174545454545455e-06, "loss": 6.401957702636719, "step": 17345 }, { "epoch": 0.1735, "grad_norm": 7.245843887329102, "learning_rate": 4.1742929292929295e-06, "loss": 6.442058563232422, "step": 17350 }, { "epoch": 0.17355, "grad_norm": 5.0525126457214355, "learning_rate": 4.174040404040404e-06, "loss": 6.441812896728516, "step": 17355 }, { "epoch": 0.1736, "grad_norm": 7.086814880371094, "learning_rate": 4.17378787878788e-06, "loss": 6.414032745361328, "step": 17360 }, { "epoch": 0.17365, "grad_norm": 5.409482479095459, "learning_rate": 4.173535353535354e-06, "loss": 6.429686737060547, "step": 17365 }, { "epoch": 0.1737, "grad_norm": 5.636053085327148, "learning_rate": 4.173282828282829e-06, "loss": 6.454517364501953, "step": 17370 }, { "epoch": 0.17375, "grad_norm": 5.619957447052002, "learning_rate": 4.1730303030303035e-06, "loss": 6.400043487548828, "step": 17375 }, { "epoch": 0.1738, "grad_norm": 4.436914920806885, "learning_rate": 4.172777777777778e-06, "loss": 6.433793640136718, "step": 17380 }, { "epoch": 0.17385, "grad_norm": 5.435208797454834, "learning_rate": 4.172525252525253e-06, "loss": 6.4859870910644535, "step": 17385 }, { "epoch": 0.1739, "grad_norm": 7.154338836669922, "learning_rate": 4.172272727272727e-06, "loss": 6.390882110595703, "step": 17390 }, { "epoch": 0.17395, "grad_norm": 5.2114715576171875, "learning_rate": 4.172020202020202e-06, "loss": 6.412842559814453, "step": 17395 }, { "epoch": 0.174, "grad_norm": 5.873260974884033, "learning_rate": 4.1717676767676775e-06, "loss": 6.464102935791016, "step": 17400 }, { "epoch": 0.17405, "grad_norm": 5.772125244140625, "learning_rate": 4.171515151515152e-06, "loss": 6.401849365234375, "step": 17405 }, { "epoch": 0.1741, "grad_norm": 4.968996524810791, "learning_rate": 4.171262626262627e-06, "loss": 6.4329475402832035, "step": 17410 }, { "epoch": 0.17415, "grad_norm": 6.859051704406738, "learning_rate": 4.171010101010101e-06, "loss": 6.390068817138672, "step": 17415 }, { "epoch": 0.1742, "grad_norm": 6.093839645385742, "learning_rate": 4.170757575757576e-06, "loss": 6.40628662109375, "step": 17420 }, { "epoch": 0.17425, "grad_norm": 5.708087921142578, "learning_rate": 4.170505050505051e-06, "loss": 6.3928993225097654, "step": 17425 }, { "epoch": 0.1743, "grad_norm": 6.439298629760742, "learning_rate": 4.170252525252525e-06, "loss": 6.50152587890625, "step": 17430 }, { "epoch": 0.17435, "grad_norm": 6.141254425048828, "learning_rate": 4.17e-06, "loss": 6.488623809814453, "step": 17435 }, { "epoch": 0.1744, "grad_norm": 5.671915531158447, "learning_rate": 4.169747474747475e-06, "loss": 6.437236785888672, "step": 17440 }, { "epoch": 0.17445, "grad_norm": 5.198111534118652, "learning_rate": 4.16949494949495e-06, "loss": 6.421900939941406, "step": 17445 }, { "epoch": 0.1745, "grad_norm": 6.319764614105225, "learning_rate": 4.169242424242425e-06, "loss": 6.422528076171875, "step": 17450 }, { "epoch": 0.17455, "grad_norm": 4.992312908172607, "learning_rate": 4.168989898989899e-06, "loss": 6.409911346435547, "step": 17455 }, { "epoch": 0.1746, "grad_norm": 6.413256645202637, "learning_rate": 4.168737373737374e-06, "loss": 6.450712585449219, "step": 17460 }, { "epoch": 0.17465, "grad_norm": 4.193538665771484, "learning_rate": 4.1684848484848485e-06, "loss": 6.422123718261719, "step": 17465 }, { "epoch": 0.1747, "grad_norm": 5.913649559020996, "learning_rate": 4.168232323232323e-06, "loss": 6.427279663085938, "step": 17470 }, { "epoch": 0.17475, "grad_norm": 3.7433533668518066, "learning_rate": 4.167979797979798e-06, "loss": 6.5682518005371096, "step": 17475 }, { "epoch": 0.1748, "grad_norm": 3.9158248901367188, "learning_rate": 4.167727272727273e-06, "loss": 6.439411163330078, "step": 17480 }, { "epoch": 0.17485, "grad_norm": 7.356685638427734, "learning_rate": 4.167474747474748e-06, "loss": 6.39733772277832, "step": 17485 }, { "epoch": 0.1749, "grad_norm": 5.304576873779297, "learning_rate": 4.1672222222222225e-06, "loss": 6.466148376464844, "step": 17490 }, { "epoch": 0.17495, "grad_norm": 6.207536220550537, "learning_rate": 4.166969696969697e-06, "loss": 6.409827423095703, "step": 17495 }, { "epoch": 0.175, "grad_norm": 6.6697235107421875, "learning_rate": 4.166717171717173e-06, "loss": 6.3732177734375, "step": 17500 }, { "epoch": 0.17505, "grad_norm": 4.117204189300537, "learning_rate": 4.166464646464647e-06, "loss": 6.396261596679688, "step": 17505 }, { "epoch": 0.1751, "grad_norm": 4.056059837341309, "learning_rate": 4.166212121212122e-06, "loss": 6.423698425292969, "step": 17510 }, { "epoch": 0.17515, "grad_norm": 5.56590461730957, "learning_rate": 4.1659595959595965e-06, "loss": 6.443589782714843, "step": 17515 }, { "epoch": 0.1752, "grad_norm": 4.619938373565674, "learning_rate": 4.165707070707071e-06, "loss": 6.428021240234375, "step": 17520 }, { "epoch": 0.17525, "grad_norm": 4.585859298706055, "learning_rate": 4.165454545454546e-06, "loss": 6.433175659179687, "step": 17525 }, { "epoch": 0.1753, "grad_norm": 6.342316150665283, "learning_rate": 4.16520202020202e-06, "loss": 6.424404144287109, "step": 17530 }, { "epoch": 0.17535, "grad_norm": 7.624713897705078, "learning_rate": 4.164949494949495e-06, "loss": 6.395957565307617, "step": 17535 }, { "epoch": 0.1754, "grad_norm": 7.729001522064209, "learning_rate": 4.1646969696969705e-06, "loss": 6.436745452880859, "step": 17540 }, { "epoch": 0.17545, "grad_norm": 4.1594414710998535, "learning_rate": 4.164444444444445e-06, "loss": 6.450293731689453, "step": 17545 }, { "epoch": 0.1755, "grad_norm": 5.528448104858398, "learning_rate": 4.16419191919192e-06, "loss": 6.437165069580078, "step": 17550 }, { "epoch": 0.17555, "grad_norm": 3.597606897354126, "learning_rate": 4.163939393939394e-06, "loss": 6.339387512207031, "step": 17555 }, { "epoch": 0.1756, "grad_norm": 9.42526912689209, "learning_rate": 4.163686868686869e-06, "loss": 6.457988739013672, "step": 17560 }, { "epoch": 0.17565, "grad_norm": 4.962278842926025, "learning_rate": 4.163434343434344e-06, "loss": 6.435968017578125, "step": 17565 }, { "epoch": 0.1757, "grad_norm": 4.133020401000977, "learning_rate": 4.163181818181818e-06, "loss": 6.388762664794922, "step": 17570 }, { "epoch": 0.17575, "grad_norm": 4.525731086730957, "learning_rate": 4.162929292929293e-06, "loss": 6.38193359375, "step": 17575 }, { "epoch": 0.1758, "grad_norm": 4.709101676940918, "learning_rate": 4.162676767676768e-06, "loss": 6.436134338378906, "step": 17580 }, { "epoch": 0.17585, "grad_norm": 5.122340679168701, "learning_rate": 4.162424242424243e-06, "loss": 6.3754535675048825, "step": 17585 }, { "epoch": 0.1759, "grad_norm": 4.543242931365967, "learning_rate": 4.162171717171718e-06, "loss": 6.414791107177734, "step": 17590 }, { "epoch": 0.17595, "grad_norm": 5.000589847564697, "learning_rate": 4.161919191919192e-06, "loss": 6.4275146484375, "step": 17595 }, { "epoch": 0.176, "grad_norm": 4.8625078201293945, "learning_rate": 4.161666666666667e-06, "loss": 6.459661865234375, "step": 17600 }, { "epoch": 0.17605, "grad_norm": 5.643690586090088, "learning_rate": 4.1614141414141415e-06, "loss": 6.381939315795899, "step": 17605 }, { "epoch": 0.1761, "grad_norm": 4.322327613830566, "learning_rate": 4.161161616161616e-06, "loss": 6.1563880920410154, "step": 17610 }, { "epoch": 0.17615, "grad_norm": 4.578833103179932, "learning_rate": 4.160909090909091e-06, "loss": 6.462340545654297, "step": 17615 }, { "epoch": 0.1762, "grad_norm": 3.9061672687530518, "learning_rate": 4.160656565656566e-06, "loss": 6.400978851318359, "step": 17620 }, { "epoch": 0.17625, "grad_norm": 3.8795502185821533, "learning_rate": 4.160404040404041e-06, "loss": 6.417348480224609, "step": 17625 }, { "epoch": 0.1763, "grad_norm": 6.673102855682373, "learning_rate": 4.1601515151515155e-06, "loss": 6.394385528564453, "step": 17630 }, { "epoch": 0.17635, "grad_norm": 4.727807998657227, "learning_rate": 4.15989898989899e-06, "loss": 6.395341110229492, "step": 17635 }, { "epoch": 0.1764, "grad_norm": 5.2912492752075195, "learning_rate": 4.159646464646465e-06, "loss": 6.403656768798828, "step": 17640 }, { "epoch": 0.17645, "grad_norm": 4.197628498077393, "learning_rate": 4.1593939393939394e-06, "loss": 6.338999176025391, "step": 17645 }, { "epoch": 0.1765, "grad_norm": 4.196425437927246, "learning_rate": 4.159141414141414e-06, "loss": 6.409450531005859, "step": 17650 }, { "epoch": 0.17655, "grad_norm": 3.9479475021362305, "learning_rate": 4.158888888888889e-06, "loss": 6.404264068603515, "step": 17655 }, { "epoch": 0.1766, "grad_norm": 3.4079887866973877, "learning_rate": 4.158636363636364e-06, "loss": 6.403955078125, "step": 17660 }, { "epoch": 0.17665, "grad_norm": 4.526871204376221, "learning_rate": 4.158383838383839e-06, "loss": 6.372063827514649, "step": 17665 }, { "epoch": 0.1767, "grad_norm": 8.466011047363281, "learning_rate": 4.1581313131313134e-06, "loss": 6.402127075195312, "step": 17670 }, { "epoch": 0.17675, "grad_norm": 4.089807033538818, "learning_rate": 4.157878787878788e-06, "loss": 6.429524230957031, "step": 17675 }, { "epoch": 0.1768, "grad_norm": 7.356545925140381, "learning_rate": 4.1576262626262635e-06, "loss": 6.45697021484375, "step": 17680 }, { "epoch": 0.17685, "grad_norm": 3.0544562339782715, "learning_rate": 4.157373737373737e-06, "loss": 6.424396514892578, "step": 17685 }, { "epoch": 0.1769, "grad_norm": 6.849983215332031, "learning_rate": 4.157121212121212e-06, "loss": 6.4557243347167965, "step": 17690 }, { "epoch": 0.17695, "grad_norm": 4.952725410461426, "learning_rate": 4.156868686868687e-06, "loss": 6.38331298828125, "step": 17695 }, { "epoch": 0.177, "grad_norm": 2.5618324279785156, "learning_rate": 4.156616161616162e-06, "loss": 6.606365203857422, "step": 17700 }, { "epoch": 0.17705, "grad_norm": 4.756274223327637, "learning_rate": 4.156363636363637e-06, "loss": 6.3896636962890625, "step": 17705 }, { "epoch": 0.1771, "grad_norm": 4.932427406311035, "learning_rate": 4.156111111111111e-06, "loss": 6.385951232910156, "step": 17710 }, { "epoch": 0.17715, "grad_norm": 5.095156192779541, "learning_rate": 4.155858585858586e-06, "loss": 6.41995849609375, "step": 17715 }, { "epoch": 0.1772, "grad_norm": 5.027000427246094, "learning_rate": 4.1556060606060614e-06, "loss": 6.471839141845703, "step": 17720 }, { "epoch": 0.17725, "grad_norm": 3.7389347553253174, "learning_rate": 4.155353535353536e-06, "loss": 6.432771301269531, "step": 17725 }, { "epoch": 0.1773, "grad_norm": 4.164002418518066, "learning_rate": 4.155101010101011e-06, "loss": 6.426138305664063, "step": 17730 }, { "epoch": 0.17735, "grad_norm": 4.007206916809082, "learning_rate": 4.154848484848485e-06, "loss": 6.403254699707031, "step": 17735 }, { "epoch": 0.1774, "grad_norm": 2.2781336307525635, "learning_rate": 4.15459595959596e-06, "loss": 6.410115051269531, "step": 17740 }, { "epoch": 0.17745, "grad_norm": 4.687213897705078, "learning_rate": 4.1543434343434346e-06, "loss": 6.443010711669922, "step": 17745 }, { "epoch": 0.1775, "grad_norm": 16.690937042236328, "learning_rate": 4.154090909090909e-06, "loss": 6.448246765136719, "step": 17750 }, { "epoch": 0.17755, "grad_norm": 5.532197952270508, "learning_rate": 4.153838383838385e-06, "loss": 6.457949829101563, "step": 17755 }, { "epoch": 0.1776, "grad_norm": 7.8364152908325195, "learning_rate": 4.153585858585859e-06, "loss": 6.415885925292969, "step": 17760 }, { "epoch": 0.17765, "grad_norm": 3.5128841400146484, "learning_rate": 4.153333333333334e-06, "loss": 6.405172729492188, "step": 17765 }, { "epoch": 0.1777, "grad_norm": 2.819309949874878, "learning_rate": 4.1530808080808086e-06, "loss": 6.379837417602539, "step": 17770 }, { "epoch": 0.17775, "grad_norm": 6.065360069274902, "learning_rate": 4.152828282828283e-06, "loss": 6.320343399047852, "step": 17775 }, { "epoch": 0.1778, "grad_norm": 4.882674217224121, "learning_rate": 4.152575757575758e-06, "loss": 6.4123687744140625, "step": 17780 }, { "epoch": 0.17785, "grad_norm": 5.380648612976074, "learning_rate": 4.1523232323232325e-06, "loss": 6.4424491882324215, "step": 17785 }, { "epoch": 0.1779, "grad_norm": 4.1288604736328125, "learning_rate": 4.152070707070707e-06, "loss": 6.349679183959961, "step": 17790 }, { "epoch": 0.17795, "grad_norm": 5.336260795593262, "learning_rate": 4.1518181818181826e-06, "loss": 6.407053375244141, "step": 17795 }, { "epoch": 0.178, "grad_norm": 3.549156427383423, "learning_rate": 4.151565656565657e-06, "loss": 6.339466857910156, "step": 17800 }, { "epoch": 0.17805, "grad_norm": 5.420167446136475, "learning_rate": 4.151313131313132e-06, "loss": 6.394767761230469, "step": 17805 }, { "epoch": 0.1781, "grad_norm": 2.4497735500335693, "learning_rate": 4.1510606060606065e-06, "loss": 6.481326293945313, "step": 17810 }, { "epoch": 0.17815, "grad_norm": 4.207220077514648, "learning_rate": 4.150808080808081e-06, "loss": 6.399176025390625, "step": 17815 }, { "epoch": 0.1782, "grad_norm": 2.7428503036499023, "learning_rate": 4.150555555555556e-06, "loss": 6.422046661376953, "step": 17820 }, { "epoch": 0.17825, "grad_norm": 4.042768478393555, "learning_rate": 4.15030303030303e-06, "loss": 6.360757827758789, "step": 17825 }, { "epoch": 0.1783, "grad_norm": 14.64224910736084, "learning_rate": 4.150050505050505e-06, "loss": 6.507855224609375, "step": 17830 }, { "epoch": 0.17835, "grad_norm": 4.005972385406494, "learning_rate": 4.1497979797979805e-06, "loss": 6.431740570068359, "step": 17835 }, { "epoch": 0.1784, "grad_norm": 3.6754980087280273, "learning_rate": 4.149545454545455e-06, "loss": 6.374139022827149, "step": 17840 }, { "epoch": 0.17845, "grad_norm": 11.627903938293457, "learning_rate": 4.14929292929293e-06, "loss": 6.428823089599609, "step": 17845 }, { "epoch": 0.1785, "grad_norm": 4.835561752319336, "learning_rate": 4.149040404040404e-06, "loss": 6.341254806518554, "step": 17850 }, { "epoch": 0.17855, "grad_norm": 3.635707378387451, "learning_rate": 4.148787878787879e-06, "loss": 6.385338211059571, "step": 17855 }, { "epoch": 0.1786, "grad_norm": 3.0221989154815674, "learning_rate": 4.148535353535354e-06, "loss": 6.421871185302734, "step": 17860 }, { "epoch": 0.17865, "grad_norm": 3.353701114654541, "learning_rate": 4.148282828282828e-06, "loss": 6.373555755615234, "step": 17865 }, { "epoch": 0.1787, "grad_norm": 3.926018476486206, "learning_rate": 4.148030303030303e-06, "loss": 6.426369476318359, "step": 17870 }, { "epoch": 0.17875, "grad_norm": 5.145357131958008, "learning_rate": 4.147777777777778e-06, "loss": 6.470442199707032, "step": 17875 }, { "epoch": 0.1788, "grad_norm": 4.2623701095581055, "learning_rate": 4.147525252525253e-06, "loss": 6.352566528320312, "step": 17880 }, { "epoch": 0.17885, "grad_norm": 6.20504903793335, "learning_rate": 4.147272727272728e-06, "loss": 6.482119750976563, "step": 17885 }, { "epoch": 0.1789, "grad_norm": 4.889270782470703, "learning_rate": 4.147020202020202e-06, "loss": 6.407502746582031, "step": 17890 }, { "epoch": 0.17895, "grad_norm": 2.942037343978882, "learning_rate": 4.146767676767678e-06, "loss": 6.435406494140625, "step": 17895 }, { "epoch": 0.179, "grad_norm": 5.89697790145874, "learning_rate": 4.146515151515152e-06, "loss": 6.395201110839844, "step": 17900 }, { "epoch": 0.17905, "grad_norm": 3.983527898788452, "learning_rate": 4.146262626262626e-06, "loss": 6.332804870605469, "step": 17905 }, { "epoch": 0.1791, "grad_norm": 5.685372829437256, "learning_rate": 4.146010101010101e-06, "loss": 6.366495895385742, "step": 17910 }, { "epoch": 0.17915, "grad_norm": 4.464791297912598, "learning_rate": 4.145757575757576e-06, "loss": 6.437127685546875, "step": 17915 }, { "epoch": 0.1792, "grad_norm": 4.8290510177612305, "learning_rate": 4.145505050505051e-06, "loss": 6.430024719238281, "step": 17920 }, { "epoch": 0.17925, "grad_norm": 5.221956253051758, "learning_rate": 4.1452525252525255e-06, "loss": 6.360372161865234, "step": 17925 }, { "epoch": 0.1793, "grad_norm": 4.1787800788879395, "learning_rate": 4.145e-06, "loss": 6.351602935791016, "step": 17930 }, { "epoch": 0.17935, "grad_norm": 3.660050630569458, "learning_rate": 4.144747474747476e-06, "loss": 6.3813934326171875, "step": 17935 }, { "epoch": 0.1794, "grad_norm": 2.994593381881714, "learning_rate": 4.14449494949495e-06, "loss": 6.450845336914062, "step": 17940 }, { "epoch": 0.17945, "grad_norm": 2.965104818344116, "learning_rate": 4.144242424242425e-06, "loss": 6.471961212158203, "step": 17945 }, { "epoch": 0.1795, "grad_norm": 4.346787929534912, "learning_rate": 4.1439898989898995e-06, "loss": 6.433719635009766, "step": 17950 }, { "epoch": 0.17955, "grad_norm": 4.137913703918457, "learning_rate": 4.143737373737374e-06, "loss": 6.379754638671875, "step": 17955 }, { "epoch": 0.1796, "grad_norm": 4.917163848876953, "learning_rate": 4.143484848484849e-06, "loss": 6.435311889648437, "step": 17960 }, { "epoch": 0.17965, "grad_norm": 8.237060546875, "learning_rate": 4.143232323232323e-06, "loss": 6.6358283996582035, "step": 17965 }, { "epoch": 0.1797, "grad_norm": 5.986239910125732, "learning_rate": 4.142979797979798e-06, "loss": 6.373171997070313, "step": 17970 }, { "epoch": 0.17975, "grad_norm": 4.570013523101807, "learning_rate": 4.1427272727272735e-06, "loss": 6.4172004699707035, "step": 17975 }, { "epoch": 0.1798, "grad_norm": 2.863872766494751, "learning_rate": 4.142474747474748e-06, "loss": 6.433051300048828, "step": 17980 }, { "epoch": 0.17985, "grad_norm": 5.6194281578063965, "learning_rate": 4.142222222222223e-06, "loss": 6.372331619262695, "step": 17985 }, { "epoch": 0.1799, "grad_norm": 3.641463041305542, "learning_rate": 4.141969696969697e-06, "loss": 6.382128524780273, "step": 17990 }, { "epoch": 0.17995, "grad_norm": 14.63641357421875, "learning_rate": 4.141717171717172e-06, "loss": 6.514051055908203, "step": 17995 }, { "epoch": 0.18, "grad_norm": 3.9042491912841797, "learning_rate": 4.141464646464647e-06, "loss": 6.322743988037109, "step": 18000 }, { "epoch": 0.18005, "grad_norm": 3.8603897094726562, "learning_rate": 4.141212121212121e-06, "loss": 6.385019683837891, "step": 18005 }, { "epoch": 0.1801, "grad_norm": 3.7539544105529785, "learning_rate": 4.140959595959596e-06, "loss": 6.441951751708984, "step": 18010 }, { "epoch": 0.18015, "grad_norm": 7.779005527496338, "learning_rate": 4.140707070707071e-06, "loss": 6.393532562255859, "step": 18015 }, { "epoch": 0.1802, "grad_norm": 4.659090518951416, "learning_rate": 4.140454545454546e-06, "loss": 6.451337432861328, "step": 18020 }, { "epoch": 0.18025, "grad_norm": 2.936098337173462, "learning_rate": 4.140202020202021e-06, "loss": 6.382490539550782, "step": 18025 }, { "epoch": 0.1803, "grad_norm": 5.232431411743164, "learning_rate": 4.139949494949495e-06, "loss": 6.39757194519043, "step": 18030 }, { "epoch": 0.18035, "grad_norm": 4.1672492027282715, "learning_rate": 4.13969696969697e-06, "loss": 6.360111999511719, "step": 18035 }, { "epoch": 0.1804, "grad_norm": 5.584742546081543, "learning_rate": 4.1394444444444445e-06, "loss": 6.398577117919922, "step": 18040 }, { "epoch": 0.18045, "grad_norm": 3.900513172149658, "learning_rate": 4.139191919191919e-06, "loss": 6.414949035644531, "step": 18045 }, { "epoch": 0.1805, "grad_norm": 7.5035271644592285, "learning_rate": 4.138939393939394e-06, "loss": 6.677559661865234, "step": 18050 }, { "epoch": 0.18055, "grad_norm": 5.5633544921875, "learning_rate": 4.138686868686869e-06, "loss": 6.430255126953125, "step": 18055 }, { "epoch": 0.1806, "grad_norm": 6.292913436889648, "learning_rate": 4.138434343434344e-06, "loss": 5.793987655639649, "step": 18060 }, { "epoch": 0.18065, "grad_norm": 4.113793849945068, "learning_rate": 4.1381818181818185e-06, "loss": 6.406172180175782, "step": 18065 }, { "epoch": 0.1807, "grad_norm": 5.962163925170898, "learning_rate": 4.137929292929293e-06, "loss": 6.412632751464844, "step": 18070 }, { "epoch": 0.18075, "grad_norm": 4.996208667755127, "learning_rate": 4.137676767676768e-06, "loss": 6.4496917724609375, "step": 18075 }, { "epoch": 0.1808, "grad_norm": 7.009472370147705, "learning_rate": 4.137424242424242e-06, "loss": 6.3844047546386715, "step": 18080 }, { "epoch": 0.18085, "grad_norm": 9.235272407531738, "learning_rate": 4.137171717171717e-06, "loss": 6.353970336914062, "step": 18085 }, { "epoch": 0.1809, "grad_norm": 5.965597629547119, "learning_rate": 4.136919191919192e-06, "loss": 6.401161956787109, "step": 18090 }, { "epoch": 0.18095, "grad_norm": 3.8810954093933105, "learning_rate": 4.136666666666667e-06, "loss": 6.3313346862792965, "step": 18095 }, { "epoch": 0.181, "grad_norm": 2.873263120651245, "learning_rate": 4.136414141414142e-06, "loss": 6.451673889160157, "step": 18100 }, { "epoch": 0.18105, "grad_norm": 5.597329616546631, "learning_rate": 4.136161616161616e-06, "loss": 6.402189636230469, "step": 18105 }, { "epoch": 0.1811, "grad_norm": 9.492162704467773, "learning_rate": 4.135909090909091e-06, "loss": 6.572526550292968, "step": 18110 }, { "epoch": 0.18115, "grad_norm": 3.0517191886901855, "learning_rate": 4.1356565656565665e-06, "loss": 6.391358184814453, "step": 18115 }, { "epoch": 0.1812, "grad_norm": 6.402818202972412, "learning_rate": 4.135404040404041e-06, "loss": 6.416557312011719, "step": 18120 }, { "epoch": 0.18125, "grad_norm": 4.124929428100586, "learning_rate": 4.135151515151516e-06, "loss": 6.421649169921875, "step": 18125 }, { "epoch": 0.1813, "grad_norm": 4.122948169708252, "learning_rate": 4.1348989898989896e-06, "loss": 6.432498168945313, "step": 18130 }, { "epoch": 0.18135, "grad_norm": 3.5551657676696777, "learning_rate": 4.134646464646465e-06, "loss": 6.381999969482422, "step": 18135 }, { "epoch": 0.1814, "grad_norm": 4.830156326293945, "learning_rate": 4.13439393939394e-06, "loss": 6.463905334472656, "step": 18140 }, { "epoch": 0.18145, "grad_norm": 9.221014976501465, "learning_rate": 4.134141414141414e-06, "loss": 6.3699089050292965, "step": 18145 }, { "epoch": 0.1815, "grad_norm": 4.070011615753174, "learning_rate": 4.133888888888889e-06, "loss": 6.357383346557617, "step": 18150 }, { "epoch": 0.18155, "grad_norm": 4.126086711883545, "learning_rate": 4.133636363636364e-06, "loss": 6.4556221008300785, "step": 18155 }, { "epoch": 0.1816, "grad_norm": 14.883360862731934, "learning_rate": 4.133383838383839e-06, "loss": 6.45667724609375, "step": 18160 }, { "epoch": 0.18165, "grad_norm": 4.866841793060303, "learning_rate": 4.133131313131314e-06, "loss": 6.397457122802734, "step": 18165 }, { "epoch": 0.1817, "grad_norm": 3.7541723251342773, "learning_rate": 4.132878787878788e-06, "loss": 6.392228698730468, "step": 18170 }, { "epoch": 0.18175, "grad_norm": 5.025020122528076, "learning_rate": 4.132626262626263e-06, "loss": 6.437315368652344, "step": 18175 }, { "epoch": 0.1818, "grad_norm": 10.332810401916504, "learning_rate": 4.1323737373737376e-06, "loss": 6.466764068603515, "step": 18180 }, { "epoch": 0.18185, "grad_norm": 3.210064172744751, "learning_rate": 4.132121212121212e-06, "loss": 6.380583572387695, "step": 18185 }, { "epoch": 0.1819, "grad_norm": 3.3123245239257812, "learning_rate": 4.131868686868688e-06, "loss": 6.458525848388672, "step": 18190 }, { "epoch": 0.18195, "grad_norm": 3.816985607147217, "learning_rate": 4.131616161616162e-06, "loss": 6.392555618286133, "step": 18195 }, { "epoch": 0.182, "grad_norm": 3.551572799682617, "learning_rate": 4.131363636363637e-06, "loss": 6.426686859130859, "step": 18200 }, { "epoch": 0.18205, "grad_norm": 7.632850170135498, "learning_rate": 4.1311111111111116e-06, "loss": 6.3911277770996096, "step": 18205 }, { "epoch": 0.1821, "grad_norm": 9.603696823120117, "learning_rate": 4.130858585858586e-06, "loss": 6.459105682373047, "step": 18210 }, { "epoch": 0.18215, "grad_norm": 3.142162561416626, "learning_rate": 4.130606060606061e-06, "loss": 6.359768676757812, "step": 18215 }, { "epoch": 0.1822, "grad_norm": 3.639529228210449, "learning_rate": 4.1303535353535354e-06, "loss": 6.386132431030274, "step": 18220 }, { "epoch": 0.18225, "grad_norm": 2.928650140762329, "learning_rate": 4.13010101010101e-06, "loss": 6.441036987304687, "step": 18225 }, { "epoch": 0.1823, "grad_norm": 2.8675637245178223, "learning_rate": 4.1298484848484856e-06, "loss": 6.380276489257812, "step": 18230 }, { "epoch": 0.18235, "grad_norm": 5.014830589294434, "learning_rate": 4.12959595959596e-06, "loss": 6.442393493652344, "step": 18235 }, { "epoch": 0.1824, "grad_norm": 4.158809661865234, "learning_rate": 4.129343434343435e-06, "loss": 6.402660369873047, "step": 18240 }, { "epoch": 0.18245, "grad_norm": 3.887037992477417, "learning_rate": 4.1290909090909094e-06, "loss": 6.349750900268555, "step": 18245 }, { "epoch": 0.1825, "grad_norm": 5.277118682861328, "learning_rate": 4.128838383838384e-06, "loss": 6.3683929443359375, "step": 18250 }, { "epoch": 0.18255, "grad_norm": 4.377228260040283, "learning_rate": 4.128585858585859e-06, "loss": 6.414987945556641, "step": 18255 }, { "epoch": 0.1826, "grad_norm": 3.8347971439361572, "learning_rate": 4.128333333333333e-06, "loss": 6.385688018798828, "step": 18260 }, { "epoch": 0.18265, "grad_norm": 4.1411566734313965, "learning_rate": 4.128080808080808e-06, "loss": 6.424810791015625, "step": 18265 }, { "epoch": 0.1827, "grad_norm": 3.672717332839966, "learning_rate": 4.1278282828282834e-06, "loss": 6.391205596923828, "step": 18270 }, { "epoch": 0.18275, "grad_norm": 4.8362812995910645, "learning_rate": 4.127575757575758e-06, "loss": 6.428981018066406, "step": 18275 }, { "epoch": 0.1828, "grad_norm": 3.996149778366089, "learning_rate": 4.127323232323233e-06, "loss": 6.369114685058594, "step": 18280 }, { "epoch": 0.18285, "grad_norm": 4.488272666931152, "learning_rate": 4.127070707070707e-06, "loss": 6.364942932128907, "step": 18285 }, { "epoch": 0.1829, "grad_norm": 3.9969406127929688, "learning_rate": 4.126818181818183e-06, "loss": 6.367502212524414, "step": 18290 }, { "epoch": 0.18295, "grad_norm": 3.2120578289031982, "learning_rate": 4.126565656565657e-06, "loss": 6.390921401977539, "step": 18295 }, { "epoch": 0.183, "grad_norm": 5.215273380279541, "learning_rate": 4.126313131313131e-06, "loss": 6.375188827514648, "step": 18300 }, { "epoch": 0.18305, "grad_norm": 4.307608604431152, "learning_rate": 4.126060606060606e-06, "loss": 6.362895584106445, "step": 18305 }, { "epoch": 0.1831, "grad_norm": 4.531371593475342, "learning_rate": 4.125808080808081e-06, "loss": 6.3978126525878904, "step": 18310 }, { "epoch": 0.18315, "grad_norm": 3.7404820919036865, "learning_rate": 4.125555555555556e-06, "loss": 6.41856689453125, "step": 18315 }, { "epoch": 0.1832, "grad_norm": 2.3453781604766846, "learning_rate": 4.125303030303031e-06, "loss": 6.432758331298828, "step": 18320 }, { "epoch": 0.18325, "grad_norm": 4.288193225860596, "learning_rate": 4.125050505050505e-06, "loss": 6.414971160888672, "step": 18325 }, { "epoch": 0.1833, "grad_norm": 4.88767147064209, "learning_rate": 4.124797979797981e-06, "loss": 6.419315338134766, "step": 18330 }, { "epoch": 0.18335, "grad_norm": 3.612919569015503, "learning_rate": 4.124545454545455e-06, "loss": 6.398788070678711, "step": 18335 }, { "epoch": 0.1834, "grad_norm": 3.5745019912719727, "learning_rate": 4.12429292929293e-06, "loss": 6.34692153930664, "step": 18340 }, { "epoch": 0.18345, "grad_norm": 13.459613800048828, "learning_rate": 4.124040404040405e-06, "loss": 6.464740753173828, "step": 18345 }, { "epoch": 0.1835, "grad_norm": 4.3153204917907715, "learning_rate": 4.123787878787879e-06, "loss": 6.411036682128906, "step": 18350 }, { "epoch": 0.18355, "grad_norm": 5.809116840362549, "learning_rate": 4.123535353535354e-06, "loss": 6.341804122924804, "step": 18355 }, { "epoch": 0.1836, "grad_norm": 3.907810688018799, "learning_rate": 4.1232828282828285e-06, "loss": 6.411479949951172, "step": 18360 }, { "epoch": 0.18365, "grad_norm": 3.241471290588379, "learning_rate": 4.123030303030303e-06, "loss": 6.508728790283203, "step": 18365 }, { "epoch": 0.1837, "grad_norm": 2.958793878555298, "learning_rate": 4.122777777777779e-06, "loss": 6.3868865966796875, "step": 18370 }, { "epoch": 0.18375, "grad_norm": 2.37656569480896, "learning_rate": 4.122525252525253e-06, "loss": 6.397617340087891, "step": 18375 }, { "epoch": 0.1838, "grad_norm": 4.623712539672852, "learning_rate": 4.122272727272728e-06, "loss": 6.414192962646484, "step": 18380 }, { "epoch": 0.18385, "grad_norm": 5.8188676834106445, "learning_rate": 4.1220202020202025e-06, "loss": 6.359071350097656, "step": 18385 }, { "epoch": 0.1839, "grad_norm": 4.843662261962891, "learning_rate": 4.121767676767677e-06, "loss": 6.4203239440917965, "step": 18390 }, { "epoch": 0.18395, "grad_norm": 3.507455587387085, "learning_rate": 4.121515151515152e-06, "loss": 6.380838012695312, "step": 18395 }, { "epoch": 0.184, "grad_norm": 5.842238426208496, "learning_rate": 4.121262626262626e-06, "loss": 6.428423309326172, "step": 18400 }, { "epoch": 0.18405, "grad_norm": 8.048908233642578, "learning_rate": 4.121010101010101e-06, "loss": 6.384454345703125, "step": 18405 }, { "epoch": 0.1841, "grad_norm": 3.6490299701690674, "learning_rate": 4.1207575757575765e-06, "loss": 6.443302154541016, "step": 18410 }, { "epoch": 0.18415, "grad_norm": 2.7851459980010986, "learning_rate": 4.120505050505051e-06, "loss": 6.399865341186524, "step": 18415 }, { "epoch": 0.1842, "grad_norm": 3.903510808944702, "learning_rate": 4.120252525252526e-06, "loss": 6.385411071777344, "step": 18420 }, { "epoch": 0.18425, "grad_norm": 4.104851245880127, "learning_rate": 4.12e-06, "loss": 6.391850280761719, "step": 18425 }, { "epoch": 0.1843, "grad_norm": 3.4672253131866455, "learning_rate": 4.119747474747475e-06, "loss": 6.449886322021484, "step": 18430 }, { "epoch": 0.18435, "grad_norm": 4.13779354095459, "learning_rate": 4.11949494949495e-06, "loss": 6.375531768798828, "step": 18435 }, { "epoch": 0.1844, "grad_norm": 3.1252825260162354, "learning_rate": 4.119242424242424e-06, "loss": 6.397152328491211, "step": 18440 }, { "epoch": 0.18445, "grad_norm": 3.66825270652771, "learning_rate": 4.118989898989899e-06, "loss": 6.451752471923828, "step": 18445 }, { "epoch": 0.1845, "grad_norm": 2.2796757221221924, "learning_rate": 4.118737373737374e-06, "loss": 6.444062042236328, "step": 18450 }, { "epoch": 0.18455, "grad_norm": 3.508664608001709, "learning_rate": 4.118484848484849e-06, "loss": 6.3871711730957035, "step": 18455 }, { "epoch": 0.1846, "grad_norm": 5.344874858856201, "learning_rate": 4.118232323232324e-06, "loss": 6.386994552612305, "step": 18460 }, { "epoch": 0.18465, "grad_norm": 4.438442230224609, "learning_rate": 4.117979797979798e-06, "loss": 6.411752319335937, "step": 18465 }, { "epoch": 0.1847, "grad_norm": 3.4890995025634766, "learning_rate": 4.117727272727273e-06, "loss": 6.397238540649414, "step": 18470 }, { "epoch": 0.18475, "grad_norm": 7.6365861892700195, "learning_rate": 4.1174747474747475e-06, "loss": 6.349295425415039, "step": 18475 }, { "epoch": 0.1848, "grad_norm": 4.482211589813232, "learning_rate": 4.117222222222222e-06, "loss": 6.362925720214844, "step": 18480 }, { "epoch": 0.18485, "grad_norm": 8.941511154174805, "learning_rate": 4.116969696969697e-06, "loss": 6.468252563476563, "step": 18485 }, { "epoch": 0.1849, "grad_norm": 3.3515055179595947, "learning_rate": 4.116717171717172e-06, "loss": 6.37664909362793, "step": 18490 }, { "epoch": 0.18495, "grad_norm": 4.280879497528076, "learning_rate": 4.116464646464647e-06, "loss": 6.35040168762207, "step": 18495 }, { "epoch": 0.185, "grad_norm": 3.7930750846862793, "learning_rate": 4.1162121212121215e-06, "loss": 6.380929183959961, "step": 18500 }, { "epoch": 0.18505, "grad_norm": 2.7962775230407715, "learning_rate": 4.115959595959596e-06, "loss": 6.347495651245117, "step": 18505 }, { "epoch": 0.1851, "grad_norm": 5.352860927581787, "learning_rate": 4.115707070707072e-06, "loss": 6.351335144042968, "step": 18510 }, { "epoch": 0.18515, "grad_norm": 8.67905330657959, "learning_rate": 4.115454545454545e-06, "loss": 6.445870971679687, "step": 18515 }, { "epoch": 0.1852, "grad_norm": 4.409884452819824, "learning_rate": 4.11520202020202e-06, "loss": 6.387173461914062, "step": 18520 }, { "epoch": 0.18525, "grad_norm": 4.126728057861328, "learning_rate": 4.114949494949495e-06, "loss": 6.353235244750977, "step": 18525 }, { "epoch": 0.1853, "grad_norm": 6.891617298126221, "learning_rate": 4.11469696969697e-06, "loss": 6.431670379638672, "step": 18530 }, { "epoch": 0.18535, "grad_norm": 4.330852031707764, "learning_rate": 4.114444444444445e-06, "loss": 6.46275634765625, "step": 18535 }, { "epoch": 0.1854, "grad_norm": 7.346864700317383, "learning_rate": 4.114191919191919e-06, "loss": 6.380028915405274, "step": 18540 }, { "epoch": 0.18545, "grad_norm": 4.961719036102295, "learning_rate": 4.113939393939394e-06, "loss": 6.346750640869141, "step": 18545 }, { "epoch": 0.1855, "grad_norm": 4.159801959991455, "learning_rate": 4.1136868686868695e-06, "loss": 6.4321342468261715, "step": 18550 }, { "epoch": 0.18555, "grad_norm": 4.6045379638671875, "learning_rate": 4.113434343434344e-06, "loss": 6.415462493896484, "step": 18555 }, { "epoch": 0.1856, "grad_norm": 6.012345790863037, "learning_rate": 4.113181818181819e-06, "loss": 6.40484619140625, "step": 18560 }, { "epoch": 0.18565, "grad_norm": 3.395472764968872, "learning_rate": 4.112929292929293e-06, "loss": 6.341675567626953, "step": 18565 }, { "epoch": 0.1857, "grad_norm": 8.381654739379883, "learning_rate": 4.112676767676768e-06, "loss": 6.584989929199219, "step": 18570 }, { "epoch": 0.18575, "grad_norm": 6.1465535163879395, "learning_rate": 4.112424242424243e-06, "loss": 6.356340408325195, "step": 18575 }, { "epoch": 0.1858, "grad_norm": 3.254153251647949, "learning_rate": 4.112171717171717e-06, "loss": 6.3848419189453125, "step": 18580 }, { "epoch": 0.18585, "grad_norm": 3.3442800045013428, "learning_rate": 4.111919191919193e-06, "loss": 6.450904846191406, "step": 18585 }, { "epoch": 0.1859, "grad_norm": 31.767776489257812, "learning_rate": 4.111666666666667e-06, "loss": 6.32515869140625, "step": 18590 }, { "epoch": 0.18595, "grad_norm": 4.978888988494873, "learning_rate": 4.111414141414142e-06, "loss": 6.362800598144531, "step": 18595 }, { "epoch": 0.186, "grad_norm": 5.091334819793701, "learning_rate": 4.111161616161617e-06, "loss": 6.442819976806641, "step": 18600 }, { "epoch": 0.18605, "grad_norm": 5.625749588012695, "learning_rate": 4.110909090909091e-06, "loss": 6.350368881225586, "step": 18605 }, { "epoch": 0.1861, "grad_norm": 4.97780704498291, "learning_rate": 4.110656565656566e-06, "loss": 6.453205871582031, "step": 18610 }, { "epoch": 0.18615, "grad_norm": 4.993187427520752, "learning_rate": 4.1104040404040405e-06, "loss": 6.42978515625, "step": 18615 }, { "epoch": 0.1862, "grad_norm": 7.1760053634643555, "learning_rate": 4.110151515151515e-06, "loss": 6.408012390136719, "step": 18620 }, { "epoch": 0.18625, "grad_norm": 5.640987396240234, "learning_rate": 4.109898989898991e-06, "loss": 6.351853561401367, "step": 18625 }, { "epoch": 0.1863, "grad_norm": 4.701223850250244, "learning_rate": 4.109646464646465e-06, "loss": 6.426744079589843, "step": 18630 }, { "epoch": 0.18635, "grad_norm": 4.341739177703857, "learning_rate": 4.10939393939394e-06, "loss": 6.452140808105469, "step": 18635 }, { "epoch": 0.1864, "grad_norm": 4.185910701751709, "learning_rate": 4.1091414141414145e-06, "loss": 6.388509368896484, "step": 18640 }, { "epoch": 0.18645, "grad_norm": 2.5337862968444824, "learning_rate": 4.108888888888889e-06, "loss": 6.33899154663086, "step": 18645 }, { "epoch": 0.1865, "grad_norm": 5.432725429534912, "learning_rate": 4.108636363636364e-06, "loss": 6.344834899902343, "step": 18650 }, { "epoch": 0.18655, "grad_norm": 5.37282657623291, "learning_rate": 4.1083838383838384e-06, "loss": 6.3460540771484375, "step": 18655 }, { "epoch": 0.1866, "grad_norm": 4.252388000488281, "learning_rate": 4.108131313131313e-06, "loss": 6.380041885375976, "step": 18660 }, { "epoch": 0.18665, "grad_norm": 6.479921340942383, "learning_rate": 4.1078787878787885e-06, "loss": 6.358160018920898, "step": 18665 }, { "epoch": 0.1867, "grad_norm": 4.933605670928955, "learning_rate": 4.107626262626263e-06, "loss": 6.409915924072266, "step": 18670 }, { "epoch": 0.18675, "grad_norm": 3.1543002128601074, "learning_rate": 4.107373737373738e-06, "loss": 6.370730590820313, "step": 18675 }, { "epoch": 0.1868, "grad_norm": 4.705506324768066, "learning_rate": 4.1071212121212124e-06, "loss": 6.349428939819336, "step": 18680 }, { "epoch": 0.18685, "grad_norm": 7.357447147369385, "learning_rate": 4.106868686868687e-06, "loss": 6.397298431396484, "step": 18685 }, { "epoch": 0.1869, "grad_norm": 3.900815725326538, "learning_rate": 4.106616161616162e-06, "loss": 6.336140823364258, "step": 18690 }, { "epoch": 0.18695, "grad_norm": 3.511648654937744, "learning_rate": 4.106363636363636e-06, "loss": 6.4429267883300785, "step": 18695 }, { "epoch": 0.187, "grad_norm": 9.923333168029785, "learning_rate": 4.106111111111111e-06, "loss": 6.635107421875, "step": 18700 }, { "epoch": 0.18705, "grad_norm": 3.7744333744049072, "learning_rate": 4.1058585858585864e-06, "loss": 6.387216186523437, "step": 18705 }, { "epoch": 0.1871, "grad_norm": 5.8100810050964355, "learning_rate": 4.105606060606061e-06, "loss": 6.396151733398438, "step": 18710 }, { "epoch": 0.18715, "grad_norm": 2.323702573776245, "learning_rate": 4.105353535353536e-06, "loss": 6.377402496337891, "step": 18715 }, { "epoch": 0.1872, "grad_norm": 4.939958572387695, "learning_rate": 4.10510101010101e-06, "loss": 6.386288070678711, "step": 18720 }, { "epoch": 0.18725, "grad_norm": 2.8512496948242188, "learning_rate": 4.104848484848486e-06, "loss": 6.500436401367187, "step": 18725 }, { "epoch": 0.1873, "grad_norm": 6.149589538574219, "learning_rate": 4.1045959595959604e-06, "loss": 6.384429550170898, "step": 18730 }, { "epoch": 0.18735, "grad_norm": 5.6558709144592285, "learning_rate": 4.104343434343434e-06, "loss": 6.391135406494141, "step": 18735 }, { "epoch": 0.1874, "grad_norm": 4.573460578918457, "learning_rate": 4.104090909090909e-06, "loss": 6.394968414306641, "step": 18740 }, { "epoch": 0.18745, "grad_norm": 3.2834384441375732, "learning_rate": 4.103838383838384e-06, "loss": 6.4182861328125, "step": 18745 }, { "epoch": 0.1875, "grad_norm": 4.8945136070251465, "learning_rate": 4.103585858585859e-06, "loss": 6.391152191162109, "step": 18750 }, { "epoch": 0.18755, "grad_norm": 4.571887016296387, "learning_rate": 4.1033333333333336e-06, "loss": 6.5467170715332035, "step": 18755 }, { "epoch": 0.1876, "grad_norm": 4.609233856201172, "learning_rate": 4.103080808080808e-06, "loss": 6.356549453735352, "step": 18760 }, { "epoch": 0.18765, "grad_norm": 3.5118002891540527, "learning_rate": 4.102828282828284e-06, "loss": 6.2901660919189455, "step": 18765 }, { "epoch": 0.1877, "grad_norm": 8.11195182800293, "learning_rate": 4.102575757575758e-06, "loss": 6.489777374267578, "step": 18770 }, { "epoch": 0.18775, "grad_norm": 5.960768699645996, "learning_rate": 4.102323232323233e-06, "loss": 6.506047058105469, "step": 18775 }, { "epoch": 0.1878, "grad_norm": 4.986604690551758, "learning_rate": 4.1020707070707076e-06, "loss": 6.397085189819336, "step": 18780 }, { "epoch": 0.18785, "grad_norm": 5.6428728103637695, "learning_rate": 4.101818181818182e-06, "loss": 6.332464981079101, "step": 18785 }, { "epoch": 0.1879, "grad_norm": 5.230681896209717, "learning_rate": 4.101565656565657e-06, "loss": 6.373043823242187, "step": 18790 }, { "epoch": 0.18795, "grad_norm": 5.805160999298096, "learning_rate": 4.1013131313131315e-06, "loss": 6.30499496459961, "step": 18795 }, { "epoch": 0.188, "grad_norm": 4.065610408782959, "learning_rate": 4.101060606060606e-06, "loss": 6.358146667480469, "step": 18800 }, { "epoch": 0.18805, "grad_norm": 3.005723476409912, "learning_rate": 4.1008080808080816e-06, "loss": 6.408080291748047, "step": 18805 }, { "epoch": 0.1881, "grad_norm": 14.198698997497559, "learning_rate": 4.100555555555556e-06, "loss": 6.6229499816894535, "step": 18810 }, { "epoch": 0.18815, "grad_norm": 26.405759811401367, "learning_rate": 4.100303030303031e-06, "loss": 6.7030082702636715, "step": 18815 }, { "epoch": 0.1882, "grad_norm": 6.3094892501831055, "learning_rate": 4.1000505050505055e-06, "loss": 6.363460540771484, "step": 18820 }, { "epoch": 0.18825, "grad_norm": 3.865670919418335, "learning_rate": 4.09979797979798e-06, "loss": 6.445598602294922, "step": 18825 }, { "epoch": 0.1883, "grad_norm": 3.3652799129486084, "learning_rate": 4.099545454545455e-06, "loss": 6.384987258911133, "step": 18830 }, { "epoch": 0.18835, "grad_norm": 3.480128526687622, "learning_rate": 4.099292929292929e-06, "loss": 6.352599334716797, "step": 18835 }, { "epoch": 0.1884, "grad_norm": 9.45141315460205, "learning_rate": 4.099040404040404e-06, "loss": 6.438786315917969, "step": 18840 }, { "epoch": 0.18845, "grad_norm": 4.715724945068359, "learning_rate": 4.0987878787878795e-06, "loss": 6.432010650634766, "step": 18845 }, { "epoch": 0.1885, "grad_norm": 4.591580390930176, "learning_rate": 4.098535353535354e-06, "loss": 6.39819107055664, "step": 18850 }, { "epoch": 0.18855, "grad_norm": 4.263213157653809, "learning_rate": 4.098282828282829e-06, "loss": 6.4058380126953125, "step": 18855 }, { "epoch": 0.1886, "grad_norm": 3.536532402038574, "learning_rate": 4.098030303030303e-06, "loss": 6.382630920410156, "step": 18860 }, { "epoch": 0.18865, "grad_norm": 21.694562911987305, "learning_rate": 4.097777777777778e-06, "loss": 6.1880035400390625, "step": 18865 }, { "epoch": 0.1887, "grad_norm": 10.979371070861816, "learning_rate": 4.097525252525253e-06, "loss": 6.557093811035156, "step": 18870 }, { "epoch": 0.18875, "grad_norm": 5.3563690185546875, "learning_rate": 4.097272727272727e-06, "loss": 6.425485992431641, "step": 18875 }, { "epoch": 0.1888, "grad_norm": 4.186330318450928, "learning_rate": 4.097020202020202e-06, "loss": 6.407283020019531, "step": 18880 }, { "epoch": 0.18885, "grad_norm": 2.599738121032715, "learning_rate": 4.096767676767677e-06, "loss": 6.473275756835937, "step": 18885 }, { "epoch": 0.1889, "grad_norm": 6.206857204437256, "learning_rate": 4.096515151515152e-06, "loss": 6.466458129882812, "step": 18890 }, { "epoch": 0.18895, "grad_norm": 7.857926845550537, "learning_rate": 4.096262626262627e-06, "loss": 6.426356506347656, "step": 18895 }, { "epoch": 0.189, "grad_norm": 11.808489799499512, "learning_rate": 4.096010101010101e-06, "loss": 6.325814819335937, "step": 18900 }, { "epoch": 0.18905, "grad_norm": 3.4066081047058105, "learning_rate": 4.095757575757576e-06, "loss": 6.384674835205078, "step": 18905 }, { "epoch": 0.1891, "grad_norm": 3.9022538661956787, "learning_rate": 4.0955050505050505e-06, "loss": 6.37455940246582, "step": 18910 }, { "epoch": 0.18915, "grad_norm": 3.720107078552246, "learning_rate": 4.095252525252525e-06, "loss": 6.400296783447265, "step": 18915 }, { "epoch": 0.1892, "grad_norm": 3.5938057899475098, "learning_rate": 4.095e-06, "loss": 6.381180191040039, "step": 18920 }, { "epoch": 0.18925, "grad_norm": 5.452615261077881, "learning_rate": 4.094747474747475e-06, "loss": 6.368849182128907, "step": 18925 }, { "epoch": 0.1893, "grad_norm": 5.877171993255615, "learning_rate": 4.09449494949495e-06, "loss": 6.407469177246094, "step": 18930 }, { "epoch": 0.18935, "grad_norm": 4.015141010284424, "learning_rate": 4.0942424242424245e-06, "loss": 6.37620735168457, "step": 18935 }, { "epoch": 0.1894, "grad_norm": 3.9595513343811035, "learning_rate": 4.093989898989899e-06, "loss": 6.378712844848633, "step": 18940 }, { "epoch": 0.18945, "grad_norm": 5.180746555328369, "learning_rate": 4.093737373737375e-06, "loss": 6.387689208984375, "step": 18945 }, { "epoch": 0.1895, "grad_norm": 5.589240550994873, "learning_rate": 4.093484848484849e-06, "loss": 6.405711364746094, "step": 18950 }, { "epoch": 0.18955, "grad_norm": 4.655160903930664, "learning_rate": 4.093232323232324e-06, "loss": 6.422606658935547, "step": 18955 }, { "epoch": 0.1896, "grad_norm": 6.271744728088379, "learning_rate": 4.092979797979798e-06, "loss": 6.399873733520508, "step": 18960 }, { "epoch": 0.18965, "grad_norm": 15.119043350219727, "learning_rate": 4.092727272727273e-06, "loss": 6.466285705566406, "step": 18965 }, { "epoch": 0.1897, "grad_norm": 5.369762420654297, "learning_rate": 4.092474747474748e-06, "loss": 6.075682830810547, "step": 18970 }, { "epoch": 0.18975, "grad_norm": 2.7201786041259766, "learning_rate": 4.092222222222222e-06, "loss": 6.427912902832031, "step": 18975 }, { "epoch": 0.1898, "grad_norm": 4.975379943847656, "learning_rate": 4.091969696969697e-06, "loss": 6.355951309204102, "step": 18980 }, { "epoch": 0.18985, "grad_norm": 4.431337833404541, "learning_rate": 4.0917171717171725e-06, "loss": 6.4079833984375, "step": 18985 }, { "epoch": 0.1899, "grad_norm": 3.1027958393096924, "learning_rate": 4.091464646464647e-06, "loss": 6.340965270996094, "step": 18990 }, { "epoch": 0.18995, "grad_norm": 4.999954700469971, "learning_rate": 4.091212121212122e-06, "loss": 6.383933258056641, "step": 18995 }, { "epoch": 0.19, "grad_norm": 5.367707252502441, "learning_rate": 4.090959595959596e-06, "loss": 6.5344688415527346, "step": 19000 }, { "epoch": 0.19005, "grad_norm": 5.335751056671143, "learning_rate": 4.090707070707071e-06, "loss": 6.349771118164062, "step": 19005 }, { "epoch": 0.1901, "grad_norm": 5.725353240966797, "learning_rate": 4.090454545454546e-06, "loss": 6.421317291259766, "step": 19010 }, { "epoch": 0.19015, "grad_norm": 5.332465171813965, "learning_rate": 4.09020202020202e-06, "loss": 6.366862106323242, "step": 19015 }, { "epoch": 0.1902, "grad_norm": 4.207252025604248, "learning_rate": 4.089949494949496e-06, "loss": 6.423389434814453, "step": 19020 }, { "epoch": 0.19025, "grad_norm": 5.3898162841796875, "learning_rate": 4.08969696969697e-06, "loss": 6.387900543212891, "step": 19025 }, { "epoch": 0.1903, "grad_norm": 5.395333290100098, "learning_rate": 4.089444444444445e-06, "loss": 6.406847381591797, "step": 19030 }, { "epoch": 0.19035, "grad_norm": 5.68665075302124, "learning_rate": 4.08919191919192e-06, "loss": 6.517096710205078, "step": 19035 }, { "epoch": 0.1904, "grad_norm": 3.878544807434082, "learning_rate": 4.088939393939394e-06, "loss": 6.372022247314453, "step": 19040 }, { "epoch": 0.19045, "grad_norm": 3.718287229537964, "learning_rate": 4.088686868686869e-06, "loss": 6.374387741088867, "step": 19045 }, { "epoch": 0.1905, "grad_norm": 3.4842898845672607, "learning_rate": 4.0884343434343435e-06, "loss": 6.36516227722168, "step": 19050 }, { "epoch": 0.19055, "grad_norm": 2.6360905170440674, "learning_rate": 4.088181818181818e-06, "loss": 6.412567138671875, "step": 19055 }, { "epoch": 0.1906, "grad_norm": 5.842061519622803, "learning_rate": 4.087929292929294e-06, "loss": 6.337042999267578, "step": 19060 }, { "epoch": 0.19065, "grad_norm": 3.3465144634246826, "learning_rate": 4.087676767676768e-06, "loss": 6.4214111328125, "step": 19065 }, { "epoch": 0.1907, "grad_norm": 3.3122689723968506, "learning_rate": 4.087424242424243e-06, "loss": 6.403544616699219, "step": 19070 }, { "epoch": 0.19075, "grad_norm": 3.9021453857421875, "learning_rate": 4.0871717171717175e-06, "loss": 6.428553771972656, "step": 19075 }, { "epoch": 0.1908, "grad_norm": 4.800110340118408, "learning_rate": 4.086919191919192e-06, "loss": 6.373813629150391, "step": 19080 }, { "epoch": 0.19085, "grad_norm": 3.8612091541290283, "learning_rate": 4.086666666666667e-06, "loss": 6.32589111328125, "step": 19085 }, { "epoch": 0.1909, "grad_norm": 5.518439292907715, "learning_rate": 4.086414141414141e-06, "loss": 6.3477825164794925, "step": 19090 }, { "epoch": 0.19095, "grad_norm": 3.6552371978759766, "learning_rate": 4.086161616161616e-06, "loss": 6.344318389892578, "step": 19095 }, { "epoch": 0.191, "grad_norm": 4.328754425048828, "learning_rate": 4.0859090909090915e-06, "loss": 6.398830413818359, "step": 19100 }, { "epoch": 0.19105, "grad_norm": 3.9813215732574463, "learning_rate": 4.085656565656566e-06, "loss": 6.401284790039062, "step": 19105 }, { "epoch": 0.1911, "grad_norm": 3.6071853637695312, "learning_rate": 4.085404040404041e-06, "loss": 6.309893035888672, "step": 19110 }, { "epoch": 0.19115, "grad_norm": 4.8512091636657715, "learning_rate": 4.085151515151515e-06, "loss": 6.27001953125, "step": 19115 }, { "epoch": 0.1912, "grad_norm": 3.6291110515594482, "learning_rate": 4.084898989898991e-06, "loss": 6.355105590820313, "step": 19120 }, { "epoch": 0.19125, "grad_norm": 5.238492488861084, "learning_rate": 4.084646464646465e-06, "loss": 6.379811859130859, "step": 19125 }, { "epoch": 0.1913, "grad_norm": 3.5164597034454346, "learning_rate": 4.084393939393939e-06, "loss": 6.448822021484375, "step": 19130 }, { "epoch": 0.19135, "grad_norm": 4.247466564178467, "learning_rate": 4.084141414141414e-06, "loss": 6.382938385009766, "step": 19135 }, { "epoch": 0.1914, "grad_norm": 4.903182506561279, "learning_rate": 4.083888888888889e-06, "loss": 6.374135971069336, "step": 19140 }, { "epoch": 0.19145, "grad_norm": 4.6411566734313965, "learning_rate": 4.083636363636364e-06, "loss": 6.35018310546875, "step": 19145 }, { "epoch": 0.1915, "grad_norm": 4.573929786682129, "learning_rate": 4.083383838383839e-06, "loss": 6.304139709472656, "step": 19150 }, { "epoch": 0.19155, "grad_norm": 7.880543231964111, "learning_rate": 4.083131313131313e-06, "loss": 6.3727294921875, "step": 19155 }, { "epoch": 0.1916, "grad_norm": 3.8419902324676514, "learning_rate": 4.082878787878789e-06, "loss": 6.359456634521484, "step": 19160 }, { "epoch": 0.19165, "grad_norm": 3.3154256343841553, "learning_rate": 4.082626262626263e-06, "loss": 6.403604888916016, "step": 19165 }, { "epoch": 0.1917, "grad_norm": 4.630642890930176, "learning_rate": 4.082373737373738e-06, "loss": 6.430747222900391, "step": 19170 }, { "epoch": 0.19175, "grad_norm": 17.105091094970703, "learning_rate": 4.082121212121213e-06, "loss": 6.446407318115234, "step": 19175 }, { "epoch": 0.1918, "grad_norm": 3.1228904724121094, "learning_rate": 4.081868686868687e-06, "loss": 6.392394256591797, "step": 19180 }, { "epoch": 0.19185, "grad_norm": 7.5376973152160645, "learning_rate": 4.081616161616162e-06, "loss": 6.612911987304687, "step": 19185 }, { "epoch": 0.1919, "grad_norm": 3.201850414276123, "learning_rate": 4.0813636363636366e-06, "loss": 6.370494842529297, "step": 19190 }, { "epoch": 0.19195, "grad_norm": 4.05242919921875, "learning_rate": 4.081111111111111e-06, "loss": 6.383204650878906, "step": 19195 }, { "epoch": 0.192, "grad_norm": 5.6457719802856445, "learning_rate": 4.080858585858587e-06, "loss": 6.365170288085937, "step": 19200 }, { "epoch": 0.19205, "grad_norm": 3.8193633556365967, "learning_rate": 4.080606060606061e-06, "loss": 6.3537040710449215, "step": 19205 }, { "epoch": 0.1921, "grad_norm": 4.5553154945373535, "learning_rate": 4.080353535353536e-06, "loss": 6.398690795898437, "step": 19210 }, { "epoch": 0.19215, "grad_norm": 4.434987545013428, "learning_rate": 4.0801010101010106e-06, "loss": 6.3793388366699215, "step": 19215 }, { "epoch": 0.1922, "grad_norm": 6.056620121002197, "learning_rate": 4.079848484848485e-06, "loss": 6.401212310791015, "step": 19220 }, { "epoch": 0.19225, "grad_norm": 3.3419487476348877, "learning_rate": 4.07959595959596e-06, "loss": 6.318290328979492, "step": 19225 }, { "epoch": 0.1923, "grad_norm": 3.8567047119140625, "learning_rate": 4.0793434343434344e-06, "loss": 6.611088562011719, "step": 19230 }, { "epoch": 0.19235, "grad_norm": 16.598146438598633, "learning_rate": 4.079090909090909e-06, "loss": 6.373753356933594, "step": 19235 }, { "epoch": 0.1924, "grad_norm": 5.0385565757751465, "learning_rate": 4.0788383838383846e-06, "loss": 6.510621643066406, "step": 19240 }, { "epoch": 0.19245, "grad_norm": 4.291235446929932, "learning_rate": 4.078585858585859e-06, "loss": 6.3659507751464846, "step": 19245 }, { "epoch": 0.1925, "grad_norm": 4.005524158477783, "learning_rate": 4.078333333333334e-06, "loss": 6.426875305175781, "step": 19250 }, { "epoch": 0.19255, "grad_norm": 4.021867275238037, "learning_rate": 4.0780808080808084e-06, "loss": 6.656010437011719, "step": 19255 }, { "epoch": 0.1926, "grad_norm": 9.877291679382324, "learning_rate": 4.077828282828283e-06, "loss": 6.364139556884766, "step": 19260 }, { "epoch": 0.19265, "grad_norm": 4.1913347244262695, "learning_rate": 4.077575757575758e-06, "loss": 6.360807037353515, "step": 19265 }, { "epoch": 0.1927, "grad_norm": 4.964813232421875, "learning_rate": 4.077323232323232e-06, "loss": 6.399176025390625, "step": 19270 }, { "epoch": 0.19275, "grad_norm": 3.131068706512451, "learning_rate": 4.077070707070707e-06, "loss": 6.438235473632813, "step": 19275 }, { "epoch": 0.1928, "grad_norm": 5.006486892700195, "learning_rate": 4.0768181818181824e-06, "loss": 6.389376831054688, "step": 19280 }, { "epoch": 0.19285, "grad_norm": 5.7917561531066895, "learning_rate": 4.076565656565657e-06, "loss": 6.410574340820313, "step": 19285 }, { "epoch": 0.1929, "grad_norm": 3.185687303543091, "learning_rate": 4.076313131313132e-06, "loss": 6.546735382080078, "step": 19290 }, { "epoch": 0.19295, "grad_norm": 4.408816814422607, "learning_rate": 4.076060606060606e-06, "loss": 6.405015563964843, "step": 19295 }, { "epoch": 0.193, "grad_norm": 4.005836009979248, "learning_rate": 4.075808080808081e-06, "loss": 6.359957504272461, "step": 19300 }, { "epoch": 0.19305, "grad_norm": 5.21028470993042, "learning_rate": 4.075555555555556e-06, "loss": 6.3653205871582035, "step": 19305 }, { "epoch": 0.1931, "grad_norm": 3.582012414932251, "learning_rate": 4.07530303030303e-06, "loss": 6.364418029785156, "step": 19310 }, { "epoch": 0.19315, "grad_norm": 4.695324897766113, "learning_rate": 4.075050505050505e-06, "loss": 6.359355926513672, "step": 19315 }, { "epoch": 0.1932, "grad_norm": 9.45710277557373, "learning_rate": 4.07479797979798e-06, "loss": 6.406756591796875, "step": 19320 }, { "epoch": 0.19325, "grad_norm": 5.592437744140625, "learning_rate": 4.074545454545455e-06, "loss": 6.347634124755859, "step": 19325 }, { "epoch": 0.1933, "grad_norm": 6.338645935058594, "learning_rate": 4.07429292929293e-06, "loss": 6.383368682861328, "step": 19330 }, { "epoch": 0.19335, "grad_norm": 3.5367844104766846, "learning_rate": 4.074040404040404e-06, "loss": 6.381031036376953, "step": 19335 }, { "epoch": 0.1934, "grad_norm": 3.1967060565948486, "learning_rate": 4.07378787878788e-06, "loss": 6.480411529541016, "step": 19340 }, { "epoch": 0.19345, "grad_norm": 4.4588212966918945, "learning_rate": 4.0735353535353535e-06, "loss": 6.3442131042480465, "step": 19345 }, { "epoch": 0.1935, "grad_norm": 6.410840034484863, "learning_rate": 4.073282828282828e-06, "loss": 6.356620025634766, "step": 19350 }, { "epoch": 0.19355, "grad_norm": 4.463810443878174, "learning_rate": 4.073030303030303e-06, "loss": 6.356292343139648, "step": 19355 }, { "epoch": 0.1936, "grad_norm": 5.350219249725342, "learning_rate": 4.072777777777778e-06, "loss": 6.371220397949219, "step": 19360 }, { "epoch": 0.19365, "grad_norm": 11.31662368774414, "learning_rate": 4.072525252525253e-06, "loss": 6.499986267089843, "step": 19365 }, { "epoch": 0.1937, "grad_norm": 4.373028755187988, "learning_rate": 4.0722727272727275e-06, "loss": 6.34986686706543, "step": 19370 }, { "epoch": 0.19375, "grad_norm": 5.775130271911621, "learning_rate": 4.072020202020202e-06, "loss": 6.3454242706298825, "step": 19375 }, { "epoch": 0.1938, "grad_norm": 6.275132656097412, "learning_rate": 4.071767676767678e-06, "loss": 6.397661590576172, "step": 19380 }, { "epoch": 0.19385, "grad_norm": 4.542804718017578, "learning_rate": 4.071515151515152e-06, "loss": 6.368367767333984, "step": 19385 }, { "epoch": 0.1939, "grad_norm": 5.219408988952637, "learning_rate": 4.071262626262627e-06, "loss": 6.424284362792969, "step": 19390 }, { "epoch": 0.19395, "grad_norm": 9.797083854675293, "learning_rate": 4.0710101010101015e-06, "loss": 6.387657165527344, "step": 19395 }, { "epoch": 0.194, "grad_norm": 4.218315601348877, "learning_rate": 4.070757575757576e-06, "loss": 6.405934143066406, "step": 19400 }, { "epoch": 0.19405, "grad_norm": 3.369438886642456, "learning_rate": 4.070505050505051e-06, "loss": 6.367124557495117, "step": 19405 }, { "epoch": 0.1941, "grad_norm": 3.927542209625244, "learning_rate": 4.070252525252525e-06, "loss": 6.346889877319336, "step": 19410 }, { "epoch": 0.19415, "grad_norm": 4.782622337341309, "learning_rate": 4.07e-06, "loss": 6.346836090087891, "step": 19415 }, { "epoch": 0.1942, "grad_norm": 4.404238224029541, "learning_rate": 4.0697474747474755e-06, "loss": 6.370536804199219, "step": 19420 }, { "epoch": 0.19425, "grad_norm": 5.4158244132995605, "learning_rate": 4.06949494949495e-06, "loss": 6.416272735595703, "step": 19425 }, { "epoch": 0.1943, "grad_norm": 4.474217414855957, "learning_rate": 4.069242424242425e-06, "loss": 6.370992279052734, "step": 19430 }, { "epoch": 0.19435, "grad_norm": 4.019426345825195, "learning_rate": 4.068989898989899e-06, "loss": 6.350820159912109, "step": 19435 }, { "epoch": 0.1944, "grad_norm": 2.479823112487793, "learning_rate": 4.068737373737374e-06, "loss": 6.462271118164063, "step": 19440 }, { "epoch": 0.19445, "grad_norm": 4.858013153076172, "learning_rate": 4.068484848484849e-06, "loss": 6.431369781494141, "step": 19445 }, { "epoch": 0.1945, "grad_norm": 2.7684266567230225, "learning_rate": 4.068232323232323e-06, "loss": 6.348640060424804, "step": 19450 }, { "epoch": 0.19455, "grad_norm": 3.7940924167633057, "learning_rate": 4.067979797979799e-06, "loss": 6.345696258544922, "step": 19455 }, { "epoch": 0.1946, "grad_norm": 10.329361915588379, "learning_rate": 4.067727272727273e-06, "loss": 6.394172286987304, "step": 19460 }, { "epoch": 0.19465, "grad_norm": 5.23352575302124, "learning_rate": 4.067474747474748e-06, "loss": 6.348777770996094, "step": 19465 }, { "epoch": 0.1947, "grad_norm": 5.59263277053833, "learning_rate": 4.067222222222223e-06, "loss": 6.427117919921875, "step": 19470 }, { "epoch": 0.19475, "grad_norm": 3.72698712348938, "learning_rate": 4.066969696969697e-06, "loss": 6.396751022338867, "step": 19475 }, { "epoch": 0.1948, "grad_norm": 4.382380962371826, "learning_rate": 4.066717171717172e-06, "loss": 6.416648101806641, "step": 19480 }, { "epoch": 0.19485, "grad_norm": 3.350860834121704, "learning_rate": 4.0664646464646465e-06, "loss": 6.167864990234375, "step": 19485 }, { "epoch": 0.1949, "grad_norm": 9.833253860473633, "learning_rate": 4.066212121212121e-06, "loss": 6.383861541748047, "step": 19490 }, { "epoch": 0.19495, "grad_norm": 2.236875057220459, "learning_rate": 4.065959595959597e-06, "loss": 6.398822784423828, "step": 19495 }, { "epoch": 0.195, "grad_norm": 3.5608301162719727, "learning_rate": 4.065707070707071e-06, "loss": 6.420928192138672, "step": 19500 }, { "epoch": 0.19505, "grad_norm": 4.265907287597656, "learning_rate": 4.065454545454546e-06, "loss": 6.334685516357422, "step": 19505 }, { "epoch": 0.1951, "grad_norm": 12.511089324951172, "learning_rate": 4.0652020202020205e-06, "loss": 6.6551353454589846, "step": 19510 }, { "epoch": 0.19515, "grad_norm": 8.818058967590332, "learning_rate": 4.064949494949495e-06, "loss": 6.324756622314453, "step": 19515 }, { "epoch": 0.1952, "grad_norm": 4.294338226318359, "learning_rate": 4.06469696969697e-06, "loss": 6.352555847167968, "step": 19520 }, { "epoch": 0.19525, "grad_norm": 5.974384784698486, "learning_rate": 4.064444444444444e-06, "loss": 6.4365394592285154, "step": 19525 }, { "epoch": 0.1953, "grad_norm": 4.288761138916016, "learning_rate": 4.064191919191919e-06, "loss": 6.35997428894043, "step": 19530 }, { "epoch": 0.19535, "grad_norm": 5.314708232879639, "learning_rate": 4.0639393939393945e-06, "loss": 6.403207397460937, "step": 19535 }, { "epoch": 0.1954, "grad_norm": 4.448174476623535, "learning_rate": 4.063686868686869e-06, "loss": 6.357504272460938, "step": 19540 }, { "epoch": 0.19545, "grad_norm": 3.8217105865478516, "learning_rate": 4.063434343434344e-06, "loss": 6.421980285644532, "step": 19545 }, { "epoch": 0.1955, "grad_norm": 5.5884599685668945, "learning_rate": 4.063181818181818e-06, "loss": 6.432899475097656, "step": 19550 }, { "epoch": 0.19555, "grad_norm": 5.290658950805664, "learning_rate": 4.062929292929294e-06, "loss": 6.406802368164063, "step": 19555 }, { "epoch": 0.1956, "grad_norm": 6.350554466247559, "learning_rate": 4.0626767676767685e-06, "loss": 6.377091598510742, "step": 19560 }, { "epoch": 0.19565, "grad_norm": 6.207547187805176, "learning_rate": 4.062424242424243e-06, "loss": 6.388495635986328, "step": 19565 }, { "epoch": 0.1957, "grad_norm": 24.059823989868164, "learning_rate": 4.062171717171717e-06, "loss": 6.038714599609375, "step": 19570 }, { "epoch": 0.19575, "grad_norm": 3.023705005645752, "learning_rate": 4.061919191919192e-06, "loss": 6.432948303222656, "step": 19575 }, { "epoch": 0.1958, "grad_norm": 3.2975223064422607, "learning_rate": 4.061666666666667e-06, "loss": 6.4373115539550785, "step": 19580 }, { "epoch": 0.19585, "grad_norm": 3.8520004749298096, "learning_rate": 4.061414141414142e-06, "loss": 6.3886066436767575, "step": 19585 }, { "epoch": 0.1959, "grad_norm": 4.509420394897461, "learning_rate": 4.061161616161616e-06, "loss": 6.355691909790039, "step": 19590 }, { "epoch": 0.19595, "grad_norm": 4.087698459625244, "learning_rate": 4.060909090909092e-06, "loss": 6.342591094970703, "step": 19595 }, { "epoch": 0.196, "grad_norm": 3.9765119552612305, "learning_rate": 4.060656565656566e-06, "loss": 6.409799957275391, "step": 19600 }, { "epoch": 0.19605, "grad_norm": 5.4682135581970215, "learning_rate": 4.060404040404041e-06, "loss": 6.375949859619141, "step": 19605 }, { "epoch": 0.1961, "grad_norm": 5.131296634674072, "learning_rate": 4.060151515151516e-06, "loss": 6.420603942871094, "step": 19610 }, { "epoch": 0.19615, "grad_norm": 5.33582878112793, "learning_rate": 4.05989898989899e-06, "loss": 6.349749755859375, "step": 19615 }, { "epoch": 0.1962, "grad_norm": 3.639460325241089, "learning_rate": 4.059646464646465e-06, "loss": 6.342902374267578, "step": 19620 }, { "epoch": 0.19625, "grad_norm": 5.6021904945373535, "learning_rate": 4.0593939393939395e-06, "loss": 6.437205505371094, "step": 19625 }, { "epoch": 0.1963, "grad_norm": 5.687469959259033, "learning_rate": 4.059141414141414e-06, "loss": 6.432957458496094, "step": 19630 }, { "epoch": 0.19635, "grad_norm": 24.563066482543945, "learning_rate": 4.05888888888889e-06, "loss": 6.3532562255859375, "step": 19635 }, { "epoch": 0.1964, "grad_norm": 3.304119110107422, "learning_rate": 4.058636363636364e-06, "loss": 6.4160713195800785, "step": 19640 }, { "epoch": 0.19645, "grad_norm": 6.5999226570129395, "learning_rate": 4.058383838383839e-06, "loss": 6.3621360778808596, "step": 19645 }, { "epoch": 0.1965, "grad_norm": 3.1531336307525635, "learning_rate": 4.0581313131313135e-06, "loss": 6.363382339477539, "step": 19650 }, { "epoch": 0.19655, "grad_norm": 2.853524684906006, "learning_rate": 4.057878787878788e-06, "loss": 6.321531677246094, "step": 19655 }, { "epoch": 0.1966, "grad_norm": 7.633263111114502, "learning_rate": 4.057626262626263e-06, "loss": 6.369779968261719, "step": 19660 }, { "epoch": 0.19665, "grad_norm": 5.661573886871338, "learning_rate": 4.0573737373737374e-06, "loss": 6.3661144256591795, "step": 19665 }, { "epoch": 0.1967, "grad_norm": 5.467764854431152, "learning_rate": 4.057121212121212e-06, "loss": 6.4112297058105465, "step": 19670 }, { "epoch": 0.19675, "grad_norm": 4.099609375, "learning_rate": 4.0568686868686875e-06, "loss": 6.416462707519531, "step": 19675 }, { "epoch": 0.1968, "grad_norm": 4.346899032592773, "learning_rate": 4.056616161616162e-06, "loss": 6.487928771972657, "step": 19680 }, { "epoch": 0.19685, "grad_norm": 10.25285530090332, "learning_rate": 4.056363636363637e-06, "loss": 6.363898086547851, "step": 19685 }, { "epoch": 0.1969, "grad_norm": 5.477935791015625, "learning_rate": 4.0561111111111114e-06, "loss": 6.415487670898438, "step": 19690 }, { "epoch": 0.19695, "grad_norm": 3.3069064617156982, "learning_rate": 4.055858585858586e-06, "loss": 6.437055206298828, "step": 19695 }, { "epoch": 0.197, "grad_norm": 5.783933162689209, "learning_rate": 4.055606060606061e-06, "loss": 6.3557781219482425, "step": 19700 }, { "epoch": 0.19705, "grad_norm": 8.561756134033203, "learning_rate": 4.055353535353535e-06, "loss": 6.503081512451172, "step": 19705 }, { "epoch": 0.1971, "grad_norm": 11.107012748718262, "learning_rate": 4.05510101010101e-06, "loss": 6.355319976806641, "step": 19710 }, { "epoch": 0.19715, "grad_norm": 2.707581043243408, "learning_rate": 4.0548484848484854e-06, "loss": 6.396902084350586, "step": 19715 }, { "epoch": 0.1972, "grad_norm": 5.244641304016113, "learning_rate": 4.05459595959596e-06, "loss": 6.354716491699219, "step": 19720 }, { "epoch": 0.19725, "grad_norm": 3.835785150527954, "learning_rate": 4.054343434343435e-06, "loss": 6.369344711303711, "step": 19725 }, { "epoch": 0.1973, "grad_norm": 10.75365924835205, "learning_rate": 4.054090909090909e-06, "loss": 6.444098663330078, "step": 19730 }, { "epoch": 0.19735, "grad_norm": 3.008230209350586, "learning_rate": 4.053838383838384e-06, "loss": 6.3549652099609375, "step": 19735 }, { "epoch": 0.1974, "grad_norm": 7.80946159362793, "learning_rate": 4.0535858585858586e-06, "loss": 6.361135482788086, "step": 19740 }, { "epoch": 0.19745, "grad_norm": 6.916878700256348, "learning_rate": 4.053333333333333e-06, "loss": 6.496293640136718, "step": 19745 }, { "epoch": 0.1975, "grad_norm": 6.121494293212891, "learning_rate": 4.053080808080808e-06, "loss": 6.375544738769531, "step": 19750 }, { "epoch": 0.19755, "grad_norm": 3.5816383361816406, "learning_rate": 4.052828282828283e-06, "loss": 6.542870330810547, "step": 19755 }, { "epoch": 0.1976, "grad_norm": 5.016317367553711, "learning_rate": 4.052575757575758e-06, "loss": 6.3715972900390625, "step": 19760 }, { "epoch": 0.19765, "grad_norm": 3.470564603805542, "learning_rate": 4.0523232323232326e-06, "loss": 6.35956916809082, "step": 19765 }, { "epoch": 0.1977, "grad_norm": 4.387614727020264, "learning_rate": 4.052070707070707e-06, "loss": 6.362211227416992, "step": 19770 }, { "epoch": 0.19775, "grad_norm": 3.1623826026916504, "learning_rate": 4.051818181818183e-06, "loss": 6.65032958984375, "step": 19775 }, { "epoch": 0.1978, "grad_norm": 3.035311698913574, "learning_rate": 4.051565656565657e-06, "loss": 6.340599822998047, "step": 19780 }, { "epoch": 0.19785, "grad_norm": 2.67224383354187, "learning_rate": 4.051313131313132e-06, "loss": 6.46024169921875, "step": 19785 }, { "epoch": 0.1979, "grad_norm": 4.361361026763916, "learning_rate": 4.051060606060606e-06, "loss": 6.395320892333984, "step": 19790 }, { "epoch": 0.19795, "grad_norm": 3.6205594539642334, "learning_rate": 4.050808080808081e-06, "loss": 6.327168273925781, "step": 19795 }, { "epoch": 0.198, "grad_norm": 4.760246276855469, "learning_rate": 4.050555555555556e-06, "loss": 6.373647308349609, "step": 19800 }, { "epoch": 0.19805, "grad_norm": 4.943057060241699, "learning_rate": 4.0503030303030305e-06, "loss": 6.3605602264404295, "step": 19805 }, { "epoch": 0.1981, "grad_norm": 5.4091925621032715, "learning_rate": 4.050050505050505e-06, "loss": 6.29454460144043, "step": 19810 }, { "epoch": 0.19815, "grad_norm": 5.322918891906738, "learning_rate": 4.0497979797979806e-06, "loss": 6.341519546508789, "step": 19815 }, { "epoch": 0.1982, "grad_norm": 4.914088249206543, "learning_rate": 4.049545454545455e-06, "loss": 6.3788604736328125, "step": 19820 }, { "epoch": 0.19825, "grad_norm": 4.1996235847473145, "learning_rate": 4.04929292929293e-06, "loss": 6.354228973388672, "step": 19825 }, { "epoch": 0.1983, "grad_norm": 4.204217910766602, "learning_rate": 4.0490404040404045e-06, "loss": 6.352099227905273, "step": 19830 }, { "epoch": 0.19835, "grad_norm": 4.682660102844238, "learning_rate": 4.048787878787879e-06, "loss": 6.395558547973633, "step": 19835 }, { "epoch": 0.1984, "grad_norm": 4.341474533081055, "learning_rate": 4.048535353535354e-06, "loss": 6.373513793945312, "step": 19840 }, { "epoch": 0.19845, "grad_norm": 6.945283889770508, "learning_rate": 4.048282828282828e-06, "loss": 6.478843688964844, "step": 19845 }, { "epoch": 0.1985, "grad_norm": 2.3319458961486816, "learning_rate": 4.048030303030303e-06, "loss": 6.34193000793457, "step": 19850 }, { "epoch": 0.19855, "grad_norm": 4.012521743774414, "learning_rate": 4.0477777777777785e-06, "loss": 6.367397689819336, "step": 19855 }, { "epoch": 0.1986, "grad_norm": 3.307687997817993, "learning_rate": 4.047525252525253e-06, "loss": 6.360920715332031, "step": 19860 }, { "epoch": 0.19865, "grad_norm": 9.928263664245605, "learning_rate": 4.047272727272728e-06, "loss": 6.390003204345703, "step": 19865 }, { "epoch": 0.1987, "grad_norm": 6.413904666900635, "learning_rate": 4.047020202020202e-06, "loss": 6.328386306762695, "step": 19870 }, { "epoch": 0.19875, "grad_norm": 5.568958759307861, "learning_rate": 4.046767676767677e-06, "loss": 6.344242095947266, "step": 19875 }, { "epoch": 0.1988, "grad_norm": 5.069599628448486, "learning_rate": 4.046515151515152e-06, "loss": 6.374430847167969, "step": 19880 }, { "epoch": 0.19885, "grad_norm": 6.529012203216553, "learning_rate": 4.046262626262626e-06, "loss": 6.387958526611328, "step": 19885 }, { "epoch": 0.1989, "grad_norm": 6.083582878112793, "learning_rate": 4.046010101010102e-06, "loss": 6.386810684204102, "step": 19890 }, { "epoch": 0.19895, "grad_norm": 5.383504867553711, "learning_rate": 4.045757575757576e-06, "loss": 6.4219520568847654, "step": 19895 }, { "epoch": 0.199, "grad_norm": 5.496938228607178, "learning_rate": 4.045505050505051e-06, "loss": 6.395150756835937, "step": 19900 }, { "epoch": 0.19905, "grad_norm": 3.7606313228607178, "learning_rate": 4.045252525252526e-06, "loss": 6.364843368530273, "step": 19905 }, { "epoch": 0.1991, "grad_norm": 4.031744480133057, "learning_rate": 4.045e-06, "loss": 6.333458709716797, "step": 19910 }, { "epoch": 0.19915, "grad_norm": 3.2359061241149902, "learning_rate": 4.044747474747475e-06, "loss": 6.390080261230469, "step": 19915 }, { "epoch": 0.1992, "grad_norm": 5.553016662597656, "learning_rate": 4.0444949494949495e-06, "loss": 6.3586585998535154, "step": 19920 }, { "epoch": 0.19925, "grad_norm": 3.186434507369995, "learning_rate": 4.044242424242424e-06, "loss": 6.337453079223633, "step": 19925 }, { "epoch": 0.1993, "grad_norm": 3.828582763671875, "learning_rate": 4.0439898989899e-06, "loss": 6.385867309570313, "step": 19930 }, { "epoch": 0.19935, "grad_norm": 6.198828220367432, "learning_rate": 4.043737373737374e-06, "loss": 6.370376968383789, "step": 19935 }, { "epoch": 0.1994, "grad_norm": 7.457594871520996, "learning_rate": 4.043484848484849e-06, "loss": 6.373007202148438, "step": 19940 }, { "epoch": 0.19945, "grad_norm": 4.879168510437012, "learning_rate": 4.0432323232323235e-06, "loss": 6.37254638671875, "step": 19945 }, { "epoch": 0.1995, "grad_norm": 4.50380802154541, "learning_rate": 4.042979797979799e-06, "loss": 6.3638965606689455, "step": 19950 }, { "epoch": 0.19955, "grad_norm": 4.207643032073975, "learning_rate": 4.042727272727273e-06, "loss": 6.405306243896485, "step": 19955 }, { "epoch": 0.1996, "grad_norm": 5.415639877319336, "learning_rate": 4.042474747474747e-06, "loss": 6.446342468261719, "step": 19960 }, { "epoch": 0.19965, "grad_norm": 3.5389747619628906, "learning_rate": 4.042222222222222e-06, "loss": 6.35137710571289, "step": 19965 }, { "epoch": 0.1997, "grad_norm": 6.6378278732299805, "learning_rate": 4.0419696969696975e-06, "loss": 6.288910675048828, "step": 19970 }, { "epoch": 0.19975, "grad_norm": 4.267999172210693, "learning_rate": 4.041717171717172e-06, "loss": 6.3775585174560545, "step": 19975 }, { "epoch": 0.1998, "grad_norm": 3.7011568546295166, "learning_rate": 4.041464646464647e-06, "loss": 6.396281814575195, "step": 19980 }, { "epoch": 0.19985, "grad_norm": 4.070542812347412, "learning_rate": 4.041212121212121e-06, "loss": 6.342377853393555, "step": 19985 }, { "epoch": 0.1999, "grad_norm": 6.719453811645508, "learning_rate": 4.040959595959597e-06, "loss": 6.316285705566406, "step": 19990 }, { "epoch": 0.19995, "grad_norm": 5.616065979003906, "learning_rate": 4.0407070707070715e-06, "loss": 6.407460021972656, "step": 19995 }, { "epoch": 0.2, "grad_norm": 3.8747830390930176, "learning_rate": 4.040454545454546e-06, "loss": 6.351764678955078, "step": 20000 }, { "epoch": 0.20005, "grad_norm": 2.803332567214966, "learning_rate": 4.040202020202021e-06, "loss": 6.347883987426758, "step": 20005 }, { "epoch": 0.2001, "grad_norm": 3.437774181365967, "learning_rate": 4.039949494949495e-06, "loss": 6.421685028076172, "step": 20010 }, { "epoch": 0.20015, "grad_norm": 3.378933906555176, "learning_rate": 4.03969696969697e-06, "loss": 6.340965652465821, "step": 20015 }, { "epoch": 0.2002, "grad_norm": 4.846768379211426, "learning_rate": 4.039444444444445e-06, "loss": 6.299121856689453, "step": 20020 }, { "epoch": 0.20025, "grad_norm": 5.000956058502197, "learning_rate": 4.039191919191919e-06, "loss": 6.35535888671875, "step": 20025 }, { "epoch": 0.2003, "grad_norm": 6.020407676696777, "learning_rate": 4.038939393939395e-06, "loss": 6.394396209716797, "step": 20030 }, { "epoch": 0.20035, "grad_norm": 4.5149006843566895, "learning_rate": 4.038686868686869e-06, "loss": 6.3966926574707035, "step": 20035 }, { "epoch": 0.2004, "grad_norm": 13.636775016784668, "learning_rate": 4.038434343434344e-06, "loss": 6.628173828125, "step": 20040 }, { "epoch": 0.20045, "grad_norm": 4.228687763214111, "learning_rate": 4.038181818181819e-06, "loss": 6.3697154998779295, "step": 20045 }, { "epoch": 0.2005, "grad_norm": 10.238622665405273, "learning_rate": 4.037929292929293e-06, "loss": 6.334245300292968, "step": 20050 }, { "epoch": 0.20055, "grad_norm": 6.0647101402282715, "learning_rate": 4.037676767676768e-06, "loss": 6.407884979248047, "step": 20055 }, { "epoch": 0.2006, "grad_norm": 5.248980522155762, "learning_rate": 4.0374242424242425e-06, "loss": 6.374581909179687, "step": 20060 }, { "epoch": 0.20065, "grad_norm": 4.553674221038818, "learning_rate": 4.037171717171717e-06, "loss": 6.380126953125, "step": 20065 }, { "epoch": 0.2007, "grad_norm": 3.4165122509002686, "learning_rate": 4.036919191919193e-06, "loss": 6.3405517578125, "step": 20070 }, { "epoch": 0.20075, "grad_norm": 4.1999030113220215, "learning_rate": 4.036666666666667e-06, "loss": 6.374715423583984, "step": 20075 }, { "epoch": 0.2008, "grad_norm": 4.286134243011475, "learning_rate": 4.036414141414142e-06, "loss": 6.420569610595703, "step": 20080 }, { "epoch": 0.20085, "grad_norm": 3.9801321029663086, "learning_rate": 4.0361616161616165e-06, "loss": 6.395820999145508, "step": 20085 }, { "epoch": 0.2009, "grad_norm": 4.8740434646606445, "learning_rate": 4.035909090909091e-06, "loss": 6.404183197021484, "step": 20090 }, { "epoch": 0.20095, "grad_norm": 3.2778172492980957, "learning_rate": 4.035656565656566e-06, "loss": 6.384103775024414, "step": 20095 }, { "epoch": 0.201, "grad_norm": 2.1818039417266846, "learning_rate": 4.03540404040404e-06, "loss": 6.385401916503906, "step": 20100 }, { "epoch": 0.20105, "grad_norm": 3.5134024620056152, "learning_rate": 4.035151515151515e-06, "loss": 6.367840194702149, "step": 20105 }, { "epoch": 0.2011, "grad_norm": 4.840574741363525, "learning_rate": 4.0348989898989905e-06, "loss": 6.366757583618164, "step": 20110 }, { "epoch": 0.20115, "grad_norm": 3.6360416412353516, "learning_rate": 4.034646464646465e-06, "loss": 6.30010986328125, "step": 20115 }, { "epoch": 0.2012, "grad_norm": 5.701900959014893, "learning_rate": 4.03439393939394e-06, "loss": 6.348086929321289, "step": 20120 }, { "epoch": 0.20125, "grad_norm": 7.492800235748291, "learning_rate": 4.034141414141414e-06, "loss": 6.406241607666016, "step": 20125 }, { "epoch": 0.2013, "grad_norm": 7.411669731140137, "learning_rate": 4.033888888888889e-06, "loss": 6.348960876464844, "step": 20130 }, { "epoch": 0.20135, "grad_norm": 3.811933755874634, "learning_rate": 4.033636363636364e-06, "loss": 6.345926666259766, "step": 20135 }, { "epoch": 0.2014, "grad_norm": 7.607759475708008, "learning_rate": 4.033383838383838e-06, "loss": 6.404225158691406, "step": 20140 }, { "epoch": 0.20145, "grad_norm": 3.8762452602386475, "learning_rate": 4.033131313131313e-06, "loss": 6.398588562011719, "step": 20145 }, { "epoch": 0.2015, "grad_norm": 5.732476234436035, "learning_rate": 4.032878787878788e-06, "loss": 6.357340240478516, "step": 20150 }, { "epoch": 0.20155, "grad_norm": 5.358293533325195, "learning_rate": 4.032626262626263e-06, "loss": 6.37530288696289, "step": 20155 }, { "epoch": 0.2016, "grad_norm": 5.777172565460205, "learning_rate": 4.032373737373738e-06, "loss": 6.334160995483399, "step": 20160 }, { "epoch": 0.20165, "grad_norm": 2.4954335689544678, "learning_rate": 4.032121212121212e-06, "loss": 6.310306549072266, "step": 20165 }, { "epoch": 0.2017, "grad_norm": 14.948736190795898, "learning_rate": 4.031868686868688e-06, "loss": 6.526329803466797, "step": 20170 }, { "epoch": 0.20175, "grad_norm": 2.2543420791625977, "learning_rate": 4.0316161616161616e-06, "loss": 6.356016159057617, "step": 20175 }, { "epoch": 0.2018, "grad_norm": 17.214555740356445, "learning_rate": 4.031363636363636e-06, "loss": 6.483126068115235, "step": 20180 }, { "epoch": 0.20185, "grad_norm": 3.5849075317382812, "learning_rate": 4.031111111111111e-06, "loss": 6.487813568115234, "step": 20185 }, { "epoch": 0.2019, "grad_norm": 8.417732238769531, "learning_rate": 4.030858585858586e-06, "loss": 6.350529479980469, "step": 20190 }, { "epoch": 0.20195, "grad_norm": 4.588068962097168, "learning_rate": 4.030606060606061e-06, "loss": 6.414650726318359, "step": 20195 }, { "epoch": 0.202, "grad_norm": 2.868237018585205, "learning_rate": 4.0303535353535356e-06, "loss": 6.382372665405273, "step": 20200 }, { "epoch": 0.20205, "grad_norm": 3.537229299545288, "learning_rate": 4.03010101010101e-06, "loss": 6.401815032958984, "step": 20205 }, { "epoch": 0.2021, "grad_norm": 4.665562152862549, "learning_rate": 4.029848484848486e-06, "loss": 6.490205383300781, "step": 20210 }, { "epoch": 0.20215, "grad_norm": 4.411448001861572, "learning_rate": 4.02959595959596e-06, "loss": 6.363483047485351, "step": 20215 }, { "epoch": 0.2022, "grad_norm": 3.324888229370117, "learning_rate": 4.029343434343435e-06, "loss": 6.403836822509765, "step": 20220 }, { "epoch": 0.20225, "grad_norm": 5.32629919052124, "learning_rate": 4.0290909090909096e-06, "loss": 6.378729248046875, "step": 20225 }, { "epoch": 0.2023, "grad_norm": 3.8315112590789795, "learning_rate": 4.028838383838384e-06, "loss": 6.337154388427734, "step": 20230 }, { "epoch": 0.20235, "grad_norm": 2.7585361003875732, "learning_rate": 4.028585858585859e-06, "loss": 6.332876586914063, "step": 20235 }, { "epoch": 0.2024, "grad_norm": 6.2034711837768555, "learning_rate": 4.0283333333333334e-06, "loss": 6.340119171142578, "step": 20240 }, { "epoch": 0.20245, "grad_norm": 4.362227439880371, "learning_rate": 4.028080808080808e-06, "loss": 6.359671020507813, "step": 20245 }, { "epoch": 0.2025, "grad_norm": 4.8785786628723145, "learning_rate": 4.0278282828282836e-06, "loss": 6.333001708984375, "step": 20250 }, { "epoch": 0.20255, "grad_norm": 6.298890590667725, "learning_rate": 4.027575757575758e-06, "loss": 6.3823402404785154, "step": 20255 }, { "epoch": 0.2026, "grad_norm": 4.969520092010498, "learning_rate": 4.027323232323233e-06, "loss": 6.355764007568359, "step": 20260 }, { "epoch": 0.20265, "grad_norm": 3.8611021041870117, "learning_rate": 4.0270707070707074e-06, "loss": 6.346780395507812, "step": 20265 }, { "epoch": 0.2027, "grad_norm": 4.315485000610352, "learning_rate": 4.026818181818182e-06, "loss": 6.356830978393555, "step": 20270 }, { "epoch": 0.20275, "grad_norm": 5.489567756652832, "learning_rate": 4.026565656565657e-06, "loss": 6.3519542694091795, "step": 20275 }, { "epoch": 0.2028, "grad_norm": 4.812333106994629, "learning_rate": 4.026313131313131e-06, "loss": 6.3612548828125, "step": 20280 }, { "epoch": 0.20285, "grad_norm": 4.527482986450195, "learning_rate": 4.026060606060606e-06, "loss": 6.365477752685547, "step": 20285 }, { "epoch": 0.2029, "grad_norm": 2.9685986042022705, "learning_rate": 4.0258080808080814e-06, "loss": 6.36877326965332, "step": 20290 }, { "epoch": 0.20295, "grad_norm": 7.469924449920654, "learning_rate": 4.025555555555556e-06, "loss": 6.312658309936523, "step": 20295 }, { "epoch": 0.203, "grad_norm": 8.20630931854248, "learning_rate": 4.025303030303031e-06, "loss": 6.228873443603516, "step": 20300 }, { "epoch": 0.20305, "grad_norm": 5.82289981842041, "learning_rate": 4.025050505050505e-06, "loss": 6.3263099670410154, "step": 20305 }, { "epoch": 0.2031, "grad_norm": 5.933867454528809, "learning_rate": 4.02479797979798e-06, "loss": 6.446194458007812, "step": 20310 }, { "epoch": 0.20315, "grad_norm": 3.6465070247650146, "learning_rate": 4.024545454545455e-06, "loss": 6.337147521972656, "step": 20315 }, { "epoch": 0.2032, "grad_norm": 5.106374263763428, "learning_rate": 4.024292929292929e-06, "loss": 6.323910522460937, "step": 20320 }, { "epoch": 0.20325, "grad_norm": 6.2929606437683105, "learning_rate": 4.024040404040405e-06, "loss": 6.35522575378418, "step": 20325 }, { "epoch": 0.2033, "grad_norm": 4.971077919006348, "learning_rate": 4.023787878787879e-06, "loss": 6.374831771850586, "step": 20330 }, { "epoch": 0.20335, "grad_norm": 4.0720391273498535, "learning_rate": 4.023535353535354e-06, "loss": 6.363529205322266, "step": 20335 }, { "epoch": 0.2034, "grad_norm": 6.761946201324463, "learning_rate": 4.023282828282829e-06, "loss": 6.328658676147461, "step": 20340 }, { "epoch": 0.20345, "grad_norm": 9.92784595489502, "learning_rate": 4.023030303030303e-06, "loss": 6.362234115600586, "step": 20345 }, { "epoch": 0.2035, "grad_norm": 4.542357921600342, "learning_rate": 4.022777777777778e-06, "loss": 6.447410583496094, "step": 20350 }, { "epoch": 0.20355, "grad_norm": 5.023688316345215, "learning_rate": 4.0225252525252525e-06, "loss": 6.306566619873047, "step": 20355 }, { "epoch": 0.2036, "grad_norm": 10.305927276611328, "learning_rate": 4.022272727272727e-06, "loss": 6.383105087280273, "step": 20360 }, { "epoch": 0.20365, "grad_norm": 2.9185283184051514, "learning_rate": 4.022020202020203e-06, "loss": 6.3423622131347654, "step": 20365 }, { "epoch": 0.2037, "grad_norm": 3.667742967605591, "learning_rate": 4.021767676767677e-06, "loss": 6.3707115173339846, "step": 20370 }, { "epoch": 0.20375, "grad_norm": 8.89310073852539, "learning_rate": 4.021515151515152e-06, "loss": 6.351136779785156, "step": 20375 }, { "epoch": 0.2038, "grad_norm": 3.8383567333221436, "learning_rate": 4.0212626262626265e-06, "loss": 6.357881927490235, "step": 20380 }, { "epoch": 0.20385, "grad_norm": 8.614429473876953, "learning_rate": 4.021010101010102e-06, "loss": 6.362750625610351, "step": 20385 }, { "epoch": 0.2039, "grad_norm": 12.984660148620605, "learning_rate": 4.020757575757577e-06, "loss": 6.574108123779297, "step": 20390 }, { "epoch": 0.20395, "grad_norm": 3.2902913093566895, "learning_rate": 4.020505050505051e-06, "loss": 6.352775192260742, "step": 20395 }, { "epoch": 0.204, "grad_norm": 4.644119739532471, "learning_rate": 4.020252525252525e-06, "loss": 6.380883026123047, "step": 20400 }, { "epoch": 0.20405, "grad_norm": 5.100404262542725, "learning_rate": 4.0200000000000005e-06, "loss": 6.397125244140625, "step": 20405 }, { "epoch": 0.2041, "grad_norm": 4.269204616546631, "learning_rate": 4.019747474747475e-06, "loss": 6.408762359619141, "step": 20410 }, { "epoch": 0.20415, "grad_norm": 31.281578063964844, "learning_rate": 4.01949494949495e-06, "loss": 6.278554534912109, "step": 20415 }, { "epoch": 0.2042, "grad_norm": 3.684269428253174, "learning_rate": 4.019242424242424e-06, "loss": 6.30320930480957, "step": 20420 }, { "epoch": 0.20425, "grad_norm": 6.166539192199707, "learning_rate": 4.0189898989899e-06, "loss": 6.385466766357422, "step": 20425 }, { "epoch": 0.2043, "grad_norm": 3.428785562515259, "learning_rate": 4.0187373737373745e-06, "loss": 6.324388122558593, "step": 20430 }, { "epoch": 0.20435, "grad_norm": 4.514702320098877, "learning_rate": 4.018484848484849e-06, "loss": 6.375190734863281, "step": 20435 }, { "epoch": 0.2044, "grad_norm": 5.615166187286377, "learning_rate": 4.018232323232324e-06, "loss": 6.356307601928711, "step": 20440 }, { "epoch": 0.20445, "grad_norm": 3.3424322605133057, "learning_rate": 4.017979797979798e-06, "loss": 6.345679855346679, "step": 20445 }, { "epoch": 0.2045, "grad_norm": 2.646754264831543, "learning_rate": 4.017727272727273e-06, "loss": 6.355110168457031, "step": 20450 }, { "epoch": 0.20455, "grad_norm": 3.1417641639709473, "learning_rate": 4.017474747474748e-06, "loss": 6.353330993652344, "step": 20455 }, { "epoch": 0.2046, "grad_norm": 9.960881233215332, "learning_rate": 4.017222222222222e-06, "loss": 6.490077209472656, "step": 20460 }, { "epoch": 0.20465, "grad_norm": 4.625771522521973, "learning_rate": 4.016969696969698e-06, "loss": 6.4681541442871096, "step": 20465 }, { "epoch": 0.2047, "grad_norm": 7.6048688888549805, "learning_rate": 4.016717171717172e-06, "loss": 6.440632629394531, "step": 20470 }, { "epoch": 0.20475, "grad_norm": 7.140348434448242, "learning_rate": 4.016464646464647e-06, "loss": 6.376180648803711, "step": 20475 }, { "epoch": 0.2048, "grad_norm": 4.286020278930664, "learning_rate": 4.016212121212122e-06, "loss": 6.402983093261719, "step": 20480 }, { "epoch": 0.20485, "grad_norm": 6.941558361053467, "learning_rate": 4.015959595959596e-06, "loss": 6.3578754425048825, "step": 20485 }, { "epoch": 0.2049, "grad_norm": 13.418527603149414, "learning_rate": 4.015707070707071e-06, "loss": 6.460516357421875, "step": 20490 }, { "epoch": 0.20495, "grad_norm": 3.7380850315093994, "learning_rate": 4.0154545454545455e-06, "loss": 6.342895126342773, "step": 20495 }, { "epoch": 0.205, "grad_norm": 3.571575403213501, "learning_rate": 4.01520202020202e-06, "loss": 6.388998413085938, "step": 20500 }, { "epoch": 0.20505, "grad_norm": 3.692755937576294, "learning_rate": 4.014949494949496e-06, "loss": 6.34915771484375, "step": 20505 }, { "epoch": 0.2051, "grad_norm": 4.794854640960693, "learning_rate": 4.01469696969697e-06, "loss": 6.389421463012695, "step": 20510 }, { "epoch": 0.20515, "grad_norm": 3.288912057876587, "learning_rate": 4.014444444444445e-06, "loss": 6.594066619873047, "step": 20515 }, { "epoch": 0.2052, "grad_norm": 5.0585036277771, "learning_rate": 4.0141919191919195e-06, "loss": 6.355093002319336, "step": 20520 }, { "epoch": 0.20525, "grad_norm": 5.878778457641602, "learning_rate": 4.013939393939394e-06, "loss": 6.610690307617188, "step": 20525 }, { "epoch": 0.2053, "grad_norm": 4.336269378662109, "learning_rate": 4.013686868686869e-06, "loss": 6.3579357147216795, "step": 20530 }, { "epoch": 0.20535, "grad_norm": 4.008432865142822, "learning_rate": 4.013434343434343e-06, "loss": 6.350932693481445, "step": 20535 }, { "epoch": 0.2054, "grad_norm": 5.025798797607422, "learning_rate": 4.013181818181818e-06, "loss": 6.365487670898437, "step": 20540 }, { "epoch": 0.20545, "grad_norm": 4.775993347167969, "learning_rate": 4.0129292929292935e-06, "loss": 6.370899200439453, "step": 20545 }, { "epoch": 0.2055, "grad_norm": 4.619327545166016, "learning_rate": 4.012676767676768e-06, "loss": 6.320161437988281, "step": 20550 }, { "epoch": 0.20555, "grad_norm": 4.6794962882995605, "learning_rate": 4.012424242424243e-06, "loss": 6.3646728515625, "step": 20555 }, { "epoch": 0.2056, "grad_norm": 3.7817881107330322, "learning_rate": 4.012171717171717e-06, "loss": 6.445259094238281, "step": 20560 }, { "epoch": 0.20565, "grad_norm": 5.776644229888916, "learning_rate": 4.011919191919192e-06, "loss": 6.35023193359375, "step": 20565 }, { "epoch": 0.2057, "grad_norm": 14.06558895111084, "learning_rate": 4.011666666666667e-06, "loss": 6.460578918457031, "step": 20570 }, { "epoch": 0.20575, "grad_norm": 4.164096355438232, "learning_rate": 4.011414141414141e-06, "loss": 6.464813232421875, "step": 20575 }, { "epoch": 0.2058, "grad_norm": 4.149917125701904, "learning_rate": 4.011161616161616e-06, "loss": 6.368473052978516, "step": 20580 }, { "epoch": 0.20585, "grad_norm": 3.432687282562256, "learning_rate": 4.010909090909091e-06, "loss": 6.356121826171875, "step": 20585 }, { "epoch": 0.2059, "grad_norm": 5.107146263122559, "learning_rate": 4.010656565656566e-06, "loss": 6.364453887939453, "step": 20590 }, { "epoch": 0.20595, "grad_norm": 3.402611017227173, "learning_rate": 4.010404040404041e-06, "loss": 6.357967376708984, "step": 20595 }, { "epoch": 0.206, "grad_norm": 5.611897945404053, "learning_rate": 4.010151515151515e-06, "loss": 6.358080291748047, "step": 20600 }, { "epoch": 0.20605, "grad_norm": 5.799321174621582, "learning_rate": 4.009898989898991e-06, "loss": 6.3924613952636715, "step": 20605 }, { "epoch": 0.2061, "grad_norm": 4.138247489929199, "learning_rate": 4.009646464646465e-06, "loss": 6.2831378936767575, "step": 20610 }, { "epoch": 0.20615, "grad_norm": 3.312458038330078, "learning_rate": 4.00939393939394e-06, "loss": 6.453696441650391, "step": 20615 }, { "epoch": 0.2062, "grad_norm": 5.111586093902588, "learning_rate": 4.009141414141414e-06, "loss": 6.406465148925781, "step": 20620 }, { "epoch": 0.20625, "grad_norm": 3.0231993198394775, "learning_rate": 4.008888888888889e-06, "loss": 6.41394271850586, "step": 20625 }, { "epoch": 0.2063, "grad_norm": 6.32208776473999, "learning_rate": 4.008636363636364e-06, "loss": 6.4822135925292965, "step": 20630 }, { "epoch": 0.20635, "grad_norm": 3.232923746109009, "learning_rate": 4.0083838383838385e-06, "loss": 6.396792984008789, "step": 20635 }, { "epoch": 0.2064, "grad_norm": 3.0254411697387695, "learning_rate": 4.008131313131313e-06, "loss": 6.348231506347656, "step": 20640 }, { "epoch": 0.20645, "grad_norm": 5.295039653778076, "learning_rate": 4.007878787878789e-06, "loss": 6.398869323730469, "step": 20645 }, { "epoch": 0.2065, "grad_norm": 11.293522834777832, "learning_rate": 4.007626262626263e-06, "loss": 6.548778533935547, "step": 20650 }, { "epoch": 0.20655, "grad_norm": 3.4703831672668457, "learning_rate": 4.007373737373738e-06, "loss": 6.3591064453125, "step": 20655 }, { "epoch": 0.2066, "grad_norm": 4.949450969696045, "learning_rate": 4.0071212121212125e-06, "loss": 6.395159149169922, "step": 20660 }, { "epoch": 0.20665, "grad_norm": 4.732714653015137, "learning_rate": 4.006868686868687e-06, "loss": 6.35709228515625, "step": 20665 }, { "epoch": 0.2067, "grad_norm": 8.116568565368652, "learning_rate": 4.006616161616162e-06, "loss": 6.353506469726563, "step": 20670 }, { "epoch": 0.20675, "grad_norm": 7.027108192443848, "learning_rate": 4.0063636363636364e-06, "loss": 6.3533683776855465, "step": 20675 }, { "epoch": 0.2068, "grad_norm": 5.622602939605713, "learning_rate": 4.006111111111111e-06, "loss": 6.3503257751464846, "step": 20680 }, { "epoch": 0.20685, "grad_norm": 5.282142162322998, "learning_rate": 4.0058585858585865e-06, "loss": 6.377806854248047, "step": 20685 }, { "epoch": 0.2069, "grad_norm": 7.004215240478516, "learning_rate": 4.005606060606061e-06, "loss": 6.185681533813477, "step": 20690 }, { "epoch": 0.20695, "grad_norm": 3.5564122200012207, "learning_rate": 4.005353535353536e-06, "loss": 6.359429168701172, "step": 20695 }, { "epoch": 0.207, "grad_norm": 3.7010040283203125, "learning_rate": 4.0051010101010104e-06, "loss": 6.339554214477539, "step": 20700 }, { "epoch": 0.20705, "grad_norm": 3.790940761566162, "learning_rate": 4.004848484848485e-06, "loss": 6.333852005004883, "step": 20705 }, { "epoch": 0.2071, "grad_norm": 4.1006245613098145, "learning_rate": 4.00459595959596e-06, "loss": 6.365642547607422, "step": 20710 }, { "epoch": 0.20715, "grad_norm": 17.143461227416992, "learning_rate": 4.004343434343434e-06, "loss": 6.3984840393066404, "step": 20715 }, { "epoch": 0.2072, "grad_norm": 6.992534637451172, "learning_rate": 4.00409090909091e-06, "loss": 6.381084823608399, "step": 20720 }, { "epoch": 0.20725, "grad_norm": 4.424422740936279, "learning_rate": 4.0038383838383844e-06, "loss": 6.356184005737305, "step": 20725 }, { "epoch": 0.2073, "grad_norm": 4.875939846038818, "learning_rate": 4.003585858585859e-06, "loss": 6.360240173339844, "step": 20730 }, { "epoch": 0.20735, "grad_norm": 5.140171527862549, "learning_rate": 4.003333333333334e-06, "loss": 6.355035018920899, "step": 20735 }, { "epoch": 0.2074, "grad_norm": 4.237532615661621, "learning_rate": 4.003080808080808e-06, "loss": 6.35460205078125, "step": 20740 }, { "epoch": 0.20745, "grad_norm": 6.2590508460998535, "learning_rate": 4.002828282828283e-06, "loss": 6.412515258789062, "step": 20745 }, { "epoch": 0.2075, "grad_norm": 4.44389533996582, "learning_rate": 4.0025757575757576e-06, "loss": 6.375264739990234, "step": 20750 }, { "epoch": 0.20755, "grad_norm": 3.36201810836792, "learning_rate": 4.002323232323232e-06, "loss": 6.387198638916016, "step": 20755 }, { "epoch": 0.2076, "grad_norm": 4.838787078857422, "learning_rate": 4.002070707070708e-06, "loss": 6.4140174865722654, "step": 20760 }, { "epoch": 0.20765, "grad_norm": 7.249969005584717, "learning_rate": 4.001818181818182e-06, "loss": 6.351432037353516, "step": 20765 }, { "epoch": 0.2077, "grad_norm": 4.546363830566406, "learning_rate": 4.001565656565657e-06, "loss": 6.330135726928711, "step": 20770 }, { "epoch": 0.20775, "grad_norm": 6.03836727142334, "learning_rate": 4.0013131313131316e-06, "loss": 6.317938995361328, "step": 20775 }, { "epoch": 0.2078, "grad_norm": 4.840814590454102, "learning_rate": 4.001060606060607e-06, "loss": 6.373530578613281, "step": 20780 }, { "epoch": 0.20785, "grad_norm": 9.981611251831055, "learning_rate": 4.000808080808081e-06, "loss": 6.296901702880859, "step": 20785 }, { "epoch": 0.2079, "grad_norm": 7.801023960113525, "learning_rate": 4.0005555555555555e-06, "loss": 6.346604537963867, "step": 20790 }, { "epoch": 0.20795, "grad_norm": 6.029979228973389, "learning_rate": 4.00030303030303e-06, "loss": 6.401062774658203, "step": 20795 }, { "epoch": 0.208, "grad_norm": 5.683027744293213, "learning_rate": 4.0000505050505056e-06, "loss": 6.394994735717773, "step": 20800 }, { "epoch": 0.20805, "grad_norm": 3.5526480674743652, "learning_rate": 3.99979797979798e-06, "loss": 6.377469635009765, "step": 20805 }, { "epoch": 0.2081, "grad_norm": 4.418914794921875, "learning_rate": 3.999545454545455e-06, "loss": 6.336770629882812, "step": 20810 }, { "epoch": 0.20815, "grad_norm": 4.853655815124512, "learning_rate": 3.9992929292929295e-06, "loss": 6.460979461669922, "step": 20815 }, { "epoch": 0.2082, "grad_norm": 5.496026039123535, "learning_rate": 3.999040404040405e-06, "loss": 6.354240417480469, "step": 20820 }, { "epoch": 0.20825, "grad_norm": 2.983301877975464, "learning_rate": 3.9987878787878796e-06, "loss": 6.390739440917969, "step": 20825 }, { "epoch": 0.2083, "grad_norm": 16.09366798400879, "learning_rate": 3.998535353535354e-06, "loss": 6.3454235076904295, "step": 20830 }, { "epoch": 0.20835, "grad_norm": 8.245223999023438, "learning_rate": 3.998282828282829e-06, "loss": 6.430421447753906, "step": 20835 }, { "epoch": 0.2084, "grad_norm": 3.684112310409546, "learning_rate": 3.9980303030303035e-06, "loss": 6.33931884765625, "step": 20840 }, { "epoch": 0.20845, "grad_norm": 6.892725467681885, "learning_rate": 3.997777777777778e-06, "loss": 6.399040222167969, "step": 20845 }, { "epoch": 0.2085, "grad_norm": 3.439957618713379, "learning_rate": 3.997525252525253e-06, "loss": 6.343931198120117, "step": 20850 }, { "epoch": 0.20855, "grad_norm": 8.895709037780762, "learning_rate": 3.997272727272727e-06, "loss": 6.446553039550781, "step": 20855 }, { "epoch": 0.2086, "grad_norm": 5.565595626831055, "learning_rate": 3.997020202020203e-06, "loss": 6.476204681396484, "step": 20860 }, { "epoch": 0.20865, "grad_norm": 4.334374904632568, "learning_rate": 3.9967676767676775e-06, "loss": 6.327626037597656, "step": 20865 }, { "epoch": 0.2087, "grad_norm": 5.3817901611328125, "learning_rate": 3.996515151515152e-06, "loss": 6.290274810791016, "step": 20870 }, { "epoch": 0.20875, "grad_norm": 10.791574478149414, "learning_rate": 3.996262626262627e-06, "loss": 6.453623962402344, "step": 20875 }, { "epoch": 0.2088, "grad_norm": 3.3294553756713867, "learning_rate": 3.996010101010101e-06, "loss": 6.382739639282226, "step": 20880 }, { "epoch": 0.20885, "grad_norm": 7.442202091217041, "learning_rate": 3.995757575757576e-06, "loss": 6.355996704101562, "step": 20885 }, { "epoch": 0.2089, "grad_norm": 6.2286553382873535, "learning_rate": 3.995505050505051e-06, "loss": 6.352015686035156, "step": 20890 }, { "epoch": 0.20895, "grad_norm": 4.528999328613281, "learning_rate": 3.995252525252525e-06, "loss": 6.398309326171875, "step": 20895 }, { "epoch": 0.209, "grad_norm": 5.539361476898193, "learning_rate": 3.995000000000001e-06, "loss": 6.379689025878906, "step": 20900 }, { "epoch": 0.20905, "grad_norm": 4.757680416107178, "learning_rate": 3.994747474747475e-06, "loss": 6.489178466796875, "step": 20905 }, { "epoch": 0.2091, "grad_norm": 5.892220497131348, "learning_rate": 3.99449494949495e-06, "loss": 6.356561660766602, "step": 20910 }, { "epoch": 0.20915, "grad_norm": 5.8020524978637695, "learning_rate": 3.994242424242425e-06, "loss": 6.269894409179687, "step": 20915 }, { "epoch": 0.2092, "grad_norm": 6.144820690155029, "learning_rate": 3.993989898989899e-06, "loss": 6.359446716308594, "step": 20920 }, { "epoch": 0.20925, "grad_norm": 6.359478950500488, "learning_rate": 3.993737373737374e-06, "loss": 6.331379318237305, "step": 20925 }, { "epoch": 0.2093, "grad_norm": 6.281612396240234, "learning_rate": 3.9934848484848485e-06, "loss": 6.353636932373047, "step": 20930 }, { "epoch": 0.20935, "grad_norm": 4.308096885681152, "learning_rate": 3.993232323232323e-06, "loss": 6.315085601806641, "step": 20935 }, { "epoch": 0.2094, "grad_norm": 5.336031436920166, "learning_rate": 3.992979797979799e-06, "loss": 6.354276275634765, "step": 20940 }, { "epoch": 0.20945, "grad_norm": 4.012864589691162, "learning_rate": 3.992727272727273e-06, "loss": 6.339645767211914, "step": 20945 }, { "epoch": 0.2095, "grad_norm": 4.652839183807373, "learning_rate": 3.992474747474748e-06, "loss": 6.336920547485351, "step": 20950 }, { "epoch": 0.20955, "grad_norm": 2.7315380573272705, "learning_rate": 3.9922222222222225e-06, "loss": 6.329990005493164, "step": 20955 }, { "epoch": 0.2096, "grad_norm": 11.15515422821045, "learning_rate": 3.991969696969697e-06, "loss": 6.6475685119628904, "step": 20960 }, { "epoch": 0.20965, "grad_norm": 5.343552112579346, "learning_rate": 3.991717171717172e-06, "loss": 6.389681243896485, "step": 20965 }, { "epoch": 0.2097, "grad_norm": 3.3437976837158203, "learning_rate": 3.991464646464646e-06, "loss": 6.283087158203125, "step": 20970 }, { "epoch": 0.20975, "grad_norm": 6.911880970001221, "learning_rate": 3.991212121212121e-06, "loss": 6.3439277648925785, "step": 20975 }, { "epoch": 0.2098, "grad_norm": 5.578760623931885, "learning_rate": 3.9909595959595965e-06, "loss": 6.32133674621582, "step": 20980 }, { "epoch": 0.20985, "grad_norm": 19.15350914001465, "learning_rate": 3.990707070707071e-06, "loss": 6.321911239624024, "step": 20985 }, { "epoch": 0.2099, "grad_norm": 6.631302833557129, "learning_rate": 3.990454545454546e-06, "loss": 6.389309692382812, "step": 20990 }, { "epoch": 0.20995, "grad_norm": 5.406488418579102, "learning_rate": 3.99020202020202e-06, "loss": 6.289642333984375, "step": 20995 }, { "epoch": 0.21, "grad_norm": 5.33192253112793, "learning_rate": 3.989949494949496e-06, "loss": 6.312437438964844, "step": 21000 }, { "epoch": 0.21005, "grad_norm": 4.749044418334961, "learning_rate": 3.9896969696969705e-06, "loss": 6.341854858398437, "step": 21005 }, { "epoch": 0.2101, "grad_norm": 3.360349416732788, "learning_rate": 3.989444444444444e-06, "loss": 6.3814537048339846, "step": 21010 }, { "epoch": 0.21015, "grad_norm": 5.383340358734131, "learning_rate": 3.989191919191919e-06, "loss": 6.349407577514649, "step": 21015 }, { "epoch": 0.2102, "grad_norm": 6.050821781158447, "learning_rate": 3.988939393939394e-06, "loss": 6.314698028564453, "step": 21020 }, { "epoch": 0.21025, "grad_norm": 4.283251762390137, "learning_rate": 3.988686868686869e-06, "loss": 6.39244384765625, "step": 21025 }, { "epoch": 0.2103, "grad_norm": 3.3211472034454346, "learning_rate": 3.988434343434344e-06, "loss": 6.328467941284179, "step": 21030 }, { "epoch": 0.21035, "grad_norm": 5.578979015350342, "learning_rate": 3.988181818181818e-06, "loss": 6.356046295166015, "step": 21035 }, { "epoch": 0.2104, "grad_norm": 2.923556327819824, "learning_rate": 3.987929292929294e-06, "loss": 6.333168029785156, "step": 21040 }, { "epoch": 0.21045, "grad_norm": 3.341574192047119, "learning_rate": 3.987676767676768e-06, "loss": 6.384103775024414, "step": 21045 }, { "epoch": 0.2105, "grad_norm": 4.628855228424072, "learning_rate": 3.987424242424243e-06, "loss": 6.305697631835938, "step": 21050 }, { "epoch": 0.21055, "grad_norm": 8.533914566040039, "learning_rate": 3.987171717171718e-06, "loss": 6.350126647949219, "step": 21055 }, { "epoch": 0.2106, "grad_norm": 4.511482238769531, "learning_rate": 3.986919191919192e-06, "loss": 6.36800537109375, "step": 21060 }, { "epoch": 0.21065, "grad_norm": 4.5658698081970215, "learning_rate": 3.986666666666667e-06, "loss": 6.287645721435547, "step": 21065 }, { "epoch": 0.2107, "grad_norm": 5.8238019943237305, "learning_rate": 3.9864141414141415e-06, "loss": 6.429801940917969, "step": 21070 }, { "epoch": 0.21075, "grad_norm": 4.793154239654541, "learning_rate": 3.986161616161616e-06, "loss": 6.3375404357910154, "step": 21075 }, { "epoch": 0.2108, "grad_norm": 2.6113874912261963, "learning_rate": 3.985909090909092e-06, "loss": 6.347359085083008, "step": 21080 }, { "epoch": 0.21085, "grad_norm": 4.035508155822754, "learning_rate": 3.985656565656566e-06, "loss": 6.327709197998047, "step": 21085 }, { "epoch": 0.2109, "grad_norm": 7.060179233551025, "learning_rate": 3.985404040404041e-06, "loss": 6.395231628417969, "step": 21090 }, { "epoch": 0.21095, "grad_norm": 3.3100943565368652, "learning_rate": 3.9851515151515155e-06, "loss": 6.334197616577148, "step": 21095 }, { "epoch": 0.211, "grad_norm": 3.410588026046753, "learning_rate": 3.98489898989899e-06, "loss": 6.2635650634765625, "step": 21100 }, { "epoch": 0.21105, "grad_norm": 5.326977252960205, "learning_rate": 3.984646464646465e-06, "loss": 6.4241477966308596, "step": 21105 }, { "epoch": 0.2111, "grad_norm": 6.2368998527526855, "learning_rate": 3.984393939393939e-06, "loss": 6.374138641357422, "step": 21110 }, { "epoch": 0.21115, "grad_norm": 3.7779035568237305, "learning_rate": 3.984141414141414e-06, "loss": 6.285033416748047, "step": 21115 }, { "epoch": 0.2112, "grad_norm": 3.6418726444244385, "learning_rate": 3.9838888888888895e-06, "loss": 6.3784027099609375, "step": 21120 }, { "epoch": 0.21125, "grad_norm": 3.3509421348571777, "learning_rate": 3.983636363636364e-06, "loss": 6.317223358154297, "step": 21125 }, { "epoch": 0.2113, "grad_norm": 4.907395362854004, "learning_rate": 3.983383838383839e-06, "loss": 6.384052276611328, "step": 21130 }, { "epoch": 0.21135, "grad_norm": 4.7471137046813965, "learning_rate": 3.983131313131313e-06, "loss": 6.349382400512695, "step": 21135 }, { "epoch": 0.2114, "grad_norm": 4.345952033996582, "learning_rate": 3.982878787878788e-06, "loss": 6.33165397644043, "step": 21140 }, { "epoch": 0.21145, "grad_norm": 4.373905658721924, "learning_rate": 3.982626262626263e-06, "loss": 6.342807006835938, "step": 21145 }, { "epoch": 0.2115, "grad_norm": 5.889057636260986, "learning_rate": 3.982373737373737e-06, "loss": 6.36004638671875, "step": 21150 }, { "epoch": 0.21155, "grad_norm": 3.803243637084961, "learning_rate": 3.982121212121213e-06, "loss": 6.334016036987305, "step": 21155 }, { "epoch": 0.2116, "grad_norm": 20.658977508544922, "learning_rate": 3.981868686868687e-06, "loss": 6.540630340576172, "step": 21160 }, { "epoch": 0.21165, "grad_norm": 4.416229248046875, "learning_rate": 3.981616161616162e-06, "loss": 6.344305419921875, "step": 21165 }, { "epoch": 0.2117, "grad_norm": 4.388610363006592, "learning_rate": 3.981363636363637e-06, "loss": 6.338248443603516, "step": 21170 }, { "epoch": 0.21175, "grad_norm": 3.0095276832580566, "learning_rate": 3.981111111111111e-06, "loss": 6.349266052246094, "step": 21175 }, { "epoch": 0.2118, "grad_norm": 5.305881023406982, "learning_rate": 3.980858585858586e-06, "loss": 6.345028686523437, "step": 21180 }, { "epoch": 0.21185, "grad_norm": 9.09161376953125, "learning_rate": 3.9806060606060606e-06, "loss": 6.310293579101563, "step": 21185 }, { "epoch": 0.2119, "grad_norm": 5.345005512237549, "learning_rate": 3.980353535353535e-06, "loss": 6.351240539550782, "step": 21190 }, { "epoch": 0.21195, "grad_norm": 37.088558197021484, "learning_rate": 3.980101010101011e-06, "loss": 6.596629333496094, "step": 21195 }, { "epoch": 0.212, "grad_norm": 6.0732550621032715, "learning_rate": 3.979848484848485e-06, "loss": 6.331794738769531, "step": 21200 }, { "epoch": 0.21205, "grad_norm": 15.80643367767334, "learning_rate": 3.97959595959596e-06, "loss": 6.3303382873535154, "step": 21205 }, { "epoch": 0.2121, "grad_norm": 4.361663818359375, "learning_rate": 3.9793434343434346e-06, "loss": 6.364749908447266, "step": 21210 }, { "epoch": 0.21215, "grad_norm": 5.119795322418213, "learning_rate": 3.97909090909091e-06, "loss": 6.366403579711914, "step": 21215 }, { "epoch": 0.2122, "grad_norm": 31.1216983795166, "learning_rate": 3.978838383838385e-06, "loss": 6.392498397827149, "step": 21220 }, { "epoch": 0.21225, "grad_norm": 4.654155731201172, "learning_rate": 3.978585858585859e-06, "loss": 6.366376876831055, "step": 21225 }, { "epoch": 0.2123, "grad_norm": 6.365190505981445, "learning_rate": 3.978333333333333e-06, "loss": 6.349500274658203, "step": 21230 }, { "epoch": 0.21235, "grad_norm": 4.107705593109131, "learning_rate": 3.9780808080808086e-06, "loss": 6.391770935058593, "step": 21235 }, { "epoch": 0.2124, "grad_norm": 10.965446472167969, "learning_rate": 3.977828282828283e-06, "loss": 6.451714324951172, "step": 21240 }, { "epoch": 0.21245, "grad_norm": 3.374950408935547, "learning_rate": 3.977575757575758e-06, "loss": 6.316110610961914, "step": 21245 }, { "epoch": 0.2125, "grad_norm": 3.1414036750793457, "learning_rate": 3.9773232323232324e-06, "loss": 6.3915855407714846, "step": 21250 }, { "epoch": 0.21255, "grad_norm": 2.866434335708618, "learning_rate": 3.977070707070708e-06, "loss": 6.324235534667968, "step": 21255 }, { "epoch": 0.2126, "grad_norm": 5.275301456451416, "learning_rate": 3.9768181818181826e-06, "loss": 6.3302154541015625, "step": 21260 }, { "epoch": 0.21265, "grad_norm": 4.489538192749023, "learning_rate": 3.976565656565657e-06, "loss": 6.358934020996093, "step": 21265 }, { "epoch": 0.2127, "grad_norm": 4.967562675476074, "learning_rate": 3.976313131313132e-06, "loss": 6.339309310913086, "step": 21270 }, { "epoch": 0.21275, "grad_norm": 4.33439302444458, "learning_rate": 3.9760606060606064e-06, "loss": 6.3633369445800785, "step": 21275 }, { "epoch": 0.2128, "grad_norm": 4.58602237701416, "learning_rate": 3.975808080808081e-06, "loss": 6.30540771484375, "step": 21280 }, { "epoch": 0.21285, "grad_norm": 4.668452739715576, "learning_rate": 3.975555555555556e-06, "loss": 6.3459617614746096, "step": 21285 }, { "epoch": 0.2129, "grad_norm": 13.259733200073242, "learning_rate": 3.97530303030303e-06, "loss": 6.312184906005859, "step": 21290 }, { "epoch": 0.21295, "grad_norm": 3.6409895420074463, "learning_rate": 3.975050505050506e-06, "loss": 6.281867980957031, "step": 21295 }, { "epoch": 0.213, "grad_norm": 3.5067427158355713, "learning_rate": 3.9747979797979804e-06, "loss": 6.281711578369141, "step": 21300 }, { "epoch": 0.21305, "grad_norm": 5.1656670570373535, "learning_rate": 3.974545454545455e-06, "loss": 6.301529312133789, "step": 21305 }, { "epoch": 0.2131, "grad_norm": 6.2557373046875, "learning_rate": 3.97429292929293e-06, "loss": 6.315029144287109, "step": 21310 }, { "epoch": 0.21315, "grad_norm": 7.3833184242248535, "learning_rate": 3.974040404040404e-06, "loss": 6.347721099853516, "step": 21315 }, { "epoch": 0.2132, "grad_norm": 5.040048599243164, "learning_rate": 3.973787878787879e-06, "loss": 6.342762756347656, "step": 21320 }, { "epoch": 0.21325, "grad_norm": 4.772780418395996, "learning_rate": 3.973535353535354e-06, "loss": 6.391777420043946, "step": 21325 }, { "epoch": 0.2133, "grad_norm": 6.528231143951416, "learning_rate": 3.973282828282828e-06, "loss": 6.336102294921875, "step": 21330 }, { "epoch": 0.21335, "grad_norm": 5.7505784034729, "learning_rate": 3.973030303030304e-06, "loss": 6.341020965576172, "step": 21335 }, { "epoch": 0.2134, "grad_norm": 4.493333339691162, "learning_rate": 3.972777777777778e-06, "loss": 6.353553009033203, "step": 21340 }, { "epoch": 0.21345, "grad_norm": 5.1836395263671875, "learning_rate": 3.972525252525253e-06, "loss": 6.2645820617675785, "step": 21345 }, { "epoch": 0.2135, "grad_norm": 5.564456462860107, "learning_rate": 3.972272727272728e-06, "loss": 6.354582977294922, "step": 21350 }, { "epoch": 0.21355, "grad_norm": 5.747696876525879, "learning_rate": 3.972020202020202e-06, "loss": 6.3443256378173825, "step": 21355 }, { "epoch": 0.2136, "grad_norm": 6.277464389801025, "learning_rate": 3.971767676767677e-06, "loss": 6.380101013183594, "step": 21360 }, { "epoch": 0.21365, "grad_norm": 3.980302095413208, "learning_rate": 3.9715151515151515e-06, "loss": 6.357783508300781, "step": 21365 }, { "epoch": 0.2137, "grad_norm": 9.000571250915527, "learning_rate": 3.971262626262626e-06, "loss": 6.496051788330078, "step": 21370 }, { "epoch": 0.21375, "grad_norm": 8.144983291625977, "learning_rate": 3.971010101010102e-06, "loss": 6.521352386474609, "step": 21375 }, { "epoch": 0.2138, "grad_norm": 4.647714614868164, "learning_rate": 3.970757575757576e-06, "loss": 6.336988067626953, "step": 21380 }, { "epoch": 0.21385, "grad_norm": 4.901347637176514, "learning_rate": 3.970505050505051e-06, "loss": 6.313188934326172, "step": 21385 }, { "epoch": 0.2139, "grad_norm": 3.1035268306732178, "learning_rate": 3.9702525252525255e-06, "loss": 6.459159088134766, "step": 21390 }, { "epoch": 0.21395, "grad_norm": 5.16068172454834, "learning_rate": 3.97e-06, "loss": 6.3476509094238285, "step": 21395 }, { "epoch": 0.214, "grad_norm": 4.311553955078125, "learning_rate": 3.969747474747475e-06, "loss": 6.3172954559326175, "step": 21400 }, { "epoch": 0.21405, "grad_norm": 4.532411098480225, "learning_rate": 3.969494949494949e-06, "loss": 6.3630828857421875, "step": 21405 }, { "epoch": 0.2141, "grad_norm": 4.450646877288818, "learning_rate": 3.969242424242424e-06, "loss": 6.387375259399414, "step": 21410 }, { "epoch": 0.21415, "grad_norm": 4.3361616134643555, "learning_rate": 3.9689898989898995e-06, "loss": 6.3737529754638675, "step": 21415 }, { "epoch": 0.2142, "grad_norm": 3.546706438064575, "learning_rate": 3.968737373737374e-06, "loss": 6.3721263885498045, "step": 21420 }, { "epoch": 0.21425, "grad_norm": 5.346105098724365, "learning_rate": 3.968484848484849e-06, "loss": 6.427745056152344, "step": 21425 }, { "epoch": 0.2143, "grad_norm": 5.1019816398620605, "learning_rate": 3.968232323232323e-06, "loss": 6.345580291748047, "step": 21430 }, { "epoch": 0.21435, "grad_norm": 4.563647747039795, "learning_rate": 3.967979797979799e-06, "loss": 6.340779876708984, "step": 21435 }, { "epoch": 0.2144, "grad_norm": 4.112351894378662, "learning_rate": 3.9677272727272735e-06, "loss": 6.359388732910157, "step": 21440 }, { "epoch": 0.21445, "grad_norm": 5.125707626342773, "learning_rate": 3.967474747474748e-06, "loss": 6.347042083740234, "step": 21445 }, { "epoch": 0.2145, "grad_norm": 7.400912284851074, "learning_rate": 3.967222222222222e-06, "loss": 6.349510192871094, "step": 21450 }, { "epoch": 0.21455, "grad_norm": 6.1745452880859375, "learning_rate": 3.966969696969697e-06, "loss": 6.322837066650391, "step": 21455 }, { "epoch": 0.2146, "grad_norm": 5.7768659591674805, "learning_rate": 3.966717171717172e-06, "loss": 6.3995006561279295, "step": 21460 }, { "epoch": 0.21465, "grad_norm": 6.1235151290893555, "learning_rate": 3.966464646464647e-06, "loss": 6.390949249267578, "step": 21465 }, { "epoch": 0.2147, "grad_norm": 5.278066158294678, "learning_rate": 3.966212121212121e-06, "loss": 6.36290283203125, "step": 21470 }, { "epoch": 0.21475, "grad_norm": 5.278104305267334, "learning_rate": 3.965959595959597e-06, "loss": 6.421596527099609, "step": 21475 }, { "epoch": 0.2148, "grad_norm": 4.818373680114746, "learning_rate": 3.965707070707071e-06, "loss": 6.333554077148437, "step": 21480 }, { "epoch": 0.21485, "grad_norm": 4.123604774475098, "learning_rate": 3.965454545454546e-06, "loss": 6.355207824707032, "step": 21485 }, { "epoch": 0.2149, "grad_norm": 8.12369155883789, "learning_rate": 3.965202020202021e-06, "loss": 6.351161193847656, "step": 21490 }, { "epoch": 0.21495, "grad_norm": 5.29095458984375, "learning_rate": 3.964949494949495e-06, "loss": 6.496044921875, "step": 21495 }, { "epoch": 0.215, "grad_norm": 3.7790210247039795, "learning_rate": 3.96469696969697e-06, "loss": 6.422084045410156, "step": 21500 }, { "epoch": 0.21505, "grad_norm": 6.441680908203125, "learning_rate": 3.9644444444444445e-06, "loss": 6.355801010131836, "step": 21505 }, { "epoch": 0.2151, "grad_norm": 4.648576259613037, "learning_rate": 3.964191919191919e-06, "loss": 6.361141204833984, "step": 21510 }, { "epoch": 0.21515, "grad_norm": 5.097938537597656, "learning_rate": 3.963939393939395e-06, "loss": 6.418809509277343, "step": 21515 }, { "epoch": 0.2152, "grad_norm": 5.896158218383789, "learning_rate": 3.963686868686869e-06, "loss": 6.381727600097657, "step": 21520 }, { "epoch": 0.21525, "grad_norm": 4.103091239929199, "learning_rate": 3.963434343434344e-06, "loss": 6.291159439086914, "step": 21525 }, { "epoch": 0.2153, "grad_norm": 8.13593578338623, "learning_rate": 3.9631818181818185e-06, "loss": 6.123272323608399, "step": 21530 }, { "epoch": 0.21535, "grad_norm": 3.3224587440490723, "learning_rate": 3.962929292929293e-06, "loss": 6.325215911865234, "step": 21535 }, { "epoch": 0.2154, "grad_norm": 16.141931533813477, "learning_rate": 3.962676767676768e-06, "loss": 6.235218811035156, "step": 21540 }, { "epoch": 0.21545, "grad_norm": 4.663811683654785, "learning_rate": 3.962424242424242e-06, "loss": 6.347470092773437, "step": 21545 }, { "epoch": 0.2155, "grad_norm": 5.491455554962158, "learning_rate": 3.962171717171717e-06, "loss": 6.362879943847656, "step": 21550 }, { "epoch": 0.21555, "grad_norm": 4.514288902282715, "learning_rate": 3.9619191919191925e-06, "loss": 6.382190704345703, "step": 21555 }, { "epoch": 0.2156, "grad_norm": 3.900432825088501, "learning_rate": 3.961666666666667e-06, "loss": 6.384289932250977, "step": 21560 }, { "epoch": 0.21565, "grad_norm": 5.21976375579834, "learning_rate": 3.961414141414142e-06, "loss": 6.3376914978027346, "step": 21565 }, { "epoch": 0.2157, "grad_norm": 5.378953456878662, "learning_rate": 3.961161616161616e-06, "loss": 6.309489440917969, "step": 21570 }, { "epoch": 0.21575, "grad_norm": 7.530524730682373, "learning_rate": 3.960909090909091e-06, "loss": 6.465728759765625, "step": 21575 }, { "epoch": 0.2158, "grad_norm": 4.982522964477539, "learning_rate": 3.960656565656566e-06, "loss": 6.321490859985351, "step": 21580 }, { "epoch": 0.21585, "grad_norm": 5.911306858062744, "learning_rate": 3.96040404040404e-06, "loss": 6.364742279052734, "step": 21585 }, { "epoch": 0.2159, "grad_norm": 5.698651313781738, "learning_rate": 3.960151515151516e-06, "loss": 6.372504425048828, "step": 21590 }, { "epoch": 0.21595, "grad_norm": 5.642559051513672, "learning_rate": 3.95989898989899e-06, "loss": 6.358496856689453, "step": 21595 }, { "epoch": 0.216, "grad_norm": 2.855836868286133, "learning_rate": 3.959646464646465e-06, "loss": 6.3617603302001955, "step": 21600 }, { "epoch": 0.21605, "grad_norm": 6.255645751953125, "learning_rate": 3.95939393939394e-06, "loss": 6.3692474365234375, "step": 21605 }, { "epoch": 0.2161, "grad_norm": 4.702322483062744, "learning_rate": 3.959141414141415e-06, "loss": 6.321267318725586, "step": 21610 }, { "epoch": 0.21615, "grad_norm": 4.781519889831543, "learning_rate": 3.958888888888889e-06, "loss": 6.370015335083008, "step": 21615 }, { "epoch": 0.2162, "grad_norm": 5.614774703979492, "learning_rate": 3.9586363636363635e-06, "loss": 6.352112197875977, "step": 21620 }, { "epoch": 0.21625, "grad_norm": 4.693443298339844, "learning_rate": 3.958383838383838e-06, "loss": 6.335024642944336, "step": 21625 }, { "epoch": 0.2163, "grad_norm": 4.996774196624756, "learning_rate": 3.958131313131314e-06, "loss": 6.364309692382813, "step": 21630 }, { "epoch": 0.21635, "grad_norm": 5.610236167907715, "learning_rate": 3.957878787878788e-06, "loss": 6.360979080200195, "step": 21635 }, { "epoch": 0.2164, "grad_norm": 5.365516662597656, "learning_rate": 3.957626262626263e-06, "loss": 6.377675247192383, "step": 21640 }, { "epoch": 0.21645, "grad_norm": 5.070429801940918, "learning_rate": 3.9573737373737375e-06, "loss": 6.314213562011719, "step": 21645 }, { "epoch": 0.2165, "grad_norm": 4.424585819244385, "learning_rate": 3.957121212121213e-06, "loss": 6.372905349731445, "step": 21650 }, { "epoch": 0.21655, "grad_norm": 3.3610172271728516, "learning_rate": 3.956868686868688e-06, "loss": 6.338685607910156, "step": 21655 }, { "epoch": 0.2166, "grad_norm": 5.519661903381348, "learning_rate": 3.956616161616162e-06, "loss": 6.307672500610352, "step": 21660 }, { "epoch": 0.21665, "grad_norm": 4.318310260772705, "learning_rate": 3.956363636363637e-06, "loss": 6.296174240112305, "step": 21665 }, { "epoch": 0.2167, "grad_norm": 3.873854398727417, "learning_rate": 3.9561111111111115e-06, "loss": 6.304802322387696, "step": 21670 }, { "epoch": 0.21675, "grad_norm": 4.453067302703857, "learning_rate": 3.955858585858586e-06, "loss": 6.387702560424804, "step": 21675 }, { "epoch": 0.2168, "grad_norm": 4.276709079742432, "learning_rate": 3.955606060606061e-06, "loss": 6.3565673828125, "step": 21680 }, { "epoch": 0.21685, "grad_norm": 4.498365879058838, "learning_rate": 3.9553535353535354e-06, "loss": 6.357732772827148, "step": 21685 }, { "epoch": 0.2169, "grad_norm": 4.903365612030029, "learning_rate": 3.955101010101011e-06, "loss": 6.337341690063477, "step": 21690 }, { "epoch": 0.21695, "grad_norm": 4.123236656188965, "learning_rate": 3.9548484848484855e-06, "loss": 6.418603515625, "step": 21695 }, { "epoch": 0.217, "grad_norm": 3.1305978298187256, "learning_rate": 3.95459595959596e-06, "loss": 6.3165534973144535, "step": 21700 }, { "epoch": 0.21705, "grad_norm": 3.4849157333374023, "learning_rate": 3.954343434343435e-06, "loss": 6.369656753540039, "step": 21705 }, { "epoch": 0.2171, "grad_norm": 4.606607913970947, "learning_rate": 3.9540909090909094e-06, "loss": 6.303907775878907, "step": 21710 }, { "epoch": 0.21715, "grad_norm": 8.79810905456543, "learning_rate": 3.953838383838384e-06, "loss": 6.346179580688476, "step": 21715 }, { "epoch": 0.2172, "grad_norm": 3.3929765224456787, "learning_rate": 3.953585858585859e-06, "loss": 6.390393829345703, "step": 21720 }, { "epoch": 0.21725, "grad_norm": 4.543918132781982, "learning_rate": 3.953333333333333e-06, "loss": 6.3717185974121096, "step": 21725 }, { "epoch": 0.2173, "grad_norm": 4.410362720489502, "learning_rate": 3.953080808080809e-06, "loss": 6.339332199096679, "step": 21730 }, { "epoch": 0.21735, "grad_norm": 4.107402324676514, "learning_rate": 3.9528282828282834e-06, "loss": 6.317170715332031, "step": 21735 }, { "epoch": 0.2174, "grad_norm": 6.2682719230651855, "learning_rate": 3.952575757575758e-06, "loss": 6.330909729003906, "step": 21740 }, { "epoch": 0.21745, "grad_norm": 5.619136333465576, "learning_rate": 3.952323232323233e-06, "loss": 6.417395782470703, "step": 21745 }, { "epoch": 0.2175, "grad_norm": 9.82800579071045, "learning_rate": 3.952070707070707e-06, "loss": 6.290705871582031, "step": 21750 }, { "epoch": 0.21755, "grad_norm": 4.4177117347717285, "learning_rate": 3.951818181818182e-06, "loss": 6.358160018920898, "step": 21755 }, { "epoch": 0.2176, "grad_norm": 7.399737358093262, "learning_rate": 3.9515656565656566e-06, "loss": 6.373242568969727, "step": 21760 }, { "epoch": 0.21765, "grad_norm": 2.527482509613037, "learning_rate": 3.951313131313131e-06, "loss": 6.389566040039062, "step": 21765 }, { "epoch": 0.2177, "grad_norm": 5.605316638946533, "learning_rate": 3.951060606060607e-06, "loss": 6.503941345214844, "step": 21770 }, { "epoch": 0.21775, "grad_norm": 4.3367228507995605, "learning_rate": 3.950808080808081e-06, "loss": 6.3826953887939455, "step": 21775 }, { "epoch": 0.2178, "grad_norm": 3.7563390731811523, "learning_rate": 3.950555555555556e-06, "loss": 6.340676116943359, "step": 21780 }, { "epoch": 0.21785, "grad_norm": 4.662507057189941, "learning_rate": 3.9503030303030306e-06, "loss": 6.337590026855469, "step": 21785 }, { "epoch": 0.2179, "grad_norm": 6.536571502685547, "learning_rate": 3.950050505050505e-06, "loss": 6.372230911254883, "step": 21790 }, { "epoch": 0.21795, "grad_norm": 3.432849645614624, "learning_rate": 3.94979797979798e-06, "loss": 6.324848556518555, "step": 21795 }, { "epoch": 0.218, "grad_norm": 3.079097032546997, "learning_rate": 3.9495454545454545e-06, "loss": 6.505039215087891, "step": 21800 }, { "epoch": 0.21805, "grad_norm": 3.5670788288116455, "learning_rate": 3.949292929292929e-06, "loss": 6.300986480712891, "step": 21805 }, { "epoch": 0.2181, "grad_norm": 7.068577766418457, "learning_rate": 3.9490404040404046e-06, "loss": 6.358495712280273, "step": 21810 }, { "epoch": 0.21815, "grad_norm": 4.30488395690918, "learning_rate": 3.948787878787879e-06, "loss": 6.372005844116211, "step": 21815 }, { "epoch": 0.2182, "grad_norm": 4.001838684082031, "learning_rate": 3.948535353535354e-06, "loss": 6.344174194335937, "step": 21820 }, { "epoch": 0.21825, "grad_norm": 3.0370726585388184, "learning_rate": 3.9482828282828285e-06, "loss": 6.3144477844238285, "step": 21825 }, { "epoch": 0.2183, "grad_norm": 3.9563684463500977, "learning_rate": 3.948030303030304e-06, "loss": 6.400917816162109, "step": 21830 }, { "epoch": 0.21835, "grad_norm": 3.697495222091675, "learning_rate": 3.9477777777777786e-06, "loss": 6.373700714111328, "step": 21835 }, { "epoch": 0.2184, "grad_norm": 5.052238941192627, "learning_rate": 3.947525252525252e-06, "loss": 6.337027740478516, "step": 21840 }, { "epoch": 0.21845, "grad_norm": 4.299785137176514, "learning_rate": 3.947272727272727e-06, "loss": 6.434446716308594, "step": 21845 }, { "epoch": 0.2185, "grad_norm": 11.646597862243652, "learning_rate": 3.9470202020202025e-06, "loss": 6.341838836669922, "step": 21850 }, { "epoch": 0.21855, "grad_norm": 3.6401522159576416, "learning_rate": 3.946767676767677e-06, "loss": 6.390865707397461, "step": 21855 }, { "epoch": 0.2186, "grad_norm": 10.22292423248291, "learning_rate": 3.946515151515152e-06, "loss": 6.414631652832031, "step": 21860 }, { "epoch": 0.21865, "grad_norm": 3.756352663040161, "learning_rate": 3.946262626262626e-06, "loss": 6.571408081054687, "step": 21865 }, { "epoch": 0.2187, "grad_norm": 4.389672756195068, "learning_rate": 3.946010101010102e-06, "loss": 6.316670989990234, "step": 21870 }, { "epoch": 0.21875, "grad_norm": 3.8163018226623535, "learning_rate": 3.9457575757575765e-06, "loss": 6.334868621826172, "step": 21875 }, { "epoch": 0.2188, "grad_norm": 4.799773693084717, "learning_rate": 3.945505050505051e-06, "loss": 6.318629455566406, "step": 21880 }, { "epoch": 0.21885, "grad_norm": 5.284512042999268, "learning_rate": 3.945252525252526e-06, "loss": 6.286919784545899, "step": 21885 }, { "epoch": 0.2189, "grad_norm": 4.744752883911133, "learning_rate": 3.945e-06, "loss": 6.391831970214843, "step": 21890 }, { "epoch": 0.21895, "grad_norm": 3.9955224990844727, "learning_rate": 3.944747474747475e-06, "loss": 6.3674579620361325, "step": 21895 }, { "epoch": 0.219, "grad_norm": 3.5363123416900635, "learning_rate": 3.94449494949495e-06, "loss": 6.355007934570312, "step": 21900 }, { "epoch": 0.21905, "grad_norm": 6.196945667266846, "learning_rate": 3.944242424242424e-06, "loss": 6.3502952575683596, "step": 21905 }, { "epoch": 0.2191, "grad_norm": 4.365527638583444e-05, "learning_rate": 3.9439898989899e-06, "loss": 1.3522143363952637, "step": 21910 }, { "epoch": 0.21915, "grad_norm": 12.8985013961792, "learning_rate": 3.943737373737374e-06, "loss": 0.9795275688171386, "step": 21915 }, { "epoch": 0.2192, "grad_norm": 6.572193145751953, "learning_rate": 3.943484848484849e-06, "loss": 6.335569763183594, "step": 21920 }, { "epoch": 0.21925, "grad_norm": 4.460758209228516, "learning_rate": 3.943232323232324e-06, "loss": 6.365348434448242, "step": 21925 }, { "epoch": 0.2193, "grad_norm": 5.885843276977539, "learning_rate": 3.942979797979798e-06, "loss": 6.313883972167969, "step": 21930 }, { "epoch": 0.21935, "grad_norm": 7.0637593269348145, "learning_rate": 3.942727272727273e-06, "loss": 6.343839263916015, "step": 21935 }, { "epoch": 0.2194, "grad_norm": 15.866835594177246, "learning_rate": 3.9424747474747475e-06, "loss": 6.554473876953125, "step": 21940 }, { "epoch": 0.21945, "grad_norm": 4.444032192230225, "learning_rate": 3.942222222222222e-06, "loss": 6.316667938232422, "step": 21945 }, { "epoch": 0.2195, "grad_norm": 6.178563117980957, "learning_rate": 3.941969696969698e-06, "loss": 6.258186340332031, "step": 21950 }, { "epoch": 0.21955, "grad_norm": 12.335797309875488, "learning_rate": 3.941717171717172e-06, "loss": 6.482456970214844, "step": 21955 }, { "epoch": 0.2196, "grad_norm": 5.042158126831055, "learning_rate": 3.941464646464647e-06, "loss": 6.338861083984375, "step": 21960 }, { "epoch": 0.21965, "grad_norm": 3.2799739837646484, "learning_rate": 3.9412121212121215e-06, "loss": 6.230805969238281, "step": 21965 }, { "epoch": 0.2197, "grad_norm": 5.143213748931885, "learning_rate": 3.940959595959596e-06, "loss": 6.386198806762695, "step": 21970 }, { "epoch": 0.21975, "grad_norm": 3.7820916175842285, "learning_rate": 3.940707070707071e-06, "loss": 6.333648300170898, "step": 21975 }, { "epoch": 0.2198, "grad_norm": 7.150546073913574, "learning_rate": 3.940454545454545e-06, "loss": 6.333771896362305, "step": 21980 }, { "epoch": 0.21985, "grad_norm": 4.593753337860107, "learning_rate": 3.94020202020202e-06, "loss": 6.342755508422852, "step": 21985 }, { "epoch": 0.2199, "grad_norm": 5.354594707489014, "learning_rate": 3.9399494949494955e-06, "loss": 6.301165008544922, "step": 21990 }, { "epoch": 0.21995, "grad_norm": 4.7729034423828125, "learning_rate": 3.93969696969697e-06, "loss": 6.367475128173828, "step": 21995 }, { "epoch": 0.22, "grad_norm": 9.192155838012695, "learning_rate": 3.939444444444445e-06, "loss": 6.365518188476562, "step": 22000 }, { "epoch": 0.22005, "grad_norm": 5.5374884605407715, "learning_rate": 3.939191919191919e-06, "loss": 6.355663299560547, "step": 22005 }, { "epoch": 0.2201, "grad_norm": 4.4399800300598145, "learning_rate": 3.938939393939394e-06, "loss": 6.3567665100097654, "step": 22010 }, { "epoch": 0.22015, "grad_norm": 3.6572413444519043, "learning_rate": 3.938686868686869e-06, "loss": 6.353284454345703, "step": 22015 }, { "epoch": 0.2202, "grad_norm": 3.180549144744873, "learning_rate": 3.938434343434343e-06, "loss": 6.365865325927734, "step": 22020 }, { "epoch": 0.22025, "grad_norm": 3.33052921295166, "learning_rate": 3.938181818181819e-06, "loss": 6.363699722290039, "step": 22025 }, { "epoch": 0.2203, "grad_norm": 5.216503143310547, "learning_rate": 3.937929292929293e-06, "loss": 6.335125350952149, "step": 22030 }, { "epoch": 0.22035, "grad_norm": 4.603518009185791, "learning_rate": 3.937676767676768e-06, "loss": 6.361663818359375, "step": 22035 }, { "epoch": 0.2204, "grad_norm": 6.61617374420166, "learning_rate": 3.937424242424243e-06, "loss": 6.51595458984375, "step": 22040 }, { "epoch": 0.22045, "grad_norm": 2.6309993267059326, "learning_rate": 3.937171717171718e-06, "loss": 6.329469299316406, "step": 22045 }, { "epoch": 0.2205, "grad_norm": 4.145974159240723, "learning_rate": 3.936919191919193e-06, "loss": 6.37933578491211, "step": 22050 }, { "epoch": 0.22055, "grad_norm": 5.518905162811279, "learning_rate": 3.936666666666667e-06, "loss": 6.348477172851562, "step": 22055 }, { "epoch": 0.2206, "grad_norm": 3.480255365371704, "learning_rate": 3.936414141414141e-06, "loss": 6.327765274047851, "step": 22060 }, { "epoch": 0.22065, "grad_norm": 4.1377081871032715, "learning_rate": 3.936161616161617e-06, "loss": 6.365662384033203, "step": 22065 }, { "epoch": 0.2207, "grad_norm": 11.709415435791016, "learning_rate": 3.935909090909091e-06, "loss": 6.390205764770508, "step": 22070 }, { "epoch": 0.22075, "grad_norm": 4.713285446166992, "learning_rate": 3.935656565656566e-06, "loss": 6.392504119873047, "step": 22075 }, { "epoch": 0.2208, "grad_norm": 9.711366653442383, "learning_rate": 3.9354040404040405e-06, "loss": 6.345866394042969, "step": 22080 }, { "epoch": 0.22085, "grad_norm": 3.541916608810425, "learning_rate": 3.935151515151516e-06, "loss": 6.390850830078125, "step": 22085 }, { "epoch": 0.2209, "grad_norm": 4.560990810394287, "learning_rate": 3.934898989898991e-06, "loss": 6.420188903808594, "step": 22090 }, { "epoch": 0.22095, "grad_norm": 3.9550952911376953, "learning_rate": 3.934646464646465e-06, "loss": 6.3682300567626955, "step": 22095 }, { "epoch": 0.221, "grad_norm": 4.7514238357543945, "learning_rate": 3.93439393939394e-06, "loss": 6.3016101837158205, "step": 22100 }, { "epoch": 0.22105, "grad_norm": 2.9150662422180176, "learning_rate": 3.9341414141414145e-06, "loss": 6.449092102050781, "step": 22105 }, { "epoch": 0.2211, "grad_norm": 6.345834255218506, "learning_rate": 3.933888888888889e-06, "loss": 6.713237762451172, "step": 22110 }, { "epoch": 0.22115, "grad_norm": 5.034000396728516, "learning_rate": 3.933636363636364e-06, "loss": 6.3048858642578125, "step": 22115 }, { "epoch": 0.2212, "grad_norm": 5.093130111694336, "learning_rate": 3.933383838383838e-06, "loss": 6.3440399169921875, "step": 22120 }, { "epoch": 0.22125, "grad_norm": 4.820078372955322, "learning_rate": 3.933131313131314e-06, "loss": 6.350347900390625, "step": 22125 }, { "epoch": 0.2213, "grad_norm": 5.668598175048828, "learning_rate": 3.9328787878787885e-06, "loss": 6.4487152099609375, "step": 22130 }, { "epoch": 0.22135, "grad_norm": 26.93146514892578, "learning_rate": 3.932626262626263e-06, "loss": 6.496022796630859, "step": 22135 }, { "epoch": 0.2214, "grad_norm": 4.091119766235352, "learning_rate": 3.932373737373738e-06, "loss": 6.366105270385742, "step": 22140 }, { "epoch": 0.22145, "grad_norm": 6.68541955947876, "learning_rate": 3.932121212121212e-06, "loss": 6.313965225219727, "step": 22145 }, { "epoch": 0.2215, "grad_norm": 13.055360794067383, "learning_rate": 3.931868686868687e-06, "loss": 6.391211700439453, "step": 22150 }, { "epoch": 0.22155, "grad_norm": 3.9464967250823975, "learning_rate": 3.931616161616162e-06, "loss": 6.369891738891601, "step": 22155 }, { "epoch": 0.2216, "grad_norm": 3.6612648963928223, "learning_rate": 3.931363636363636e-06, "loss": 6.33223876953125, "step": 22160 }, { "epoch": 0.22165, "grad_norm": 4.623621940612793, "learning_rate": 3.931111111111112e-06, "loss": 6.292797470092774, "step": 22165 }, { "epoch": 0.2217, "grad_norm": 3.946295976638794, "learning_rate": 3.930858585858586e-06, "loss": 6.329728317260742, "step": 22170 }, { "epoch": 0.22175, "grad_norm": 5.205915451049805, "learning_rate": 3.930606060606061e-06, "loss": 6.344585418701172, "step": 22175 }, { "epoch": 0.2218, "grad_norm": 3.874110460281372, "learning_rate": 3.930353535353536e-06, "loss": 6.353389739990234, "step": 22180 }, { "epoch": 0.22185, "grad_norm": 6.213982582092285, "learning_rate": 3.93010101010101e-06, "loss": 6.363431930541992, "step": 22185 }, { "epoch": 0.2219, "grad_norm": 5.4450554847717285, "learning_rate": 3.929848484848485e-06, "loss": 6.392853927612305, "step": 22190 }, { "epoch": 0.22195, "grad_norm": 3.41461443901062, "learning_rate": 3.9295959595959596e-06, "loss": 6.359414291381836, "step": 22195 }, { "epoch": 0.222, "grad_norm": 5.682251930236816, "learning_rate": 3.929343434343434e-06, "loss": 6.344573974609375, "step": 22200 }, { "epoch": 0.22205, "grad_norm": 10.852306365966797, "learning_rate": 3.92909090909091e-06, "loss": 6.403154754638672, "step": 22205 }, { "epoch": 0.2221, "grad_norm": 4.6603193283081055, "learning_rate": 3.928838383838384e-06, "loss": 6.3841300964355465, "step": 22210 }, { "epoch": 0.22215, "grad_norm": 4.077589511871338, "learning_rate": 3.928585858585859e-06, "loss": 6.321484756469727, "step": 22215 }, { "epoch": 0.2222, "grad_norm": 8.503129959106445, "learning_rate": 3.9283333333333336e-06, "loss": 6.358746337890625, "step": 22220 }, { "epoch": 0.22225, "grad_norm": 11.96543025970459, "learning_rate": 3.928080808080808e-06, "loss": 6.45408935546875, "step": 22225 }, { "epoch": 0.2223, "grad_norm": 3.565160036087036, "learning_rate": 3.927828282828283e-06, "loss": 6.333126068115234, "step": 22230 }, { "epoch": 0.22235, "grad_norm": 5.02822732925415, "learning_rate": 3.9275757575757574e-06, "loss": 6.354891586303711, "step": 22235 }, { "epoch": 0.2224, "grad_norm": 3.2479867935180664, "learning_rate": 3.927323232323232e-06, "loss": 6.350685501098633, "step": 22240 }, { "epoch": 0.22245, "grad_norm": 2.5334458351135254, "learning_rate": 3.9270707070707076e-06, "loss": 6.362509918212891, "step": 22245 }, { "epoch": 0.2225, "grad_norm": 3.950979232788086, "learning_rate": 3.926818181818182e-06, "loss": 6.371293640136718, "step": 22250 }, { "epoch": 0.22255, "grad_norm": 5.216533660888672, "learning_rate": 3.926565656565657e-06, "loss": 6.43817138671875, "step": 22255 }, { "epoch": 0.2226, "grad_norm": 4.938499450683594, "learning_rate": 3.9263131313131314e-06, "loss": 6.302172088623047, "step": 22260 }, { "epoch": 0.22265, "grad_norm": 4.723968505859375, "learning_rate": 3.926060606060607e-06, "loss": 6.338837432861328, "step": 22265 }, { "epoch": 0.2227, "grad_norm": 14.432202339172363, "learning_rate": 3.9258080808080816e-06, "loss": 6.631285858154297, "step": 22270 }, { "epoch": 0.22275, "grad_norm": 6.60339879989624, "learning_rate": 3.925555555555556e-06, "loss": 6.3137470245361325, "step": 22275 }, { "epoch": 0.2228, "grad_norm": 6.9297943115234375, "learning_rate": 3.925303030303031e-06, "loss": 6.4274749755859375, "step": 22280 }, { "epoch": 0.22285, "grad_norm": 5.100585460662842, "learning_rate": 3.9250505050505054e-06, "loss": 6.385882568359375, "step": 22285 }, { "epoch": 0.2229, "grad_norm": 15.293475151062012, "learning_rate": 3.92479797979798e-06, "loss": 6.425459289550782, "step": 22290 }, { "epoch": 0.22295, "grad_norm": 8.208292961120605, "learning_rate": 3.924545454545455e-06, "loss": 6.391284561157226, "step": 22295 }, { "epoch": 0.223, "grad_norm": 4.862734317779541, "learning_rate": 3.924292929292929e-06, "loss": 6.306989288330078, "step": 22300 }, { "epoch": 0.22305, "grad_norm": 5.783478260040283, "learning_rate": 3.924040404040405e-06, "loss": 6.316993331909179, "step": 22305 }, { "epoch": 0.2231, "grad_norm": 4.817179203033447, "learning_rate": 3.9237878787878794e-06, "loss": 6.282204818725586, "step": 22310 }, { "epoch": 0.22315, "grad_norm": 2.706372022628784, "learning_rate": 3.923535353535354e-06, "loss": 6.314044952392578, "step": 22315 }, { "epoch": 0.2232, "grad_norm": 5.045387268066406, "learning_rate": 3.923282828282829e-06, "loss": 6.320502471923828, "step": 22320 }, { "epoch": 0.22325, "grad_norm": 3.94541335105896, "learning_rate": 3.923030303030303e-06, "loss": 6.355395126342773, "step": 22325 }, { "epoch": 0.2233, "grad_norm": 6.158459186553955, "learning_rate": 3.922777777777778e-06, "loss": 6.473046875, "step": 22330 }, { "epoch": 0.22335, "grad_norm": 9.140213966369629, "learning_rate": 3.922525252525253e-06, "loss": 6.428215789794922, "step": 22335 }, { "epoch": 0.2234, "grad_norm": 3.600584030151367, "learning_rate": 3.922272727272727e-06, "loss": 6.360541152954101, "step": 22340 }, { "epoch": 0.22345, "grad_norm": 5.084117412567139, "learning_rate": 3.922020202020203e-06, "loss": 6.339508819580078, "step": 22345 }, { "epoch": 0.2235, "grad_norm": 9.5404052734375, "learning_rate": 3.921767676767677e-06, "loss": 6.460652923583984, "step": 22350 }, { "epoch": 0.22355, "grad_norm": 4.658763408660889, "learning_rate": 3.921515151515152e-06, "loss": 6.321860122680664, "step": 22355 }, { "epoch": 0.2236, "grad_norm": 27.364280700683594, "learning_rate": 3.921262626262627e-06, "loss": 6.301727294921875, "step": 22360 }, { "epoch": 0.22365, "grad_norm": 5.701543807983398, "learning_rate": 3.921010101010101e-06, "loss": 6.334483337402344, "step": 22365 }, { "epoch": 0.2237, "grad_norm": 5.135466575622559, "learning_rate": 3.920757575757576e-06, "loss": 6.411952972412109, "step": 22370 }, { "epoch": 0.22375, "grad_norm": 3.4898979663848877, "learning_rate": 3.9205050505050505e-06, "loss": 6.334175872802734, "step": 22375 }, { "epoch": 0.2238, "grad_norm": 5.37144136428833, "learning_rate": 3.920252525252525e-06, "loss": 6.317586135864258, "step": 22380 }, { "epoch": 0.22385, "grad_norm": 4.7771077156066895, "learning_rate": 3.920000000000001e-06, "loss": 6.474195861816407, "step": 22385 }, { "epoch": 0.2239, "grad_norm": 5.8470869064331055, "learning_rate": 3.919747474747475e-06, "loss": 6.332202529907226, "step": 22390 }, { "epoch": 0.22395, "grad_norm": 5.772493362426758, "learning_rate": 3.91949494949495e-06, "loss": 6.329004287719727, "step": 22395 }, { "epoch": 0.224, "grad_norm": 4.178285598754883, "learning_rate": 3.9192424242424245e-06, "loss": 6.332929229736328, "step": 22400 }, { "epoch": 0.22405, "grad_norm": 4.7770466804504395, "learning_rate": 3.918989898989899e-06, "loss": 6.367168426513672, "step": 22405 }, { "epoch": 0.2241, "grad_norm": 3.172170639038086, "learning_rate": 3.918737373737374e-06, "loss": 6.4264076232910154, "step": 22410 }, { "epoch": 0.22415, "grad_norm": 3.6349265575408936, "learning_rate": 3.918484848484848e-06, "loss": 6.371656036376953, "step": 22415 }, { "epoch": 0.2242, "grad_norm": 4.508726596832275, "learning_rate": 3.918232323232323e-06, "loss": 6.3616081237792965, "step": 22420 }, { "epoch": 0.22425, "grad_norm": 3.959306001663208, "learning_rate": 3.9179797979797985e-06, "loss": 6.3989513397216795, "step": 22425 }, { "epoch": 0.2243, "grad_norm": 3.8372223377227783, "learning_rate": 3.917727272727273e-06, "loss": 6.335467529296875, "step": 22430 }, { "epoch": 0.22435, "grad_norm": 5.9017133712768555, "learning_rate": 3.917474747474748e-06, "loss": 6.396006774902344, "step": 22435 }, { "epoch": 0.2244, "grad_norm": 2.8308753967285156, "learning_rate": 3.917222222222223e-06, "loss": 6.4979301452636715, "step": 22440 }, { "epoch": 0.22445, "grad_norm": 6.826470851898193, "learning_rate": 3.916969696969698e-06, "loss": 6.328047180175782, "step": 22445 }, { "epoch": 0.2245, "grad_norm": 7.748316287994385, "learning_rate": 3.916717171717172e-06, "loss": 6.526156616210938, "step": 22450 }, { "epoch": 0.22455, "grad_norm": 3.821187734603882, "learning_rate": 3.916464646464646e-06, "loss": 6.505616760253906, "step": 22455 }, { "epoch": 0.2246, "grad_norm": 4.951218605041504, "learning_rate": 3.916212121212122e-06, "loss": 6.342050933837891, "step": 22460 }, { "epoch": 0.22465, "grad_norm": 6.786853790283203, "learning_rate": 3.915959595959596e-06, "loss": 6.360301208496094, "step": 22465 }, { "epoch": 0.2247, "grad_norm": 3.502075672149658, "learning_rate": 3.915707070707071e-06, "loss": 6.338533782958985, "step": 22470 }, { "epoch": 0.22475, "grad_norm": 6.989213466644287, "learning_rate": 3.915454545454546e-06, "loss": 6.388299179077149, "step": 22475 }, { "epoch": 0.2248, "grad_norm": 5.7881669998168945, "learning_rate": 3.915202020202021e-06, "loss": 6.336063385009766, "step": 22480 }, { "epoch": 0.22485, "grad_norm": 5.1808624267578125, "learning_rate": 3.914949494949496e-06, "loss": 6.332523345947266, "step": 22485 }, { "epoch": 0.2249, "grad_norm": 7.353672504425049, "learning_rate": 3.91469696969697e-06, "loss": 6.344490814208984, "step": 22490 }, { "epoch": 0.22495, "grad_norm": 6.273380279541016, "learning_rate": 3.914444444444445e-06, "loss": 6.404228210449219, "step": 22495 }, { "epoch": 0.225, "grad_norm": 4.358814716339111, "learning_rate": 3.91419191919192e-06, "loss": 6.399936294555664, "step": 22500 }, { "epoch": 0.22505, "grad_norm": 4.169908046722412, "learning_rate": 3.913939393939394e-06, "loss": 6.322358703613281, "step": 22505 }, { "epoch": 0.2251, "grad_norm": 4.44150447845459, "learning_rate": 3.913686868686869e-06, "loss": 6.327580261230469, "step": 22510 }, { "epoch": 0.22515, "grad_norm": 3.8741261959075928, "learning_rate": 3.9134343434343435e-06, "loss": 6.384078598022461, "step": 22515 }, { "epoch": 0.2252, "grad_norm": 5.4641547203063965, "learning_rate": 3.913181818181819e-06, "loss": 6.403395080566407, "step": 22520 }, { "epoch": 0.22525, "grad_norm": 6.242650508880615, "learning_rate": 3.912929292929294e-06, "loss": 6.325046539306641, "step": 22525 }, { "epoch": 0.2253, "grad_norm": 4.318515300750732, "learning_rate": 3.912676767676768e-06, "loss": 6.353705215454101, "step": 22530 }, { "epoch": 0.22535, "grad_norm": 3.7297794818878174, "learning_rate": 3.912424242424243e-06, "loss": 6.382925033569336, "step": 22535 }, { "epoch": 0.2254, "grad_norm": 7.640115261077881, "learning_rate": 3.9121717171717175e-06, "loss": 6.381857299804688, "step": 22540 }, { "epoch": 0.22545, "grad_norm": 6.061130046844482, "learning_rate": 3.911919191919192e-06, "loss": 6.382945251464844, "step": 22545 }, { "epoch": 0.2255, "grad_norm": 2.7385566234588623, "learning_rate": 3.911666666666667e-06, "loss": 6.378577423095703, "step": 22550 }, { "epoch": 0.22555, "grad_norm": 4.775401592254639, "learning_rate": 3.911414141414141e-06, "loss": 6.337713623046875, "step": 22555 }, { "epoch": 0.2256, "grad_norm": 5.491299629211426, "learning_rate": 3.911161616161617e-06, "loss": 6.328542327880859, "step": 22560 }, { "epoch": 0.22565, "grad_norm": 4.745079040527344, "learning_rate": 3.9109090909090915e-06, "loss": 6.29979248046875, "step": 22565 }, { "epoch": 0.2257, "grad_norm": 2.9135758876800537, "learning_rate": 3.910656565656566e-06, "loss": 6.47158203125, "step": 22570 }, { "epoch": 0.22575, "grad_norm": 5.584052085876465, "learning_rate": 3.910404040404041e-06, "loss": 6.331560516357422, "step": 22575 }, { "epoch": 0.2258, "grad_norm": 9.276415824890137, "learning_rate": 3.910151515151515e-06, "loss": 6.337969970703125, "step": 22580 }, { "epoch": 0.22585, "grad_norm": 6.934063911437988, "learning_rate": 3.90989898989899e-06, "loss": 6.32188835144043, "step": 22585 }, { "epoch": 0.2259, "grad_norm": 16.917560577392578, "learning_rate": 3.909646464646465e-06, "loss": 6.477536010742187, "step": 22590 }, { "epoch": 0.22595, "grad_norm": 4.670986652374268, "learning_rate": 3.909393939393939e-06, "loss": 6.445035552978515, "step": 22595 }, { "epoch": 0.226, "grad_norm": 4.802990913391113, "learning_rate": 3.909141414141415e-06, "loss": 6.308481979370117, "step": 22600 }, { "epoch": 0.22605, "grad_norm": 5.691634654998779, "learning_rate": 3.908888888888889e-06, "loss": 6.357482528686523, "step": 22605 }, { "epoch": 0.2261, "grad_norm": 13.684615135192871, "learning_rate": 3.908636363636364e-06, "loss": 6.368569183349609, "step": 22610 }, { "epoch": 0.22615, "grad_norm": 3.902438163757324, "learning_rate": 3.908383838383839e-06, "loss": 6.335686492919922, "step": 22615 }, { "epoch": 0.2262, "grad_norm": 7.399766445159912, "learning_rate": 3.908131313131313e-06, "loss": 6.338708877563477, "step": 22620 }, { "epoch": 0.22625, "grad_norm": 8.710539817810059, "learning_rate": 3.907878787878788e-06, "loss": 6.363970947265625, "step": 22625 }, { "epoch": 0.2263, "grad_norm": 2.6321825981140137, "learning_rate": 3.9076262626262625e-06, "loss": 6.345091247558594, "step": 22630 }, { "epoch": 0.22635, "grad_norm": 4.621089458465576, "learning_rate": 3.907373737373737e-06, "loss": 6.372050476074219, "step": 22635 }, { "epoch": 0.2264, "grad_norm": 2.6601388454437256, "learning_rate": 3.907121212121213e-06, "loss": 6.359982299804687, "step": 22640 }, { "epoch": 0.22645, "grad_norm": 4.208619594573975, "learning_rate": 3.906868686868687e-06, "loss": 6.28399887084961, "step": 22645 }, { "epoch": 0.2265, "grad_norm": 2.9859957695007324, "learning_rate": 3.906616161616162e-06, "loss": 6.322683715820313, "step": 22650 }, { "epoch": 0.22655, "grad_norm": 3.646662950515747, "learning_rate": 3.9063636363636365e-06, "loss": 6.538740539550782, "step": 22655 }, { "epoch": 0.2266, "grad_norm": 7.204134941101074, "learning_rate": 3.906111111111112e-06, "loss": 6.370079040527344, "step": 22660 }, { "epoch": 0.22665, "grad_norm": 6.763442039489746, "learning_rate": 3.905858585858587e-06, "loss": 6.3298896789550785, "step": 22665 }, { "epoch": 0.2267, "grad_norm": 3.2383322715759277, "learning_rate": 3.9056060606060604e-06, "loss": 6.280295944213867, "step": 22670 }, { "epoch": 0.22675, "grad_norm": 6.440304756164551, "learning_rate": 3.905353535353535e-06, "loss": 6.324994659423828, "step": 22675 }, { "epoch": 0.2268, "grad_norm": 6.234589576721191, "learning_rate": 3.9051010101010105e-06, "loss": 6.2860454559326175, "step": 22680 }, { "epoch": 0.22685, "grad_norm": 3.487361192703247, "learning_rate": 3.904848484848485e-06, "loss": 6.372900390625, "step": 22685 }, { "epoch": 0.2269, "grad_norm": 5.023828983306885, "learning_rate": 3.90459595959596e-06, "loss": 6.3307750701904295, "step": 22690 }, { "epoch": 0.22695, "grad_norm": 4.465714454650879, "learning_rate": 3.9043434343434344e-06, "loss": 6.3260444641113285, "step": 22695 }, { "epoch": 0.227, "grad_norm": 5.33807897567749, "learning_rate": 3.90409090909091e-06, "loss": 6.332941055297852, "step": 22700 }, { "epoch": 0.22705, "grad_norm": 4.581475734710693, "learning_rate": 3.9038383838383845e-06, "loss": 6.276580810546875, "step": 22705 }, { "epoch": 0.2271, "grad_norm": 9.838051795959473, "learning_rate": 3.903585858585859e-06, "loss": 6.3844352722167965, "step": 22710 }, { "epoch": 0.22715, "grad_norm": 4.7370500564575195, "learning_rate": 3.903333333333334e-06, "loss": 6.332559204101562, "step": 22715 }, { "epoch": 0.2272, "grad_norm": 4.323394775390625, "learning_rate": 3.9030808080808084e-06, "loss": 6.337944412231446, "step": 22720 }, { "epoch": 0.22725, "grad_norm": 12.361985206604004, "learning_rate": 3.902828282828283e-06, "loss": 6.329248046875, "step": 22725 }, { "epoch": 0.2273, "grad_norm": 4.016077995300293, "learning_rate": 3.902575757575758e-06, "loss": 6.310000228881836, "step": 22730 }, { "epoch": 0.22735, "grad_norm": 10.174169540405273, "learning_rate": 3.902323232323232e-06, "loss": 6.300182723999024, "step": 22735 }, { "epoch": 0.2274, "grad_norm": 4.72421932220459, "learning_rate": 3.902070707070708e-06, "loss": 6.322100067138672, "step": 22740 }, { "epoch": 0.22745, "grad_norm": 3.681933641433716, "learning_rate": 3.901818181818182e-06, "loss": 6.307403945922852, "step": 22745 }, { "epoch": 0.2275, "grad_norm": 9.383283615112305, "learning_rate": 3.901565656565657e-06, "loss": 6.3398185729980465, "step": 22750 }, { "epoch": 0.22755, "grad_norm": 4.995811939239502, "learning_rate": 3.901313131313132e-06, "loss": 6.2936443328857425, "step": 22755 }, { "epoch": 0.2276, "grad_norm": 3.3530404567718506, "learning_rate": 3.901060606060606e-06, "loss": 6.347632598876953, "step": 22760 }, { "epoch": 0.22765, "grad_norm": 3.182511329650879, "learning_rate": 3.900808080808081e-06, "loss": 6.311480331420898, "step": 22765 }, { "epoch": 0.2277, "grad_norm": 9.345281600952148, "learning_rate": 3.9005555555555556e-06, "loss": 6.402029418945313, "step": 22770 }, { "epoch": 0.22775, "grad_norm": 8.018506050109863, "learning_rate": 3.90030303030303e-06, "loss": 6.410591888427734, "step": 22775 }, { "epoch": 0.2278, "grad_norm": 4.074703693389893, "learning_rate": 3.900050505050506e-06, "loss": 6.2636150360107425, "step": 22780 }, { "epoch": 0.22785, "grad_norm": 4.909788608551025, "learning_rate": 3.89979797979798e-06, "loss": 6.382930755615234, "step": 22785 }, { "epoch": 0.2279, "grad_norm": 5.080381393432617, "learning_rate": 3.899545454545455e-06, "loss": 6.358546447753906, "step": 22790 }, { "epoch": 0.22795, "grad_norm": 4.097197532653809, "learning_rate": 3.8992929292929296e-06, "loss": 6.312070083618164, "step": 22795 }, { "epoch": 0.228, "grad_norm": 25.735511779785156, "learning_rate": 3.899040404040404e-06, "loss": 6.575778198242188, "step": 22800 }, { "epoch": 0.22805, "grad_norm": 4.943105220794678, "learning_rate": 3.898787878787879e-06, "loss": 6.38532829284668, "step": 22805 }, { "epoch": 0.2281, "grad_norm": 7.563329696655273, "learning_rate": 3.8985353535353535e-06, "loss": 6.417387390136719, "step": 22810 }, { "epoch": 0.22815, "grad_norm": 6.726633548736572, "learning_rate": 3.898282828282828e-06, "loss": 6.3709667205810545, "step": 22815 }, { "epoch": 0.2282, "grad_norm": 5.321633815765381, "learning_rate": 3.8980303030303036e-06, "loss": 6.300463104248047, "step": 22820 }, { "epoch": 0.22825, "grad_norm": 3.257067918777466, "learning_rate": 3.897777777777778e-06, "loss": 6.320222091674805, "step": 22825 }, { "epoch": 0.2283, "grad_norm": 4.2850165367126465, "learning_rate": 3.897525252525253e-06, "loss": 6.336700439453125, "step": 22830 }, { "epoch": 0.22835, "grad_norm": 4.84731388092041, "learning_rate": 3.8972727272727275e-06, "loss": 6.380000305175781, "step": 22835 }, { "epoch": 0.2284, "grad_norm": 3.038480520248413, "learning_rate": 3.897020202020202e-06, "loss": 6.339151763916016, "step": 22840 }, { "epoch": 0.22845, "grad_norm": 5.033095836639404, "learning_rate": 3.896767676767677e-06, "loss": 6.336574554443359, "step": 22845 }, { "epoch": 0.2285, "grad_norm": 4.695345401763916, "learning_rate": 3.896515151515151e-06, "loss": 6.325368499755859, "step": 22850 }, { "epoch": 0.22855, "grad_norm": 5.818275451660156, "learning_rate": 3.896262626262627e-06, "loss": 6.3680274963378904, "step": 22855 }, { "epoch": 0.2286, "grad_norm": 6.599861145019531, "learning_rate": 3.8960101010101015e-06, "loss": 6.329372787475586, "step": 22860 }, { "epoch": 0.22865, "grad_norm": 4.976995944976807, "learning_rate": 3.895757575757576e-06, "loss": 6.327268981933594, "step": 22865 }, { "epoch": 0.2287, "grad_norm": 5.749120235443115, "learning_rate": 3.895505050505051e-06, "loss": 6.342090606689453, "step": 22870 }, { "epoch": 0.22875, "grad_norm": 5.105892658233643, "learning_rate": 3.895252525252526e-06, "loss": 6.324028015136719, "step": 22875 }, { "epoch": 0.2288, "grad_norm": 3.765927791595459, "learning_rate": 3.895000000000001e-06, "loss": 6.336331558227539, "step": 22880 }, { "epoch": 0.22885, "grad_norm": 3.19744610786438, "learning_rate": 3.8947474747474755e-06, "loss": 6.343682098388672, "step": 22885 }, { "epoch": 0.2289, "grad_norm": 4.750840187072754, "learning_rate": 3.89449494949495e-06, "loss": 6.320813369750977, "step": 22890 }, { "epoch": 0.22895, "grad_norm": 5.255784511566162, "learning_rate": 3.894242424242425e-06, "loss": 6.3571525573730465, "step": 22895 }, { "epoch": 0.229, "grad_norm": 4.399352073669434, "learning_rate": 3.893989898989899e-06, "loss": 6.308528137207031, "step": 22900 }, { "epoch": 0.22905, "grad_norm": 8.322492599487305, "learning_rate": 3.893737373737374e-06, "loss": 6.354090881347656, "step": 22905 }, { "epoch": 0.2291, "grad_norm": 6.842194080352783, "learning_rate": 3.893484848484849e-06, "loss": 6.3514972686767575, "step": 22910 }, { "epoch": 0.22915, "grad_norm": 3.7356793880462646, "learning_rate": 3.893232323232324e-06, "loss": 6.336241912841797, "step": 22915 }, { "epoch": 0.2292, "grad_norm": 4.927994251251221, "learning_rate": 3.892979797979799e-06, "loss": 6.5726371765136715, "step": 22920 }, { "epoch": 0.22925, "grad_norm": 5.46537971496582, "learning_rate": 3.892727272727273e-06, "loss": 6.291214370727539, "step": 22925 }, { "epoch": 0.2293, "grad_norm": 4.191037654876709, "learning_rate": 3.892474747474748e-06, "loss": 6.370200729370117, "step": 22930 }, { "epoch": 0.22935, "grad_norm": 5.507706642150879, "learning_rate": 3.892222222222223e-06, "loss": 6.346655654907226, "step": 22935 }, { "epoch": 0.2294, "grad_norm": 5.677035808563232, "learning_rate": 3.891969696969697e-06, "loss": 6.369369125366211, "step": 22940 }, { "epoch": 0.22945, "grad_norm": 5.0656256675720215, "learning_rate": 3.891717171717172e-06, "loss": 6.343718719482422, "step": 22945 }, { "epoch": 0.2295, "grad_norm": 5.381255149841309, "learning_rate": 3.8914646464646465e-06, "loss": 6.34277229309082, "step": 22950 }, { "epoch": 0.22955, "grad_norm": 7.631879806518555, "learning_rate": 3.891212121212122e-06, "loss": 6.376353073120117, "step": 22955 }, { "epoch": 0.2296, "grad_norm": 19.41536521911621, "learning_rate": 3.890959595959597e-06, "loss": 6.432665252685547, "step": 22960 }, { "epoch": 0.22965, "grad_norm": 6.49357271194458, "learning_rate": 3.890707070707071e-06, "loss": 6.338615417480469, "step": 22965 }, { "epoch": 0.2297, "grad_norm": 8.45300579071045, "learning_rate": 3.890454545454546e-06, "loss": 6.343662261962891, "step": 22970 }, { "epoch": 0.22975, "grad_norm": 6.372900009155273, "learning_rate": 3.8902020202020205e-06, "loss": 6.298488616943359, "step": 22975 }, { "epoch": 0.2298, "grad_norm": 8.20447826385498, "learning_rate": 3.889949494949495e-06, "loss": 6.365265655517578, "step": 22980 }, { "epoch": 0.22985, "grad_norm": 3.094377040863037, "learning_rate": 3.88969696969697e-06, "loss": 6.345206069946289, "step": 22985 }, { "epoch": 0.2299, "grad_norm": 3.126749277114868, "learning_rate": 3.889444444444444e-06, "loss": 6.2982830047607425, "step": 22990 }, { "epoch": 0.22995, "grad_norm": 4.843676567077637, "learning_rate": 3.88919191919192e-06, "loss": 6.376802825927735, "step": 22995 }, { "epoch": 0.23, "grad_norm": 5.38779354095459, "learning_rate": 3.8889393939393945e-06, "loss": 6.32762451171875, "step": 23000 }, { "epoch": 0.23005, "grad_norm": 4.954679489135742, "learning_rate": 3.888686868686869e-06, "loss": 6.351786804199219, "step": 23005 }, { "epoch": 0.2301, "grad_norm": 2.945500612258911, "learning_rate": 3.888434343434344e-06, "loss": 6.312575149536133, "step": 23010 }, { "epoch": 0.23015, "grad_norm": 6.137492656707764, "learning_rate": 3.888181818181818e-06, "loss": 6.285461807250977, "step": 23015 }, { "epoch": 0.2302, "grad_norm": 7.3395209312438965, "learning_rate": 3.887929292929293e-06, "loss": 6.320472717285156, "step": 23020 }, { "epoch": 0.23025, "grad_norm": 6.096472263336182, "learning_rate": 3.887676767676768e-06, "loss": 6.364611053466797, "step": 23025 }, { "epoch": 0.2303, "grad_norm": 4.10763692855835, "learning_rate": 3.887424242424242e-06, "loss": 6.268888854980469, "step": 23030 }, { "epoch": 0.23035, "grad_norm": 3.639521837234497, "learning_rate": 3.887171717171718e-06, "loss": 6.3149566650390625, "step": 23035 }, { "epoch": 0.2304, "grad_norm": 4.735448837280273, "learning_rate": 3.886919191919192e-06, "loss": 6.320700454711914, "step": 23040 }, { "epoch": 0.23045, "grad_norm": 2.874537944793701, "learning_rate": 3.886666666666667e-06, "loss": 6.319160461425781, "step": 23045 }, { "epoch": 0.2305, "grad_norm": 8.195582389831543, "learning_rate": 3.886414141414142e-06, "loss": 6.325889205932617, "step": 23050 }, { "epoch": 0.23055, "grad_norm": 10.065967559814453, "learning_rate": 3.886161616161617e-06, "loss": 6.303153991699219, "step": 23055 }, { "epoch": 0.2306, "grad_norm": 6.2665276527404785, "learning_rate": 3.885909090909091e-06, "loss": 6.323134231567383, "step": 23060 }, { "epoch": 0.23065, "grad_norm": 3.270631790161133, "learning_rate": 3.8856565656565655e-06, "loss": 6.33084716796875, "step": 23065 }, { "epoch": 0.2307, "grad_norm": 24.19078254699707, "learning_rate": 3.88540404040404e-06, "loss": 6.472919464111328, "step": 23070 }, { "epoch": 0.23075, "grad_norm": 5.563656806945801, "learning_rate": 3.885151515151516e-06, "loss": 6.285568618774414, "step": 23075 }, { "epoch": 0.2308, "grad_norm": 6.1202311515808105, "learning_rate": 3.88489898989899e-06, "loss": 6.326751327514648, "step": 23080 }, { "epoch": 0.23085, "grad_norm": 8.782366752624512, "learning_rate": 3.884646464646465e-06, "loss": 6.57257308959961, "step": 23085 }, { "epoch": 0.2309, "grad_norm": 3.759363889694214, "learning_rate": 3.8843939393939395e-06, "loss": 6.325263214111328, "step": 23090 }, { "epoch": 0.23095, "grad_norm": 3.3130881786346436, "learning_rate": 3.884141414141415e-06, "loss": 6.329995727539062, "step": 23095 }, { "epoch": 0.231, "grad_norm": 8.368171691894531, "learning_rate": 3.88388888888889e-06, "loss": 6.336648178100586, "step": 23100 }, { "epoch": 0.23105, "grad_norm": 4.715917110443115, "learning_rate": 3.883636363636364e-06, "loss": 6.364529418945312, "step": 23105 }, { "epoch": 0.2311, "grad_norm": 4.715631008148193, "learning_rate": 3.883383838383839e-06, "loss": 6.338137435913086, "step": 23110 }, { "epoch": 0.23115, "grad_norm": 4.896527290344238, "learning_rate": 3.8831313131313135e-06, "loss": 6.332489776611328, "step": 23115 }, { "epoch": 0.2312, "grad_norm": 5.281552791595459, "learning_rate": 3.882878787878788e-06, "loss": 6.310781860351563, "step": 23120 }, { "epoch": 0.23125, "grad_norm": 4.807526588439941, "learning_rate": 3.882626262626263e-06, "loss": 6.312370681762696, "step": 23125 }, { "epoch": 0.2313, "grad_norm": 4.429936408996582, "learning_rate": 3.882373737373737e-06, "loss": 6.3045494079589846, "step": 23130 }, { "epoch": 0.23135, "grad_norm": 4.234960079193115, "learning_rate": 3.882121212121213e-06, "loss": 6.347998809814453, "step": 23135 }, { "epoch": 0.2314, "grad_norm": 4.947700023651123, "learning_rate": 3.8818686868686875e-06, "loss": 6.345174789428711, "step": 23140 }, { "epoch": 0.23145, "grad_norm": 4.114682674407959, "learning_rate": 3.881616161616162e-06, "loss": 6.354502487182617, "step": 23145 }, { "epoch": 0.2315, "grad_norm": 5.636453151702881, "learning_rate": 3.881363636363637e-06, "loss": 7.966081237792968, "step": 23150 }, { "epoch": 0.23155, "grad_norm": 4.054574012756348, "learning_rate": 3.881111111111111e-06, "loss": 6.342223358154297, "step": 23155 }, { "epoch": 0.2316, "grad_norm": 4.726178169250488, "learning_rate": 3.880858585858586e-06, "loss": 6.240601348876953, "step": 23160 }, { "epoch": 0.23165, "grad_norm": 3.2076096534729004, "learning_rate": 3.880606060606061e-06, "loss": 6.360503387451172, "step": 23165 }, { "epoch": 0.2317, "grad_norm": 5.214802265167236, "learning_rate": 3.880353535353535e-06, "loss": 6.375963592529297, "step": 23170 }, { "epoch": 0.23175, "grad_norm": 2.5959393978118896, "learning_rate": 3.880101010101011e-06, "loss": 6.337738800048828, "step": 23175 }, { "epoch": 0.2318, "grad_norm": 6.595320701599121, "learning_rate": 3.879848484848485e-06, "loss": 6.507447052001953, "step": 23180 }, { "epoch": 0.23185, "grad_norm": 6.693985462188721, "learning_rate": 3.87959595959596e-06, "loss": 6.361866760253906, "step": 23185 }, { "epoch": 0.2319, "grad_norm": 2.933483600616455, "learning_rate": 3.879343434343435e-06, "loss": 6.301678085327149, "step": 23190 }, { "epoch": 0.23195, "grad_norm": 6.579181671142578, "learning_rate": 3.879090909090909e-06, "loss": 6.3379253387451175, "step": 23195 }, { "epoch": 0.232, "grad_norm": 4.142708778381348, "learning_rate": 3.878838383838384e-06, "loss": 6.317596435546875, "step": 23200 }, { "epoch": 0.23205, "grad_norm": 4.419910430908203, "learning_rate": 3.8785858585858586e-06, "loss": 6.341427612304687, "step": 23205 }, { "epoch": 0.2321, "grad_norm": 6.135593414306641, "learning_rate": 3.878333333333333e-06, "loss": 6.306870651245117, "step": 23210 }, { "epoch": 0.23215, "grad_norm": 4.757776737213135, "learning_rate": 3.878080808080809e-06, "loss": 6.358721160888672, "step": 23215 }, { "epoch": 0.2322, "grad_norm": 4.6640753746032715, "learning_rate": 3.877828282828283e-06, "loss": 6.567625427246094, "step": 23220 }, { "epoch": 0.23225, "grad_norm": 5.31903076171875, "learning_rate": 3.877575757575758e-06, "loss": 6.34802474975586, "step": 23225 }, { "epoch": 0.2323, "grad_norm": 5.003335475921631, "learning_rate": 3.8773232323232326e-06, "loss": 6.338460159301758, "step": 23230 }, { "epoch": 0.23235, "grad_norm": 4.627562046051025, "learning_rate": 3.877070707070707e-06, "loss": 6.336625671386718, "step": 23235 }, { "epoch": 0.2324, "grad_norm": 4.523017883300781, "learning_rate": 3.876818181818182e-06, "loss": 6.333913040161133, "step": 23240 }, { "epoch": 0.23245, "grad_norm": 5.125813961029053, "learning_rate": 3.8765656565656564e-06, "loss": 6.317988586425781, "step": 23245 }, { "epoch": 0.2325, "grad_norm": 9.030656814575195, "learning_rate": 3.876313131313131e-06, "loss": 6.356466674804688, "step": 23250 }, { "epoch": 0.23255, "grad_norm": 3.9603564739227295, "learning_rate": 3.8760606060606066e-06, "loss": 6.357289886474609, "step": 23255 }, { "epoch": 0.2326, "grad_norm": 5.487985610961914, "learning_rate": 3.875808080808081e-06, "loss": 6.313074111938477, "step": 23260 }, { "epoch": 0.23265, "grad_norm": 5.957836151123047, "learning_rate": 3.875555555555556e-06, "loss": 6.356830596923828, "step": 23265 }, { "epoch": 0.2327, "grad_norm": 7.0877203941345215, "learning_rate": 3.8753030303030304e-06, "loss": 6.335504150390625, "step": 23270 }, { "epoch": 0.23275, "grad_norm": 4.857766628265381, "learning_rate": 3.875050505050506e-06, "loss": 6.3571117401123045, "step": 23275 }, { "epoch": 0.2328, "grad_norm": 2.8736531734466553, "learning_rate": 3.87479797979798e-06, "loss": 6.2714378356933596, "step": 23280 }, { "epoch": 0.23285, "grad_norm": 3.912163496017456, "learning_rate": 3.874545454545454e-06, "loss": 6.376251220703125, "step": 23285 }, { "epoch": 0.2329, "grad_norm": 5.335253715515137, "learning_rate": 3.87429292929293e-06, "loss": 6.339522171020508, "step": 23290 }, { "epoch": 0.23295, "grad_norm": 5.40915584564209, "learning_rate": 3.8740404040404044e-06, "loss": 6.346769332885742, "step": 23295 }, { "epoch": 0.233, "grad_norm": 7.5331549644470215, "learning_rate": 3.873787878787879e-06, "loss": 6.348665237426758, "step": 23300 }, { "epoch": 0.23305, "grad_norm": 3.9876327514648438, "learning_rate": 3.873535353535354e-06, "loss": 6.33411979675293, "step": 23305 }, { "epoch": 0.2331, "grad_norm": 4.99548864364624, "learning_rate": 3.873282828282829e-06, "loss": 6.402000427246094, "step": 23310 }, { "epoch": 0.23315, "grad_norm": 4.863030433654785, "learning_rate": 3.873030303030304e-06, "loss": 6.395990371704102, "step": 23315 }, { "epoch": 0.2332, "grad_norm": 4.847181797027588, "learning_rate": 3.8727777777777784e-06, "loss": 6.305920791625977, "step": 23320 }, { "epoch": 0.23325, "grad_norm": 4.464929103851318, "learning_rate": 3.872525252525253e-06, "loss": 6.294976043701172, "step": 23325 }, { "epoch": 0.2333, "grad_norm": 7.697823524475098, "learning_rate": 3.872272727272728e-06, "loss": 6.5640708923339846, "step": 23330 }, { "epoch": 0.23335, "grad_norm": 3.7077395915985107, "learning_rate": 3.872020202020202e-06, "loss": 6.344643783569336, "step": 23335 }, { "epoch": 0.2334, "grad_norm": 12.582032203674316, "learning_rate": 3.871767676767677e-06, "loss": 6.408039855957031, "step": 23340 }, { "epoch": 0.23345, "grad_norm": 7.015203952789307, "learning_rate": 3.871515151515152e-06, "loss": 6.32598876953125, "step": 23345 }, { "epoch": 0.2335, "grad_norm": 4.288808822631836, "learning_rate": 3.871262626262627e-06, "loss": 6.313179016113281, "step": 23350 }, { "epoch": 0.23355, "grad_norm": 6.3981032371521, "learning_rate": 3.871010101010102e-06, "loss": 6.35522689819336, "step": 23355 }, { "epoch": 0.2336, "grad_norm": 7.464481353759766, "learning_rate": 3.870757575757576e-06, "loss": 6.341023254394531, "step": 23360 }, { "epoch": 0.23365, "grad_norm": 5.585562705993652, "learning_rate": 3.870505050505051e-06, "loss": 6.302985382080078, "step": 23365 }, { "epoch": 0.2337, "grad_norm": 5.149304389953613, "learning_rate": 3.870252525252526e-06, "loss": 6.363208770751953, "step": 23370 }, { "epoch": 0.23375, "grad_norm": 6.442792892456055, "learning_rate": 3.87e-06, "loss": 6.287469100952149, "step": 23375 }, { "epoch": 0.2338, "grad_norm": 3.9642062187194824, "learning_rate": 3.869747474747475e-06, "loss": 6.346845626831055, "step": 23380 }, { "epoch": 0.23385, "grad_norm": 8.07283878326416, "learning_rate": 3.8694949494949495e-06, "loss": 6.33221321105957, "step": 23385 }, { "epoch": 0.2339, "grad_norm": 4.279328346252441, "learning_rate": 3.869242424242425e-06, "loss": 6.406983184814453, "step": 23390 }, { "epoch": 0.23395, "grad_norm": 6.113319396972656, "learning_rate": 3.8689898989899e-06, "loss": 6.3385578155517575, "step": 23395 }, { "epoch": 0.234, "grad_norm": 5.017642498016357, "learning_rate": 3.868737373737374e-06, "loss": 6.356827163696289, "step": 23400 }, { "epoch": 0.23405, "grad_norm": 5.603649139404297, "learning_rate": 3.868484848484849e-06, "loss": 6.350005722045898, "step": 23405 }, { "epoch": 0.2341, "grad_norm": 8.45039176940918, "learning_rate": 3.8682323232323235e-06, "loss": 6.345639038085937, "step": 23410 }, { "epoch": 0.23415, "grad_norm": 6.548638343811035, "learning_rate": 3.867979797979798e-06, "loss": 6.320265197753907, "step": 23415 }, { "epoch": 0.2342, "grad_norm": 4.55612850189209, "learning_rate": 3.867727272727273e-06, "loss": 6.278616333007813, "step": 23420 }, { "epoch": 0.23425, "grad_norm": 6.277486324310303, "learning_rate": 3.867474747474747e-06, "loss": 6.325331115722657, "step": 23425 }, { "epoch": 0.2343, "grad_norm": 4.796436786651611, "learning_rate": 3.867222222222223e-06, "loss": 6.313381958007812, "step": 23430 }, { "epoch": 0.23435, "grad_norm": 7.758279800415039, "learning_rate": 3.8669696969696975e-06, "loss": 6.38459358215332, "step": 23435 }, { "epoch": 0.2344, "grad_norm": 5.2945966720581055, "learning_rate": 3.866717171717172e-06, "loss": 6.287571716308594, "step": 23440 }, { "epoch": 0.23445, "grad_norm": 3.818080425262451, "learning_rate": 3.866464646464647e-06, "loss": 6.357430267333984, "step": 23445 }, { "epoch": 0.2345, "grad_norm": 10.90357780456543, "learning_rate": 3.866212121212121e-06, "loss": 6.456547546386719, "step": 23450 }, { "epoch": 0.23455, "grad_norm": 5.000766277313232, "learning_rate": 3.865959595959596e-06, "loss": 6.297657775878906, "step": 23455 }, { "epoch": 0.2346, "grad_norm": 5.889200687408447, "learning_rate": 3.865707070707071e-06, "loss": 6.320281219482422, "step": 23460 }, { "epoch": 0.23465, "grad_norm": 4.958840370178223, "learning_rate": 3.865454545454545e-06, "loss": 6.353712463378907, "step": 23465 }, { "epoch": 0.2347, "grad_norm": 5.088508605957031, "learning_rate": 3.865202020202021e-06, "loss": 6.320648956298828, "step": 23470 }, { "epoch": 0.23475, "grad_norm": 6.032515048980713, "learning_rate": 3.864949494949495e-06, "loss": 6.298333740234375, "step": 23475 }, { "epoch": 0.2348, "grad_norm": 5.224704265594482, "learning_rate": 3.86469696969697e-06, "loss": 6.3115684509277346, "step": 23480 }, { "epoch": 0.23485, "grad_norm": 3.3492045402526855, "learning_rate": 3.864444444444445e-06, "loss": 6.319695281982422, "step": 23485 }, { "epoch": 0.2349, "grad_norm": 3.982449531555176, "learning_rate": 3.86419191919192e-06, "loss": 6.312643432617188, "step": 23490 }, { "epoch": 0.23495, "grad_norm": 6.876667499542236, "learning_rate": 3.863939393939395e-06, "loss": 6.357552337646484, "step": 23495 }, { "epoch": 0.235, "grad_norm": 4.515787124633789, "learning_rate": 3.8636868686868685e-06, "loss": 6.401741027832031, "step": 23500 }, { "epoch": 0.23505, "grad_norm": 2.912814140319824, "learning_rate": 3.863434343434343e-06, "loss": 6.448995971679688, "step": 23505 }, { "epoch": 0.2351, "grad_norm": 13.348073959350586, "learning_rate": 3.863181818181819e-06, "loss": 6.461601257324219, "step": 23510 }, { "epoch": 0.23515, "grad_norm": 4.441615581512451, "learning_rate": 3.862929292929293e-06, "loss": 6.3068798065185545, "step": 23515 }, { "epoch": 0.2352, "grad_norm": 17.6944580078125, "learning_rate": 3.862676767676768e-06, "loss": 6.685466766357422, "step": 23520 }, { "epoch": 0.23525, "grad_norm": 5.63979434967041, "learning_rate": 3.8624242424242425e-06, "loss": 6.300366973876953, "step": 23525 }, { "epoch": 0.2353, "grad_norm": 4.300624370574951, "learning_rate": 3.862171717171718e-06, "loss": 6.374665451049805, "step": 23530 }, { "epoch": 0.23535, "grad_norm": 4.913166046142578, "learning_rate": 3.861919191919193e-06, "loss": 6.294214248657227, "step": 23535 }, { "epoch": 0.2354, "grad_norm": 4.407675743103027, "learning_rate": 3.861666666666667e-06, "loss": 6.447904205322265, "step": 23540 }, { "epoch": 0.23545, "grad_norm": 5.9636311531066895, "learning_rate": 3.861414141414142e-06, "loss": 6.302238464355469, "step": 23545 }, { "epoch": 0.2355, "grad_norm": 5.325197696685791, "learning_rate": 3.8611616161616165e-06, "loss": 6.308079528808594, "step": 23550 }, { "epoch": 0.23555, "grad_norm": 3.4554481506347656, "learning_rate": 3.860909090909091e-06, "loss": 6.3169292449951175, "step": 23555 }, { "epoch": 0.2356, "grad_norm": 5.059482097625732, "learning_rate": 3.860656565656566e-06, "loss": 6.346894073486328, "step": 23560 }, { "epoch": 0.23565, "grad_norm": 5.85630989074707, "learning_rate": 3.86040404040404e-06, "loss": 6.338227844238281, "step": 23565 }, { "epoch": 0.2357, "grad_norm": 4.866011142730713, "learning_rate": 3.860151515151516e-06, "loss": 6.348033905029297, "step": 23570 }, { "epoch": 0.23575, "grad_norm": 8.268282890319824, "learning_rate": 3.8598989898989905e-06, "loss": 6.342802429199219, "step": 23575 }, { "epoch": 0.2358, "grad_norm": 4.089817523956299, "learning_rate": 3.859646464646465e-06, "loss": 6.348376083374023, "step": 23580 }, { "epoch": 0.23585, "grad_norm": 4.6949543952941895, "learning_rate": 3.85939393939394e-06, "loss": 6.288275146484375, "step": 23585 }, { "epoch": 0.2359, "grad_norm": 3.2280616760253906, "learning_rate": 3.859141414141414e-06, "loss": 6.432965087890625, "step": 23590 }, { "epoch": 0.23595, "grad_norm": 5.22882604598999, "learning_rate": 3.858888888888889e-06, "loss": 6.529062652587891, "step": 23595 }, { "epoch": 0.236, "grad_norm": 5.003121376037598, "learning_rate": 3.858636363636364e-06, "loss": 6.350822067260742, "step": 23600 }, { "epoch": 0.23605, "grad_norm": 5.160042762756348, "learning_rate": 3.858383838383838e-06, "loss": 6.317710494995117, "step": 23605 }, { "epoch": 0.2361, "grad_norm": 7.498924255371094, "learning_rate": 3.858131313131314e-06, "loss": 6.369483184814453, "step": 23610 }, { "epoch": 0.23615, "grad_norm": 5.770627498626709, "learning_rate": 3.857878787878788e-06, "loss": 6.334783554077148, "step": 23615 }, { "epoch": 0.2362, "grad_norm": 15.272682189941406, "learning_rate": 3.857626262626263e-06, "loss": 6.349882507324219, "step": 23620 }, { "epoch": 0.23625, "grad_norm": 5.338118553161621, "learning_rate": 3.857373737373738e-06, "loss": 6.409487915039063, "step": 23625 }, { "epoch": 0.2363, "grad_norm": 3.1280364990234375, "learning_rate": 3.857121212121212e-06, "loss": 6.365061187744141, "step": 23630 }, { "epoch": 0.23635, "grad_norm": 9.67115592956543, "learning_rate": 3.856868686868687e-06, "loss": 6.278264999389648, "step": 23635 }, { "epoch": 0.2364, "grad_norm": 9.441956520080566, "learning_rate": 3.8566161616161615e-06, "loss": 6.305233383178711, "step": 23640 }, { "epoch": 0.23645, "grad_norm": 5.106546401977539, "learning_rate": 3.856363636363636e-06, "loss": 6.264565658569336, "step": 23645 }, { "epoch": 0.2365, "grad_norm": 3.7163166999816895, "learning_rate": 3.856111111111112e-06, "loss": 6.308106231689453, "step": 23650 }, { "epoch": 0.23655, "grad_norm": 3.001555919647217, "learning_rate": 3.855858585858586e-06, "loss": 6.322109985351562, "step": 23655 }, { "epoch": 0.2366, "grad_norm": 8.146796226501465, "learning_rate": 3.855606060606061e-06, "loss": 6.486976623535156, "step": 23660 }, { "epoch": 0.23665, "grad_norm": 10.82591724395752, "learning_rate": 3.8553535353535355e-06, "loss": 6.567188262939453, "step": 23665 }, { "epoch": 0.2367, "grad_norm": 4.7886738777160645, "learning_rate": 3.85510101010101e-06, "loss": 6.339975357055664, "step": 23670 }, { "epoch": 0.23675, "grad_norm": 4.170238018035889, "learning_rate": 3.854848484848485e-06, "loss": 6.411251831054687, "step": 23675 }, { "epoch": 0.2368, "grad_norm": 8.593775749206543, "learning_rate": 3.8545959595959594e-06, "loss": 6.424325561523437, "step": 23680 }, { "epoch": 0.23685, "grad_norm": 5.364352226257324, "learning_rate": 3.854343434343434e-06, "loss": 6.321148300170899, "step": 23685 }, { "epoch": 0.2369, "grad_norm": 8.323290824890137, "learning_rate": 3.8540909090909095e-06, "loss": 6.344879913330078, "step": 23690 }, { "epoch": 0.23695, "grad_norm": 5.71909236907959, "learning_rate": 3.853838383838384e-06, "loss": 6.382958984375, "step": 23695 }, { "epoch": 0.237, "grad_norm": 5.052379131317139, "learning_rate": 3.853585858585859e-06, "loss": 6.351225662231445, "step": 23700 }, { "epoch": 0.23705, "grad_norm": 15.467516899108887, "learning_rate": 3.853333333333334e-06, "loss": 6.410855102539062, "step": 23705 }, { "epoch": 0.2371, "grad_norm": 6.143123149871826, "learning_rate": 3.853080808080809e-06, "loss": 6.473812866210937, "step": 23710 }, { "epoch": 0.23715, "grad_norm": 5.948540210723877, "learning_rate": 3.8528282828282835e-06, "loss": 6.340862274169922, "step": 23715 }, { "epoch": 0.2372, "grad_norm": 7.276488780975342, "learning_rate": 3.852575757575758e-06, "loss": 6.320771408081055, "step": 23720 }, { "epoch": 0.23725, "grad_norm": 4.338954925537109, "learning_rate": 3.852323232323233e-06, "loss": 6.345733642578125, "step": 23725 }, { "epoch": 0.2373, "grad_norm": 5.703032493591309, "learning_rate": 3.8520707070707074e-06, "loss": 6.292609405517578, "step": 23730 }, { "epoch": 0.23735, "grad_norm": 10.466814994812012, "learning_rate": 3.851818181818182e-06, "loss": 6.340583801269531, "step": 23735 }, { "epoch": 0.2374, "grad_norm": 9.100690841674805, "learning_rate": 3.851565656565657e-06, "loss": 6.2599021911621096, "step": 23740 }, { "epoch": 0.23745, "grad_norm": 3.205427885055542, "learning_rate": 3.851313131313132e-06, "loss": 6.347158050537109, "step": 23745 }, { "epoch": 0.2375, "grad_norm": 4.370527744293213, "learning_rate": 3.851060606060607e-06, "loss": 6.591325378417968, "step": 23750 }, { "epoch": 0.23755, "grad_norm": 4.382680416107178, "learning_rate": 3.850808080808081e-06, "loss": 6.346498107910156, "step": 23755 }, { "epoch": 0.2376, "grad_norm": 5.607738018035889, "learning_rate": 3.850555555555556e-06, "loss": 6.332609558105469, "step": 23760 }, { "epoch": 0.23765, "grad_norm": 4.1176533699035645, "learning_rate": 3.850303030303031e-06, "loss": 6.301857376098633, "step": 23765 }, { "epoch": 0.2377, "grad_norm": 5.200326442718506, "learning_rate": 3.850050505050505e-06, "loss": 6.347055816650391, "step": 23770 }, { "epoch": 0.23775, "grad_norm": 5.249614715576172, "learning_rate": 3.84979797979798e-06, "loss": 6.332490539550781, "step": 23775 }, { "epoch": 0.2378, "grad_norm": 4.23456335067749, "learning_rate": 3.8495454545454546e-06, "loss": 6.3379875183105465, "step": 23780 }, { "epoch": 0.23785, "grad_norm": 5.759710788726807, "learning_rate": 3.84929292929293e-06, "loss": 6.336750793457031, "step": 23785 }, { "epoch": 0.2379, "grad_norm": 4.506970405578613, "learning_rate": 3.849040404040405e-06, "loss": 6.374192810058593, "step": 23790 }, { "epoch": 0.23795, "grad_norm": 4.625814914703369, "learning_rate": 3.848787878787879e-06, "loss": 6.296202850341797, "step": 23795 }, { "epoch": 0.238, "grad_norm": 5.241394996643066, "learning_rate": 3.848535353535354e-06, "loss": 6.331798553466797, "step": 23800 }, { "epoch": 0.23805, "grad_norm": 4.331488609313965, "learning_rate": 3.8482828282828286e-06, "loss": 6.354387664794922, "step": 23805 }, { "epoch": 0.2381, "grad_norm": 16.20773696899414, "learning_rate": 3.848030303030303e-06, "loss": 6.272127532958985, "step": 23810 }, { "epoch": 0.23815, "grad_norm": 9.23940372467041, "learning_rate": 3.847777777777778e-06, "loss": 6.372252655029297, "step": 23815 }, { "epoch": 0.2382, "grad_norm": 7.978832244873047, "learning_rate": 3.8475252525252525e-06, "loss": 6.389898681640625, "step": 23820 }, { "epoch": 0.23825, "grad_norm": 3.9656801223754883, "learning_rate": 3.847272727272728e-06, "loss": 6.3098796844482425, "step": 23825 }, { "epoch": 0.2383, "grad_norm": 4.411893844604492, "learning_rate": 3.8470202020202026e-06, "loss": 6.480058288574218, "step": 23830 }, { "epoch": 0.23835, "grad_norm": 7.543813705444336, "learning_rate": 3.846767676767677e-06, "loss": 6.344625091552734, "step": 23835 }, { "epoch": 0.2384, "grad_norm": 4.580704212188721, "learning_rate": 3.846515151515152e-06, "loss": 6.336263275146484, "step": 23840 }, { "epoch": 0.23845, "grad_norm": 6.742680549621582, "learning_rate": 3.8462626262626265e-06, "loss": 6.327861022949219, "step": 23845 }, { "epoch": 0.2385, "grad_norm": 4.4109296798706055, "learning_rate": 3.846010101010101e-06, "loss": 6.448567199707031, "step": 23850 }, { "epoch": 0.23855, "grad_norm": 2.650472402572632, "learning_rate": 3.845757575757576e-06, "loss": 6.233216094970703, "step": 23855 }, { "epoch": 0.2386, "grad_norm": 4.066323757171631, "learning_rate": 3.84550505050505e-06, "loss": 6.377291870117188, "step": 23860 }, { "epoch": 0.23865, "grad_norm": 6.54950475692749, "learning_rate": 3.845252525252526e-06, "loss": 6.2900642395019535, "step": 23865 }, { "epoch": 0.2387, "grad_norm": 11.044845581054688, "learning_rate": 3.8450000000000005e-06, "loss": 6.303443145751953, "step": 23870 }, { "epoch": 0.23875, "grad_norm": 5.392928123474121, "learning_rate": 3.844747474747475e-06, "loss": 6.310714340209961, "step": 23875 }, { "epoch": 0.2388, "grad_norm": 4.054230213165283, "learning_rate": 3.84449494949495e-06, "loss": 6.343774032592774, "step": 23880 }, { "epoch": 0.23885, "grad_norm": 4.995977878570557, "learning_rate": 3.844242424242425e-06, "loss": 6.348242568969726, "step": 23885 }, { "epoch": 0.2389, "grad_norm": 3.4550576210021973, "learning_rate": 3.843989898989899e-06, "loss": 6.4812156677246096, "step": 23890 }, { "epoch": 0.23895, "grad_norm": 14.268070220947266, "learning_rate": 3.843737373737374e-06, "loss": 6.333097839355469, "step": 23895 }, { "epoch": 0.239, "grad_norm": 5.137991428375244, "learning_rate": 3.843484848484848e-06, "loss": 6.318060302734375, "step": 23900 }, { "epoch": 0.23905, "grad_norm": 11.986200332641602, "learning_rate": 3.843232323232324e-06, "loss": 6.461307525634766, "step": 23905 }, { "epoch": 0.2391, "grad_norm": 6.322595596313477, "learning_rate": 3.842979797979798e-06, "loss": 6.290758514404297, "step": 23910 }, { "epoch": 0.23915, "grad_norm": 5.891141891479492, "learning_rate": 3.842727272727273e-06, "loss": 6.3174896240234375, "step": 23915 }, { "epoch": 0.2392, "grad_norm": 5.38576078414917, "learning_rate": 3.842474747474748e-06, "loss": 6.301665115356445, "step": 23920 }, { "epoch": 0.23925, "grad_norm": 4.610705375671387, "learning_rate": 3.842222222222223e-06, "loss": 6.342383956909179, "step": 23925 }, { "epoch": 0.2393, "grad_norm": 4.6446123123168945, "learning_rate": 3.841969696969698e-06, "loss": 6.310555267333984, "step": 23930 }, { "epoch": 0.23935, "grad_norm": 3.6305770874023438, "learning_rate": 3.841717171717172e-06, "loss": 6.317502975463867, "step": 23935 }, { "epoch": 0.2394, "grad_norm": 9.057592391967773, "learning_rate": 3.841464646464647e-06, "loss": 6.3187004089355465, "step": 23940 }, { "epoch": 0.23945, "grad_norm": 4.471657752990723, "learning_rate": 3.841212121212122e-06, "loss": 6.3662261962890625, "step": 23945 }, { "epoch": 0.2395, "grad_norm": 28.701416015625, "learning_rate": 3.840959595959596e-06, "loss": 6.303096008300781, "step": 23950 }, { "epoch": 0.23955, "grad_norm": 6.47994327545166, "learning_rate": 3.840707070707071e-06, "loss": 6.339190673828125, "step": 23955 }, { "epoch": 0.2396, "grad_norm": 5.364729404449463, "learning_rate": 3.8404545454545455e-06, "loss": 6.374001693725586, "step": 23960 }, { "epoch": 0.23965, "grad_norm": 6.019107818603516, "learning_rate": 3.840202020202021e-06, "loss": 6.328044891357422, "step": 23965 }, { "epoch": 0.2397, "grad_norm": 7.591935157775879, "learning_rate": 3.839949494949496e-06, "loss": 6.295163345336914, "step": 23970 }, { "epoch": 0.23975, "grad_norm": 3.144721746444702, "learning_rate": 3.83969696969697e-06, "loss": 6.335067367553711, "step": 23975 }, { "epoch": 0.2398, "grad_norm": 4.811420440673828, "learning_rate": 3.839444444444445e-06, "loss": 6.319854354858398, "step": 23980 }, { "epoch": 0.23985, "grad_norm": 4.0684332847595215, "learning_rate": 3.8391919191919195e-06, "loss": 6.344844818115234, "step": 23985 }, { "epoch": 0.2399, "grad_norm": 6.380399703979492, "learning_rate": 3.838939393939394e-06, "loss": 6.410658264160157, "step": 23990 }, { "epoch": 0.23995, "grad_norm": 5.0786824226379395, "learning_rate": 3.838686868686869e-06, "loss": 6.324929809570312, "step": 23995 }, { "epoch": 0.24, "grad_norm": 5.080565929412842, "learning_rate": 3.838434343434343e-06, "loss": 6.319526290893554, "step": 24000 }, { "epoch": 0.24005, "grad_norm": 11.166057586669922, "learning_rate": 3.838181818181819e-06, "loss": 6.277838897705078, "step": 24005 }, { "epoch": 0.2401, "grad_norm": 8.359878540039062, "learning_rate": 3.8379292929292935e-06, "loss": 6.356361389160156, "step": 24010 }, { "epoch": 0.24015, "grad_norm": 5.312755107879639, "learning_rate": 3.837676767676768e-06, "loss": 6.340828704833984, "step": 24015 }, { "epoch": 0.2402, "grad_norm": 10.990241050720215, "learning_rate": 3.837424242424243e-06, "loss": 6.48319091796875, "step": 24020 }, { "epoch": 0.24025, "grad_norm": 4.064845085144043, "learning_rate": 3.837171717171717e-06, "loss": 6.29597282409668, "step": 24025 }, { "epoch": 0.2403, "grad_norm": 5.763132572174072, "learning_rate": 3.836919191919192e-06, "loss": 6.3638763427734375, "step": 24030 }, { "epoch": 0.24035, "grad_norm": 3.2304301261901855, "learning_rate": 3.836666666666667e-06, "loss": 6.285865783691406, "step": 24035 }, { "epoch": 0.2404, "grad_norm": 5.555386066436768, "learning_rate": 3.836414141414141e-06, "loss": 6.350043106079101, "step": 24040 }, { "epoch": 0.24045, "grad_norm": 4.308685779571533, "learning_rate": 3.836161616161617e-06, "loss": 6.298805999755859, "step": 24045 }, { "epoch": 0.2405, "grad_norm": 8.277054786682129, "learning_rate": 3.835909090909091e-06, "loss": 6.353446578979492, "step": 24050 }, { "epoch": 0.24055, "grad_norm": 7.427578449249268, "learning_rate": 3.835656565656566e-06, "loss": 6.335530853271484, "step": 24055 }, { "epoch": 0.2406, "grad_norm": 6.2172393798828125, "learning_rate": 3.835404040404041e-06, "loss": 6.349313735961914, "step": 24060 }, { "epoch": 0.24065, "grad_norm": 42.39357376098633, "learning_rate": 3.835151515151515e-06, "loss": 6.43951416015625, "step": 24065 }, { "epoch": 0.2407, "grad_norm": 3.775812864303589, "learning_rate": 3.83489898989899e-06, "loss": 6.494371032714843, "step": 24070 }, { "epoch": 0.24075, "grad_norm": 5.839667320251465, "learning_rate": 3.8346464646464645e-06, "loss": 6.30771484375, "step": 24075 }, { "epoch": 0.2408, "grad_norm": 5.576752185821533, "learning_rate": 3.834393939393939e-06, "loss": 6.33317985534668, "step": 24080 }, { "epoch": 0.24085, "grad_norm": 6.3316192626953125, "learning_rate": 3.834141414141415e-06, "loss": 6.327111434936524, "step": 24085 }, { "epoch": 0.2409, "grad_norm": 5.125498294830322, "learning_rate": 3.833888888888889e-06, "loss": 6.284212493896485, "step": 24090 }, { "epoch": 0.24095, "grad_norm": 4.017132759094238, "learning_rate": 3.833636363636364e-06, "loss": 6.340859985351562, "step": 24095 }, { "epoch": 0.241, "grad_norm": 6.5962815284729, "learning_rate": 3.8333838383838385e-06, "loss": 6.326301574707031, "step": 24100 }, { "epoch": 0.24105, "grad_norm": 4.3865580558776855, "learning_rate": 3.833131313131314e-06, "loss": 6.375648117065429, "step": 24105 }, { "epoch": 0.2411, "grad_norm": 3.6525449752807617, "learning_rate": 3.832878787878788e-06, "loss": 6.277046585083008, "step": 24110 }, { "epoch": 0.24115, "grad_norm": 5.683064937591553, "learning_rate": 3.832626262626262e-06, "loss": 6.30534553527832, "step": 24115 }, { "epoch": 0.2412, "grad_norm": 8.254146575927734, "learning_rate": 3.832373737373737e-06, "loss": 6.3214977264404295, "step": 24120 }, { "epoch": 0.24125, "grad_norm": 4.52504825592041, "learning_rate": 3.8321212121212125e-06, "loss": 6.322126770019532, "step": 24125 }, { "epoch": 0.2413, "grad_norm": 6.124266147613525, "learning_rate": 3.831868686868687e-06, "loss": 6.289709854125976, "step": 24130 }, { "epoch": 0.24135, "grad_norm": 12.03856086730957, "learning_rate": 3.831616161616162e-06, "loss": 6.575050354003906, "step": 24135 }, { "epoch": 0.2414, "grad_norm": 5.803675174713135, "learning_rate": 3.831363636363637e-06, "loss": 6.361572265625, "step": 24140 }, { "epoch": 0.24145, "grad_norm": 15.412129402160645, "learning_rate": 3.831111111111112e-06, "loss": 6.488151550292969, "step": 24145 }, { "epoch": 0.2415, "grad_norm": 9.824732780456543, "learning_rate": 3.8308585858585865e-06, "loss": 6.322882080078125, "step": 24150 }, { "epoch": 0.24155, "grad_norm": 4.750017166137695, "learning_rate": 3.830606060606061e-06, "loss": 6.3200843811035154, "step": 24155 }, { "epoch": 0.2416, "grad_norm": 3.0917975902557373, "learning_rate": 3.830353535353536e-06, "loss": 6.3036956787109375, "step": 24160 }, { "epoch": 0.24165, "grad_norm": 3.859509229660034, "learning_rate": 3.83010101010101e-06, "loss": 6.38201904296875, "step": 24165 }, { "epoch": 0.2417, "grad_norm": 7.741752624511719, "learning_rate": 3.829848484848485e-06, "loss": 6.360684204101562, "step": 24170 }, { "epoch": 0.24175, "grad_norm": 6.099020957946777, "learning_rate": 3.82959595959596e-06, "loss": 6.316205596923828, "step": 24175 }, { "epoch": 0.2418, "grad_norm": 8.330970764160156, "learning_rate": 3.829343434343435e-06, "loss": 6.346596145629883, "step": 24180 }, { "epoch": 0.24185, "grad_norm": 7.450042724609375, "learning_rate": 3.82909090909091e-06, "loss": 6.345100021362304, "step": 24185 }, { "epoch": 0.2419, "grad_norm": 98.45242309570312, "learning_rate": 3.828838383838384e-06, "loss": 7.838534545898438, "step": 24190 }, { "epoch": 0.24195, "grad_norm": 4.102593421936035, "learning_rate": 3.828585858585859e-06, "loss": 9.138383483886718, "step": 24195 }, { "epoch": 0.242, "grad_norm": 4.679060459136963, "learning_rate": 3.828333333333334e-06, "loss": 6.317528533935547, "step": 24200 }, { "epoch": 0.24205, "grad_norm": 5.479498863220215, "learning_rate": 3.828080808080808e-06, "loss": 6.343070983886719, "step": 24205 }, { "epoch": 0.2421, "grad_norm": 7.108460426330566, "learning_rate": 3.827828282828283e-06, "loss": 6.388593292236328, "step": 24210 }, { "epoch": 0.24215, "grad_norm": 4.889233112335205, "learning_rate": 3.8275757575757576e-06, "loss": 6.337596130371094, "step": 24215 }, { "epoch": 0.2422, "grad_norm": 4.090147495269775, "learning_rate": 3.827323232323233e-06, "loss": 6.326486968994141, "step": 24220 }, { "epoch": 0.24225, "grad_norm": 4.227202892303467, "learning_rate": 3.827070707070708e-06, "loss": 6.329409790039063, "step": 24225 }, { "epoch": 0.2423, "grad_norm": 4.322852611541748, "learning_rate": 3.826818181818182e-06, "loss": 6.35192756652832, "step": 24230 }, { "epoch": 0.24235, "grad_norm": 5.811985015869141, "learning_rate": 3.826565656565657e-06, "loss": 6.289396667480469, "step": 24235 }, { "epoch": 0.2424, "grad_norm": 5.137140274047852, "learning_rate": 3.8263131313131316e-06, "loss": 6.322133636474609, "step": 24240 }, { "epoch": 0.24245, "grad_norm": 3.537370443344116, "learning_rate": 3.826060606060606e-06, "loss": 6.374931335449219, "step": 24245 }, { "epoch": 0.2425, "grad_norm": 5.18840217590332, "learning_rate": 3.825808080808081e-06, "loss": 6.331224060058593, "step": 24250 }, { "epoch": 0.24255, "grad_norm": 8.186988830566406, "learning_rate": 3.8255555555555554e-06, "loss": 6.331034088134766, "step": 24255 }, { "epoch": 0.2426, "grad_norm": 5.282400608062744, "learning_rate": 3.825303030303031e-06, "loss": 6.32293930053711, "step": 24260 }, { "epoch": 0.24265, "grad_norm": 11.877413749694824, "learning_rate": 3.8250505050505056e-06, "loss": 6.375288009643555, "step": 24265 }, { "epoch": 0.2427, "grad_norm": 4.934964179992676, "learning_rate": 3.82479797979798e-06, "loss": 6.358880615234375, "step": 24270 }, { "epoch": 0.24275, "grad_norm": 5.96776819229126, "learning_rate": 3.824545454545455e-06, "loss": 6.358721923828125, "step": 24275 }, { "epoch": 0.2428, "grad_norm": 5.294593334197998, "learning_rate": 3.8242929292929294e-06, "loss": 6.290181732177734, "step": 24280 }, { "epoch": 0.24285, "grad_norm": 5.190549373626709, "learning_rate": 3.824040404040404e-06, "loss": 6.301936340332031, "step": 24285 }, { "epoch": 0.2429, "grad_norm": 5.636746883392334, "learning_rate": 3.823787878787879e-06, "loss": 6.355446624755859, "step": 24290 }, { "epoch": 0.24295, "grad_norm": 3.8872592449188232, "learning_rate": 3.823535353535353e-06, "loss": 6.262110900878906, "step": 24295 }, { "epoch": 0.243, "grad_norm": 5.9948344230651855, "learning_rate": 3.823282828282829e-06, "loss": 6.352171325683594, "step": 24300 }, { "epoch": 0.24305, "grad_norm": 4.132962703704834, "learning_rate": 3.8230303030303034e-06, "loss": 6.499596405029297, "step": 24305 }, { "epoch": 0.2431, "grad_norm": 2.929037094116211, "learning_rate": 3.822777777777778e-06, "loss": 6.310108184814453, "step": 24310 }, { "epoch": 0.24315, "grad_norm": 7.119785308837891, "learning_rate": 3.822525252525253e-06, "loss": 6.381988525390625, "step": 24315 }, { "epoch": 0.2432, "grad_norm": 6.319742202758789, "learning_rate": 3.822272727272728e-06, "loss": 6.574148559570313, "step": 24320 }, { "epoch": 0.24325, "grad_norm": 3.726332664489746, "learning_rate": 3.822020202020203e-06, "loss": 6.291788101196289, "step": 24325 }, { "epoch": 0.2433, "grad_norm": 3.8167693614959717, "learning_rate": 3.8217676767676774e-06, "loss": 6.320569610595703, "step": 24330 }, { "epoch": 0.24335, "grad_norm": 6.320568561553955, "learning_rate": 3.821515151515151e-06, "loss": 6.325244903564453, "step": 24335 }, { "epoch": 0.2434, "grad_norm": 4.583096981048584, "learning_rate": 3.821262626262627e-06, "loss": 6.356656646728515, "step": 24340 }, { "epoch": 0.24345, "grad_norm": 5.153962135314941, "learning_rate": 3.821010101010101e-06, "loss": 6.304667663574219, "step": 24345 }, { "epoch": 0.2435, "grad_norm": 6.890057563781738, "learning_rate": 3.820757575757576e-06, "loss": 6.289279174804688, "step": 24350 }, { "epoch": 0.24355, "grad_norm": 5.230597496032715, "learning_rate": 3.820505050505051e-06, "loss": 6.312651443481445, "step": 24355 }, { "epoch": 0.2436, "grad_norm": 5.591320037841797, "learning_rate": 3.820252525252526e-06, "loss": 6.3118846893310545, "step": 24360 }, { "epoch": 0.24365, "grad_norm": 5.494738578796387, "learning_rate": 3.820000000000001e-06, "loss": 6.321754455566406, "step": 24365 }, { "epoch": 0.2437, "grad_norm": 4.298521995544434, "learning_rate": 3.819747474747475e-06, "loss": 6.3334907531738285, "step": 24370 }, { "epoch": 0.24375, "grad_norm": 4.728217124938965, "learning_rate": 3.81949494949495e-06, "loss": 6.311742782592773, "step": 24375 }, { "epoch": 0.2438, "grad_norm": 18.264829635620117, "learning_rate": 3.819242424242425e-06, "loss": 6.358943939208984, "step": 24380 }, { "epoch": 0.24385, "grad_norm": 5.511072158813477, "learning_rate": 3.818989898989899e-06, "loss": 6.277855682373047, "step": 24385 }, { "epoch": 0.2439, "grad_norm": 5.5497941970825195, "learning_rate": 3.818737373737374e-06, "loss": 6.457023620605469, "step": 24390 }, { "epoch": 0.24395, "grad_norm": 4.7944016456604, "learning_rate": 3.8184848484848485e-06, "loss": 6.304094696044922, "step": 24395 }, { "epoch": 0.244, "grad_norm": 5.471667766571045, "learning_rate": 3.818232323232324e-06, "loss": 6.315402221679688, "step": 24400 }, { "epoch": 0.24405, "grad_norm": 5.938952922821045, "learning_rate": 3.817979797979799e-06, "loss": 6.350075149536133, "step": 24405 }, { "epoch": 0.2441, "grad_norm": 5.84389066696167, "learning_rate": 3.817727272727273e-06, "loss": 6.351403045654297, "step": 24410 }, { "epoch": 0.24415, "grad_norm": 4.341282844543457, "learning_rate": 3.817474747474748e-06, "loss": 6.345235061645508, "step": 24415 }, { "epoch": 0.2442, "grad_norm": 4.11017370223999, "learning_rate": 3.8172222222222225e-06, "loss": 6.316091918945313, "step": 24420 }, { "epoch": 0.24425, "grad_norm": 4.32265567779541, "learning_rate": 3.816969696969697e-06, "loss": 6.353371047973633, "step": 24425 }, { "epoch": 0.2443, "grad_norm": 4.453935623168945, "learning_rate": 3.816717171717172e-06, "loss": 6.322188186645508, "step": 24430 }, { "epoch": 0.24435, "grad_norm": 5.044913291931152, "learning_rate": 3.816464646464646e-06, "loss": 6.4478271484375, "step": 24435 }, { "epoch": 0.2444, "grad_norm": 4.089410305023193, "learning_rate": 3.816212121212122e-06, "loss": 6.311819839477539, "step": 24440 }, { "epoch": 0.24445, "grad_norm": 4.8262038230896, "learning_rate": 3.8159595959595965e-06, "loss": 6.329756546020508, "step": 24445 }, { "epoch": 0.2445, "grad_norm": 4.36832332611084, "learning_rate": 3.815707070707071e-06, "loss": 6.3622184753417965, "step": 24450 }, { "epoch": 0.24455, "grad_norm": 5.734066486358643, "learning_rate": 3.815454545454546e-06, "loss": 6.289995574951172, "step": 24455 }, { "epoch": 0.2446, "grad_norm": 3.8248202800750732, "learning_rate": 3.81520202020202e-06, "loss": 6.317963027954102, "step": 24460 }, { "epoch": 0.24465, "grad_norm": 8.372124671936035, "learning_rate": 3.814949494949495e-06, "loss": 6.352735900878907, "step": 24465 }, { "epoch": 0.2447, "grad_norm": 6.922900199890137, "learning_rate": 3.81469696969697e-06, "loss": 6.296628952026367, "step": 24470 }, { "epoch": 0.24475, "grad_norm": 6.852545738220215, "learning_rate": 3.8144444444444447e-06, "loss": 6.304690933227539, "step": 24475 }, { "epoch": 0.2448, "grad_norm": 6.217726230621338, "learning_rate": 3.8141919191919197e-06, "loss": 6.305267715454102, "step": 24480 }, { "epoch": 0.24485, "grad_norm": 5.941946506500244, "learning_rate": 3.8139393939393944e-06, "loss": 6.380317306518554, "step": 24485 }, { "epoch": 0.2449, "grad_norm": 4.736086845397949, "learning_rate": 3.813686868686869e-06, "loss": 6.295236968994141, "step": 24490 }, { "epoch": 0.24495, "grad_norm": 3.732618808746338, "learning_rate": 3.8134343434343436e-06, "loss": 6.323743438720703, "step": 24495 }, { "epoch": 0.245, "grad_norm": 23.454654693603516, "learning_rate": 3.8131818181818187e-06, "loss": 6.019623565673828, "step": 24500 }, { "epoch": 0.24505, "grad_norm": 3.7260236740112305, "learning_rate": 3.8129292929292933e-06, "loss": 6.226126480102539, "step": 24505 }, { "epoch": 0.2451, "grad_norm": 4.491339206695557, "learning_rate": 3.812676767676768e-06, "loss": 6.309399032592774, "step": 24510 }, { "epoch": 0.24515, "grad_norm": 8.292157173156738, "learning_rate": 3.8124242424242426e-06, "loss": 6.33044204711914, "step": 24515 }, { "epoch": 0.2452, "grad_norm": 4.637438774108887, "learning_rate": 3.8121717171717176e-06, "loss": 6.327280426025391, "step": 24520 }, { "epoch": 0.24525, "grad_norm": 3.9368162155151367, "learning_rate": 3.8119191919191922e-06, "loss": 6.332490539550781, "step": 24525 }, { "epoch": 0.2453, "grad_norm": 5.515077590942383, "learning_rate": 3.811666666666667e-06, "loss": 6.410867309570312, "step": 24530 }, { "epoch": 0.24535, "grad_norm": 5.964354991912842, "learning_rate": 3.8114141414141415e-06, "loss": 6.521192932128907, "step": 24535 }, { "epoch": 0.2454, "grad_norm": 16.648439407348633, "learning_rate": 3.8111616161616166e-06, "loss": 6.4942466735839846, "step": 24540 }, { "epoch": 0.24545, "grad_norm": 4.645191669464111, "learning_rate": 3.810909090909091e-06, "loss": 6.309139251708984, "step": 24545 }, { "epoch": 0.2455, "grad_norm": 7.315805912017822, "learning_rate": 3.810656565656566e-06, "loss": 6.38135986328125, "step": 24550 }, { "epoch": 0.24555, "grad_norm": 7.0268425941467285, "learning_rate": 3.8104040404040405e-06, "loss": 6.359656524658203, "step": 24555 }, { "epoch": 0.2456, "grad_norm": 11.247036933898926, "learning_rate": 3.8101515151515155e-06, "loss": 6.4888862609863285, "step": 24560 }, { "epoch": 0.24565, "grad_norm": 9.910554885864258, "learning_rate": 3.80989898989899e-06, "loss": 6.319495391845703, "step": 24565 }, { "epoch": 0.2457, "grad_norm": 3.8603157997131348, "learning_rate": 3.8096464646464648e-06, "loss": 6.307781219482422, "step": 24570 }, { "epoch": 0.24575, "grad_norm": 3.8969900608062744, "learning_rate": 3.80939393939394e-06, "loss": 6.307999038696289, "step": 24575 }, { "epoch": 0.2458, "grad_norm": 5.8311767578125, "learning_rate": 3.8091414141414144e-06, "loss": 6.317541122436523, "step": 24580 }, { "epoch": 0.24585, "grad_norm": 5.299201965332031, "learning_rate": 3.808888888888889e-06, "loss": 6.357425308227539, "step": 24585 }, { "epoch": 0.2459, "grad_norm": 4.799232006072998, "learning_rate": 3.8086363636363637e-06, "loss": 6.3418926239013675, "step": 24590 }, { "epoch": 0.24595, "grad_norm": 8.093696594238281, "learning_rate": 3.8083838383838388e-06, "loss": 6.3264514923095705, "step": 24595 }, { "epoch": 0.246, "grad_norm": 5.64540958404541, "learning_rate": 3.8081313131313134e-06, "loss": 6.388252258300781, "step": 24600 }, { "epoch": 0.24605, "grad_norm": 11.959281921386719, "learning_rate": 3.807878787878788e-06, "loss": 6.3093719482421875, "step": 24605 }, { "epoch": 0.2461, "grad_norm": 4.623868465423584, "learning_rate": 3.8076262626262627e-06, "loss": 6.3174396514892575, "step": 24610 }, { "epoch": 0.24615, "grad_norm": 6.7488484382629395, "learning_rate": 3.807373737373738e-06, "loss": 6.361555099487305, "step": 24615 }, { "epoch": 0.2462, "grad_norm": 5.641331672668457, "learning_rate": 3.8071212121212128e-06, "loss": 6.320330047607422, "step": 24620 }, { "epoch": 0.24625, "grad_norm": 4.6171183586120605, "learning_rate": 3.806868686868687e-06, "loss": 6.3777214050292965, "step": 24625 }, { "epoch": 0.2463, "grad_norm": 6.4527387619018555, "learning_rate": 3.8066161616161616e-06, "loss": 6.344682312011718, "step": 24630 }, { "epoch": 0.24635, "grad_norm": 4.407225608825684, "learning_rate": 3.806363636363637e-06, "loss": 6.3406013488769535, "step": 24635 }, { "epoch": 0.2464, "grad_norm": 3.734208583831787, "learning_rate": 3.8061111111111117e-06, "loss": 6.254399108886719, "step": 24640 }, { "epoch": 0.24645, "grad_norm": 3.616285800933838, "learning_rate": 3.8058585858585863e-06, "loss": 6.328759384155274, "step": 24645 }, { "epoch": 0.2465, "grad_norm": 4.61699914932251, "learning_rate": 3.8056060606060605e-06, "loss": 6.300517272949219, "step": 24650 }, { "epoch": 0.24655, "grad_norm": 6.491902828216553, "learning_rate": 3.805353535353536e-06, "loss": 6.269119644165039, "step": 24655 }, { "epoch": 0.2466, "grad_norm": 6.549318790435791, "learning_rate": 3.8051010101010106e-06, "loss": 6.338206481933594, "step": 24660 }, { "epoch": 0.24665, "grad_norm": 5.878042697906494, "learning_rate": 3.8048484848484853e-06, "loss": 6.31360969543457, "step": 24665 }, { "epoch": 0.2467, "grad_norm": 5.124664306640625, "learning_rate": 3.80459595959596e-06, "loss": 6.307469177246094, "step": 24670 }, { "epoch": 0.24675, "grad_norm": 8.913350105285645, "learning_rate": 3.804343434343435e-06, "loss": 6.323848724365234, "step": 24675 }, { "epoch": 0.2468, "grad_norm": 11.819416999816895, "learning_rate": 3.8040909090909096e-06, "loss": 6.324026870727539, "step": 24680 }, { "epoch": 0.24685, "grad_norm": 7.507349014282227, "learning_rate": 3.8038383838383842e-06, "loss": 6.365409851074219, "step": 24685 }, { "epoch": 0.2469, "grad_norm": 2.5268735885620117, "learning_rate": 3.803585858585859e-06, "loss": 6.308486557006836, "step": 24690 }, { "epoch": 0.24695, "grad_norm": 3.9347801208496094, "learning_rate": 3.803333333333334e-06, "loss": 6.283709335327148, "step": 24695 }, { "epoch": 0.247, "grad_norm": 5.1139750480651855, "learning_rate": 3.8030808080808085e-06, "loss": 6.273369598388672, "step": 24700 }, { "epoch": 0.24705, "grad_norm": 4.346036911010742, "learning_rate": 3.802828282828283e-06, "loss": 6.352246856689453, "step": 24705 }, { "epoch": 0.2471, "grad_norm": 5.568414211273193, "learning_rate": 3.802575757575758e-06, "loss": 6.303802490234375, "step": 24710 }, { "epoch": 0.24715, "grad_norm": 6.704183578491211, "learning_rate": 3.802323232323233e-06, "loss": 6.371820068359375, "step": 24715 }, { "epoch": 0.2472, "grad_norm": 6.049572944641113, "learning_rate": 3.8020707070707075e-06, "loss": 6.295503616333008, "step": 24720 }, { "epoch": 0.24725, "grad_norm": 5.961676597595215, "learning_rate": 3.801818181818182e-06, "loss": 6.371158599853516, "step": 24725 }, { "epoch": 0.2473, "grad_norm": 6.4478864669799805, "learning_rate": 3.8015656565656567e-06, "loss": 6.306602859497071, "step": 24730 }, { "epoch": 0.24735, "grad_norm": 5.454312801361084, "learning_rate": 3.801313131313132e-06, "loss": 6.321035385131836, "step": 24735 }, { "epoch": 0.2474, "grad_norm": 4.791575908660889, "learning_rate": 3.8010606060606064e-06, "loss": 6.370033264160156, "step": 24740 }, { "epoch": 0.24745, "grad_norm": 5.4491190910339355, "learning_rate": 3.800808080808081e-06, "loss": 6.299378967285156, "step": 24745 }, { "epoch": 0.2475, "grad_norm": 7.616333961486816, "learning_rate": 3.8005555555555557e-06, "loss": 6.336561584472657, "step": 24750 }, { "epoch": 0.24755, "grad_norm": 4.816043376922607, "learning_rate": 3.8003030303030307e-06, "loss": 6.451275634765625, "step": 24755 }, { "epoch": 0.2476, "grad_norm": 4.450271129608154, "learning_rate": 3.8000505050505054e-06, "loss": 6.506224822998047, "step": 24760 }, { "epoch": 0.24765, "grad_norm": 5.473537921905518, "learning_rate": 3.79979797979798e-06, "loss": 6.321356582641601, "step": 24765 }, { "epoch": 0.2477, "grad_norm": 6.245075702667236, "learning_rate": 3.7995454545454546e-06, "loss": 6.346730804443359, "step": 24770 }, { "epoch": 0.24775, "grad_norm": 5.433206558227539, "learning_rate": 3.7992929292929297e-06, "loss": 6.325083923339844, "step": 24775 }, { "epoch": 0.2478, "grad_norm": 5.377098083496094, "learning_rate": 3.7990404040404043e-06, "loss": 6.320269775390625, "step": 24780 }, { "epoch": 0.24785, "grad_norm": 5.338435649871826, "learning_rate": 3.798787878787879e-06, "loss": 6.371579360961914, "step": 24785 }, { "epoch": 0.2479, "grad_norm": 6.252237319946289, "learning_rate": 3.7985353535353536e-06, "loss": 6.3367469787597654, "step": 24790 }, { "epoch": 0.24795, "grad_norm": 10.832158088684082, "learning_rate": 3.7982828282828286e-06, "loss": 6.4828544616699215, "step": 24795 }, { "epoch": 0.248, "grad_norm": 3.6074142456054688, "learning_rate": 3.7980303030303033e-06, "loss": 6.317464447021484, "step": 24800 }, { "epoch": 0.24805, "grad_norm": 2.7859723567962646, "learning_rate": 3.797777777777778e-06, "loss": 6.332563018798828, "step": 24805 }, { "epoch": 0.2481, "grad_norm": 5.28027868270874, "learning_rate": 3.7975252525252525e-06, "loss": 6.321557998657227, "step": 24810 }, { "epoch": 0.24815, "grad_norm": 4.698992729187012, "learning_rate": 3.7972727272727276e-06, "loss": 6.321604537963867, "step": 24815 }, { "epoch": 0.2482, "grad_norm": 3.5160722732543945, "learning_rate": 3.797020202020202e-06, "loss": 6.331064605712891, "step": 24820 }, { "epoch": 0.24825, "grad_norm": 3.0970358848571777, "learning_rate": 3.796767676767677e-06, "loss": 6.280361938476562, "step": 24825 }, { "epoch": 0.2483, "grad_norm": 6.861550807952881, "learning_rate": 3.7965151515151515e-06, "loss": 6.316182708740234, "step": 24830 }, { "epoch": 0.24835, "grad_norm": 5.847132205963135, "learning_rate": 3.796262626262627e-06, "loss": 6.303302764892578, "step": 24835 }, { "epoch": 0.2484, "grad_norm": 6.176864147186279, "learning_rate": 3.7960101010101016e-06, "loss": 6.323783111572266, "step": 24840 }, { "epoch": 0.24845, "grad_norm": 7.012228488922119, "learning_rate": 3.7957575757575758e-06, "loss": 6.321030807495117, "step": 24845 }, { "epoch": 0.2485, "grad_norm": 4.1989827156066895, "learning_rate": 3.7955050505050504e-06, "loss": 6.320504760742187, "step": 24850 }, { "epoch": 0.24855, "grad_norm": 4.0412821769714355, "learning_rate": 3.795252525252526e-06, "loss": 6.298176574707031, "step": 24855 }, { "epoch": 0.2486, "grad_norm": 5.565872669219971, "learning_rate": 3.7950000000000005e-06, "loss": 6.349287414550782, "step": 24860 }, { "epoch": 0.24865, "grad_norm": 4.126809597015381, "learning_rate": 3.794747474747475e-06, "loss": 6.341047286987305, "step": 24865 }, { "epoch": 0.2487, "grad_norm": 8.392001152038574, "learning_rate": 3.7944949494949498e-06, "loss": 6.326367568969727, "step": 24870 }, { "epoch": 0.24875, "grad_norm": 4.290130615234375, "learning_rate": 3.794242424242425e-06, "loss": 6.336184692382813, "step": 24875 }, { "epoch": 0.2488, "grad_norm": 6.508862018585205, "learning_rate": 3.7939898989898995e-06, "loss": 6.29949951171875, "step": 24880 }, { "epoch": 0.24885, "grad_norm": 5.598213195800781, "learning_rate": 3.793737373737374e-06, "loss": 6.316053771972657, "step": 24885 }, { "epoch": 0.2489, "grad_norm": 4.809788227081299, "learning_rate": 3.7934848484848487e-06, "loss": 6.316693496704102, "step": 24890 }, { "epoch": 0.24895, "grad_norm": 7.314576625823975, "learning_rate": 3.7932323232323238e-06, "loss": 6.31152458190918, "step": 24895 }, { "epoch": 0.249, "grad_norm": 5.41033935546875, "learning_rate": 3.7929797979797984e-06, "loss": 6.347332000732422, "step": 24900 }, { "epoch": 0.24905, "grad_norm": 6.0101494789123535, "learning_rate": 3.792727272727273e-06, "loss": 6.3308765411376955, "step": 24905 }, { "epoch": 0.2491, "grad_norm": 3.3470981121063232, "learning_rate": 3.7924747474747477e-06, "loss": 6.531220245361328, "step": 24910 }, { "epoch": 0.24915, "grad_norm": 8.132181167602539, "learning_rate": 3.7922222222222227e-06, "loss": 6.342335510253906, "step": 24915 }, { "epoch": 0.2492, "grad_norm": 6.243249416351318, "learning_rate": 3.7919696969696973e-06, "loss": 6.3342033386230465, "step": 24920 }, { "epoch": 0.24925, "grad_norm": 23.18819808959961, "learning_rate": 3.791717171717172e-06, "loss": 6.534953308105469, "step": 24925 }, { "epoch": 0.2493, "grad_norm": 23.736787796020508, "learning_rate": 3.7914646464646466e-06, "loss": 6.464931488037109, "step": 24930 }, { "epoch": 0.24935, "grad_norm": 9.789299964904785, "learning_rate": 3.7912121212121217e-06, "loss": 6.3269187927246096, "step": 24935 }, { "epoch": 0.2494, "grad_norm": 7.245536804199219, "learning_rate": 3.7909595959595963e-06, "loss": 6.412259674072265, "step": 24940 }, { "epoch": 0.24945, "grad_norm": 7.821969985961914, "learning_rate": 3.790707070707071e-06, "loss": 6.500132751464844, "step": 24945 }, { "epoch": 0.2495, "grad_norm": 3.7165093421936035, "learning_rate": 3.7904545454545455e-06, "loss": 6.331393814086914, "step": 24950 }, { "epoch": 0.24955, "grad_norm": 6.083778381347656, "learning_rate": 3.7902020202020206e-06, "loss": 6.310817718505859, "step": 24955 }, { "epoch": 0.2496, "grad_norm": 6.308335781097412, "learning_rate": 3.7899494949494952e-06, "loss": 6.291022491455078, "step": 24960 }, { "epoch": 0.24965, "grad_norm": 6.097376823425293, "learning_rate": 3.78969696969697e-06, "loss": 6.329010391235352, "step": 24965 }, { "epoch": 0.2497, "grad_norm": 15.87796688079834, "learning_rate": 3.7894444444444445e-06, "loss": 6.280488586425781, "step": 24970 }, { "epoch": 0.24975, "grad_norm": 5.089207649230957, "learning_rate": 3.7891919191919195e-06, "loss": 6.3177040100097654, "step": 24975 }, { "epoch": 0.2498, "grad_norm": 5.913401126861572, "learning_rate": 3.788939393939394e-06, "loss": 6.32628173828125, "step": 24980 }, { "epoch": 0.24985, "grad_norm": 4.108267307281494, "learning_rate": 3.788686868686869e-06, "loss": 6.373434829711914, "step": 24985 }, { "epoch": 0.2499, "grad_norm": 7.040623188018799, "learning_rate": 3.788434343434344e-06, "loss": 6.323977279663086, "step": 24990 }, { "epoch": 0.24995, "grad_norm": 8.5296630859375, "learning_rate": 3.7881818181818185e-06, "loss": 6.384491348266602, "step": 24995 }, { "epoch": 0.25, "grad_norm": 4.425950527191162, "learning_rate": 3.787929292929293e-06, "loss": 6.352629089355469, "step": 25000 }, { "epoch": 0.25005, "grad_norm": 4.340997219085693, "learning_rate": 3.7876767676767677e-06, "loss": 6.319969940185547, "step": 25005 }, { "epoch": 0.2501, "grad_norm": 8.521161079406738, "learning_rate": 3.787424242424243e-06, "loss": 6.303459930419922, "step": 25010 }, { "epoch": 0.25015, "grad_norm": 3.6244568824768066, "learning_rate": 3.7871717171717174e-06, "loss": 6.320890426635742, "step": 25015 }, { "epoch": 0.2502, "grad_norm": 5.3409342765808105, "learning_rate": 3.786919191919192e-06, "loss": 6.3129119873046875, "step": 25020 }, { "epoch": 0.25025, "grad_norm": 4.810161590576172, "learning_rate": 3.7866666666666667e-06, "loss": 6.320738983154297, "step": 25025 }, { "epoch": 0.2503, "grad_norm": 5.8221821784973145, "learning_rate": 3.786414141414142e-06, "loss": 6.334032440185547, "step": 25030 }, { "epoch": 0.25035, "grad_norm": 4.197929859161377, "learning_rate": 3.786161616161617e-06, "loss": 6.32874755859375, "step": 25035 }, { "epoch": 0.2504, "grad_norm": 6.125472545623779, "learning_rate": 3.785909090909091e-06, "loss": 6.31010627746582, "step": 25040 }, { "epoch": 0.25045, "grad_norm": 5.974649429321289, "learning_rate": 3.7856565656565656e-06, "loss": 6.3127799987792965, "step": 25045 }, { "epoch": 0.2505, "grad_norm": 6.5002055168151855, "learning_rate": 3.785404040404041e-06, "loss": 6.405833435058594, "step": 25050 }, { "epoch": 0.25055, "grad_norm": 6.257747173309326, "learning_rate": 3.7851515151515157e-06, "loss": 6.3007560729980465, "step": 25055 }, { "epoch": 0.2506, "grad_norm": 6.085604190826416, "learning_rate": 3.7848989898989904e-06, "loss": 6.333103942871094, "step": 25060 }, { "epoch": 0.25065, "grad_norm": 7.817204475402832, "learning_rate": 3.7846464646464646e-06, "loss": 6.371909332275391, "step": 25065 }, { "epoch": 0.2507, "grad_norm": 5.689461708068848, "learning_rate": 3.78439393939394e-06, "loss": 6.257283782958984, "step": 25070 }, { "epoch": 0.25075, "grad_norm": 5.629419803619385, "learning_rate": 3.7841414141414147e-06, "loss": 6.292929077148438, "step": 25075 }, { "epoch": 0.2508, "grad_norm": 5.537148475646973, "learning_rate": 3.7838888888888893e-06, "loss": 6.282789611816407, "step": 25080 }, { "epoch": 0.25085, "grad_norm": 5.929000377655029, "learning_rate": 3.783636363636364e-06, "loss": 6.342111206054687, "step": 25085 }, { "epoch": 0.2509, "grad_norm": 4.9777374267578125, "learning_rate": 3.783383838383839e-06, "loss": 6.345852279663086, "step": 25090 }, { "epoch": 0.25095, "grad_norm": 3.2527365684509277, "learning_rate": 3.7831313131313136e-06, "loss": 6.3046119689941404, "step": 25095 }, { "epoch": 0.251, "grad_norm": 4.792423725128174, "learning_rate": 3.7828787878787883e-06, "loss": 6.334859466552734, "step": 25100 }, { "epoch": 0.25105, "grad_norm": 6.289525985717773, "learning_rate": 3.782626262626263e-06, "loss": 6.338176345825195, "step": 25105 }, { "epoch": 0.2511, "grad_norm": 6.7715067863464355, "learning_rate": 3.782373737373738e-06, "loss": 6.406371307373047, "step": 25110 }, { "epoch": 0.25115, "grad_norm": 5.310438632965088, "learning_rate": 3.7821212121212126e-06, "loss": 6.328800201416016, "step": 25115 }, { "epoch": 0.2512, "grad_norm": 4.307737827301025, "learning_rate": 3.781868686868687e-06, "loss": 6.136697769165039, "step": 25120 }, { "epoch": 0.25125, "grad_norm": 7.467793941497803, "learning_rate": 3.781616161616162e-06, "loss": 6.336032104492188, "step": 25125 }, { "epoch": 0.2513, "grad_norm": 5.349710464477539, "learning_rate": 3.781363636363637e-06, "loss": 6.339819717407226, "step": 25130 }, { "epoch": 0.25135, "grad_norm": 7.182404041290283, "learning_rate": 3.7811111111111115e-06, "loss": 6.28376579284668, "step": 25135 }, { "epoch": 0.2514, "grad_norm": 11.696770668029785, "learning_rate": 3.780858585858586e-06, "loss": 6.379244232177735, "step": 25140 }, { "epoch": 0.25145, "grad_norm": 6.058556079864502, "learning_rate": 3.7806060606060608e-06, "loss": 6.325894165039062, "step": 25145 }, { "epoch": 0.2515, "grad_norm": 6.843220233917236, "learning_rate": 3.780353535353536e-06, "loss": 6.3213146209716795, "step": 25150 }, { "epoch": 0.25155, "grad_norm": 4.323552131652832, "learning_rate": 3.7801010101010105e-06, "loss": 6.326206970214844, "step": 25155 }, { "epoch": 0.2516, "grad_norm": 15.439602851867676, "learning_rate": 3.779848484848485e-06, "loss": 6.329898452758789, "step": 25160 }, { "epoch": 0.25165, "grad_norm": 6.368456840515137, "learning_rate": 3.7795959595959597e-06, "loss": 6.306449127197266, "step": 25165 }, { "epoch": 0.2517, "grad_norm": 3.291074275970459, "learning_rate": 3.7793434343434348e-06, "loss": 6.309361267089844, "step": 25170 }, { "epoch": 0.25175, "grad_norm": 5.348289489746094, "learning_rate": 3.7790909090909094e-06, "loss": 6.40875015258789, "step": 25175 }, { "epoch": 0.2518, "grad_norm": 7.199095249176025, "learning_rate": 3.778838383838384e-06, "loss": 6.2788959503173825, "step": 25180 }, { "epoch": 0.25185, "grad_norm": 13.519686698913574, "learning_rate": 3.7785858585858587e-06, "loss": 6.536505889892578, "step": 25185 }, { "epoch": 0.2519, "grad_norm": 4.959192752838135, "learning_rate": 3.7783333333333337e-06, "loss": 6.341909408569336, "step": 25190 }, { "epoch": 0.25195, "grad_norm": 4.8973565101623535, "learning_rate": 3.7780808080808084e-06, "loss": 6.282737731933594, "step": 25195 }, { "epoch": 0.252, "grad_norm": 7.021212100982666, "learning_rate": 3.777828282828283e-06, "loss": 6.344532775878906, "step": 25200 }, { "epoch": 0.25205, "grad_norm": 4.479836940765381, "learning_rate": 3.7775757575757576e-06, "loss": 6.332622528076172, "step": 25205 }, { "epoch": 0.2521, "grad_norm": 7.643125534057617, "learning_rate": 3.7773232323232327e-06, "loss": 6.299216079711914, "step": 25210 }, { "epoch": 0.25215, "grad_norm": 5.385834217071533, "learning_rate": 3.7770707070707073e-06, "loss": 6.334075164794922, "step": 25215 }, { "epoch": 0.2522, "grad_norm": 8.056965827941895, "learning_rate": 3.776818181818182e-06, "loss": 6.287517547607422, "step": 25220 }, { "epoch": 0.25225, "grad_norm": 4.003643989562988, "learning_rate": 3.7765656565656566e-06, "loss": 6.327437973022461, "step": 25225 }, { "epoch": 0.2523, "grad_norm": 5.555039882659912, "learning_rate": 3.7763131313131316e-06, "loss": 6.338059997558593, "step": 25230 }, { "epoch": 0.25235, "grad_norm": 5.187587738037109, "learning_rate": 3.7760606060606062e-06, "loss": 6.348991394042969, "step": 25235 }, { "epoch": 0.2524, "grad_norm": 16.27007484436035, "learning_rate": 3.775808080808081e-06, "loss": 6.402467346191406, "step": 25240 }, { "epoch": 0.25245, "grad_norm": 5.928394317626953, "learning_rate": 3.7755555555555555e-06, "loss": 6.301950836181641, "step": 25245 }, { "epoch": 0.2525, "grad_norm": 3.9654927253723145, "learning_rate": 3.775303030303031e-06, "loss": 6.280168533325195, "step": 25250 }, { "epoch": 0.25255, "grad_norm": 3.6951045989990234, "learning_rate": 3.7750505050505056e-06, "loss": 6.327494049072266, "step": 25255 }, { "epoch": 0.2526, "grad_norm": 4.890178203582764, "learning_rate": 3.77479797979798e-06, "loss": 6.353133773803711, "step": 25260 }, { "epoch": 0.25265, "grad_norm": 3.536865711212158, "learning_rate": 3.7745454545454544e-06, "loss": 6.2545722961425785, "step": 25265 }, { "epoch": 0.2527, "grad_norm": 5.668813228607178, "learning_rate": 3.77429292929293e-06, "loss": 6.27594223022461, "step": 25270 }, { "epoch": 0.25275, "grad_norm": 6.222916603088379, "learning_rate": 3.7740404040404046e-06, "loss": 6.298922729492188, "step": 25275 }, { "epoch": 0.2528, "grad_norm": 5.081191539764404, "learning_rate": 3.773787878787879e-06, "loss": 6.341779708862305, "step": 25280 }, { "epoch": 0.25285, "grad_norm": 2.704108953475952, "learning_rate": 3.773535353535354e-06, "loss": 6.340196990966797, "step": 25285 }, { "epoch": 0.2529, "grad_norm": 3.871838331222534, "learning_rate": 3.773282828282829e-06, "loss": 6.254093170166016, "step": 25290 }, { "epoch": 0.25295, "grad_norm": 6.71392297744751, "learning_rate": 3.7730303030303035e-06, "loss": 6.355170059204101, "step": 25295 }, { "epoch": 0.253, "grad_norm": 5.878215312957764, "learning_rate": 3.772777777777778e-06, "loss": 6.314839553833008, "step": 25300 }, { "epoch": 0.25305, "grad_norm": 5.235101699829102, "learning_rate": 3.7725252525252528e-06, "loss": 6.335359954833985, "step": 25305 }, { "epoch": 0.2531, "grad_norm": 6.897562026977539, "learning_rate": 3.772272727272728e-06, "loss": 6.331043243408203, "step": 25310 }, { "epoch": 0.25315, "grad_norm": 4.549420356750488, "learning_rate": 3.7720202020202024e-06, "loss": 6.28438720703125, "step": 25315 }, { "epoch": 0.2532, "grad_norm": 4.89314603805542, "learning_rate": 3.771767676767677e-06, "loss": 6.274560546875, "step": 25320 }, { "epoch": 0.25325, "grad_norm": 6.686513423919678, "learning_rate": 3.7715151515151517e-06, "loss": 6.356611633300782, "step": 25325 }, { "epoch": 0.2533, "grad_norm": 4.0462775230407715, "learning_rate": 3.7712626262626268e-06, "loss": 6.310383605957031, "step": 25330 }, { "epoch": 0.25335, "grad_norm": 5.5056471824646, "learning_rate": 3.7710101010101014e-06, "loss": 6.35367431640625, "step": 25335 }, { "epoch": 0.2534, "grad_norm": 6.372457504272461, "learning_rate": 3.770757575757576e-06, "loss": 6.29835090637207, "step": 25340 }, { "epoch": 0.25345, "grad_norm": 9.095788955688477, "learning_rate": 3.7705050505050506e-06, "loss": 6.347739028930664, "step": 25345 }, { "epoch": 0.2535, "grad_norm": 4.10211706161499, "learning_rate": 3.7702525252525257e-06, "loss": 6.294282531738281, "step": 25350 }, { "epoch": 0.25355, "grad_norm": 5.600131988525391, "learning_rate": 3.7700000000000003e-06, "loss": 6.410514831542969, "step": 25355 }, { "epoch": 0.2536, "grad_norm": 6.060492992401123, "learning_rate": 3.769747474747475e-06, "loss": 6.307199096679687, "step": 25360 }, { "epoch": 0.25365, "grad_norm": 6.697558403015137, "learning_rate": 3.7694949494949496e-06, "loss": 6.313204193115235, "step": 25365 }, { "epoch": 0.2537, "grad_norm": 6.430440902709961, "learning_rate": 3.7692424242424246e-06, "loss": 6.369896697998047, "step": 25370 }, { "epoch": 0.25375, "grad_norm": 4.880927085876465, "learning_rate": 3.7689898989898993e-06, "loss": 6.275716018676758, "step": 25375 }, { "epoch": 0.2538, "grad_norm": 24.3110408782959, "learning_rate": 3.768737373737374e-06, "loss": 6.441619873046875, "step": 25380 }, { "epoch": 0.25385, "grad_norm": 3.379992723464966, "learning_rate": 3.7684848484848485e-06, "loss": 6.298157501220703, "step": 25385 }, { "epoch": 0.2539, "grad_norm": 6.328636646270752, "learning_rate": 3.7682323232323236e-06, "loss": 6.293209838867187, "step": 25390 }, { "epoch": 0.25395, "grad_norm": 5.421080589294434, "learning_rate": 3.7679797979797982e-06, "loss": 6.274344635009766, "step": 25395 }, { "epoch": 0.254, "grad_norm": 4.121160507202148, "learning_rate": 3.767727272727273e-06, "loss": 6.432330322265625, "step": 25400 }, { "epoch": 0.25405, "grad_norm": 5.689609527587891, "learning_rate": 3.7674747474747475e-06, "loss": 6.461353302001953, "step": 25405 }, { "epoch": 0.2541, "grad_norm": 5.738166809082031, "learning_rate": 3.7672222222222225e-06, "loss": 6.279194641113281, "step": 25410 }, { "epoch": 0.25415, "grad_norm": 5.494600772857666, "learning_rate": 3.766969696969697e-06, "loss": 6.283211135864258, "step": 25415 }, { "epoch": 0.2542, "grad_norm": 4.94999361038208, "learning_rate": 3.766717171717172e-06, "loss": 6.406076812744141, "step": 25420 }, { "epoch": 0.25425, "grad_norm": 8.06949234008789, "learning_rate": 3.766464646464647e-06, "loss": 6.356209182739258, "step": 25425 }, { "epoch": 0.2543, "grad_norm": 4.0149407386779785, "learning_rate": 3.7662121212121215e-06, "loss": 6.312895584106445, "step": 25430 }, { "epoch": 0.25435, "grad_norm": 5.239719867706299, "learning_rate": 3.765959595959596e-06, "loss": 6.650364685058594, "step": 25435 }, { "epoch": 0.2544, "grad_norm": 5.239631175994873, "learning_rate": 3.7657070707070707e-06, "loss": 6.315817260742188, "step": 25440 }, { "epoch": 0.25445, "grad_norm": 6.933191299438477, "learning_rate": 3.765454545454546e-06, "loss": 6.301728057861328, "step": 25445 }, { "epoch": 0.2545, "grad_norm": 7.270870685577393, "learning_rate": 3.765202020202021e-06, "loss": 6.2948753356933596, "step": 25450 }, { "epoch": 0.25455, "grad_norm": 6.023252964019775, "learning_rate": 3.764949494949495e-06, "loss": 6.286990356445313, "step": 25455 }, { "epoch": 0.2546, "grad_norm": 21.820240020751953, "learning_rate": 3.7646969696969697e-06, "loss": 6.334384918212891, "step": 25460 }, { "epoch": 0.25465, "grad_norm": 6.190969944000244, "learning_rate": 3.764444444444445e-06, "loss": 6.3011524200439455, "step": 25465 }, { "epoch": 0.2547, "grad_norm": 4.7140069007873535, "learning_rate": 3.7641919191919198e-06, "loss": 6.247749328613281, "step": 25470 }, { "epoch": 0.25475, "grad_norm": 6.45259428024292, "learning_rate": 3.7639393939393944e-06, "loss": 6.304717636108398, "step": 25475 }, { "epoch": 0.2548, "grad_norm": 5.1916422843933105, "learning_rate": 3.763686868686869e-06, "loss": 6.449502563476562, "step": 25480 }, { "epoch": 0.25485, "grad_norm": 4.448257923126221, "learning_rate": 3.763434343434344e-06, "loss": 6.291581726074218, "step": 25485 }, { "epoch": 0.2549, "grad_norm": 6.921205043792725, "learning_rate": 3.7631818181818187e-06, "loss": 6.417276000976562, "step": 25490 }, { "epoch": 0.25495, "grad_norm": 3.463156223297119, "learning_rate": 3.7629292929292934e-06, "loss": 6.317172622680664, "step": 25495 }, { "epoch": 0.255, "grad_norm": 6.353147506713867, "learning_rate": 3.762676767676768e-06, "loss": 6.332834625244141, "step": 25500 }, { "epoch": 0.25505, "grad_norm": 5.797553539276123, "learning_rate": 3.762424242424243e-06, "loss": 6.34072265625, "step": 25505 }, { "epoch": 0.2551, "grad_norm": 4.203051567077637, "learning_rate": 3.7621717171717177e-06, "loss": 6.363444519042969, "step": 25510 }, { "epoch": 0.25515, "grad_norm": 3.411024808883667, "learning_rate": 3.7619191919191923e-06, "loss": 6.312711334228515, "step": 25515 }, { "epoch": 0.2552, "grad_norm": 4.494279861450195, "learning_rate": 3.761666666666667e-06, "loss": 6.352021026611328, "step": 25520 }, { "epoch": 0.25525, "grad_norm": 4.610098361968994, "learning_rate": 3.761414141414142e-06, "loss": 6.310567474365234, "step": 25525 }, { "epoch": 0.2553, "grad_norm": 6.316253185272217, "learning_rate": 3.7611616161616166e-06, "loss": 6.3118335723876955, "step": 25530 }, { "epoch": 0.25535, "grad_norm": 6.338903903961182, "learning_rate": 3.7609090909090912e-06, "loss": 6.334992218017578, "step": 25535 }, { "epoch": 0.2554, "grad_norm": 6.317355155944824, "learning_rate": 3.760656565656566e-06, "loss": 6.361281204223633, "step": 25540 }, { "epoch": 0.25545, "grad_norm": 3.003718852996826, "learning_rate": 3.760404040404041e-06, "loss": 6.280581283569336, "step": 25545 }, { "epoch": 0.2555, "grad_norm": 5.89451789855957, "learning_rate": 3.7601515151515156e-06, "loss": 6.33014030456543, "step": 25550 }, { "epoch": 0.25555, "grad_norm": 4.448596000671387, "learning_rate": 3.75989898989899e-06, "loss": 6.3578025817871096, "step": 25555 }, { "epoch": 0.2556, "grad_norm": 4.1418657302856445, "learning_rate": 3.759646464646465e-06, "loss": 6.315767288208008, "step": 25560 }, { "epoch": 0.25565, "grad_norm": 5.621753692626953, "learning_rate": 3.75939393939394e-06, "loss": 6.3265380859375, "step": 25565 }, { "epoch": 0.2557, "grad_norm": 4.4001946449279785, "learning_rate": 3.7591414141414145e-06, "loss": 6.356894683837891, "step": 25570 }, { "epoch": 0.25575, "grad_norm": 5.946242332458496, "learning_rate": 3.758888888888889e-06, "loss": 6.29267692565918, "step": 25575 }, { "epoch": 0.2558, "grad_norm": 3.9284374713897705, "learning_rate": 3.7586363636363638e-06, "loss": 6.369844055175781, "step": 25580 }, { "epoch": 0.25585, "grad_norm": 4.874556064605713, "learning_rate": 3.758383838383839e-06, "loss": 6.2970123291015625, "step": 25585 }, { "epoch": 0.2559, "grad_norm": 5.120628833770752, "learning_rate": 3.7581313131313134e-06, "loss": 6.302325820922851, "step": 25590 }, { "epoch": 0.25595, "grad_norm": 5.658938884735107, "learning_rate": 3.757878787878788e-06, "loss": 6.300355529785156, "step": 25595 }, { "epoch": 0.256, "grad_norm": 6.131402969360352, "learning_rate": 3.7576262626262627e-06, "loss": 6.398571395874024, "step": 25600 }, { "epoch": 0.25605, "grad_norm": 7.134830474853516, "learning_rate": 3.7573737373737378e-06, "loss": 6.321735763549805, "step": 25605 }, { "epoch": 0.2561, "grad_norm": 7.279565811157227, "learning_rate": 3.7571212121212124e-06, "loss": 6.317948913574218, "step": 25610 }, { "epoch": 0.25615, "grad_norm": 4.105667591094971, "learning_rate": 3.756868686868687e-06, "loss": 6.355893325805664, "step": 25615 }, { "epoch": 0.2562, "grad_norm": 4.135354518890381, "learning_rate": 3.7566161616161617e-06, "loss": 6.299208068847657, "step": 25620 }, { "epoch": 0.25625, "grad_norm": 5.6695427894592285, "learning_rate": 3.7563636363636367e-06, "loss": 6.388299179077149, "step": 25625 }, { "epoch": 0.2563, "grad_norm": 6.777655124664307, "learning_rate": 3.7561111111111113e-06, "loss": 6.318672180175781, "step": 25630 }, { "epoch": 0.25635, "grad_norm": 11.51413631439209, "learning_rate": 3.755858585858586e-06, "loss": 6.269196701049805, "step": 25635 }, { "epoch": 0.2564, "grad_norm": 8.006168365478516, "learning_rate": 3.7556060606060606e-06, "loss": 6.303530502319336, "step": 25640 }, { "epoch": 0.25645, "grad_norm": 2.778656482696533, "learning_rate": 3.755353535353536e-06, "loss": 6.378502273559571, "step": 25645 }, { "epoch": 0.2565, "grad_norm": 6.352634429931641, "learning_rate": 3.7551010101010103e-06, "loss": 6.328384780883789, "step": 25650 }, { "epoch": 0.25655, "grad_norm": 7.626034736633301, "learning_rate": 3.754848484848485e-06, "loss": 6.2730567932128904, "step": 25655 }, { "epoch": 0.2566, "grad_norm": 5.763443946838379, "learning_rate": 3.7545959595959595e-06, "loss": 6.3103790283203125, "step": 25660 }, { "epoch": 0.25665, "grad_norm": 7.114588737487793, "learning_rate": 3.754343434343435e-06, "loss": 6.321139144897461, "step": 25665 }, { "epoch": 0.2567, "grad_norm": 4.970399379730225, "learning_rate": 3.7540909090909096e-06, "loss": 6.30126724243164, "step": 25670 }, { "epoch": 0.25675, "grad_norm": 15.335805892944336, "learning_rate": 3.753838383838384e-06, "loss": 6.380884170532227, "step": 25675 }, { "epoch": 0.2568, "grad_norm": 5.617876052856445, "learning_rate": 3.7535858585858585e-06, "loss": 6.320364379882813, "step": 25680 }, { "epoch": 0.25685, "grad_norm": 5.104880332946777, "learning_rate": 3.753333333333334e-06, "loss": 6.282043838500977, "step": 25685 }, { "epoch": 0.2569, "grad_norm": 3.9709420204162598, "learning_rate": 3.7530808080808086e-06, "loss": 6.3550865173339846, "step": 25690 }, { "epoch": 0.25695, "grad_norm": 4.676482677459717, "learning_rate": 3.7528282828282832e-06, "loss": 6.314486312866211, "step": 25695 }, { "epoch": 0.257, "grad_norm": 2.948791027069092, "learning_rate": 3.752575757575758e-06, "loss": 6.330905914306641, "step": 25700 }, { "epoch": 0.25705, "grad_norm": 5.080928802490234, "learning_rate": 3.752323232323233e-06, "loss": 6.272143936157226, "step": 25705 }, { "epoch": 0.2571, "grad_norm": 5.137936115264893, "learning_rate": 3.7520707070707075e-06, "loss": 6.2986083984375, "step": 25710 }, { "epoch": 0.25715, "grad_norm": 4.85935640335083, "learning_rate": 3.751818181818182e-06, "loss": 6.309001541137695, "step": 25715 }, { "epoch": 0.2572, "grad_norm": 10.076822280883789, "learning_rate": 3.751565656565657e-06, "loss": 6.393866729736328, "step": 25720 }, { "epoch": 0.25725, "grad_norm": 5.196927070617676, "learning_rate": 3.751313131313132e-06, "loss": 6.283771514892578, "step": 25725 }, { "epoch": 0.2573, "grad_norm": 5.493474006652832, "learning_rate": 3.7510606060606065e-06, "loss": 6.313966369628906, "step": 25730 }, { "epoch": 0.25735, "grad_norm": 3.0006868839263916, "learning_rate": 3.750808080808081e-06, "loss": 6.3156791687011715, "step": 25735 }, { "epoch": 0.2574, "grad_norm": 4.618391513824463, "learning_rate": 3.7505555555555557e-06, "loss": 6.311557388305664, "step": 25740 }, { "epoch": 0.25745, "grad_norm": 6.398454189300537, "learning_rate": 3.750303030303031e-06, "loss": 6.320861053466797, "step": 25745 }, { "epoch": 0.2575, "grad_norm": 3.639242172241211, "learning_rate": 3.7500505050505054e-06, "loss": 6.34509506225586, "step": 25750 }, { "epoch": 0.25755, "grad_norm": 5.7552266120910645, "learning_rate": 3.74979797979798e-06, "loss": 6.316498184204102, "step": 25755 }, { "epoch": 0.2576, "grad_norm": 6.610881328582764, "learning_rate": 3.7495454545454547e-06, "loss": 6.340697479248047, "step": 25760 }, { "epoch": 0.25765, "grad_norm": 7.406063556671143, "learning_rate": 3.7492929292929297e-06, "loss": 6.331300354003906, "step": 25765 }, { "epoch": 0.2577, "grad_norm": 19.54023551940918, "learning_rate": 3.7490404040404044e-06, "loss": 7.775923156738282, "step": 25770 }, { "epoch": 0.25775, "grad_norm": 5.497799873352051, "learning_rate": 3.748787878787879e-06, "loss": 6.349108505249023, "step": 25775 }, { "epoch": 0.2578, "grad_norm": 22.489675521850586, "learning_rate": 3.7485353535353536e-06, "loss": 6.5139213562011715, "step": 25780 }, { "epoch": 0.25785, "grad_norm": 6.94371223449707, "learning_rate": 3.7482828282828287e-06, "loss": 6.384370422363281, "step": 25785 }, { "epoch": 0.2579, "grad_norm": 28.294677734375, "learning_rate": 3.7480303030303033e-06, "loss": 6.2426399230957035, "step": 25790 }, { "epoch": 0.25795, "grad_norm": 7.523863315582275, "learning_rate": 3.747777777777778e-06, "loss": 6.267576599121094, "step": 25795 }, { "epoch": 0.258, "grad_norm": 5.9129414558410645, "learning_rate": 3.7475252525252526e-06, "loss": 6.481067657470703, "step": 25800 }, { "epoch": 0.25805, "grad_norm": 4.752256393432617, "learning_rate": 3.7472727272727276e-06, "loss": 6.270918273925782, "step": 25805 }, { "epoch": 0.2581, "grad_norm": 13.342780113220215, "learning_rate": 3.7470202020202023e-06, "loss": 6.469509887695312, "step": 25810 }, { "epoch": 0.25815, "grad_norm": 5.768407344818115, "learning_rate": 3.746767676767677e-06, "loss": 6.322460556030274, "step": 25815 }, { "epoch": 0.2582, "grad_norm": 4.341750621795654, "learning_rate": 3.7465151515151515e-06, "loss": 6.349857330322266, "step": 25820 }, { "epoch": 0.25825, "grad_norm": 5.218201160430908, "learning_rate": 3.7462626262626266e-06, "loss": 6.312518692016601, "step": 25825 }, { "epoch": 0.2583, "grad_norm": 3.5760719776153564, "learning_rate": 3.746010101010101e-06, "loss": 6.306364059448242, "step": 25830 }, { "epoch": 0.25835, "grad_norm": 5.729518890380859, "learning_rate": 3.745757575757576e-06, "loss": 6.274363708496094, "step": 25835 }, { "epoch": 0.2584, "grad_norm": 4.480441093444824, "learning_rate": 3.745505050505051e-06, "loss": 6.341136169433594, "step": 25840 }, { "epoch": 0.25845, "grad_norm": 6.3472065925598145, "learning_rate": 3.7452525252525255e-06, "loss": 6.340521621704101, "step": 25845 }, { "epoch": 0.2585, "grad_norm": 3.4044299125671387, "learning_rate": 3.745e-06, "loss": 6.288996887207031, "step": 25850 }, { "epoch": 0.25855, "grad_norm": 7.293043613433838, "learning_rate": 3.7447474747474748e-06, "loss": 6.310678863525391, "step": 25855 }, { "epoch": 0.2586, "grad_norm": 4.639132022857666, "learning_rate": 3.7444949494949503e-06, "loss": 6.285670471191406, "step": 25860 }, { "epoch": 0.25865, "grad_norm": 5.799014568328857, "learning_rate": 3.744242424242425e-06, "loss": 6.293280410766601, "step": 25865 }, { "epoch": 0.2587, "grad_norm": 9.18403148651123, "learning_rate": 3.743989898989899e-06, "loss": 6.364139556884766, "step": 25870 }, { "epoch": 0.25875, "grad_norm": 3.7617788314819336, "learning_rate": 3.7437373737373737e-06, "loss": 6.311959457397461, "step": 25875 }, { "epoch": 0.2588, "grad_norm": 7.300887107849121, "learning_rate": 3.743484848484849e-06, "loss": 6.351084899902344, "step": 25880 }, { "epoch": 0.25885, "grad_norm": 6.165378570556641, "learning_rate": 3.743232323232324e-06, "loss": 6.280418395996094, "step": 25885 }, { "epoch": 0.2589, "grad_norm": 4.42415189743042, "learning_rate": 3.7429797979797985e-06, "loss": 6.311539077758789, "step": 25890 }, { "epoch": 0.25895, "grad_norm": 5.727616310119629, "learning_rate": 3.742727272727273e-06, "loss": 6.279382705688477, "step": 25895 }, { "epoch": 0.259, "grad_norm": 6.383045196533203, "learning_rate": 3.742474747474748e-06, "loss": 6.369139862060547, "step": 25900 }, { "epoch": 0.25905, "grad_norm": 4.464376449584961, "learning_rate": 3.7422222222222228e-06, "loss": 6.341421127319336, "step": 25905 }, { "epoch": 0.2591, "grad_norm": 6.632906436920166, "learning_rate": 3.7419696969696974e-06, "loss": 6.2994647979736325, "step": 25910 }, { "epoch": 0.25915, "grad_norm": 6.0817551612854, "learning_rate": 3.741717171717172e-06, "loss": 6.340204238891602, "step": 25915 }, { "epoch": 0.2592, "grad_norm": 6.505630016326904, "learning_rate": 3.741464646464647e-06, "loss": 6.312049102783203, "step": 25920 }, { "epoch": 0.25925, "grad_norm": 6.062469959259033, "learning_rate": 3.7412121212121217e-06, "loss": 6.3038993835449215, "step": 25925 }, { "epoch": 0.2593, "grad_norm": 3.7920961380004883, "learning_rate": 3.7409595959595963e-06, "loss": 6.325644683837891, "step": 25930 }, { "epoch": 0.25935, "grad_norm": 6.971884727478027, "learning_rate": 3.740707070707071e-06, "loss": 6.307147598266601, "step": 25935 }, { "epoch": 0.2594, "grad_norm": 8.16590404510498, "learning_rate": 3.740454545454546e-06, "loss": 6.349484252929687, "step": 25940 }, { "epoch": 0.25945, "grad_norm": 4.030996322631836, "learning_rate": 3.7402020202020207e-06, "loss": 6.321955871582031, "step": 25945 }, { "epoch": 0.2595, "grad_norm": 4.152819633483887, "learning_rate": 3.7399494949494953e-06, "loss": 6.338518905639648, "step": 25950 }, { "epoch": 0.25955, "grad_norm": 6.071410655975342, "learning_rate": 3.73969696969697e-06, "loss": 6.300592422485352, "step": 25955 }, { "epoch": 0.2596, "grad_norm": 3.4485857486724854, "learning_rate": 3.739444444444445e-06, "loss": 6.290352630615234, "step": 25960 }, { "epoch": 0.25965, "grad_norm": 5.505067348480225, "learning_rate": 3.7391919191919196e-06, "loss": 6.327737808227539, "step": 25965 }, { "epoch": 0.2597, "grad_norm": 19.289127349853516, "learning_rate": 3.7389393939393942e-06, "loss": 6.577301788330078, "step": 25970 }, { "epoch": 0.25975, "grad_norm": 8.704923629760742, "learning_rate": 3.738686868686869e-06, "loss": 6.458626556396484, "step": 25975 }, { "epoch": 0.2598, "grad_norm": 4.893348693847656, "learning_rate": 3.738434343434344e-06, "loss": 6.3094535827636715, "step": 25980 }, { "epoch": 0.25985, "grad_norm": 3.9374477863311768, "learning_rate": 3.7381818181818185e-06, "loss": 6.3080909729003904, "step": 25985 }, { "epoch": 0.2599, "grad_norm": 5.476983547210693, "learning_rate": 3.737929292929293e-06, "loss": 6.241751861572266, "step": 25990 }, { "epoch": 0.25995, "grad_norm": 5.592909336090088, "learning_rate": 3.737676767676768e-06, "loss": 6.345246505737305, "step": 25995 }, { "epoch": 0.26, "grad_norm": 6.048604965209961, "learning_rate": 3.737424242424243e-06, "loss": 6.303387069702149, "step": 26000 }, { "epoch": 0.26005, "grad_norm": 7.592295169830322, "learning_rate": 3.7371717171717175e-06, "loss": 6.321836853027344, "step": 26005 }, { "epoch": 0.2601, "grad_norm": 13.155803680419922, "learning_rate": 3.736919191919192e-06, "loss": 6.378254699707031, "step": 26010 }, { "epoch": 0.26015, "grad_norm": 4.8016133308410645, "learning_rate": 3.7366666666666667e-06, "loss": 6.357674789428711, "step": 26015 }, { "epoch": 0.2602, "grad_norm": 7.2934346199035645, "learning_rate": 3.736414141414142e-06, "loss": 6.379250717163086, "step": 26020 }, { "epoch": 0.26025, "grad_norm": 7.6341681480407715, "learning_rate": 3.7361616161616164e-06, "loss": 6.316954803466797, "step": 26025 }, { "epoch": 0.2603, "grad_norm": 6.500908851623535, "learning_rate": 3.735909090909091e-06, "loss": 6.31068000793457, "step": 26030 }, { "epoch": 0.26035, "grad_norm": 10.253875732421875, "learning_rate": 3.7356565656565657e-06, "loss": 6.368290710449219, "step": 26035 }, { "epoch": 0.2604, "grad_norm": 5.357359409332275, "learning_rate": 3.7354040404040407e-06, "loss": 6.363461685180664, "step": 26040 }, { "epoch": 0.26045, "grad_norm": 4.9127278327941895, "learning_rate": 3.7351515151515154e-06, "loss": 6.3103893280029295, "step": 26045 }, { "epoch": 0.2605, "grad_norm": 3.805856227874756, "learning_rate": 3.73489898989899e-06, "loss": 6.255103302001953, "step": 26050 }, { "epoch": 0.26055, "grad_norm": 6.294460296630859, "learning_rate": 3.7346464646464646e-06, "loss": 6.222151184082032, "step": 26055 }, { "epoch": 0.2606, "grad_norm": 5.851790428161621, "learning_rate": 3.73439393939394e-06, "loss": 6.358969497680664, "step": 26060 }, { "epoch": 0.26065, "grad_norm": 4.111115455627441, "learning_rate": 3.7341414141414143e-06, "loss": 6.302725982666016, "step": 26065 }, { "epoch": 0.2607, "grad_norm": 3.745502233505249, "learning_rate": 3.733888888888889e-06, "loss": 6.277087020874023, "step": 26070 }, { "epoch": 0.26075, "grad_norm": 4.239696502685547, "learning_rate": 3.7336363636363636e-06, "loss": 6.2929542541503904, "step": 26075 }, { "epoch": 0.2608, "grad_norm": 5.222475051879883, "learning_rate": 3.733383838383839e-06, "loss": 6.278980255126953, "step": 26080 }, { "epoch": 0.26085, "grad_norm": 3.807363986968994, "learning_rate": 3.7331313131313137e-06, "loss": 6.3698570251464846, "step": 26085 }, { "epoch": 0.2609, "grad_norm": 3.5199828147888184, "learning_rate": 3.732878787878788e-06, "loss": 6.369163513183594, "step": 26090 }, { "epoch": 0.26095, "grad_norm": 3.936950922012329, "learning_rate": 3.7326262626262625e-06, "loss": 6.277021789550782, "step": 26095 }, { "epoch": 0.261, "grad_norm": 5.894335746765137, "learning_rate": 3.732373737373738e-06, "loss": 6.303717422485351, "step": 26100 }, { "epoch": 0.26105, "grad_norm": 4.626053810119629, "learning_rate": 3.7321212121212126e-06, "loss": 6.30285873413086, "step": 26105 }, { "epoch": 0.2611, "grad_norm": 6.459420680999756, "learning_rate": 3.7318686868686873e-06, "loss": 6.346944427490234, "step": 26110 }, { "epoch": 0.26115, "grad_norm": 4.927858829498291, "learning_rate": 3.731616161616162e-06, "loss": 6.299044799804688, "step": 26115 }, { "epoch": 0.2612, "grad_norm": 5.024233341217041, "learning_rate": 3.731363636363637e-06, "loss": 6.313681411743164, "step": 26120 }, { "epoch": 0.26125, "grad_norm": 5.382054805755615, "learning_rate": 3.7311111111111116e-06, "loss": 6.312351226806641, "step": 26125 }, { "epoch": 0.2613, "grad_norm": 4.888686656951904, "learning_rate": 3.730858585858586e-06, "loss": 6.301369476318359, "step": 26130 }, { "epoch": 0.26135, "grad_norm": 4.748159408569336, "learning_rate": 3.730606060606061e-06, "loss": 6.323791885375977, "step": 26135 }, { "epoch": 0.2614, "grad_norm": 4.530581474304199, "learning_rate": 3.730353535353536e-06, "loss": 6.3240100860595705, "step": 26140 }, { "epoch": 0.26145, "grad_norm": 8.526449203491211, "learning_rate": 3.7301010101010105e-06, "loss": 6.3054344177246096, "step": 26145 }, { "epoch": 0.2615, "grad_norm": 6.011352062225342, "learning_rate": 3.729848484848485e-06, "loss": 6.288131332397461, "step": 26150 }, { "epoch": 0.26155, "grad_norm": 3.5370843410491943, "learning_rate": 3.7295959595959598e-06, "loss": 6.298888397216797, "step": 26155 }, { "epoch": 0.2616, "grad_norm": 6.116135597229004, "learning_rate": 3.729343434343435e-06, "loss": 6.349218368530273, "step": 26160 }, { "epoch": 0.26165, "grad_norm": 9.984169006347656, "learning_rate": 3.7290909090909095e-06, "loss": 6.626108551025391, "step": 26165 }, { "epoch": 0.2617, "grad_norm": 5.469905853271484, "learning_rate": 3.728838383838384e-06, "loss": 6.323924255371094, "step": 26170 }, { "epoch": 0.26175, "grad_norm": 3.216952085494995, "learning_rate": 3.7285858585858587e-06, "loss": 6.3355766296386715, "step": 26175 }, { "epoch": 0.2618, "grad_norm": 6.09674596786499, "learning_rate": 3.7283333333333338e-06, "loss": 6.317614364624023, "step": 26180 }, { "epoch": 0.26185, "grad_norm": 5.6028313636779785, "learning_rate": 3.7280808080808084e-06, "loss": 6.277299499511718, "step": 26185 }, { "epoch": 0.2619, "grad_norm": 4.124261856079102, "learning_rate": 3.727828282828283e-06, "loss": 6.254676055908203, "step": 26190 }, { "epoch": 0.26195, "grad_norm": 7.0662126541137695, "learning_rate": 3.7275757575757577e-06, "loss": 6.285635375976563, "step": 26195 }, { "epoch": 0.262, "grad_norm": 3.379336357116699, "learning_rate": 3.7273232323232327e-06, "loss": 6.317951965332031, "step": 26200 }, { "epoch": 0.26205, "grad_norm": 4.760306358337402, "learning_rate": 3.7270707070707074e-06, "loss": 6.313139343261719, "step": 26205 }, { "epoch": 0.2621, "grad_norm": 5.957250118255615, "learning_rate": 3.726818181818182e-06, "loss": 6.434622192382813, "step": 26210 }, { "epoch": 0.26215, "grad_norm": 6.194187641143799, "learning_rate": 3.7265656565656566e-06, "loss": 6.335553359985352, "step": 26215 }, { "epoch": 0.2622, "grad_norm": 5.565445423126221, "learning_rate": 3.7263131313131317e-06, "loss": 6.311965560913086, "step": 26220 }, { "epoch": 0.26225, "grad_norm": 4.003701686859131, "learning_rate": 3.7260606060606063e-06, "loss": 6.34698715209961, "step": 26225 }, { "epoch": 0.2623, "grad_norm": 4.141316890716553, "learning_rate": 3.725808080808081e-06, "loss": 6.357235717773437, "step": 26230 }, { "epoch": 0.26235, "grad_norm": 14.1913480758667, "learning_rate": 3.7255555555555556e-06, "loss": 6.530574035644531, "step": 26235 }, { "epoch": 0.2624, "grad_norm": 6.550050258636475, "learning_rate": 3.7253030303030306e-06, "loss": 6.553728485107422, "step": 26240 }, { "epoch": 0.26245, "grad_norm": 4.129290580749512, "learning_rate": 3.7250505050505052e-06, "loss": 6.1954082489013675, "step": 26245 }, { "epoch": 0.2625, "grad_norm": 4.914117813110352, "learning_rate": 3.72479797979798e-06, "loss": 6.194663238525391, "step": 26250 }, { "epoch": 0.26255, "grad_norm": 7.698709011077881, "learning_rate": 3.7245454545454545e-06, "loss": 6.348603057861328, "step": 26255 }, { "epoch": 0.2626, "grad_norm": 6.306207656860352, "learning_rate": 3.7242929292929296e-06, "loss": 6.334062194824218, "step": 26260 }, { "epoch": 0.26265, "grad_norm": 6.408227443695068, "learning_rate": 3.724040404040404e-06, "loss": 6.345890808105469, "step": 26265 }, { "epoch": 0.2627, "grad_norm": 9.241336822509766, "learning_rate": 3.723787878787879e-06, "loss": 6.296112442016602, "step": 26270 }, { "epoch": 0.26275, "grad_norm": 5.870677471160889, "learning_rate": 3.7235353535353543e-06, "loss": 6.323639297485352, "step": 26275 }, { "epoch": 0.2628, "grad_norm": 3.7651994228363037, "learning_rate": 3.723282828282829e-06, "loss": 6.28508415222168, "step": 26280 }, { "epoch": 0.26285, "grad_norm": 8.727423667907715, "learning_rate": 3.723030303030303e-06, "loss": 6.2695472717285154, "step": 26285 }, { "epoch": 0.2629, "grad_norm": 8.419320106506348, "learning_rate": 3.7227777777777778e-06, "loss": 6.382785797119141, "step": 26290 }, { "epoch": 0.26295, "grad_norm": 5.617575168609619, "learning_rate": 3.7225252525252532e-06, "loss": 6.256048965454101, "step": 26295 }, { "epoch": 0.263, "grad_norm": 4.341620922088623, "learning_rate": 3.722272727272728e-06, "loss": 6.304189300537109, "step": 26300 }, { "epoch": 0.26305, "grad_norm": 3.023475170135498, "learning_rate": 3.7220202020202025e-06, "loss": 6.348501586914063, "step": 26305 }, { "epoch": 0.2631, "grad_norm": 4.962677478790283, "learning_rate": 3.721767676767677e-06, "loss": 6.379773330688477, "step": 26310 }, { "epoch": 0.26315, "grad_norm": 6.683004856109619, "learning_rate": 3.721515151515152e-06, "loss": 6.275565719604492, "step": 26315 }, { "epoch": 0.2632, "grad_norm": 5.569352149963379, "learning_rate": 3.721262626262627e-06, "loss": 6.327439880371093, "step": 26320 }, { "epoch": 0.26325, "grad_norm": 10.31742000579834, "learning_rate": 3.7210101010101014e-06, "loss": 6.309109497070312, "step": 26325 }, { "epoch": 0.2633, "grad_norm": 6.658947944641113, "learning_rate": 3.720757575757576e-06, "loss": 6.272724151611328, "step": 26330 }, { "epoch": 0.26335, "grad_norm": 7.066236972808838, "learning_rate": 3.720505050505051e-06, "loss": 6.2431999206542965, "step": 26335 }, { "epoch": 0.2634, "grad_norm": 3.53053879737854, "learning_rate": 3.7202525252525258e-06, "loss": 6.298480224609375, "step": 26340 }, { "epoch": 0.26345, "grad_norm": 4.227921962738037, "learning_rate": 3.7200000000000004e-06, "loss": 6.295641326904297, "step": 26345 }, { "epoch": 0.2635, "grad_norm": 5.329212188720703, "learning_rate": 3.719747474747475e-06, "loss": 6.239668273925782, "step": 26350 }, { "epoch": 0.26355, "grad_norm": 8.125785827636719, "learning_rate": 3.71949494949495e-06, "loss": 6.27431640625, "step": 26355 }, { "epoch": 0.2636, "grad_norm": 3.123746156692505, "learning_rate": 3.7192424242424247e-06, "loss": 6.278509140014648, "step": 26360 }, { "epoch": 0.26365, "grad_norm": 5.170199871063232, "learning_rate": 3.7189898989898993e-06, "loss": 6.2833503723144535, "step": 26365 }, { "epoch": 0.2637, "grad_norm": 6.96051025390625, "learning_rate": 3.718737373737374e-06, "loss": 6.320729827880859, "step": 26370 }, { "epoch": 0.26375, "grad_norm": 4.483336925506592, "learning_rate": 3.718484848484849e-06, "loss": 6.277563095092773, "step": 26375 }, { "epoch": 0.2638, "grad_norm": 4.463827133178711, "learning_rate": 3.7182323232323236e-06, "loss": 6.298120498657227, "step": 26380 }, { "epoch": 0.26385, "grad_norm": 6.680516719818115, "learning_rate": 3.7179797979797983e-06, "loss": 6.346215057373047, "step": 26385 }, { "epoch": 0.2639, "grad_norm": 8.19997787475586, "learning_rate": 3.717727272727273e-06, "loss": 6.31745719909668, "step": 26390 }, { "epoch": 0.26395, "grad_norm": 8.61323070526123, "learning_rate": 3.717474747474748e-06, "loss": 6.302040481567383, "step": 26395 }, { "epoch": 0.264, "grad_norm": 6.7611308097839355, "learning_rate": 3.7172222222222226e-06, "loss": 6.310557556152344, "step": 26400 }, { "epoch": 0.26405, "grad_norm": 5.070945739746094, "learning_rate": 3.7169696969696972e-06, "loss": 6.312297058105469, "step": 26405 }, { "epoch": 0.2641, "grad_norm": 5.215084075927734, "learning_rate": 3.716717171717172e-06, "loss": 6.345547485351562, "step": 26410 }, { "epoch": 0.26415, "grad_norm": 6.304257392883301, "learning_rate": 3.716464646464647e-06, "loss": 6.274436187744141, "step": 26415 }, { "epoch": 0.2642, "grad_norm": 5.778364658355713, "learning_rate": 3.7162121212121215e-06, "loss": 6.291240310668945, "step": 26420 }, { "epoch": 0.26425, "grad_norm": 7.779139518737793, "learning_rate": 3.715959595959596e-06, "loss": 6.126119995117188, "step": 26425 }, { "epoch": 0.2643, "grad_norm": 14.83737850189209, "learning_rate": 3.7157070707070708e-06, "loss": 6.372323608398437, "step": 26430 }, { "epoch": 0.26435, "grad_norm": 6.235430717468262, "learning_rate": 3.715454545454546e-06, "loss": 6.356795120239258, "step": 26435 }, { "epoch": 0.2644, "grad_norm": 6.155943870544434, "learning_rate": 3.7152020202020205e-06, "loss": 6.356856918334961, "step": 26440 }, { "epoch": 0.26445, "grad_norm": 4.312663555145264, "learning_rate": 3.714949494949495e-06, "loss": 6.3146003723144535, "step": 26445 }, { "epoch": 0.2645, "grad_norm": 4.167872905731201, "learning_rate": 3.7146969696969697e-06, "loss": 6.284971618652344, "step": 26450 }, { "epoch": 0.26455, "grad_norm": 5.625514507293701, "learning_rate": 3.7144444444444448e-06, "loss": 6.309310531616211, "step": 26455 }, { "epoch": 0.2646, "grad_norm": 6.204106330871582, "learning_rate": 3.7141919191919194e-06, "loss": 6.316373443603515, "step": 26460 }, { "epoch": 0.26465, "grad_norm": 4.534530162811279, "learning_rate": 3.713939393939394e-06, "loss": 6.310372161865234, "step": 26465 }, { "epoch": 0.2647, "grad_norm": 9.556028366088867, "learning_rate": 3.7136868686868687e-06, "loss": 6.421902465820312, "step": 26470 }, { "epoch": 0.26475, "grad_norm": 5.625797748565674, "learning_rate": 3.713434343434344e-06, "loss": 6.31466064453125, "step": 26475 }, { "epoch": 0.2648, "grad_norm": 4.716540336608887, "learning_rate": 3.7131818181818184e-06, "loss": 6.277513122558593, "step": 26480 }, { "epoch": 0.26485, "grad_norm": 6.932608127593994, "learning_rate": 3.712929292929293e-06, "loss": 6.288468933105468, "step": 26485 }, { "epoch": 0.2649, "grad_norm": 4.093786716461182, "learning_rate": 3.7126767676767676e-06, "loss": 6.294921875, "step": 26490 }, { "epoch": 0.26495, "grad_norm": 4.0656352043151855, "learning_rate": 3.712424242424243e-06, "loss": 6.466824340820312, "step": 26495 }, { "epoch": 0.265, "grad_norm": 4.660396099090576, "learning_rate": 3.7121717171717177e-06, "loss": 6.340495681762695, "step": 26500 }, { "epoch": 0.26505, "grad_norm": 5.2467041015625, "learning_rate": 3.711919191919192e-06, "loss": 6.352877807617188, "step": 26505 }, { "epoch": 0.2651, "grad_norm": 6.64797830581665, "learning_rate": 3.7116666666666666e-06, "loss": 6.310916137695313, "step": 26510 }, { "epoch": 0.26515, "grad_norm": 7.042019367218018, "learning_rate": 3.711414141414142e-06, "loss": 6.320623779296875, "step": 26515 }, { "epoch": 0.2652, "grad_norm": 4.990765571594238, "learning_rate": 3.7111616161616167e-06, "loss": 6.351276397705078, "step": 26520 }, { "epoch": 0.26525, "grad_norm": 4.41853666305542, "learning_rate": 3.7109090909090913e-06, "loss": 6.348481369018555, "step": 26525 }, { "epoch": 0.2653, "grad_norm": 6.613559722900391, "learning_rate": 3.710656565656566e-06, "loss": 6.358288955688477, "step": 26530 }, { "epoch": 0.26535, "grad_norm": 3.7633843421936035, "learning_rate": 3.710404040404041e-06, "loss": 6.592947387695313, "step": 26535 }, { "epoch": 0.2654, "grad_norm": 28.784074783325195, "learning_rate": 3.7101515151515156e-06, "loss": 6.396603393554687, "step": 26540 }, { "epoch": 0.26545, "grad_norm": 5.612402439117432, "learning_rate": 3.7098989898989902e-06, "loss": 6.314526748657227, "step": 26545 }, { "epoch": 0.2655, "grad_norm": 3.840545177459717, "learning_rate": 3.709646464646465e-06, "loss": 6.299229431152344, "step": 26550 }, { "epoch": 0.26555, "grad_norm": 4.948549747467041, "learning_rate": 3.70939393939394e-06, "loss": 6.3980857849121096, "step": 26555 }, { "epoch": 0.2656, "grad_norm": 5.623737335205078, "learning_rate": 3.7091414141414146e-06, "loss": 6.2890983581542965, "step": 26560 }, { "epoch": 0.26565, "grad_norm": 8.677591323852539, "learning_rate": 3.708888888888889e-06, "loss": 6.221810150146484, "step": 26565 }, { "epoch": 0.2657, "grad_norm": 5.249553680419922, "learning_rate": 3.708636363636364e-06, "loss": 6.267752456665039, "step": 26570 }, { "epoch": 0.26575, "grad_norm": 6.837306022644043, "learning_rate": 3.708383838383839e-06, "loss": 6.326768493652343, "step": 26575 }, { "epoch": 0.2658, "grad_norm": 3.9022114276885986, "learning_rate": 3.7081313131313135e-06, "loss": 6.27919921875, "step": 26580 }, { "epoch": 0.26585, "grad_norm": 5.0991740226745605, "learning_rate": 3.707878787878788e-06, "loss": 6.308753967285156, "step": 26585 }, { "epoch": 0.2659, "grad_norm": 4.755216598510742, "learning_rate": 3.7076262626262628e-06, "loss": 6.309245681762695, "step": 26590 }, { "epoch": 0.26595, "grad_norm": 6.145205974578857, "learning_rate": 3.707373737373738e-06, "loss": 6.303793334960938, "step": 26595 }, { "epoch": 0.266, "grad_norm": 6.579081058502197, "learning_rate": 3.7071212121212124e-06, "loss": 6.380836486816406, "step": 26600 }, { "epoch": 0.26605, "grad_norm": 5.823310375213623, "learning_rate": 3.706868686868687e-06, "loss": 6.327860260009766, "step": 26605 }, { "epoch": 0.2661, "grad_norm": 14.725138664245605, "learning_rate": 3.7066161616161617e-06, "loss": 6.241134262084961, "step": 26610 }, { "epoch": 0.26615, "grad_norm": 6.160394191741943, "learning_rate": 3.7063636363636368e-06, "loss": 6.327823257446289, "step": 26615 }, { "epoch": 0.2662, "grad_norm": 12.070825576782227, "learning_rate": 3.7061111111111114e-06, "loss": 6.3534400939941404, "step": 26620 }, { "epoch": 0.26625, "grad_norm": 9.368671417236328, "learning_rate": 3.705858585858586e-06, "loss": 6.320414733886719, "step": 26625 }, { "epoch": 0.2663, "grad_norm": 5.5563645362854, "learning_rate": 3.7056060606060607e-06, "loss": 6.280802917480469, "step": 26630 }, { "epoch": 0.26635, "grad_norm": 6.470174789428711, "learning_rate": 3.7053535353535357e-06, "loss": 6.311470031738281, "step": 26635 }, { "epoch": 0.2664, "grad_norm": 6.211435317993164, "learning_rate": 3.7051010101010103e-06, "loss": 6.321544647216797, "step": 26640 }, { "epoch": 0.26645, "grad_norm": 6.208619594573975, "learning_rate": 3.704848484848485e-06, "loss": 6.288274765014648, "step": 26645 }, { "epoch": 0.2665, "grad_norm": 5.49216890335083, "learning_rate": 3.7045959595959596e-06, "loss": 6.314466094970703, "step": 26650 }, { "epoch": 0.26655, "grad_norm": 4.749322414398193, "learning_rate": 3.7043434343434346e-06, "loss": 6.310703277587891, "step": 26655 }, { "epoch": 0.2666, "grad_norm": 6.292878150939941, "learning_rate": 3.7040909090909093e-06, "loss": 6.256142425537109, "step": 26660 }, { "epoch": 0.26665, "grad_norm": 9.90998363494873, "learning_rate": 3.703838383838384e-06, "loss": 6.328286743164062, "step": 26665 }, { "epoch": 0.2667, "grad_norm": 8.273748397827148, "learning_rate": 3.7035858585858585e-06, "loss": 6.391135406494141, "step": 26670 }, { "epoch": 0.26675, "grad_norm": 7.825234413146973, "learning_rate": 3.7033333333333336e-06, "loss": 6.330204010009766, "step": 26675 }, { "epoch": 0.2668, "grad_norm": 6.633081436157227, "learning_rate": 3.7030808080808082e-06, "loss": 6.2249290466308596, "step": 26680 }, { "epoch": 0.26685, "grad_norm": 6.235692024230957, "learning_rate": 3.702828282828283e-06, "loss": 6.296071624755859, "step": 26685 }, { "epoch": 0.2669, "grad_norm": 2.764573574066162, "learning_rate": 3.7025757575757583e-06, "loss": 6.312575149536133, "step": 26690 }, { "epoch": 0.26695, "grad_norm": 24.203401565551758, "learning_rate": 3.702323232323233e-06, "loss": 5.779959869384766, "step": 26695 }, { "epoch": 0.267, "grad_norm": 6.252912521362305, "learning_rate": 3.702070707070707e-06, "loss": 6.187589645385742, "step": 26700 }, { "epoch": 0.26705, "grad_norm": 5.7782673835754395, "learning_rate": 3.701818181818182e-06, "loss": 6.290151214599609, "step": 26705 }, { "epoch": 0.2671, "grad_norm": 4.44142484664917, "learning_rate": 3.7015656565656573e-06, "loss": 6.308514404296875, "step": 26710 }, { "epoch": 0.26715, "grad_norm": 3.782618284225464, "learning_rate": 3.701313131313132e-06, "loss": 6.288658905029297, "step": 26715 }, { "epoch": 0.2672, "grad_norm": 4.4122090339660645, "learning_rate": 3.7010606060606065e-06, "loss": 6.301872253417969, "step": 26720 }, { "epoch": 0.26725, "grad_norm": 5.271899223327637, "learning_rate": 3.700808080808081e-06, "loss": 6.352619171142578, "step": 26725 }, { "epoch": 0.2673, "grad_norm": 9.056796073913574, "learning_rate": 3.7005555555555562e-06, "loss": 6.293643951416016, "step": 26730 }, { "epoch": 0.26735, "grad_norm": 9.25720500946045, "learning_rate": 3.700303030303031e-06, "loss": 6.19014892578125, "step": 26735 }, { "epoch": 0.2674, "grad_norm": 6.81941032409668, "learning_rate": 3.7000505050505055e-06, "loss": 6.31116943359375, "step": 26740 }, { "epoch": 0.26745, "grad_norm": 6.528906345367432, "learning_rate": 3.69979797979798e-06, "loss": 6.340464782714844, "step": 26745 }, { "epoch": 0.2675, "grad_norm": 6.670742034912109, "learning_rate": 3.699545454545455e-06, "loss": 6.305242919921875, "step": 26750 }, { "epoch": 0.26755, "grad_norm": 5.823094844818115, "learning_rate": 3.69929292929293e-06, "loss": 6.354362869262696, "step": 26755 }, { "epoch": 0.2676, "grad_norm": 4.012843608856201, "learning_rate": 3.6990404040404044e-06, "loss": 6.304963684082031, "step": 26760 }, { "epoch": 0.26765, "grad_norm": 3.887054204940796, "learning_rate": 3.698787878787879e-06, "loss": 6.309297180175781, "step": 26765 }, { "epoch": 0.2677, "grad_norm": 6.6337738037109375, "learning_rate": 3.698535353535354e-06, "loss": 6.304352569580078, "step": 26770 }, { "epoch": 0.26775, "grad_norm": 5.795122146606445, "learning_rate": 3.6982828282828287e-06, "loss": 6.2976325988769535, "step": 26775 }, { "epoch": 0.2678, "grad_norm": 7.247713565826416, "learning_rate": 3.6980303030303034e-06, "loss": 6.309531402587891, "step": 26780 }, { "epoch": 0.26785, "grad_norm": 3.1448569297790527, "learning_rate": 3.697777777777778e-06, "loss": 6.322195434570313, "step": 26785 }, { "epoch": 0.2679, "grad_norm": 4.731367111206055, "learning_rate": 3.697525252525253e-06, "loss": 6.333831024169922, "step": 26790 }, { "epoch": 0.26795, "grad_norm": 5.314652442932129, "learning_rate": 3.6972727272727277e-06, "loss": 6.33575210571289, "step": 26795 }, { "epoch": 0.268, "grad_norm": 6.6948628425598145, "learning_rate": 3.6970202020202023e-06, "loss": 6.286165618896485, "step": 26800 }, { "epoch": 0.26805, "grad_norm": 4.824985027313232, "learning_rate": 3.696767676767677e-06, "loss": 6.4542091369628904, "step": 26805 }, { "epoch": 0.2681, "grad_norm": 24.83047866821289, "learning_rate": 3.696515151515152e-06, "loss": 6.429389190673828, "step": 26810 }, { "epoch": 0.26815, "grad_norm": 3.8253021240234375, "learning_rate": 3.6962626262626266e-06, "loss": 6.41650619506836, "step": 26815 }, { "epoch": 0.2682, "grad_norm": 5.6358842849731445, "learning_rate": 3.6960101010101013e-06, "loss": 6.283239364624023, "step": 26820 }, { "epoch": 0.26825, "grad_norm": 5.373743534088135, "learning_rate": 3.695757575757576e-06, "loss": 6.298413467407227, "step": 26825 }, { "epoch": 0.2683, "grad_norm": 6.3748908042907715, "learning_rate": 3.695505050505051e-06, "loss": 6.386786651611328, "step": 26830 }, { "epoch": 0.26835, "grad_norm": 10.69896411895752, "learning_rate": 3.6952525252525256e-06, "loss": 6.309291839599609, "step": 26835 }, { "epoch": 0.2684, "grad_norm": 8.072282791137695, "learning_rate": 3.695e-06, "loss": 6.316229248046875, "step": 26840 }, { "epoch": 0.26845, "grad_norm": 3.893014907836914, "learning_rate": 3.694747474747475e-06, "loss": 6.292485046386719, "step": 26845 }, { "epoch": 0.2685, "grad_norm": 5.410457611083984, "learning_rate": 3.69449494949495e-06, "loss": 6.35026741027832, "step": 26850 }, { "epoch": 0.26855, "grad_norm": 4.644840240478516, "learning_rate": 3.6942424242424245e-06, "loss": 6.29133415222168, "step": 26855 }, { "epoch": 0.2686, "grad_norm": 3.826502561569214, "learning_rate": 3.693989898989899e-06, "loss": 6.323394012451172, "step": 26860 }, { "epoch": 0.26865, "grad_norm": 4.887697219848633, "learning_rate": 3.6937373737373738e-06, "loss": 6.293112945556641, "step": 26865 }, { "epoch": 0.2687, "grad_norm": 7.097919464111328, "learning_rate": 3.693484848484849e-06, "loss": 6.375573348999024, "step": 26870 }, { "epoch": 0.26875, "grad_norm": 5.015824317932129, "learning_rate": 3.6932323232323235e-06, "loss": 6.31727180480957, "step": 26875 }, { "epoch": 0.2688, "grad_norm": 15.25874137878418, "learning_rate": 3.692979797979798e-06, "loss": 6.388730239868164, "step": 26880 }, { "epoch": 0.26885, "grad_norm": 5.598916053771973, "learning_rate": 3.6927272727272727e-06, "loss": 6.4653167724609375, "step": 26885 }, { "epoch": 0.2689, "grad_norm": 7.2569169998168945, "learning_rate": 3.692474747474748e-06, "loss": 6.320243835449219, "step": 26890 }, { "epoch": 0.26895, "grad_norm": 4.8509697914123535, "learning_rate": 3.6922222222222224e-06, "loss": 6.320016860961914, "step": 26895 }, { "epoch": 0.269, "grad_norm": 5.818135738372803, "learning_rate": 3.691969696969697e-06, "loss": 6.3122001647949215, "step": 26900 }, { "epoch": 0.26905, "grad_norm": 5.155037879943848, "learning_rate": 3.6917171717171717e-06, "loss": 6.285564041137695, "step": 26905 }, { "epoch": 0.2691, "grad_norm": 4.966747760772705, "learning_rate": 3.691464646464647e-06, "loss": 6.344732284545898, "step": 26910 }, { "epoch": 0.26915, "grad_norm": 3.5271859169006348, "learning_rate": 3.6912121212121218e-06, "loss": 6.29732666015625, "step": 26915 }, { "epoch": 0.2692, "grad_norm": 3.3971197605133057, "learning_rate": 3.6909595959595964e-06, "loss": 6.478607177734375, "step": 26920 }, { "epoch": 0.26925, "grad_norm": 3.548765182495117, "learning_rate": 3.6907070707070706e-06, "loss": 6.427531433105469, "step": 26925 }, { "epoch": 0.2693, "grad_norm": 6.163392066955566, "learning_rate": 3.690454545454546e-06, "loss": 6.326008987426758, "step": 26930 }, { "epoch": 0.26935, "grad_norm": 9.534852981567383, "learning_rate": 3.6902020202020207e-06, "loss": 6.312456512451172, "step": 26935 }, { "epoch": 0.2694, "grad_norm": 5.93145227432251, "learning_rate": 3.6899494949494953e-06, "loss": 6.4287872314453125, "step": 26940 }, { "epoch": 0.26945, "grad_norm": 6.5361008644104, "learning_rate": 3.68969696969697e-06, "loss": 6.198099517822266, "step": 26945 }, { "epoch": 0.2695, "grad_norm": 3.01558256149292, "learning_rate": 3.689444444444445e-06, "loss": 6.331450271606445, "step": 26950 }, { "epoch": 0.26955, "grad_norm": 7.183080196380615, "learning_rate": 3.6891919191919197e-06, "loss": 6.267487716674805, "step": 26955 }, { "epoch": 0.2696, "grad_norm": 5.281537055969238, "learning_rate": 3.6889393939393943e-06, "loss": 6.286395263671875, "step": 26960 }, { "epoch": 0.26965, "grad_norm": 8.85568618774414, "learning_rate": 3.688686868686869e-06, "loss": 6.417039489746093, "step": 26965 }, { "epoch": 0.2697, "grad_norm": 4.938319683074951, "learning_rate": 3.688434343434344e-06, "loss": 6.321633529663086, "step": 26970 }, { "epoch": 0.26975, "grad_norm": 4.457610130310059, "learning_rate": 3.6881818181818186e-06, "loss": 6.302154922485352, "step": 26975 }, { "epoch": 0.2698, "grad_norm": 11.88290786743164, "learning_rate": 3.6879292929292932e-06, "loss": 6.240940093994141, "step": 26980 }, { "epoch": 0.26985, "grad_norm": 4.763050079345703, "learning_rate": 3.687676767676768e-06, "loss": 6.443199157714844, "step": 26985 }, { "epoch": 0.2699, "grad_norm": 6.093479156494141, "learning_rate": 3.687424242424243e-06, "loss": 6.3075920104980465, "step": 26990 }, { "epoch": 0.26995, "grad_norm": 6.936769008636475, "learning_rate": 3.6871717171717175e-06, "loss": 6.2605140686035154, "step": 26995 }, { "epoch": 0.27, "grad_norm": 6.309386730194092, "learning_rate": 3.686919191919192e-06, "loss": 6.352606201171875, "step": 27000 }, { "epoch": 0.27005, "grad_norm": 3.5648856163024902, "learning_rate": 3.686666666666667e-06, "loss": 6.348497009277343, "step": 27005 }, { "epoch": 0.2701, "grad_norm": 6.490140914916992, "learning_rate": 3.686414141414142e-06, "loss": 6.304312896728516, "step": 27010 }, { "epoch": 0.27015, "grad_norm": 7.55325984954834, "learning_rate": 3.6861616161616165e-06, "loss": 6.312275695800781, "step": 27015 }, { "epoch": 0.2702, "grad_norm": 5.265467643737793, "learning_rate": 3.685909090909091e-06, "loss": 6.306962966918945, "step": 27020 }, { "epoch": 0.27025, "grad_norm": 8.939051628112793, "learning_rate": 3.6856565656565657e-06, "loss": 6.299648284912109, "step": 27025 }, { "epoch": 0.2703, "grad_norm": 4.23619270324707, "learning_rate": 3.685404040404041e-06, "loss": 6.301703262329101, "step": 27030 }, { "epoch": 0.27035, "grad_norm": 3.9826061725616455, "learning_rate": 3.6851515151515154e-06, "loss": 6.297209930419922, "step": 27035 }, { "epoch": 0.2704, "grad_norm": 3.513404130935669, "learning_rate": 3.68489898989899e-06, "loss": 6.337911605834961, "step": 27040 }, { "epoch": 0.27045, "grad_norm": 5.519970417022705, "learning_rate": 3.6846464646464647e-06, "loss": 6.3209583282470705, "step": 27045 }, { "epoch": 0.2705, "grad_norm": 4.103199005126953, "learning_rate": 3.6843939393939397e-06, "loss": 6.324940872192383, "step": 27050 }, { "epoch": 0.27055, "grad_norm": 3.660179853439331, "learning_rate": 3.6841414141414144e-06, "loss": 6.286650848388672, "step": 27055 }, { "epoch": 0.2706, "grad_norm": 17.357694625854492, "learning_rate": 3.683888888888889e-06, "loss": 6.414802551269531, "step": 27060 }, { "epoch": 0.27065, "grad_norm": 17.362144470214844, "learning_rate": 3.6836363636363636e-06, "loss": 6.405165100097657, "step": 27065 }, { "epoch": 0.2707, "grad_norm": 5.589799404144287, "learning_rate": 3.6833838383838387e-06, "loss": 6.462714385986328, "step": 27070 }, { "epoch": 0.27075, "grad_norm": 6.704919815063477, "learning_rate": 3.6831313131313133e-06, "loss": 6.357617950439453, "step": 27075 }, { "epoch": 0.2708, "grad_norm": 3.599680185317993, "learning_rate": 3.682878787878788e-06, "loss": 6.330355834960938, "step": 27080 }, { "epoch": 0.27085, "grad_norm": 6.037678241729736, "learning_rate": 3.6826262626262626e-06, "loss": 6.3815162658691404, "step": 27085 }, { "epoch": 0.2709, "grad_norm": 5.9402241706848145, "learning_rate": 3.6823737373737376e-06, "loss": 6.327337265014648, "step": 27090 }, { "epoch": 0.27095, "grad_norm": 13.197674751281738, "learning_rate": 3.6821212121212123e-06, "loss": 6.375093078613281, "step": 27095 }, { "epoch": 0.271, "grad_norm": 6.832035064697266, "learning_rate": 3.681868686868687e-06, "loss": 6.369658660888672, "step": 27100 }, { "epoch": 0.27105, "grad_norm": 7.220258712768555, "learning_rate": 3.6816161616161615e-06, "loss": 6.330833435058594, "step": 27105 }, { "epoch": 0.2711, "grad_norm": 6.748472213745117, "learning_rate": 3.681363636363637e-06, "loss": 6.339237976074219, "step": 27110 }, { "epoch": 0.27115, "grad_norm": 4.736085414886475, "learning_rate": 3.681111111111111e-06, "loss": 6.2969623565673825, "step": 27115 }, { "epoch": 0.2712, "grad_norm": 6.751535415649414, "learning_rate": 3.680858585858586e-06, "loss": 6.314698791503906, "step": 27120 }, { "epoch": 0.27125, "grad_norm": 4.617252826690674, "learning_rate": 3.6806060606060613e-06, "loss": 6.300072097778321, "step": 27125 }, { "epoch": 0.2713, "grad_norm": 4.715736389160156, "learning_rate": 3.680353535353536e-06, "loss": 6.357831192016602, "step": 27130 }, { "epoch": 0.27135, "grad_norm": 7.623340129852295, "learning_rate": 3.6801010101010106e-06, "loss": 6.2798583984375, "step": 27135 }, { "epoch": 0.2714, "grad_norm": 14.752669334411621, "learning_rate": 3.679848484848485e-06, "loss": 6.400682067871093, "step": 27140 }, { "epoch": 0.27145, "grad_norm": 17.989601135253906, "learning_rate": 3.6795959595959603e-06, "loss": 6.633216857910156, "step": 27145 }, { "epoch": 0.2715, "grad_norm": 5.985446929931641, "learning_rate": 3.679343434343435e-06, "loss": 6.324993896484375, "step": 27150 }, { "epoch": 0.27155, "grad_norm": 6.010487079620361, "learning_rate": 3.6790909090909095e-06, "loss": 6.329694747924805, "step": 27155 }, { "epoch": 0.2716, "grad_norm": 3.7532799243927, "learning_rate": 3.678838383838384e-06, "loss": 6.333596420288086, "step": 27160 }, { "epoch": 0.27165, "grad_norm": 6.736462116241455, "learning_rate": 3.678585858585859e-06, "loss": 6.305368804931641, "step": 27165 }, { "epoch": 0.2717, "grad_norm": 4.241910457611084, "learning_rate": 3.678333333333334e-06, "loss": 6.327504730224609, "step": 27170 }, { "epoch": 0.27175, "grad_norm": 4.362829685211182, "learning_rate": 3.6780808080808085e-06, "loss": 6.343601989746094, "step": 27175 }, { "epoch": 0.2718, "grad_norm": 8.831284523010254, "learning_rate": 3.677828282828283e-06, "loss": 6.304806518554687, "step": 27180 }, { "epoch": 0.27185, "grad_norm": 15.093597412109375, "learning_rate": 3.677575757575758e-06, "loss": 6.665618896484375, "step": 27185 }, { "epoch": 0.2719, "grad_norm": 11.941668510437012, "learning_rate": 3.6773232323232328e-06, "loss": 6.458573913574218, "step": 27190 }, { "epoch": 0.27195, "grad_norm": 5.2044854164123535, "learning_rate": 3.6770707070707074e-06, "loss": 6.359412384033203, "step": 27195 }, { "epoch": 0.272, "grad_norm": 3.6306865215301514, "learning_rate": 3.676818181818182e-06, "loss": 6.284060668945313, "step": 27200 }, { "epoch": 0.27205, "grad_norm": 6.809463024139404, "learning_rate": 3.676565656565657e-06, "loss": 6.331309509277344, "step": 27205 }, { "epoch": 0.2721, "grad_norm": 3.6361868381500244, "learning_rate": 3.6763131313131317e-06, "loss": 6.301479339599609, "step": 27210 }, { "epoch": 0.27215, "grad_norm": 8.436450004577637, "learning_rate": 3.6760606060606064e-06, "loss": 6.403886413574218, "step": 27215 }, { "epoch": 0.2722, "grad_norm": 17.354646682739258, "learning_rate": 3.675808080808081e-06, "loss": 6.68623046875, "step": 27220 }, { "epoch": 0.27225, "grad_norm": 8.736088752746582, "learning_rate": 3.675555555555556e-06, "loss": 6.348749160766602, "step": 27225 }, { "epoch": 0.2723, "grad_norm": 4.222883224487305, "learning_rate": 3.6753030303030307e-06, "loss": 6.348350906372071, "step": 27230 }, { "epoch": 0.27235, "grad_norm": 9.181174278259277, "learning_rate": 3.6750505050505053e-06, "loss": 6.275466537475586, "step": 27235 }, { "epoch": 0.2724, "grad_norm": 5.909955978393555, "learning_rate": 3.67479797979798e-06, "loss": 6.005701065063477, "step": 27240 }, { "epoch": 0.27245, "grad_norm": 6.051238536834717, "learning_rate": 3.674545454545455e-06, "loss": 6.349235153198242, "step": 27245 }, { "epoch": 0.2725, "grad_norm": 6.935568809509277, "learning_rate": 3.6742929292929296e-06, "loss": 6.396284484863282, "step": 27250 }, { "epoch": 0.27255, "grad_norm": 5.963278770446777, "learning_rate": 3.6740404040404042e-06, "loss": 6.305793380737304, "step": 27255 }, { "epoch": 0.2726, "grad_norm": 4.418926239013672, "learning_rate": 3.673787878787879e-06, "loss": 6.29881706237793, "step": 27260 }, { "epoch": 0.27265, "grad_norm": 6.743359088897705, "learning_rate": 3.673535353535354e-06, "loss": 6.300883483886719, "step": 27265 }, { "epoch": 0.2727, "grad_norm": 3.3065185546875, "learning_rate": 3.6732828282828286e-06, "loss": 6.239302062988282, "step": 27270 }, { "epoch": 0.27275, "grad_norm": 4.624436855316162, "learning_rate": 3.673030303030303e-06, "loss": 6.332311630249023, "step": 27275 }, { "epoch": 0.2728, "grad_norm": 4.648477554321289, "learning_rate": 3.672777777777778e-06, "loss": 6.275871276855469, "step": 27280 }, { "epoch": 0.27285, "grad_norm": 6.098147869110107, "learning_rate": 3.672525252525253e-06, "loss": 6.460188293457032, "step": 27285 }, { "epoch": 0.2729, "grad_norm": 6.516494274139404, "learning_rate": 3.6722727272727275e-06, "loss": 6.323106384277343, "step": 27290 }, { "epoch": 0.27295, "grad_norm": 3.3825085163116455, "learning_rate": 3.672020202020202e-06, "loss": 6.320406341552735, "step": 27295 }, { "epoch": 0.273, "grad_norm": 7.282415866851807, "learning_rate": 3.6717676767676768e-06, "loss": 6.26165771484375, "step": 27300 }, { "epoch": 0.27305, "grad_norm": 5.538201808929443, "learning_rate": 3.6715151515151522e-06, "loss": 6.369514846801758, "step": 27305 }, { "epoch": 0.2731, "grad_norm": 6.222240447998047, "learning_rate": 3.6712626262626264e-06, "loss": 6.334397888183593, "step": 27310 }, { "epoch": 0.27315, "grad_norm": 7.491812705993652, "learning_rate": 3.671010101010101e-06, "loss": 6.30638427734375, "step": 27315 }, { "epoch": 0.2732, "grad_norm": 6.758635520935059, "learning_rate": 3.6707575757575757e-06, "loss": 6.330162429809571, "step": 27320 }, { "epoch": 0.27325, "grad_norm": 5.886263370513916, "learning_rate": 3.670505050505051e-06, "loss": 6.324074935913086, "step": 27325 }, { "epoch": 0.2733, "grad_norm": 4.35609769821167, "learning_rate": 3.670252525252526e-06, "loss": 6.257471084594727, "step": 27330 }, { "epoch": 0.27335, "grad_norm": 5.643250942230225, "learning_rate": 3.6700000000000004e-06, "loss": 6.305144119262695, "step": 27335 }, { "epoch": 0.2734, "grad_norm": 5.410227298736572, "learning_rate": 3.6697474747474746e-06, "loss": 6.31523551940918, "step": 27340 }, { "epoch": 0.27345, "grad_norm": 7.4235029220581055, "learning_rate": 3.66949494949495e-06, "loss": 6.35937614440918, "step": 27345 }, { "epoch": 0.2735, "grad_norm": 3.914283275604248, "learning_rate": 3.6692424242424248e-06, "loss": 6.298578262329102, "step": 27350 }, { "epoch": 0.27355, "grad_norm": 6.989834308624268, "learning_rate": 3.6689898989898994e-06, "loss": 6.280791473388672, "step": 27355 }, { "epoch": 0.2736, "grad_norm": 5.437248706817627, "learning_rate": 3.668737373737374e-06, "loss": 6.327111434936524, "step": 27360 }, { "epoch": 0.27365, "grad_norm": 28.786766052246094, "learning_rate": 3.668484848484849e-06, "loss": 6.578860473632813, "step": 27365 }, { "epoch": 0.2737, "grad_norm": 17.695451736450195, "learning_rate": 3.6682323232323237e-06, "loss": 6.455966186523438, "step": 27370 }, { "epoch": 0.27375, "grad_norm": 15.653703689575195, "learning_rate": 3.6679797979797983e-06, "loss": 6.355830383300781, "step": 27375 }, { "epoch": 0.2738, "grad_norm": 9.790855407714844, "learning_rate": 3.667727272727273e-06, "loss": 6.414237213134766, "step": 27380 }, { "epoch": 0.27385, "grad_norm": 4.400730609893799, "learning_rate": 3.667474747474748e-06, "loss": 6.303078460693359, "step": 27385 }, { "epoch": 0.2739, "grad_norm": 6.707621097564697, "learning_rate": 3.6672222222222226e-06, "loss": 6.285072708129883, "step": 27390 }, { "epoch": 0.27395, "grad_norm": 4.499448776245117, "learning_rate": 3.6669696969696973e-06, "loss": 6.284291458129883, "step": 27395 }, { "epoch": 0.274, "grad_norm": 6.156611442565918, "learning_rate": 3.666717171717172e-06, "loss": 6.325781631469726, "step": 27400 }, { "epoch": 0.27405, "grad_norm": 5.899245738983154, "learning_rate": 3.666464646464647e-06, "loss": 6.271918106079101, "step": 27405 }, { "epoch": 0.2741, "grad_norm": 7.383429050445557, "learning_rate": 3.6662121212121216e-06, "loss": 6.322491455078125, "step": 27410 }, { "epoch": 0.27415, "grad_norm": 4.748988628387451, "learning_rate": 3.6659595959595962e-06, "loss": 6.3604583740234375, "step": 27415 }, { "epoch": 0.2742, "grad_norm": 7.032674789428711, "learning_rate": 3.665707070707071e-06, "loss": 6.308526611328125, "step": 27420 }, { "epoch": 0.27425, "grad_norm": 4.7725324630737305, "learning_rate": 3.665454545454546e-06, "loss": 6.357879638671875, "step": 27425 }, { "epoch": 0.2743, "grad_norm": 3.24104642868042, "learning_rate": 3.6652020202020205e-06, "loss": 6.380682373046875, "step": 27430 }, { "epoch": 0.27435, "grad_norm": 6.934032917022705, "learning_rate": 3.664949494949495e-06, "loss": 6.511369323730468, "step": 27435 }, { "epoch": 0.2744, "grad_norm": 4.539759635925293, "learning_rate": 3.6646969696969698e-06, "loss": 6.3147937774658205, "step": 27440 }, { "epoch": 0.27445, "grad_norm": 5.013272285461426, "learning_rate": 3.664444444444445e-06, "loss": 6.293770217895508, "step": 27445 }, { "epoch": 0.2745, "grad_norm": 6.674352169036865, "learning_rate": 3.6641919191919195e-06, "loss": 6.435519409179688, "step": 27450 }, { "epoch": 0.27455, "grad_norm": 5.441864967346191, "learning_rate": 3.663939393939394e-06, "loss": 6.288307189941406, "step": 27455 }, { "epoch": 0.2746, "grad_norm": 3.34708309173584, "learning_rate": 3.6636868686868687e-06, "loss": 6.292370223999024, "step": 27460 }, { "epoch": 0.27465, "grad_norm": 5.303905010223389, "learning_rate": 3.6634343434343438e-06, "loss": 6.323270416259765, "step": 27465 }, { "epoch": 0.2747, "grad_norm": 5.824272155761719, "learning_rate": 3.6631818181818184e-06, "loss": 6.3546794891357425, "step": 27470 }, { "epoch": 0.27475, "grad_norm": 7.414111137390137, "learning_rate": 3.662929292929293e-06, "loss": 6.3130035400390625, "step": 27475 }, { "epoch": 0.2748, "grad_norm": 3.9218544960021973, "learning_rate": 3.6626767676767677e-06, "loss": 6.299446105957031, "step": 27480 }, { "epoch": 0.27485, "grad_norm": 4.6603007316589355, "learning_rate": 3.6624242424242427e-06, "loss": 6.294777297973633, "step": 27485 }, { "epoch": 0.2749, "grad_norm": 4.896655559539795, "learning_rate": 3.6621717171717174e-06, "loss": 6.293980407714844, "step": 27490 }, { "epoch": 0.27495, "grad_norm": 6.365837574005127, "learning_rate": 3.661919191919192e-06, "loss": 6.297945404052735, "step": 27495 }, { "epoch": 0.275, "grad_norm": 4.755804061889648, "learning_rate": 3.6616666666666666e-06, "loss": 6.289345550537109, "step": 27500 }, { "epoch": 0.27505, "grad_norm": 4.283837795257568, "learning_rate": 3.6614141414141417e-06, "loss": 6.300391387939453, "step": 27505 }, { "epoch": 0.2751, "grad_norm": 5.812557220458984, "learning_rate": 3.6611616161616163e-06, "loss": 6.291962432861328, "step": 27510 }, { "epoch": 0.27515, "grad_norm": 8.087723731994629, "learning_rate": 3.660909090909091e-06, "loss": 6.368450546264649, "step": 27515 }, { "epoch": 0.2752, "grad_norm": 8.5402193069458, "learning_rate": 3.6606565656565656e-06, "loss": 6.306014633178711, "step": 27520 }, { "epoch": 0.27525, "grad_norm": 8.47391414642334, "learning_rate": 3.660404040404041e-06, "loss": 6.327323532104492, "step": 27525 }, { "epoch": 0.2753, "grad_norm": 4.842244625091553, "learning_rate": 3.6601515151515152e-06, "loss": 6.372050476074219, "step": 27530 }, { "epoch": 0.27535, "grad_norm": 5.422746658325195, "learning_rate": 3.65989898989899e-06, "loss": 6.2908882141113285, "step": 27535 }, { "epoch": 0.2754, "grad_norm": 5.98571252822876, "learning_rate": 3.6596464646464645e-06, "loss": 6.328924942016601, "step": 27540 }, { "epoch": 0.27545, "grad_norm": 7.599514007568359, "learning_rate": 3.65939393939394e-06, "loss": 6.33453369140625, "step": 27545 }, { "epoch": 0.2755, "grad_norm": 6.009715557098389, "learning_rate": 3.6591414141414146e-06, "loss": 6.297414779663086, "step": 27550 }, { "epoch": 0.27555, "grad_norm": 4.236227989196777, "learning_rate": 3.6588888888888892e-06, "loss": 6.349770355224609, "step": 27555 }, { "epoch": 0.2756, "grad_norm": 3.1502792835235596, "learning_rate": 3.6586363636363643e-06, "loss": 6.399088668823242, "step": 27560 }, { "epoch": 0.27565, "grad_norm": 6.337276458740234, "learning_rate": 3.658383838383839e-06, "loss": 6.324374008178711, "step": 27565 }, { "epoch": 0.2757, "grad_norm": 6.978740692138672, "learning_rate": 3.6581313131313136e-06, "loss": 6.3005828857421875, "step": 27570 }, { "epoch": 0.27575, "grad_norm": 5.612730026245117, "learning_rate": 3.657878787878788e-06, "loss": 6.282025146484375, "step": 27575 }, { "epoch": 0.2758, "grad_norm": 3.4305179119110107, "learning_rate": 3.6576262626262632e-06, "loss": 6.279350662231446, "step": 27580 }, { "epoch": 0.27585, "grad_norm": 6.836961269378662, "learning_rate": 3.657373737373738e-06, "loss": 6.276103210449219, "step": 27585 }, { "epoch": 0.2759, "grad_norm": 10.697168350219727, "learning_rate": 3.6571212121212125e-06, "loss": 6.441885375976563, "step": 27590 }, { "epoch": 0.27595, "grad_norm": 4.975052356719971, "learning_rate": 3.656868686868687e-06, "loss": 6.239624786376953, "step": 27595 }, { "epoch": 0.276, "grad_norm": 4.282489776611328, "learning_rate": 3.656616161616162e-06, "loss": 6.2851417541503904, "step": 27600 }, { "epoch": 0.27605, "grad_norm": 4.480371952056885, "learning_rate": 3.656363636363637e-06, "loss": 6.381772994995117, "step": 27605 }, { "epoch": 0.2761, "grad_norm": 4.752960681915283, "learning_rate": 3.6561111111111114e-06, "loss": 6.29516487121582, "step": 27610 }, { "epoch": 0.27615, "grad_norm": 6.230968952178955, "learning_rate": 3.655858585858586e-06, "loss": 6.363798522949219, "step": 27615 }, { "epoch": 0.2762, "grad_norm": 5.228012561798096, "learning_rate": 3.655606060606061e-06, "loss": 6.336743927001953, "step": 27620 }, { "epoch": 0.27625, "grad_norm": 3.5650548934936523, "learning_rate": 3.6553535353535358e-06, "loss": 6.33189697265625, "step": 27625 }, { "epoch": 0.2763, "grad_norm": 6.895216464996338, "learning_rate": 3.6551010101010104e-06, "loss": 6.282896041870117, "step": 27630 }, { "epoch": 0.27635, "grad_norm": 7.159496784210205, "learning_rate": 3.654848484848485e-06, "loss": 6.362102508544922, "step": 27635 }, { "epoch": 0.2764, "grad_norm": 5.794086933135986, "learning_rate": 3.65459595959596e-06, "loss": 6.377381896972656, "step": 27640 }, { "epoch": 0.27645, "grad_norm": 5.044839382171631, "learning_rate": 3.6543434343434347e-06, "loss": 6.303004455566406, "step": 27645 }, { "epoch": 0.2765, "grad_norm": 4.638664245605469, "learning_rate": 3.6540909090909093e-06, "loss": 6.554790496826172, "step": 27650 }, { "epoch": 0.27655, "grad_norm": 5.905210494995117, "learning_rate": 3.653838383838384e-06, "loss": 6.3347126007080075, "step": 27655 }, { "epoch": 0.2766, "grad_norm": 6.589792728424072, "learning_rate": 3.653585858585859e-06, "loss": 6.3598388671875, "step": 27660 }, { "epoch": 0.27665, "grad_norm": 4.932498455047607, "learning_rate": 3.6533333333333336e-06, "loss": 6.322650527954101, "step": 27665 }, { "epoch": 0.2767, "grad_norm": 8.342058181762695, "learning_rate": 3.6530808080808083e-06, "loss": 6.460323333740234, "step": 27670 }, { "epoch": 0.27675, "grad_norm": 8.272595405578613, "learning_rate": 3.652828282828283e-06, "loss": 6.284732055664063, "step": 27675 }, { "epoch": 0.2768, "grad_norm": 8.160736083984375, "learning_rate": 3.652575757575758e-06, "loss": 6.28509292602539, "step": 27680 }, { "epoch": 0.27685, "grad_norm": 7.295288562774658, "learning_rate": 3.6523232323232326e-06, "loss": 6.337397766113281, "step": 27685 }, { "epoch": 0.2769, "grad_norm": 7.7256317138671875, "learning_rate": 3.6520707070707072e-06, "loss": 6.329400634765625, "step": 27690 }, { "epoch": 0.27695, "grad_norm": 14.549596786499023, "learning_rate": 3.651818181818182e-06, "loss": 6.340921020507812, "step": 27695 }, { "epoch": 0.277, "grad_norm": 7.941248416900635, "learning_rate": 3.651565656565657e-06, "loss": 6.256224822998047, "step": 27700 }, { "epoch": 0.27705, "grad_norm": 6.222874164581299, "learning_rate": 3.6513131313131315e-06, "loss": 6.284463882446289, "step": 27705 }, { "epoch": 0.2771, "grad_norm": 6.031448841094971, "learning_rate": 3.651060606060606e-06, "loss": 6.308800125122071, "step": 27710 }, { "epoch": 0.27715, "grad_norm": 4.797364711761475, "learning_rate": 3.650808080808081e-06, "loss": 6.244573593139648, "step": 27715 }, { "epoch": 0.2772, "grad_norm": 17.27484893798828, "learning_rate": 3.6505555555555563e-06, "loss": 6.298854064941406, "step": 27720 }, { "epoch": 0.27725, "grad_norm": 7.919289588928223, "learning_rate": 3.6503030303030305e-06, "loss": 6.294458389282227, "step": 27725 }, { "epoch": 0.2773, "grad_norm": 5.620197296142578, "learning_rate": 3.650050505050505e-06, "loss": 6.223772811889648, "step": 27730 }, { "epoch": 0.27735, "grad_norm": 5.687453269958496, "learning_rate": 3.6497979797979797e-06, "loss": 6.307546997070313, "step": 27735 }, { "epoch": 0.2774, "grad_norm": 4.351597309112549, "learning_rate": 3.6495454545454552e-06, "loss": 6.2876018524169925, "step": 27740 }, { "epoch": 0.27745, "grad_norm": 6.214696407318115, "learning_rate": 3.64929292929293e-06, "loss": 6.30900993347168, "step": 27745 }, { "epoch": 0.2775, "grad_norm": 7.783788681030273, "learning_rate": 3.6490404040404045e-06, "loss": 6.5370330810546875, "step": 27750 }, { "epoch": 0.27755, "grad_norm": 6.718545436859131, "learning_rate": 3.6487878787878787e-06, "loss": 6.314834976196289, "step": 27755 }, { "epoch": 0.2776, "grad_norm": 5.24647331237793, "learning_rate": 3.648535353535354e-06, "loss": 6.462430572509765, "step": 27760 }, { "epoch": 0.27765, "grad_norm": 5.680200099945068, "learning_rate": 3.648282828282829e-06, "loss": 6.303385925292969, "step": 27765 }, { "epoch": 0.2777, "grad_norm": 4.463397979736328, "learning_rate": 3.6480303030303034e-06, "loss": 6.303775787353516, "step": 27770 }, { "epoch": 0.27775, "grad_norm": 4.503332614898682, "learning_rate": 3.647777777777778e-06, "loss": 6.334011077880859, "step": 27775 }, { "epoch": 0.2778, "grad_norm": 4.750062942504883, "learning_rate": 3.647525252525253e-06, "loss": 6.3940589904785154, "step": 27780 }, { "epoch": 0.27785, "grad_norm": 3.595973014831543, "learning_rate": 3.6472727272727277e-06, "loss": 6.376918029785156, "step": 27785 }, { "epoch": 0.2779, "grad_norm": 2.8240511417388916, "learning_rate": 3.6470202020202024e-06, "loss": 6.2959953308105465, "step": 27790 }, { "epoch": 0.27795, "grad_norm": 5.838364124298096, "learning_rate": 3.646767676767677e-06, "loss": 6.307080078125, "step": 27795 }, { "epoch": 0.278, "grad_norm": 4.7670440673828125, "learning_rate": 3.646515151515152e-06, "loss": 6.291278839111328, "step": 27800 }, { "epoch": 0.27805, "grad_norm": 4.551059722900391, "learning_rate": 3.6462626262626267e-06, "loss": 6.284486389160156, "step": 27805 }, { "epoch": 0.2781, "grad_norm": 4.2642621994018555, "learning_rate": 3.6460101010101013e-06, "loss": 6.29508056640625, "step": 27810 }, { "epoch": 0.27815, "grad_norm": 6.023734092712402, "learning_rate": 3.645757575757576e-06, "loss": 6.279483795166016, "step": 27815 }, { "epoch": 0.2782, "grad_norm": 5.463079452514648, "learning_rate": 3.645505050505051e-06, "loss": 6.330992126464844, "step": 27820 }, { "epoch": 0.27825, "grad_norm": 3.9083940982818604, "learning_rate": 3.6452525252525256e-06, "loss": 6.3431846618652346, "step": 27825 }, { "epoch": 0.2783, "grad_norm": 6.314789772033691, "learning_rate": 3.6450000000000003e-06, "loss": 6.318592834472656, "step": 27830 }, { "epoch": 0.27835, "grad_norm": 7.027535915374756, "learning_rate": 3.644747474747475e-06, "loss": 6.324528503417969, "step": 27835 }, { "epoch": 0.2784, "grad_norm": 5.595466613769531, "learning_rate": 3.64449494949495e-06, "loss": 6.267961883544922, "step": 27840 }, { "epoch": 0.27845, "grad_norm": 5.740839958190918, "learning_rate": 3.6442424242424246e-06, "loss": 6.3134716033935545, "step": 27845 }, { "epoch": 0.2785, "grad_norm": 8.319234848022461, "learning_rate": 3.643989898989899e-06, "loss": 6.353271102905273, "step": 27850 }, { "epoch": 0.27855, "grad_norm": 12.15185260772705, "learning_rate": 3.643737373737374e-06, "loss": 6.292927551269531, "step": 27855 }, { "epoch": 0.2786, "grad_norm": 2.607407808303833, "learning_rate": 3.643484848484849e-06, "loss": 6.324199676513672, "step": 27860 }, { "epoch": 0.27865, "grad_norm": 5.051003932952881, "learning_rate": 3.6432323232323235e-06, "loss": 6.319622802734375, "step": 27865 }, { "epoch": 0.2787, "grad_norm": 6.675957679748535, "learning_rate": 3.642979797979798e-06, "loss": 6.300960540771484, "step": 27870 }, { "epoch": 0.27875, "grad_norm": 7.717424392700195, "learning_rate": 3.6427272727272728e-06, "loss": 6.31220817565918, "step": 27875 }, { "epoch": 0.2788, "grad_norm": 15.18311882019043, "learning_rate": 3.642474747474748e-06, "loss": 6.534521484375, "step": 27880 }, { "epoch": 0.27885, "grad_norm": 7.362109661102295, "learning_rate": 3.6422222222222225e-06, "loss": 6.337028884887696, "step": 27885 }, { "epoch": 0.2789, "grad_norm": 5.795037269592285, "learning_rate": 3.641969696969697e-06, "loss": 6.292536926269531, "step": 27890 }, { "epoch": 0.27895, "grad_norm": 11.788749694824219, "learning_rate": 3.6417171717171717e-06, "loss": 6.276415634155273, "step": 27895 }, { "epoch": 0.279, "grad_norm": 7.711708068847656, "learning_rate": 3.6414646464646468e-06, "loss": 6.344890213012695, "step": 27900 }, { "epoch": 0.27905, "grad_norm": 6.711668014526367, "learning_rate": 3.6412121212121214e-06, "loss": 6.304533004760742, "step": 27905 }, { "epoch": 0.2791, "grad_norm": 6.960334300994873, "learning_rate": 3.640959595959596e-06, "loss": 6.2920066833496096, "step": 27910 }, { "epoch": 0.27915, "grad_norm": 5.496513366699219, "learning_rate": 3.6407070707070707e-06, "loss": 6.328239822387696, "step": 27915 }, { "epoch": 0.2792, "grad_norm": 7.50898551940918, "learning_rate": 3.6404545454545457e-06, "loss": 6.325896835327148, "step": 27920 }, { "epoch": 0.27925, "grad_norm": 5.988427639007568, "learning_rate": 3.6402020202020203e-06, "loss": 6.316617202758789, "step": 27925 }, { "epoch": 0.2793, "grad_norm": 3.6656086444854736, "learning_rate": 3.639949494949495e-06, "loss": 6.311394882202149, "step": 27930 }, { "epoch": 0.27935, "grad_norm": 5.195313453674316, "learning_rate": 3.6396969696969696e-06, "loss": 6.1900779724121096, "step": 27935 }, { "epoch": 0.2794, "grad_norm": 3.6240956783294678, "learning_rate": 3.639444444444445e-06, "loss": 6.276221084594726, "step": 27940 }, { "epoch": 0.27945, "grad_norm": 9.404885292053223, "learning_rate": 3.6391919191919193e-06, "loss": 6.338648986816406, "step": 27945 }, { "epoch": 0.2795, "grad_norm": 4.335447788238525, "learning_rate": 3.638939393939394e-06, "loss": 6.319538116455078, "step": 27950 }, { "epoch": 0.27955, "grad_norm": 5.008849143981934, "learning_rate": 3.6386868686868685e-06, "loss": 6.263839340209961, "step": 27955 }, { "epoch": 0.2796, "grad_norm": 6.388202667236328, "learning_rate": 3.638434343434344e-06, "loss": 6.244398880004883, "step": 27960 }, { "epoch": 0.27965, "grad_norm": 4.846386909484863, "learning_rate": 3.6381818181818187e-06, "loss": 6.243783950805664, "step": 27965 }, { "epoch": 0.2797, "grad_norm": 3.5508856773376465, "learning_rate": 3.6379292929292933e-06, "loss": 6.2923023223876955, "step": 27970 }, { "epoch": 0.27975, "grad_norm": 6.707224369049072, "learning_rate": 3.6376767676767683e-06, "loss": 6.28193473815918, "step": 27975 }, { "epoch": 0.2798, "grad_norm": 4.432704925537109, "learning_rate": 3.637424242424243e-06, "loss": 6.366315460205078, "step": 27980 }, { "epoch": 0.27985, "grad_norm": 4.012948989868164, "learning_rate": 3.6371717171717176e-06, "loss": 6.915853881835938, "step": 27985 }, { "epoch": 0.2799, "grad_norm": 3.8945529460906982, "learning_rate": 3.6369191919191922e-06, "loss": 6.305094146728516, "step": 27990 }, { "epoch": 0.27995, "grad_norm": 5.104999542236328, "learning_rate": 3.6366666666666673e-06, "loss": 6.298294448852539, "step": 27995 }, { "epoch": 0.28, "grad_norm": 4.341668605804443, "learning_rate": 3.636414141414142e-06, "loss": 6.3459014892578125, "step": 28000 }, { "epoch": 0.28005, "grad_norm": 5.0260090827941895, "learning_rate": 3.6361616161616165e-06, "loss": 6.3812309265136715, "step": 28005 }, { "epoch": 0.2801, "grad_norm": 6.266566276550293, "learning_rate": 3.635909090909091e-06, "loss": 6.290509033203125, "step": 28010 }, { "epoch": 0.28015, "grad_norm": 4.886213779449463, "learning_rate": 3.6356565656565662e-06, "loss": 6.301641845703125, "step": 28015 }, { "epoch": 0.2802, "grad_norm": 7.614068508148193, "learning_rate": 3.635404040404041e-06, "loss": 6.331428527832031, "step": 28020 }, { "epoch": 0.28025, "grad_norm": 5.054044723510742, "learning_rate": 3.6351515151515155e-06, "loss": 6.275740051269532, "step": 28025 }, { "epoch": 0.2803, "grad_norm": 5.4943108558654785, "learning_rate": 3.63489898989899e-06, "loss": 6.271029281616211, "step": 28030 }, { "epoch": 0.28035, "grad_norm": 2.8076703548431396, "learning_rate": 3.634646464646465e-06, "loss": 6.319896697998047, "step": 28035 }, { "epoch": 0.2804, "grad_norm": 4.2270684242248535, "learning_rate": 3.63439393939394e-06, "loss": 6.279963684082031, "step": 28040 }, { "epoch": 0.28045, "grad_norm": 7.827011585235596, "learning_rate": 3.6341414141414144e-06, "loss": 6.364917373657226, "step": 28045 }, { "epoch": 0.2805, "grad_norm": 10.75042724609375, "learning_rate": 3.633888888888889e-06, "loss": 6.374034118652344, "step": 28050 }, { "epoch": 0.28055, "grad_norm": 7.272946357727051, "learning_rate": 3.633636363636364e-06, "loss": 6.280351257324218, "step": 28055 }, { "epoch": 0.2806, "grad_norm": 6.345447540283203, "learning_rate": 3.6333838383838387e-06, "loss": 6.290077209472656, "step": 28060 }, { "epoch": 0.28065, "grad_norm": 5.485608100891113, "learning_rate": 3.6331313131313134e-06, "loss": 6.338864135742187, "step": 28065 }, { "epoch": 0.2807, "grad_norm": 4.412539005279541, "learning_rate": 3.632878787878788e-06, "loss": 6.263153839111328, "step": 28070 }, { "epoch": 0.28075, "grad_norm": 5.289031982421875, "learning_rate": 3.632626262626263e-06, "loss": 6.309101867675781, "step": 28075 }, { "epoch": 0.2808, "grad_norm": 5.946156024932861, "learning_rate": 3.6323737373737377e-06, "loss": 6.315034866333008, "step": 28080 }, { "epoch": 0.28085, "grad_norm": 7.790994167327881, "learning_rate": 3.6321212121212123e-06, "loss": 6.293221664428711, "step": 28085 }, { "epoch": 0.2809, "grad_norm": 3.6762535572052, "learning_rate": 3.631868686868687e-06, "loss": 6.303483963012695, "step": 28090 }, { "epoch": 0.28095, "grad_norm": 4.119012355804443, "learning_rate": 3.631616161616162e-06, "loss": 6.293547821044922, "step": 28095 }, { "epoch": 0.281, "grad_norm": 7.7604217529296875, "learning_rate": 3.6313636363636366e-06, "loss": 6.321548080444336, "step": 28100 }, { "epoch": 0.28105, "grad_norm": 6.160776615142822, "learning_rate": 3.6311111111111113e-06, "loss": 6.239625930786133, "step": 28105 }, { "epoch": 0.2811, "grad_norm": 3.838702440261841, "learning_rate": 3.630858585858586e-06, "loss": 6.291314697265625, "step": 28110 }, { "epoch": 0.28115, "grad_norm": 3.0702872276306152, "learning_rate": 3.630606060606061e-06, "loss": 6.332421875, "step": 28115 }, { "epoch": 0.2812, "grad_norm": 12.404671669006348, "learning_rate": 3.6303535353535356e-06, "loss": 6.281068420410156, "step": 28120 }, { "epoch": 0.28125, "grad_norm": 5.579586982727051, "learning_rate": 3.63010101010101e-06, "loss": 6.247989654541016, "step": 28125 }, { "epoch": 0.2813, "grad_norm": 8.449063301086426, "learning_rate": 3.629848484848485e-06, "loss": 6.356984710693359, "step": 28130 }, { "epoch": 0.28135, "grad_norm": 7.785964012145996, "learning_rate": 3.6295959595959603e-06, "loss": 6.33087272644043, "step": 28135 }, { "epoch": 0.2814, "grad_norm": 4.403900623321533, "learning_rate": 3.6293434343434345e-06, "loss": 6.294289398193359, "step": 28140 }, { "epoch": 0.28145, "grad_norm": 3.6800692081451416, "learning_rate": 3.629090909090909e-06, "loss": 6.308982086181641, "step": 28145 }, { "epoch": 0.2815, "grad_norm": 4.775040149688721, "learning_rate": 3.6288383838383838e-06, "loss": 6.28216438293457, "step": 28150 }, { "epoch": 0.28155, "grad_norm": 5.842979431152344, "learning_rate": 3.6285858585858593e-06, "loss": 6.31855583190918, "step": 28155 }, { "epoch": 0.2816, "grad_norm": 6.876884460449219, "learning_rate": 3.628333333333334e-06, "loss": 6.306393432617187, "step": 28160 }, { "epoch": 0.28165, "grad_norm": 9.199670791625977, "learning_rate": 3.6280808080808085e-06, "loss": 6.432333374023438, "step": 28165 }, { "epoch": 0.2817, "grad_norm": 5.5287766456604, "learning_rate": 3.6278282828282827e-06, "loss": 6.346640014648438, "step": 28170 }, { "epoch": 0.28175, "grad_norm": 7.259590148925781, "learning_rate": 3.627575757575758e-06, "loss": 6.297891616821289, "step": 28175 }, { "epoch": 0.2818, "grad_norm": 6.268401145935059, "learning_rate": 3.627323232323233e-06, "loss": 6.299401473999024, "step": 28180 }, { "epoch": 0.28185, "grad_norm": 5.581124782562256, "learning_rate": 3.6270707070707075e-06, "loss": 6.286925506591797, "step": 28185 }, { "epoch": 0.2819, "grad_norm": 8.935769081115723, "learning_rate": 3.626818181818182e-06, "loss": 6.345262145996093, "step": 28190 }, { "epoch": 0.28195, "grad_norm": 3.9361765384674072, "learning_rate": 3.626565656565657e-06, "loss": 6.319243621826172, "step": 28195 }, { "epoch": 0.282, "grad_norm": 9.349164962768555, "learning_rate": 3.6263131313131318e-06, "loss": 6.316201782226562, "step": 28200 }, { "epoch": 0.28205, "grad_norm": 5.504910469055176, "learning_rate": 3.6260606060606064e-06, "loss": 6.338265228271484, "step": 28205 }, { "epoch": 0.2821, "grad_norm": 5.569362163543701, "learning_rate": 3.625808080808081e-06, "loss": 6.275389862060547, "step": 28210 }, { "epoch": 0.28215, "grad_norm": 7.4478254318237305, "learning_rate": 3.625555555555556e-06, "loss": 6.294215393066406, "step": 28215 }, { "epoch": 0.2822, "grad_norm": 5.783453464508057, "learning_rate": 3.6253030303030307e-06, "loss": 6.271634674072265, "step": 28220 }, { "epoch": 0.28225, "grad_norm": 5.720432281494141, "learning_rate": 3.6250505050505053e-06, "loss": 6.264583587646484, "step": 28225 }, { "epoch": 0.2823, "grad_norm": 6.850325584411621, "learning_rate": 3.62479797979798e-06, "loss": 6.350994110107422, "step": 28230 }, { "epoch": 0.28235, "grad_norm": 5.166013240814209, "learning_rate": 3.624545454545455e-06, "loss": 6.31109504699707, "step": 28235 }, { "epoch": 0.2824, "grad_norm": 4.749843597412109, "learning_rate": 3.6242929292929297e-06, "loss": 6.306935119628906, "step": 28240 }, { "epoch": 0.28245, "grad_norm": 6.03468656539917, "learning_rate": 3.6240404040404043e-06, "loss": 6.305165100097656, "step": 28245 }, { "epoch": 0.2825, "grad_norm": 7.289083003997803, "learning_rate": 3.623787878787879e-06, "loss": 6.269268035888672, "step": 28250 }, { "epoch": 0.28255, "grad_norm": 16.096832275390625, "learning_rate": 3.623535353535354e-06, "loss": 6.4483642578125, "step": 28255 }, { "epoch": 0.2826, "grad_norm": 6.292984962463379, "learning_rate": 3.6232828282828286e-06, "loss": 6.344646835327149, "step": 28260 }, { "epoch": 0.28265, "grad_norm": 8.245882987976074, "learning_rate": 3.6230303030303032e-06, "loss": 6.243195343017578, "step": 28265 }, { "epoch": 0.2827, "grad_norm": 8.939123153686523, "learning_rate": 3.622777777777778e-06, "loss": 6.3416297912597654, "step": 28270 }, { "epoch": 0.28275, "grad_norm": 6.540311813354492, "learning_rate": 3.622525252525253e-06, "loss": 6.265554428100586, "step": 28275 }, { "epoch": 0.2828, "grad_norm": 13.80894947052002, "learning_rate": 3.6222727272727276e-06, "loss": 6.238034057617187, "step": 28280 }, { "epoch": 0.28285, "grad_norm": 6.235456943511963, "learning_rate": 3.622020202020202e-06, "loss": 6.222642135620117, "step": 28285 }, { "epoch": 0.2829, "grad_norm": 8.632944107055664, "learning_rate": 3.621767676767677e-06, "loss": 6.305866241455078, "step": 28290 }, { "epoch": 0.28295, "grad_norm": 5.584783554077148, "learning_rate": 3.621515151515152e-06, "loss": 6.321941375732422, "step": 28295 }, { "epoch": 0.283, "grad_norm": 6.277401924133301, "learning_rate": 3.6212626262626265e-06, "loss": 6.297470474243164, "step": 28300 }, { "epoch": 0.28305, "grad_norm": 6.028356552124023, "learning_rate": 3.621010101010101e-06, "loss": 6.2892101287841795, "step": 28305 }, { "epoch": 0.2831, "grad_norm": 3.7015345096588135, "learning_rate": 3.6207575757575758e-06, "loss": 6.33265380859375, "step": 28310 }, { "epoch": 0.28315, "grad_norm": 7.897459983825684, "learning_rate": 3.620505050505051e-06, "loss": 6.365507888793945, "step": 28315 }, { "epoch": 0.2832, "grad_norm": 4.112943172454834, "learning_rate": 3.6202525252525254e-06, "loss": 6.34315299987793, "step": 28320 }, { "epoch": 0.28325, "grad_norm": 3.81148099899292, "learning_rate": 3.62e-06, "loss": 6.31453857421875, "step": 28325 }, { "epoch": 0.2833, "grad_norm": 4.4862751960754395, "learning_rate": 3.6197474747474747e-06, "loss": 6.310483169555664, "step": 28330 }, { "epoch": 0.28335, "grad_norm": 6.629267692565918, "learning_rate": 3.6194949494949498e-06, "loss": 6.326427459716797, "step": 28335 }, { "epoch": 0.2834, "grad_norm": 24.268821716308594, "learning_rate": 3.6192424242424244e-06, "loss": 6.385597229003906, "step": 28340 }, { "epoch": 0.28345, "grad_norm": 7.630352973937988, "learning_rate": 3.618989898989899e-06, "loss": 6.365678405761718, "step": 28345 }, { "epoch": 0.2835, "grad_norm": 4.164686679840088, "learning_rate": 3.6187373737373736e-06, "loss": 6.297580718994141, "step": 28350 }, { "epoch": 0.28355, "grad_norm": 13.063346862792969, "learning_rate": 3.618484848484849e-06, "loss": 6.456536865234375, "step": 28355 }, { "epoch": 0.2836, "grad_norm": 5.402000904083252, "learning_rate": 3.6182323232323238e-06, "loss": 6.338290786743164, "step": 28360 }, { "epoch": 0.28365, "grad_norm": 7.2131876945495605, "learning_rate": 3.617979797979798e-06, "loss": 6.265399932861328, "step": 28365 }, { "epoch": 0.2837, "grad_norm": 11.81849193572998, "learning_rate": 3.6177272727272726e-06, "loss": 6.3232769012451175, "step": 28370 }, { "epoch": 0.28375, "grad_norm": 9.864227294921875, "learning_rate": 3.617474747474748e-06, "loss": 6.292572784423828, "step": 28375 }, { "epoch": 0.2838, "grad_norm": 4.6609697341918945, "learning_rate": 3.6172222222222227e-06, "loss": 6.372264862060547, "step": 28380 }, { "epoch": 0.28385, "grad_norm": 3.6671109199523926, "learning_rate": 3.6169696969696973e-06, "loss": 6.258380889892578, "step": 28385 }, { "epoch": 0.2839, "grad_norm": 4.324212551116943, "learning_rate": 3.6167171717171715e-06, "loss": 6.551113891601562, "step": 28390 }, { "epoch": 0.28395, "grad_norm": 7.14066743850708, "learning_rate": 3.616464646464647e-06, "loss": 6.321744918823242, "step": 28395 }, { "epoch": 0.284, "grad_norm": 5.860414505004883, "learning_rate": 3.6162121212121216e-06, "loss": 6.27824592590332, "step": 28400 }, { "epoch": 0.28405, "grad_norm": 5.641157150268555, "learning_rate": 3.6159595959595963e-06, "loss": 6.307501983642578, "step": 28405 }, { "epoch": 0.2841, "grad_norm": 7.852639198303223, "learning_rate": 3.6157070707070713e-06, "loss": 6.2658740997314455, "step": 28410 }, { "epoch": 0.28415, "grad_norm": 6.846890926361084, "learning_rate": 3.615454545454546e-06, "loss": 6.293794250488281, "step": 28415 }, { "epoch": 0.2842, "grad_norm": 21.634830474853516, "learning_rate": 3.6152020202020206e-06, "loss": 5.728129196166992, "step": 28420 }, { "epoch": 0.28425, "grad_norm": 4.734566688537598, "learning_rate": 3.6149494949494952e-06, "loss": 6.271745300292968, "step": 28425 }, { "epoch": 0.2843, "grad_norm": 8.629287719726562, "learning_rate": 3.6146969696969703e-06, "loss": 6.299970626831055, "step": 28430 }, { "epoch": 0.28435, "grad_norm": 6.360217094421387, "learning_rate": 3.614444444444445e-06, "loss": 6.28626594543457, "step": 28435 }, { "epoch": 0.2844, "grad_norm": 7.5656256675720215, "learning_rate": 3.6141919191919195e-06, "loss": 6.353227233886718, "step": 28440 }, { "epoch": 0.28445, "grad_norm": 3.9535250663757324, "learning_rate": 3.613939393939394e-06, "loss": 6.286642074584961, "step": 28445 }, { "epoch": 0.2845, "grad_norm": 4.453943252563477, "learning_rate": 3.613686868686869e-06, "loss": 6.326182556152344, "step": 28450 }, { "epoch": 0.28455, "grad_norm": 7.433956623077393, "learning_rate": 3.613434343434344e-06, "loss": 6.337706756591797, "step": 28455 }, { "epoch": 0.2846, "grad_norm": 4.301914215087891, "learning_rate": 3.6131818181818185e-06, "loss": 6.3437339782714846, "step": 28460 }, { "epoch": 0.28465, "grad_norm": 4.406134128570557, "learning_rate": 3.612929292929293e-06, "loss": 6.3015602111816404, "step": 28465 }, { "epoch": 0.2847, "grad_norm": 9.075971603393555, "learning_rate": 3.612676767676768e-06, "loss": 6.358526611328125, "step": 28470 }, { "epoch": 0.28475, "grad_norm": 4.6138458251953125, "learning_rate": 3.6124242424242428e-06, "loss": 6.254545593261719, "step": 28475 }, { "epoch": 0.2848, "grad_norm": 7.897307395935059, "learning_rate": 3.6121717171717174e-06, "loss": 6.332913970947265, "step": 28480 }, { "epoch": 0.28485, "grad_norm": 6.553394317626953, "learning_rate": 3.611919191919192e-06, "loss": 6.257846069335938, "step": 28485 }, { "epoch": 0.2849, "grad_norm": 16.72750473022461, "learning_rate": 3.611666666666667e-06, "loss": 6.572441101074219, "step": 28490 }, { "epoch": 0.28495, "grad_norm": 7.237514495849609, "learning_rate": 3.6114141414141417e-06, "loss": 6.942755126953125, "step": 28495 }, { "epoch": 0.285, "grad_norm": 5.142264366149902, "learning_rate": 3.6111616161616164e-06, "loss": 6.280252838134766, "step": 28500 }, { "epoch": 0.28505, "grad_norm": 4.845027923583984, "learning_rate": 3.610909090909091e-06, "loss": 6.357699966430664, "step": 28505 }, { "epoch": 0.2851, "grad_norm": 7.452653408050537, "learning_rate": 3.610656565656566e-06, "loss": 6.322651290893555, "step": 28510 }, { "epoch": 0.28515, "grad_norm": 4.44130802154541, "learning_rate": 3.6104040404040407e-06, "loss": 6.319202423095703, "step": 28515 }, { "epoch": 0.2852, "grad_norm": 9.019397735595703, "learning_rate": 3.6101515151515153e-06, "loss": 6.348906326293945, "step": 28520 }, { "epoch": 0.28525, "grad_norm": 3.067349433898926, "learning_rate": 3.60989898989899e-06, "loss": 6.298970794677734, "step": 28525 }, { "epoch": 0.2853, "grad_norm": 7.98826789855957, "learning_rate": 3.609646464646465e-06, "loss": 6.302264022827148, "step": 28530 }, { "epoch": 0.28535, "grad_norm": 3.739572763442993, "learning_rate": 3.6093939393939396e-06, "loss": 6.3411415100097654, "step": 28535 }, { "epoch": 0.2854, "grad_norm": 5.005424976348877, "learning_rate": 3.6091414141414142e-06, "loss": 6.304608154296875, "step": 28540 }, { "epoch": 0.28545, "grad_norm": 6.723167419433594, "learning_rate": 3.608888888888889e-06, "loss": 6.330911254882812, "step": 28545 }, { "epoch": 0.2855, "grad_norm": 6.457500457763672, "learning_rate": 3.6086363636363644e-06, "loss": 6.272603988647461, "step": 28550 }, { "epoch": 0.28555, "grad_norm": 4.978327751159668, "learning_rate": 3.6083838383838386e-06, "loss": 6.3408252716064455, "step": 28555 }, { "epoch": 0.2856, "grad_norm": 9.292272567749023, "learning_rate": 3.608131313131313e-06, "loss": 6.325208282470703, "step": 28560 }, { "epoch": 0.28565, "grad_norm": 6.802079200744629, "learning_rate": 3.607878787878788e-06, "loss": 6.284593963623047, "step": 28565 }, { "epoch": 0.2857, "grad_norm": 8.00263500213623, "learning_rate": 3.6076262626262633e-06, "loss": 6.295877838134766, "step": 28570 }, { "epoch": 0.28575, "grad_norm": 5.805321216583252, "learning_rate": 3.607373737373738e-06, "loss": 6.3116191864013675, "step": 28575 }, { "epoch": 0.2858, "grad_norm": 8.302186965942383, "learning_rate": 3.6071212121212126e-06, "loss": 6.333726501464843, "step": 28580 }, { "epoch": 0.28585, "grad_norm": 6.827087879180908, "learning_rate": 3.6068686868686868e-06, "loss": 6.314664840698242, "step": 28585 }, { "epoch": 0.2859, "grad_norm": 3.9440419673919678, "learning_rate": 3.6066161616161622e-06, "loss": 6.307729339599609, "step": 28590 }, { "epoch": 0.28595, "grad_norm": 7.446237564086914, "learning_rate": 3.606363636363637e-06, "loss": 6.312208938598633, "step": 28595 }, { "epoch": 0.286, "grad_norm": 5.832243919372559, "learning_rate": 3.6061111111111115e-06, "loss": 6.302655029296875, "step": 28600 }, { "epoch": 0.28605, "grad_norm": 6.713873863220215, "learning_rate": 3.605858585858586e-06, "loss": 6.282908630371094, "step": 28605 }, { "epoch": 0.2861, "grad_norm": 5.9138994216918945, "learning_rate": 3.605606060606061e-06, "loss": 6.328889083862305, "step": 28610 }, { "epoch": 0.28615, "grad_norm": 4.267538070678711, "learning_rate": 3.605353535353536e-06, "loss": 6.287181091308594, "step": 28615 }, { "epoch": 0.2862, "grad_norm": 9.032495498657227, "learning_rate": 3.6051010101010104e-06, "loss": 6.265768432617188, "step": 28620 }, { "epoch": 0.28625, "grad_norm": 7.286610126495361, "learning_rate": 3.604848484848485e-06, "loss": 6.267265701293946, "step": 28625 }, { "epoch": 0.2863, "grad_norm": 6.7487969398498535, "learning_rate": 3.60459595959596e-06, "loss": 6.39251937866211, "step": 28630 }, { "epoch": 0.28635, "grad_norm": 5.171732425689697, "learning_rate": 3.6043434343434348e-06, "loss": 6.326206970214844, "step": 28635 }, { "epoch": 0.2864, "grad_norm": 4.860161304473877, "learning_rate": 3.6040909090909094e-06, "loss": 6.276853942871094, "step": 28640 }, { "epoch": 0.28645, "grad_norm": 14.631268501281738, "learning_rate": 3.603838383838384e-06, "loss": 6.474327087402344, "step": 28645 }, { "epoch": 0.2865, "grad_norm": 5.2309675216674805, "learning_rate": 3.603585858585859e-06, "loss": 6.298002624511719, "step": 28650 }, { "epoch": 0.28655, "grad_norm": 5.926724433898926, "learning_rate": 3.6033333333333337e-06, "loss": 6.291964721679688, "step": 28655 }, { "epoch": 0.2866, "grad_norm": 5.344966411590576, "learning_rate": 3.6030808080808083e-06, "loss": 6.299165344238281, "step": 28660 }, { "epoch": 0.28665, "grad_norm": 6.5483293533325195, "learning_rate": 3.602828282828283e-06, "loss": 6.247673034667969, "step": 28665 }, { "epoch": 0.2867, "grad_norm": 4.552358627319336, "learning_rate": 3.602575757575758e-06, "loss": 6.2986194610595705, "step": 28670 }, { "epoch": 0.28675, "grad_norm": 7.367349147796631, "learning_rate": 3.6023232323232326e-06, "loss": 6.27129898071289, "step": 28675 }, { "epoch": 0.2868, "grad_norm": 2.5761096477508545, "learning_rate": 3.6020707070707073e-06, "loss": 6.294193267822266, "step": 28680 }, { "epoch": 0.28685, "grad_norm": 4.492839336395264, "learning_rate": 3.601818181818182e-06, "loss": 6.274634170532226, "step": 28685 }, { "epoch": 0.2869, "grad_norm": 5.902728080749512, "learning_rate": 3.601565656565657e-06, "loss": 6.302301025390625, "step": 28690 }, { "epoch": 0.28695, "grad_norm": 6.737079620361328, "learning_rate": 3.6013131313131316e-06, "loss": 6.310853958129883, "step": 28695 }, { "epoch": 0.287, "grad_norm": 5.647436618804932, "learning_rate": 3.6010606060606062e-06, "loss": 6.2589271545410154, "step": 28700 }, { "epoch": 0.28705, "grad_norm": 5.1896820068359375, "learning_rate": 3.600808080808081e-06, "loss": 6.490826416015625, "step": 28705 }, { "epoch": 0.2871, "grad_norm": 10.56762409210205, "learning_rate": 3.600555555555556e-06, "loss": 6.340222930908203, "step": 28710 }, { "epoch": 0.28715, "grad_norm": 8.439702987670898, "learning_rate": 3.6003030303030305e-06, "loss": 6.277909851074218, "step": 28715 }, { "epoch": 0.2872, "grad_norm": 6.057107925415039, "learning_rate": 3.600050505050505e-06, "loss": 6.321920013427734, "step": 28720 }, { "epoch": 0.28725, "grad_norm": 5.100966930389404, "learning_rate": 3.59979797979798e-06, "loss": 6.272132110595703, "step": 28725 }, { "epoch": 0.2873, "grad_norm": 5.461853981018066, "learning_rate": 3.599545454545455e-06, "loss": 6.309622955322266, "step": 28730 }, { "epoch": 0.28735, "grad_norm": 7.601759433746338, "learning_rate": 3.5992929292929295e-06, "loss": 6.375357437133789, "step": 28735 }, { "epoch": 0.2874, "grad_norm": 13.467184066772461, "learning_rate": 3.599040404040404e-06, "loss": 6.3205116271972654, "step": 28740 }, { "epoch": 0.28745, "grad_norm": 17.61040496826172, "learning_rate": 3.5987878787878787e-06, "loss": 6.261721420288086, "step": 28745 }, { "epoch": 0.2875, "grad_norm": 5.8795270919799805, "learning_rate": 3.598535353535354e-06, "loss": 6.286089324951172, "step": 28750 }, { "epoch": 0.28755, "grad_norm": 6.664981842041016, "learning_rate": 3.5982828282828284e-06, "loss": 6.305912017822266, "step": 28755 }, { "epoch": 0.2876, "grad_norm": 7.345498561859131, "learning_rate": 3.598030303030303e-06, "loss": 6.212039184570313, "step": 28760 }, { "epoch": 0.28765, "grad_norm": 9.788105010986328, "learning_rate": 3.5977777777777777e-06, "loss": 6.3267253875732425, "step": 28765 }, { "epoch": 0.2877, "grad_norm": 4.504337787628174, "learning_rate": 3.597525252525253e-06, "loss": 6.3018638610839846, "step": 28770 }, { "epoch": 0.28775, "grad_norm": 5.888172626495361, "learning_rate": 3.597272727272728e-06, "loss": 6.299828720092774, "step": 28775 }, { "epoch": 0.2878, "grad_norm": 5.33428955078125, "learning_rate": 3.597020202020202e-06, "loss": 6.332022857666016, "step": 28780 }, { "epoch": 0.28785, "grad_norm": 5.0910773277282715, "learning_rate": 3.5967676767676766e-06, "loss": 6.342835998535156, "step": 28785 }, { "epoch": 0.2879, "grad_norm": 7.898355960845947, "learning_rate": 3.596515151515152e-06, "loss": 6.317800521850586, "step": 28790 }, { "epoch": 0.28795, "grad_norm": 5.999654769897461, "learning_rate": 3.5962626262626267e-06, "loss": 6.346852111816406, "step": 28795 }, { "epoch": 0.288, "grad_norm": 4.864771366119385, "learning_rate": 3.5960101010101014e-06, "loss": 6.346336746215821, "step": 28800 }, { "epoch": 0.28805, "grad_norm": 7.295454025268555, "learning_rate": 3.5957575757575756e-06, "loss": 6.2179405212402346, "step": 28805 }, { "epoch": 0.2881, "grad_norm": 5.1876959800720215, "learning_rate": 3.595505050505051e-06, "loss": 6.2810710906982425, "step": 28810 }, { "epoch": 0.28815, "grad_norm": 5.081201076507568, "learning_rate": 3.5952525252525257e-06, "loss": 6.300830078125, "step": 28815 }, { "epoch": 0.2882, "grad_norm": 7.472127437591553, "learning_rate": 3.5950000000000003e-06, "loss": 6.341290283203125, "step": 28820 }, { "epoch": 0.28825, "grad_norm": 6.3653106689453125, "learning_rate": 3.5947474747474754e-06, "loss": 6.267138671875, "step": 28825 }, { "epoch": 0.2883, "grad_norm": 6.403017520904541, "learning_rate": 3.59449494949495e-06, "loss": 6.265883255004883, "step": 28830 }, { "epoch": 0.28835, "grad_norm": 14.382781982421875, "learning_rate": 3.5942424242424246e-06, "loss": 6.287187194824218, "step": 28835 }, { "epoch": 0.2884, "grad_norm": 8.581934928894043, "learning_rate": 3.5939898989898993e-06, "loss": 6.492272186279297, "step": 28840 }, { "epoch": 0.28845, "grad_norm": 9.566410064697266, "learning_rate": 3.5937373737373743e-06, "loss": 6.306854629516602, "step": 28845 }, { "epoch": 0.2885, "grad_norm": 2.9971132278442383, "learning_rate": 3.593484848484849e-06, "loss": 6.288943481445313, "step": 28850 }, { "epoch": 0.28855, "grad_norm": 7.320003032684326, "learning_rate": 3.5932323232323236e-06, "loss": 6.269635009765625, "step": 28855 }, { "epoch": 0.2886, "grad_norm": 6.678771495819092, "learning_rate": 3.592979797979798e-06, "loss": 6.310543060302734, "step": 28860 }, { "epoch": 0.28865, "grad_norm": 6.354955196380615, "learning_rate": 3.5927272727272733e-06, "loss": 6.272520446777344, "step": 28865 }, { "epoch": 0.2887, "grad_norm": 5.6723408699035645, "learning_rate": 3.592474747474748e-06, "loss": 6.27713623046875, "step": 28870 }, { "epoch": 0.28875, "grad_norm": 3.6920952796936035, "learning_rate": 3.5922222222222225e-06, "loss": 6.32288932800293, "step": 28875 }, { "epoch": 0.2888, "grad_norm": 4.317010402679443, "learning_rate": 3.591969696969697e-06, "loss": 6.3220367431640625, "step": 28880 }, { "epoch": 0.28885, "grad_norm": 6.300610065460205, "learning_rate": 3.591717171717172e-06, "loss": 6.287174987792969, "step": 28885 }, { "epoch": 0.2889, "grad_norm": 5.775434494018555, "learning_rate": 3.591464646464647e-06, "loss": 6.379221343994141, "step": 28890 }, { "epoch": 0.28895, "grad_norm": 9.007369995117188, "learning_rate": 3.5912121212121215e-06, "loss": 6.285770034790039, "step": 28895 }, { "epoch": 0.289, "grad_norm": 4.011603832244873, "learning_rate": 3.590959595959596e-06, "loss": 6.3403575897216795, "step": 28900 }, { "epoch": 0.28905, "grad_norm": 5.603331565856934, "learning_rate": 3.590707070707071e-06, "loss": 6.35119514465332, "step": 28905 }, { "epoch": 0.2891, "grad_norm": 6.6066131591796875, "learning_rate": 3.5904545454545458e-06, "loss": 6.322298049926758, "step": 28910 }, { "epoch": 0.28915, "grad_norm": 5.682127475738525, "learning_rate": 3.5902020202020204e-06, "loss": 6.357522201538086, "step": 28915 }, { "epoch": 0.2892, "grad_norm": 7.731381893157959, "learning_rate": 3.589949494949495e-06, "loss": 6.292919158935547, "step": 28920 }, { "epoch": 0.28925, "grad_norm": 5.588710308074951, "learning_rate": 3.58969696969697e-06, "loss": 6.304866790771484, "step": 28925 }, { "epoch": 0.2893, "grad_norm": 6.755128383636475, "learning_rate": 3.5894444444444447e-06, "loss": 6.2753761291503904, "step": 28930 }, { "epoch": 0.28935, "grad_norm": 13.567451477050781, "learning_rate": 3.5891919191919193e-06, "loss": 6.498242950439453, "step": 28935 }, { "epoch": 0.2894, "grad_norm": 4.328904151916504, "learning_rate": 3.588939393939394e-06, "loss": 6.284892272949219, "step": 28940 }, { "epoch": 0.28945, "grad_norm": 15.966270446777344, "learning_rate": 3.588686868686869e-06, "loss": 6.149523544311523, "step": 28945 }, { "epoch": 0.2895, "grad_norm": 7.5463032722473145, "learning_rate": 3.5884343434343437e-06, "loss": 6.344626617431641, "step": 28950 }, { "epoch": 0.28955, "grad_norm": 9.200867652893066, "learning_rate": 3.5881818181818183e-06, "loss": 6.265273666381836, "step": 28955 }, { "epoch": 0.2896, "grad_norm": 5.613604545593262, "learning_rate": 3.587929292929293e-06, "loss": 6.2631275177001955, "step": 28960 }, { "epoch": 0.28965, "grad_norm": 4.82565450668335, "learning_rate": 3.5876767676767684e-06, "loss": 6.2754966735839846, "step": 28965 }, { "epoch": 0.2897, "grad_norm": 5.451135635375977, "learning_rate": 3.5874242424242426e-06, "loss": 6.299645614624024, "step": 28970 }, { "epoch": 0.28975, "grad_norm": 7.026264190673828, "learning_rate": 3.5871717171717172e-06, "loss": 6.288877487182617, "step": 28975 }, { "epoch": 0.2898, "grad_norm": 4.140593528747559, "learning_rate": 3.586919191919192e-06, "loss": 6.312584686279297, "step": 28980 }, { "epoch": 0.28985, "grad_norm": 4.383130073547363, "learning_rate": 3.5866666666666673e-06, "loss": 6.299951553344727, "step": 28985 }, { "epoch": 0.2899, "grad_norm": 11.480146408081055, "learning_rate": 3.586414141414142e-06, "loss": 6.342953872680664, "step": 28990 }, { "epoch": 0.28995, "grad_norm": 8.813700675964355, "learning_rate": 3.5861616161616166e-06, "loss": 6.452587890625, "step": 28995 }, { "epoch": 0.29, "grad_norm": 9.15721321105957, "learning_rate": 3.585909090909091e-06, "loss": 6.276811218261718, "step": 29000 }, { "epoch": 0.29005, "grad_norm": 5.8108601570129395, "learning_rate": 3.5856565656565663e-06, "loss": 6.325713348388672, "step": 29005 }, { "epoch": 0.2901, "grad_norm": 5.665340900421143, "learning_rate": 3.585404040404041e-06, "loss": 6.284506225585938, "step": 29010 }, { "epoch": 0.29015, "grad_norm": 4.184017658233643, "learning_rate": 3.5851515151515155e-06, "loss": 6.2553142547607425, "step": 29015 }, { "epoch": 0.2902, "grad_norm": 7.18170690536499, "learning_rate": 3.58489898989899e-06, "loss": 6.284331893920898, "step": 29020 }, { "epoch": 0.29025, "grad_norm": 5.53205680847168, "learning_rate": 3.5846464646464652e-06, "loss": 6.273492813110352, "step": 29025 }, { "epoch": 0.2903, "grad_norm": 5.149905681610107, "learning_rate": 3.58439393939394e-06, "loss": 6.273368072509766, "step": 29030 }, { "epoch": 0.29035, "grad_norm": 12.37472152709961, "learning_rate": 3.5841414141414145e-06, "loss": 6.364044952392578, "step": 29035 }, { "epoch": 0.2904, "grad_norm": 5.250348091125488, "learning_rate": 3.583888888888889e-06, "loss": 6.334981536865234, "step": 29040 }, { "epoch": 0.29045, "grad_norm": 17.527746200561523, "learning_rate": 3.583636363636364e-06, "loss": 6.259870529174805, "step": 29045 }, { "epoch": 0.2905, "grad_norm": 4.577603340148926, "learning_rate": 3.583383838383839e-06, "loss": 6.273335266113281, "step": 29050 }, { "epoch": 0.29055, "grad_norm": 13.465856552124023, "learning_rate": 3.5831313131313134e-06, "loss": 6.253815078735352, "step": 29055 }, { "epoch": 0.2906, "grad_norm": 8.97461986541748, "learning_rate": 3.582878787878788e-06, "loss": 6.3315986633300785, "step": 29060 }, { "epoch": 0.29065, "grad_norm": 3.53078293800354, "learning_rate": 3.582626262626263e-06, "loss": 6.344053649902344, "step": 29065 }, { "epoch": 0.2907, "grad_norm": 4.615138530731201, "learning_rate": 3.5823737373737377e-06, "loss": 6.296694946289063, "step": 29070 }, { "epoch": 0.29075, "grad_norm": 34.30817413330078, "learning_rate": 3.5821212121212124e-06, "loss": 6.558651733398437, "step": 29075 }, { "epoch": 0.2908, "grad_norm": 3.9465205669403076, "learning_rate": 3.581868686868687e-06, "loss": 6.2717437744140625, "step": 29080 }, { "epoch": 0.29085, "grad_norm": 8.13430404663086, "learning_rate": 3.581616161616162e-06, "loss": 6.322042846679688, "step": 29085 }, { "epoch": 0.2909, "grad_norm": 6.644056797027588, "learning_rate": 3.5813636363636367e-06, "loss": 6.339138031005859, "step": 29090 }, { "epoch": 0.29095, "grad_norm": 3.916104555130005, "learning_rate": 3.5811111111111113e-06, "loss": 6.347944641113282, "step": 29095 }, { "epoch": 0.291, "grad_norm": 5.36790657043457, "learning_rate": 3.580858585858586e-06, "loss": 6.28611946105957, "step": 29100 }, { "epoch": 0.29105, "grad_norm": 5.422263145446777, "learning_rate": 3.580606060606061e-06, "loss": 6.2707977294921875, "step": 29105 }, { "epoch": 0.2911, "grad_norm": 6.120826721191406, "learning_rate": 3.5803535353535356e-06, "loss": 6.30921630859375, "step": 29110 }, { "epoch": 0.29115, "grad_norm": 7.3816914558410645, "learning_rate": 3.5801010101010103e-06, "loss": 6.273519897460938, "step": 29115 }, { "epoch": 0.2912, "grad_norm": 22.8885498046875, "learning_rate": 3.579848484848485e-06, "loss": 6.252163314819336, "step": 29120 }, { "epoch": 0.29125, "grad_norm": 7.437877655029297, "learning_rate": 3.57959595959596e-06, "loss": 6.2931255340576175, "step": 29125 }, { "epoch": 0.2913, "grad_norm": 6.049250602722168, "learning_rate": 3.5793434343434346e-06, "loss": 6.301666259765625, "step": 29130 }, { "epoch": 0.29135, "grad_norm": 6.148924350738525, "learning_rate": 3.579090909090909e-06, "loss": 6.313642501831055, "step": 29135 }, { "epoch": 0.2914, "grad_norm": 5.537292957305908, "learning_rate": 3.578838383838384e-06, "loss": 6.279273223876953, "step": 29140 }, { "epoch": 0.29145, "grad_norm": 8.600954055786133, "learning_rate": 3.578585858585859e-06, "loss": 6.323283386230469, "step": 29145 }, { "epoch": 0.2915, "grad_norm": 4.2532806396484375, "learning_rate": 3.5783333333333335e-06, "loss": 6.320634460449218, "step": 29150 }, { "epoch": 0.29155, "grad_norm": 3.7155098915100098, "learning_rate": 3.578080808080808e-06, "loss": 6.298206329345703, "step": 29155 }, { "epoch": 0.2916, "grad_norm": 7.740283489227295, "learning_rate": 3.5778282828282828e-06, "loss": 6.322317123413086, "step": 29160 }, { "epoch": 0.29165, "grad_norm": 4.459195137023926, "learning_rate": 3.577575757575758e-06, "loss": 6.370047760009766, "step": 29165 }, { "epoch": 0.2917, "grad_norm": 41.54624938964844, "learning_rate": 3.5773232323232325e-06, "loss": 6.199592590332031, "step": 29170 }, { "epoch": 0.29175, "grad_norm": 3.356029987335205, "learning_rate": 3.577070707070707e-06, "loss": 6.085910034179688, "step": 29175 }, { "epoch": 0.2918, "grad_norm": 7.024199962615967, "learning_rate": 3.5768181818181817e-06, "loss": 6.28079833984375, "step": 29180 }, { "epoch": 0.29185, "grad_norm": 7.20844841003418, "learning_rate": 3.576565656565657e-06, "loss": 6.275858306884766, "step": 29185 }, { "epoch": 0.2919, "grad_norm": 7.901179313659668, "learning_rate": 3.576313131313132e-06, "loss": 6.474188995361328, "step": 29190 }, { "epoch": 0.29195, "grad_norm": 5.406975746154785, "learning_rate": 3.576060606060606e-06, "loss": 6.313428115844727, "step": 29195 }, { "epoch": 0.292, "grad_norm": 7.44565486907959, "learning_rate": 3.5758080808080807e-06, "loss": 6.3004608154296875, "step": 29200 }, { "epoch": 0.29205, "grad_norm": 5.292686939239502, "learning_rate": 3.575555555555556e-06, "loss": 6.281940460205078, "step": 29205 }, { "epoch": 0.2921, "grad_norm": 5.81514835357666, "learning_rate": 3.5753030303030308e-06, "loss": 6.348016357421875, "step": 29210 }, { "epoch": 0.29215, "grad_norm": 4.0477142333984375, "learning_rate": 3.5750505050505054e-06, "loss": 6.310513687133789, "step": 29215 }, { "epoch": 0.2922, "grad_norm": 6.460570812225342, "learning_rate": 3.5747979797979796e-06, "loss": 6.3128093719482425, "step": 29220 }, { "epoch": 0.29225, "grad_norm": 4.725200176239014, "learning_rate": 3.574545454545455e-06, "loss": 6.317466354370117, "step": 29225 }, { "epoch": 0.2923, "grad_norm": 4.774486541748047, "learning_rate": 3.5742929292929297e-06, "loss": 6.2786205291748045, "step": 29230 }, { "epoch": 0.29235, "grad_norm": 5.3465576171875, "learning_rate": 3.5740404040404043e-06, "loss": 6.324010848999023, "step": 29235 }, { "epoch": 0.2924, "grad_norm": 5.891499042510986, "learning_rate": 3.573787878787879e-06, "loss": 6.284039306640625, "step": 29240 }, { "epoch": 0.29245, "grad_norm": 8.401041984558105, "learning_rate": 3.573535353535354e-06, "loss": 6.272295379638672, "step": 29245 }, { "epoch": 0.2925, "grad_norm": 3.6559360027313232, "learning_rate": 3.5732828282828287e-06, "loss": 6.298555374145508, "step": 29250 }, { "epoch": 0.29255, "grad_norm": 4.162300109863281, "learning_rate": 3.5730303030303033e-06, "loss": 6.303508758544922, "step": 29255 }, { "epoch": 0.2926, "grad_norm": 3.364703893661499, "learning_rate": 3.5727777777777783e-06, "loss": 6.252381896972656, "step": 29260 }, { "epoch": 0.29265, "grad_norm": 5.502880573272705, "learning_rate": 3.572525252525253e-06, "loss": 6.319469451904297, "step": 29265 }, { "epoch": 0.2927, "grad_norm": 7.987439155578613, "learning_rate": 3.5722727272727276e-06, "loss": 6.29638786315918, "step": 29270 }, { "epoch": 0.29275, "grad_norm": 9.062360763549805, "learning_rate": 3.5720202020202022e-06, "loss": 6.339817047119141, "step": 29275 }, { "epoch": 0.2928, "grad_norm": 3.9892537593841553, "learning_rate": 3.5717676767676773e-06, "loss": 6.323897933959961, "step": 29280 }, { "epoch": 0.29285, "grad_norm": 7.5546650886535645, "learning_rate": 3.571515151515152e-06, "loss": 6.294126510620117, "step": 29285 }, { "epoch": 0.2929, "grad_norm": 8.40169620513916, "learning_rate": 3.5712626262626266e-06, "loss": 6.30529556274414, "step": 29290 }, { "epoch": 0.29295, "grad_norm": 7.2665181159973145, "learning_rate": 3.571010101010101e-06, "loss": 6.357051086425781, "step": 29295 }, { "epoch": 0.293, "grad_norm": 4.001103401184082, "learning_rate": 3.5707575757575762e-06, "loss": 6.29302749633789, "step": 29300 }, { "epoch": 0.29305, "grad_norm": 6.259092330932617, "learning_rate": 3.570505050505051e-06, "loss": 6.241490173339844, "step": 29305 }, { "epoch": 0.2931, "grad_norm": 10.678960800170898, "learning_rate": 3.5702525252525255e-06, "loss": 6.337287139892578, "step": 29310 }, { "epoch": 0.29315, "grad_norm": 6.0177001953125, "learning_rate": 3.57e-06, "loss": 6.304063415527343, "step": 29315 }, { "epoch": 0.2932, "grad_norm": 5.336589813232422, "learning_rate": 3.569747474747475e-06, "loss": 6.324079132080078, "step": 29320 }, { "epoch": 0.29325, "grad_norm": 4.683729648590088, "learning_rate": 3.56949494949495e-06, "loss": 6.27764663696289, "step": 29325 }, { "epoch": 0.2933, "grad_norm": 5.715831756591797, "learning_rate": 3.5692424242424244e-06, "loss": 6.305743408203125, "step": 29330 }, { "epoch": 0.29335, "grad_norm": 6.360212802886963, "learning_rate": 3.568989898989899e-06, "loss": 6.303823471069336, "step": 29335 }, { "epoch": 0.2934, "grad_norm": 9.453848838806152, "learning_rate": 3.568737373737374e-06, "loss": 6.3699188232421875, "step": 29340 }, { "epoch": 0.29345, "grad_norm": 5.854568004608154, "learning_rate": 3.5684848484848488e-06, "loss": 6.4501701354980465, "step": 29345 }, { "epoch": 0.2935, "grad_norm": 7.13533353805542, "learning_rate": 3.5682323232323234e-06, "loss": 6.320172119140625, "step": 29350 }, { "epoch": 0.29355, "grad_norm": 7.607511043548584, "learning_rate": 3.567979797979798e-06, "loss": 6.665991973876953, "step": 29355 }, { "epoch": 0.2936, "grad_norm": 7.771399021148682, "learning_rate": 3.567727272727273e-06, "loss": 6.2485809326171875, "step": 29360 }, { "epoch": 0.29365, "grad_norm": 8.342504501342773, "learning_rate": 3.5674747474747477e-06, "loss": 6.312671661376953, "step": 29365 }, { "epoch": 0.2937, "grad_norm": 5.386643409729004, "learning_rate": 3.5672222222222223e-06, "loss": 6.311551284790039, "step": 29370 }, { "epoch": 0.29375, "grad_norm": 6.3127851486206055, "learning_rate": 3.566969696969697e-06, "loss": 6.309173583984375, "step": 29375 }, { "epoch": 0.2938, "grad_norm": 7.319690227508545, "learning_rate": 3.5667171717171724e-06, "loss": 6.332748794555664, "step": 29380 }, { "epoch": 0.29385, "grad_norm": 5.301462650299072, "learning_rate": 3.5664646464646466e-06, "loss": 6.343075561523437, "step": 29385 }, { "epoch": 0.2939, "grad_norm": 13.324609756469727, "learning_rate": 3.5662121212121213e-06, "loss": 6.195567321777344, "step": 29390 }, { "epoch": 0.29395, "grad_norm": 6.056017875671387, "learning_rate": 3.565959595959596e-06, "loss": 6.291217041015625, "step": 29395 }, { "epoch": 0.294, "grad_norm": 6.894514560699463, "learning_rate": 3.5657070707070714e-06, "loss": 6.317230224609375, "step": 29400 }, { "epoch": 0.29405, "grad_norm": 9.174859046936035, "learning_rate": 3.565454545454546e-06, "loss": 6.405181121826172, "step": 29405 }, { "epoch": 0.2941, "grad_norm": 5.910061359405518, "learning_rate": 3.5652020202020206e-06, "loss": 6.338095474243164, "step": 29410 }, { "epoch": 0.29415, "grad_norm": 4.480605125427246, "learning_rate": 3.564949494949495e-06, "loss": 6.249681091308593, "step": 29415 }, { "epoch": 0.2942, "grad_norm": 8.932344436645508, "learning_rate": 3.5646969696969703e-06, "loss": 6.322673034667969, "step": 29420 }, { "epoch": 0.29425, "grad_norm": 5.6024956703186035, "learning_rate": 3.564444444444445e-06, "loss": 6.304268646240234, "step": 29425 }, { "epoch": 0.2943, "grad_norm": 7.965230464935303, "learning_rate": 3.5641919191919196e-06, "loss": 6.258568954467774, "step": 29430 }, { "epoch": 0.29435, "grad_norm": 7.790179252624512, "learning_rate": 3.563939393939394e-06, "loss": 6.31065673828125, "step": 29435 }, { "epoch": 0.2944, "grad_norm": 5.013424873352051, "learning_rate": 3.5636868686868693e-06, "loss": 6.291405487060547, "step": 29440 }, { "epoch": 0.29445, "grad_norm": 5.350167751312256, "learning_rate": 3.563434343434344e-06, "loss": 6.296469879150391, "step": 29445 }, { "epoch": 0.2945, "grad_norm": 4.955093860626221, "learning_rate": 3.5631818181818185e-06, "loss": 6.32977523803711, "step": 29450 }, { "epoch": 0.29455, "grad_norm": 8.105680465698242, "learning_rate": 3.562929292929293e-06, "loss": 6.381786346435547, "step": 29455 }, { "epoch": 0.2946, "grad_norm": 10.865351676940918, "learning_rate": 3.562676767676768e-06, "loss": 6.327909851074219, "step": 29460 }, { "epoch": 0.29465, "grad_norm": 5.101296901702881, "learning_rate": 3.562424242424243e-06, "loss": 6.276557922363281, "step": 29465 }, { "epoch": 0.2947, "grad_norm": 6.576651096343994, "learning_rate": 3.5621717171717175e-06, "loss": 6.273812103271484, "step": 29470 }, { "epoch": 0.29475, "grad_norm": 5.275091648101807, "learning_rate": 3.561919191919192e-06, "loss": 6.301033020019531, "step": 29475 }, { "epoch": 0.2948, "grad_norm": 4.066470146179199, "learning_rate": 3.561666666666667e-06, "loss": 6.303281021118164, "step": 29480 }, { "epoch": 0.29485, "grad_norm": 7.759692192077637, "learning_rate": 3.5614141414141418e-06, "loss": 6.294646835327148, "step": 29485 }, { "epoch": 0.2949, "grad_norm": 35.01994705200195, "learning_rate": 3.5611616161616164e-06, "loss": 6.164224243164062, "step": 29490 }, { "epoch": 0.29495, "grad_norm": 7.678826808929443, "learning_rate": 3.560909090909091e-06, "loss": 6.227882766723633, "step": 29495 }, { "epoch": 0.295, "grad_norm": 4.337158679962158, "learning_rate": 3.560656565656566e-06, "loss": 6.282904052734375, "step": 29500 }, { "epoch": 0.29505, "grad_norm": 7.657970905303955, "learning_rate": 3.5604040404040407e-06, "loss": 6.302424621582031, "step": 29505 }, { "epoch": 0.2951, "grad_norm": 6.034323692321777, "learning_rate": 3.5601515151515154e-06, "loss": 6.271267700195312, "step": 29510 }, { "epoch": 0.29515, "grad_norm": 5.539667129516602, "learning_rate": 3.55989898989899e-06, "loss": 6.35978889465332, "step": 29515 }, { "epoch": 0.2952, "grad_norm": 14.03776741027832, "learning_rate": 3.559646464646465e-06, "loss": 6.305487442016601, "step": 29520 }, { "epoch": 0.29525, "grad_norm": 4.768974304199219, "learning_rate": 3.5593939393939397e-06, "loss": 6.1936897277832035, "step": 29525 }, { "epoch": 0.2953, "grad_norm": 6.302005290985107, "learning_rate": 3.5591414141414143e-06, "loss": 6.271580505371094, "step": 29530 }, { "epoch": 0.29535, "grad_norm": 4.648326396942139, "learning_rate": 3.558888888888889e-06, "loss": 6.3098091125488285, "step": 29535 }, { "epoch": 0.2954, "grad_norm": 10.270318031311035, "learning_rate": 3.558636363636364e-06, "loss": 6.278652191162109, "step": 29540 }, { "epoch": 0.29545, "grad_norm": 7.089316368103027, "learning_rate": 3.5583838383838386e-06, "loss": 6.323274612426758, "step": 29545 }, { "epoch": 0.2955, "grad_norm": 5.445096492767334, "learning_rate": 3.5581313131313132e-06, "loss": 6.278923034667969, "step": 29550 }, { "epoch": 0.29555, "grad_norm": 4.289143085479736, "learning_rate": 3.557878787878788e-06, "loss": 6.313589859008789, "step": 29555 }, { "epoch": 0.2956, "grad_norm": 13.919963836669922, "learning_rate": 3.557626262626263e-06, "loss": 6.256758499145508, "step": 29560 }, { "epoch": 0.29565, "grad_norm": 9.134503364562988, "learning_rate": 3.5573737373737376e-06, "loss": 6.2950439453125, "step": 29565 }, { "epoch": 0.2957, "grad_norm": 5.561146259307861, "learning_rate": 3.557121212121212e-06, "loss": 6.365909576416016, "step": 29570 }, { "epoch": 0.29575, "grad_norm": 13.374863624572754, "learning_rate": 3.556868686868687e-06, "loss": 6.332316589355469, "step": 29575 }, { "epoch": 0.2958, "grad_norm": 3.0493996143341064, "learning_rate": 3.556616161616162e-06, "loss": 6.309603500366211, "step": 29580 }, { "epoch": 0.29585, "grad_norm": 9.188202857971191, "learning_rate": 3.5563636363636365e-06, "loss": 6.342683029174805, "step": 29585 }, { "epoch": 0.2959, "grad_norm": 7.470753192901611, "learning_rate": 3.556111111111111e-06, "loss": 6.308557510375977, "step": 29590 }, { "epoch": 0.29595, "grad_norm": 3.8476085662841797, "learning_rate": 3.5558585858585858e-06, "loss": 6.273486709594726, "step": 29595 }, { "epoch": 0.296, "grad_norm": 9.957229614257812, "learning_rate": 3.5556060606060612e-06, "loss": 6.349224090576172, "step": 29600 }, { "epoch": 0.29605, "grad_norm": 6.487241268157959, "learning_rate": 3.555353535353536e-06, "loss": 6.318891906738282, "step": 29605 }, { "epoch": 0.2961, "grad_norm": 5.474497318267822, "learning_rate": 3.55510101010101e-06, "loss": 6.3058631896972654, "step": 29610 }, { "epoch": 0.29615, "grad_norm": 3.405442237854004, "learning_rate": 3.5548484848484847e-06, "loss": 6.3488616943359375, "step": 29615 }, { "epoch": 0.2962, "grad_norm": 4.462718963623047, "learning_rate": 3.55459595959596e-06, "loss": 6.2877960205078125, "step": 29620 }, { "epoch": 0.29625, "grad_norm": 6.832292556762695, "learning_rate": 3.554343434343435e-06, "loss": 6.293314743041992, "step": 29625 }, { "epoch": 0.2963, "grad_norm": 5.779564380645752, "learning_rate": 3.5540909090909094e-06, "loss": 6.3362171173095705, "step": 29630 }, { "epoch": 0.29635, "grad_norm": 4.0064592361450195, "learning_rate": 3.553838383838384e-06, "loss": 6.334922027587891, "step": 29635 }, { "epoch": 0.2964, "grad_norm": 7.410496234893799, "learning_rate": 3.553585858585859e-06, "loss": 6.32331428527832, "step": 29640 }, { "epoch": 0.29645, "grad_norm": 6.6105146408081055, "learning_rate": 3.5533333333333338e-06, "loss": 6.289257049560547, "step": 29645 }, { "epoch": 0.2965, "grad_norm": 4.35793399810791, "learning_rate": 3.5530808080808084e-06, "loss": 6.311123657226562, "step": 29650 }, { "epoch": 0.29655, "grad_norm": 5.256542682647705, "learning_rate": 3.552828282828283e-06, "loss": 6.265432739257813, "step": 29655 }, { "epoch": 0.2966, "grad_norm": 5.709907054901123, "learning_rate": 3.552575757575758e-06, "loss": 6.267720031738281, "step": 29660 }, { "epoch": 0.29665, "grad_norm": 4.539068222045898, "learning_rate": 3.5523232323232327e-06, "loss": 6.281230545043945, "step": 29665 }, { "epoch": 0.2967, "grad_norm": 8.578764915466309, "learning_rate": 3.5520707070707073e-06, "loss": 6.283077239990234, "step": 29670 }, { "epoch": 0.29675, "grad_norm": 4.906740665435791, "learning_rate": 3.551818181818182e-06, "loss": 6.290102005004883, "step": 29675 }, { "epoch": 0.2968, "grad_norm": 7.908860683441162, "learning_rate": 3.551565656565657e-06, "loss": 6.287265396118164, "step": 29680 }, { "epoch": 0.29685, "grad_norm": 6.177100658416748, "learning_rate": 3.5513131313131316e-06, "loss": 6.421934509277344, "step": 29685 }, { "epoch": 0.2969, "grad_norm": 6.305956840515137, "learning_rate": 3.5510606060606063e-06, "loss": 6.285928344726562, "step": 29690 }, { "epoch": 0.29695, "grad_norm": 7.294287204742432, "learning_rate": 3.5508080808080813e-06, "loss": 6.256629943847656, "step": 29695 }, { "epoch": 0.297, "grad_norm": 6.181205749511719, "learning_rate": 3.550555555555556e-06, "loss": 6.263467788696289, "step": 29700 }, { "epoch": 0.29705, "grad_norm": 6.430354118347168, "learning_rate": 3.5503030303030306e-06, "loss": 6.277581787109375, "step": 29705 }, { "epoch": 0.2971, "grad_norm": 9.443880081176758, "learning_rate": 3.5500505050505052e-06, "loss": 6.261558151245117, "step": 29710 }, { "epoch": 0.29715, "grad_norm": 24.640432357788086, "learning_rate": 3.5497979797979803e-06, "loss": 5.906934356689453, "step": 29715 }, { "epoch": 0.2972, "grad_norm": 24.947561264038086, "learning_rate": 3.549545454545455e-06, "loss": 5.436868667602539, "step": 29720 }, { "epoch": 0.29725, "grad_norm": 7.861790657043457, "learning_rate": 3.5492929292929295e-06, "loss": 6.244410705566406, "step": 29725 }, { "epoch": 0.2973, "grad_norm": 5.021613121032715, "learning_rate": 3.549040404040404e-06, "loss": 6.3192138671875, "step": 29730 }, { "epoch": 0.29735, "grad_norm": 9.627565383911133, "learning_rate": 3.5487878787878792e-06, "loss": 6.248217010498047, "step": 29735 }, { "epoch": 0.2974, "grad_norm": 8.743024826049805, "learning_rate": 3.548535353535354e-06, "loss": 6.27972640991211, "step": 29740 }, { "epoch": 0.29745, "grad_norm": 4.4820427894592285, "learning_rate": 3.5482828282828285e-06, "loss": 6.326733779907227, "step": 29745 }, { "epoch": 0.2975, "grad_norm": 6.135499477386475, "learning_rate": 3.548030303030303e-06, "loss": 6.343452835083008, "step": 29750 }, { "epoch": 0.29755, "grad_norm": 4.53029203414917, "learning_rate": 3.547777777777778e-06, "loss": 6.296767807006836, "step": 29755 }, { "epoch": 0.2976, "grad_norm": 6.879767894744873, "learning_rate": 3.547525252525253e-06, "loss": 6.285468292236328, "step": 29760 }, { "epoch": 0.29765, "grad_norm": 5.981883525848389, "learning_rate": 3.5472727272727274e-06, "loss": 6.292559051513672, "step": 29765 }, { "epoch": 0.2977, "grad_norm": 3.529245615005493, "learning_rate": 3.547020202020202e-06, "loss": 6.2896484375, "step": 29770 }, { "epoch": 0.29775, "grad_norm": 4.5766096115112305, "learning_rate": 3.546767676767677e-06, "loss": 6.282649993896484, "step": 29775 }, { "epoch": 0.2978, "grad_norm": 3.258888006210327, "learning_rate": 3.5465151515151517e-06, "loss": 6.310800552368164, "step": 29780 }, { "epoch": 0.29785, "grad_norm": 5.118357181549072, "learning_rate": 3.5462626262626264e-06, "loss": 6.332068634033203, "step": 29785 }, { "epoch": 0.2979, "grad_norm": 6.421121597290039, "learning_rate": 3.546010101010101e-06, "loss": 6.382361602783203, "step": 29790 }, { "epoch": 0.29795, "grad_norm": 4.995228290557861, "learning_rate": 3.5457575757575765e-06, "loss": 6.294057464599609, "step": 29795 }, { "epoch": 0.298, "grad_norm": 6.232391357421875, "learning_rate": 3.545505050505051e-06, "loss": 6.4007926940917965, "step": 29800 }, { "epoch": 0.29805, "grad_norm": 4.867082595825195, "learning_rate": 3.5452525252525253e-06, "loss": 6.291249084472656, "step": 29805 }, { "epoch": 0.2981, "grad_norm": 15.936858177185059, "learning_rate": 3.545e-06, "loss": 6.340728759765625, "step": 29810 }, { "epoch": 0.29815, "grad_norm": 5.088853359222412, "learning_rate": 3.5447474747474754e-06, "loss": 6.3107250213623045, "step": 29815 }, { "epoch": 0.2982, "grad_norm": 8.644691467285156, "learning_rate": 3.54449494949495e-06, "loss": 6.274047088623047, "step": 29820 }, { "epoch": 0.29825, "grad_norm": 4.894280433654785, "learning_rate": 3.5442424242424247e-06, "loss": 6.305270767211914, "step": 29825 }, { "epoch": 0.2983, "grad_norm": 3.2908434867858887, "learning_rate": 3.543989898989899e-06, "loss": 6.302842330932617, "step": 29830 }, { "epoch": 0.29835, "grad_norm": 15.87802505493164, "learning_rate": 3.5437373737373744e-06, "loss": 6.508995056152344, "step": 29835 }, { "epoch": 0.2984, "grad_norm": 25.657636642456055, "learning_rate": 3.543484848484849e-06, "loss": 6.492586517333985, "step": 29840 }, { "epoch": 0.29845, "grad_norm": 15.43842887878418, "learning_rate": 3.5432323232323236e-06, "loss": 6.437177276611328, "step": 29845 }, { "epoch": 0.2985, "grad_norm": 7.3887786865234375, "learning_rate": 3.5429797979797983e-06, "loss": 6.345815277099609, "step": 29850 }, { "epoch": 0.29855, "grad_norm": 6.90225076675415, "learning_rate": 3.5427272727272733e-06, "loss": 6.340507888793946, "step": 29855 }, { "epoch": 0.2986, "grad_norm": 6.959244251251221, "learning_rate": 3.542474747474748e-06, "loss": 6.191806030273438, "step": 29860 }, { "epoch": 0.29865, "grad_norm": 14.063956260681152, "learning_rate": 3.5422222222222226e-06, "loss": 6.44783935546875, "step": 29865 }, { "epoch": 0.2987, "grad_norm": 5.093909740447998, "learning_rate": 3.541969696969697e-06, "loss": 6.415027618408203, "step": 29870 }, { "epoch": 0.29875, "grad_norm": 6.303191184997559, "learning_rate": 3.5417171717171722e-06, "loss": 6.378090286254883, "step": 29875 }, { "epoch": 0.2988, "grad_norm": 8.031275749206543, "learning_rate": 3.541464646464647e-06, "loss": 6.312608337402343, "step": 29880 }, { "epoch": 0.29885, "grad_norm": 4.717705249786377, "learning_rate": 3.5412121212121215e-06, "loss": 6.352294921875, "step": 29885 }, { "epoch": 0.2989, "grad_norm": 7.987373352050781, "learning_rate": 3.540959595959596e-06, "loss": 6.286529922485352, "step": 29890 }, { "epoch": 0.29895, "grad_norm": 6.087131977081299, "learning_rate": 3.540707070707071e-06, "loss": 6.306478118896484, "step": 29895 }, { "epoch": 0.299, "grad_norm": 6.794095516204834, "learning_rate": 3.540454545454546e-06, "loss": 6.3231201171875, "step": 29900 }, { "epoch": 0.29905, "grad_norm": 6.107101917266846, "learning_rate": 3.5402020202020205e-06, "loss": 6.311095428466797, "step": 29905 }, { "epoch": 0.2991, "grad_norm": 6.169513702392578, "learning_rate": 3.539949494949495e-06, "loss": 6.310579681396485, "step": 29910 }, { "epoch": 0.29915, "grad_norm": 6.874863147735596, "learning_rate": 3.53969696969697e-06, "loss": 6.300856018066407, "step": 29915 }, { "epoch": 0.2992, "grad_norm": 3.1986083984375, "learning_rate": 3.5394444444444448e-06, "loss": 6.299204254150391, "step": 29920 }, { "epoch": 0.29925, "grad_norm": 6.728003978729248, "learning_rate": 3.5391919191919194e-06, "loss": 6.316722106933594, "step": 29925 }, { "epoch": 0.2993, "grad_norm": 7.261544227600098, "learning_rate": 3.538939393939394e-06, "loss": 6.29705810546875, "step": 29930 }, { "epoch": 0.29935, "grad_norm": 4.328718185424805, "learning_rate": 3.538686868686869e-06, "loss": 6.29155387878418, "step": 29935 }, { "epoch": 0.2994, "grad_norm": 3.990241050720215, "learning_rate": 3.5384343434343437e-06, "loss": 6.246854400634765, "step": 29940 }, { "epoch": 0.29945, "grad_norm": 5.74593448638916, "learning_rate": 3.5381818181818183e-06, "loss": 6.335731124877929, "step": 29945 }, { "epoch": 0.2995, "grad_norm": 6.287469863891602, "learning_rate": 3.537929292929293e-06, "loss": 6.322204208374023, "step": 29950 }, { "epoch": 0.29955, "grad_norm": 12.371041297912598, "learning_rate": 3.537676767676768e-06, "loss": 6.296611785888672, "step": 29955 }, { "epoch": 0.2996, "grad_norm": 13.603057861328125, "learning_rate": 3.5374242424242427e-06, "loss": 6.462675476074219, "step": 29960 }, { "epoch": 0.29965, "grad_norm": 4.240120887756348, "learning_rate": 3.5371717171717173e-06, "loss": 6.298146438598633, "step": 29965 }, { "epoch": 0.2997, "grad_norm": 5.512221813201904, "learning_rate": 3.536919191919192e-06, "loss": 6.379457473754883, "step": 29970 }, { "epoch": 0.29975, "grad_norm": 16.020565032958984, "learning_rate": 3.536666666666667e-06, "loss": 6.414798736572266, "step": 29975 }, { "epoch": 0.2998, "grad_norm": 8.86745834350586, "learning_rate": 3.5364141414141416e-06, "loss": 6.298620986938476, "step": 29980 }, { "epoch": 0.29985, "grad_norm": 3.77581787109375, "learning_rate": 3.5361616161616162e-06, "loss": 6.265024566650391, "step": 29985 }, { "epoch": 0.2999, "grad_norm": 6.239947319030762, "learning_rate": 3.535909090909091e-06, "loss": 6.304644775390625, "step": 29990 }, { "epoch": 0.29995, "grad_norm": 4.137029647827148, "learning_rate": 3.535656565656566e-06, "loss": 6.319628143310547, "step": 29995 }, { "epoch": 0.3, "grad_norm": 6.027128219604492, "learning_rate": 3.5354040404040405e-06, "loss": 6.296832275390625, "step": 30000 }, { "epoch": 0.30005, "grad_norm": 3.7814910411834717, "learning_rate": 3.535151515151515e-06, "loss": 6.277749633789062, "step": 30005 }, { "epoch": 0.3001, "grad_norm": 4.204829216003418, "learning_rate": 3.53489898989899e-06, "loss": 6.244540023803711, "step": 30010 }, { "epoch": 0.30015, "grad_norm": 5.629079341888428, "learning_rate": 3.5346464646464653e-06, "loss": 6.274118804931641, "step": 30015 }, { "epoch": 0.3002, "grad_norm": 10.304169654846191, "learning_rate": 3.53439393939394e-06, "loss": 6.397298049926758, "step": 30020 }, { "epoch": 0.30025, "grad_norm": 5.382565975189209, "learning_rate": 3.534141414141414e-06, "loss": 6.222068405151367, "step": 30025 }, { "epoch": 0.3003, "grad_norm": 4.017219543457031, "learning_rate": 3.5338888888888887e-06, "loss": 6.283623886108399, "step": 30030 }, { "epoch": 0.30035, "grad_norm": 5.777952671051025, "learning_rate": 3.5336363636363642e-06, "loss": 6.287588500976563, "step": 30035 }, { "epoch": 0.3004, "grad_norm": 4.398703575134277, "learning_rate": 3.533383838383839e-06, "loss": 6.273918914794922, "step": 30040 }, { "epoch": 0.30045, "grad_norm": 16.484294891357422, "learning_rate": 3.5331313131313135e-06, "loss": 6.261434936523438, "step": 30045 }, { "epoch": 0.3005, "grad_norm": 5.08121919631958, "learning_rate": 3.532878787878788e-06, "loss": 6.341363906860352, "step": 30050 }, { "epoch": 0.30055, "grad_norm": 8.416431427001953, "learning_rate": 3.532626262626263e-06, "loss": 6.346788787841797, "step": 30055 }, { "epoch": 0.3006, "grad_norm": 6.134696960449219, "learning_rate": 3.532373737373738e-06, "loss": 6.311572265625, "step": 30060 }, { "epoch": 0.30065, "grad_norm": 11.407038688659668, "learning_rate": 3.5321212121212124e-06, "loss": 6.33343620300293, "step": 30065 }, { "epoch": 0.3007, "grad_norm": 8.71596622467041, "learning_rate": 3.531868686868687e-06, "loss": 6.592255401611328, "step": 30070 }, { "epoch": 0.30075, "grad_norm": 47.59225082397461, "learning_rate": 3.531616161616162e-06, "loss": 6.6562049865722654, "step": 30075 }, { "epoch": 0.3008, "grad_norm": 20.232004165649414, "learning_rate": 3.5313636363636367e-06, "loss": 6.269707489013672, "step": 30080 }, { "epoch": 0.30085, "grad_norm": 5.101109504699707, "learning_rate": 3.5311111111111114e-06, "loss": 6.319311904907226, "step": 30085 }, { "epoch": 0.3009, "grad_norm": 12.357074737548828, "learning_rate": 3.530858585858586e-06, "loss": 6.2411540985107425, "step": 30090 }, { "epoch": 0.30095, "grad_norm": 7.410624027252197, "learning_rate": 3.530606060606061e-06, "loss": 6.3332359313964846, "step": 30095 }, { "epoch": 0.301, "grad_norm": 7.05325174331665, "learning_rate": 3.5303535353535357e-06, "loss": 6.2748779296875, "step": 30100 }, { "epoch": 5e-05, "grad_norm": 5.484194278717041, "learning_rate": 3.5301010101010103e-06, "loss": 6.325788879394532, "step": 30105 }, { "epoch": 0.0001, "grad_norm": 4.1424880027771, "learning_rate": 3.5298484848484854e-06, "loss": 6.352497100830078, "step": 30110 }, { "epoch": 0.00015, "grad_norm": 5.890789031982422, "learning_rate": 3.52959595959596e-06, "loss": 6.31262435913086, "step": 30115 }, { "epoch": 0.0002, "grad_norm": 5.297179222106934, "learning_rate": 3.5293434343434346e-06, "loss": 6.30126953125, "step": 30120 }, { "epoch": 0.00025, "grad_norm": 4.809101104736328, "learning_rate": 3.5290909090909093e-06, "loss": 6.285891723632813, "step": 30125 }, { "epoch": 0.0003, "grad_norm": 8.304152488708496, "learning_rate": 3.5288383838383843e-06, "loss": 6.30511474609375, "step": 30130 }, { "epoch": 0.00035, "grad_norm": 7.660754203796387, "learning_rate": 3.528585858585859e-06, "loss": 6.294705581665039, "step": 30135 }, { "epoch": 0.0004, "grad_norm": 7.110419273376465, "learning_rate": 3.5283333333333336e-06, "loss": 6.306806182861328, "step": 30140 }, { "epoch": 0.00045, "grad_norm": 9.31564998626709, "learning_rate": 3.528080808080808e-06, "loss": 6.400209045410156, "step": 30145 }, { "epoch": 0.0005, "grad_norm": 5.657293796539307, "learning_rate": 3.5278282828282833e-06, "loss": 6.380509948730468, "step": 30150 }, { "epoch": 0.00055, "grad_norm": 4.684988021850586, "learning_rate": 3.527575757575758e-06, "loss": 6.286779403686523, "step": 30155 }, { "epoch": 0.0006, "grad_norm": 6.626471996307373, "learning_rate": 3.5273232323232325e-06, "loss": 6.2453960418701175, "step": 30160 }, { "epoch": 0.00065, "grad_norm": 5.251129627227783, "learning_rate": 3.527070707070707e-06, "loss": 6.2742431640625, "step": 30165 }, { "epoch": 0.0007, "grad_norm": 5.405531883239746, "learning_rate": 3.526818181818182e-06, "loss": 6.338249588012696, "step": 30170 }, { "epoch": 0.00075, "grad_norm": 4.635233402252197, "learning_rate": 3.526565656565657e-06, "loss": 6.280062103271485, "step": 30175 }, { "epoch": 0.0008, "grad_norm": 7.615424156188965, "learning_rate": 3.5263131313131315e-06, "loss": 6.319436645507812, "step": 30180 }, { "epoch": 0.00085, "grad_norm": 7.014859676361084, "learning_rate": 3.526060606060606e-06, "loss": 6.3284858703613285, "step": 30185 }, { "epoch": 0.0009, "grad_norm": 7.948516845703125, "learning_rate": 3.525808080808081e-06, "loss": 6.306148910522461, "step": 30190 }, { "epoch": 0.00095, "grad_norm": 4.9987568855285645, "learning_rate": 3.5255555555555558e-06, "loss": 6.297168731689453, "step": 30195 }, { "epoch": 0.001, "grad_norm": 9.004301071166992, "learning_rate": 3.5253030303030304e-06, "loss": 6.3516700744628904, "step": 30200 }, { "epoch": 0.00105, "grad_norm": 6.185268402099609, "learning_rate": 3.525050505050505e-06, "loss": 6.23489990234375, "step": 30205 }, { "epoch": 0.0011, "grad_norm": 6.5398125648498535, "learning_rate": 3.5247979797979805e-06, "loss": 6.331185150146484, "step": 30210 }, { "epoch": 0.00115, "grad_norm": 4.726842403411865, "learning_rate": 3.524545454545455e-06, "loss": 6.253910827636719, "step": 30215 }, { "epoch": 0.0012, "grad_norm": 4.543725967407227, "learning_rate": 3.5242929292929294e-06, "loss": 6.309986114501953, "step": 30220 }, { "epoch": 0.00125, "grad_norm": 7.547321319580078, "learning_rate": 3.524040404040404e-06, "loss": 6.300954437255859, "step": 30225 }, { "epoch": 0.0013, "grad_norm": 9.456096649169922, "learning_rate": 3.5237878787878795e-06, "loss": 6.324053573608398, "step": 30230 }, { "epoch": 0.00135, "grad_norm": 11.498023986816406, "learning_rate": 3.523535353535354e-06, "loss": 6.364581680297851, "step": 30235 }, { "epoch": 0.0014, "grad_norm": 5.708373546600342, "learning_rate": 3.5232828282828287e-06, "loss": 6.488095092773437, "step": 30240 }, { "epoch": 0.00145, "grad_norm": 9.381766319274902, "learning_rate": 3.523030303030303e-06, "loss": 6.313569641113281, "step": 30245 }, { "epoch": 0.0015, "grad_norm": 6.118898391723633, "learning_rate": 3.5227777777777784e-06, "loss": 6.311029052734375, "step": 30250 }, { "epoch": 0.00155, "grad_norm": 8.702325820922852, "learning_rate": 3.522525252525253e-06, "loss": 6.2553550720214846, "step": 30255 }, { "epoch": 0.0016, "grad_norm": 5.618542671203613, "learning_rate": 3.5222727272727277e-06, "loss": 6.295967864990234, "step": 30260 }, { "epoch": 0.00165, "grad_norm": 7.077052116394043, "learning_rate": 3.5220202020202023e-06, "loss": 6.332790374755859, "step": 30265 }, { "epoch": 0.0017, "grad_norm": 6.07678747177124, "learning_rate": 3.5217676767676773e-06, "loss": 6.357634353637695, "step": 30270 }, { "epoch": 0.00175, "grad_norm": 5.752668857574463, "learning_rate": 3.521515151515152e-06, "loss": 6.275868606567383, "step": 30275 }, { "epoch": 0.0018, "grad_norm": 8.120055198669434, "learning_rate": 3.5212626262626266e-06, "loss": 6.174786376953125, "step": 30280 }, { "epoch": 0.00185, "grad_norm": 3.0004894733428955, "learning_rate": 3.5210101010101012e-06, "loss": 6.391631317138672, "step": 30285 }, { "epoch": 0.0019, "grad_norm": 5.824384689331055, "learning_rate": 3.5207575757575763e-06, "loss": 6.249583435058594, "step": 30290 }, { "epoch": 0.00195, "grad_norm": 5.606202602386475, "learning_rate": 3.520505050505051e-06, "loss": 6.282828903198242, "step": 30295 }, { "epoch": 0.002, "grad_norm": 8.084426879882812, "learning_rate": 3.5202525252525255e-06, "loss": 6.290953826904297, "step": 30300 }, { "epoch": 0.00205, "grad_norm": 5.668701171875, "learning_rate": 3.52e-06, "loss": 6.291912078857422, "step": 30305 }, { "epoch": 0.0021, "grad_norm": 4.827895164489746, "learning_rate": 3.5197474747474752e-06, "loss": 6.37243423461914, "step": 30310 }, { "epoch": 0.00215, "grad_norm": 6.453768253326416, "learning_rate": 3.51949494949495e-06, "loss": 6.257444763183594, "step": 30315 }, { "epoch": 0.0022, "grad_norm": 12.45335578918457, "learning_rate": 3.5192424242424245e-06, "loss": 6.339299011230469, "step": 30320 }, { "epoch": 0.00225, "grad_norm": 7.6751227378845215, "learning_rate": 3.518989898989899e-06, "loss": 6.3201641082763675, "step": 30325 }, { "epoch": 0.0023, "grad_norm": 7.691946983337402, "learning_rate": 3.518737373737374e-06, "loss": 6.3052978515625, "step": 30330 }, { "epoch": 0.00235, "grad_norm": 3.9807279109954834, "learning_rate": 3.518484848484849e-06, "loss": 6.291839981079102, "step": 30335 }, { "epoch": 0.0024, "grad_norm": 4.4699387550354, "learning_rate": 3.5182323232323234e-06, "loss": 6.2644813537597654, "step": 30340 }, { "epoch": 0.00245, "grad_norm": 5.149966239929199, "learning_rate": 3.517979797979798e-06, "loss": 6.323543930053711, "step": 30345 }, { "epoch": 0.0025, "grad_norm": 5.827171325683594, "learning_rate": 3.517727272727273e-06, "loss": 6.303450393676758, "step": 30350 }, { "epoch": 0.00255, "grad_norm": 3.0999603271484375, "learning_rate": 3.5174747474747478e-06, "loss": 6.317843246459961, "step": 30355 }, { "epoch": 0.0026, "grad_norm": 6.033076763153076, "learning_rate": 3.5172222222222224e-06, "loss": 6.282498931884765, "step": 30360 }, { "epoch": 0.00265, "grad_norm": 7.726853847503662, "learning_rate": 3.516969696969697e-06, "loss": 6.1833148956298825, "step": 30365 }, { "epoch": 0.0027, "grad_norm": 4.318711280822754, "learning_rate": 3.516717171717172e-06, "loss": 6.257895278930664, "step": 30370 }, { "epoch": 0.00275, "grad_norm": 6.404482841491699, "learning_rate": 3.5164646464646467e-06, "loss": 6.3033794403076175, "step": 30375 }, { "epoch": 0.0028, "grad_norm": 9.870217323303223, "learning_rate": 3.5162121212121213e-06, "loss": 6.347330474853516, "step": 30380 }, { "epoch": 0.00285, "grad_norm": 7.3675079345703125, "learning_rate": 3.515959595959596e-06, "loss": 6.281995391845703, "step": 30385 }, { "epoch": 0.0029, "grad_norm": 7.579485893249512, "learning_rate": 3.515707070707071e-06, "loss": 6.36811637878418, "step": 30390 }, { "epoch": 0.00295, "grad_norm": 4.2415547370910645, "learning_rate": 3.5154545454545456e-06, "loss": 6.35584716796875, "step": 30395 }, { "epoch": 0.003, "grad_norm": 6.5480637550354, "learning_rate": 3.5152020202020203e-06, "loss": 6.314322662353516, "step": 30400 }, { "epoch": 0.00305, "grad_norm": 6.053180694580078, "learning_rate": 3.514949494949495e-06, "loss": 6.310372924804687, "step": 30405 }, { "epoch": 0.0031, "grad_norm": 12.136195182800293, "learning_rate": 3.51469696969697e-06, "loss": 6.34039077758789, "step": 30410 }, { "epoch": 0.00315, "grad_norm": 5.282136917114258, "learning_rate": 3.5144444444444446e-06, "loss": 6.384181976318359, "step": 30415 }, { "epoch": 0.0032, "grad_norm": 6.466001033782959, "learning_rate": 3.5141919191919192e-06, "loss": 6.266043853759766, "step": 30420 }, { "epoch": 0.00325, "grad_norm": 5.129824161529541, "learning_rate": 3.513939393939394e-06, "loss": 6.298617553710938, "step": 30425 }, { "epoch": 0.0033, "grad_norm": 8.354724884033203, "learning_rate": 3.5136868686868693e-06, "loss": 6.482086181640625, "step": 30430 }, { "epoch": 0.00335, "grad_norm": 5.265137672424316, "learning_rate": 3.513434343434344e-06, "loss": 6.297601318359375, "step": 30435 }, { "epoch": 0.0034, "grad_norm": 5.081305027008057, "learning_rate": 3.513181818181818e-06, "loss": 6.291044616699219, "step": 30440 }, { "epoch": 0.00345, "grad_norm": 6.677523136138916, "learning_rate": 3.5129292929292928e-06, "loss": 6.313158416748047, "step": 30445 }, { "epoch": 0.0035, "grad_norm": 7.239242076873779, "learning_rate": 3.5126767676767683e-06, "loss": 6.311925888061523, "step": 30450 }, { "epoch": 0.00355, "grad_norm": 14.75670051574707, "learning_rate": 3.512424242424243e-06, "loss": 6.391028594970703, "step": 30455 }, { "epoch": 0.0036, "grad_norm": 6.772289276123047, "learning_rate": 3.5121717171717175e-06, "loss": 6.306143188476563, "step": 30460 }, { "epoch": 0.00365, "grad_norm": 9.194825172424316, "learning_rate": 3.511919191919192e-06, "loss": 6.294182586669922, "step": 30465 }, { "epoch": 0.0037, "grad_norm": 7.671576976776123, "learning_rate": 3.511666666666667e-06, "loss": 6.251313781738281, "step": 30470 }, { "epoch": 0.00375, "grad_norm": 4.937087059020996, "learning_rate": 3.511414141414142e-06, "loss": 6.291476058959961, "step": 30475 }, { "epoch": 0.0038, "grad_norm": 5.50131368637085, "learning_rate": 3.5111616161616165e-06, "loss": 6.277745056152344, "step": 30480 }, { "epoch": 0.00385, "grad_norm": 3.8277409076690674, "learning_rate": 3.510909090909091e-06, "loss": 6.290627670288086, "step": 30485 }, { "epoch": 0.0039, "grad_norm": 4.3165788650512695, "learning_rate": 3.510656565656566e-06, "loss": 6.336138534545898, "step": 30490 }, { "epoch": 0.00395, "grad_norm": 6.351437568664551, "learning_rate": 3.5104040404040408e-06, "loss": 6.332951354980469, "step": 30495 }, { "epoch": 0.004, "grad_norm": 4.145952224731445, "learning_rate": 3.5101515151515154e-06, "loss": 6.258573532104492, "step": 30500 }, { "epoch": 0.00405, "grad_norm": 7.508584976196289, "learning_rate": 3.50989898989899e-06, "loss": 6.297691345214844, "step": 30505 }, { "epoch": 0.0041, "grad_norm": 24.11528968811035, "learning_rate": 3.509646464646465e-06, "loss": 6.36131706237793, "step": 30510 }, { "epoch": 0.00415, "grad_norm": 8.579665184020996, "learning_rate": 3.5093939393939397e-06, "loss": 6.303544616699218, "step": 30515 }, { "epoch": 0.0042, "grad_norm": 7.393471717834473, "learning_rate": 3.5091414141414144e-06, "loss": 6.289500045776367, "step": 30520 }, { "epoch": 0.00425, "grad_norm": 6.886440753936768, "learning_rate": 3.508888888888889e-06, "loss": 6.292948913574219, "step": 30525 }, { "epoch": 0.0043, "grad_norm": 5.595465660095215, "learning_rate": 3.508636363636364e-06, "loss": 6.296092224121094, "step": 30530 }, { "epoch": 0.00435, "grad_norm": 7.313178539276123, "learning_rate": 3.5083838383838387e-06, "loss": 6.283732986450195, "step": 30535 }, { "epoch": 0.0044, "grad_norm": 6.557757377624512, "learning_rate": 3.5081313131313133e-06, "loss": 6.341020202636718, "step": 30540 }, { "epoch": 0.00445, "grad_norm": 11.70034408569336, "learning_rate": 3.5078787878787884e-06, "loss": 6.344221115112305, "step": 30545 }, { "epoch": 0.0045, "grad_norm": 6.1660356521606445, "learning_rate": 3.507626262626263e-06, "loss": 6.327466583251953, "step": 30550 }, { "epoch": 0.00455, "grad_norm": 9.203703880310059, "learning_rate": 3.5073737373737376e-06, "loss": 6.266476058959961, "step": 30555 }, { "epoch": 0.0046, "grad_norm": 5.459879398345947, "learning_rate": 3.5071212121212122e-06, "loss": 6.257968139648438, "step": 30560 }, { "epoch": 0.00465, "grad_norm": 7.479710102081299, "learning_rate": 3.5068686868686873e-06, "loss": 6.265128326416016, "step": 30565 }, { "epoch": 0.0047, "grad_norm": 5.643678188323975, "learning_rate": 3.506616161616162e-06, "loss": 6.2212074279785154, "step": 30570 }, { "epoch": 0.00475, "grad_norm": 15.428375244140625, "learning_rate": 3.5063636363636366e-06, "loss": 6.213994979858398, "step": 30575 }, { "epoch": 0.0048, "grad_norm": 5.4300923347473145, "learning_rate": 3.506111111111111e-06, "loss": 6.280587768554687, "step": 30580 }, { "epoch": 0.00485, "grad_norm": 9.55874252319336, "learning_rate": 3.5058585858585862e-06, "loss": 6.324400329589844, "step": 30585 }, { "epoch": 0.0049, "grad_norm": 6.635635852813721, "learning_rate": 3.505606060606061e-06, "loss": 6.273868179321289, "step": 30590 }, { "epoch": 0.00495, "grad_norm": 7.755094528198242, "learning_rate": 3.5053535353535355e-06, "loss": 6.272342300415039, "step": 30595 }, { "epoch": 0.005, "grad_norm": 6.867818832397461, "learning_rate": 3.50510101010101e-06, "loss": 6.395664215087891, "step": 30600 }, { "epoch": 0.00505, "grad_norm": 5.472387313842773, "learning_rate": 3.504848484848485e-06, "loss": 6.320342254638672, "step": 30605 }, { "epoch": 0.0051, "grad_norm": 4.040513038635254, "learning_rate": 3.50459595959596e-06, "loss": 6.307614135742187, "step": 30610 }, { "epoch": 0.00515, "grad_norm": 14.557611465454102, "learning_rate": 3.5043434343434344e-06, "loss": 6.376139831542969, "step": 30615 }, { "epoch": 0.0052, "grad_norm": 7.899896144866943, "learning_rate": 3.504090909090909e-06, "loss": 6.48419189453125, "step": 30620 }, { "epoch": 0.00525, "grad_norm": 5.929014682769775, "learning_rate": 3.5038383838383846e-06, "loss": 6.329907989501953, "step": 30625 }, { "epoch": 0.0053, "grad_norm": 5.357595920562744, "learning_rate": 3.503585858585859e-06, "loss": 6.304969787597656, "step": 30630 }, { "epoch": 0.00535, "grad_norm": 3.0218911170959473, "learning_rate": 3.5033333333333334e-06, "loss": 6.314809799194336, "step": 30635 }, { "epoch": 0.0054, "grad_norm": 44.7281608581543, "learning_rate": 3.503080808080808e-06, "loss": 6.208759307861328, "step": 30640 }, { "epoch": 0.00545, "grad_norm": 5.094700813293457, "learning_rate": 3.5028282828282835e-06, "loss": 6.170816040039062, "step": 30645 }, { "epoch": 0.0055, "grad_norm": 4.900203704833984, "learning_rate": 3.502575757575758e-06, "loss": 6.279574966430664, "step": 30650 }, { "epoch": 0.00555, "grad_norm": 3.7754247188568115, "learning_rate": 3.5023232323232328e-06, "loss": 6.264418029785157, "step": 30655 }, { "epoch": 0.0056, "grad_norm": 7.221945285797119, "learning_rate": 3.5020707070707074e-06, "loss": 6.283297729492188, "step": 30660 }, { "epoch": 0.00565, "grad_norm": 7.076565742492676, "learning_rate": 3.5018181818181824e-06, "loss": 6.3167167663574215, "step": 30665 }, { "epoch": 0.0057, "grad_norm": 5.709422588348389, "learning_rate": 3.501565656565657e-06, "loss": 6.316467666625977, "step": 30670 }, { "epoch": 0.00575, "grad_norm": 14.146867752075195, "learning_rate": 3.5013131313131317e-06, "loss": 6.631809997558594, "step": 30675 }, { "epoch": 0.0058, "grad_norm": 6.814350605010986, "learning_rate": 3.5010606060606063e-06, "loss": 6.41088638305664, "step": 30680 }, { "epoch": 0.00585, "grad_norm": 8.508028984069824, "learning_rate": 3.5008080808080814e-06, "loss": 6.4017173767089846, "step": 30685 }, { "epoch": 0.0059, "grad_norm": 7.867400646209717, "learning_rate": 3.500555555555556e-06, "loss": 6.268592453002929, "step": 30690 }, { "epoch": 0.00595, "grad_norm": 8.200491905212402, "learning_rate": 3.5003030303030306e-06, "loss": 6.312607192993164, "step": 30695 }, { "epoch": 0.006, "grad_norm": 6.092241287231445, "learning_rate": 3.5000505050505053e-06, "loss": 6.323750686645508, "step": 30700 }, { "epoch": 0.00605, "grad_norm": 5.498466491699219, "learning_rate": 3.4997979797979803e-06, "loss": 6.2912841796875, "step": 30705 }, { "epoch": 0.0061, "grad_norm": 6.105342388153076, "learning_rate": 3.499545454545455e-06, "loss": 6.342540740966797, "step": 30710 }, { "epoch": 0.00615, "grad_norm": 7.391833782196045, "learning_rate": 3.4992929292929296e-06, "loss": 6.3187309265136715, "step": 30715 }, { "epoch": 0.0062, "grad_norm": 18.563892364501953, "learning_rate": 3.4990404040404042e-06, "loss": 6.415699768066406, "step": 30720 }, { "epoch": 0.00625, "grad_norm": 4.351109027862549, "learning_rate": 3.4987878787878793e-06, "loss": 6.460601043701172, "step": 30725 }, { "epoch": 0.0063, "grad_norm": 7.162830829620361, "learning_rate": 3.498535353535354e-06, "loss": 6.325341033935547, "step": 30730 }, { "epoch": 0.00635, "grad_norm": 4.240682125091553, "learning_rate": 3.4982828282828285e-06, "loss": 6.236849975585938, "step": 30735 }, { "epoch": 0.0064, "grad_norm": 12.998048782348633, "learning_rate": 3.498030303030303e-06, "loss": 6.328357696533203, "step": 30740 }, { "epoch": 0.00645, "grad_norm": 5.847378253936768, "learning_rate": 3.4977777777777782e-06, "loss": 6.2958118438720705, "step": 30745 }, { "epoch": 0.0065, "grad_norm": 5.156700134277344, "learning_rate": 3.497525252525253e-06, "loss": 6.242645645141602, "step": 30750 }, { "epoch": 0.00655, "grad_norm": 7.520177364349365, "learning_rate": 3.4972727272727275e-06, "loss": 6.2973480224609375, "step": 30755 }, { "epoch": 0.0066, "grad_norm": 4.853328227996826, "learning_rate": 3.497020202020202e-06, "loss": 6.394388198852539, "step": 30760 }, { "epoch": 0.00665, "grad_norm": 3.3100924491882324, "learning_rate": 3.496767676767677e-06, "loss": 6.284613037109375, "step": 30765 }, { "epoch": 0.0067, "grad_norm": 5.1759257316589355, "learning_rate": 3.496515151515152e-06, "loss": 6.332266235351563, "step": 30770 }, { "epoch": 0.00675, "grad_norm": 10.115545272827148, "learning_rate": 3.4962626262626264e-06, "loss": 6.3796546936035154, "step": 30775 }, { "epoch": 0.0068, "grad_norm": 8.139554023742676, "learning_rate": 3.496010101010101e-06, "loss": 6.328509140014648, "step": 30780 }, { "epoch": 0.00685, "grad_norm": 4.393502712249756, "learning_rate": 3.495757575757576e-06, "loss": 6.339471435546875, "step": 30785 }, { "epoch": 0.0069, "grad_norm": 5.392394065856934, "learning_rate": 3.4955050505050507e-06, "loss": 6.2997081756591795, "step": 30790 }, { "epoch": 0.00695, "grad_norm": 5.542270183563232, "learning_rate": 3.4952525252525254e-06, "loss": 6.278899765014648, "step": 30795 }, { "epoch": 0.007, "grad_norm": 5.188408374786377, "learning_rate": 3.495e-06, "loss": 6.389769744873047, "step": 30800 }, { "epoch": 0.00705, "grad_norm": 12.353620529174805, "learning_rate": 3.494747474747475e-06, "loss": 6.585284423828125, "step": 30805 }, { "epoch": 0.0071, "grad_norm": 5.277525901794434, "learning_rate": 3.4944949494949497e-06, "loss": 6.29046745300293, "step": 30810 }, { "epoch": 0.00715, "grad_norm": 4.494175434112549, "learning_rate": 3.4942424242424243e-06, "loss": 6.307538604736328, "step": 30815 }, { "epoch": 0.0072, "grad_norm": 3.8398125171661377, "learning_rate": 3.493989898989899e-06, "loss": 6.340407180786133, "step": 30820 }, { "epoch": 0.00725, "grad_norm": 6.357994556427002, "learning_rate": 3.4937373737373744e-06, "loss": 6.268693923950195, "step": 30825 }, { "epoch": 0.0073, "grad_norm": 5.676535606384277, "learning_rate": 3.4934848484848486e-06, "loss": 6.329642105102539, "step": 30830 }, { "epoch": 0.00735, "grad_norm": 8.069583892822266, "learning_rate": 3.4932323232323233e-06, "loss": 6.309796142578125, "step": 30835 }, { "epoch": 0.0074, "grad_norm": 6.2477874755859375, "learning_rate": 3.492979797979798e-06, "loss": 6.307234191894532, "step": 30840 }, { "epoch": 0.00745, "grad_norm": 7.16832971572876, "learning_rate": 3.4927272727272734e-06, "loss": 6.288982391357422, "step": 30845 }, { "epoch": 0.0075, "grad_norm": 8.136871337890625, "learning_rate": 3.492474747474748e-06, "loss": 6.296388626098633, "step": 30850 }, { "epoch": 0.00755, "grad_norm": 6.288802146911621, "learning_rate": 3.492222222222222e-06, "loss": 6.360136032104492, "step": 30855 }, { "epoch": 0.0076, "grad_norm": 8.711474418640137, "learning_rate": 3.491969696969697e-06, "loss": 6.493196105957031, "step": 30860 }, { "epoch": 0.00765, "grad_norm": 10.541821479797363, "learning_rate": 3.4917171717171723e-06, "loss": 6.548341369628906, "step": 30865 }, { "epoch": 0.0077, "grad_norm": 6.081124782562256, "learning_rate": 3.491464646464647e-06, "loss": 6.312615203857422, "step": 30870 }, { "epoch": 0.00775, "grad_norm": 11.08112907409668, "learning_rate": 3.4912121212121216e-06, "loss": 6.34002914428711, "step": 30875 }, { "epoch": 0.0078, "grad_norm": 5.0082855224609375, "learning_rate": 3.490959595959596e-06, "loss": 6.30450325012207, "step": 30880 }, { "epoch": 0.00785, "grad_norm": 30.433208465576172, "learning_rate": 3.4907070707070712e-06, "loss": 6.295254516601562, "step": 30885 }, { "epoch": 0.0079, "grad_norm": 29.286144256591797, "learning_rate": 3.490454545454546e-06, "loss": 5.973501586914063, "step": 30890 }, { "epoch": 0.00795, "grad_norm": 37.13059616088867, "learning_rate": 3.4902020202020205e-06, "loss": 5.728216171264648, "step": 30895 }, { "epoch": 0.008, "grad_norm": 10.889192581176758, "learning_rate": 3.489949494949495e-06, "loss": 6.34569206237793, "step": 30900 }, { "epoch": 0.00805, "grad_norm": 10.188030242919922, "learning_rate": 3.48969696969697e-06, "loss": 6.311787796020508, "step": 30905 }, { "epoch": 0.0081, "grad_norm": 6.174193382263184, "learning_rate": 3.489444444444445e-06, "loss": 6.339508438110352, "step": 30910 }, { "epoch": 0.00815, "grad_norm": 5.9210662841796875, "learning_rate": 3.4891919191919195e-06, "loss": 6.2550605773925785, "step": 30915 }, { "epoch": 0.0082, "grad_norm": 7.499844551086426, "learning_rate": 3.488939393939394e-06, "loss": 6.317704391479492, "step": 30920 }, { "epoch": 0.00825, "grad_norm": 8.778815269470215, "learning_rate": 3.488686868686869e-06, "loss": 6.3352813720703125, "step": 30925 }, { "epoch": 0.0083, "grad_norm": 4.930359840393066, "learning_rate": 3.4884343434343438e-06, "loss": 6.258121109008789, "step": 30930 }, { "epoch": 0.00835, "grad_norm": 5.634444236755371, "learning_rate": 3.4881818181818184e-06, "loss": 6.2620491027832035, "step": 30935 }, { "epoch": 0.0084, "grad_norm": 4.310962677001953, "learning_rate": 3.487929292929293e-06, "loss": 6.262651443481445, "step": 30940 }, { "epoch": 0.00845, "grad_norm": 5.746262550354004, "learning_rate": 3.487676767676768e-06, "loss": 6.286780929565429, "step": 30945 }, { "epoch": 0.0085, "grad_norm": 4.739052772521973, "learning_rate": 3.4874242424242427e-06, "loss": 6.442961120605469, "step": 30950 }, { "epoch": 0.00855, "grad_norm": 8.815422058105469, "learning_rate": 3.4871717171717173e-06, "loss": 6.25898323059082, "step": 30955 }, { "epoch": 0.0086, "grad_norm": 5.505147457122803, "learning_rate": 3.4869191919191924e-06, "loss": 6.2786376953125, "step": 30960 }, { "epoch": 0.00865, "grad_norm": 7.598847389221191, "learning_rate": 3.486666666666667e-06, "loss": 6.277008056640625, "step": 30965 }, { "epoch": 0.0087, "grad_norm": 5.241137981414795, "learning_rate": 3.4864141414141417e-06, "loss": 6.3271636962890625, "step": 30970 }, { "epoch": 0.00875, "grad_norm": 5.15465784072876, "learning_rate": 3.4861616161616163e-06, "loss": 6.287715911865234, "step": 30975 }, { "epoch": 0.0088, "grad_norm": 8.85396957397461, "learning_rate": 3.4859090909090913e-06, "loss": 6.281443786621094, "step": 30980 }, { "epoch": 0.00885, "grad_norm": 5.2901787757873535, "learning_rate": 3.485656565656566e-06, "loss": 6.301638793945313, "step": 30985 }, { "epoch": 0.0089, "grad_norm": 5.208120822906494, "learning_rate": 3.4854040404040406e-06, "loss": 6.328473281860352, "step": 30990 }, { "epoch": 0.00895, "grad_norm": 5.497430324554443, "learning_rate": 3.4851515151515152e-06, "loss": 6.277241134643555, "step": 30995 }, { "epoch": 0.009, "grad_norm": 20.714143753051758, "learning_rate": 3.4848989898989903e-06, "loss": 6.237617874145508, "step": 31000 }, { "epoch": 0.00905, "grad_norm": 6.485785007476807, "learning_rate": 3.484646464646465e-06, "loss": 6.26361198425293, "step": 31005 }, { "epoch": 0.0091, "grad_norm": 5.9065961837768555, "learning_rate": 3.4843939393939395e-06, "loss": 6.291852951049805, "step": 31010 }, { "epoch": 0.00915, "grad_norm": 6.946830749511719, "learning_rate": 3.484141414141414e-06, "loss": 6.242046356201172, "step": 31015 }, { "epoch": 0.0092, "grad_norm": 10.202825546264648, "learning_rate": 3.4838888888888892e-06, "loss": 6.282825469970703, "step": 31020 }, { "epoch": 0.00925, "grad_norm": 6.090758323669434, "learning_rate": 3.483636363636364e-06, "loss": 6.393174362182617, "step": 31025 }, { "epoch": 0.0093, "grad_norm": 7.1127238273620605, "learning_rate": 3.4833838383838385e-06, "loss": 6.272439193725586, "step": 31030 }, { "epoch": 0.00935, "grad_norm": 5.086367607116699, "learning_rate": 3.483131313131313e-06, "loss": 6.296226882934571, "step": 31035 }, { "epoch": 0.0094, "grad_norm": 5.704259872436523, "learning_rate": 3.4828787878787886e-06, "loss": 6.254584503173828, "step": 31040 }, { "epoch": 0.00945, "grad_norm": 6.761820316314697, "learning_rate": 3.4826262626262632e-06, "loss": 6.241915512084961, "step": 31045 }, { "epoch": 0.0095, "grad_norm": 4.720517635345459, "learning_rate": 3.4823737373737374e-06, "loss": 6.288992691040039, "step": 31050 }, { "epoch": 0.00955, "grad_norm": 6.525435447692871, "learning_rate": 3.482121212121212e-06, "loss": 6.277404403686523, "step": 31055 }, { "epoch": 0.0096, "grad_norm": 5.819321632385254, "learning_rate": 3.4818686868686875e-06, "loss": 6.2881324768066404, "step": 31060 }, { "epoch": 0.00965, "grad_norm": 6.052142143249512, "learning_rate": 3.481616161616162e-06, "loss": 6.25890884399414, "step": 31065 }, { "epoch": 0.0097, "grad_norm": 8.274016380310059, "learning_rate": 3.481363636363637e-06, "loss": 6.321687316894531, "step": 31070 }, { "epoch": 0.00975, "grad_norm": 6.385990619659424, "learning_rate": 3.4811111111111114e-06, "loss": 6.298026275634766, "step": 31075 }, { "epoch": 0.0098, "grad_norm": 6.122243881225586, "learning_rate": 3.4808585858585865e-06, "loss": 6.3379051208496096, "step": 31080 }, { "epoch": 0.00985, "grad_norm": 21.89448356628418, "learning_rate": 3.480606060606061e-06, "loss": 6.341522216796875, "step": 31085 }, { "epoch": 0.0099, "grad_norm": 6.187833309173584, "learning_rate": 3.4803535353535357e-06, "loss": 6.317514419555664, "step": 31090 }, { "epoch": 0.00995, "grad_norm": 7.483710765838623, "learning_rate": 3.4801010101010104e-06, "loss": 6.275627899169922, "step": 31095 }, { "epoch": 0.01, "grad_norm": 7.134104251861572, "learning_rate": 3.4798484848484854e-06, "loss": 6.269377517700195, "step": 31100 }, { "epoch": 0.01005, "grad_norm": 3.7349679470062256, "learning_rate": 3.47959595959596e-06, "loss": 6.307213211059571, "step": 31105 }, { "epoch": 0.0101, "grad_norm": 7.14758825302124, "learning_rate": 3.4793434343434347e-06, "loss": 6.303622055053711, "step": 31110 }, { "epoch": 0.01015, "grad_norm": 6.496312618255615, "learning_rate": 3.4790909090909093e-06, "loss": 6.283578491210937, "step": 31115 }, { "epoch": 0.0102, "grad_norm": 6.780876636505127, "learning_rate": 3.4788383838383844e-06, "loss": 6.2924247741699215, "step": 31120 }, { "epoch": 0.01025, "grad_norm": 7.086086273193359, "learning_rate": 3.478585858585859e-06, "loss": 6.312794494628906, "step": 31125 }, { "epoch": 0.0103, "grad_norm": 5.741054058074951, "learning_rate": 3.4783333333333336e-06, "loss": 6.3786567687988285, "step": 31130 }, { "epoch": 0.01035, "grad_norm": 8.722561836242676, "learning_rate": 3.4780808080808083e-06, "loss": 6.309943008422851, "step": 31135 }, { "epoch": 0.0104, "grad_norm": 5.473861217498779, "learning_rate": 3.4778282828282833e-06, "loss": 6.287100982666016, "step": 31140 }, { "epoch": 0.01045, "grad_norm": 6.0656585693359375, "learning_rate": 3.477575757575758e-06, "loss": 6.332005310058594, "step": 31145 }, { "epoch": 0.0105, "grad_norm": 7.1780805587768555, "learning_rate": 3.4773232323232326e-06, "loss": 6.309861373901367, "step": 31150 }, { "epoch": 0.01055, "grad_norm": 5.149395942687988, "learning_rate": 3.477070707070707e-06, "loss": 6.259609985351562, "step": 31155 }, { "epoch": 0.0106, "grad_norm": 4.867814064025879, "learning_rate": 3.4768181818181823e-06, "loss": 6.2881591796875, "step": 31160 }, { "epoch": 0.01065, "grad_norm": 3.7261312007904053, "learning_rate": 3.476565656565657e-06, "loss": 6.354864501953125, "step": 31165 }, { "epoch": 0.0107, "grad_norm": 5.980228900909424, "learning_rate": 3.4763131313131315e-06, "loss": 6.359089279174805, "step": 31170 }, { "epoch": 0.01075, "grad_norm": 7.473334312438965, "learning_rate": 3.476060606060606e-06, "loss": 6.2624763488769535, "step": 31175 }, { "epoch": 0.0108, "grad_norm": 5.091750144958496, "learning_rate": 3.475808080808081e-06, "loss": 6.287517547607422, "step": 31180 }, { "epoch": 0.01085, "grad_norm": 3.724547863006592, "learning_rate": 3.475555555555556e-06, "loss": 6.2558341979980465, "step": 31185 }, { "epoch": 0.0109, "grad_norm": 7.599609375, "learning_rate": 3.4753030303030305e-06, "loss": 6.269147491455078, "step": 31190 }, { "epoch": 0.01095, "grad_norm": 8.652864456176758, "learning_rate": 3.475050505050505e-06, "loss": 6.249160385131836, "step": 31195 }, { "epoch": 0.011, "grad_norm": 7.556710243225098, "learning_rate": 3.47479797979798e-06, "loss": 6.308186721801758, "step": 31200 }, { "epoch": 0.01105, "grad_norm": 7.35058069229126, "learning_rate": 3.4745454545454548e-06, "loss": 6.277926635742188, "step": 31205 }, { "epoch": 0.0111, "grad_norm": 29.186626434326172, "learning_rate": 3.4742929292929294e-06, "loss": 6.4645637512207035, "step": 31210 }, { "epoch": 0.01115, "grad_norm": 7.042591571807861, "learning_rate": 3.474040404040404e-06, "loss": 6.323080825805664, "step": 31215 }, { "epoch": 0.0112, "grad_norm": 10.886995315551758, "learning_rate": 3.473787878787879e-06, "loss": 6.284367370605469, "step": 31220 }, { "epoch": 0.01125, "grad_norm": 8.533283233642578, "learning_rate": 3.4735353535353537e-06, "loss": 6.3526451110839846, "step": 31225 }, { "epoch": 0.0113, "grad_norm": 4.954051494598389, "learning_rate": 3.4732828282828283e-06, "loss": 6.339914321899414, "step": 31230 }, { "epoch": 0.01135, "grad_norm": 9.046163558959961, "learning_rate": 3.473030303030303e-06, "loss": 6.2956687927246096, "step": 31235 }, { "epoch": 0.0114, "grad_norm": 6.325660705566406, "learning_rate": 3.4727777777777785e-06, "loss": 6.249410247802734, "step": 31240 }, { "epoch": 0.01145, "grad_norm": 4.934713363647461, "learning_rate": 3.4725252525252527e-06, "loss": 6.31416015625, "step": 31245 }, { "epoch": 0.0115, "grad_norm": 4.498656749725342, "learning_rate": 3.4722727272727273e-06, "loss": 6.2695068359375, "step": 31250 }, { "epoch": 0.01155, "grad_norm": 8.790675163269043, "learning_rate": 3.472020202020202e-06, "loss": 6.448046112060547, "step": 31255 }, { "epoch": 0.0116, "grad_norm": 6.70318603515625, "learning_rate": 3.4717676767676774e-06, "loss": 6.3224647521972654, "step": 31260 }, { "epoch": 0.01165, "grad_norm": 5.324131488800049, "learning_rate": 3.471515151515152e-06, "loss": 6.364325714111328, "step": 31265 }, { "epoch": 0.0117, "grad_norm": 4.485103130340576, "learning_rate": 3.4712626262626262e-06, "loss": 6.294645690917969, "step": 31270 }, { "epoch": 0.01175, "grad_norm": 4.885267734527588, "learning_rate": 3.471010101010101e-06, "loss": 6.441793823242188, "step": 31275 }, { "epoch": 0.0118, "grad_norm": 4.959426403045654, "learning_rate": 3.4707575757575763e-06, "loss": 6.291802597045899, "step": 31280 }, { "epoch": 0.01185, "grad_norm": 4.438997745513916, "learning_rate": 3.470505050505051e-06, "loss": 6.282447814941406, "step": 31285 }, { "epoch": 0.0119, "grad_norm": 5.538366317749023, "learning_rate": 3.4702525252525256e-06, "loss": 6.2852119445800785, "step": 31290 }, { "epoch": 0.01195, "grad_norm": 4.181748390197754, "learning_rate": 3.4700000000000002e-06, "loss": 6.338478469848633, "step": 31295 }, { "epoch": 0.012, "grad_norm": 5.777076244354248, "learning_rate": 3.4697474747474753e-06, "loss": 6.483787536621094, "step": 31300 }, { "epoch": 0.01205, "grad_norm": 5.703756332397461, "learning_rate": 3.46949494949495e-06, "loss": 6.315401840209961, "step": 31305 }, { "epoch": 0.0121, "grad_norm": 10.012688636779785, "learning_rate": 3.4692424242424245e-06, "loss": 6.341141891479492, "step": 31310 }, { "epoch": 0.01215, "grad_norm": 7.826373100280762, "learning_rate": 3.468989898989899e-06, "loss": 6.328828048706055, "step": 31315 }, { "epoch": 0.0122, "grad_norm": 4.342375755310059, "learning_rate": 3.4687373737373742e-06, "loss": 6.2282051086425785, "step": 31320 }, { "epoch": 0.01225, "grad_norm": 11.406405448913574, "learning_rate": 3.468484848484849e-06, "loss": 6.318561553955078, "step": 31325 }, { "epoch": 0.0123, "grad_norm": 6.454343318939209, "learning_rate": 3.4682323232323235e-06, "loss": 6.281166076660156, "step": 31330 }, { "epoch": 0.01235, "grad_norm": 5.668166160583496, "learning_rate": 3.467979797979798e-06, "loss": 6.303965759277344, "step": 31335 }, { "epoch": 0.0124, "grad_norm": 10.77260971069336, "learning_rate": 3.467727272727273e-06, "loss": 6.285918426513672, "step": 31340 }, { "epoch": 0.01245, "grad_norm": 5.856976509094238, "learning_rate": 3.467474747474748e-06, "loss": 6.3688232421875, "step": 31345 }, { "epoch": 0.0125, "grad_norm": 5.343064308166504, "learning_rate": 3.4672222222222224e-06, "loss": 6.344748306274414, "step": 31350 }, { "epoch": 0.01255, "grad_norm": 4.35451602935791, "learning_rate": 3.466969696969697e-06, "loss": 6.534793853759766, "step": 31355 }, { "epoch": 0.0126, "grad_norm": 11.853941917419434, "learning_rate": 3.466717171717172e-06, "loss": 6.23565788269043, "step": 31360 }, { "epoch": 0.01265, "grad_norm": 8.171963691711426, "learning_rate": 3.4664646464646468e-06, "loss": 6.318867492675781, "step": 31365 }, { "epoch": 0.0127, "grad_norm": 6.3174872398376465, "learning_rate": 3.4662121212121214e-06, "loss": 6.293863296508789, "step": 31370 }, { "epoch": 0.01275, "grad_norm": 6.077780246734619, "learning_rate": 3.465959595959596e-06, "loss": 6.349203109741211, "step": 31375 }, { "epoch": 0.0128, "grad_norm": 6.290863037109375, "learning_rate": 3.465707070707071e-06, "loss": 6.288595581054688, "step": 31380 }, { "epoch": 0.01285, "grad_norm": 6.152403354644775, "learning_rate": 3.4654545454545457e-06, "loss": 6.274100494384766, "step": 31385 }, { "epoch": 0.0129, "grad_norm": 5.087575435638428, "learning_rate": 3.4652020202020203e-06, "loss": 6.2673286437988285, "step": 31390 }, { "epoch": 0.01295, "grad_norm": 14.480708122253418, "learning_rate": 3.4649494949494954e-06, "loss": 6.2662403106689455, "step": 31395 }, { "epoch": 0.013, "grad_norm": 7.394726753234863, "learning_rate": 3.46469696969697e-06, "loss": 6.310274887084961, "step": 31400 }, { "epoch": 0.01305, "grad_norm": 3.9056942462921143, "learning_rate": 3.4644444444444446e-06, "loss": 6.328700256347656, "step": 31405 }, { "epoch": 0.0131, "grad_norm": 6.815493106842041, "learning_rate": 3.4641919191919193e-06, "loss": 6.323628234863281, "step": 31410 }, { "epoch": 0.01315, "grad_norm": 7.505720615386963, "learning_rate": 3.4639393939393943e-06, "loss": 6.351559448242187, "step": 31415 }, { "epoch": 0.0132, "grad_norm": 6.078978061676025, "learning_rate": 3.463686868686869e-06, "loss": 6.273245239257813, "step": 31420 }, { "epoch": 0.01325, "grad_norm": 4.2614569664001465, "learning_rate": 3.4634343434343436e-06, "loss": 6.294971466064453, "step": 31425 }, { "epoch": 0.0133, "grad_norm": 8.286840438842773, "learning_rate": 3.4631818181818182e-06, "loss": 6.291056823730469, "step": 31430 }, { "epoch": 0.01335, "grad_norm": 4.454635143280029, "learning_rate": 3.4629292929292933e-06, "loss": 6.276801300048828, "step": 31435 }, { "epoch": 0.0134, "grad_norm": 8.538434982299805, "learning_rate": 3.462676767676768e-06, "loss": 6.301482009887695, "step": 31440 }, { "epoch": 0.01345, "grad_norm": 8.241325378417969, "learning_rate": 3.4624242424242425e-06, "loss": 6.329000854492188, "step": 31445 }, { "epoch": 0.0135, "grad_norm": 5.039905071258545, "learning_rate": 3.462171717171717e-06, "loss": 6.33343734741211, "step": 31450 }, { "epoch": 0.01355, "grad_norm": 4.068391799926758, "learning_rate": 3.4619191919191926e-06, "loss": 6.293236541748047, "step": 31455 }, { "epoch": 0.0136, "grad_norm": 6.268673419952393, "learning_rate": 3.4616666666666673e-06, "loss": 6.275437545776367, "step": 31460 }, { "epoch": 0.01365, "grad_norm": 4.046528339385986, "learning_rate": 3.4614141414141415e-06, "loss": 6.310839462280273, "step": 31465 }, { "epoch": 0.0137, "grad_norm": 6.196056365966797, "learning_rate": 3.461161616161616e-06, "loss": 6.296234893798828, "step": 31470 }, { "epoch": 0.01375, "grad_norm": 5.756940841674805, "learning_rate": 3.4609090909090916e-06, "loss": 6.282190322875977, "step": 31475 }, { "epoch": 0.0138, "grad_norm": 8.284753799438477, "learning_rate": 3.460656565656566e-06, "loss": 6.244682312011719, "step": 31480 }, { "epoch": 0.01385, "grad_norm": 5.020042896270752, "learning_rate": 3.460404040404041e-06, "loss": 6.305149078369141, "step": 31485 }, { "epoch": 0.0139, "grad_norm": 5.6719841957092285, "learning_rate": 3.4601515151515155e-06, "loss": 6.309772872924805, "step": 31490 }, { "epoch": 0.01395, "grad_norm": 4.409892559051514, "learning_rate": 3.4598989898989905e-06, "loss": 6.2854766845703125, "step": 31495 }, { "epoch": 0.014, "grad_norm": 5.163342475891113, "learning_rate": 3.459646464646465e-06, "loss": 6.313314819335938, "step": 31500 }, { "epoch": 0.01405, "grad_norm": 7.4333367347717285, "learning_rate": 3.4593939393939398e-06, "loss": 6.269931030273438, "step": 31505 }, { "epoch": 0.0141, "grad_norm": 7.723011493682861, "learning_rate": 3.4591414141414144e-06, "loss": 6.437898254394531, "step": 31510 }, { "epoch": 0.01415, "grad_norm": 8.310696601867676, "learning_rate": 3.4588888888888895e-06, "loss": 6.293450164794922, "step": 31515 }, { "epoch": 0.0142, "grad_norm": 3.969463348388672, "learning_rate": 3.458636363636364e-06, "loss": 6.442210388183594, "step": 31520 }, { "epoch": 0.01425, "grad_norm": 3.8181865215301514, "learning_rate": 3.4583838383838387e-06, "loss": 6.240133666992188, "step": 31525 }, { "epoch": 0.0143, "grad_norm": 8.632500648498535, "learning_rate": 3.4581313131313134e-06, "loss": 6.295115661621094, "step": 31530 }, { "epoch": 0.01435, "grad_norm": 44.44834518432617, "learning_rate": 3.4578787878787884e-06, "loss": 6.356418228149414, "step": 31535 }, { "epoch": 0.0144, "grad_norm": 7.034519195556641, "learning_rate": 3.457626262626263e-06, "loss": 6.467449951171875, "step": 31540 }, { "epoch": 0.01445, "grad_norm": 6.14238166809082, "learning_rate": 3.4573737373737377e-06, "loss": 6.291847610473633, "step": 31545 }, { "epoch": 0.0145, "grad_norm": 6.4046430587768555, "learning_rate": 3.4571212121212123e-06, "loss": 6.298342514038086, "step": 31550 }, { "epoch": 0.01455, "grad_norm": 6.635002136230469, "learning_rate": 3.4568686868686874e-06, "loss": 6.297444152832031, "step": 31555 }, { "epoch": 0.0146, "grad_norm": 7.026538372039795, "learning_rate": 3.456616161616162e-06, "loss": 6.278774261474609, "step": 31560 }, { "epoch": 0.01465, "grad_norm": 11.56414794921875, "learning_rate": 3.4563636363636366e-06, "loss": 6.291840362548828, "step": 31565 }, { "epoch": 0.0147, "grad_norm": 7.137899875640869, "learning_rate": 3.4561111111111112e-06, "loss": 6.274166870117187, "step": 31570 }, { "epoch": 0.01475, "grad_norm": 8.378945350646973, "learning_rate": 3.4558585858585863e-06, "loss": 6.307837677001953, "step": 31575 }, { "epoch": 0.0148, "grad_norm": 8.641300201416016, "learning_rate": 3.455606060606061e-06, "loss": 6.4817863464355465, "step": 31580 }, { "epoch": 0.01485, "grad_norm": 5.136144638061523, "learning_rate": 3.4553535353535356e-06, "loss": 6.303439331054688, "step": 31585 }, { "epoch": 0.0149, "grad_norm": 3.68477725982666, "learning_rate": 3.45510101010101e-06, "loss": 6.322549438476562, "step": 31590 }, { "epoch": 0.01495, "grad_norm": 5.370787620544434, "learning_rate": 3.4548484848484852e-06, "loss": 6.259751510620117, "step": 31595 }, { "epoch": 0.015, "grad_norm": 6.589470863342285, "learning_rate": 3.45459595959596e-06, "loss": 6.347718048095703, "step": 31600 }, { "epoch": 0.01505, "grad_norm": 4.90852165222168, "learning_rate": 3.4543434343434345e-06, "loss": 6.234229278564453, "step": 31605 }, { "epoch": 0.0151, "grad_norm": 7.632578372955322, "learning_rate": 3.454090909090909e-06, "loss": 6.2794342041015625, "step": 31610 }, { "epoch": 0.01515, "grad_norm": 4.145769119262695, "learning_rate": 3.453838383838384e-06, "loss": 6.28853759765625, "step": 31615 }, { "epoch": 0.0152, "grad_norm": 7.2483344078063965, "learning_rate": 3.453585858585859e-06, "loss": 6.336896133422852, "step": 31620 }, { "epoch": 0.01525, "grad_norm": 5.190921783447266, "learning_rate": 3.4533333333333334e-06, "loss": 6.316866302490235, "step": 31625 }, { "epoch": 0.0153, "grad_norm": 4.380551338195801, "learning_rate": 3.453080808080808e-06, "loss": 6.476371765136719, "step": 31630 }, { "epoch": 0.01535, "grad_norm": 4.949336051940918, "learning_rate": 3.452828282828283e-06, "loss": 6.2478271484375, "step": 31635 }, { "epoch": 0.0154, "grad_norm": 10.83963394165039, "learning_rate": 3.4525757575757578e-06, "loss": 6.3129730224609375, "step": 31640 }, { "epoch": 0.01545, "grad_norm": 4.589915752410889, "learning_rate": 3.4523232323232324e-06, "loss": 6.312158203125, "step": 31645 }, { "epoch": 0.0155, "grad_norm": 6.311068058013916, "learning_rate": 3.452070707070707e-06, "loss": 6.333624267578125, "step": 31650 }, { "epoch": 0.01555, "grad_norm": 5.2569684982299805, "learning_rate": 3.4518181818181825e-06, "loss": 6.324805450439453, "step": 31655 }, { "epoch": 0.0156, "grad_norm": 5.8145060539245605, "learning_rate": 3.4515656565656567e-06, "loss": 6.256742858886719, "step": 31660 }, { "epoch": 0.01565, "grad_norm": 4.225329875946045, "learning_rate": 3.4513131313131313e-06, "loss": 6.249169921875, "step": 31665 }, { "epoch": 0.0157, "grad_norm": 7.952047348022461, "learning_rate": 3.451060606060606e-06, "loss": 6.3016815185546875, "step": 31670 }, { "epoch": 0.01575, "grad_norm": 6.908677577972412, "learning_rate": 3.4508080808080814e-06, "loss": 6.2995361328125, "step": 31675 }, { "epoch": 0.0158, "grad_norm": 25.09419822692871, "learning_rate": 3.450555555555556e-06, "loss": 6.192238235473633, "step": 31680 }, { "epoch": 0.01585, "grad_norm": 6.209366321563721, "learning_rate": 3.4503030303030303e-06, "loss": 6.316732025146484, "step": 31685 }, { "epoch": 0.0159, "grad_norm": 3.3453104496002197, "learning_rate": 3.450050505050505e-06, "loss": 6.322166061401367, "step": 31690 }, { "epoch": 0.01595, "grad_norm": 4.147703170776367, "learning_rate": 3.4497979797979804e-06, "loss": 6.255399322509765, "step": 31695 }, { "epoch": 0.016, "grad_norm": 6.530035495758057, "learning_rate": 3.449545454545455e-06, "loss": 6.323888778686523, "step": 31700 }, { "epoch": 0.01605, "grad_norm": 7.125916957855225, "learning_rate": 3.4492929292929296e-06, "loss": 6.30679931640625, "step": 31705 }, { "epoch": 0.0161, "grad_norm": 5.580719470977783, "learning_rate": 3.4490404040404043e-06, "loss": 6.335359573364258, "step": 31710 }, { "epoch": 0.01615, "grad_norm": 5.956960201263428, "learning_rate": 3.4487878787878793e-06, "loss": 6.311019515991211, "step": 31715 }, { "epoch": 0.0162, "grad_norm": 5.927552700042725, "learning_rate": 3.448535353535354e-06, "loss": 6.298535919189453, "step": 31720 }, { "epoch": 0.01625, "grad_norm": 20.22911262512207, "learning_rate": 3.4482828282828286e-06, "loss": 6.181367492675781, "step": 31725 }, { "epoch": 0.0163, "grad_norm": 7.554849147796631, "learning_rate": 3.4480303030303032e-06, "loss": 6.315692901611328, "step": 31730 }, { "epoch": 0.01635, "grad_norm": 7.305336952209473, "learning_rate": 3.4477777777777783e-06, "loss": 6.283394241333008, "step": 31735 }, { "epoch": 0.0164, "grad_norm": 3.5878970623016357, "learning_rate": 3.447525252525253e-06, "loss": 6.2654376983642575, "step": 31740 }, { "epoch": 0.01645, "grad_norm": 4.613415718078613, "learning_rate": 3.4472727272727275e-06, "loss": 6.300001525878907, "step": 31745 }, { "epoch": 0.0165, "grad_norm": 5.8660736083984375, "learning_rate": 3.447020202020202e-06, "loss": 6.325134658813477, "step": 31750 }, { "epoch": 0.01655, "grad_norm": 6.802213191986084, "learning_rate": 3.4467676767676772e-06, "loss": 6.304117965698242, "step": 31755 }, { "epoch": 0.0166, "grad_norm": 7.30796480178833, "learning_rate": 3.446515151515152e-06, "loss": 6.256463623046875, "step": 31760 }, { "epoch": 0.01665, "grad_norm": 4.652501106262207, "learning_rate": 3.4462626262626265e-06, "loss": 6.29427490234375, "step": 31765 }, { "epoch": 0.0167, "grad_norm": 3.471275568008423, "learning_rate": 3.446010101010101e-06, "loss": 6.273506164550781, "step": 31770 }, { "epoch": 0.01675, "grad_norm": 6.855621814727783, "learning_rate": 3.445757575757576e-06, "loss": 6.310698699951172, "step": 31775 }, { "epoch": 0.0168, "grad_norm": 11.133617401123047, "learning_rate": 3.445505050505051e-06, "loss": 6.33552360534668, "step": 31780 }, { "epoch": 0.01685, "grad_norm": 6.413993835449219, "learning_rate": 3.4452525252525254e-06, "loss": 6.2734619140625, "step": 31785 }, { "epoch": 0.0169, "grad_norm": 4.237363815307617, "learning_rate": 3.445e-06, "loss": 6.3292488098144535, "step": 31790 }, { "epoch": 0.01695, "grad_norm": 6.368398666381836, "learning_rate": 3.444747474747475e-06, "loss": 6.2896991729736325, "step": 31795 }, { "epoch": 0.017, "grad_norm": 4.33073616027832, "learning_rate": 3.4444949494949497e-06, "loss": 6.345384216308593, "step": 31800 }, { "epoch": 0.01705, "grad_norm": 8.354249954223633, "learning_rate": 3.4442424242424244e-06, "loss": 6.299488830566406, "step": 31805 }, { "epoch": 0.0171, "grad_norm": 4.806987762451172, "learning_rate": 3.443989898989899e-06, "loss": 6.261016082763672, "step": 31810 }, { "epoch": 0.01715, "grad_norm": 7.7956156730651855, "learning_rate": 3.443737373737374e-06, "loss": 6.3149871826171875, "step": 31815 }, { "epoch": 0.0172, "grad_norm": 6.116947650909424, "learning_rate": 3.4434848484848487e-06, "loss": 6.267167663574218, "step": 31820 }, { "epoch": 0.01725, "grad_norm": 3.6102206707000732, "learning_rate": 3.4432323232323233e-06, "loss": 6.272686004638672, "step": 31825 }, { "epoch": 0.0173, "grad_norm": 6.606013774871826, "learning_rate": 3.4429797979797984e-06, "loss": 6.289307022094727, "step": 31830 }, { "epoch": 0.01735, "grad_norm": 8.34733772277832, "learning_rate": 3.442727272727273e-06, "loss": 6.306378936767578, "step": 31835 }, { "epoch": 0.0174, "grad_norm": 5.240185260772705, "learning_rate": 3.4424747474747476e-06, "loss": 6.2825065612792965, "step": 31840 }, { "epoch": 0.01745, "grad_norm": 5.906691074371338, "learning_rate": 3.4422222222222223e-06, "loss": 6.1539253234863285, "step": 31845 }, { "epoch": 0.0175, "grad_norm": 8.313730239868164, "learning_rate": 3.4419696969696973e-06, "loss": 6.278577423095703, "step": 31850 }, { "epoch": 0.01755, "grad_norm": 5.267515659332275, "learning_rate": 3.441717171717172e-06, "loss": 6.307084655761718, "step": 31855 }, { "epoch": 0.0176, "grad_norm": 2.9735546112060547, "learning_rate": 3.4414646464646466e-06, "loss": 6.287197875976562, "step": 31860 }, { "epoch": 0.01765, "grad_norm": 6.6072492599487305, "learning_rate": 3.441212121212121e-06, "loss": 6.294551086425781, "step": 31865 }, { "epoch": 0.0177, "grad_norm": 7.9679388999938965, "learning_rate": 3.4409595959595967e-06, "loss": 6.241648101806641, "step": 31870 }, { "epoch": 0.01775, "grad_norm": 4.801064491271973, "learning_rate": 3.4407070707070713e-06, "loss": 6.265339660644531, "step": 31875 }, { "epoch": 0.0178, "grad_norm": 7.333015441894531, "learning_rate": 3.4404545454545455e-06, "loss": 6.345217895507813, "step": 31880 }, { "epoch": 0.01785, "grad_norm": 9.557048797607422, "learning_rate": 3.44020202020202e-06, "loss": 6.3404685974121096, "step": 31885 }, { "epoch": 0.0179, "grad_norm": 4.929149150848389, "learning_rate": 3.4399494949494956e-06, "loss": 6.23961181640625, "step": 31890 }, { "epoch": 0.01795, "grad_norm": 6.641455173492432, "learning_rate": 3.4396969696969702e-06, "loss": 6.321847534179687, "step": 31895 }, { "epoch": 0.018, "grad_norm": 6.2663726806640625, "learning_rate": 3.439444444444445e-06, "loss": 6.193353271484375, "step": 31900 }, { "epoch": 0.01805, "grad_norm": 5.846175670623779, "learning_rate": 3.4391919191919195e-06, "loss": 6.2901664733886715, "step": 31905 }, { "epoch": 0.0181, "grad_norm": 3.615746021270752, "learning_rate": 3.4389393939393946e-06, "loss": 6.236089706420898, "step": 31910 }, { "epoch": 0.01815, "grad_norm": 9.009526252746582, "learning_rate": 3.438686868686869e-06, "loss": 6.287271881103516, "step": 31915 }, { "epoch": 0.0182, "grad_norm": 13.213021278381348, "learning_rate": 3.438434343434344e-06, "loss": 6.367655563354492, "step": 31920 }, { "epoch": 0.01825, "grad_norm": 4.49423885345459, "learning_rate": 3.4381818181818185e-06, "loss": 6.333085632324218, "step": 31925 }, { "epoch": 0.0183, "grad_norm": 6.173900604248047, "learning_rate": 3.4379292929292935e-06, "loss": 6.399452209472656, "step": 31930 }, { "epoch": 0.01835, "grad_norm": 7.305313587188721, "learning_rate": 3.437676767676768e-06, "loss": 6.3648323059082035, "step": 31935 }, { "epoch": 0.0184, "grad_norm": 5.277191162109375, "learning_rate": 3.4374242424242428e-06, "loss": 6.321726226806641, "step": 31940 }, { "epoch": 0.01845, "grad_norm": 3.4797632694244385, "learning_rate": 3.4371717171717174e-06, "loss": 6.211325073242188, "step": 31945 }, { "epoch": 0.0185, "grad_norm": 7.6612548828125, "learning_rate": 3.4369191919191924e-06, "loss": 6.321328735351562, "step": 31950 }, { "epoch": 0.01855, "grad_norm": 4.453453063964844, "learning_rate": 3.436666666666667e-06, "loss": 6.413594055175781, "step": 31955 }, { "epoch": 0.0186, "grad_norm": 5.733448505401611, "learning_rate": 3.4364141414141417e-06, "loss": 6.437434387207031, "step": 31960 }, { "epoch": 0.01865, "grad_norm": 8.235899925231934, "learning_rate": 3.4361616161616163e-06, "loss": 6.277687454223633, "step": 31965 }, { "epoch": 0.0187, "grad_norm": 5.644808292388916, "learning_rate": 3.4359090909090914e-06, "loss": 6.245795059204101, "step": 31970 }, { "epoch": 0.01875, "grad_norm": 18.938283920288086, "learning_rate": 3.435656565656566e-06, "loss": 6.404547119140625, "step": 31975 }, { "epoch": 0.0188, "grad_norm": 3.7965950965881348, "learning_rate": 3.4354040404040407e-06, "loss": 6.284709930419922, "step": 31980 }, { "epoch": 0.01885, "grad_norm": 7.361538410186768, "learning_rate": 3.4351515151515153e-06, "loss": 6.290018463134766, "step": 31985 }, { "epoch": 0.0189, "grad_norm": 3.8698885440826416, "learning_rate": 3.4348989898989903e-06, "loss": 6.271343994140625, "step": 31990 }, { "epoch": 0.01895, "grad_norm": 9.876664161682129, "learning_rate": 3.434646464646465e-06, "loss": 6.3014263153076175, "step": 31995 }, { "epoch": 0.019, "grad_norm": 45.41852951049805, "learning_rate": 3.4343939393939396e-06, "loss": 6.447395324707031, "step": 32000 }, { "epoch": 0.01905, "grad_norm": 4.826485633850098, "learning_rate": 3.4341414141414142e-06, "loss": 6.337058639526367, "step": 32005 }, { "epoch": 0.0191, "grad_norm": 6.922322750091553, "learning_rate": 3.4338888888888893e-06, "loss": 6.328204345703125, "step": 32010 }, { "epoch": 0.01915, "grad_norm": 3.874532461166382, "learning_rate": 3.433636363636364e-06, "loss": 6.264595794677734, "step": 32015 }, { "epoch": 0.0192, "grad_norm": 8.299689292907715, "learning_rate": 3.4333838383838385e-06, "loss": 6.642129516601562, "step": 32020 }, { "epoch": 0.01925, "grad_norm": 5.006965160369873, "learning_rate": 3.433131313131313e-06, "loss": 6.285784912109375, "step": 32025 }, { "epoch": 0.0193, "grad_norm": 6.392104148864746, "learning_rate": 3.4328787878787882e-06, "loss": 6.308644485473633, "step": 32030 }, { "epoch": 0.01935, "grad_norm": 6.673003673553467, "learning_rate": 3.432626262626263e-06, "loss": 6.292664337158203, "step": 32035 }, { "epoch": 0.0194, "grad_norm": 3.572056293487549, "learning_rate": 3.4323737373737375e-06, "loss": 6.306335830688477, "step": 32040 }, { "epoch": 0.01945, "grad_norm": 3.6506781578063965, "learning_rate": 3.432121212121212e-06, "loss": 6.30716552734375, "step": 32045 }, { "epoch": 0.0195, "grad_norm": 5.960323333740234, "learning_rate": 3.431868686868687e-06, "loss": 6.268722152709961, "step": 32050 }, { "epoch": 0.01955, "grad_norm": 3.9909090995788574, "learning_rate": 3.431616161616162e-06, "loss": 6.366038513183594, "step": 32055 }, { "epoch": 0.0196, "grad_norm": 8.183165550231934, "learning_rate": 3.4313636363636364e-06, "loss": 6.3193004608154295, "step": 32060 }, { "epoch": 0.01965, "grad_norm": 18.577024459838867, "learning_rate": 3.431111111111111e-06, "loss": 6.529879760742188, "step": 32065 }, { "epoch": 0.0197, "grad_norm": 4.920264720916748, "learning_rate": 3.4308585858585865e-06, "loss": 6.278936767578125, "step": 32070 }, { "epoch": 0.01975, "grad_norm": 6.263941764831543, "learning_rate": 3.4306060606060607e-06, "loss": 6.25976333618164, "step": 32075 }, { "epoch": 0.0198, "grad_norm": 7.049136161804199, "learning_rate": 3.4303535353535354e-06, "loss": 6.231179046630859, "step": 32080 }, { "epoch": 0.01985, "grad_norm": 3.2108652591705322, "learning_rate": 3.43010101010101e-06, "loss": 6.464751434326172, "step": 32085 }, { "epoch": 0.0199, "grad_norm": 6.771056652069092, "learning_rate": 3.4298484848484855e-06, "loss": 6.27479248046875, "step": 32090 }, { "epoch": 0.01995, "grad_norm": 6.785924911499023, "learning_rate": 3.42959595959596e-06, "loss": 6.324093627929687, "step": 32095 }, { "epoch": 0.02, "grad_norm": 7.140957832336426, "learning_rate": 3.4293434343434347e-06, "loss": 6.310171890258789, "step": 32100 }, { "epoch": 0.02005, "grad_norm": 7.84042501449585, "learning_rate": 3.429090909090909e-06, "loss": 6.315644073486328, "step": 32105 }, { "epoch": 0.0201, "grad_norm": 7.905246734619141, "learning_rate": 3.4288383838383844e-06, "loss": 6.309546279907226, "step": 32110 }, { "epoch": 0.02015, "grad_norm": 4.346594333648682, "learning_rate": 3.428585858585859e-06, "loss": 6.28812255859375, "step": 32115 }, { "epoch": 0.0202, "grad_norm": 3.4818339347839355, "learning_rate": 3.4283333333333337e-06, "loss": 6.2875019073486325, "step": 32120 }, { "epoch": 0.02025, "grad_norm": 18.192026138305664, "learning_rate": 3.4280808080808083e-06, "loss": 6.266826248168945, "step": 32125 }, { "epoch": 0.0203, "grad_norm": 5.623541831970215, "learning_rate": 3.4278282828282834e-06, "loss": 6.35534896850586, "step": 32130 }, { "epoch": 0.02035, "grad_norm": 7.073942184448242, "learning_rate": 3.427575757575758e-06, "loss": 6.265307235717773, "step": 32135 }, { "epoch": 0.0204, "grad_norm": 3.950676679611206, "learning_rate": 3.4273232323232326e-06, "loss": 6.3546699523925785, "step": 32140 }, { "epoch": 0.02045, "grad_norm": 6.548425674438477, "learning_rate": 3.4270707070707073e-06, "loss": 6.272074890136719, "step": 32145 }, { "epoch": 0.0205, "grad_norm": 5.880001544952393, "learning_rate": 3.4268181818181823e-06, "loss": 6.259016418457032, "step": 32150 }, { "epoch": 0.02055, "grad_norm": 4.494113445281982, "learning_rate": 3.426565656565657e-06, "loss": 6.295233917236328, "step": 32155 }, { "epoch": 0.0206, "grad_norm": 8.922174453735352, "learning_rate": 3.4263131313131316e-06, "loss": 6.348507690429687, "step": 32160 }, { "epoch": 0.02065, "grad_norm": 6.134346008300781, "learning_rate": 3.426060606060606e-06, "loss": 6.379944610595703, "step": 32165 }, { "epoch": 0.0207, "grad_norm": 6.486685752868652, "learning_rate": 3.4258080808080813e-06, "loss": 6.32507438659668, "step": 32170 }, { "epoch": 0.02075, "grad_norm": 4.252213954925537, "learning_rate": 3.425555555555556e-06, "loss": 6.3460956573486325, "step": 32175 }, { "epoch": 0.0208, "grad_norm": 6.346280097961426, "learning_rate": 3.4253030303030305e-06, "loss": 6.349752426147461, "step": 32180 }, { "epoch": 0.02085, "grad_norm": 7.851796627044678, "learning_rate": 3.425050505050505e-06, "loss": 6.279642486572266, "step": 32185 }, { "epoch": 0.0209, "grad_norm": 7.163774013519287, "learning_rate": 3.42479797979798e-06, "loss": 6.236224365234375, "step": 32190 }, { "epoch": 0.02095, "grad_norm": 6.286948204040527, "learning_rate": 3.424545454545455e-06, "loss": 6.2620399475097654, "step": 32195 }, { "epoch": 0.021, "grad_norm": 7.335815906524658, "learning_rate": 3.4242929292929295e-06, "loss": 6.318128204345703, "step": 32200 }, { "epoch": 0.02105, "grad_norm": 7.27271032333374, "learning_rate": 3.424040404040404e-06, "loss": 6.433381652832031, "step": 32205 }, { "epoch": 0.0211, "grad_norm": 6.081254959106445, "learning_rate": 3.423787878787879e-06, "loss": 6.279003524780274, "step": 32210 }, { "epoch": 0.02115, "grad_norm": 4.697077751159668, "learning_rate": 3.4235353535353538e-06, "loss": 6.268719863891602, "step": 32215 }, { "epoch": 0.0212, "grad_norm": 5.818203926086426, "learning_rate": 3.4232828282828284e-06, "loss": 6.2838695526123045, "step": 32220 }, { "epoch": 0.02125, "grad_norm": 5.986729145050049, "learning_rate": 3.423030303030303e-06, "loss": 6.312086868286133, "step": 32225 }, { "epoch": 0.0213, "grad_norm": 7.142797470092773, "learning_rate": 3.422777777777778e-06, "loss": 6.284982681274414, "step": 32230 }, { "epoch": 0.02135, "grad_norm": 3.1200385093688965, "learning_rate": 3.4225252525252527e-06, "loss": 6.287965393066406, "step": 32235 }, { "epoch": 0.0214, "grad_norm": 6.946979522705078, "learning_rate": 3.4222727272727273e-06, "loss": 6.337002563476562, "step": 32240 }, { "epoch": 0.02145, "grad_norm": 7.877346992492676, "learning_rate": 3.4220202020202024e-06, "loss": 6.307799530029297, "step": 32245 }, { "epoch": 0.0215, "grad_norm": 7.355072975158691, "learning_rate": 3.421767676767677e-06, "loss": 6.289122772216797, "step": 32250 }, { "epoch": 0.02155, "grad_norm": 4.829036235809326, "learning_rate": 3.4215151515151517e-06, "loss": 6.259711456298828, "step": 32255 }, { "epoch": 0.0216, "grad_norm": 9.047592163085938, "learning_rate": 3.4212626262626263e-06, "loss": 6.27264518737793, "step": 32260 }, { "epoch": 0.02165, "grad_norm": 5.92063570022583, "learning_rate": 3.4210101010101018e-06, "loss": 6.3243560791015625, "step": 32265 }, { "epoch": 0.0217, "grad_norm": 12.830426216125488, "learning_rate": 3.420757575757576e-06, "loss": 6.211675643920898, "step": 32270 }, { "epoch": 0.02175, "grad_norm": 7.308344841003418, "learning_rate": 3.4205050505050506e-06, "loss": 6.222997665405273, "step": 32275 }, { "epoch": 0.0218, "grad_norm": 4.568763732910156, "learning_rate": 3.4202525252525252e-06, "loss": 6.275173568725586, "step": 32280 }, { "epoch": 0.02185, "grad_norm": 5.150790214538574, "learning_rate": 3.4200000000000007e-06, "loss": 6.298620986938476, "step": 32285 }, { "epoch": 0.0219, "grad_norm": 5.813236713409424, "learning_rate": 3.4197474747474753e-06, "loss": 6.288979339599609, "step": 32290 }, { "epoch": 0.02195, "grad_norm": 5.45294189453125, "learning_rate": 3.4194949494949496e-06, "loss": 6.309714508056641, "step": 32295 }, { "epoch": 0.022, "grad_norm": 7.023442268371582, "learning_rate": 3.419242424242424e-06, "loss": 6.223733901977539, "step": 32300 }, { "epoch": 0.02205, "grad_norm": 4.545304298400879, "learning_rate": 3.4189898989898997e-06, "loss": 6.272264862060547, "step": 32305 }, { "epoch": 0.0221, "grad_norm": 4.7777299880981445, "learning_rate": 3.4187373737373743e-06, "loss": 6.2722320556640625, "step": 32310 }, { "epoch": 0.02215, "grad_norm": 5.0901899337768555, "learning_rate": 3.418484848484849e-06, "loss": 6.29754867553711, "step": 32315 }, { "epoch": 0.0222, "grad_norm": 3.8483922481536865, "learning_rate": 3.4182323232323235e-06, "loss": 6.279553985595703, "step": 32320 }, { "epoch": 0.02225, "grad_norm": 6.103102684020996, "learning_rate": 3.4179797979797986e-06, "loss": 6.287636566162109, "step": 32325 }, { "epoch": 0.0223, "grad_norm": 4.236302852630615, "learning_rate": 3.4177272727272732e-06, "loss": 6.2598003387451175, "step": 32330 }, { "epoch": 0.02235, "grad_norm": 5.115714073181152, "learning_rate": 3.417474747474748e-06, "loss": 6.287063217163086, "step": 32335 }, { "epoch": 0.0224, "grad_norm": 9.660810470581055, "learning_rate": 3.4172222222222225e-06, "loss": 6.373627471923828, "step": 32340 }, { "epoch": 0.02245, "grad_norm": 5.38846492767334, "learning_rate": 3.4169696969696975e-06, "loss": 6.2686012268066404, "step": 32345 }, { "epoch": 0.0225, "grad_norm": 4.665284633636475, "learning_rate": 3.416717171717172e-06, "loss": 6.2926891326904295, "step": 32350 }, { "epoch": 0.02255, "grad_norm": 6.9151434898376465, "learning_rate": 3.416464646464647e-06, "loss": 6.2726295471191404, "step": 32355 }, { "epoch": 0.0226, "grad_norm": 6.4344096183776855, "learning_rate": 3.4162121212121214e-06, "loss": 6.248241424560547, "step": 32360 }, { "epoch": 0.02265, "grad_norm": 5.950206279754639, "learning_rate": 3.4159595959595965e-06, "loss": 6.306484985351562, "step": 32365 }, { "epoch": 0.0227, "grad_norm": 4.17216157913208, "learning_rate": 3.415707070707071e-06, "loss": 6.2299762725830075, "step": 32370 }, { "epoch": 0.02275, "grad_norm": 6.776298999786377, "learning_rate": 3.4154545454545457e-06, "loss": 6.2634422302246096, "step": 32375 }, { "epoch": 0.0228, "grad_norm": 5.850812911987305, "learning_rate": 3.4152020202020204e-06, "loss": 6.330546951293945, "step": 32380 }, { "epoch": 0.02285, "grad_norm": 5.061315536499023, "learning_rate": 3.4149494949494954e-06, "loss": 6.285494995117188, "step": 32385 }, { "epoch": 0.0229, "grad_norm": 5.327838897705078, "learning_rate": 3.41469696969697e-06, "loss": 6.318167114257813, "step": 32390 }, { "epoch": 0.02295, "grad_norm": 4.172123432159424, "learning_rate": 3.4144444444444447e-06, "loss": 6.378875732421875, "step": 32395 }, { "epoch": 0.023, "grad_norm": 5.2284955978393555, "learning_rate": 3.4141919191919193e-06, "loss": 6.295571899414062, "step": 32400 }, { "epoch": 0.02305, "grad_norm": 23.464540481567383, "learning_rate": 3.4139393939393944e-06, "loss": 6.1604766845703125, "step": 32405 }, { "epoch": 0.0231, "grad_norm": 8.758240699768066, "learning_rate": 3.413686868686869e-06, "loss": 6.166027069091797, "step": 32410 }, { "epoch": 0.02315, "grad_norm": 5.50541353225708, "learning_rate": 3.4134343434343436e-06, "loss": 6.2739105224609375, "step": 32415 }, { "epoch": 0.0232, "grad_norm": 5.875354290008545, "learning_rate": 3.4131818181818183e-06, "loss": 6.27103271484375, "step": 32420 }, { "epoch": 0.02325, "grad_norm": 6.040763854980469, "learning_rate": 3.4129292929292933e-06, "loss": 6.319233703613281, "step": 32425 }, { "epoch": 0.0233, "grad_norm": 4.986361026763916, "learning_rate": 3.412676767676768e-06, "loss": 6.260108947753906, "step": 32430 }, { "epoch": 0.02335, "grad_norm": 4.917168140411377, "learning_rate": 3.4124242424242426e-06, "loss": 6.281546020507813, "step": 32435 }, { "epoch": 0.0234, "grad_norm": 7.084517002105713, "learning_rate": 3.412171717171717e-06, "loss": 6.263153076171875, "step": 32440 }, { "epoch": 0.02345, "grad_norm": 5.084254741668701, "learning_rate": 3.4119191919191923e-06, "loss": 6.331203460693359, "step": 32445 }, { "epoch": 0.0235, "grad_norm": 6.203841209411621, "learning_rate": 3.411666666666667e-06, "loss": 6.31361198425293, "step": 32450 }, { "epoch": 0.02355, "grad_norm": 6.095081329345703, "learning_rate": 3.4114141414141415e-06, "loss": 6.307169342041016, "step": 32455 }, { "epoch": 0.0236, "grad_norm": 10.236827850341797, "learning_rate": 3.411161616161616e-06, "loss": 6.326509094238281, "step": 32460 }, { "epoch": 0.02365, "grad_norm": 5.875765323638916, "learning_rate": 3.410909090909091e-06, "loss": 6.376076126098633, "step": 32465 }, { "epoch": 0.0237, "grad_norm": 4.158951759338379, "learning_rate": 3.410656565656566e-06, "loss": 6.266670227050781, "step": 32470 }, { "epoch": 0.02375, "grad_norm": 8.984514236450195, "learning_rate": 3.4104040404040405e-06, "loss": 6.1888267517089846, "step": 32475 }, { "epoch": 0.0238, "grad_norm": 10.406311988830566, "learning_rate": 3.410151515151515e-06, "loss": 6.295879364013672, "step": 32480 }, { "epoch": 0.02385, "grad_norm": 9.178069114685059, "learning_rate": 3.4098989898989906e-06, "loss": 6.320977020263672, "step": 32485 }, { "epoch": 0.0239, "grad_norm": 8.282934188842773, "learning_rate": 3.4096464646464648e-06, "loss": 6.271314239501953, "step": 32490 }, { "epoch": 0.02395, "grad_norm": 10.929844856262207, "learning_rate": 3.4093939393939394e-06, "loss": 6.305949020385742, "step": 32495 }, { "epoch": 0.024, "grad_norm": 5.974272727966309, "learning_rate": 3.409141414141414e-06, "loss": 6.284951782226562, "step": 32500 }, { "epoch": 0.02405, "grad_norm": 9.90165901184082, "learning_rate": 3.4088888888888895e-06, "loss": 6.311413955688477, "step": 32505 }, { "epoch": 0.0241, "grad_norm": 7.626299858093262, "learning_rate": 3.408636363636364e-06, "loss": 6.3053436279296875, "step": 32510 }, { "epoch": 0.02415, "grad_norm": 8.374993324279785, "learning_rate": 3.4083838383838388e-06, "loss": 6.2742919921875, "step": 32515 }, { "epoch": 0.0242, "grad_norm": 5.127213001251221, "learning_rate": 3.408131313131313e-06, "loss": 6.250851440429687, "step": 32520 }, { "epoch": 0.02425, "grad_norm": 61.41652297973633, "learning_rate": 3.4078787878787885e-06, "loss": 6.212837219238281, "step": 32525 }, { "epoch": 0.0243, "grad_norm": 8.33638858795166, "learning_rate": 3.407626262626263e-06, "loss": 6.191169738769531, "step": 32530 }, { "epoch": 0.02435, "grad_norm": 6.257434368133545, "learning_rate": 3.4073737373737377e-06, "loss": 6.3098194122314455, "step": 32535 }, { "epoch": 0.0244, "grad_norm": 6.95494270324707, "learning_rate": 3.4071212121212124e-06, "loss": 6.274754333496094, "step": 32540 }, { "epoch": 0.02445, "grad_norm": 7.08582067489624, "learning_rate": 3.4068686868686874e-06, "loss": 6.273867797851563, "step": 32545 }, { "epoch": 0.0245, "grad_norm": 11.90192985534668, "learning_rate": 3.406616161616162e-06, "loss": 6.259604644775391, "step": 32550 }, { "epoch": 0.02455, "grad_norm": 12.465909957885742, "learning_rate": 3.4063636363636367e-06, "loss": 6.344020080566406, "step": 32555 }, { "epoch": 0.0246, "grad_norm": 8.09375286102295, "learning_rate": 3.4061111111111113e-06, "loss": 6.271186828613281, "step": 32560 }, { "epoch": 0.02465, "grad_norm": 5.28250789642334, "learning_rate": 3.4058585858585864e-06, "loss": 6.281183242797852, "step": 32565 }, { "epoch": 0.0247, "grad_norm": 4.477888584136963, "learning_rate": 3.405606060606061e-06, "loss": 6.377745056152344, "step": 32570 }, { "epoch": 0.02475, "grad_norm": 7.783365249633789, "learning_rate": 3.4053535353535356e-06, "loss": 6.295442962646485, "step": 32575 }, { "epoch": 0.0248, "grad_norm": 7.352127552032471, "learning_rate": 3.4051010101010102e-06, "loss": 6.511199188232422, "step": 32580 }, { "epoch": 0.02485, "grad_norm": 20.00706672668457, "learning_rate": 3.4048484848484853e-06, "loss": 6.419554138183594, "step": 32585 }, { "epoch": 0.0249, "grad_norm": 5.256389141082764, "learning_rate": 3.40459595959596e-06, "loss": 6.339096450805664, "step": 32590 }, { "epoch": 0.02495, "grad_norm": 6.563801288604736, "learning_rate": 3.4043434343434346e-06, "loss": 6.28271369934082, "step": 32595 }, { "epoch": 0.025, "grad_norm": 6.374956130981445, "learning_rate": 3.404090909090909e-06, "loss": 6.272246551513672, "step": 32600 }, { "epoch": 0.02505, "grad_norm": 5.4979023933410645, "learning_rate": 3.4038383838383842e-06, "loss": 6.249833297729492, "step": 32605 }, { "epoch": 0.0251, "grad_norm": 6.788812637329102, "learning_rate": 3.403585858585859e-06, "loss": 6.283016586303711, "step": 32610 }, { "epoch": 0.02515, "grad_norm": 17.309892654418945, "learning_rate": 3.4033333333333335e-06, "loss": 6.304659271240235, "step": 32615 }, { "epoch": 0.0252, "grad_norm": 4.285764694213867, "learning_rate": 3.403080808080808e-06, "loss": 6.161538696289062, "step": 32620 }, { "epoch": 0.02525, "grad_norm": 5.18768310546875, "learning_rate": 3.402828282828283e-06, "loss": 6.297689056396484, "step": 32625 }, { "epoch": 0.0253, "grad_norm": 3.6121935844421387, "learning_rate": 3.402575757575758e-06, "loss": 6.296743774414063, "step": 32630 }, { "epoch": 0.02535, "grad_norm": 7.011654376983643, "learning_rate": 3.4023232323232324e-06, "loss": 6.253909301757813, "step": 32635 }, { "epoch": 0.0254, "grad_norm": 5.257863998413086, "learning_rate": 3.402070707070707e-06, "loss": 6.244644546508789, "step": 32640 }, { "epoch": 0.02545, "grad_norm": 5.4188971519470215, "learning_rate": 3.401818181818182e-06, "loss": 6.270030975341797, "step": 32645 }, { "epoch": 0.0255, "grad_norm": 6.767153739929199, "learning_rate": 3.4015656565656568e-06, "loss": 6.3018543243408205, "step": 32650 }, { "epoch": 0.02555, "grad_norm": 4.691617012023926, "learning_rate": 3.4013131313131314e-06, "loss": 6.338926315307617, "step": 32655 }, { "epoch": 0.0256, "grad_norm": 5.44677734375, "learning_rate": 3.401060606060606e-06, "loss": 6.251610565185547, "step": 32660 }, { "epoch": 0.02565, "grad_norm": 9.626145362854004, "learning_rate": 3.400808080808081e-06, "loss": 6.3152210235595705, "step": 32665 }, { "epoch": 0.0257, "grad_norm": 4.189969062805176, "learning_rate": 3.4005555555555557e-06, "loss": 6.338197708129883, "step": 32670 }, { "epoch": 0.02575, "grad_norm": 6.453485012054443, "learning_rate": 3.4003030303030303e-06, "loss": 6.316188812255859, "step": 32675 }, { "epoch": 0.0258, "grad_norm": 6.72882080078125, "learning_rate": 3.400050505050506e-06, "loss": 6.253453063964844, "step": 32680 }, { "epoch": 0.02585, "grad_norm": 4.00105619430542, "learning_rate": 3.39979797979798e-06, "loss": 6.289204025268555, "step": 32685 }, { "epoch": 0.0259, "grad_norm": 4.617617130279541, "learning_rate": 3.3995454545454546e-06, "loss": 6.297190475463867, "step": 32690 }, { "epoch": 0.02595, "grad_norm": 8.77125072479248, "learning_rate": 3.3992929292929293e-06, "loss": 6.331533432006836, "step": 32695 }, { "epoch": 0.026, "grad_norm": 5.983002662658691, "learning_rate": 3.3990404040404048e-06, "loss": 6.25775260925293, "step": 32700 }, { "epoch": 0.02605, "grad_norm": 4.518277645111084, "learning_rate": 3.3987878787878794e-06, "loss": 6.334423828125, "step": 32705 }, { "epoch": 0.0261, "grad_norm": 8.751855850219727, "learning_rate": 3.3985353535353536e-06, "loss": 6.2957813262939455, "step": 32710 }, { "epoch": 0.02615, "grad_norm": 5.162111759185791, "learning_rate": 3.3982828282828282e-06, "loss": 6.294309616088867, "step": 32715 }, { "epoch": 0.0262, "grad_norm": 7.6749796867370605, "learning_rate": 3.3980303030303037e-06, "loss": 6.291846466064453, "step": 32720 }, { "epoch": 0.02625, "grad_norm": 6.8210673332214355, "learning_rate": 3.3977777777777783e-06, "loss": 6.319868469238282, "step": 32725 }, { "epoch": 0.0263, "grad_norm": 24.7596378326416, "learning_rate": 3.397525252525253e-06, "loss": 6.413050842285156, "step": 32730 }, { "epoch": 0.02635, "grad_norm": 11.842512130737305, "learning_rate": 3.3972727272727276e-06, "loss": 6.2862812042236325, "step": 32735 }, { "epoch": 0.0264, "grad_norm": 6.6752519607543945, "learning_rate": 3.3970202020202026e-06, "loss": 6.295029067993164, "step": 32740 }, { "epoch": 0.02645, "grad_norm": 6.844410419464111, "learning_rate": 3.3967676767676773e-06, "loss": 6.252899551391602, "step": 32745 }, { "epoch": 0.0265, "grad_norm": 9.802356719970703, "learning_rate": 3.396515151515152e-06, "loss": 6.321332550048828, "step": 32750 }, { "epoch": 0.02655, "grad_norm": 5.263422012329102, "learning_rate": 3.3962626262626265e-06, "loss": 6.256106567382813, "step": 32755 }, { "epoch": 0.0266, "grad_norm": 6.753117561340332, "learning_rate": 3.3960101010101016e-06, "loss": 6.309476852416992, "step": 32760 }, { "epoch": 0.02665, "grad_norm": 6.541280746459961, "learning_rate": 3.3957575757575762e-06, "loss": 6.295966720581054, "step": 32765 }, { "epoch": 0.0267, "grad_norm": 5.8446760177612305, "learning_rate": 3.395505050505051e-06, "loss": 6.3157196044921875, "step": 32770 }, { "epoch": 0.02675, "grad_norm": 4.168968200683594, "learning_rate": 3.3952525252525255e-06, "loss": 6.317673873901367, "step": 32775 }, { "epoch": 0.0268, "grad_norm": 4.293978691101074, "learning_rate": 3.3950000000000005e-06, "loss": 6.305788803100586, "step": 32780 }, { "epoch": 0.02685, "grad_norm": 6.9493489265441895, "learning_rate": 3.394747474747475e-06, "loss": 6.281805801391601, "step": 32785 }, { "epoch": 0.0269, "grad_norm": 4.490607738494873, "learning_rate": 3.39449494949495e-06, "loss": 6.2968708038330075, "step": 32790 }, { "epoch": 0.02695, "grad_norm": 7.814769268035889, "learning_rate": 3.3942424242424244e-06, "loss": 6.269432067871094, "step": 32795 }, { "epoch": 0.027, "grad_norm": 4.410820960998535, "learning_rate": 3.3939898989898995e-06, "loss": 6.305855560302734, "step": 32800 }, { "epoch": 0.02705, "grad_norm": 18.658266067504883, "learning_rate": 3.393737373737374e-06, "loss": 6.227949142456055, "step": 32805 }, { "epoch": 0.0271, "grad_norm": 9.212102890014648, "learning_rate": 3.3934848484848487e-06, "loss": 6.259611511230469, "step": 32810 }, { "epoch": 0.02715, "grad_norm": 12.332964897155762, "learning_rate": 3.3932323232323234e-06, "loss": 6.344914245605469, "step": 32815 }, { "epoch": 0.0272, "grad_norm": 17.153284072875977, "learning_rate": 3.3929797979797984e-06, "loss": 6.415339660644531, "step": 32820 }, { "epoch": 0.02725, "grad_norm": 4.676977634429932, "learning_rate": 3.392727272727273e-06, "loss": 6.260399627685547, "step": 32825 }, { "epoch": 0.0273, "grad_norm": 8.144231796264648, "learning_rate": 3.3924747474747477e-06, "loss": 6.2758949279785154, "step": 32830 }, { "epoch": 0.02735, "grad_norm": 4.575031280517578, "learning_rate": 3.3922222222222223e-06, "loss": 6.340414047241211, "step": 32835 }, { "epoch": 0.0274, "grad_norm": 5.878757476806641, "learning_rate": 3.3919696969696974e-06, "loss": 6.286156463623047, "step": 32840 }, { "epoch": 0.02745, "grad_norm": 3.6416032314300537, "learning_rate": 3.391717171717172e-06, "loss": 6.321328353881836, "step": 32845 }, { "epoch": 0.0275, "grad_norm": 6.340916633605957, "learning_rate": 3.3914646464646466e-06, "loss": 6.393750762939453, "step": 32850 }, { "epoch": 0.02755, "grad_norm": 6.809146881103516, "learning_rate": 3.3912121212121213e-06, "loss": 6.269526672363281, "step": 32855 }, { "epoch": 0.0276, "grad_norm": 3.288670063018799, "learning_rate": 3.3909595959595963e-06, "loss": 6.281950378417969, "step": 32860 }, { "epoch": 0.02765, "grad_norm": 5.109802722930908, "learning_rate": 3.390707070707071e-06, "loss": 6.262111663818359, "step": 32865 }, { "epoch": 0.0277, "grad_norm": 3.799258232116699, "learning_rate": 3.3904545454545456e-06, "loss": 6.2551628112792965, "step": 32870 }, { "epoch": 0.02775, "grad_norm": 4.301450252532959, "learning_rate": 3.39020202020202e-06, "loss": 6.275251388549805, "step": 32875 }, { "epoch": 0.0278, "grad_norm": 4.405968189239502, "learning_rate": 3.3899494949494952e-06, "loss": 6.273078536987304, "step": 32880 }, { "epoch": 0.02785, "grad_norm": 5.581389904022217, "learning_rate": 3.38969696969697e-06, "loss": 6.2833808898925785, "step": 32885 }, { "epoch": 0.0279, "grad_norm": 3.189084529876709, "learning_rate": 3.3894444444444445e-06, "loss": 6.270728302001953, "step": 32890 }, { "epoch": 0.02795, "grad_norm": 6.098864555358887, "learning_rate": 3.389191919191919e-06, "loss": 6.2906238555908205, "step": 32895 }, { "epoch": 0.028, "grad_norm": 6.566707611083984, "learning_rate": 3.3889393939393946e-06, "loss": 6.261868286132812, "step": 32900 }, { "epoch": 0.02805, "grad_norm": 7.309471130371094, "learning_rate": 3.388686868686869e-06, "loss": 6.394461059570313, "step": 32905 }, { "epoch": 0.0281, "grad_norm": 10.70196533203125, "learning_rate": 3.3884343434343435e-06, "loss": 6.247087478637695, "step": 32910 }, { "epoch": 0.02815, "grad_norm": 7.136679172515869, "learning_rate": 3.388181818181818e-06, "loss": 6.233831405639648, "step": 32915 }, { "epoch": 0.0282, "grad_norm": 7.76154899597168, "learning_rate": 3.3879292929292936e-06, "loss": 6.274042129516602, "step": 32920 }, { "epoch": 0.02825, "grad_norm": 7.1754961013793945, "learning_rate": 3.387676767676768e-06, "loss": 6.274201965332031, "step": 32925 }, { "epoch": 0.0283, "grad_norm": 5.778814315795898, "learning_rate": 3.387424242424243e-06, "loss": 6.587728881835938, "step": 32930 }, { "epoch": 0.02835, "grad_norm": 7.604826927185059, "learning_rate": 3.387171717171717e-06, "loss": 6.361950302124024, "step": 32935 }, { "epoch": 0.0284, "grad_norm": 7.614299774169922, "learning_rate": 3.3869191919191925e-06, "loss": 6.276834106445312, "step": 32940 }, { "epoch": 0.02845, "grad_norm": 6.805838108062744, "learning_rate": 3.386666666666667e-06, "loss": 6.295297622680664, "step": 32945 }, { "epoch": 0.0285, "grad_norm": 9.325764656066895, "learning_rate": 3.3864141414141418e-06, "loss": 6.240407943725586, "step": 32950 }, { "epoch": 0.02855, "grad_norm": 11.881196975708008, "learning_rate": 3.3861616161616164e-06, "loss": 6.2974365234375, "step": 32955 }, { "epoch": 0.0286, "grad_norm": 4.622828006744385, "learning_rate": 3.3859090909090914e-06, "loss": 6.282869338989258, "step": 32960 }, { "epoch": 0.02865, "grad_norm": 4.647458076477051, "learning_rate": 3.385656565656566e-06, "loss": 6.294223785400391, "step": 32965 }, { "epoch": 0.0287, "grad_norm": 4.806219100952148, "learning_rate": 3.3854040404040407e-06, "loss": 6.294277191162109, "step": 32970 }, { "epoch": 0.02875, "grad_norm": 6.078112602233887, "learning_rate": 3.3851515151515153e-06, "loss": 6.300291061401367, "step": 32975 }, { "epoch": 0.0288, "grad_norm": 8.323182106018066, "learning_rate": 3.3848989898989904e-06, "loss": 6.285092163085937, "step": 32980 }, { "epoch": 0.02885, "grad_norm": 9.339707374572754, "learning_rate": 3.384646464646465e-06, "loss": 6.270893859863281, "step": 32985 }, { "epoch": 0.0289, "grad_norm": 7.797632217407227, "learning_rate": 3.3843939393939397e-06, "loss": 6.291423797607422, "step": 32990 }, { "epoch": 0.02895, "grad_norm": 4.118420124053955, "learning_rate": 3.3841414141414143e-06, "loss": 6.350740432739258, "step": 32995 }, { "epoch": 0.029, "grad_norm": 6.957522392272949, "learning_rate": 3.3838888888888893e-06, "loss": 6.265525054931641, "step": 33000 }, { "epoch": 0.02905, "grad_norm": 7.795707702636719, "learning_rate": 3.383636363636364e-06, "loss": 6.283013153076172, "step": 33005 }, { "epoch": 0.0291, "grad_norm": 7.959973335266113, "learning_rate": 3.3833838383838386e-06, "loss": 6.2590888977050785, "step": 33010 }, { "epoch": 0.02915, "grad_norm": 8.439517974853516, "learning_rate": 3.3831313131313132e-06, "loss": 6.277886581420899, "step": 33015 }, { "epoch": 0.0292, "grad_norm": 7.143141269683838, "learning_rate": 3.3828787878787883e-06, "loss": 6.3066963195800785, "step": 33020 }, { "epoch": 0.02925, "grad_norm": 7.266543865203857, "learning_rate": 3.382626262626263e-06, "loss": 6.432810211181641, "step": 33025 }, { "epoch": 0.0293, "grad_norm": 4.507126331329346, "learning_rate": 3.3823737373737375e-06, "loss": 6.320489501953125, "step": 33030 }, { "epoch": 0.02935, "grad_norm": 6.134202480316162, "learning_rate": 3.382121212121212e-06, "loss": 6.484198760986328, "step": 33035 }, { "epoch": 0.0294, "grad_norm": 4.969404220581055, "learning_rate": 3.3818686868686872e-06, "loss": 6.308735275268555, "step": 33040 }, { "epoch": 0.02945, "grad_norm": 5.8853631019592285, "learning_rate": 3.381616161616162e-06, "loss": 6.543714904785157, "step": 33045 }, { "epoch": 0.0295, "grad_norm": 9.244221687316895, "learning_rate": 3.3813636363636365e-06, "loss": 6.256529235839844, "step": 33050 }, { "epoch": 0.02955, "grad_norm": 8.25161361694336, "learning_rate": 3.381111111111111e-06, "loss": 6.332659912109375, "step": 33055 }, { "epoch": 0.0296, "grad_norm": 6.023909091949463, "learning_rate": 3.380858585858586e-06, "loss": 6.3304191589355465, "step": 33060 }, { "epoch": 0.02965, "grad_norm": 4.226219177246094, "learning_rate": 3.380606060606061e-06, "loss": 6.324036407470703, "step": 33065 }, { "epoch": 0.0297, "grad_norm": 6.545253753662109, "learning_rate": 3.3803535353535354e-06, "loss": 6.297446823120117, "step": 33070 }, { "epoch": 0.02975, "grad_norm": 5.140801429748535, "learning_rate": 3.38010101010101e-06, "loss": 6.258947372436523, "step": 33075 }, { "epoch": 0.0298, "grad_norm": 6.786447048187256, "learning_rate": 3.379848484848485e-06, "loss": 6.272116088867188, "step": 33080 }, { "epoch": 0.02985, "grad_norm": 11.403496742248535, "learning_rate": 3.3795959595959597e-06, "loss": 6.29498519897461, "step": 33085 }, { "epoch": 0.0299, "grad_norm": 5.828900337219238, "learning_rate": 3.3793434343434344e-06, "loss": 6.265420150756836, "step": 33090 }, { "epoch": 0.02995, "grad_norm": 13.697530746459961, "learning_rate": 3.37909090909091e-06, "loss": 6.2951805114746096, "step": 33095 }, { "epoch": 0.03, "grad_norm": 5.753592491149902, "learning_rate": 3.378838383838384e-06, "loss": 6.255740737915039, "step": 33100 }, { "epoch": 0.03005, "grad_norm": 7.691236972808838, "learning_rate": 3.3785858585858587e-06, "loss": 6.240294647216797, "step": 33105 }, { "epoch": 0.0301, "grad_norm": 8.726850509643555, "learning_rate": 3.3783333333333333e-06, "loss": 6.267334747314453, "step": 33110 }, { "epoch": 0.03015, "grad_norm": 5.179562568664551, "learning_rate": 3.378080808080809e-06, "loss": 6.273417663574219, "step": 33115 }, { "epoch": 0.0302, "grad_norm": 4.327252388000488, "learning_rate": 3.3778282828282834e-06, "loss": 6.2932373046875, "step": 33120 }, { "epoch": 0.03025, "grad_norm": 8.62805461883545, "learning_rate": 3.3775757575757576e-06, "loss": 6.279069900512695, "step": 33125 }, { "epoch": 0.0303, "grad_norm": 6.345445156097412, "learning_rate": 3.3773232323232323e-06, "loss": 6.2639610290527346, "step": 33130 }, { "epoch": 0.03035, "grad_norm": 6.395575046539307, "learning_rate": 3.3770707070707077e-06, "loss": 6.295189285278321, "step": 33135 }, { "epoch": 0.0304, "grad_norm": 5.761693477630615, "learning_rate": 3.3768181818181824e-06, "loss": 6.251622009277344, "step": 33140 }, { "epoch": 0.03045, "grad_norm": 7.3734846115112305, "learning_rate": 3.376565656565657e-06, "loss": 6.435795593261719, "step": 33145 }, { "epoch": 0.0305, "grad_norm": 11.005121231079102, "learning_rate": 3.3763131313131316e-06, "loss": 6.330982208251953, "step": 33150 }, { "epoch": 0.03055, "grad_norm": 7.978140354156494, "learning_rate": 3.3760606060606067e-06, "loss": 6.271990966796875, "step": 33155 }, { "epoch": 0.0306, "grad_norm": 15.699374198913574, "learning_rate": 3.3758080808080813e-06, "loss": 6.368354034423828, "step": 33160 }, { "epoch": 0.03065, "grad_norm": 6.79068660736084, "learning_rate": 3.375555555555556e-06, "loss": 6.281839752197266, "step": 33165 }, { "epoch": 0.0307, "grad_norm": 6.520066261291504, "learning_rate": 3.3753030303030306e-06, "loss": 6.27618293762207, "step": 33170 }, { "epoch": 0.03075, "grad_norm": 8.005951881408691, "learning_rate": 3.3750505050505056e-06, "loss": 6.287255096435547, "step": 33175 }, { "epoch": 0.0308, "grad_norm": 10.881730079650879, "learning_rate": 3.3747979797979803e-06, "loss": 6.2649696350097654, "step": 33180 }, { "epoch": 0.03085, "grad_norm": 6.8364691734313965, "learning_rate": 3.374545454545455e-06, "loss": 6.2221824645996096, "step": 33185 }, { "epoch": 0.0309, "grad_norm": 9.698918342590332, "learning_rate": 3.3742929292929295e-06, "loss": 6.229153823852539, "step": 33190 }, { "epoch": 0.03095, "grad_norm": 6.957477569580078, "learning_rate": 3.3740404040404046e-06, "loss": 6.306251525878906, "step": 33195 }, { "epoch": 0.031, "grad_norm": 5.204045295715332, "learning_rate": 3.373787878787879e-06, "loss": 6.350005340576172, "step": 33200 }, { "epoch": 0.03105, "grad_norm": 10.334306716918945, "learning_rate": 3.373535353535354e-06, "loss": 6.464518737792969, "step": 33205 }, { "epoch": 0.0311, "grad_norm": 5.167375087738037, "learning_rate": 3.3732828282828285e-06, "loss": 6.318617248535157, "step": 33210 }, { "epoch": 0.03115, "grad_norm": 5.3746562004089355, "learning_rate": 3.3730303030303035e-06, "loss": 6.243132400512695, "step": 33215 }, { "epoch": 0.0312, "grad_norm": 9.352612495422363, "learning_rate": 3.372777777777778e-06, "loss": 6.24566764831543, "step": 33220 }, { "epoch": 0.03125, "grad_norm": 8.810617446899414, "learning_rate": 3.3725252525252528e-06, "loss": 6.266605377197266, "step": 33225 }, { "epoch": 0.0313, "grad_norm": 6.503082752227783, "learning_rate": 3.3722727272727274e-06, "loss": 6.326488876342774, "step": 33230 }, { "epoch": 0.03135, "grad_norm": 7.317083835601807, "learning_rate": 3.3720202020202025e-06, "loss": 6.264865112304688, "step": 33235 }, { "epoch": 0.0314, "grad_norm": 11.144768714904785, "learning_rate": 3.371767676767677e-06, "loss": 6.294929885864258, "step": 33240 }, { "epoch": 0.03145, "grad_norm": 6.385911464691162, "learning_rate": 3.3715151515151517e-06, "loss": 6.2755592346191404, "step": 33245 }, { "epoch": 0.0315, "grad_norm": 8.339351654052734, "learning_rate": 3.3712626262626263e-06, "loss": 6.333158111572265, "step": 33250 }, { "epoch": 0.03155, "grad_norm": 13.5152006149292, "learning_rate": 3.3710101010101014e-06, "loss": 6.302943420410156, "step": 33255 }, { "epoch": 0.0316, "grad_norm": 7.733433246612549, "learning_rate": 3.370757575757576e-06, "loss": 6.297227096557617, "step": 33260 }, { "epoch": 0.03165, "grad_norm": 4.517010688781738, "learning_rate": 3.3705050505050507e-06, "loss": 6.293292617797851, "step": 33265 }, { "epoch": 0.0317, "grad_norm": 4.14784574508667, "learning_rate": 3.3702525252525253e-06, "loss": 6.557199096679687, "step": 33270 }, { "epoch": 0.03175, "grad_norm": 4.097201347351074, "learning_rate": 3.3700000000000003e-06, "loss": 6.262990951538086, "step": 33275 }, { "epoch": 0.0318, "grad_norm": 5.252623558044434, "learning_rate": 3.369747474747475e-06, "loss": 6.2520301818847654, "step": 33280 }, { "epoch": 0.03185, "grad_norm": 5.871214389801025, "learning_rate": 3.3694949494949496e-06, "loss": 6.248757171630859, "step": 33285 }, { "epoch": 0.0319, "grad_norm": 4.1304779052734375, "learning_rate": 3.3692424242424242e-06, "loss": 6.293046569824218, "step": 33290 }, { "epoch": 0.03195, "grad_norm": 7.157261848449707, "learning_rate": 3.3689898989898993e-06, "loss": 6.307251739501953, "step": 33295 }, { "epoch": 0.032, "grad_norm": 6.0266900062561035, "learning_rate": 3.368737373737374e-06, "loss": 6.30908432006836, "step": 33300 }, { "epoch": 0.03205, "grad_norm": 7.173007488250732, "learning_rate": 3.3684848484848485e-06, "loss": 6.260527420043945, "step": 33305 }, { "epoch": 0.0321, "grad_norm": 3.5890491008758545, "learning_rate": 3.368232323232323e-06, "loss": 6.285307693481445, "step": 33310 }, { "epoch": 0.03215, "grad_norm": 6.2995381355285645, "learning_rate": 3.3679797979797987e-06, "loss": 6.231388854980469, "step": 33315 }, { "epoch": 0.0322, "grad_norm": 5.338662624359131, "learning_rate": 3.367727272727273e-06, "loss": 6.2807056427001955, "step": 33320 }, { "epoch": 0.03225, "grad_norm": 4.731300354003906, "learning_rate": 3.3674747474747475e-06, "loss": 6.333821487426758, "step": 33325 }, { "epoch": 0.0323, "grad_norm": 7.608765602111816, "learning_rate": 3.367222222222222e-06, "loss": 6.2648674011230465, "step": 33330 }, { "epoch": 0.03235, "grad_norm": 3.8843512535095215, "learning_rate": 3.3669696969696976e-06, "loss": 6.324729156494141, "step": 33335 }, { "epoch": 0.0324, "grad_norm": 6.545660972595215, "learning_rate": 3.3667171717171722e-06, "loss": 6.2716819763183596, "step": 33340 }, { "epoch": 0.03245, "grad_norm": 5.238186359405518, "learning_rate": 3.366464646464647e-06, "loss": 6.260275268554688, "step": 33345 }, { "epoch": 0.0325, "grad_norm": 7.485328197479248, "learning_rate": 3.366212121212121e-06, "loss": 6.294602584838867, "step": 33350 }, { "epoch": 0.03255, "grad_norm": 8.373588562011719, "learning_rate": 3.3659595959595965e-06, "loss": 6.304227447509765, "step": 33355 }, { "epoch": 0.0326, "grad_norm": 7.3558549880981445, "learning_rate": 3.365707070707071e-06, "loss": 6.324623107910156, "step": 33360 }, { "epoch": 0.03265, "grad_norm": 8.7068510055542, "learning_rate": 3.365454545454546e-06, "loss": 6.353107070922851, "step": 33365 }, { "epoch": 0.0327, "grad_norm": 9.252971649169922, "learning_rate": 3.3652020202020204e-06, "loss": 6.2792808532714846, "step": 33370 }, { "epoch": 0.03275, "grad_norm": 8.630522727966309, "learning_rate": 3.3649494949494955e-06, "loss": 6.299975967407226, "step": 33375 }, { "epoch": 0.0328, "grad_norm": 5.127129077911377, "learning_rate": 3.36469696969697e-06, "loss": 6.27946891784668, "step": 33380 }, { "epoch": 0.03285, "grad_norm": 5.696117877960205, "learning_rate": 3.3644444444444447e-06, "loss": 6.288686752319336, "step": 33385 }, { "epoch": 0.0329, "grad_norm": 4.3785929679870605, "learning_rate": 3.3641919191919194e-06, "loss": 6.296165466308594, "step": 33390 }, { "epoch": 0.03295, "grad_norm": 6.155759334564209, "learning_rate": 3.3639393939393944e-06, "loss": 6.3178459167480465, "step": 33395 }, { "epoch": 0.033, "grad_norm": 6.632721424102783, "learning_rate": 3.363686868686869e-06, "loss": 6.214113998413086, "step": 33400 }, { "epoch": 0.03305, "grad_norm": 5.624794960021973, "learning_rate": 3.3634343434343437e-06, "loss": 6.277983474731445, "step": 33405 }, { "epoch": 0.0331, "grad_norm": 4.801393032073975, "learning_rate": 3.3631818181818183e-06, "loss": 6.263437271118164, "step": 33410 }, { "epoch": 0.03315, "grad_norm": 8.162773132324219, "learning_rate": 3.3629292929292934e-06, "loss": 6.253548812866211, "step": 33415 }, { "epoch": 0.0332, "grad_norm": 7.992072105407715, "learning_rate": 3.362676767676768e-06, "loss": 6.318422317504883, "step": 33420 }, { "epoch": 0.03325, "grad_norm": 3.6902403831481934, "learning_rate": 3.3624242424242426e-06, "loss": 6.293793869018555, "step": 33425 }, { "epoch": 0.0333, "grad_norm": 8.048416137695312, "learning_rate": 3.3621717171717173e-06, "loss": 6.295990753173828, "step": 33430 }, { "epoch": 0.03335, "grad_norm": 23.609006881713867, "learning_rate": 3.3619191919191923e-06, "loss": 6.494249725341797, "step": 33435 }, { "epoch": 0.0334, "grad_norm": 8.648221015930176, "learning_rate": 3.361666666666667e-06, "loss": 6.267356872558594, "step": 33440 }, { "epoch": 0.03345, "grad_norm": 4.9014201164245605, "learning_rate": 3.3614141414141416e-06, "loss": 6.295452117919922, "step": 33445 }, { "epoch": 0.0335, "grad_norm": 7.896302223205566, "learning_rate": 3.361161616161616e-06, "loss": 6.321183776855468, "step": 33450 }, { "epoch": 0.03355, "grad_norm": 5.677506923675537, "learning_rate": 3.3609090909090913e-06, "loss": 6.297599792480469, "step": 33455 }, { "epoch": 0.0336, "grad_norm": 6.08756685256958, "learning_rate": 3.360656565656566e-06, "loss": 6.331251907348633, "step": 33460 }, { "epoch": 0.03365, "grad_norm": 7.160477161407471, "learning_rate": 3.3604040404040405e-06, "loss": 6.321124267578125, "step": 33465 }, { "epoch": 0.0337, "grad_norm": 8.190502166748047, "learning_rate": 3.360151515151515e-06, "loss": 6.255464935302735, "step": 33470 }, { "epoch": 0.03375, "grad_norm": 3.4396893978118896, "learning_rate": 3.35989898989899e-06, "loss": 6.271050262451172, "step": 33475 }, { "epoch": 0.0338, "grad_norm": 10.833885192871094, "learning_rate": 3.359646464646465e-06, "loss": 6.272621536254883, "step": 33480 }, { "epoch": 0.03385, "grad_norm": 9.92856502532959, "learning_rate": 3.3593939393939395e-06, "loss": 6.285462188720703, "step": 33485 }, { "epoch": 0.0339, "grad_norm": 6.707010269165039, "learning_rate": 3.359141414141414e-06, "loss": 6.304716873168945, "step": 33490 }, { "epoch": 0.03395, "grad_norm": 7.269896030426025, "learning_rate": 3.358888888888889e-06, "loss": 6.300924301147461, "step": 33495 }, { "epoch": 0.034, "grad_norm": 6.4969940185546875, "learning_rate": 3.3586363636363638e-06, "loss": 6.275922393798828, "step": 33500 }, { "epoch": 0.03405, "grad_norm": 7.214869022369385, "learning_rate": 3.3583838383838384e-06, "loss": 6.2764892578125, "step": 33505 }, { "epoch": 0.0341, "grad_norm": 4.950541973114014, "learning_rate": 3.358131313131313e-06, "loss": 6.361250305175782, "step": 33510 }, { "epoch": 0.03415, "grad_norm": 6.94204044342041, "learning_rate": 3.357878787878788e-06, "loss": 6.297611999511719, "step": 33515 }, { "epoch": 0.0342, "grad_norm": 6.812038421630859, "learning_rate": 3.3576262626262627e-06, "loss": 6.287727355957031, "step": 33520 }, { "epoch": 0.03425, "grad_norm": 8.114991188049316, "learning_rate": 3.3573737373737374e-06, "loss": 6.279646301269532, "step": 33525 }, { "epoch": 0.0343, "grad_norm": 6.5903544425964355, "learning_rate": 3.357121212121213e-06, "loss": 6.28112907409668, "step": 33530 }, { "epoch": 0.03435, "grad_norm": 5.445219993591309, "learning_rate": 3.3568686868686875e-06, "loss": 6.3290557861328125, "step": 33535 }, { "epoch": 0.0344, "grad_norm": 6.044078826904297, "learning_rate": 3.356616161616162e-06, "loss": 6.331441116333008, "step": 33540 }, { "epoch": 0.03445, "grad_norm": 6.1797099113464355, "learning_rate": 3.3563636363636363e-06, "loss": 6.3179985046386715, "step": 33545 }, { "epoch": 0.0345, "grad_norm": 17.00074005126953, "learning_rate": 3.3561111111111118e-06, "loss": 6.347199249267578, "step": 33550 }, { "epoch": 0.03455, "grad_norm": 5.40175199508667, "learning_rate": 3.3558585858585864e-06, "loss": 6.339302062988281, "step": 33555 }, { "epoch": 0.0346, "grad_norm": 4.950738430023193, "learning_rate": 3.355606060606061e-06, "loss": 6.24754753112793, "step": 33560 }, { "epoch": 0.03465, "grad_norm": 14.542377471923828, "learning_rate": 3.3553535353535357e-06, "loss": 6.3889717102050785, "step": 33565 }, { "epoch": 0.0347, "grad_norm": 6.742488384246826, "learning_rate": 3.3551010101010107e-06, "loss": 6.30975227355957, "step": 33570 }, { "epoch": 0.03475, "grad_norm": 5.7151384353637695, "learning_rate": 3.3548484848484854e-06, "loss": 6.262050628662109, "step": 33575 }, { "epoch": 0.0348, "grad_norm": 3.9715187549591064, "learning_rate": 3.35459595959596e-06, "loss": 6.286602020263672, "step": 33580 }, { "epoch": 0.03485, "grad_norm": 5.020255088806152, "learning_rate": 3.3543434343434346e-06, "loss": 6.274787139892578, "step": 33585 }, { "epoch": 0.0349, "grad_norm": 5.915451526641846, "learning_rate": 3.3540909090909097e-06, "loss": 6.282065200805664, "step": 33590 }, { "epoch": 0.03495, "grad_norm": 6.1321539878845215, "learning_rate": 3.3538383838383843e-06, "loss": 6.262400817871094, "step": 33595 }, { "epoch": 0.035, "grad_norm": 5.729720592498779, "learning_rate": 3.353585858585859e-06, "loss": 6.232015991210938, "step": 33600 }, { "epoch": 0.03505, "grad_norm": 5.136466026306152, "learning_rate": 3.3533333333333336e-06, "loss": 6.316616058349609, "step": 33605 }, { "epoch": 0.0351, "grad_norm": 6.942987442016602, "learning_rate": 3.3530808080808086e-06, "loss": 6.265991973876953, "step": 33610 }, { "epoch": 0.03515, "grad_norm": 5.394021511077881, "learning_rate": 3.3528282828282832e-06, "loss": 6.27789192199707, "step": 33615 }, { "epoch": 0.0352, "grad_norm": 5.824485778808594, "learning_rate": 3.352575757575758e-06, "loss": 6.279919815063477, "step": 33620 }, { "epoch": 0.03525, "grad_norm": 4.70603609085083, "learning_rate": 3.3523232323232325e-06, "loss": 6.2905017852783205, "step": 33625 }, { "epoch": 0.0353, "grad_norm": 7.554645538330078, "learning_rate": 3.3520707070707076e-06, "loss": 6.315367126464844, "step": 33630 }, { "epoch": 0.03535, "grad_norm": 7.915616512298584, "learning_rate": 3.351818181818182e-06, "loss": 6.444318389892578, "step": 33635 }, { "epoch": 0.0354, "grad_norm": 4.64765739440918, "learning_rate": 3.351565656565657e-06, "loss": 6.269488525390625, "step": 33640 }, { "epoch": 0.03545, "grad_norm": 8.423317909240723, "learning_rate": 3.3513131313131314e-06, "loss": 6.249978637695312, "step": 33645 }, { "epoch": 0.0355, "grad_norm": 6.274908065795898, "learning_rate": 3.3510606060606065e-06, "loss": 6.366060638427735, "step": 33650 }, { "epoch": 0.03555, "grad_norm": 15.349830627441406, "learning_rate": 3.350808080808081e-06, "loss": 6.425099182128906, "step": 33655 }, { "epoch": 0.0356, "grad_norm": 8.832176208496094, "learning_rate": 3.3505555555555558e-06, "loss": 6.291016006469727, "step": 33660 }, { "epoch": 0.03565, "grad_norm": 9.772109031677246, "learning_rate": 3.3503030303030304e-06, "loss": 6.297457885742188, "step": 33665 }, { "epoch": 0.0357, "grad_norm": 7.770026683807373, "learning_rate": 3.3500505050505054e-06, "loss": 6.300077056884765, "step": 33670 }, { "epoch": 0.03575, "grad_norm": 4.734652519226074, "learning_rate": 3.34979797979798e-06, "loss": 6.270442962646484, "step": 33675 }, { "epoch": 0.0358, "grad_norm": 7.3415350914001465, "learning_rate": 3.3495454545454547e-06, "loss": 6.284919357299804, "step": 33680 }, { "epoch": 0.03585, "grad_norm": 8.17043399810791, "learning_rate": 3.3492929292929293e-06, "loss": 6.22940673828125, "step": 33685 }, { "epoch": 0.0359, "grad_norm": 15.422343254089355, "learning_rate": 3.3490404040404044e-06, "loss": 6.518870544433594, "step": 33690 }, { "epoch": 0.03595, "grad_norm": 5.319767951965332, "learning_rate": 3.348787878787879e-06, "loss": 6.280790710449219, "step": 33695 }, { "epoch": 0.036, "grad_norm": 13.630831718444824, "learning_rate": 3.3485353535353536e-06, "loss": 6.561427307128906, "step": 33700 }, { "epoch": 0.03605, "grad_norm": 7.198222637176514, "learning_rate": 3.3482828282828283e-06, "loss": 6.426054382324219, "step": 33705 }, { "epoch": 0.0361, "grad_norm": 20.527406692504883, "learning_rate": 3.3480303030303033e-06, "loss": 6.809866333007813, "step": 33710 }, { "epoch": 0.03615, "grad_norm": 9.435182571411133, "learning_rate": 3.347777777777778e-06, "loss": 6.4427490234375, "step": 33715 }, { "epoch": 0.0362, "grad_norm": 8.986084938049316, "learning_rate": 3.3475252525252526e-06, "loss": 6.269795227050781, "step": 33720 }, { "epoch": 0.03625, "grad_norm": 10.78356647491455, "learning_rate": 3.3472727272727272e-06, "loss": 6.3464210510253904, "step": 33725 }, { "epoch": 0.0363, "grad_norm": 6.935975551605225, "learning_rate": 3.3470202020202027e-06, "loss": 6.327423477172852, "step": 33730 }, { "epoch": 0.03635, "grad_norm": 4.871250152587891, "learning_rate": 3.346767676767677e-06, "loss": 6.385657501220703, "step": 33735 }, { "epoch": 0.0364, "grad_norm": 5.24281120300293, "learning_rate": 3.3465151515151515e-06, "loss": 6.291134643554687, "step": 33740 }, { "epoch": 0.03645, "grad_norm": 8.088532447814941, "learning_rate": 3.346262626262626e-06, "loss": 6.4737060546875, "step": 33745 }, { "epoch": 0.0365, "grad_norm": 4.294514179229736, "learning_rate": 3.3460101010101016e-06, "loss": 6.2565654754638675, "step": 33750 }, { "epoch": 0.03655, "grad_norm": 7.172997951507568, "learning_rate": 3.3457575757575763e-06, "loss": 6.28502197265625, "step": 33755 }, { "epoch": 0.0366, "grad_norm": 27.490144729614258, "learning_rate": 3.345505050505051e-06, "loss": 6.473532867431641, "step": 33760 }, { "epoch": 0.03665, "grad_norm": 6.903679370880127, "learning_rate": 3.345252525252525e-06, "loss": 6.341043853759766, "step": 33765 }, { "epoch": 0.0367, "grad_norm": 9.803825378417969, "learning_rate": 3.3450000000000006e-06, "loss": 6.274591445922852, "step": 33770 }, { "epoch": 0.03675, "grad_norm": 8.376204490661621, "learning_rate": 3.3447474747474752e-06, "loss": 6.289490127563477, "step": 33775 }, { "epoch": 0.0368, "grad_norm": 7.227331638336182, "learning_rate": 3.34449494949495e-06, "loss": 6.381228256225586, "step": 33780 }, { "epoch": 0.03685, "grad_norm": 5.414806365966797, "learning_rate": 3.3442424242424245e-06, "loss": 6.293725204467774, "step": 33785 }, { "epoch": 0.0369, "grad_norm": 10.67294692993164, "learning_rate": 3.3439898989898995e-06, "loss": 6.292182540893554, "step": 33790 }, { "epoch": 0.03695, "grad_norm": 5.945865154266357, "learning_rate": 3.343737373737374e-06, "loss": 6.287956619262696, "step": 33795 }, { "epoch": 0.037, "grad_norm": 7.563597679138184, "learning_rate": 3.343484848484849e-06, "loss": 6.510175323486328, "step": 33800 }, { "epoch": 0.03705, "grad_norm": 10.006854057312012, "learning_rate": 3.3432323232323234e-06, "loss": 6.297396469116211, "step": 33805 }, { "epoch": 0.0371, "grad_norm": 13.400472640991211, "learning_rate": 3.3429797979797985e-06, "loss": 6.350586700439453, "step": 33810 }, { "epoch": 0.03715, "grad_norm": 4.86964750289917, "learning_rate": 3.342727272727273e-06, "loss": 6.276943969726562, "step": 33815 }, { "epoch": 0.0372, "grad_norm": 8.409882545471191, "learning_rate": 3.3424747474747477e-06, "loss": 6.3280784606933596, "step": 33820 }, { "epoch": 0.03725, "grad_norm": 7.396688461303711, "learning_rate": 3.3422222222222224e-06, "loss": 6.336164093017578, "step": 33825 }, { "epoch": 0.0373, "grad_norm": 9.505524635314941, "learning_rate": 3.3419696969696974e-06, "loss": 6.284850311279297, "step": 33830 }, { "epoch": 0.03735, "grad_norm": 5.70472526550293, "learning_rate": 3.341717171717172e-06, "loss": 6.3307945251464846, "step": 33835 }, { "epoch": 0.0374, "grad_norm": 7.884207725524902, "learning_rate": 3.3414646464646467e-06, "loss": 6.274061965942383, "step": 33840 }, { "epoch": 0.03745, "grad_norm": 6.796994686126709, "learning_rate": 3.3412121212121213e-06, "loss": 6.302053451538086, "step": 33845 }, { "epoch": 0.0375, "grad_norm": 7.3839335441589355, "learning_rate": 3.3409595959595964e-06, "loss": 6.309565734863281, "step": 33850 }, { "epoch": 0.03755, "grad_norm": 4.965457916259766, "learning_rate": 3.340707070707071e-06, "loss": 6.311651611328125, "step": 33855 }, { "epoch": 0.0376, "grad_norm": 7.722535133361816, "learning_rate": 3.3404545454545456e-06, "loss": 6.2738494873046875, "step": 33860 }, { "epoch": 0.03765, "grad_norm": 18.980051040649414, "learning_rate": 3.3402020202020203e-06, "loss": 6.167140197753906, "step": 33865 }, { "epoch": 0.0377, "grad_norm": 19.575685501098633, "learning_rate": 3.3399494949494953e-06, "loss": 6.332438278198242, "step": 33870 }, { "epoch": 0.03775, "grad_norm": 7.988999843597412, "learning_rate": 3.33969696969697e-06, "loss": 6.26928596496582, "step": 33875 }, { "epoch": 0.0378, "grad_norm": 3.7111904621124268, "learning_rate": 3.3394444444444446e-06, "loss": 6.278633499145508, "step": 33880 }, { "epoch": 0.03785, "grad_norm": 5.462652683258057, "learning_rate": 3.339191919191919e-06, "loss": 6.296630096435547, "step": 33885 }, { "epoch": 0.0379, "grad_norm": 6.537981986999512, "learning_rate": 3.3389393939393942e-06, "loss": 6.276802062988281, "step": 33890 }, { "epoch": 0.03795, "grad_norm": 4.2480692863464355, "learning_rate": 3.338686868686869e-06, "loss": 6.319560623168945, "step": 33895 }, { "epoch": 0.038, "grad_norm": 5.105456829071045, "learning_rate": 3.3384343434343435e-06, "loss": 6.2697502136230465, "step": 33900 }, { "epoch": 0.03805, "grad_norm": 13.569671630859375, "learning_rate": 3.338181818181818e-06, "loss": 6.586620330810547, "step": 33905 }, { "epoch": 0.0381, "grad_norm": 8.04488468170166, "learning_rate": 3.337929292929293e-06, "loss": 6.313685607910156, "step": 33910 }, { "epoch": 0.03815, "grad_norm": 6.999444007873535, "learning_rate": 3.337676767676768e-06, "loss": 6.286004638671875, "step": 33915 }, { "epoch": 0.0382, "grad_norm": 8.203609466552734, "learning_rate": 3.3374242424242425e-06, "loss": 6.324343490600586, "step": 33920 }, { "epoch": 0.03825, "grad_norm": 8.53204345703125, "learning_rate": 3.337171717171717e-06, "loss": 6.300914001464844, "step": 33925 }, { "epoch": 0.0383, "grad_norm": 3.4334888458251953, "learning_rate": 3.336919191919192e-06, "loss": 6.268751525878907, "step": 33930 }, { "epoch": 0.03835, "grad_norm": 7.859654903411865, "learning_rate": 3.3366666666666668e-06, "loss": 6.250489044189453, "step": 33935 }, { "epoch": 0.0384, "grad_norm": 5.163532257080078, "learning_rate": 3.3364141414141414e-06, "loss": 6.269466781616211, "step": 33940 }, { "epoch": 0.03845, "grad_norm": 7.342118740081787, "learning_rate": 3.336161616161617e-06, "loss": 6.2851104736328125, "step": 33945 }, { "epoch": 0.0385, "grad_norm": 3.566540241241455, "learning_rate": 3.3359090909090915e-06, "loss": 6.307624053955078, "step": 33950 }, { "epoch": 0.03855, "grad_norm": 6.865889072418213, "learning_rate": 3.335656565656566e-06, "loss": 6.295575332641602, "step": 33955 }, { "epoch": 0.0386, "grad_norm": 7.333113193511963, "learning_rate": 3.3354040404040403e-06, "loss": 6.238556671142578, "step": 33960 }, { "epoch": 0.03865, "grad_norm": 7.950794219970703, "learning_rate": 3.335151515151516e-06, "loss": 6.269853210449218, "step": 33965 }, { "epoch": 0.0387, "grad_norm": 5.649864673614502, "learning_rate": 3.3348989898989904e-06, "loss": 6.299364852905273, "step": 33970 }, { "epoch": 0.03875, "grad_norm": 4.264184474945068, "learning_rate": 3.334646464646465e-06, "loss": 6.27393684387207, "step": 33975 }, { "epoch": 0.0388, "grad_norm": 9.34839153289795, "learning_rate": 3.3343939393939397e-06, "loss": 6.316100311279297, "step": 33980 }, { "epoch": 0.03885, "grad_norm": 4.900908946990967, "learning_rate": 3.3341414141414148e-06, "loss": 6.278008270263672, "step": 33985 }, { "epoch": 0.0389, "grad_norm": 5.4358978271484375, "learning_rate": 3.3338888888888894e-06, "loss": 6.2871757507324215, "step": 33990 }, { "epoch": 0.03895, "grad_norm": 7.3742356300354, "learning_rate": 3.333636363636364e-06, "loss": 6.275775146484375, "step": 33995 }, { "epoch": 0.039, "grad_norm": 6.171125411987305, "learning_rate": 3.3333838383838387e-06, "loss": 6.312109375, "step": 34000 }, { "epoch": 0.03905, "grad_norm": 7.1294708251953125, "learning_rate": 3.3331313131313137e-06, "loss": 6.249440383911133, "step": 34005 }, { "epoch": 0.0391, "grad_norm": 4.51965856552124, "learning_rate": 3.3328787878787883e-06, "loss": 6.307046508789062, "step": 34010 }, { "epoch": 0.03915, "grad_norm": 12.280434608459473, "learning_rate": 3.332626262626263e-06, "loss": 6.292683792114258, "step": 34015 }, { "epoch": 0.0392, "grad_norm": 19.992704391479492, "learning_rate": 3.3323737373737376e-06, "loss": 6.643345642089844, "step": 34020 }, { "epoch": 0.03925, "grad_norm": 3.8565711975097656, "learning_rate": 3.3321212121212126e-06, "loss": 6.33795051574707, "step": 34025 }, { "epoch": 0.0393, "grad_norm": 7.719776153564453, "learning_rate": 3.3318686868686873e-06, "loss": 6.300451278686523, "step": 34030 }, { "epoch": 0.03935, "grad_norm": 9.122726440429688, "learning_rate": 3.331616161616162e-06, "loss": 6.305955505371093, "step": 34035 }, { "epoch": 0.0394, "grad_norm": 7.075936794281006, "learning_rate": 3.3313636363636365e-06, "loss": 6.321106338500977, "step": 34040 }, { "epoch": 0.03945, "grad_norm": 5.540849685668945, "learning_rate": 3.3311111111111116e-06, "loss": 6.3144268035888675, "step": 34045 }, { "epoch": 0.0395, "grad_norm": 8.590054512023926, "learning_rate": 3.3308585858585862e-06, "loss": 6.25340576171875, "step": 34050 }, { "epoch": 0.03955, "grad_norm": 6.305569648742676, "learning_rate": 3.330606060606061e-06, "loss": 6.281805801391601, "step": 34055 }, { "epoch": 0.0396, "grad_norm": 8.15976619720459, "learning_rate": 3.3303535353535355e-06, "loss": 6.294978332519531, "step": 34060 }, { "epoch": 0.03965, "grad_norm": 8.914175987243652, "learning_rate": 3.3301010101010105e-06, "loss": 6.298697280883789, "step": 34065 }, { "epoch": 0.0397, "grad_norm": 6.634190082550049, "learning_rate": 3.329848484848485e-06, "loss": 6.250032043457031, "step": 34070 }, { "epoch": 0.03975, "grad_norm": 4.937743663787842, "learning_rate": 3.32959595959596e-06, "loss": 6.214720153808594, "step": 34075 }, { "epoch": 0.0398, "grad_norm": 4.592219352722168, "learning_rate": 3.3293434343434344e-06, "loss": 6.285158920288086, "step": 34080 }, { "epoch": 0.03985, "grad_norm": 5.886895656585693, "learning_rate": 3.3290909090909095e-06, "loss": 6.30814208984375, "step": 34085 }, { "epoch": 0.0399, "grad_norm": 7.94163703918457, "learning_rate": 3.328838383838384e-06, "loss": 6.308694076538086, "step": 34090 }, { "epoch": 0.03995, "grad_norm": 7.083491325378418, "learning_rate": 3.3285858585858587e-06, "loss": 6.325359725952149, "step": 34095 }, { "epoch": 0.04, "grad_norm": 3.235384702682495, "learning_rate": 3.3283333333333334e-06, "loss": 6.2913970947265625, "step": 34100 }, { "epoch": 0.04005, "grad_norm": 7.114415645599365, "learning_rate": 3.3280808080808084e-06, "loss": 6.302985763549804, "step": 34105 }, { "epoch": 0.0401, "grad_norm": 4.573584079742432, "learning_rate": 3.327828282828283e-06, "loss": 6.275545883178711, "step": 34110 }, { "epoch": 0.04015, "grad_norm": 4.46937894821167, "learning_rate": 3.3275757575757577e-06, "loss": 6.314866256713867, "step": 34115 }, { "epoch": 0.0402, "grad_norm": 5.1440253257751465, "learning_rate": 3.3273232323232323e-06, "loss": 6.236521148681641, "step": 34120 }, { "epoch": 0.04025, "grad_norm": 6.009187698364258, "learning_rate": 3.3270707070707074e-06, "loss": 6.271181106567383, "step": 34125 }, { "epoch": 0.0403, "grad_norm": 12.343667984008789, "learning_rate": 3.326818181818182e-06, "loss": 6.287606811523437, "step": 34130 }, { "epoch": 0.04035, "grad_norm": 6.6855010986328125, "learning_rate": 3.3265656565656566e-06, "loss": 6.280777359008789, "step": 34135 }, { "epoch": 0.0404, "grad_norm": 7.358508586883545, "learning_rate": 3.3263131313131313e-06, "loss": 6.254603576660156, "step": 34140 }, { "epoch": 0.04045, "grad_norm": 4.691926002502441, "learning_rate": 3.3260606060606067e-06, "loss": 6.313080215454102, "step": 34145 }, { "epoch": 0.0405, "grad_norm": 13.051141738891602, "learning_rate": 3.325808080808081e-06, "loss": 6.2752025604248045, "step": 34150 }, { "epoch": 0.04055, "grad_norm": 5.666262626647949, "learning_rate": 3.3255555555555556e-06, "loss": 6.374200057983399, "step": 34155 }, { "epoch": 0.0406, "grad_norm": 10.970081329345703, "learning_rate": 3.32530303030303e-06, "loss": 6.197991943359375, "step": 34160 }, { "epoch": 0.04065, "grad_norm": 8.356911659240723, "learning_rate": 3.3250505050505057e-06, "loss": 6.288248825073242, "step": 34165 }, { "epoch": 0.0407, "grad_norm": 4.276556968688965, "learning_rate": 3.3247979797979803e-06, "loss": 6.319438171386719, "step": 34170 }, { "epoch": 0.04075, "grad_norm": 4.564706802368164, "learning_rate": 3.324545454545455e-06, "loss": 6.288778686523438, "step": 34175 }, { "epoch": 0.0408, "grad_norm": 6.004821300506592, "learning_rate": 3.324292929292929e-06, "loss": 6.241841888427734, "step": 34180 }, { "epoch": 0.04085, "grad_norm": 6.343606472015381, "learning_rate": 3.3240404040404046e-06, "loss": 6.2810516357421875, "step": 34185 }, { "epoch": 0.0409, "grad_norm": 5.414394855499268, "learning_rate": 3.3237878787878793e-06, "loss": 6.4226539611816404, "step": 34190 }, { "epoch": 0.04095, "grad_norm": 8.430062294006348, "learning_rate": 3.323535353535354e-06, "loss": 6.278907012939453, "step": 34195 }, { "epoch": 0.041, "grad_norm": 9.01877212524414, "learning_rate": 3.3232828282828285e-06, "loss": 6.2799560546875, "step": 34200 }, { "epoch": 0.04105, "grad_norm": 5.575852870941162, "learning_rate": 3.3230303030303036e-06, "loss": 6.307883453369141, "step": 34205 }, { "epoch": 0.0411, "grad_norm": 5.510700702667236, "learning_rate": 3.322777777777778e-06, "loss": 6.267377853393555, "step": 34210 }, { "epoch": 0.04115, "grad_norm": 3.97086238861084, "learning_rate": 3.322525252525253e-06, "loss": 6.271921920776367, "step": 34215 }, { "epoch": 0.0412, "grad_norm": 5.346134185791016, "learning_rate": 3.3222727272727275e-06, "loss": 6.278012466430664, "step": 34220 }, { "epoch": 0.04125, "grad_norm": 8.779306411743164, "learning_rate": 3.3220202020202025e-06, "loss": 6.3176521301269535, "step": 34225 }, { "epoch": 0.0413, "grad_norm": 7.388701915740967, "learning_rate": 3.321767676767677e-06, "loss": 6.266966247558594, "step": 34230 }, { "epoch": 0.04135, "grad_norm": 6.078861236572266, "learning_rate": 3.3215151515151518e-06, "loss": 6.2674610137939455, "step": 34235 }, { "epoch": 0.0414, "grad_norm": 5.081811904907227, "learning_rate": 3.3212626262626264e-06, "loss": 6.307051467895508, "step": 34240 }, { "epoch": 0.04145, "grad_norm": 6.914938449859619, "learning_rate": 3.3210101010101015e-06, "loss": 6.28289794921875, "step": 34245 }, { "epoch": 0.0415, "grad_norm": 13.939677238464355, "learning_rate": 3.320757575757576e-06, "loss": 6.371585845947266, "step": 34250 }, { "epoch": 0.04155, "grad_norm": 6.9364542961120605, "learning_rate": 3.3205050505050507e-06, "loss": 6.302772903442383, "step": 34255 }, { "epoch": 0.0416, "grad_norm": 6.12338399887085, "learning_rate": 3.3202525252525253e-06, "loss": 6.280635070800781, "step": 34260 }, { "epoch": 0.04165, "grad_norm": 60.867557525634766, "learning_rate": 3.3200000000000004e-06, "loss": 8.877529907226563, "step": 34265 }, { "epoch": 0.0417, "grad_norm": 10.511495590209961, "learning_rate": 3.319747474747475e-06, "loss": 6.693716430664063, "step": 34270 }, { "epoch": 0.04175, "grad_norm": 7.926007270812988, "learning_rate": 3.3194949494949497e-06, "loss": 6.289888763427735, "step": 34275 }, { "epoch": 0.0418, "grad_norm": 7.224324703216553, "learning_rate": 3.3192424242424243e-06, "loss": 6.328134155273437, "step": 34280 }, { "epoch": 0.04185, "grad_norm": 11.48455810546875, "learning_rate": 3.3189898989898993e-06, "loss": 6.361230087280274, "step": 34285 }, { "epoch": 0.0419, "grad_norm": 5.697624683380127, "learning_rate": 3.318737373737374e-06, "loss": 6.271654891967773, "step": 34290 }, { "epoch": 0.04195, "grad_norm": 15.202046394348145, "learning_rate": 3.3184848484848486e-06, "loss": 6.354739761352539, "step": 34295 }, { "epoch": 0.042, "grad_norm": 4.309697151184082, "learning_rate": 3.3182323232323232e-06, "loss": 6.28790283203125, "step": 34300 }, { "epoch": 0.04205, "grad_norm": 6.149411678314209, "learning_rate": 3.3179797979797983e-06, "loss": 6.294909286499023, "step": 34305 }, { "epoch": 0.0421, "grad_norm": 8.412841796875, "learning_rate": 3.317727272727273e-06, "loss": 6.281791305541992, "step": 34310 }, { "epoch": 0.04215, "grad_norm": 5.090383529663086, "learning_rate": 3.3174747474747475e-06, "loss": 6.206612396240234, "step": 34315 }, { "epoch": 0.0422, "grad_norm": 5.041045188903809, "learning_rate": 3.317222222222222e-06, "loss": 6.318148803710938, "step": 34320 }, { "epoch": 0.04225, "grad_norm": 6.910464763641357, "learning_rate": 3.3169696969696972e-06, "loss": 6.291581726074218, "step": 34325 }, { "epoch": 0.0423, "grad_norm": 8.667393684387207, "learning_rate": 3.316717171717172e-06, "loss": 6.302043151855469, "step": 34330 }, { "epoch": 0.04235, "grad_norm": 6.076626300811768, "learning_rate": 3.3164646464646465e-06, "loss": 6.345893096923828, "step": 34335 }, { "epoch": 0.0424, "grad_norm": 7.758793354034424, "learning_rate": 3.316212121212121e-06, "loss": 6.265994644165039, "step": 34340 }, { "epoch": 0.04245, "grad_norm": 7.956061840057373, "learning_rate": 3.315959595959596e-06, "loss": 6.340013122558593, "step": 34345 }, { "epoch": 0.0425, "grad_norm": 5.982802391052246, "learning_rate": 3.315707070707071e-06, "loss": 6.278118133544922, "step": 34350 }, { "epoch": 0.04255, "grad_norm": 6.534622669219971, "learning_rate": 3.3154545454545454e-06, "loss": 6.261928558349609, "step": 34355 }, { "epoch": 0.0426, "grad_norm": 3.7858529090881348, "learning_rate": 3.31520202020202e-06, "loss": 6.289427185058594, "step": 34360 }, { "epoch": 0.04265, "grad_norm": 8.042802810668945, "learning_rate": 3.3149494949494955e-06, "loss": 6.427771759033203, "step": 34365 }, { "epoch": 0.0427, "grad_norm": 3.7131643295288086, "learning_rate": 3.31469696969697e-06, "loss": 6.323392105102539, "step": 34370 }, { "epoch": 0.04275, "grad_norm": 8.797636032104492, "learning_rate": 3.3144444444444444e-06, "loss": 6.358470153808594, "step": 34375 }, { "epoch": 0.0428, "grad_norm": 8.763351440429688, "learning_rate": 3.31419191919192e-06, "loss": 6.28143424987793, "step": 34380 }, { "epoch": 0.04285, "grad_norm": 5.359097957611084, "learning_rate": 3.3139393939393945e-06, "loss": 6.29913215637207, "step": 34385 }, { "epoch": 0.0429, "grad_norm": 5.128442287445068, "learning_rate": 3.313686868686869e-06, "loss": 6.232828521728516, "step": 34390 }, { "epoch": 0.04295, "grad_norm": 4.666426658630371, "learning_rate": 3.3134343434343437e-06, "loss": 6.287538146972656, "step": 34395 }, { "epoch": 0.043, "grad_norm": 7.890666484832764, "learning_rate": 3.313181818181819e-06, "loss": 6.271531295776367, "step": 34400 }, { "epoch": 0.04305, "grad_norm": 6.427894592285156, "learning_rate": 3.3129292929292934e-06, "loss": 6.232158660888672, "step": 34405 }, { "epoch": 0.0431, "grad_norm": 10.532413482666016, "learning_rate": 3.312676767676768e-06, "loss": 6.3208160400390625, "step": 34410 }, { "epoch": 0.04315, "grad_norm": 4.692079067230225, "learning_rate": 3.3124242424242427e-06, "loss": 6.23803596496582, "step": 34415 }, { "epoch": 0.0432, "grad_norm": 6.030078411102295, "learning_rate": 3.3121717171717177e-06, "loss": 6.264137268066406, "step": 34420 }, { "epoch": 0.04325, "grad_norm": 7.714862823486328, "learning_rate": 3.3119191919191924e-06, "loss": 6.272183990478515, "step": 34425 }, { "epoch": 0.0433, "grad_norm": 6.6889495849609375, "learning_rate": 3.311666666666667e-06, "loss": 6.297808074951172, "step": 34430 }, { "epoch": 0.04335, "grad_norm": 5.760891914367676, "learning_rate": 3.3114141414141416e-06, "loss": 6.292405700683593, "step": 34435 }, { "epoch": 0.0434, "grad_norm": 6.947488784790039, "learning_rate": 3.3111616161616167e-06, "loss": 6.288687133789063, "step": 34440 }, { "epoch": 0.04345, "grad_norm": 6.754628658294678, "learning_rate": 3.3109090909090913e-06, "loss": 6.173713684082031, "step": 34445 }, { "epoch": 0.0435, "grad_norm": 7.911752223968506, "learning_rate": 3.310656565656566e-06, "loss": 6.320727157592773, "step": 34450 }, { "epoch": 0.04355, "grad_norm": 16.554855346679688, "learning_rate": 3.3104040404040406e-06, "loss": 6.314252853393555, "step": 34455 }, { "epoch": 0.0436, "grad_norm": 6.524188995361328, "learning_rate": 3.3101515151515156e-06, "loss": 6.295826721191406, "step": 34460 }, { "epoch": 0.04365, "grad_norm": 7.557045936584473, "learning_rate": 3.3098989898989903e-06, "loss": 6.262946319580078, "step": 34465 }, { "epoch": 0.0437, "grad_norm": 4.240478038787842, "learning_rate": 3.309646464646465e-06, "loss": 6.291202545166016, "step": 34470 }, { "epoch": 0.04375, "grad_norm": 7.513948440551758, "learning_rate": 3.3093939393939395e-06, "loss": 6.137839126586914, "step": 34475 }, { "epoch": 0.0438, "grad_norm": 5.614612102508545, "learning_rate": 3.3091414141414146e-06, "loss": 6.194662475585938, "step": 34480 }, { "epoch": 0.04385, "grad_norm": 4.89637565612793, "learning_rate": 3.308888888888889e-06, "loss": 6.271914672851563, "step": 34485 }, { "epoch": 0.0439, "grad_norm": 10.660588264465332, "learning_rate": 3.308636363636364e-06, "loss": 6.238831329345703, "step": 34490 }, { "epoch": 0.04395, "grad_norm": 7.518210411071777, "learning_rate": 3.3083838383838385e-06, "loss": 6.254771423339844, "step": 34495 }, { "epoch": 0.044, "grad_norm": 5.086498737335205, "learning_rate": 3.3081313131313135e-06, "loss": 6.236605834960938, "step": 34500 }, { "epoch": 0.04405, "grad_norm": 5.905466556549072, "learning_rate": 3.307878787878788e-06, "loss": 6.273438262939453, "step": 34505 }, { "epoch": 0.0441, "grad_norm": 14.148482322692871, "learning_rate": 3.3076262626262628e-06, "loss": 6.224131011962891, "step": 34510 }, { "epoch": 0.04415, "grad_norm": 6.002893924713135, "learning_rate": 3.3073737373737374e-06, "loss": 6.252923583984375, "step": 34515 }, { "epoch": 0.0442, "grad_norm": 6.884568214416504, "learning_rate": 3.3071212121212125e-06, "loss": 6.470506286621093, "step": 34520 }, { "epoch": 0.04425, "grad_norm": 5.463130950927734, "learning_rate": 3.306868686868687e-06, "loss": 6.330756759643554, "step": 34525 }, { "epoch": 0.0443, "grad_norm": 4.5670952796936035, "learning_rate": 3.3066161616161617e-06, "loss": 6.2619976043701175, "step": 34530 }, { "epoch": 0.04435, "grad_norm": 5.738001823425293, "learning_rate": 3.3063636363636364e-06, "loss": 6.262389755249023, "step": 34535 }, { "epoch": 0.0444, "grad_norm": 3.498025894165039, "learning_rate": 3.3061111111111114e-06, "loss": 6.278166961669922, "step": 34540 }, { "epoch": 0.04445, "grad_norm": 6.04850435256958, "learning_rate": 3.305858585858586e-06, "loss": 6.302559280395508, "step": 34545 }, { "epoch": 0.0445, "grad_norm": 4.828122615814209, "learning_rate": 3.3056060606060607e-06, "loss": 6.258047866821289, "step": 34550 }, { "epoch": 0.04455, "grad_norm": 8.403566360473633, "learning_rate": 3.3053535353535353e-06, "loss": 6.278432846069336, "step": 34555 }, { "epoch": 0.0446, "grad_norm": 4.917405605316162, "learning_rate": 3.3051010101010108e-06, "loss": 6.242156219482422, "step": 34560 }, { "epoch": 0.04465, "grad_norm": 7.1146626472473145, "learning_rate": 3.304848484848485e-06, "loss": 6.282624816894531, "step": 34565 }, { "epoch": 0.0447, "grad_norm": 8.404033660888672, "learning_rate": 3.3045959595959596e-06, "loss": 6.282833862304687, "step": 34570 }, { "epoch": 0.04475, "grad_norm": 5.883131980895996, "learning_rate": 3.3043434343434342e-06, "loss": 6.301968383789062, "step": 34575 }, { "epoch": 0.0448, "grad_norm": 6.738358974456787, "learning_rate": 3.3040909090909097e-06, "loss": 6.3014575958251955, "step": 34580 }, { "epoch": 0.04485, "grad_norm": 7.075225353240967, "learning_rate": 3.3038383838383844e-06, "loss": 6.285932540893555, "step": 34585 }, { "epoch": 0.0449, "grad_norm": 6.355262756347656, "learning_rate": 3.303585858585859e-06, "loss": 6.304890441894531, "step": 34590 }, { "epoch": 0.04495, "grad_norm": 7.817450523376465, "learning_rate": 3.303333333333333e-06, "loss": 6.257619857788086, "step": 34595 }, { "epoch": 0.045, "grad_norm": 7.867593288421631, "learning_rate": 3.3030808080808087e-06, "loss": 6.289083099365234, "step": 34600 }, { "epoch": 0.04505, "grad_norm": 8.933186531066895, "learning_rate": 3.3028282828282833e-06, "loss": 6.311075210571289, "step": 34605 }, { "epoch": 0.0451, "grad_norm": 7.551687717437744, "learning_rate": 3.302575757575758e-06, "loss": 6.489048004150391, "step": 34610 }, { "epoch": 0.04515, "grad_norm": 4.4418511390686035, "learning_rate": 3.3023232323232326e-06, "loss": 6.241167831420898, "step": 34615 }, { "epoch": 0.0452, "grad_norm": 7.764725208282471, "learning_rate": 3.3020707070707076e-06, "loss": 6.272475051879883, "step": 34620 }, { "epoch": 0.04525, "grad_norm": 5.191059112548828, "learning_rate": 3.3018181818181822e-06, "loss": 6.284445190429688, "step": 34625 }, { "epoch": 0.0453, "grad_norm": 6.391307353973389, "learning_rate": 3.301565656565657e-06, "loss": 6.251656723022461, "step": 34630 }, { "epoch": 0.04535, "grad_norm": 5.652439594268799, "learning_rate": 3.3013131313131315e-06, "loss": 6.3121589660644535, "step": 34635 }, { "epoch": 0.0454, "grad_norm": 8.243512153625488, "learning_rate": 3.3010606060606066e-06, "loss": 6.298933410644532, "step": 34640 }, { "epoch": 0.04545, "grad_norm": 7.082751750946045, "learning_rate": 3.300808080808081e-06, "loss": 6.289758682250977, "step": 34645 }, { "epoch": 0.0455, "grad_norm": 6.459993839263916, "learning_rate": 3.300555555555556e-06, "loss": 6.277294921875, "step": 34650 }, { "epoch": 0.04555, "grad_norm": 9.741841316223145, "learning_rate": 3.3003030303030304e-06, "loss": 6.269869995117188, "step": 34655 }, { "epoch": 0.0456, "grad_norm": 9.275556564331055, "learning_rate": 3.3000505050505055e-06, "loss": 6.28051643371582, "step": 34660 }, { "epoch": 0.04565, "grad_norm": 6.940868377685547, "learning_rate": 3.29979797979798e-06, "loss": 6.293081283569336, "step": 34665 }, { "epoch": 0.0457, "grad_norm": 5.493474006652832, "learning_rate": 3.2995454545454548e-06, "loss": 6.2845104217529295, "step": 34670 }, { "epoch": 0.04575, "grad_norm": 4.492004871368408, "learning_rate": 3.2992929292929294e-06, "loss": 6.222401428222656, "step": 34675 }, { "epoch": 0.0458, "grad_norm": 5.246931552886963, "learning_rate": 3.2990404040404044e-06, "loss": 6.275069046020508, "step": 34680 }, { "epoch": 0.04585, "grad_norm": 4.374936103820801, "learning_rate": 3.298787878787879e-06, "loss": 6.300774383544922, "step": 34685 }, { "epoch": 0.0459, "grad_norm": 9.484991073608398, "learning_rate": 3.2985353535353537e-06, "loss": 6.2548870086669925, "step": 34690 }, { "epoch": 0.04595, "grad_norm": 5.631253719329834, "learning_rate": 3.2982828282828283e-06, "loss": 6.180917739868164, "step": 34695 }, { "epoch": 0.046, "grad_norm": 8.674565315246582, "learning_rate": 3.2980303030303034e-06, "loss": 6.271075439453125, "step": 34700 }, { "epoch": 0.04605, "grad_norm": 5.325832843780518, "learning_rate": 3.297777777777778e-06, "loss": 6.256395721435547, "step": 34705 }, { "epoch": 0.0461, "grad_norm": 7.4633636474609375, "learning_rate": 3.2975252525252526e-06, "loss": 6.252257537841797, "step": 34710 }, { "epoch": 0.04615, "grad_norm": 6.290852069854736, "learning_rate": 3.2972727272727273e-06, "loss": 6.288002777099609, "step": 34715 }, { "epoch": 0.0462, "grad_norm": 9.203449249267578, "learning_rate": 3.2970202020202023e-06, "loss": 6.267097473144531, "step": 34720 }, { "epoch": 0.04625, "grad_norm": 6.91786003112793, "learning_rate": 3.296767676767677e-06, "loss": 6.264380645751953, "step": 34725 }, { "epoch": 0.0463, "grad_norm": 4.948250770568848, "learning_rate": 3.2965151515151516e-06, "loss": 6.294237518310547, "step": 34730 }, { "epoch": 0.04635, "grad_norm": 3.9593372344970703, "learning_rate": 3.2962626262626262e-06, "loss": 6.270987319946289, "step": 34735 }, { "epoch": 0.0464, "grad_norm": 4.310776710510254, "learning_rate": 3.2960101010101013e-06, "loss": 6.277498626708985, "step": 34740 }, { "epoch": 0.04645, "grad_norm": 3.705284833908081, "learning_rate": 3.295757575757576e-06, "loss": 6.412985992431641, "step": 34745 }, { "epoch": 0.0465, "grad_norm": 6.526326656341553, "learning_rate": 3.2955050505050505e-06, "loss": 6.321973419189453, "step": 34750 }, { "epoch": 0.04655, "grad_norm": 6.018779277801514, "learning_rate": 3.295252525252525e-06, "loss": 6.235342407226563, "step": 34755 }, { "epoch": 0.0466, "grad_norm": 21.40691375732422, "learning_rate": 3.2950000000000002e-06, "loss": 6.2852783203125, "step": 34760 }, { "epoch": 0.04665, "grad_norm": 6.6330389976501465, "learning_rate": 3.294747474747475e-06, "loss": 7.19727783203125, "step": 34765 }, { "epoch": 0.0467, "grad_norm": 8.7340087890625, "learning_rate": 3.2944949494949495e-06, "loss": 6.278639602661133, "step": 34770 }, { "epoch": 0.04675, "grad_norm": 15.627199172973633, "learning_rate": 3.294242424242424e-06, "loss": 6.363483428955078, "step": 34775 }, { "epoch": 0.0468, "grad_norm": 8.208427429199219, "learning_rate": 3.2939898989898996e-06, "loss": 6.288812637329102, "step": 34780 }, { "epoch": 0.04685, "grad_norm": 5.565245628356934, "learning_rate": 3.2937373737373742e-06, "loss": 6.2667381286621096, "step": 34785 }, { "epoch": 0.0469, "grad_norm": 4.7135162353515625, "learning_rate": 3.2934848484848484e-06, "loss": 6.298993301391602, "step": 34790 }, { "epoch": 0.04695, "grad_norm": 4.540097713470459, "learning_rate": 3.293232323232323e-06, "loss": 6.294288253784179, "step": 34795 }, { "epoch": 0.047, "grad_norm": 24.287010192871094, "learning_rate": 3.2929797979797985e-06, "loss": 6.258221054077149, "step": 34800 }, { "epoch": 0.04705, "grad_norm": 9.320701599121094, "learning_rate": 3.292727272727273e-06, "loss": 6.332061767578125, "step": 34805 }, { "epoch": 0.0471, "grad_norm": 8.871326446533203, "learning_rate": 3.2924747474747478e-06, "loss": 6.256705856323242, "step": 34810 }, { "epoch": 0.04715, "grad_norm": 5.026736259460449, "learning_rate": 3.292222222222223e-06, "loss": 6.2572776794433596, "step": 34815 }, { "epoch": 0.0472, "grad_norm": 8.295275688171387, "learning_rate": 3.2919696969696975e-06, "loss": 6.287162017822266, "step": 34820 }, { "epoch": 0.04725, "grad_norm": 7.350594997406006, "learning_rate": 3.291717171717172e-06, "loss": 6.261262512207031, "step": 34825 }, { "epoch": 0.0473, "grad_norm": 7.532747745513916, "learning_rate": 3.2914646464646467e-06, "loss": 6.311292266845703, "step": 34830 }, { "epoch": 0.04735, "grad_norm": 4.576544761657715, "learning_rate": 3.2912121212121218e-06, "loss": 6.290398025512696, "step": 34835 }, { "epoch": 0.0474, "grad_norm": 10.832393646240234, "learning_rate": 3.2909595959595964e-06, "loss": 6.3363502502441404, "step": 34840 }, { "epoch": 0.04745, "grad_norm": 6.160528659820557, "learning_rate": 3.290707070707071e-06, "loss": 6.238041687011719, "step": 34845 }, { "epoch": 0.0475, "grad_norm": 6.178637504577637, "learning_rate": 3.2904545454545457e-06, "loss": 6.288633728027344, "step": 34850 }, { "epoch": 0.04755, "grad_norm": 4.105836868286133, "learning_rate": 3.2902020202020207e-06, "loss": 6.248758316040039, "step": 34855 }, { "epoch": 0.0476, "grad_norm": 8.171919822692871, "learning_rate": 3.2899494949494954e-06, "loss": 6.271131134033203, "step": 34860 }, { "epoch": 0.04765, "grad_norm": 5.888716697692871, "learning_rate": 3.28969696969697e-06, "loss": 6.271892547607422, "step": 34865 }, { "epoch": 0.0477, "grad_norm": 6.41104793548584, "learning_rate": 3.2894444444444446e-06, "loss": 6.244176864624023, "step": 34870 }, { "epoch": 0.04775, "grad_norm": 6.922689437866211, "learning_rate": 3.2891919191919197e-06, "loss": 6.283294296264648, "step": 34875 }, { "epoch": 0.0478, "grad_norm": 5.373449802398682, "learning_rate": 3.2889393939393943e-06, "loss": 6.309387588500977, "step": 34880 }, { "epoch": 0.04785, "grad_norm": 4.975199222564697, "learning_rate": 3.288686868686869e-06, "loss": 6.296195983886719, "step": 34885 }, { "epoch": 0.0479, "grad_norm": 5.440279483795166, "learning_rate": 3.2884343434343436e-06, "loss": 6.219166564941406, "step": 34890 }, { "epoch": 0.04795, "grad_norm": 5.346248626708984, "learning_rate": 3.2881818181818186e-06, "loss": 6.292256164550781, "step": 34895 }, { "epoch": 0.048, "grad_norm": 16.57040023803711, "learning_rate": 3.2879292929292932e-06, "loss": 6.438202667236328, "step": 34900 }, { "epoch": 0.04805, "grad_norm": 8.363144874572754, "learning_rate": 3.287676767676768e-06, "loss": 6.335869598388672, "step": 34905 }, { "epoch": 0.0481, "grad_norm": 10.708696365356445, "learning_rate": 3.2874242424242425e-06, "loss": 6.459724426269531, "step": 34910 }, { "epoch": 0.04815, "grad_norm": 8.479935646057129, "learning_rate": 3.2871717171717176e-06, "loss": 6.282632064819336, "step": 34915 }, { "epoch": 0.0482, "grad_norm": 5.50937557220459, "learning_rate": 3.286919191919192e-06, "loss": 6.299241256713867, "step": 34920 }, { "epoch": 0.04825, "grad_norm": 5.580682277679443, "learning_rate": 3.286666666666667e-06, "loss": 6.356058120727539, "step": 34925 }, { "epoch": 0.0483, "grad_norm": 9.991703033447266, "learning_rate": 3.2864141414141415e-06, "loss": 6.277548599243164, "step": 34930 }, { "epoch": 0.04835, "grad_norm": 13.065298080444336, "learning_rate": 3.2861616161616165e-06, "loss": 6.165322875976562, "step": 34935 }, { "epoch": 0.0484, "grad_norm": 5.021111488342285, "learning_rate": 3.285909090909091e-06, "loss": 6.277559661865235, "step": 34940 }, { "epoch": 0.04845, "grad_norm": 5.548837184906006, "learning_rate": 3.2856565656565658e-06, "loss": 6.312124633789063, "step": 34945 }, { "epoch": 0.0485, "grad_norm": 6.6719560623168945, "learning_rate": 3.2854040404040404e-06, "loss": 6.289908218383789, "step": 34950 }, { "epoch": 0.04855, "grad_norm": 5.518957614898682, "learning_rate": 3.2851515151515154e-06, "loss": 6.258171081542969, "step": 34955 }, { "epoch": 0.0486, "grad_norm": 6.432694911956787, "learning_rate": 3.28489898989899e-06, "loss": 6.308834457397461, "step": 34960 }, { "epoch": 0.04865, "grad_norm": 4.614040374755859, "learning_rate": 3.2846464646464647e-06, "loss": 6.342221069335937, "step": 34965 }, { "epoch": 0.0487, "grad_norm": 9.187679290771484, "learning_rate": 3.2843939393939393e-06, "loss": 6.41964111328125, "step": 34970 }, { "epoch": 0.04875, "grad_norm": 4.949752330780029, "learning_rate": 3.284141414141415e-06, "loss": 6.298169326782227, "step": 34975 }, { "epoch": 0.0488, "grad_norm": 6.344938278198242, "learning_rate": 3.2838888888888894e-06, "loss": 6.240311813354492, "step": 34980 }, { "epoch": 0.04885, "grad_norm": 6.389272689819336, "learning_rate": 3.2836363636363637e-06, "loss": 6.296944046020508, "step": 34985 }, { "epoch": 0.0489, "grad_norm": 4.36979866027832, "learning_rate": 3.2833838383838383e-06, "loss": 6.28919563293457, "step": 34990 }, { "epoch": 0.04895, "grad_norm": 4.547360897064209, "learning_rate": 3.2831313131313138e-06, "loss": 6.301368331909179, "step": 34995 }, { "epoch": 0.049, "grad_norm": 15.396032333374023, "learning_rate": 3.2828787878787884e-06, "loss": 6.267964553833008, "step": 35000 }, { "epoch": 0.04905, "grad_norm": 5.424217224121094, "learning_rate": 3.282626262626263e-06, "loss": 6.285940933227539, "step": 35005 }, { "epoch": 0.0491, "grad_norm": 6.066476821899414, "learning_rate": 3.2823737373737372e-06, "loss": 6.276809310913086, "step": 35010 }, { "epoch": 0.04915, "grad_norm": 5.129293918609619, "learning_rate": 3.2821212121212127e-06, "loss": 6.256174087524414, "step": 35015 }, { "epoch": 0.0492, "grad_norm": 7.298128128051758, "learning_rate": 3.2818686868686873e-06, "loss": 6.264319229125976, "step": 35020 }, { "epoch": 0.04925, "grad_norm": 4.629802227020264, "learning_rate": 3.281616161616162e-06, "loss": 6.250748062133789, "step": 35025 }, { "epoch": 0.0493, "grad_norm": 6.018641471862793, "learning_rate": 3.2813636363636366e-06, "loss": 6.285256958007812, "step": 35030 }, { "epoch": 0.04935, "grad_norm": 7.293976306915283, "learning_rate": 3.2811111111111116e-06, "loss": 6.273014831542969, "step": 35035 }, { "epoch": 0.0494, "grad_norm": 7.021273612976074, "learning_rate": 3.2808585858585863e-06, "loss": 6.2677467346191404, "step": 35040 }, { "epoch": 0.04945, "grad_norm": 7.875330448150635, "learning_rate": 3.280606060606061e-06, "loss": 6.29931411743164, "step": 35045 }, { "epoch": 0.0495, "grad_norm": 21.023818969726562, "learning_rate": 3.2803535353535355e-06, "loss": 6.427122497558594, "step": 35050 }, { "epoch": 0.04955, "grad_norm": 7.667167663574219, "learning_rate": 3.2801010101010106e-06, "loss": 6.317515182495117, "step": 35055 }, { "epoch": 0.0496, "grad_norm": 25.968650817871094, "learning_rate": 3.2798484848484852e-06, "loss": 6.82774658203125, "step": 35060 }, { "epoch": 0.04965, "grad_norm": 12.805072784423828, "learning_rate": 3.27959595959596e-06, "loss": 6.866229248046875, "step": 35065 }, { "epoch": 0.0497, "grad_norm": 4.353940486907959, "learning_rate": 3.2793434343434345e-06, "loss": 6.284849548339844, "step": 35070 }, { "epoch": 0.04975, "grad_norm": 9.325813293457031, "learning_rate": 3.2790909090909095e-06, "loss": 6.434210968017578, "step": 35075 }, { "epoch": 0.0498, "grad_norm": 5.88203763961792, "learning_rate": 3.278838383838384e-06, "loss": 6.215633392333984, "step": 35080 }, { "epoch": 0.04985, "grad_norm": 10.095976829528809, "learning_rate": 3.278585858585859e-06, "loss": 6.323418045043946, "step": 35085 }, { "epoch": 0.0499, "grad_norm": 6.309284687042236, "learning_rate": 3.2783333333333334e-06, "loss": 6.318337249755859, "step": 35090 }, { "epoch": 0.04995, "grad_norm": 7.243902683258057, "learning_rate": 3.2780808080808085e-06, "loss": 6.272135543823242, "step": 35095 }, { "epoch": 0.05, "grad_norm": 5.851275444030762, "learning_rate": 3.277828282828283e-06, "loss": 6.332157897949219, "step": 35100 }, { "epoch": 0.05005, "grad_norm": 4.588741779327393, "learning_rate": 3.2775757575757577e-06, "loss": 6.310874938964844, "step": 35105 }, { "epoch": 0.0501, "grad_norm": 6.159008502960205, "learning_rate": 3.2773232323232324e-06, "loss": 6.26396369934082, "step": 35110 }, { "epoch": 0.05015, "grad_norm": 6.673763751983643, "learning_rate": 3.2770707070707074e-06, "loss": 6.280423736572265, "step": 35115 }, { "epoch": 0.0502, "grad_norm": 4.924130916595459, "learning_rate": 3.276818181818182e-06, "loss": 6.2667381286621096, "step": 35120 }, { "epoch": 0.05025, "grad_norm": 5.175667762756348, "learning_rate": 3.2765656565656567e-06, "loss": 6.275269317626953, "step": 35125 }, { "epoch": 0.0503, "grad_norm": 5.337789535522461, "learning_rate": 3.2763131313131313e-06, "loss": 6.282649230957031, "step": 35130 }, { "epoch": 0.05035, "grad_norm": 9.778614044189453, "learning_rate": 3.2760606060606064e-06, "loss": 6.256710052490234, "step": 35135 }, { "epoch": 0.0504, "grad_norm": 7.157259464263916, "learning_rate": 3.275808080808081e-06, "loss": 6.268456268310547, "step": 35140 }, { "epoch": 0.05045, "grad_norm": 7.5348687171936035, "learning_rate": 3.2755555555555556e-06, "loss": 6.267140960693359, "step": 35145 }, { "epoch": 0.0505, "grad_norm": 3.803321599960327, "learning_rate": 3.2753030303030303e-06, "loss": 6.280174636840821, "step": 35150 }, { "epoch": 0.05055, "grad_norm": 10.501906394958496, "learning_rate": 3.2750505050505053e-06, "loss": 6.292560195922851, "step": 35155 }, { "epoch": 0.0506, "grad_norm": 13.511707305908203, "learning_rate": 3.27479797979798e-06, "loss": 6.276032257080078, "step": 35160 }, { "epoch": 0.05065, "grad_norm": 5.1371002197265625, "learning_rate": 3.2745454545454546e-06, "loss": 6.306236267089844, "step": 35165 }, { "epoch": 0.0507, "grad_norm": 6.77003812789917, "learning_rate": 3.274292929292929e-06, "loss": 6.387910461425781, "step": 35170 }, { "epoch": 0.05075, "grad_norm": 4.099214553833008, "learning_rate": 3.2740404040404043e-06, "loss": 6.196070861816406, "step": 35175 }, { "epoch": 0.0508, "grad_norm": 5.801113605499268, "learning_rate": 3.273787878787879e-06, "loss": 6.476727294921875, "step": 35180 }, { "epoch": 0.05085, "grad_norm": 5.65481424331665, "learning_rate": 3.2735353535353535e-06, "loss": 6.270328140258789, "step": 35185 }, { "epoch": 0.0509, "grad_norm": 6.214885234832764, "learning_rate": 3.273282828282828e-06, "loss": 6.294662857055664, "step": 35190 }, { "epoch": 0.05095, "grad_norm": 6.880329132080078, "learning_rate": 3.2730303030303036e-06, "loss": 6.288825225830078, "step": 35195 }, { "epoch": 0.051, "grad_norm": 5.270611763000488, "learning_rate": 3.2727777777777783e-06, "loss": 6.29886474609375, "step": 35200 }, { "epoch": 0.05105, "grad_norm": 7.678526878356934, "learning_rate": 3.2725252525252525e-06, "loss": 6.270150756835937, "step": 35205 }, { "epoch": 0.0511, "grad_norm": 9.149025917053223, "learning_rate": 3.272272727272727e-06, "loss": 6.273993682861328, "step": 35210 }, { "epoch": 0.05115, "grad_norm": 12.453163146972656, "learning_rate": 3.2720202020202026e-06, "loss": 6.337948608398437, "step": 35215 }, { "epoch": 0.0512, "grad_norm": 6.77817964553833, "learning_rate": 3.271767676767677e-06, "loss": 6.334602355957031, "step": 35220 }, { "epoch": 0.05125, "grad_norm": 7.45134973526001, "learning_rate": 3.271515151515152e-06, "loss": 6.375101089477539, "step": 35225 }, { "epoch": 0.0513, "grad_norm": 10.077485084533691, "learning_rate": 3.271262626262627e-06, "loss": 6.265316390991211, "step": 35230 }, { "epoch": 0.05135, "grad_norm": 10.97733211517334, "learning_rate": 3.2710101010101015e-06, "loss": 6.290368270874024, "step": 35235 }, { "epoch": 0.0514, "grad_norm": 9.000105857849121, "learning_rate": 3.270757575757576e-06, "loss": 6.266787338256836, "step": 35240 }, { "epoch": 0.05145, "grad_norm": 7.792247295379639, "learning_rate": 3.2705050505050508e-06, "loss": 6.199068069458008, "step": 35245 }, { "epoch": 0.0515, "grad_norm": 7.313779830932617, "learning_rate": 3.270252525252526e-06, "loss": 6.317647933959961, "step": 35250 }, { "epoch": 0.05155, "grad_norm": 6.977554798126221, "learning_rate": 3.2700000000000005e-06, "loss": 6.292848205566406, "step": 35255 }, { "epoch": 0.0516, "grad_norm": 5.087864398956299, "learning_rate": 3.269747474747475e-06, "loss": 6.252903747558594, "step": 35260 }, { "epoch": 0.05165, "grad_norm": 6.423875331878662, "learning_rate": 3.2694949494949497e-06, "loss": 6.247765350341797, "step": 35265 }, { "epoch": 0.0517, "grad_norm": 6.133201599121094, "learning_rate": 3.2692424242424248e-06, "loss": 6.251560974121094, "step": 35270 }, { "epoch": 0.05175, "grad_norm": 11.473453521728516, "learning_rate": 3.2689898989898994e-06, "loss": 6.2307075500488285, "step": 35275 }, { "epoch": 0.0518, "grad_norm": 4.920134544372559, "learning_rate": 3.268737373737374e-06, "loss": 6.302825546264648, "step": 35280 }, { "epoch": 0.05185, "grad_norm": 14.104022026062012, "learning_rate": 3.2684848484848487e-06, "loss": 6.3282920837402346, "step": 35285 }, { "epoch": 0.0519, "grad_norm": 3.6653671264648438, "learning_rate": 3.2682323232323237e-06, "loss": 6.556161499023437, "step": 35290 }, { "epoch": 0.05195, "grad_norm": 6.94016695022583, "learning_rate": 3.2679797979797983e-06, "loss": 6.245934677124024, "step": 35295 }, { "epoch": 0.052, "grad_norm": 6.227659225463867, "learning_rate": 3.267727272727273e-06, "loss": 6.295278930664063, "step": 35300 }, { "epoch": 0.05205, "grad_norm": 4.71866512298584, "learning_rate": 3.2674747474747476e-06, "loss": 6.246229934692383, "step": 35305 }, { "epoch": 0.0521, "grad_norm": 4.561375617980957, "learning_rate": 3.2672222222222227e-06, "loss": 6.366829299926758, "step": 35310 }, { "epoch": 0.05215, "grad_norm": 5.159406661987305, "learning_rate": 3.2669696969696973e-06, "loss": 6.237774658203125, "step": 35315 }, { "epoch": 0.0522, "grad_norm": 8.701210021972656, "learning_rate": 3.266717171717172e-06, "loss": 6.306728744506836, "step": 35320 }, { "epoch": 0.05225, "grad_norm": 10.100695610046387, "learning_rate": 3.2664646464646465e-06, "loss": 6.247107696533203, "step": 35325 }, { "epoch": 0.0523, "grad_norm": 9.718118667602539, "learning_rate": 3.2662121212121216e-06, "loss": 6.255717849731445, "step": 35330 }, { "epoch": 0.05235, "grad_norm": 8.896659851074219, "learning_rate": 3.2659595959595962e-06, "loss": 6.278120422363282, "step": 35335 }, { "epoch": 0.0524, "grad_norm": 6.090173721313477, "learning_rate": 3.265707070707071e-06, "loss": 6.2666973114013675, "step": 35340 }, { "epoch": 0.05245, "grad_norm": 5.044219970703125, "learning_rate": 3.2654545454545455e-06, "loss": 6.256593322753906, "step": 35345 }, { "epoch": 0.0525, "grad_norm": 6.545598983764648, "learning_rate": 3.2652020202020205e-06, "loss": 6.300411224365234, "step": 35350 }, { "epoch": 0.05255, "grad_norm": 7.838217258453369, "learning_rate": 3.264949494949495e-06, "loss": 6.3089244842529295, "step": 35355 }, { "epoch": 0.0526, "grad_norm": 7.522068977355957, "learning_rate": 3.26469696969697e-06, "loss": 6.236774444580078, "step": 35360 }, { "epoch": 0.05265, "grad_norm": 5.815244674682617, "learning_rate": 3.2644444444444444e-06, "loss": 6.426654052734375, "step": 35365 }, { "epoch": 0.0527, "grad_norm": 8.679105758666992, "learning_rate": 3.2641919191919195e-06, "loss": 6.304439544677734, "step": 35370 }, { "epoch": 0.05275, "grad_norm": 9.651296615600586, "learning_rate": 3.263939393939394e-06, "loss": 6.236795043945312, "step": 35375 }, { "epoch": 0.0528, "grad_norm": 6.470859527587891, "learning_rate": 3.2636868686868687e-06, "loss": 6.262895965576172, "step": 35380 }, { "epoch": 0.05285, "grad_norm": 8.24250316619873, "learning_rate": 3.2634343434343434e-06, "loss": 6.272414779663086, "step": 35385 }, { "epoch": 0.0529, "grad_norm": 7.0517706871032715, "learning_rate": 3.263181818181819e-06, "loss": 6.269016265869141, "step": 35390 }, { "epoch": 0.05295, "grad_norm": 8.443787574768066, "learning_rate": 3.2629292929292935e-06, "loss": 6.29283332824707, "step": 35395 }, { "epoch": 0.053, "grad_norm": 4.77041482925415, "learning_rate": 3.2626767676767677e-06, "loss": 6.284092712402344, "step": 35400 }, { "epoch": 0.05305, "grad_norm": 5.518260478973389, "learning_rate": 3.2624242424242423e-06, "loss": 6.246273040771484, "step": 35405 }, { "epoch": 0.0531, "grad_norm": 7.27074670791626, "learning_rate": 3.262171717171718e-06, "loss": 6.262240219116211, "step": 35410 }, { "epoch": 0.05315, "grad_norm": 7.116097450256348, "learning_rate": 3.2619191919191924e-06, "loss": 6.244412612915039, "step": 35415 }, { "epoch": 0.0532, "grad_norm": 5.62921142578125, "learning_rate": 3.261666666666667e-06, "loss": 6.31261978149414, "step": 35420 }, { "epoch": 0.05325, "grad_norm": 5.603243350982666, "learning_rate": 3.2614141414141413e-06, "loss": 6.266482543945313, "step": 35425 }, { "epoch": 0.0533, "grad_norm": 6.649318695068359, "learning_rate": 3.2611616161616167e-06, "loss": 6.263259506225586, "step": 35430 }, { "epoch": 0.05335, "grad_norm": 8.162367820739746, "learning_rate": 3.2609090909090914e-06, "loss": 6.3018028259277346, "step": 35435 }, { "epoch": 0.0534, "grad_norm": 6.412289142608643, "learning_rate": 3.260656565656566e-06, "loss": 6.271790313720703, "step": 35440 }, { "epoch": 0.05345, "grad_norm": 53.48617935180664, "learning_rate": 3.2604040404040406e-06, "loss": 6.910607147216797, "step": 35445 }, { "epoch": 0.0535, "grad_norm": 7.840726852416992, "learning_rate": 3.2601515151515157e-06, "loss": 6.682980346679687, "step": 35450 }, { "epoch": 0.05355, "grad_norm": 5.839212417602539, "learning_rate": 3.2598989898989903e-06, "loss": 6.290212631225586, "step": 35455 }, { "epoch": 0.0536, "grad_norm": 8.730506896972656, "learning_rate": 3.259646464646465e-06, "loss": 6.205363845825195, "step": 35460 }, { "epoch": 0.05365, "grad_norm": 5.301448822021484, "learning_rate": 3.2593939393939396e-06, "loss": 6.2750701904296875, "step": 35465 }, { "epoch": 0.0537, "grad_norm": 9.380294799804688, "learning_rate": 3.2591414141414146e-06, "loss": 6.27130241394043, "step": 35470 }, { "epoch": 0.05375, "grad_norm": 5.596490383148193, "learning_rate": 3.2588888888888893e-06, "loss": 6.347657012939453, "step": 35475 }, { "epoch": 0.0538, "grad_norm": 8.998419761657715, "learning_rate": 3.258636363636364e-06, "loss": 6.263415908813476, "step": 35480 }, { "epoch": 0.05385, "grad_norm": 4.243162155151367, "learning_rate": 3.2583838383838385e-06, "loss": 6.275699234008789, "step": 35485 }, { "epoch": 0.0539, "grad_norm": 5.2408576011657715, "learning_rate": 3.2581313131313136e-06, "loss": 6.295782089233398, "step": 35490 }, { "epoch": 0.05395, "grad_norm": 6.622154712677002, "learning_rate": 3.257878787878788e-06, "loss": 6.262670516967773, "step": 35495 }, { "epoch": 0.054, "grad_norm": 7.402721881866455, "learning_rate": 3.257626262626263e-06, "loss": 6.251801300048828, "step": 35500 }, { "epoch": 0.05405, "grad_norm": 5.092800140380859, "learning_rate": 3.2573737373737375e-06, "loss": 6.276590347290039, "step": 35505 }, { "epoch": 0.0541, "grad_norm": 24.962053298950195, "learning_rate": 3.2571212121212125e-06, "loss": 6.481658172607422, "step": 35510 }, { "epoch": 0.05415, "grad_norm": 6.749709606170654, "learning_rate": 3.256868686868687e-06, "loss": 6.305655288696289, "step": 35515 }, { "epoch": 0.0542, "grad_norm": 5.051708221435547, "learning_rate": 3.2566161616161618e-06, "loss": 6.287960815429687, "step": 35520 }, { "epoch": 0.05425, "grad_norm": 5.226186275482178, "learning_rate": 3.2563636363636364e-06, "loss": 6.2943359375, "step": 35525 }, { "epoch": 0.0543, "grad_norm": 7.103726863861084, "learning_rate": 3.2561111111111115e-06, "loss": 6.235382080078125, "step": 35530 }, { "epoch": 0.05435, "grad_norm": 4.971731185913086, "learning_rate": 3.255858585858586e-06, "loss": 6.3156383514404295, "step": 35535 }, { "epoch": 0.0544, "grad_norm": 6.036776065826416, "learning_rate": 3.2556060606060607e-06, "loss": 6.339959716796875, "step": 35540 }, { "epoch": 0.05445, "grad_norm": 9.980279922485352, "learning_rate": 3.2553535353535354e-06, "loss": 6.251197433471679, "step": 35545 }, { "epoch": 0.0545, "grad_norm": 9.433897018432617, "learning_rate": 3.2551010101010104e-06, "loss": 6.248058319091797, "step": 35550 }, { "epoch": 0.05455, "grad_norm": 5.618668079376221, "learning_rate": 3.254848484848485e-06, "loss": 6.266307830810547, "step": 35555 }, { "epoch": 0.0546, "grad_norm": 9.41104507446289, "learning_rate": 3.2545959595959597e-06, "loss": 6.305927276611328, "step": 35560 }, { "epoch": 0.05465, "grad_norm": 6.8511247634887695, "learning_rate": 3.2543434343434343e-06, "loss": 6.2869609832763675, "step": 35565 }, { "epoch": 0.0547, "grad_norm": 9.258623123168945, "learning_rate": 3.2540909090909094e-06, "loss": 6.341587829589844, "step": 35570 }, { "epoch": 0.05475, "grad_norm": 5.560075283050537, "learning_rate": 3.253838383838384e-06, "loss": 6.28405532836914, "step": 35575 }, { "epoch": 0.0548, "grad_norm": 4.908081531524658, "learning_rate": 3.2535858585858586e-06, "loss": 6.322003936767578, "step": 35580 }, { "epoch": 0.05485, "grad_norm": 7.400416851043701, "learning_rate": 3.2533333333333332e-06, "loss": 6.304544448852539, "step": 35585 }, { "epoch": 0.0549, "grad_norm": 5.350142955780029, "learning_rate": 3.2530808080808083e-06, "loss": 6.28719482421875, "step": 35590 }, { "epoch": 0.05495, "grad_norm": 4.149595737457275, "learning_rate": 3.252828282828283e-06, "loss": 6.266949462890625, "step": 35595 }, { "epoch": 0.055, "grad_norm": 6.120441436767578, "learning_rate": 3.2525757575757576e-06, "loss": 6.2905021667480465, "step": 35600 }, { "epoch": 0.05505, "grad_norm": 9.76497745513916, "learning_rate": 3.252323232323232e-06, "loss": 6.297564315795898, "step": 35605 }, { "epoch": 0.0551, "grad_norm": 8.089164733886719, "learning_rate": 3.2520707070707077e-06, "loss": 6.2884162902832035, "step": 35610 }, { "epoch": 0.05515, "grad_norm": 4.703613758087158, "learning_rate": 3.2518181818181823e-06, "loss": 6.261649703979492, "step": 35615 }, { "epoch": 0.0552, "grad_norm": 6.085631370544434, "learning_rate": 3.2515656565656565e-06, "loss": 6.289752197265625, "step": 35620 }, { "epoch": 0.05525, "grad_norm": 9.445321083068848, "learning_rate": 3.251313131313131e-06, "loss": 6.363951873779297, "step": 35625 }, { "epoch": 0.0553, "grad_norm": 5.525025367736816, "learning_rate": 3.2510606060606066e-06, "loss": 6.2696586608886715, "step": 35630 }, { "epoch": 0.05535, "grad_norm": 18.950674057006836, "learning_rate": 3.2508080808080812e-06, "loss": 6.398841476440429, "step": 35635 }, { "epoch": 0.0554, "grad_norm": 5.078076362609863, "learning_rate": 3.250555555555556e-06, "loss": 6.348239517211914, "step": 35640 }, { "epoch": 0.05545, "grad_norm": 5.694879531860352, "learning_rate": 3.2503030303030305e-06, "loss": 6.240224075317383, "step": 35645 }, { "epoch": 0.0555, "grad_norm": 5.201033115386963, "learning_rate": 3.2500505050505056e-06, "loss": 6.298657989501953, "step": 35650 }, { "epoch": 0.05555, "grad_norm": 5.783593654632568, "learning_rate": 3.24979797979798e-06, "loss": 6.256073760986328, "step": 35655 }, { "epoch": 0.0556, "grad_norm": 8.388336181640625, "learning_rate": 3.249545454545455e-06, "loss": 6.29564208984375, "step": 35660 }, { "epoch": 0.05565, "grad_norm": 10.50983715057373, "learning_rate": 3.24929292929293e-06, "loss": 6.2493438720703125, "step": 35665 }, { "epoch": 0.0557, "grad_norm": 249.3787384033203, "learning_rate": 3.2490404040404045e-06, "loss": 8.048028564453125, "step": 35670 }, { "epoch": 0.05575, "grad_norm": 6.205459117889404, "learning_rate": 3.248787878787879e-06, "loss": 7.072125244140625, "step": 35675 }, { "epoch": 0.0558, "grad_norm": 7.880497932434082, "learning_rate": 3.2485353535353538e-06, "loss": 6.279197692871094, "step": 35680 }, { "epoch": 0.05585, "grad_norm": 8.75843620300293, "learning_rate": 3.248282828282829e-06, "loss": 6.226631927490234, "step": 35685 }, { "epoch": 0.0559, "grad_norm": 10.81594181060791, "learning_rate": 3.2480303030303034e-06, "loss": 6.252440643310547, "step": 35690 }, { "epoch": 0.05595, "grad_norm": 5.869147300720215, "learning_rate": 3.247777777777778e-06, "loss": 6.264157104492187, "step": 35695 }, { "epoch": 0.056, "grad_norm": 7.084537982940674, "learning_rate": 3.2475252525252527e-06, "loss": 6.278627014160156, "step": 35700 }, { "epoch": 0.05605, "grad_norm": 4.703330993652344, "learning_rate": 3.2472727272727278e-06, "loss": 6.293717575073242, "step": 35705 }, { "epoch": 0.0561, "grad_norm": 4.8099284172058105, "learning_rate": 3.2470202020202024e-06, "loss": 6.323994445800781, "step": 35710 }, { "epoch": 0.05615, "grad_norm": 5.613970756530762, "learning_rate": 3.246767676767677e-06, "loss": 6.262202453613281, "step": 35715 }, { "epoch": 0.0562, "grad_norm": 5.7324652671813965, "learning_rate": 3.2465151515151516e-06, "loss": 6.314483261108398, "step": 35720 }, { "epoch": 0.05625, "grad_norm": 5.6442084312438965, "learning_rate": 3.2462626262626267e-06, "loss": 6.318102264404297, "step": 35725 }, { "epoch": 0.0563, "grad_norm": 5.27984619140625, "learning_rate": 3.2460101010101013e-06, "loss": 6.252370071411133, "step": 35730 }, { "epoch": 0.05635, "grad_norm": 37.10020065307617, "learning_rate": 3.245757575757576e-06, "loss": 6.186188507080078, "step": 35735 }, { "epoch": 0.0564, "grad_norm": 5.766847133636475, "learning_rate": 3.2455050505050506e-06, "loss": 6.302840805053711, "step": 35740 }, { "epoch": 0.05645, "grad_norm": 7.287111282348633, "learning_rate": 3.2452525252525256e-06, "loss": 6.29181137084961, "step": 35745 }, { "epoch": 0.0565, "grad_norm": 5.580541610717773, "learning_rate": 3.2450000000000003e-06, "loss": 6.336334228515625, "step": 35750 }, { "epoch": 0.05655, "grad_norm": 9.409850120544434, "learning_rate": 3.244747474747475e-06, "loss": 6.381145477294922, "step": 35755 }, { "epoch": 0.0566, "grad_norm": 6.440433979034424, "learning_rate": 3.2444949494949495e-06, "loss": 6.201219940185547, "step": 35760 }, { "epoch": 0.05665, "grad_norm": 17.87500762939453, "learning_rate": 3.2442424242424246e-06, "loss": 6.879973602294922, "step": 35765 }, { "epoch": 0.0567, "grad_norm": 7.7824296951293945, "learning_rate": 3.2439898989898992e-06, "loss": 6.314643096923828, "step": 35770 }, { "epoch": 0.05675, "grad_norm": 7.30472993850708, "learning_rate": 3.243737373737374e-06, "loss": 6.297591018676758, "step": 35775 }, { "epoch": 0.0568, "grad_norm": 4.307924747467041, "learning_rate": 3.2434848484848485e-06, "loss": 6.2660400390625, "step": 35780 }, { "epoch": 0.05685, "grad_norm": 7.007259845733643, "learning_rate": 3.2432323232323235e-06, "loss": 6.218526077270508, "step": 35785 }, { "epoch": 0.0569, "grad_norm": 3.50826358795166, "learning_rate": 3.242979797979798e-06, "loss": 6.420111083984375, "step": 35790 }, { "epoch": 0.05695, "grad_norm": 5.910391330718994, "learning_rate": 3.242727272727273e-06, "loss": 6.290824127197266, "step": 35795 }, { "epoch": 0.057, "grad_norm": 6.224939823150635, "learning_rate": 3.2424747474747474e-06, "loss": 6.270291900634765, "step": 35800 }, { "epoch": 0.05705, "grad_norm": 6.197885036468506, "learning_rate": 3.242222222222223e-06, "loss": 6.217855453491211, "step": 35805 }, { "epoch": 0.0571, "grad_norm": 6.088695526123047, "learning_rate": 3.2419696969696975e-06, "loss": 6.283650207519531, "step": 35810 }, { "epoch": 0.05715, "grad_norm": 5.695998191833496, "learning_rate": 3.2417171717171717e-06, "loss": 6.252125930786133, "step": 35815 }, { "epoch": 0.0572, "grad_norm": 4.714662075042725, "learning_rate": 3.2414646464646464e-06, "loss": 6.212126159667969, "step": 35820 }, { "epoch": 0.05725, "grad_norm": 7.113203048706055, "learning_rate": 3.241212121212122e-06, "loss": 6.2817230224609375, "step": 35825 }, { "epoch": 0.0573, "grad_norm": 6.504984378814697, "learning_rate": 3.2409595959595965e-06, "loss": 6.263227081298828, "step": 35830 }, { "epoch": 0.05735, "grad_norm": 4.78233003616333, "learning_rate": 3.240707070707071e-06, "loss": 6.304944610595703, "step": 35835 }, { "epoch": 0.0574, "grad_norm": 12.624735832214355, "learning_rate": 3.2404545454545457e-06, "loss": 6.318175506591797, "step": 35840 }, { "epoch": 0.05745, "grad_norm": 6.1849045753479, "learning_rate": 3.2402020202020208e-06, "loss": 6.3728282928466795, "step": 35845 }, { "epoch": 0.0575, "grad_norm": 6.498283386230469, "learning_rate": 3.2399494949494954e-06, "loss": 6.259939193725586, "step": 35850 }, { "epoch": 0.05755, "grad_norm": 5.609084129333496, "learning_rate": 3.23969696969697e-06, "loss": 6.288790512084961, "step": 35855 }, { "epoch": 0.0576, "grad_norm": 4.448870658874512, "learning_rate": 3.2394444444444447e-06, "loss": 6.30543212890625, "step": 35860 }, { "epoch": 0.05765, "grad_norm": 6.952431678771973, "learning_rate": 3.2391919191919197e-06, "loss": 6.283628845214844, "step": 35865 }, { "epoch": 0.0577, "grad_norm": 5.7677154541015625, "learning_rate": 3.2389393939393944e-06, "loss": 6.259416580200195, "step": 35870 }, { "epoch": 0.05775, "grad_norm": 6.332040786743164, "learning_rate": 3.238686868686869e-06, "loss": 6.211784744262696, "step": 35875 }, { "epoch": 0.0578, "grad_norm": 4.846890449523926, "learning_rate": 3.2384343434343436e-06, "loss": 6.205535888671875, "step": 35880 }, { "epoch": 0.05785, "grad_norm": 6.803906440734863, "learning_rate": 3.2381818181818187e-06, "loss": 6.238606262207031, "step": 35885 }, { "epoch": 0.0579, "grad_norm": 5.141147136688232, "learning_rate": 3.2379292929292933e-06, "loss": 6.30022964477539, "step": 35890 }, { "epoch": 0.05795, "grad_norm": 6.466691970825195, "learning_rate": 3.237676767676768e-06, "loss": 6.2699134826660154, "step": 35895 }, { "epoch": 0.058, "grad_norm": 4.832157135009766, "learning_rate": 3.2374242424242426e-06, "loss": 6.346537399291992, "step": 35900 }, { "epoch": 0.05805, "grad_norm": 5.0135087966918945, "learning_rate": 3.2371717171717176e-06, "loss": 6.241169738769531, "step": 35905 }, { "epoch": 0.0581, "grad_norm": 6.855318069458008, "learning_rate": 3.2369191919191922e-06, "loss": 6.27856216430664, "step": 35910 }, { "epoch": 0.05815, "grad_norm": 4.944605827331543, "learning_rate": 3.236666666666667e-06, "loss": 6.274566650390625, "step": 35915 }, { "epoch": 0.0582, "grad_norm": 4.67616081237793, "learning_rate": 3.2364141414141415e-06, "loss": 6.278418350219726, "step": 35920 }, { "epoch": 0.05825, "grad_norm": 5.527277946472168, "learning_rate": 3.2361616161616166e-06, "loss": 6.279583740234375, "step": 35925 }, { "epoch": 0.0583, "grad_norm": 4.397896766662598, "learning_rate": 3.235909090909091e-06, "loss": 6.259646987915039, "step": 35930 }, { "epoch": 0.05835, "grad_norm": 8.066814422607422, "learning_rate": 3.235656565656566e-06, "loss": 6.285501098632812, "step": 35935 }, { "epoch": 0.0584, "grad_norm": 4.331371784210205, "learning_rate": 3.2354040404040405e-06, "loss": 6.293682479858399, "step": 35940 }, { "epoch": 0.05845, "grad_norm": 3.570474624633789, "learning_rate": 3.2351515151515155e-06, "loss": 6.2079521179199215, "step": 35945 }, { "epoch": 0.0585, "grad_norm": 6.134387969970703, "learning_rate": 3.23489898989899e-06, "loss": 6.276849365234375, "step": 35950 }, { "epoch": 0.05855, "grad_norm": 5.413759708404541, "learning_rate": 3.2346464646464648e-06, "loss": 6.315478515625, "step": 35955 }, { "epoch": 0.0586, "grad_norm": 5.291648864746094, "learning_rate": 3.2343939393939394e-06, "loss": 6.238240814208984, "step": 35960 }, { "epoch": 0.05865, "grad_norm": 5.8856282234191895, "learning_rate": 3.2341414141414144e-06, "loss": 6.291497421264649, "step": 35965 }, { "epoch": 0.0587, "grad_norm": 15.721312522888184, "learning_rate": 3.233888888888889e-06, "loss": 6.323081207275391, "step": 35970 }, { "epoch": 0.05875, "grad_norm": 4.041512966156006, "learning_rate": 3.2336363636363637e-06, "loss": 6.247463989257812, "step": 35975 }, { "epoch": 0.0588, "grad_norm": 6.351573944091797, "learning_rate": 3.2333838383838383e-06, "loss": 6.304759979248047, "step": 35980 }, { "epoch": 0.05885, "grad_norm": 5.904655456542969, "learning_rate": 3.2331313131313134e-06, "loss": 6.29975700378418, "step": 35985 }, { "epoch": 0.0589, "grad_norm": 5.130357265472412, "learning_rate": 3.232878787878788e-06, "loss": 6.303535079956054, "step": 35990 }, { "epoch": 0.05895, "grad_norm": 6.26955509185791, "learning_rate": 3.2326262626262627e-06, "loss": 6.2895957946777346, "step": 35995 }, { "epoch": 0.059, "grad_norm": 7.535348415374756, "learning_rate": 3.2323737373737373e-06, "loss": 6.313241577148437, "step": 36000 }, { "epoch": 0.05905, "grad_norm": 12.712775230407715, "learning_rate": 3.2321212121212128e-06, "loss": 6.263056945800781, "step": 36005 }, { "epoch": 0.0591, "grad_norm": 7.676797389984131, "learning_rate": 3.231868686868687e-06, "loss": 6.305780029296875, "step": 36010 }, { "epoch": 0.05915, "grad_norm": 12.023932456970215, "learning_rate": 3.2316161616161616e-06, "loss": 6.267766952514648, "step": 36015 }, { "epoch": 0.0592, "grad_norm": 9.03357219696045, "learning_rate": 3.2313636363636362e-06, "loss": 6.352530670166016, "step": 36020 }, { "epoch": 0.05925, "grad_norm": 8.658413887023926, "learning_rate": 3.2311111111111117e-06, "loss": 6.235383987426758, "step": 36025 }, { "epoch": 0.0593, "grad_norm": 4.692612648010254, "learning_rate": 3.2308585858585863e-06, "loss": 6.290262222290039, "step": 36030 }, { "epoch": 0.05935, "grad_norm": 10.57754898071289, "learning_rate": 3.2306060606060605e-06, "loss": 6.3117118835449215, "step": 36035 }, { "epoch": 0.0594, "grad_norm": 5.5504655838012695, "learning_rate": 3.230353535353535e-06, "loss": 6.312933349609375, "step": 36040 }, { "epoch": 0.05945, "grad_norm": 7.025251865386963, "learning_rate": 3.2301010101010106e-06, "loss": 6.281905746459961, "step": 36045 }, { "epoch": 0.0595, "grad_norm": 6.9643120765686035, "learning_rate": 3.2298484848484853e-06, "loss": 6.259133148193359, "step": 36050 }, { "epoch": 0.05955, "grad_norm": 7.093824863433838, "learning_rate": 3.22959595959596e-06, "loss": 6.254846572875977, "step": 36055 }, { "epoch": 0.0596, "grad_norm": 4.52366304397583, "learning_rate": 3.2293434343434345e-06, "loss": 6.303619384765625, "step": 36060 }, { "epoch": 0.05965, "grad_norm": 4.955570220947266, "learning_rate": 3.2290909090909096e-06, "loss": 6.277841186523437, "step": 36065 }, { "epoch": 0.0597, "grad_norm": 5.961994171142578, "learning_rate": 3.2288383838383842e-06, "loss": 6.258874130249024, "step": 36070 }, { "epoch": 0.05975, "grad_norm": 8.873900413513184, "learning_rate": 3.228585858585859e-06, "loss": 6.263679122924804, "step": 36075 }, { "epoch": 0.0598, "grad_norm": 7.561686038970947, "learning_rate": 3.228333333333334e-06, "loss": 6.328676223754883, "step": 36080 }, { "epoch": 0.05985, "grad_norm": 4.68233585357666, "learning_rate": 3.2280808080808085e-06, "loss": 6.334548568725586, "step": 36085 }, { "epoch": 0.0599, "grad_norm": 6.388730049133301, "learning_rate": 3.227828282828283e-06, "loss": 6.272335815429687, "step": 36090 }, { "epoch": 0.05995, "grad_norm": 4.543592929840088, "learning_rate": 3.227575757575758e-06, "loss": 6.253975677490234, "step": 36095 }, { "epoch": 0.06, "grad_norm": 4.89066743850708, "learning_rate": 3.227323232323233e-06, "loss": 6.266605377197266, "step": 36100 }, { "epoch": 0.06005, "grad_norm": 6.186338901519775, "learning_rate": 3.2270707070707075e-06, "loss": 6.305081939697265, "step": 36105 }, { "epoch": 0.0601, "grad_norm": 4.3657355308532715, "learning_rate": 3.226818181818182e-06, "loss": 6.2908683776855465, "step": 36110 }, { "epoch": 0.06015, "grad_norm": 9.564868927001953, "learning_rate": 3.2265656565656567e-06, "loss": 6.236258697509766, "step": 36115 }, { "epoch": 0.0602, "grad_norm": 10.165773391723633, "learning_rate": 3.226313131313132e-06, "loss": 6.378783798217773, "step": 36120 }, { "epoch": 0.06025, "grad_norm": 7.923320770263672, "learning_rate": 3.2260606060606064e-06, "loss": 6.258442687988281, "step": 36125 }, { "epoch": 0.0603, "grad_norm": 15.441763877868652, "learning_rate": 3.225808080808081e-06, "loss": 6.240509796142578, "step": 36130 }, { "epoch": 0.06035, "grad_norm": 7.536803245544434, "learning_rate": 3.2255555555555557e-06, "loss": 6.297138214111328, "step": 36135 }, { "epoch": 0.0604, "grad_norm": 13.460394859313965, "learning_rate": 3.2253030303030307e-06, "loss": 6.386284637451172, "step": 36140 }, { "epoch": 0.06045, "grad_norm": 8.224822044372559, "learning_rate": 3.2250505050505054e-06, "loss": 6.2711235046386715, "step": 36145 }, { "epoch": 0.0605, "grad_norm": 5.337498664855957, "learning_rate": 3.22479797979798e-06, "loss": 6.281629180908203, "step": 36150 }, { "epoch": 0.06055, "grad_norm": 7.787458419799805, "learning_rate": 3.2245454545454546e-06, "loss": 6.3079345703125, "step": 36155 }, { "epoch": 0.0606, "grad_norm": 6.076412200927734, "learning_rate": 3.2242929292929297e-06, "loss": 6.241101455688477, "step": 36160 }, { "epoch": 0.06065, "grad_norm": 7.842571258544922, "learning_rate": 3.2240404040404043e-06, "loss": 6.258106994628906, "step": 36165 }, { "epoch": 0.0607, "grad_norm": 4.963227272033691, "learning_rate": 3.223787878787879e-06, "loss": 6.300220489501953, "step": 36170 }, { "epoch": 0.06075, "grad_norm": 4.899511337280273, "learning_rate": 3.2235353535353536e-06, "loss": 6.292774963378906, "step": 36175 }, { "epoch": 0.0608, "grad_norm": 7.647768497467041, "learning_rate": 3.2232828282828286e-06, "loss": 6.281070327758789, "step": 36180 }, { "epoch": 0.06085, "grad_norm": 6.872768402099609, "learning_rate": 3.2230303030303033e-06, "loss": 6.295463943481446, "step": 36185 }, { "epoch": 0.0609, "grad_norm": 6.429582118988037, "learning_rate": 3.222777777777778e-06, "loss": 6.285393142700196, "step": 36190 }, { "epoch": 0.06095, "grad_norm": 4.48520565032959, "learning_rate": 3.2225252525252525e-06, "loss": 6.304536819458008, "step": 36195 }, { "epoch": 0.061, "grad_norm": 7.625729084014893, "learning_rate": 3.2222727272727276e-06, "loss": 6.2686279296875, "step": 36200 }, { "epoch": 0.06105, "grad_norm": 4.540413856506348, "learning_rate": 3.222020202020202e-06, "loss": 6.559607696533203, "step": 36205 }, { "epoch": 0.0611, "grad_norm": 5.8775177001953125, "learning_rate": 3.221767676767677e-06, "loss": 6.222863388061524, "step": 36210 }, { "epoch": 0.06115, "grad_norm": 5.781510829925537, "learning_rate": 3.2215151515151515e-06, "loss": 6.299044799804688, "step": 36215 }, { "epoch": 0.0612, "grad_norm": 8.922098159790039, "learning_rate": 3.221262626262627e-06, "loss": 6.36382827758789, "step": 36220 }, { "epoch": 0.06125, "grad_norm": 5.6246161460876465, "learning_rate": 3.2210101010101016e-06, "loss": 6.31041259765625, "step": 36225 }, { "epoch": 0.0613, "grad_norm": 10.640532493591309, "learning_rate": 3.2207575757575758e-06, "loss": 6.2818950653076175, "step": 36230 }, { "epoch": 0.06135, "grad_norm": 9.122007369995117, "learning_rate": 3.2205050505050504e-06, "loss": 6.20995979309082, "step": 36235 }, { "epoch": 0.0614, "grad_norm": 5.271838188171387, "learning_rate": 3.220252525252526e-06, "loss": 6.332762908935547, "step": 36240 }, { "epoch": 0.06145, "grad_norm": 5.050696849822998, "learning_rate": 3.2200000000000005e-06, "loss": 6.260143280029297, "step": 36245 }, { "epoch": 0.0615, "grad_norm": 3.4901607036590576, "learning_rate": 3.219747474747475e-06, "loss": 6.223968505859375, "step": 36250 }, { "epoch": 0.06155, "grad_norm": 5.0697855949401855, "learning_rate": 3.2194949494949498e-06, "loss": 6.268159103393555, "step": 36255 }, { "epoch": 0.0616, "grad_norm": 12.004992485046387, "learning_rate": 3.219242424242425e-06, "loss": 6.2146759033203125, "step": 36260 }, { "epoch": 0.06165, "grad_norm": 4.376293182373047, "learning_rate": 3.2189898989898995e-06, "loss": 6.300410461425781, "step": 36265 }, { "epoch": 0.0617, "grad_norm": 9.892372131347656, "learning_rate": 3.218737373737374e-06, "loss": 6.313584899902343, "step": 36270 }, { "epoch": 0.06175, "grad_norm": 7.598756790161133, "learning_rate": 3.2184848484848487e-06, "loss": 6.2634124755859375, "step": 36275 }, { "epoch": 0.0618, "grad_norm": 6.2489542961120605, "learning_rate": 3.2182323232323238e-06, "loss": 6.299337005615234, "step": 36280 }, { "epoch": 0.06185, "grad_norm": 4.525169849395752, "learning_rate": 3.2179797979797984e-06, "loss": 6.126369857788086, "step": 36285 }, { "epoch": 0.0619, "grad_norm": 11.928300857543945, "learning_rate": 3.217727272727273e-06, "loss": 6.309471130371094, "step": 36290 }, { "epoch": 0.06195, "grad_norm": 10.453754425048828, "learning_rate": 3.2174747474747477e-06, "loss": 6.2355705261230465, "step": 36295 }, { "epoch": 0.062, "grad_norm": 5.765225887298584, "learning_rate": 3.2172222222222227e-06, "loss": 6.270162200927734, "step": 36300 }, { "epoch": 0.06205, "grad_norm": 6.815283298492432, "learning_rate": 3.2169696969696973e-06, "loss": 6.35916748046875, "step": 36305 }, { "epoch": 0.0621, "grad_norm": 3.6637816429138184, "learning_rate": 3.216717171717172e-06, "loss": 6.314794921875, "step": 36310 }, { "epoch": 0.06215, "grad_norm": 5.829189300537109, "learning_rate": 3.2164646464646466e-06, "loss": 6.298483276367188, "step": 36315 }, { "epoch": 0.0622, "grad_norm": 7.153108596801758, "learning_rate": 3.2162121212121217e-06, "loss": 6.2391609191894535, "step": 36320 }, { "epoch": 0.06225, "grad_norm": 6.416375160217285, "learning_rate": 3.2159595959595963e-06, "loss": 6.31207389831543, "step": 36325 }, { "epoch": 0.0623, "grad_norm": 4.045112133026123, "learning_rate": 3.215707070707071e-06, "loss": 6.2235664367675785, "step": 36330 }, { "epoch": 0.06235, "grad_norm": 8.590131759643555, "learning_rate": 3.2154545454545455e-06, "loss": 6.307373428344727, "step": 36335 }, { "epoch": 0.0624, "grad_norm": 5.265331745147705, "learning_rate": 3.2152020202020206e-06, "loss": 6.290859985351562, "step": 36340 }, { "epoch": 0.06245, "grad_norm": 3.652536630630493, "learning_rate": 3.2149494949494952e-06, "loss": 6.300914001464844, "step": 36345 }, { "epoch": 0.0625, "grad_norm": 8.71297836303711, "learning_rate": 3.21469696969697e-06, "loss": 6.232506179809571, "step": 36350 }, { "epoch": 0.06255, "grad_norm": 6.864224910736084, "learning_rate": 3.2144444444444445e-06, "loss": 6.29503059387207, "step": 36355 }, { "epoch": 0.0626, "grad_norm": 9.109619140625, "learning_rate": 3.2141919191919195e-06, "loss": 6.2911376953125, "step": 36360 }, { "epoch": 0.06265, "grad_norm": 7.293137550354004, "learning_rate": 3.213939393939394e-06, "loss": 6.420845794677734, "step": 36365 }, { "epoch": 0.0627, "grad_norm": 4.42216157913208, "learning_rate": 3.213686868686869e-06, "loss": 6.307848358154297, "step": 36370 }, { "epoch": 0.06275, "grad_norm": 12.873619079589844, "learning_rate": 3.2134343434343434e-06, "loss": 6.225627899169922, "step": 36375 }, { "epoch": 0.0628, "grad_norm": 5.7958784103393555, "learning_rate": 3.2131818181818185e-06, "loss": 6.281547164916992, "step": 36380 }, { "epoch": 0.06285, "grad_norm": 3.507535696029663, "learning_rate": 3.212929292929293e-06, "loss": 6.301770782470703, "step": 36385 }, { "epoch": 0.0629, "grad_norm": 5.874702453613281, "learning_rate": 3.2126767676767677e-06, "loss": 6.245493698120117, "step": 36390 }, { "epoch": 0.06295, "grad_norm": 29.086824417114258, "learning_rate": 3.2124242424242424e-06, "loss": 6.2985893249511715, "step": 36395 }, { "epoch": 0.063, "grad_norm": 6.362099647521973, "learning_rate": 3.2121717171717174e-06, "loss": 6.276800537109375, "step": 36400 }, { "epoch": 0.06305, "grad_norm": 4.904004096984863, "learning_rate": 3.211919191919192e-06, "loss": 6.273395919799805, "step": 36405 }, { "epoch": 0.0631, "grad_norm": 28.616527557373047, "learning_rate": 3.2116666666666667e-06, "loss": 6.3278144836425785, "step": 36410 }, { "epoch": 0.06315, "grad_norm": 6.59384822845459, "learning_rate": 3.2114141414141413e-06, "loss": 6.303453826904297, "step": 36415 }, { "epoch": 0.0632, "grad_norm": 3.29955792427063, "learning_rate": 3.211161616161617e-06, "loss": 6.281675720214844, "step": 36420 }, { "epoch": 0.06325, "grad_norm": 5.977292537689209, "learning_rate": 3.210909090909091e-06, "loss": 6.278942489624024, "step": 36425 }, { "epoch": 0.0633, "grad_norm": 6.334712505340576, "learning_rate": 3.2106565656565656e-06, "loss": 6.29701042175293, "step": 36430 }, { "epoch": 0.06335, "grad_norm": 4.855901718139648, "learning_rate": 3.2104040404040403e-06, "loss": 6.301630020141602, "step": 36435 }, { "epoch": 0.0634, "grad_norm": 6.083690643310547, "learning_rate": 3.2101515151515157e-06, "loss": 6.2804313659667965, "step": 36440 }, { "epoch": 0.06345, "grad_norm": 6.225592136383057, "learning_rate": 3.2098989898989904e-06, "loss": 6.269704818725586, "step": 36445 }, { "epoch": 0.0635, "grad_norm": 8.574225425720215, "learning_rate": 3.2096464646464646e-06, "loss": 6.2972465515136715, "step": 36450 }, { "epoch": 0.06355, "grad_norm": 7.702210903167725, "learning_rate": 3.209393939393939e-06, "loss": 6.224702453613281, "step": 36455 }, { "epoch": 0.0636, "grad_norm": 4.262222766876221, "learning_rate": 3.2091414141414147e-06, "loss": 6.301583862304687, "step": 36460 }, { "epoch": 0.06365, "grad_norm": 5.451959133148193, "learning_rate": 3.2088888888888893e-06, "loss": 6.239467620849609, "step": 36465 }, { "epoch": 0.0637, "grad_norm": 10.184785842895508, "learning_rate": 3.208636363636364e-06, "loss": 6.3113056182861325, "step": 36470 }, { "epoch": 0.06375, "grad_norm": 3.2486181259155273, "learning_rate": 3.2083838383838386e-06, "loss": 6.24644775390625, "step": 36475 }, { "epoch": 0.0638, "grad_norm": 5.424536228179932, "learning_rate": 3.2081313131313136e-06, "loss": 6.269618225097656, "step": 36480 }, { "epoch": 0.06385, "grad_norm": 6.748194217681885, "learning_rate": 3.2078787878787883e-06, "loss": 6.278475570678711, "step": 36485 }, { "epoch": 0.0639, "grad_norm": 9.169546127319336, "learning_rate": 3.207626262626263e-06, "loss": 6.329922866821289, "step": 36490 }, { "epoch": 0.06395, "grad_norm": 23.186458587646484, "learning_rate": 3.2073737373737375e-06, "loss": 6.319356155395508, "step": 36495 }, { "epoch": 0.064, "grad_norm": 5.8647379875183105, "learning_rate": 3.2071212121212126e-06, "loss": 6.319100952148437, "step": 36500 }, { "epoch": 0.06405, "grad_norm": 10.640201568603516, "learning_rate": 3.206868686868687e-06, "loss": 6.2986328125, "step": 36505 }, { "epoch": 0.0641, "grad_norm": 4.382297515869141, "learning_rate": 3.206616161616162e-06, "loss": 6.31114616394043, "step": 36510 }, { "epoch": 0.06415, "grad_norm": 5.037419319152832, "learning_rate": 3.206363636363637e-06, "loss": 6.2956184387207035, "step": 36515 }, { "epoch": 0.0642, "grad_norm": 5.723256587982178, "learning_rate": 3.2061111111111115e-06, "loss": 6.248553085327148, "step": 36520 }, { "epoch": 0.06425, "grad_norm": 7.2562785148620605, "learning_rate": 3.205858585858586e-06, "loss": 6.310342407226562, "step": 36525 }, { "epoch": 0.0643, "grad_norm": 7.460848808288574, "learning_rate": 3.2056060606060608e-06, "loss": 6.24383544921875, "step": 36530 }, { "epoch": 0.06435, "grad_norm": 5.971197605133057, "learning_rate": 3.205353535353536e-06, "loss": 6.323737335205078, "step": 36535 }, { "epoch": 0.0644, "grad_norm": 4.572028160095215, "learning_rate": 3.2051010101010105e-06, "loss": 6.260614395141602, "step": 36540 }, { "epoch": 0.06445, "grad_norm": 6.867109298706055, "learning_rate": 3.204848484848485e-06, "loss": 6.275481033325195, "step": 36545 }, { "epoch": 0.0645, "grad_norm": 6.735420227050781, "learning_rate": 3.2045959595959597e-06, "loss": 6.305111694335937, "step": 36550 }, { "epoch": 0.06455, "grad_norm": 4.760326385498047, "learning_rate": 3.2043434343434348e-06, "loss": 6.236370849609375, "step": 36555 }, { "epoch": 0.0646, "grad_norm": 7.866374969482422, "learning_rate": 3.2040909090909094e-06, "loss": 6.282479858398437, "step": 36560 }, { "epoch": 0.06465, "grad_norm": 8.262809753417969, "learning_rate": 3.203838383838384e-06, "loss": 6.25245361328125, "step": 36565 }, { "epoch": 0.0647, "grad_norm": 14.330068588256836, "learning_rate": 3.2035858585858587e-06, "loss": 6.5194854736328125, "step": 36570 }, { "epoch": 0.06475, "grad_norm": 3.985698699951172, "learning_rate": 3.2033333333333337e-06, "loss": 6.3430023193359375, "step": 36575 }, { "epoch": 0.0648, "grad_norm": 7.45810079574585, "learning_rate": 3.2030808080808084e-06, "loss": 6.309085464477539, "step": 36580 }, { "epoch": 0.06485, "grad_norm": 7.329688549041748, "learning_rate": 3.202828282828283e-06, "loss": 6.271371459960937, "step": 36585 }, { "epoch": 0.0649, "grad_norm": 11.73178768157959, "learning_rate": 3.2025757575757576e-06, "loss": 6.253252029418945, "step": 36590 }, { "epoch": 0.06495, "grad_norm": 11.004264831542969, "learning_rate": 3.2023232323232327e-06, "loss": 6.337440490722656, "step": 36595 }, { "epoch": 0.065, "grad_norm": 5.008199691772461, "learning_rate": 3.2020707070707073e-06, "loss": 6.3322101593017575, "step": 36600 }, { "epoch": 0.06505, "grad_norm": 6.124993324279785, "learning_rate": 3.201818181818182e-06, "loss": 6.25776481628418, "step": 36605 }, { "epoch": 0.0651, "grad_norm": 6.818061351776123, "learning_rate": 3.2015656565656566e-06, "loss": 6.282144165039062, "step": 36610 }, { "epoch": 0.06515, "grad_norm": 8.341489791870117, "learning_rate": 3.2013131313131316e-06, "loss": 6.294455718994141, "step": 36615 }, { "epoch": 0.0652, "grad_norm": 4.5087714195251465, "learning_rate": 3.2010606060606062e-06, "loss": 6.335089492797851, "step": 36620 }, { "epoch": 0.06525, "grad_norm": 6.062823295593262, "learning_rate": 3.200808080808081e-06, "loss": 6.24322395324707, "step": 36625 }, { "epoch": 0.0653, "grad_norm": 26.20579719543457, "learning_rate": 3.2005555555555555e-06, "loss": 6.260039138793945, "step": 36630 }, { "epoch": 0.06535, "grad_norm": 8.18043041229248, "learning_rate": 3.200303030303031e-06, "loss": 6.2097831726074215, "step": 36635 }, { "epoch": 0.0654, "grad_norm": 10.112653732299805, "learning_rate": 3.2000505050505056e-06, "loss": 6.248643493652343, "step": 36640 }, { "epoch": 0.06545, "grad_norm": 5.432191848754883, "learning_rate": 3.19979797979798e-06, "loss": 6.275307846069336, "step": 36645 }, { "epoch": 0.0655, "grad_norm": 7.122717380523682, "learning_rate": 3.1995454545454544e-06, "loss": 6.266431427001953, "step": 36650 }, { "epoch": 0.06555, "grad_norm": 5.065654277801514, "learning_rate": 3.19929292929293e-06, "loss": 6.245184707641601, "step": 36655 }, { "epoch": 0.0656, "grad_norm": 13.7454252243042, "learning_rate": 3.1990404040404046e-06, "loss": 6.32977066040039, "step": 36660 }, { "epoch": 0.06565, "grad_norm": 8.665738105773926, "learning_rate": 3.198787878787879e-06, "loss": 6.3129627227783205, "step": 36665 }, { "epoch": 0.0657, "grad_norm": 21.71206283569336, "learning_rate": 3.198535353535354e-06, "loss": 6.281419372558593, "step": 36670 }, { "epoch": 0.06575, "grad_norm": 4.529301643371582, "learning_rate": 3.198282828282829e-06, "loss": 6.316851806640625, "step": 36675 }, { "epoch": 0.0658, "grad_norm": 6.656844139099121, "learning_rate": 3.1980303030303035e-06, "loss": 6.28864860534668, "step": 36680 }, { "epoch": 0.06585, "grad_norm": 5.826907157897949, "learning_rate": 3.197777777777778e-06, "loss": 6.304923248291016, "step": 36685 }, { "epoch": 0.0659, "grad_norm": 4.922420024871826, "learning_rate": 3.1975252525252528e-06, "loss": 6.282396697998047, "step": 36690 }, { "epoch": 0.06595, "grad_norm": 3.8270514011383057, "learning_rate": 3.197272727272728e-06, "loss": 6.3102058410644535, "step": 36695 }, { "epoch": 0.066, "grad_norm": 6.784185409545898, "learning_rate": 3.1970202020202024e-06, "loss": 6.303804397583008, "step": 36700 }, { "epoch": 0.06605, "grad_norm": 4.960334777832031, "learning_rate": 3.196767676767677e-06, "loss": 6.290188598632812, "step": 36705 }, { "epoch": 0.0661, "grad_norm": 5.850165367126465, "learning_rate": 3.1965151515151517e-06, "loss": 6.233304977416992, "step": 36710 }, { "epoch": 0.06615, "grad_norm": 4.843886852264404, "learning_rate": 3.1962626262626268e-06, "loss": 6.310402679443359, "step": 36715 }, { "epoch": 0.0662, "grad_norm": 10.043581008911133, "learning_rate": 3.1960101010101014e-06, "loss": 6.323233032226563, "step": 36720 }, { "epoch": 0.06625, "grad_norm": 8.726954460144043, "learning_rate": 3.195757575757576e-06, "loss": 6.256411743164063, "step": 36725 }, { "epoch": 0.0663, "grad_norm": 7.3386406898498535, "learning_rate": 3.1955050505050506e-06, "loss": 6.23834457397461, "step": 36730 }, { "epoch": 0.06635, "grad_norm": 5.120389461517334, "learning_rate": 3.1952525252525257e-06, "loss": 6.290074157714844, "step": 36735 }, { "epoch": 0.0664, "grad_norm": 6.098342418670654, "learning_rate": 3.1950000000000003e-06, "loss": 6.23592529296875, "step": 36740 }, { "epoch": 0.06645, "grad_norm": 6.246496677398682, "learning_rate": 3.194747474747475e-06, "loss": 6.3155677795410154, "step": 36745 }, { "epoch": 0.0665, "grad_norm": 4.702640056610107, "learning_rate": 3.1944949494949496e-06, "loss": 6.3126884460449215, "step": 36750 }, { "epoch": 0.06655, "grad_norm": 15.014023780822754, "learning_rate": 3.1942424242424246e-06, "loss": 6.26661262512207, "step": 36755 }, { "epoch": 0.0666, "grad_norm": 10.165748596191406, "learning_rate": 3.1939898989898993e-06, "loss": 6.2966148376464846, "step": 36760 }, { "epoch": 0.06665, "grad_norm": 4.553430080413818, "learning_rate": 3.193737373737374e-06, "loss": 6.340534973144531, "step": 36765 }, { "epoch": 0.0667, "grad_norm": 5.329336643218994, "learning_rate": 3.1934848484848485e-06, "loss": 6.320477294921875, "step": 36770 }, { "epoch": 0.06675, "grad_norm": 7.1557159423828125, "learning_rate": 3.1932323232323236e-06, "loss": 6.270941162109375, "step": 36775 }, { "epoch": 0.0668, "grad_norm": 4.873653888702393, "learning_rate": 3.1929797979797982e-06, "loss": 6.26012954711914, "step": 36780 }, { "epoch": 0.06685, "grad_norm": 4.634731292724609, "learning_rate": 3.192727272727273e-06, "loss": 6.212375259399414, "step": 36785 }, { "epoch": 0.0669, "grad_norm": 6.964846611022949, "learning_rate": 3.1924747474747475e-06, "loss": 6.27418327331543, "step": 36790 }, { "epoch": 0.06695, "grad_norm": 6.433138847351074, "learning_rate": 3.1922222222222225e-06, "loss": 6.224685287475586, "step": 36795 }, { "epoch": 0.067, "grad_norm": 14.013001441955566, "learning_rate": 3.191969696969697e-06, "loss": 6.302244186401367, "step": 36800 }, { "epoch": 0.06705, "grad_norm": 11.217350959777832, "learning_rate": 3.191717171717172e-06, "loss": 6.313824844360352, "step": 36805 }, { "epoch": 0.0671, "grad_norm": 8.622830390930176, "learning_rate": 3.1914646464646464e-06, "loss": 6.2494861602783205, "step": 36810 }, { "epoch": 0.06715, "grad_norm": 7.1318583488464355, "learning_rate": 3.1912121212121215e-06, "loss": 6.255902862548828, "step": 36815 }, { "epoch": 0.0672, "grad_norm": 8.694733619689941, "learning_rate": 3.190959595959596e-06, "loss": 6.345337295532227, "step": 36820 }, { "epoch": 0.06725, "grad_norm": 3.5687856674194336, "learning_rate": 3.1907070707070707e-06, "loss": 6.247873687744141, "step": 36825 }, { "epoch": 0.0673, "grad_norm": 13.05228042602539, "learning_rate": 3.1904545454545454e-06, "loss": 6.304437255859375, "step": 36830 }, { "epoch": 0.06735, "grad_norm": 7.574673652648926, "learning_rate": 3.190202020202021e-06, "loss": 6.264813995361328, "step": 36835 }, { "epoch": 0.0674, "grad_norm": 8.454272270202637, "learning_rate": 3.189949494949495e-06, "loss": 6.281142807006836, "step": 36840 }, { "epoch": 0.06745, "grad_norm": 5.634749889373779, "learning_rate": 3.1896969696969697e-06, "loss": 6.287031555175782, "step": 36845 }, { "epoch": 0.0675, "grad_norm": 7.528637886047363, "learning_rate": 3.1894444444444443e-06, "loss": 6.278181076049805, "step": 36850 }, { "epoch": 0.06755, "grad_norm": 7.446175575256348, "learning_rate": 3.1891919191919198e-06, "loss": 6.261293029785156, "step": 36855 }, { "epoch": 0.0676, "grad_norm": 10.109809875488281, "learning_rate": 3.1889393939393944e-06, "loss": 6.340166473388672, "step": 36860 }, { "epoch": 0.06765, "grad_norm": 6.301211833953857, "learning_rate": 3.1886868686868686e-06, "loss": 6.258364868164063, "step": 36865 }, { "epoch": 0.0677, "grad_norm": 7.835581302642822, "learning_rate": 3.1884343434343433e-06, "loss": 6.300771713256836, "step": 36870 }, { "epoch": 0.06775, "grad_norm": 3.8764357566833496, "learning_rate": 3.1881818181818187e-06, "loss": 6.294715881347656, "step": 36875 }, { "epoch": 0.0678, "grad_norm": 4.5970072746276855, "learning_rate": 3.1879292929292934e-06, "loss": 6.315521621704102, "step": 36880 }, { "epoch": 0.06785, "grad_norm": 3.880495309829712, "learning_rate": 3.187676767676768e-06, "loss": 6.277821350097656, "step": 36885 }, { "epoch": 0.0679, "grad_norm": 14.495641708374023, "learning_rate": 3.1874242424242426e-06, "loss": 6.25934944152832, "step": 36890 }, { "epoch": 0.06795, "grad_norm": 4.933335304260254, "learning_rate": 3.1871717171717177e-06, "loss": 6.255839157104492, "step": 36895 }, { "epoch": 0.068, "grad_norm": 5.822869777679443, "learning_rate": 3.1869191919191923e-06, "loss": 6.279859924316407, "step": 36900 }, { "epoch": 0.06805, "grad_norm": 4.854493618011475, "learning_rate": 3.186666666666667e-06, "loss": 6.303155517578125, "step": 36905 }, { "epoch": 0.0681, "grad_norm": 4.83787202835083, "learning_rate": 3.1864141414141416e-06, "loss": 6.318019104003906, "step": 36910 }, { "epoch": 0.06815, "grad_norm": 8.370582580566406, "learning_rate": 3.1861616161616166e-06, "loss": 6.235736846923828, "step": 36915 }, { "epoch": 0.0682, "grad_norm": 8.788969039916992, "learning_rate": 3.1859090909090912e-06, "loss": 6.289747619628907, "step": 36920 }, { "epoch": 0.06825, "grad_norm": 14.561031341552734, "learning_rate": 3.185656565656566e-06, "loss": 6.368999862670899, "step": 36925 }, { "epoch": 0.0683, "grad_norm": 13.909146308898926, "learning_rate": 3.1854040404040405e-06, "loss": 6.402287292480469, "step": 36930 }, { "epoch": 0.06835, "grad_norm": 9.776554107666016, "learning_rate": 3.1851515151515156e-06, "loss": 6.209181213378907, "step": 36935 }, { "epoch": 0.0684, "grad_norm": 5.79573917388916, "learning_rate": 3.18489898989899e-06, "loss": 6.266481018066406, "step": 36940 }, { "epoch": 0.06845, "grad_norm": 4.9505696296691895, "learning_rate": 3.184646464646465e-06, "loss": 6.2646537780761715, "step": 36945 }, { "epoch": 0.0685, "grad_norm": 10.019060134887695, "learning_rate": 3.18439393939394e-06, "loss": 6.374764251708984, "step": 36950 }, { "epoch": 0.06855, "grad_norm": 6.758440017700195, "learning_rate": 3.1841414141414145e-06, "loss": 6.247418212890625, "step": 36955 }, { "epoch": 0.0686, "grad_norm": 8.072874069213867, "learning_rate": 3.183888888888889e-06, "loss": 6.315916061401367, "step": 36960 }, { "epoch": 0.06865, "grad_norm": 7.8163251876831055, "learning_rate": 3.1836363636363638e-06, "loss": 6.260083389282227, "step": 36965 }, { "epoch": 0.0687, "grad_norm": 5.4313154220581055, "learning_rate": 3.183383838383839e-06, "loss": 6.289601135253906, "step": 36970 }, { "epoch": 0.06875, "grad_norm": 7.927548885345459, "learning_rate": 3.1831313131313134e-06, "loss": 6.298538589477539, "step": 36975 }, { "epoch": 0.0688, "grad_norm": 9.07821273803711, "learning_rate": 3.182878787878788e-06, "loss": 6.424419403076172, "step": 36980 }, { "epoch": 0.06885, "grad_norm": 6.260650634765625, "learning_rate": 3.1826262626262627e-06, "loss": 6.2590595245361325, "step": 36985 }, { "epoch": 0.0689, "grad_norm": 8.849831581115723, "learning_rate": 3.1823737373737378e-06, "loss": 6.292101669311523, "step": 36990 }, { "epoch": 0.06895, "grad_norm": 7.450666427612305, "learning_rate": 3.1821212121212124e-06, "loss": 6.24237289428711, "step": 36995 }, { "epoch": 0.069, "grad_norm": 41.54914474487305, "learning_rate": 3.181868686868687e-06, "loss": 6.034409713745117, "step": 37000 }, { "epoch": 0.06905, "grad_norm": 6.24685001373291, "learning_rate": 3.1816161616161617e-06, "loss": 6.5970298767089846, "step": 37005 }, { "epoch": 0.0691, "grad_norm": 25.870080947875977, "learning_rate": 3.1813636363636367e-06, "loss": 6.455099487304688, "step": 37010 }, { "epoch": 0.06915, "grad_norm": 9.891494750976562, "learning_rate": 3.1811111111111113e-06, "loss": 6.297624206542968, "step": 37015 }, { "epoch": 0.0692, "grad_norm": 5.263562202453613, "learning_rate": 3.180858585858586e-06, "loss": 6.271697998046875, "step": 37020 }, { "epoch": 0.06925, "grad_norm": 4.351042747497559, "learning_rate": 3.1806060606060606e-06, "loss": 6.261989974975586, "step": 37025 }, { "epoch": 0.0693, "grad_norm": 9.8711576461792, "learning_rate": 3.1803535353535356e-06, "loss": 6.327390670776367, "step": 37030 }, { "epoch": 0.06935, "grad_norm": 6.957086563110352, "learning_rate": 3.1801010101010103e-06, "loss": 6.368532943725586, "step": 37035 }, { "epoch": 0.0694, "grad_norm": 7.466855525970459, "learning_rate": 3.179848484848485e-06, "loss": 6.257283782958984, "step": 37040 }, { "epoch": 0.06945, "grad_norm": 9.122794151306152, "learning_rate": 3.1795959595959595e-06, "loss": 6.4168243408203125, "step": 37045 }, { "epoch": 0.0695, "grad_norm": 7.487881660461426, "learning_rate": 3.179343434343435e-06, "loss": 6.319709777832031, "step": 37050 }, { "epoch": 0.06955, "grad_norm": 4.08802604675293, "learning_rate": 3.1790909090909096e-06, "loss": 6.263545227050781, "step": 37055 }, { "epoch": 0.0696, "grad_norm": 8.976502418518066, "learning_rate": 3.178838383838384e-06, "loss": 6.275422668457031, "step": 37060 }, { "epoch": 0.06965, "grad_norm": 4.375060081481934, "learning_rate": 3.1785858585858585e-06, "loss": 6.3117218017578125, "step": 37065 }, { "epoch": 0.0697, "grad_norm": 10.198288917541504, "learning_rate": 3.178333333333334e-06, "loss": 6.316823577880859, "step": 37070 }, { "epoch": 0.06975, "grad_norm": 18.475969314575195, "learning_rate": 3.1780808080808086e-06, "loss": 6.350886535644531, "step": 37075 }, { "epoch": 0.0698, "grad_norm": 14.229499816894531, "learning_rate": 3.1778282828282832e-06, "loss": 6.314219665527344, "step": 37080 }, { "epoch": 0.06985, "grad_norm": 9.79071044921875, "learning_rate": 3.177575757575758e-06, "loss": 6.293221282958984, "step": 37085 }, { "epoch": 0.0699, "grad_norm": 23.957731246948242, "learning_rate": 3.177323232323233e-06, "loss": 6.333706283569336, "step": 37090 }, { "epoch": 0.06995, "grad_norm": 4.546286582946777, "learning_rate": 3.1770707070707075e-06, "loss": 6.293847274780274, "step": 37095 }, { "epoch": 0.07, "grad_norm": 6.213263511657715, "learning_rate": 3.176818181818182e-06, "loss": 6.268130493164063, "step": 37100 }, { "epoch": 0.07005, "grad_norm": 36.64274597167969, "learning_rate": 3.176565656565657e-06, "loss": 6.315473175048828, "step": 37105 }, { "epoch": 0.0701, "grad_norm": 11.045187950134277, "learning_rate": 3.176313131313132e-06, "loss": 6.280599975585938, "step": 37110 }, { "epoch": 0.07015, "grad_norm": 6.630953788757324, "learning_rate": 3.1760606060606065e-06, "loss": 6.41201171875, "step": 37115 }, { "epoch": 0.0702, "grad_norm": 4.518139362335205, "learning_rate": 3.175808080808081e-06, "loss": 6.320026397705078, "step": 37120 }, { "epoch": 0.07025, "grad_norm": 4.570024490356445, "learning_rate": 3.1755555555555557e-06, "loss": 6.2764122009277346, "step": 37125 }, { "epoch": 0.0703, "grad_norm": 13.163630485534668, "learning_rate": 3.175303030303031e-06, "loss": 6.381171798706054, "step": 37130 }, { "epoch": 0.07035, "grad_norm": 3.94917631149292, "learning_rate": 3.1750505050505054e-06, "loss": 6.318256378173828, "step": 37135 }, { "epoch": 0.0704, "grad_norm": 7.787908554077148, "learning_rate": 3.17479797979798e-06, "loss": 6.281631851196289, "step": 37140 }, { "epoch": 0.07045, "grad_norm": 8.503168106079102, "learning_rate": 3.1745454545454547e-06, "loss": 6.276287841796875, "step": 37145 }, { "epoch": 0.0705, "grad_norm": 4.905938148498535, "learning_rate": 3.1742929292929297e-06, "loss": 6.321661758422851, "step": 37150 }, { "epoch": 0.07055, "grad_norm": 9.237571716308594, "learning_rate": 3.1740404040404044e-06, "loss": 6.331765747070312, "step": 37155 }, { "epoch": 0.0706, "grad_norm": 5.207409858703613, "learning_rate": 3.173787878787879e-06, "loss": 6.265846252441406, "step": 37160 }, { "epoch": 0.07065, "grad_norm": 5.5256757736206055, "learning_rate": 3.1735353535353536e-06, "loss": 6.253339385986328, "step": 37165 }, { "epoch": 0.0707, "grad_norm": 4.028417587280273, "learning_rate": 3.1732828282828287e-06, "loss": 6.253042602539063, "step": 37170 }, { "epoch": 0.07075, "grad_norm": 9.520065307617188, "learning_rate": 3.1730303030303033e-06, "loss": 6.205370330810547, "step": 37175 }, { "epoch": 0.0708, "grad_norm": 5.884189128875732, "learning_rate": 3.172777777777778e-06, "loss": 6.26671257019043, "step": 37180 }, { "epoch": 0.07085, "grad_norm": 7.892874717712402, "learning_rate": 3.1725252525252526e-06, "loss": 6.353199768066406, "step": 37185 }, { "epoch": 0.0709, "grad_norm": 7.734313488006592, "learning_rate": 3.1722727272727276e-06, "loss": 6.272416687011718, "step": 37190 }, { "epoch": 0.07095, "grad_norm": 7.869888782501221, "learning_rate": 3.1720202020202023e-06, "loss": 6.316770935058594, "step": 37195 }, { "epoch": 0.071, "grad_norm": 6.256319046020508, "learning_rate": 3.171767676767677e-06, "loss": 6.268439102172851, "step": 37200 }, { "epoch": 0.07105, "grad_norm": 6.807060718536377, "learning_rate": 3.1715151515151515e-06, "loss": 6.269748306274414, "step": 37205 }, { "epoch": 0.0711, "grad_norm": 4.964698314666748, "learning_rate": 3.1712626262626266e-06, "loss": 6.310997772216797, "step": 37210 }, { "epoch": 0.07115, "grad_norm": 5.207026958465576, "learning_rate": 3.171010101010101e-06, "loss": 6.2855583190917965, "step": 37215 }, { "epoch": 0.0712, "grad_norm": 4.934160232543945, "learning_rate": 3.170757575757576e-06, "loss": 6.209947204589843, "step": 37220 }, { "epoch": 0.07125, "grad_norm": 4.457724094390869, "learning_rate": 3.1705050505050505e-06, "loss": 6.2755287170410154, "step": 37225 }, { "epoch": 0.0713, "grad_norm": 8.233461380004883, "learning_rate": 3.1702525252525255e-06, "loss": 6.226948928833008, "step": 37230 }, { "epoch": 0.07135, "grad_norm": 5.293074607849121, "learning_rate": 3.17e-06, "loss": 6.246759414672852, "step": 37235 }, { "epoch": 0.0714, "grad_norm": 7.457674026489258, "learning_rate": 3.1697474747474748e-06, "loss": 6.2879997253417965, "step": 37240 }, { "epoch": 0.07145, "grad_norm": 5.522800445556641, "learning_rate": 3.1694949494949494e-06, "loss": 6.264430236816406, "step": 37245 }, { "epoch": 0.0715, "grad_norm": 9.296031951904297, "learning_rate": 3.169242424242425e-06, "loss": 6.32413330078125, "step": 37250 }, { "epoch": 0.07155, "grad_norm": 3.82209849357605, "learning_rate": 3.168989898989899e-06, "loss": 6.271455383300781, "step": 37255 }, { "epoch": 0.0716, "grad_norm": 9.441853523254395, "learning_rate": 3.1687373737373737e-06, "loss": 6.449504089355469, "step": 37260 }, { "epoch": 0.07165, "grad_norm": 6.7686967849731445, "learning_rate": 3.1684848484848483e-06, "loss": 6.28924446105957, "step": 37265 }, { "epoch": 0.0717, "grad_norm": 7.431058406829834, "learning_rate": 3.168232323232324e-06, "loss": 6.318012237548828, "step": 37270 }, { "epoch": 0.07175, "grad_norm": 6.841818809509277, "learning_rate": 3.1679797979797985e-06, "loss": 6.268291473388672, "step": 37275 }, { "epoch": 0.0718, "grad_norm": 11.4174165725708, "learning_rate": 3.167727272727273e-06, "loss": 6.233935546875, "step": 37280 }, { "epoch": 0.07185, "grad_norm": 6.302268981933594, "learning_rate": 3.1674747474747473e-06, "loss": 6.273294067382812, "step": 37285 }, { "epoch": 0.0719, "grad_norm": 4.864156246185303, "learning_rate": 3.1672222222222228e-06, "loss": 6.31319580078125, "step": 37290 }, { "epoch": 0.07195, "grad_norm": 8.383002281188965, "learning_rate": 3.1669696969696974e-06, "loss": 6.249300384521485, "step": 37295 }, { "epoch": 0.072, "grad_norm": 9.048709869384766, "learning_rate": 3.166717171717172e-06, "loss": 6.32800407409668, "step": 37300 }, { "epoch": 0.07205, "grad_norm": 4.010061740875244, "learning_rate": 3.1664646464646467e-06, "loss": 6.253669357299804, "step": 37305 }, { "epoch": 0.0721, "grad_norm": 5.678371429443359, "learning_rate": 3.1662121212121217e-06, "loss": 6.302881240844727, "step": 37310 }, { "epoch": 0.07215, "grad_norm": 3.818110466003418, "learning_rate": 3.1659595959595963e-06, "loss": 6.288752746582031, "step": 37315 }, { "epoch": 0.0722, "grad_norm": 11.141027450561523, "learning_rate": 3.165707070707071e-06, "loss": 6.404396057128906, "step": 37320 }, { "epoch": 0.07225, "grad_norm": 3.630213737487793, "learning_rate": 3.1654545454545456e-06, "loss": 6.435263824462891, "step": 37325 }, { "epoch": 0.0723, "grad_norm": 4.842897415161133, "learning_rate": 3.1652020202020207e-06, "loss": 6.26281623840332, "step": 37330 }, { "epoch": 0.07235, "grad_norm": 5.8589301109313965, "learning_rate": 3.1649494949494953e-06, "loss": 6.302542877197266, "step": 37335 }, { "epoch": 0.0724, "grad_norm": 7.103792190551758, "learning_rate": 3.16469696969697e-06, "loss": 6.312001800537109, "step": 37340 }, { "epoch": 0.07245, "grad_norm": 7.748087406158447, "learning_rate": 3.1644444444444445e-06, "loss": 6.260186386108399, "step": 37345 }, { "epoch": 0.0725, "grad_norm": 8.078825950622559, "learning_rate": 3.1641919191919196e-06, "loss": 6.284645843505859, "step": 37350 }, { "epoch": 0.07255, "grad_norm": 7.981853485107422, "learning_rate": 3.1639393939393942e-06, "loss": 6.265850067138672, "step": 37355 }, { "epoch": 0.0726, "grad_norm": 5.116217613220215, "learning_rate": 3.163686868686869e-06, "loss": 6.359819412231445, "step": 37360 }, { "epoch": 0.07265, "grad_norm": 6.789587497711182, "learning_rate": 3.163434343434344e-06, "loss": 6.287220764160156, "step": 37365 }, { "epoch": 0.0727, "grad_norm": 3.9521000385284424, "learning_rate": 3.1631818181818185e-06, "loss": 6.295967102050781, "step": 37370 }, { "epoch": 0.07275, "grad_norm": 12.30364990234375, "learning_rate": 3.162929292929293e-06, "loss": 6.346522521972656, "step": 37375 }, { "epoch": 0.0728, "grad_norm": 4.958909511566162, "learning_rate": 3.162676767676768e-06, "loss": 6.270332717895508, "step": 37380 }, { "epoch": 0.07285, "grad_norm": 7.616885185241699, "learning_rate": 3.162424242424243e-06, "loss": 6.20783805847168, "step": 37385 }, { "epoch": 0.0729, "grad_norm": 12.162022590637207, "learning_rate": 3.1621717171717175e-06, "loss": 6.177782821655273, "step": 37390 }, { "epoch": 0.07295, "grad_norm": 4.753398895263672, "learning_rate": 3.161919191919192e-06, "loss": 6.310669708251953, "step": 37395 }, { "epoch": 0.073, "grad_norm": 7.377922058105469, "learning_rate": 3.1616666666666667e-06, "loss": 6.272311019897461, "step": 37400 }, { "epoch": 0.07305, "grad_norm": 14.341792106628418, "learning_rate": 3.161414141414142e-06, "loss": 6.329729461669922, "step": 37405 }, { "epoch": 0.0731, "grad_norm": 7.418578147888184, "learning_rate": 3.1611616161616164e-06, "loss": 6.312387847900391, "step": 37410 }, { "epoch": 0.07315, "grad_norm": 6.962608337402344, "learning_rate": 3.160909090909091e-06, "loss": 6.230244064331055, "step": 37415 }, { "epoch": 0.0732, "grad_norm": 7.441070556640625, "learning_rate": 3.1606565656565657e-06, "loss": 6.234150314331055, "step": 37420 }, { "epoch": 0.07325, "grad_norm": 4.22051477432251, "learning_rate": 3.1604040404040407e-06, "loss": 6.267570495605469, "step": 37425 }, { "epoch": 0.0733, "grad_norm": 5.209255695343018, "learning_rate": 3.1601515151515154e-06, "loss": 6.265155410766601, "step": 37430 }, { "epoch": 0.07335, "grad_norm": 4.878949165344238, "learning_rate": 3.15989898989899e-06, "loss": 6.2988838195800785, "step": 37435 }, { "epoch": 0.0734, "grad_norm": 7.132513999938965, "learning_rate": 3.1596464646464646e-06, "loss": 6.254971313476562, "step": 37440 }, { "epoch": 0.07345, "grad_norm": 5.018195629119873, "learning_rate": 3.15939393939394e-06, "loss": 6.242920684814453, "step": 37445 }, { "epoch": 0.0735, "grad_norm": 4.935504913330078, "learning_rate": 3.1591414141414143e-06, "loss": 6.2677764892578125, "step": 37450 }, { "epoch": 0.07355, "grad_norm": 6.767306804656982, "learning_rate": 3.158888888888889e-06, "loss": 6.237472152709961, "step": 37455 }, { "epoch": 0.0736, "grad_norm": 12.735971450805664, "learning_rate": 3.1586363636363636e-06, "loss": 6.274590301513672, "step": 37460 }, { "epoch": 0.07365, "grad_norm": 5.194210529327393, "learning_rate": 3.158383838383839e-06, "loss": 6.25623893737793, "step": 37465 }, { "epoch": 0.0737, "grad_norm": 6.407979488372803, "learning_rate": 3.1581313131313137e-06, "loss": 6.258872985839844, "step": 37470 }, { "epoch": 0.07375, "grad_norm": 4.7423095703125, "learning_rate": 3.157878787878788e-06, "loss": 6.2897216796875, "step": 37475 }, { "epoch": 0.0738, "grad_norm": 6.7961626052856445, "learning_rate": 3.1576262626262625e-06, "loss": 6.2469642639160154, "step": 37480 }, { "epoch": 0.07385, "grad_norm": 3.569531202316284, "learning_rate": 3.157373737373738e-06, "loss": 6.303745269775391, "step": 37485 }, { "epoch": 0.0739, "grad_norm": 7.753623962402344, "learning_rate": 3.1571212121212126e-06, "loss": 6.3174995422363285, "step": 37490 }, { "epoch": 0.07395, "grad_norm": 9.427652359008789, "learning_rate": 3.1568686868686873e-06, "loss": 6.366796875, "step": 37495 }, { "epoch": 0.074, "grad_norm": 7.133355140686035, "learning_rate": 3.156616161616162e-06, "loss": 6.281443786621094, "step": 37500 }, { "epoch": 0.07405, "grad_norm": 5.841673851013184, "learning_rate": 3.156363636363637e-06, "loss": 6.261578369140625, "step": 37505 }, { "epoch": 0.0741, "grad_norm": 6.334112167358398, "learning_rate": 3.1561111111111116e-06, "loss": 6.282224655151367, "step": 37510 }, { "epoch": 0.07415, "grad_norm": 5.278360366821289, "learning_rate": 3.155858585858586e-06, "loss": 6.309365463256836, "step": 37515 }, { "epoch": 0.0742, "grad_norm": 7.081477165222168, "learning_rate": 3.155606060606061e-06, "loss": 6.382973480224609, "step": 37520 }, { "epoch": 0.07425, "grad_norm": 4.126574993133545, "learning_rate": 3.155353535353536e-06, "loss": 6.264506912231445, "step": 37525 }, { "epoch": 0.0743, "grad_norm": 14.742033004760742, "learning_rate": 3.1551010101010105e-06, "loss": 6.271473693847656, "step": 37530 }, { "epoch": 0.07435, "grad_norm": 4.206267356872559, "learning_rate": 3.154848484848485e-06, "loss": 6.202900695800781, "step": 37535 }, { "epoch": 0.0744, "grad_norm": 3.745713710784912, "learning_rate": 3.1545959595959598e-06, "loss": 6.290991592407226, "step": 37540 }, { "epoch": 0.07445, "grad_norm": 8.073234558105469, "learning_rate": 3.154343434343435e-06, "loss": 6.290029907226563, "step": 37545 }, { "epoch": 0.0745, "grad_norm": 6.000561714172363, "learning_rate": 3.1540909090909095e-06, "loss": 6.297641754150391, "step": 37550 }, { "epoch": 0.07455, "grad_norm": 10.407940864562988, "learning_rate": 3.153838383838384e-06, "loss": 6.244596099853515, "step": 37555 }, { "epoch": 0.0746, "grad_norm": 3.530430793762207, "learning_rate": 3.1535858585858587e-06, "loss": 6.290475463867187, "step": 37560 }, { "epoch": 0.07465, "grad_norm": 5.763195037841797, "learning_rate": 3.1533333333333338e-06, "loss": 6.278494644165039, "step": 37565 }, { "epoch": 0.0747, "grad_norm": 5.46108865737915, "learning_rate": 3.1530808080808084e-06, "loss": 6.29532470703125, "step": 37570 }, { "epoch": 0.07475, "grad_norm": 5.9600114822387695, "learning_rate": 3.152828282828283e-06, "loss": 6.263047790527343, "step": 37575 }, { "epoch": 0.0748, "grad_norm": 5.3094072341918945, "learning_rate": 3.1525757575757577e-06, "loss": 6.241789245605469, "step": 37580 }, { "epoch": 0.07485, "grad_norm": 16.887203216552734, "learning_rate": 3.1523232323232327e-06, "loss": 6.331874847412109, "step": 37585 }, { "epoch": 0.0749, "grad_norm": 14.937614440917969, "learning_rate": 3.1520707070707074e-06, "loss": 6.272835540771484, "step": 37590 }, { "epoch": 0.07495, "grad_norm": 13.958966255187988, "learning_rate": 3.151818181818182e-06, "loss": 6.299780654907226, "step": 37595 }, { "epoch": 0.075, "grad_norm": 4.293147563934326, "learning_rate": 3.1515656565656566e-06, "loss": 6.238486862182617, "step": 37600 }, { "epoch": 0.07505, "grad_norm": 9.78283405303955, "learning_rate": 3.1513131313131317e-06, "loss": 6.278440856933594, "step": 37605 }, { "epoch": 0.0751, "grad_norm": 4.490179061889648, "learning_rate": 3.1510606060606063e-06, "loss": 6.3188636779785154, "step": 37610 }, { "epoch": 0.07515, "grad_norm": 4.789431095123291, "learning_rate": 3.150808080808081e-06, "loss": 6.305368041992187, "step": 37615 }, { "epoch": 0.0752, "grad_norm": 6.3389506340026855, "learning_rate": 3.1505555555555556e-06, "loss": 6.246075057983399, "step": 37620 }, { "epoch": 0.07525, "grad_norm": 6.2651872634887695, "learning_rate": 3.1503030303030306e-06, "loss": 6.295712661743164, "step": 37625 }, { "epoch": 0.0753, "grad_norm": 8.027301788330078, "learning_rate": 3.1500505050505052e-06, "loss": 6.247629547119141, "step": 37630 }, { "epoch": 0.07535, "grad_norm": 6.50150728225708, "learning_rate": 3.14979797979798e-06, "loss": 6.3652301788330075, "step": 37635 }, { "epoch": 0.0754, "grad_norm": 21.40278434753418, "learning_rate": 3.1495454545454545e-06, "loss": 6.261239242553711, "step": 37640 }, { "epoch": 0.07545, "grad_norm": 9.251375198364258, "learning_rate": 3.1492929292929296e-06, "loss": 6.293183135986328, "step": 37645 }, { "epoch": 0.0755, "grad_norm": 6.331480979919434, "learning_rate": 3.149040404040404e-06, "loss": 6.278551864624023, "step": 37650 }, { "epoch": 0.07555, "grad_norm": 5.479065895080566, "learning_rate": 3.148787878787879e-06, "loss": 6.287373733520508, "step": 37655 }, { "epoch": 0.0756, "grad_norm": 5.326474189758301, "learning_rate": 3.1485353535353534e-06, "loss": 6.251686096191406, "step": 37660 }, { "epoch": 0.07565, "grad_norm": 9.605069160461426, "learning_rate": 3.148282828282829e-06, "loss": 6.343648910522461, "step": 37665 }, { "epoch": 0.0757, "grad_norm": 22.49188995361328, "learning_rate": 3.148030303030303e-06, "loss": 6.254428863525391, "step": 37670 }, { "epoch": 0.07575, "grad_norm": 10.511200904846191, "learning_rate": 3.1477777777777778e-06, "loss": 6.194791793823242, "step": 37675 }, { "epoch": 0.0758, "grad_norm": 4.87959623336792, "learning_rate": 3.1475252525252524e-06, "loss": 6.265406799316406, "step": 37680 }, { "epoch": 0.07585, "grad_norm": 8.232267379760742, "learning_rate": 3.147272727272728e-06, "loss": 6.285223007202148, "step": 37685 }, { "epoch": 0.0759, "grad_norm": 7.462063312530518, "learning_rate": 3.1470202020202025e-06, "loss": 6.29083366394043, "step": 37690 }, { "epoch": 0.07595, "grad_norm": 5.8634138107299805, "learning_rate": 3.146767676767677e-06, "loss": 6.248137283325195, "step": 37695 }, { "epoch": 0.076, "grad_norm": 3.9038517475128174, "learning_rate": 3.1465151515151513e-06, "loss": 6.256618499755859, "step": 37700 }, { "epoch": 0.07605, "grad_norm": 5.210391044616699, "learning_rate": 3.146262626262627e-06, "loss": 6.264854812622071, "step": 37705 }, { "epoch": 0.0761, "grad_norm": 3.6396894454956055, "learning_rate": 3.1460101010101014e-06, "loss": 6.233368301391602, "step": 37710 }, { "epoch": 0.07615, "grad_norm": 4.975776672363281, "learning_rate": 3.145757575757576e-06, "loss": 6.305987930297851, "step": 37715 }, { "epoch": 0.0762, "grad_norm": 5.216904640197754, "learning_rate": 3.1455050505050507e-06, "loss": 6.2475135803222654, "step": 37720 }, { "epoch": 0.07625, "grad_norm": 7.973182201385498, "learning_rate": 3.1452525252525258e-06, "loss": 6.4416358947753904, "step": 37725 }, { "epoch": 0.0763, "grad_norm": 9.773746490478516, "learning_rate": 3.1450000000000004e-06, "loss": 6.3156578063964846, "step": 37730 }, { "epoch": 0.07635, "grad_norm": 6.910429954528809, "learning_rate": 3.144747474747475e-06, "loss": 6.300268173217773, "step": 37735 }, { "epoch": 0.0764, "grad_norm": 5.29838752746582, "learning_rate": 3.1444949494949496e-06, "loss": 6.322866439819336, "step": 37740 }, { "epoch": 0.07645, "grad_norm": 7.595389366149902, "learning_rate": 3.1442424242424247e-06, "loss": 6.267404937744141, "step": 37745 }, { "epoch": 0.0765, "grad_norm": 12.519168853759766, "learning_rate": 3.1439898989898993e-06, "loss": 6.259209060668946, "step": 37750 }, { "epoch": 0.07655, "grad_norm": 5.085344314575195, "learning_rate": 3.143737373737374e-06, "loss": 6.275138473510742, "step": 37755 }, { "epoch": 0.0766, "grad_norm": 6.443953037261963, "learning_rate": 3.1434848484848486e-06, "loss": 6.362397766113281, "step": 37760 }, { "epoch": 0.07665, "grad_norm": 6.204866409301758, "learning_rate": 3.1432323232323236e-06, "loss": 6.2758323669433596, "step": 37765 }, { "epoch": 0.0767, "grad_norm": 4.416131973266602, "learning_rate": 3.1429797979797983e-06, "loss": 6.292702865600586, "step": 37770 }, { "epoch": 0.07675, "grad_norm": 10.329001426696777, "learning_rate": 3.142727272727273e-06, "loss": 6.231916046142578, "step": 37775 }, { "epoch": 0.0768, "grad_norm": 9.71338176727295, "learning_rate": 3.1424747474747475e-06, "loss": 6.232597351074219, "step": 37780 }, { "epoch": 0.07685, "grad_norm": 5.474919319152832, "learning_rate": 3.1422222222222226e-06, "loss": 6.298401641845703, "step": 37785 }, { "epoch": 0.0769, "grad_norm": 5.1926069259643555, "learning_rate": 3.1419696969696972e-06, "loss": 6.271241760253906, "step": 37790 }, { "epoch": 0.07695, "grad_norm": 4.849372863769531, "learning_rate": 3.141717171717172e-06, "loss": 6.249138641357422, "step": 37795 }, { "epoch": 0.077, "grad_norm": 8.559463500976562, "learning_rate": 3.141464646464647e-06, "loss": 6.267048645019531, "step": 37800 }, { "epoch": 0.07705, "grad_norm": 6.0786871910095215, "learning_rate": 3.1412121212121215e-06, "loss": 6.272051620483398, "step": 37805 }, { "epoch": 0.0771, "grad_norm": 5.962799072265625, "learning_rate": 3.140959595959596e-06, "loss": 6.279791259765625, "step": 37810 }, { "epoch": 0.07715, "grad_norm": 4.927727699279785, "learning_rate": 3.1407070707070708e-06, "loss": 6.306644439697266, "step": 37815 }, { "epoch": 0.0772, "grad_norm": 7.282703399658203, "learning_rate": 3.140454545454546e-06, "loss": 6.297599029541016, "step": 37820 }, { "epoch": 0.07725, "grad_norm": 7.05251407623291, "learning_rate": 3.1402020202020205e-06, "loss": 6.260937881469727, "step": 37825 }, { "epoch": 0.0773, "grad_norm": 7.874249458312988, "learning_rate": 3.139949494949495e-06, "loss": 6.336312103271484, "step": 37830 }, { "epoch": 0.07735, "grad_norm": 4.791041851043701, "learning_rate": 3.1396969696969697e-06, "loss": 6.289236831665039, "step": 37835 }, { "epoch": 0.0774, "grad_norm": 5.794831275939941, "learning_rate": 3.1394444444444448e-06, "loss": 6.264236068725586, "step": 37840 }, { "epoch": 0.07745, "grad_norm": 7.5512919425964355, "learning_rate": 3.1391919191919194e-06, "loss": 6.257565307617187, "step": 37845 }, { "epoch": 0.0775, "grad_norm": 3.9747865200042725, "learning_rate": 3.138939393939394e-06, "loss": 6.244243240356445, "step": 37850 }, { "epoch": 0.07755, "grad_norm": 5.6916937828063965, "learning_rate": 3.1386868686868687e-06, "loss": 6.2781532287597654, "step": 37855 }, { "epoch": 0.0776, "grad_norm": 5.586228370666504, "learning_rate": 3.138434343434344e-06, "loss": 6.287054443359375, "step": 37860 }, { "epoch": 0.07765, "grad_norm": 7.9753804206848145, "learning_rate": 3.1381818181818184e-06, "loss": 6.312598419189453, "step": 37865 }, { "epoch": 0.0777, "grad_norm": 8.323701858520508, "learning_rate": 3.137929292929293e-06, "loss": 6.495091247558594, "step": 37870 }, { "epoch": 0.07775, "grad_norm": 6.556832313537598, "learning_rate": 3.1376767676767676e-06, "loss": 6.291112899780273, "step": 37875 }, { "epoch": 0.0778, "grad_norm": 7.5770344734191895, "learning_rate": 3.137424242424243e-06, "loss": 6.2511859893798825, "step": 37880 }, { "epoch": 0.07785, "grad_norm": 4.048890113830566, "learning_rate": 3.1371717171717177e-06, "loss": 6.2856182098388675, "step": 37885 }, { "epoch": 0.0779, "grad_norm": 5.7942328453063965, "learning_rate": 3.136919191919192e-06, "loss": 6.352783584594727, "step": 37890 }, { "epoch": 0.07795, "grad_norm": 7.529387474060059, "learning_rate": 3.1366666666666666e-06, "loss": 6.276668548583984, "step": 37895 }, { "epoch": 0.078, "grad_norm": 7.28542947769165, "learning_rate": 3.136414141414142e-06, "loss": 6.262855148315429, "step": 37900 }, { "epoch": 0.07805, "grad_norm": 7.1851701736450195, "learning_rate": 3.1361616161616167e-06, "loss": 6.2955772399902346, "step": 37905 }, { "epoch": 0.0781, "grad_norm": 6.73874568939209, "learning_rate": 3.1359090909090913e-06, "loss": 6.408564758300781, "step": 37910 }, { "epoch": 0.07815, "grad_norm": 5.36816930770874, "learning_rate": 3.135656565656566e-06, "loss": 6.216756439208984, "step": 37915 }, { "epoch": 0.0782, "grad_norm": 4.6218061447143555, "learning_rate": 3.135404040404041e-06, "loss": 6.2722328186035154, "step": 37920 }, { "epoch": 0.07825, "grad_norm": 4.058448314666748, "learning_rate": 3.1351515151515156e-06, "loss": 6.330149459838867, "step": 37925 }, { "epoch": 0.0783, "grad_norm": 6.136046409606934, "learning_rate": 3.1348989898989902e-06, "loss": 6.250738143920898, "step": 37930 }, { "epoch": 0.07835, "grad_norm": 6.183536052703857, "learning_rate": 3.134646464646465e-06, "loss": 6.322180938720703, "step": 37935 }, { "epoch": 0.0784, "grad_norm": 11.015217781066895, "learning_rate": 3.13439393939394e-06, "loss": 6.307817840576172, "step": 37940 }, { "epoch": 0.07845, "grad_norm": 9.74636173248291, "learning_rate": 3.1341414141414146e-06, "loss": 6.284945678710938, "step": 37945 }, { "epoch": 0.0785, "grad_norm": 9.6013765335083, "learning_rate": 3.133888888888889e-06, "loss": 6.328181457519531, "step": 37950 }, { "epoch": 0.07855, "grad_norm": 5.484803199768066, "learning_rate": 3.133636363636364e-06, "loss": 6.2290397644042965, "step": 37955 }, { "epoch": 0.0786, "grad_norm": 11.232320785522461, "learning_rate": 3.133383838383839e-06, "loss": 6.249345016479492, "step": 37960 }, { "epoch": 0.07865, "grad_norm": 4.730000972747803, "learning_rate": 3.1331313131313135e-06, "loss": 6.263342666625976, "step": 37965 }, { "epoch": 0.0787, "grad_norm": 5.246264457702637, "learning_rate": 3.132878787878788e-06, "loss": 6.306507873535156, "step": 37970 }, { "epoch": 0.07875, "grad_norm": 10.145404815673828, "learning_rate": 3.1326262626262628e-06, "loss": 6.300379943847656, "step": 37975 }, { "epoch": 0.0788, "grad_norm": 12.847841262817383, "learning_rate": 3.132373737373738e-06, "loss": 6.309225463867188, "step": 37980 }, { "epoch": 0.07885, "grad_norm": 7.377182483673096, "learning_rate": 3.1321212121212124e-06, "loss": 6.291133117675781, "step": 37985 }, { "epoch": 0.0789, "grad_norm": 6.201826095581055, "learning_rate": 3.131868686868687e-06, "loss": 6.282328033447266, "step": 37990 }, { "epoch": 0.07895, "grad_norm": 5.19812536239624, "learning_rate": 3.1316161616161617e-06, "loss": 6.306790542602539, "step": 37995 }, { "epoch": 0.079, "grad_norm": 5.159358024597168, "learning_rate": 3.1313636363636368e-06, "loss": 6.296622085571289, "step": 38000 }, { "epoch": 0.07905, "grad_norm": 7.368554592132568, "learning_rate": 3.1311111111111114e-06, "loss": 6.213531875610352, "step": 38005 }, { "epoch": 0.0791, "grad_norm": 5.449375629425049, "learning_rate": 3.130858585858586e-06, "loss": 6.247622680664063, "step": 38010 }, { "epoch": 0.07915, "grad_norm": 4.26023530960083, "learning_rate": 3.1306060606060607e-06, "loss": 6.242337036132812, "step": 38015 }, { "epoch": 0.0792, "grad_norm": 7.02564811706543, "learning_rate": 3.1303535353535357e-06, "loss": 6.290899276733398, "step": 38020 }, { "epoch": 0.07925, "grad_norm": 7.821689605712891, "learning_rate": 3.1301010101010103e-06, "loss": 6.326396942138672, "step": 38025 }, { "epoch": 0.0793, "grad_norm": 6.081252098083496, "learning_rate": 3.129848484848485e-06, "loss": 6.23885498046875, "step": 38030 }, { "epoch": 0.07935, "grad_norm": 25.332094192504883, "learning_rate": 3.1295959595959596e-06, "loss": 6.446949005126953, "step": 38035 }, { "epoch": 0.0794, "grad_norm": 10.121051788330078, "learning_rate": 3.1293434343434346e-06, "loss": 6.407130432128906, "step": 38040 }, { "epoch": 0.07945, "grad_norm": 9.777853012084961, "learning_rate": 3.1290909090909093e-06, "loss": 6.286971664428711, "step": 38045 }, { "epoch": 0.0795, "grad_norm": 5.213418960571289, "learning_rate": 3.128838383838384e-06, "loss": 6.291870880126953, "step": 38050 }, { "epoch": 0.07955, "grad_norm": 8.472552299499512, "learning_rate": 3.1285858585858585e-06, "loss": 6.212425231933594, "step": 38055 }, { "epoch": 0.0796, "grad_norm": 5.330084323883057, "learning_rate": 3.1283333333333336e-06, "loss": 6.274630355834961, "step": 38060 }, { "epoch": 0.07965, "grad_norm": 5.195708751678467, "learning_rate": 3.1280808080808082e-06, "loss": 6.281270217895508, "step": 38065 }, { "epoch": 0.0797, "grad_norm": 7.76674747467041, "learning_rate": 3.127828282828283e-06, "loss": 6.319314956665039, "step": 38070 }, { "epoch": 0.07975, "grad_norm": 5.559945106506348, "learning_rate": 3.1275757575757575e-06, "loss": 6.266827392578125, "step": 38075 }, { "epoch": 0.0798, "grad_norm": 8.864984512329102, "learning_rate": 3.127323232323233e-06, "loss": 6.265235519409179, "step": 38080 }, { "epoch": 0.07985, "grad_norm": 8.006175994873047, "learning_rate": 3.127070707070707e-06, "loss": 6.271083831787109, "step": 38085 }, { "epoch": 0.0799, "grad_norm": 6.374979019165039, "learning_rate": 3.126818181818182e-06, "loss": 6.296521759033203, "step": 38090 }, { "epoch": 0.07995, "grad_norm": 7.894449710845947, "learning_rate": 3.1265656565656564e-06, "loss": 6.265091323852539, "step": 38095 }, { "epoch": 0.08, "grad_norm": 6.649897575378418, "learning_rate": 3.126313131313132e-06, "loss": 6.3252307891845705, "step": 38100 }, { "epoch": 0.08005, "grad_norm": 12.150156021118164, "learning_rate": 3.1260606060606065e-06, "loss": 6.216816329956055, "step": 38105 }, { "epoch": 0.0801, "grad_norm": 5.530274868011475, "learning_rate": 3.125808080808081e-06, "loss": 6.302157974243164, "step": 38110 }, { "epoch": 0.08015, "grad_norm": 6.452079772949219, "learning_rate": 3.1255555555555554e-06, "loss": 6.252336120605468, "step": 38115 }, { "epoch": 0.0802, "grad_norm": 7.400596618652344, "learning_rate": 3.125303030303031e-06, "loss": 6.232899475097656, "step": 38120 }, { "epoch": 0.08025, "grad_norm": 4.144826889038086, "learning_rate": 3.1250505050505055e-06, "loss": 6.335660934448242, "step": 38125 }, { "epoch": 0.0803, "grad_norm": 9.90748119354248, "learning_rate": 3.12479797979798e-06, "loss": 6.2277984619140625, "step": 38130 }, { "epoch": 0.08035, "grad_norm": 9.47167682647705, "learning_rate": 3.1245454545454547e-06, "loss": 6.259332275390625, "step": 38135 }, { "epoch": 0.0804, "grad_norm": 12.266392707824707, "learning_rate": 3.12429292929293e-06, "loss": 6.472328948974609, "step": 38140 }, { "epoch": 0.08045, "grad_norm": 6.009767055511475, "learning_rate": 3.1240404040404044e-06, "loss": 6.274670028686524, "step": 38145 }, { "epoch": 0.0805, "grad_norm": 9.90152359008789, "learning_rate": 3.123787878787879e-06, "loss": 6.241968154907227, "step": 38150 }, { "epoch": 0.08055, "grad_norm": 6.186840534210205, "learning_rate": 3.1235353535353537e-06, "loss": 6.290187835693359, "step": 38155 }, { "epoch": 0.0806, "grad_norm": 6.030425071716309, "learning_rate": 3.1232828282828287e-06, "loss": 6.31548843383789, "step": 38160 }, { "epoch": 0.08065, "grad_norm": 4.543988227844238, "learning_rate": 3.1230303030303034e-06, "loss": 6.25293197631836, "step": 38165 }, { "epoch": 0.0807, "grad_norm": 4.740832328796387, "learning_rate": 3.122777777777778e-06, "loss": 6.257433319091797, "step": 38170 }, { "epoch": 0.08075, "grad_norm": 5.875535488128662, "learning_rate": 3.1225252525252526e-06, "loss": 6.224006652832031, "step": 38175 }, { "epoch": 0.0808, "grad_norm": 7.897028923034668, "learning_rate": 3.1222727272727277e-06, "loss": 6.256528854370117, "step": 38180 }, { "epoch": 0.08085, "grad_norm": 4.592010498046875, "learning_rate": 3.1220202020202023e-06, "loss": 6.286135864257813, "step": 38185 }, { "epoch": 0.0809, "grad_norm": 4.69254732131958, "learning_rate": 3.121767676767677e-06, "loss": 6.269754028320312, "step": 38190 }, { "epoch": 0.08095, "grad_norm": 6.738150119781494, "learning_rate": 3.1215151515151516e-06, "loss": 6.326813507080078, "step": 38195 }, { "epoch": 0.081, "grad_norm": 6.699409484863281, "learning_rate": 3.1212626262626266e-06, "loss": 6.278187942504883, "step": 38200 }, { "epoch": 0.08105, "grad_norm": 6.850310802459717, "learning_rate": 3.1210101010101013e-06, "loss": 6.258307266235351, "step": 38205 }, { "epoch": 0.0811, "grad_norm": 7.496087551116943, "learning_rate": 3.120757575757576e-06, "loss": 6.270426177978516, "step": 38210 }, { "epoch": 0.08115, "grad_norm": 9.454720497131348, "learning_rate": 3.120505050505051e-06, "loss": 6.245521545410156, "step": 38215 }, { "epoch": 0.0812, "grad_norm": 8.495699882507324, "learning_rate": 3.1202525252525256e-06, "loss": 6.244895935058594, "step": 38220 }, { "epoch": 0.08125, "grad_norm": 4.742880344390869, "learning_rate": 3.12e-06, "loss": 6.29229736328125, "step": 38225 }, { "epoch": 0.0813, "grad_norm": 12.77479362487793, "learning_rate": 3.119747474747475e-06, "loss": 6.343697357177734, "step": 38230 }, { "epoch": 0.08135, "grad_norm": 6.2325439453125, "learning_rate": 3.11949494949495e-06, "loss": 6.260408020019531, "step": 38235 }, { "epoch": 0.0814, "grad_norm": 6.783320426940918, "learning_rate": 3.1192424242424245e-06, "loss": 6.369620895385742, "step": 38240 }, { "epoch": 0.08145, "grad_norm": 5.042397499084473, "learning_rate": 3.118989898989899e-06, "loss": 6.322723388671875, "step": 38245 }, { "epoch": 0.0815, "grad_norm": 5.7038187980651855, "learning_rate": 3.1187373737373738e-06, "loss": 6.316136169433594, "step": 38250 }, { "epoch": 0.08155, "grad_norm": 7.594776153564453, "learning_rate": 3.118484848484849e-06, "loss": 6.297699737548828, "step": 38255 }, { "epoch": 0.0816, "grad_norm": 5.438727378845215, "learning_rate": 3.1182323232323235e-06, "loss": 6.302629089355468, "step": 38260 }, { "epoch": 0.08165, "grad_norm": 5.920957088470459, "learning_rate": 3.117979797979798e-06, "loss": 6.289142990112305, "step": 38265 }, { "epoch": 0.0817, "grad_norm": 9.796358108520508, "learning_rate": 3.1177272727272727e-06, "loss": 6.272397994995117, "step": 38270 }, { "epoch": 0.08175, "grad_norm": 9.278793334960938, "learning_rate": 3.117474747474748e-06, "loss": 6.2550395965576175, "step": 38275 }, { "epoch": 0.0818, "grad_norm": 6.296907901763916, "learning_rate": 3.1172222222222224e-06, "loss": 6.267685317993164, "step": 38280 }, { "epoch": 0.08185, "grad_norm": 6.79340124130249, "learning_rate": 3.116969696969697e-06, "loss": 6.360378265380859, "step": 38285 }, { "epoch": 0.0819, "grad_norm": 5.796512603759766, "learning_rate": 3.1167171717171717e-06, "loss": 6.249359893798828, "step": 38290 }, { "epoch": 0.08195, "grad_norm": 4.507469177246094, "learning_rate": 3.116464646464647e-06, "loss": 6.229007720947266, "step": 38295 }, { "epoch": 0.082, "grad_norm": 32.089622497558594, "learning_rate": 3.1162121212121218e-06, "loss": 6.084786987304687, "step": 38300 }, { "epoch": 0.08205, "grad_norm": 4.994156837463379, "learning_rate": 3.115959595959596e-06, "loss": 6.25440559387207, "step": 38305 }, { "epoch": 0.0821, "grad_norm": 11.566097259521484, "learning_rate": 3.1157070707070706e-06, "loss": 6.288254165649414, "step": 38310 }, { "epoch": 0.08215, "grad_norm": 6.119917392730713, "learning_rate": 3.115454545454546e-06, "loss": 6.268838500976562, "step": 38315 }, { "epoch": 0.0822, "grad_norm": 8.248771667480469, "learning_rate": 3.1152020202020207e-06, "loss": 6.2896068572998045, "step": 38320 }, { "epoch": 0.08225, "grad_norm": 24.355417251586914, "learning_rate": 3.1149494949494953e-06, "loss": 6.611558532714843, "step": 38325 }, { "epoch": 0.0823, "grad_norm": 6.421599864959717, "learning_rate": 3.11469696969697e-06, "loss": 6.237408828735352, "step": 38330 }, { "epoch": 0.08235, "grad_norm": 5.432710647583008, "learning_rate": 3.114444444444445e-06, "loss": 6.282366180419922, "step": 38335 }, { "epoch": 0.0824, "grad_norm": 3.114713430404663, "learning_rate": 3.1141919191919197e-06, "loss": 6.289771652221679, "step": 38340 }, { "epoch": 0.08245, "grad_norm": 12.694079399108887, "learning_rate": 3.1139393939393943e-06, "loss": 6.316346740722656, "step": 38345 }, { "epoch": 0.0825, "grad_norm": 10.0151948928833, "learning_rate": 3.113686868686869e-06, "loss": 6.2440673828125, "step": 38350 }, { "epoch": 0.08255, "grad_norm": 8.860899925231934, "learning_rate": 3.113434343434344e-06, "loss": 6.213772201538086, "step": 38355 }, { "epoch": 0.0826, "grad_norm": 4.834271430969238, "learning_rate": 3.1131818181818186e-06, "loss": 6.264373016357422, "step": 38360 }, { "epoch": 0.08265, "grad_norm": 6.366523265838623, "learning_rate": 3.1129292929292932e-06, "loss": 6.247134780883789, "step": 38365 }, { "epoch": 0.0827, "grad_norm": 4.52449893951416, "learning_rate": 3.112676767676768e-06, "loss": 6.275959777832031, "step": 38370 }, { "epoch": 0.08275, "grad_norm": 6.869139194488525, "learning_rate": 3.112424242424243e-06, "loss": 6.294654846191406, "step": 38375 }, { "epoch": 0.0828, "grad_norm": 8.28975772857666, "learning_rate": 3.1121717171717175e-06, "loss": 6.266781616210937, "step": 38380 }, { "epoch": 0.08285, "grad_norm": 6.613365650177002, "learning_rate": 3.111919191919192e-06, "loss": 6.322153472900391, "step": 38385 }, { "epoch": 0.0829, "grad_norm": 5.3877973556518555, "learning_rate": 3.111666666666667e-06, "loss": 6.365744018554688, "step": 38390 }, { "epoch": 0.08295, "grad_norm": 9.445686340332031, "learning_rate": 3.111414141414142e-06, "loss": 6.27295150756836, "step": 38395 }, { "epoch": 0.083, "grad_norm": 16.45443344116211, "learning_rate": 3.1111616161616165e-06, "loss": 6.323920822143554, "step": 38400 }, { "epoch": 0.08305, "grad_norm": 5.735808849334717, "learning_rate": 3.110909090909091e-06, "loss": 6.318212509155273, "step": 38405 }, { "epoch": 0.0831, "grad_norm": 8.596446990966797, "learning_rate": 3.1106565656565657e-06, "loss": 6.352141189575195, "step": 38410 }, { "epoch": 0.08315, "grad_norm": 6.640438079833984, "learning_rate": 3.110404040404041e-06, "loss": 6.252841186523438, "step": 38415 }, { "epoch": 0.0832, "grad_norm": 8.869839668273926, "learning_rate": 3.1101515151515154e-06, "loss": 6.227565002441406, "step": 38420 }, { "epoch": 0.08325, "grad_norm": 5.04594612121582, "learning_rate": 3.10989898989899e-06, "loss": 6.251329803466797, "step": 38425 }, { "epoch": 0.0833, "grad_norm": 6.005542755126953, "learning_rate": 3.1096464646464647e-06, "loss": 6.332159423828125, "step": 38430 }, { "epoch": 0.08335, "grad_norm": 5.255429267883301, "learning_rate": 3.1093939393939397e-06, "loss": 6.239656829833985, "step": 38435 }, { "epoch": 0.0834, "grad_norm": 5.029537677764893, "learning_rate": 3.1091414141414144e-06, "loss": 6.285135269165039, "step": 38440 }, { "epoch": 0.08345, "grad_norm": 5.088849067687988, "learning_rate": 3.108888888888889e-06, "loss": 6.297447967529297, "step": 38445 }, { "epoch": 0.0835, "grad_norm": 8.095579147338867, "learning_rate": 3.1086363636363636e-06, "loss": 6.257388305664063, "step": 38450 }, { "epoch": 0.08355, "grad_norm": 4.755861282348633, "learning_rate": 3.1083838383838387e-06, "loss": 6.229459381103515, "step": 38455 }, { "epoch": 0.0836, "grad_norm": 5.379202365875244, "learning_rate": 3.1081313131313133e-06, "loss": 6.3937122344970705, "step": 38460 }, { "epoch": 0.08365, "grad_norm": 6.659433364868164, "learning_rate": 3.107878787878788e-06, "loss": 6.274997711181641, "step": 38465 }, { "epoch": 0.0837, "grad_norm": 21.967180252075195, "learning_rate": 3.1076262626262626e-06, "loss": 6.317076873779297, "step": 38470 }, { "epoch": 0.08375, "grad_norm": 20.914159774780273, "learning_rate": 3.1073737373737376e-06, "loss": 6.447844696044922, "step": 38475 }, { "epoch": 0.0838, "grad_norm": 6.303069591522217, "learning_rate": 3.1071212121212123e-06, "loss": 6.340871810913086, "step": 38480 }, { "epoch": 0.08385, "grad_norm": 7.712333679199219, "learning_rate": 3.106868686868687e-06, "loss": 6.445771026611328, "step": 38485 }, { "epoch": 0.0839, "grad_norm": 5.1667094230651855, "learning_rate": 3.1066161616161615e-06, "loss": 6.39392204284668, "step": 38490 }, { "epoch": 0.08395, "grad_norm": 13.273898124694824, "learning_rate": 3.106363636363637e-06, "loss": 6.268840789794922, "step": 38495 }, { "epoch": 0.084, "grad_norm": 4.269434452056885, "learning_rate": 3.106111111111111e-06, "loss": 6.24295654296875, "step": 38500 }, { "epoch": 0.08405, "grad_norm": 9.553909301757812, "learning_rate": 3.105858585858586e-06, "loss": 6.274750137329102, "step": 38505 }, { "epoch": 0.0841, "grad_norm": 5.705601215362549, "learning_rate": 3.1056060606060605e-06, "loss": 6.256413269042969, "step": 38510 }, { "epoch": 0.08415, "grad_norm": 4.697645664215088, "learning_rate": 3.105353535353536e-06, "loss": 6.315234375, "step": 38515 }, { "epoch": 0.0842, "grad_norm": 6.365569591522217, "learning_rate": 3.1051010101010106e-06, "loss": 6.23462142944336, "step": 38520 }, { "epoch": 0.08425, "grad_norm": 5.4955902099609375, "learning_rate": 3.104848484848485e-06, "loss": 6.36603012084961, "step": 38525 }, { "epoch": 0.0843, "grad_norm": 6.127120494842529, "learning_rate": 3.1045959595959594e-06, "loss": 6.283335876464844, "step": 38530 }, { "epoch": 0.08435, "grad_norm": 4.127853870391846, "learning_rate": 3.104343434343435e-06, "loss": 6.240867996215821, "step": 38535 }, { "epoch": 0.0844, "grad_norm": 7.809811592102051, "learning_rate": 3.1040909090909095e-06, "loss": 6.379122924804688, "step": 38540 }, { "epoch": 0.08445, "grad_norm": 4.21857213973999, "learning_rate": 3.103838383838384e-06, "loss": 6.361758041381836, "step": 38545 }, { "epoch": 0.0845, "grad_norm": 11.523987770080566, "learning_rate": 3.1035858585858588e-06, "loss": 6.29186782836914, "step": 38550 }, { "epoch": 0.08455, "grad_norm": 3.892789125442505, "learning_rate": 3.103333333333334e-06, "loss": 6.240115737915039, "step": 38555 }, { "epoch": 0.0846, "grad_norm": 3.925283432006836, "learning_rate": 3.1030808080808085e-06, "loss": 6.2227531433105465, "step": 38560 }, { "epoch": 0.08465, "grad_norm": 9.834415435791016, "learning_rate": 3.102828282828283e-06, "loss": 6.280453491210937, "step": 38565 }, { "epoch": 0.0847, "grad_norm": 22.574993133544922, "learning_rate": 3.1025757575757577e-06, "loss": 6.526770782470703, "step": 38570 }, { "epoch": 0.08475, "grad_norm": 4.130259990692139, "learning_rate": 3.1023232323232328e-06, "loss": 6.308740615844727, "step": 38575 }, { "epoch": 0.0848, "grad_norm": 7.440810203552246, "learning_rate": 3.1020707070707074e-06, "loss": 6.5507057189941404, "step": 38580 }, { "epoch": 0.08485, "grad_norm": 4.74627685546875, "learning_rate": 3.101818181818182e-06, "loss": 6.274727249145508, "step": 38585 }, { "epoch": 0.0849, "grad_norm": 7.789112091064453, "learning_rate": 3.1015656565656567e-06, "loss": 6.322026062011719, "step": 38590 }, { "epoch": 0.08495, "grad_norm": 6.981146812438965, "learning_rate": 3.1013131313131317e-06, "loss": 6.301002502441406, "step": 38595 }, { "epoch": 0.085, "grad_norm": 6.16749382019043, "learning_rate": 3.1010606060606063e-06, "loss": 6.298861694335938, "step": 38600 }, { "epoch": 0.08505, "grad_norm": 5.422140121459961, "learning_rate": 3.100808080808081e-06, "loss": 6.271360778808594, "step": 38605 }, { "epoch": 0.0851, "grad_norm": 3.5803310871124268, "learning_rate": 3.1005555555555556e-06, "loss": 6.217414093017578, "step": 38610 }, { "epoch": 0.08515, "grad_norm": 13.9075927734375, "learning_rate": 3.1003030303030307e-06, "loss": 6.2455299377441404, "step": 38615 }, { "epoch": 0.0852, "grad_norm": 5.465734004974365, "learning_rate": 3.1000505050505053e-06, "loss": 6.250377655029297, "step": 38620 }, { "epoch": 0.08525, "grad_norm": 7.684573173522949, "learning_rate": 3.09979797979798e-06, "loss": 6.3069709777832035, "step": 38625 }, { "epoch": 0.0853, "grad_norm": 4.270673751831055, "learning_rate": 3.0995454545454546e-06, "loss": 6.265089797973633, "step": 38630 }, { "epoch": 0.08535, "grad_norm": 4.157352924346924, "learning_rate": 3.0992929292929296e-06, "loss": 6.28088264465332, "step": 38635 }, { "epoch": 0.0854, "grad_norm": 9.034980773925781, "learning_rate": 3.0990404040404042e-06, "loss": 6.279878234863281, "step": 38640 }, { "epoch": 0.08545, "grad_norm": 5.35481595993042, "learning_rate": 3.098787878787879e-06, "loss": 6.250664520263672, "step": 38645 }, { "epoch": 0.0855, "grad_norm": 4.449819087982178, "learning_rate": 3.098535353535354e-06, "loss": 6.27763557434082, "step": 38650 }, { "epoch": 0.08555, "grad_norm": 5.5076470375061035, "learning_rate": 3.0982828282828286e-06, "loss": 6.272834014892578, "step": 38655 }, { "epoch": 0.0856, "grad_norm": 5.944217681884766, "learning_rate": 3.098030303030303e-06, "loss": 6.287925720214844, "step": 38660 }, { "epoch": 0.08565, "grad_norm": 4.606578826904297, "learning_rate": 3.097777777777778e-06, "loss": 6.276762771606445, "step": 38665 }, { "epoch": 0.0857, "grad_norm": 7.794736862182617, "learning_rate": 3.097525252525253e-06, "loss": 6.278011322021484, "step": 38670 }, { "epoch": 0.08575, "grad_norm": 7.580515384674072, "learning_rate": 3.0972727272727275e-06, "loss": 6.256056594848633, "step": 38675 }, { "epoch": 0.0858, "grad_norm": 3.8276755809783936, "learning_rate": 3.097020202020202e-06, "loss": 6.295049667358398, "step": 38680 }, { "epoch": 0.08585, "grad_norm": 5.851880073547363, "learning_rate": 3.0967676767676768e-06, "loss": 6.384275817871094, "step": 38685 }, { "epoch": 0.0859, "grad_norm": 7.7033467292785645, "learning_rate": 3.0965151515151522e-06, "loss": 6.24910888671875, "step": 38690 }, { "epoch": 0.08595, "grad_norm": 7.349149227142334, "learning_rate": 3.0962626262626264e-06, "loss": 6.327001571655273, "step": 38695 }, { "epoch": 0.086, "grad_norm": 4.155896186828613, "learning_rate": 3.096010101010101e-06, "loss": 6.262163543701172, "step": 38700 }, { "epoch": 0.08605, "grad_norm": 4.982694625854492, "learning_rate": 3.0957575757575757e-06, "loss": 6.281441497802734, "step": 38705 }, { "epoch": 0.0861, "grad_norm": 7.823703765869141, "learning_rate": 3.095505050505051e-06, "loss": 6.326067733764648, "step": 38710 }, { "epoch": 0.08615, "grad_norm": 4.599400043487549, "learning_rate": 3.095252525252526e-06, "loss": 6.296357727050781, "step": 38715 }, { "epoch": 0.0862, "grad_norm": 7.1658759117126465, "learning_rate": 3.0950000000000004e-06, "loss": 6.303975677490234, "step": 38720 }, { "epoch": 0.08625, "grad_norm": 6.576052188873291, "learning_rate": 3.0947474747474746e-06, "loss": 6.27567138671875, "step": 38725 }, { "epoch": 0.0863, "grad_norm": 9.860788345336914, "learning_rate": 3.09449494949495e-06, "loss": 6.273751449584961, "step": 38730 }, { "epoch": 0.08635, "grad_norm": 6.754668712615967, "learning_rate": 3.0942424242424248e-06, "loss": 6.270984268188476, "step": 38735 }, { "epoch": 0.0864, "grad_norm": 5.392854690551758, "learning_rate": 3.0939898989898994e-06, "loss": 6.261109924316406, "step": 38740 }, { "epoch": 0.08645, "grad_norm": 6.744201183319092, "learning_rate": 3.093737373737374e-06, "loss": 6.315390777587891, "step": 38745 }, { "epoch": 0.0865, "grad_norm": 7.062400817871094, "learning_rate": 3.093484848484849e-06, "loss": 6.2728118896484375, "step": 38750 }, { "epoch": 0.08655, "grad_norm": 3.688746929168701, "learning_rate": 3.0932323232323237e-06, "loss": 6.273785781860352, "step": 38755 }, { "epoch": 0.0866, "grad_norm": 7.056690692901611, "learning_rate": 3.0929797979797983e-06, "loss": 6.350138473510742, "step": 38760 }, { "epoch": 0.08665, "grad_norm": 11.089237213134766, "learning_rate": 3.092727272727273e-06, "loss": 6.25617561340332, "step": 38765 }, { "epoch": 0.0867, "grad_norm": 6.049311637878418, "learning_rate": 3.092474747474748e-06, "loss": 6.244958114624024, "step": 38770 }, { "epoch": 0.08675, "grad_norm": 7.738856792449951, "learning_rate": 3.0922222222222226e-06, "loss": 6.369571685791016, "step": 38775 }, { "epoch": 0.0868, "grad_norm": 4.9709978103637695, "learning_rate": 3.0919696969696973e-06, "loss": 6.412933349609375, "step": 38780 }, { "epoch": 0.08685, "grad_norm": 6.475958824157715, "learning_rate": 3.091717171717172e-06, "loss": 6.295897674560547, "step": 38785 }, { "epoch": 0.0869, "grad_norm": 8.786493301391602, "learning_rate": 3.091464646464647e-06, "loss": 6.2661182403564455, "step": 38790 }, { "epoch": 0.08695, "grad_norm": 5.423460960388184, "learning_rate": 3.0912121212121216e-06, "loss": 6.189646911621094, "step": 38795 }, { "epoch": 0.087, "grad_norm": 10.83787727355957, "learning_rate": 3.0909595959595962e-06, "loss": 6.316064071655274, "step": 38800 }, { "epoch": 0.08705, "grad_norm": 6.0402607917785645, "learning_rate": 3.090707070707071e-06, "loss": 6.335948944091797, "step": 38805 }, { "epoch": 0.0871, "grad_norm": 6.209439754486084, "learning_rate": 3.090454545454546e-06, "loss": 6.241990661621093, "step": 38810 }, { "epoch": 0.08715, "grad_norm": 6.056130886077881, "learning_rate": 3.0902020202020205e-06, "loss": 6.2697898864746096, "step": 38815 }, { "epoch": 0.0872, "grad_norm": 9.67004680633545, "learning_rate": 3.089949494949495e-06, "loss": 6.335889053344727, "step": 38820 }, { "epoch": 0.08725, "grad_norm": 6.222818374633789, "learning_rate": 3.0896969696969698e-06, "loss": 6.267982482910156, "step": 38825 }, { "epoch": 0.0873, "grad_norm": 6.633770942687988, "learning_rate": 3.089444444444445e-06, "loss": 6.26705322265625, "step": 38830 }, { "epoch": 0.08735, "grad_norm": 7.350826263427734, "learning_rate": 3.0891919191919195e-06, "loss": 6.335404968261718, "step": 38835 }, { "epoch": 0.0874, "grad_norm": 6.253747463226318, "learning_rate": 3.088939393939394e-06, "loss": 6.262545394897461, "step": 38840 }, { "epoch": 0.08745, "grad_norm": 6.171756744384766, "learning_rate": 3.0886868686868687e-06, "loss": 6.224066543579101, "step": 38845 }, { "epoch": 0.0875, "grad_norm": 16.884836196899414, "learning_rate": 3.0884343434343438e-06, "loss": 5.874584197998047, "step": 38850 }, { "epoch": 0.08755, "grad_norm": 3.4429922103881836, "learning_rate": 3.0881818181818184e-06, "loss": 6.290198135375976, "step": 38855 }, { "epoch": 0.0876, "grad_norm": 6.999137878417969, "learning_rate": 3.087929292929293e-06, "loss": 6.275766372680664, "step": 38860 }, { "epoch": 0.08765, "grad_norm": 6.932957649230957, "learning_rate": 3.0876767676767677e-06, "loss": 6.262466049194336, "step": 38865 }, { "epoch": 0.0877, "grad_norm": 5.11722469329834, "learning_rate": 3.0874242424242427e-06, "loss": 6.259531021118164, "step": 38870 }, { "epoch": 0.08775, "grad_norm": 5.096556186676025, "learning_rate": 3.0871717171717174e-06, "loss": 6.268210601806641, "step": 38875 }, { "epoch": 0.0878, "grad_norm": 3.902292490005493, "learning_rate": 3.086919191919192e-06, "loss": 6.269290924072266, "step": 38880 }, { "epoch": 0.08785, "grad_norm": 5.557623386383057, "learning_rate": 3.0866666666666666e-06, "loss": 6.318363952636719, "step": 38885 }, { "epoch": 0.0879, "grad_norm": 13.312966346740723, "learning_rate": 3.0864141414141417e-06, "loss": 6.258135986328125, "step": 38890 }, { "epoch": 0.08795, "grad_norm": 9.732216835021973, "learning_rate": 3.0861616161616163e-06, "loss": 6.296059417724609, "step": 38895 }, { "epoch": 0.088, "grad_norm": 8.769017219543457, "learning_rate": 3.085909090909091e-06, "loss": 6.248266220092773, "step": 38900 }, { "epoch": 0.08805, "grad_norm": 4.784547805786133, "learning_rate": 3.0856565656565656e-06, "loss": 6.2185009002685545, "step": 38905 }, { "epoch": 0.0881, "grad_norm": 10.070372581481934, "learning_rate": 3.085404040404041e-06, "loss": 6.308760070800782, "step": 38910 }, { "epoch": 0.08815, "grad_norm": 12.993553161621094, "learning_rate": 3.0851515151515152e-06, "loss": 6.315010452270508, "step": 38915 }, { "epoch": 0.0882, "grad_norm": 9.855135917663574, "learning_rate": 3.08489898989899e-06, "loss": 6.5001678466796875, "step": 38920 }, { "epoch": 0.08825, "grad_norm": 8.678521156311035, "learning_rate": 3.0846464646464645e-06, "loss": 6.317130279541016, "step": 38925 }, { "epoch": 0.0883, "grad_norm": 6.757709503173828, "learning_rate": 3.08439393939394e-06, "loss": 6.322018814086914, "step": 38930 }, { "epoch": 0.08835, "grad_norm": 4.419641494750977, "learning_rate": 3.0841414141414146e-06, "loss": 6.281678009033203, "step": 38935 }, { "epoch": 0.0884, "grad_norm": 8.545208930969238, "learning_rate": 3.0838888888888892e-06, "loss": 6.266703796386719, "step": 38940 }, { "epoch": 0.08845, "grad_norm": 21.299427032470703, "learning_rate": 3.0836363636363635e-06, "loss": 6.315547180175781, "step": 38945 }, { "epoch": 0.0885, "grad_norm": 6.2852888107299805, "learning_rate": 3.083383838383839e-06, "loss": 6.262736511230469, "step": 38950 }, { "epoch": 0.08855, "grad_norm": 8.608904838562012, "learning_rate": 3.0831313131313136e-06, "loss": 6.268058013916016, "step": 38955 }, { "epoch": 0.0886, "grad_norm": 6.686533451080322, "learning_rate": 3.082878787878788e-06, "loss": 6.252547073364258, "step": 38960 }, { "epoch": 0.08865, "grad_norm": 6.9175214767456055, "learning_rate": 3.082626262626263e-06, "loss": 6.285993576049805, "step": 38965 }, { "epoch": 0.0887, "grad_norm": 4.883634567260742, "learning_rate": 3.082373737373738e-06, "loss": 6.264028167724609, "step": 38970 }, { "epoch": 0.08875, "grad_norm": 5.033908367156982, "learning_rate": 3.0821212121212125e-06, "loss": 6.324848937988281, "step": 38975 }, { "epoch": 0.0888, "grad_norm": 6.498447418212891, "learning_rate": 3.081868686868687e-06, "loss": 6.280373382568359, "step": 38980 }, { "epoch": 0.08885, "grad_norm": 5.7450408935546875, "learning_rate": 3.0816161616161618e-06, "loss": 6.231649398803711, "step": 38985 }, { "epoch": 0.0889, "grad_norm": 5.871547698974609, "learning_rate": 3.081363636363637e-06, "loss": 6.266879653930664, "step": 38990 }, { "epoch": 0.08895, "grad_norm": 5.903047561645508, "learning_rate": 3.0811111111111114e-06, "loss": 6.317109298706055, "step": 38995 }, { "epoch": 0.089, "grad_norm": 6.235011100769043, "learning_rate": 3.080858585858586e-06, "loss": 6.322607421875, "step": 39000 }, { "epoch": 0.08905, "grad_norm": 10.56679630279541, "learning_rate": 3.0806060606060607e-06, "loss": 6.284597015380859, "step": 39005 }, { "epoch": 0.0891, "grad_norm": 12.192068099975586, "learning_rate": 3.0803535353535358e-06, "loss": 6.515753173828125, "step": 39010 }, { "epoch": 0.08915, "grad_norm": 5.010746479034424, "learning_rate": 3.0801010101010104e-06, "loss": 6.262630844116211, "step": 39015 }, { "epoch": 0.0892, "grad_norm": 15.015843391418457, "learning_rate": 3.079848484848485e-06, "loss": 6.292151260375976, "step": 39020 }, { "epoch": 0.08925, "grad_norm": 6.680670738220215, "learning_rate": 3.0795959595959596e-06, "loss": 6.4028167724609375, "step": 39025 }, { "epoch": 0.0893, "grad_norm": 5.576340675354004, "learning_rate": 3.0793434343434347e-06, "loss": 6.238619995117188, "step": 39030 }, { "epoch": 0.08935, "grad_norm": 8.025498390197754, "learning_rate": 3.0790909090909093e-06, "loss": 6.254515838623047, "step": 39035 }, { "epoch": 0.0894, "grad_norm": 3.1588306427001953, "learning_rate": 3.078838383838384e-06, "loss": 6.338005447387696, "step": 39040 }, { "epoch": 0.08945, "grad_norm": 6.66406774520874, "learning_rate": 3.0785858585858586e-06, "loss": 6.240979385375977, "step": 39045 }, { "epoch": 0.0895, "grad_norm": 4.443551063537598, "learning_rate": 3.0783333333333336e-06, "loss": 6.415625, "step": 39050 }, { "epoch": 0.08955, "grad_norm": 8.135719299316406, "learning_rate": 3.0780808080808083e-06, "loss": 6.263232421875, "step": 39055 }, { "epoch": 0.0896, "grad_norm": 5.1961846351623535, "learning_rate": 3.077828282828283e-06, "loss": 6.24383773803711, "step": 39060 }, { "epoch": 0.08965, "grad_norm": 6.132416725158691, "learning_rate": 3.077575757575758e-06, "loss": 6.277900695800781, "step": 39065 }, { "epoch": 0.0897, "grad_norm": 6.791189670562744, "learning_rate": 3.0773232323232326e-06, "loss": 6.275061798095703, "step": 39070 }, { "epoch": 0.08975, "grad_norm": 5.99292516708374, "learning_rate": 3.0770707070707072e-06, "loss": 6.2736671447753904, "step": 39075 }, { "epoch": 0.0898, "grad_norm": 8.040875434875488, "learning_rate": 3.076818181818182e-06, "loss": 6.271006393432617, "step": 39080 }, { "epoch": 0.08985, "grad_norm": 5.793929576873779, "learning_rate": 3.076565656565657e-06, "loss": 6.286944198608398, "step": 39085 }, { "epoch": 0.0899, "grad_norm": 11.181063652038574, "learning_rate": 3.0763131313131315e-06, "loss": 6.283794403076172, "step": 39090 }, { "epoch": 0.08995, "grad_norm": 10.077513694763184, "learning_rate": 3.076060606060606e-06, "loss": 6.413910675048828, "step": 39095 }, { "epoch": 0.09, "grad_norm": 10.699628829956055, "learning_rate": 3.075808080808081e-06, "loss": 6.379662322998047, "step": 39100 }, { "epoch": 0.09005, "grad_norm": 5.655072212219238, "learning_rate": 3.0755555555555563e-06, "loss": 6.252742004394531, "step": 39105 }, { "epoch": 0.0901, "grad_norm": 7.359196186065674, "learning_rate": 3.0753030303030305e-06, "loss": 6.217854309082031, "step": 39110 }, { "epoch": 0.09015, "grad_norm": 4.99860954284668, "learning_rate": 3.075050505050505e-06, "loss": 6.252827072143555, "step": 39115 }, { "epoch": 0.0902, "grad_norm": 4.21286678314209, "learning_rate": 3.0747979797979797e-06, "loss": 6.295737075805664, "step": 39120 }, { "epoch": 0.09025, "grad_norm": 5.677606105804443, "learning_rate": 3.0745454545454552e-06, "loss": 6.487159729003906, "step": 39125 }, { "epoch": 0.0903, "grad_norm": 5.7341413497924805, "learning_rate": 3.07429292929293e-06, "loss": 6.271537017822266, "step": 39130 }, { "epoch": 0.09035, "grad_norm": 16.32733917236328, "learning_rate": 3.0740404040404045e-06, "loss": 6.370833206176758, "step": 39135 }, { "epoch": 0.0904, "grad_norm": 8.712239265441895, "learning_rate": 3.0737878787878787e-06, "loss": 6.262386322021484, "step": 39140 }, { "epoch": 0.09045, "grad_norm": 4.045783996582031, "learning_rate": 3.073535353535354e-06, "loss": 6.303938293457032, "step": 39145 }, { "epoch": 0.0905, "grad_norm": 9.539093017578125, "learning_rate": 3.073282828282829e-06, "loss": 6.260939407348633, "step": 39150 }, { "epoch": 0.09055, "grad_norm": 7.298191547393799, "learning_rate": 3.0730303030303034e-06, "loss": 6.284363555908203, "step": 39155 }, { "epoch": 0.0906, "grad_norm": 4.552484512329102, "learning_rate": 3.072777777777778e-06, "loss": 6.293478012084961, "step": 39160 }, { "epoch": 0.09065, "grad_norm": 4.384852409362793, "learning_rate": 3.072525252525253e-06, "loss": 6.288124847412109, "step": 39165 }, { "epoch": 0.0907, "grad_norm": 5.975673675537109, "learning_rate": 3.0722727272727277e-06, "loss": 6.260038757324219, "step": 39170 }, { "epoch": 0.09075, "grad_norm": 7.746758460998535, "learning_rate": 3.0720202020202024e-06, "loss": 6.304029083251953, "step": 39175 }, { "epoch": 0.0908, "grad_norm": 10.297867774963379, "learning_rate": 3.071767676767677e-06, "loss": 6.2825977325439455, "step": 39180 }, { "epoch": 0.09085, "grad_norm": 5.207608699798584, "learning_rate": 3.071515151515152e-06, "loss": 6.322061920166016, "step": 39185 }, { "epoch": 0.0909, "grad_norm": 3.982045888900757, "learning_rate": 3.0712626262626267e-06, "loss": 6.253726577758789, "step": 39190 }, { "epoch": 0.09095, "grad_norm": 5.969116687774658, "learning_rate": 3.0710101010101013e-06, "loss": 6.288674545288086, "step": 39195 }, { "epoch": 0.091, "grad_norm": 4.395119667053223, "learning_rate": 3.070757575757576e-06, "loss": 6.287556838989258, "step": 39200 }, { "epoch": 0.09105, "grad_norm": 6.272239685058594, "learning_rate": 3.070505050505051e-06, "loss": 6.284761047363281, "step": 39205 }, { "epoch": 0.0911, "grad_norm": 6.949128150939941, "learning_rate": 3.0702525252525256e-06, "loss": 6.288115310668945, "step": 39210 }, { "epoch": 0.09115, "grad_norm": 7.773336887359619, "learning_rate": 3.0700000000000003e-06, "loss": 6.430629730224609, "step": 39215 }, { "epoch": 0.0912, "grad_norm": 18.049579620361328, "learning_rate": 3.069747474747475e-06, "loss": 6.485710906982422, "step": 39220 }, { "epoch": 0.09125, "grad_norm": 4.200318813323975, "learning_rate": 3.06949494949495e-06, "loss": 6.319189834594726, "step": 39225 }, { "epoch": 0.0913, "grad_norm": 5.5459208488464355, "learning_rate": 3.0692424242424246e-06, "loss": 6.364413452148438, "step": 39230 }, { "epoch": 0.09135, "grad_norm": 7.658109664916992, "learning_rate": 3.068989898989899e-06, "loss": 6.33764762878418, "step": 39235 }, { "epoch": 0.0914, "grad_norm": 6.640018939971924, "learning_rate": 3.068737373737374e-06, "loss": 6.224563217163086, "step": 39240 }, { "epoch": 0.09145, "grad_norm": 6.312591552734375, "learning_rate": 3.068484848484849e-06, "loss": 6.253081130981445, "step": 39245 }, { "epoch": 0.0915, "grad_norm": 7.365550994873047, "learning_rate": 3.0682323232323235e-06, "loss": 6.2688652038574215, "step": 39250 }, { "epoch": 0.09155, "grad_norm": 7.650358200073242, "learning_rate": 3.067979797979798e-06, "loss": 6.311114501953125, "step": 39255 }, { "epoch": 0.0916, "grad_norm": 7.672425270080566, "learning_rate": 3.0677272727272728e-06, "loss": 6.303653717041016, "step": 39260 }, { "epoch": 0.09165, "grad_norm": 5.546412944793701, "learning_rate": 3.067474747474748e-06, "loss": 6.2515312194824215, "step": 39265 }, { "epoch": 0.0917, "grad_norm": 6.5482096672058105, "learning_rate": 3.0672222222222225e-06, "loss": 6.230216979980469, "step": 39270 }, { "epoch": 0.09175, "grad_norm": 4.945723533630371, "learning_rate": 3.066969696969697e-06, "loss": 6.277172470092774, "step": 39275 }, { "epoch": 0.0918, "grad_norm": 6.490903377532959, "learning_rate": 3.0667171717171717e-06, "loss": 6.285236358642578, "step": 39280 }, { "epoch": 0.09185, "grad_norm": 4.6270647048950195, "learning_rate": 3.0664646464646468e-06, "loss": 6.270497512817383, "step": 39285 }, { "epoch": 0.0919, "grad_norm": 8.51440143585205, "learning_rate": 3.0662121212121214e-06, "loss": 6.371000671386719, "step": 39290 }, { "epoch": 0.09195, "grad_norm": 10.238192558288574, "learning_rate": 3.065959595959596e-06, "loss": 6.327411651611328, "step": 39295 }, { "epoch": 0.092, "grad_norm": 3.3806231021881104, "learning_rate": 3.0657070707070707e-06, "loss": 6.247269821166992, "step": 39300 }, { "epoch": 0.09205, "grad_norm": 3.753351926803589, "learning_rate": 3.0654545454545457e-06, "loss": 6.26878776550293, "step": 39305 }, { "epoch": 0.0921, "grad_norm": 8.06999397277832, "learning_rate": 3.0652020202020203e-06, "loss": 6.250996017456055, "step": 39310 }, { "epoch": 0.09215, "grad_norm": 6.953955173492432, "learning_rate": 3.064949494949495e-06, "loss": 6.260722732543945, "step": 39315 }, { "epoch": 0.0922, "grad_norm": 8.116020202636719, "learning_rate": 3.0646969696969696e-06, "loss": 6.272375106811523, "step": 39320 }, { "epoch": 0.09225, "grad_norm": 8.296736717224121, "learning_rate": 3.064444444444445e-06, "loss": 6.252204513549804, "step": 39325 }, { "epoch": 0.0923, "grad_norm": 7.756749629974365, "learning_rate": 3.0641919191919193e-06, "loss": 6.243569946289062, "step": 39330 }, { "epoch": 0.09235, "grad_norm": 9.886284828186035, "learning_rate": 3.063939393939394e-06, "loss": 6.2703086853027346, "step": 39335 }, { "epoch": 0.0924, "grad_norm": 12.561198234558105, "learning_rate": 3.0636868686868685e-06, "loss": 6.2829833984375, "step": 39340 }, { "epoch": 0.09245, "grad_norm": 8.711660385131836, "learning_rate": 3.063434343434344e-06, "loss": 6.239153289794922, "step": 39345 }, { "epoch": 0.0925, "grad_norm": 17.116741180419922, "learning_rate": 3.0631818181818187e-06, "loss": 6.510383605957031, "step": 39350 }, { "epoch": 0.09255, "grad_norm": 11.632884979248047, "learning_rate": 3.0629292929292933e-06, "loss": 6.4560600280761715, "step": 39355 }, { "epoch": 0.0926, "grad_norm": 9.428510665893555, "learning_rate": 3.0626767676767675e-06, "loss": 6.301527786254883, "step": 39360 }, { "epoch": 0.09265, "grad_norm": 18.10042381286621, "learning_rate": 3.062424242424243e-06, "loss": 6.285648345947266, "step": 39365 }, { "epoch": 0.0927, "grad_norm": 7.185460090637207, "learning_rate": 3.0621717171717176e-06, "loss": 6.231967163085938, "step": 39370 }, { "epoch": 0.09275, "grad_norm": 7.942965984344482, "learning_rate": 3.0619191919191922e-06, "loss": 6.257367706298828, "step": 39375 }, { "epoch": 0.0928, "grad_norm": 7.2067646980285645, "learning_rate": 3.061666666666667e-06, "loss": 6.258131790161133, "step": 39380 }, { "epoch": 0.09285, "grad_norm": 7.035239219665527, "learning_rate": 3.061414141414142e-06, "loss": 6.258625030517578, "step": 39385 }, { "epoch": 0.0929, "grad_norm": 6.5928802490234375, "learning_rate": 3.0611616161616165e-06, "loss": 6.322162628173828, "step": 39390 }, { "epoch": 0.09295, "grad_norm": 4.762480735778809, "learning_rate": 3.060909090909091e-06, "loss": 6.35546875, "step": 39395 }, { "epoch": 0.093, "grad_norm": 8.196066856384277, "learning_rate": 3.060656565656566e-06, "loss": 6.287951278686523, "step": 39400 }, { "epoch": 0.09305, "grad_norm": 4.534555435180664, "learning_rate": 3.060404040404041e-06, "loss": 6.298977661132812, "step": 39405 }, { "epoch": 0.0931, "grad_norm": 4.168025016784668, "learning_rate": 3.0601515151515155e-06, "loss": 6.323687744140625, "step": 39410 }, { "epoch": 0.09315, "grad_norm": 10.7211332321167, "learning_rate": 3.05989898989899e-06, "loss": 6.324875259399414, "step": 39415 }, { "epoch": 0.0932, "grad_norm": 5.190300464630127, "learning_rate": 3.0596464646464647e-06, "loss": 6.308753204345703, "step": 39420 }, { "epoch": 0.09325, "grad_norm": 7.077613830566406, "learning_rate": 3.05939393939394e-06, "loss": 6.272271347045899, "step": 39425 }, { "epoch": 0.0933, "grad_norm": 4.654881954193115, "learning_rate": 3.0591414141414144e-06, "loss": 6.343343353271484, "step": 39430 }, { "epoch": 0.09335, "grad_norm": 7.842901229858398, "learning_rate": 3.058888888888889e-06, "loss": 6.276409149169922, "step": 39435 }, { "epoch": 0.0934, "grad_norm": 5.84159517288208, "learning_rate": 3.0586363636363637e-06, "loss": 6.290888977050781, "step": 39440 }, { "epoch": 0.09345, "grad_norm": 4.244458198547363, "learning_rate": 3.0583838383838387e-06, "loss": 6.262062072753906, "step": 39445 }, { "epoch": 0.0935, "grad_norm": 6.388356685638428, "learning_rate": 3.0581313131313134e-06, "loss": 6.2566673278808596, "step": 39450 }, { "epoch": 0.09355, "grad_norm": 5.874319553375244, "learning_rate": 3.057878787878788e-06, "loss": 6.315679550170898, "step": 39455 }, { "epoch": 0.0936, "grad_norm": 6.3072733879089355, "learning_rate": 3.0576262626262626e-06, "loss": 6.263661956787109, "step": 39460 }, { "epoch": 0.09365, "grad_norm": 3.7241673469543457, "learning_rate": 3.0573737373737377e-06, "loss": 6.248409271240234, "step": 39465 }, { "epoch": 0.0937, "grad_norm": 8.418601989746094, "learning_rate": 3.0571212121212123e-06, "loss": 6.3839057922363285, "step": 39470 }, { "epoch": 0.09375, "grad_norm": 7.451816082000732, "learning_rate": 3.056868686868687e-06, "loss": 6.30369873046875, "step": 39475 }, { "epoch": 0.0938, "grad_norm": 7.953643321990967, "learning_rate": 3.0566161616161616e-06, "loss": 6.344231414794922, "step": 39480 }, { "epoch": 0.09385, "grad_norm": 3.9061269760131836, "learning_rate": 3.0563636363636366e-06, "loss": 6.321837615966797, "step": 39485 }, { "epoch": 0.0939, "grad_norm": 5.189099311828613, "learning_rate": 3.0561111111111113e-06, "loss": 6.242802429199219, "step": 39490 }, { "epoch": 0.09395, "grad_norm": 3.613459587097168, "learning_rate": 3.055858585858586e-06, "loss": 6.291605377197266, "step": 39495 }, { "epoch": 0.094, "grad_norm": 4.4439191818237305, "learning_rate": 3.055606060606061e-06, "loss": 6.282182693481445, "step": 39500 }, { "epoch": 0.09405, "grad_norm": 8.112859725952148, "learning_rate": 3.0553535353535356e-06, "loss": 6.297395706176758, "step": 39505 }, { "epoch": 0.0941, "grad_norm": 3.9926745891571045, "learning_rate": 3.05510101010101e-06, "loss": 6.212990570068359, "step": 39510 }, { "epoch": 0.09415, "grad_norm": 4.801685333251953, "learning_rate": 3.054848484848485e-06, "loss": 6.281188201904297, "step": 39515 }, { "epoch": 0.0942, "grad_norm": 7.154995918273926, "learning_rate": 3.0545959595959603e-06, "loss": 6.286091613769531, "step": 39520 }, { "epoch": 0.09425, "grad_norm": 7.538205623626709, "learning_rate": 3.0543434343434345e-06, "loss": 6.258271789550781, "step": 39525 }, { "epoch": 0.0943, "grad_norm": 6.951421737670898, "learning_rate": 3.054090909090909e-06, "loss": 6.258203125, "step": 39530 }, { "epoch": 0.09435, "grad_norm": 5.049332618713379, "learning_rate": 3.0538383838383838e-06, "loss": 6.324021530151367, "step": 39535 }, { "epoch": 0.0944, "grad_norm": 7.144245147705078, "learning_rate": 3.0535858585858593e-06, "loss": 6.361887359619141, "step": 39540 }, { "epoch": 0.09445, "grad_norm": 19.257226943969727, "learning_rate": 3.053333333333334e-06, "loss": 6.28746452331543, "step": 39545 }, { "epoch": 0.0945, "grad_norm": 8.158074378967285, "learning_rate": 3.0530808080808085e-06, "loss": 6.280484771728515, "step": 39550 }, { "epoch": 0.09455, "grad_norm": 7.717003345489502, "learning_rate": 3.0528282828282827e-06, "loss": 6.328860092163086, "step": 39555 }, { "epoch": 0.0946, "grad_norm": 6.549741268157959, "learning_rate": 3.052575757575758e-06, "loss": 6.243044662475586, "step": 39560 }, { "epoch": 0.09465, "grad_norm": 7.241082668304443, "learning_rate": 3.052323232323233e-06, "loss": 6.307433319091797, "step": 39565 }, { "epoch": 0.0947, "grad_norm": 6.8255615234375, "learning_rate": 3.0520707070707075e-06, "loss": 6.279504776000977, "step": 39570 }, { "epoch": 0.09475, "grad_norm": 5.816373348236084, "learning_rate": 3.051818181818182e-06, "loss": 6.336029052734375, "step": 39575 }, { "epoch": 0.0948, "grad_norm": 6.788973331451416, "learning_rate": 3.051565656565657e-06, "loss": 6.265750122070313, "step": 39580 }, { "epoch": 0.09485, "grad_norm": 5.944479465484619, "learning_rate": 3.0513131313131318e-06, "loss": 6.370442581176758, "step": 39585 }, { "epoch": 0.0949, "grad_norm": 8.943553924560547, "learning_rate": 3.0510606060606064e-06, "loss": 6.247539520263672, "step": 39590 }, { "epoch": 0.09495, "grad_norm": 7.429471969604492, "learning_rate": 3.050808080808081e-06, "loss": 6.207161331176758, "step": 39595 }, { "epoch": 0.095, "grad_norm": 4.592660903930664, "learning_rate": 3.050555555555556e-06, "loss": 6.207543182373047, "step": 39600 }, { "epoch": 0.09505, "grad_norm": 6.410984992980957, "learning_rate": 3.0503030303030307e-06, "loss": 6.249950790405274, "step": 39605 }, { "epoch": 0.0951, "grad_norm": 8.55736255645752, "learning_rate": 3.0500505050505053e-06, "loss": 6.365015029907227, "step": 39610 }, { "epoch": 0.09515, "grad_norm": 6.169278621673584, "learning_rate": 3.04979797979798e-06, "loss": 6.244651031494141, "step": 39615 }, { "epoch": 0.0952, "grad_norm": 6.534669876098633, "learning_rate": 3.049545454545455e-06, "loss": 6.307273101806641, "step": 39620 }, { "epoch": 0.09525, "grad_norm": 8.135649681091309, "learning_rate": 3.0492929292929297e-06, "loss": 6.2627708435058596, "step": 39625 }, { "epoch": 0.0953, "grad_norm": 9.942840576171875, "learning_rate": 3.0490404040404043e-06, "loss": 6.274460983276367, "step": 39630 }, { "epoch": 0.09535, "grad_norm": 6.3688836097717285, "learning_rate": 3.048787878787879e-06, "loss": 6.2644187927246096, "step": 39635 }, { "epoch": 0.0954, "grad_norm": 4.666479587554932, "learning_rate": 3.048535353535354e-06, "loss": 6.256207275390625, "step": 39640 }, { "epoch": 0.09545, "grad_norm": 7.013498783111572, "learning_rate": 3.0482828282828286e-06, "loss": 6.235408020019531, "step": 39645 }, { "epoch": 0.0955, "grad_norm": 7.912097930908203, "learning_rate": 3.0480303030303032e-06, "loss": 6.246105194091797, "step": 39650 }, { "epoch": 0.09555, "grad_norm": 13.065936088562012, "learning_rate": 3.047777777777778e-06, "loss": 6.307199096679687, "step": 39655 }, { "epoch": 0.0956, "grad_norm": 8.842634201049805, "learning_rate": 3.047525252525253e-06, "loss": 6.232364654541016, "step": 39660 }, { "epoch": 0.09565, "grad_norm": 10.520137786865234, "learning_rate": 3.0472727272727276e-06, "loss": 6.295642471313476, "step": 39665 }, { "epoch": 0.0957, "grad_norm": 9.55027961730957, "learning_rate": 3.047020202020202e-06, "loss": 6.3119457244873045, "step": 39670 }, { "epoch": 0.09575, "grad_norm": 4.912415027618408, "learning_rate": 3.046767676767677e-06, "loss": 6.29124641418457, "step": 39675 }, { "epoch": 0.0958, "grad_norm": 33.51972961425781, "learning_rate": 3.046515151515152e-06, "loss": 6.377518844604492, "step": 39680 }, { "epoch": 0.09585, "grad_norm": 5.118306636810303, "learning_rate": 3.0462626262626265e-06, "loss": 6.307962417602539, "step": 39685 }, { "epoch": 0.0959, "grad_norm": 8.07176685333252, "learning_rate": 3.046010101010101e-06, "loss": 6.277048873901367, "step": 39690 }, { "epoch": 0.09595, "grad_norm": 6.355937480926514, "learning_rate": 3.0457575757575758e-06, "loss": 6.305795288085937, "step": 39695 }, { "epoch": 0.096, "grad_norm": 8.511637687683105, "learning_rate": 3.045505050505051e-06, "loss": 6.248551940917968, "step": 39700 }, { "epoch": 0.09605, "grad_norm": 8.970492362976074, "learning_rate": 3.0452525252525254e-06, "loss": 6.224385833740234, "step": 39705 }, { "epoch": 0.0961, "grad_norm": 6.257556915283203, "learning_rate": 3.045e-06, "loss": 6.257138061523437, "step": 39710 }, { "epoch": 0.09615, "grad_norm": 4.832211017608643, "learning_rate": 3.0447474747474747e-06, "loss": 6.265195083618164, "step": 39715 }, { "epoch": 0.0962, "grad_norm": 5.479896068572998, "learning_rate": 3.0444949494949498e-06, "loss": 6.276923751831054, "step": 39720 }, { "epoch": 0.09625, "grad_norm": 5.880154609680176, "learning_rate": 3.0442424242424244e-06, "loss": 6.30895767211914, "step": 39725 }, { "epoch": 0.0963, "grad_norm": 5.765962600708008, "learning_rate": 3.043989898989899e-06, "loss": 6.269191741943359, "step": 39730 }, { "epoch": 0.09635, "grad_norm": 6.231253147125244, "learning_rate": 3.0437373737373736e-06, "loss": 6.278958129882812, "step": 39735 }, { "epoch": 0.0964, "grad_norm": 4.817709922790527, "learning_rate": 3.043484848484849e-06, "loss": 6.278455352783203, "step": 39740 }, { "epoch": 0.09645, "grad_norm": 5.985478401184082, "learning_rate": 3.0432323232323233e-06, "loss": 6.251661682128907, "step": 39745 }, { "epoch": 0.0965, "grad_norm": 5.80155611038208, "learning_rate": 3.042979797979798e-06, "loss": 6.2939292907714846, "step": 39750 }, { "epoch": 0.09655, "grad_norm": 7.896412372589111, "learning_rate": 3.0427272727272726e-06, "loss": 6.202546691894531, "step": 39755 }, { "epoch": 0.0966, "grad_norm": 5.035182952880859, "learning_rate": 3.042474747474748e-06, "loss": 6.22095832824707, "step": 39760 }, { "epoch": 0.09665, "grad_norm": 5.645527362823486, "learning_rate": 3.0422222222222227e-06, "loss": 6.23913345336914, "step": 39765 }, { "epoch": 0.0967, "grad_norm": 4.7217912673950195, "learning_rate": 3.0419696969696973e-06, "loss": 6.238736724853515, "step": 39770 }, { "epoch": 0.09675, "grad_norm": 10.03943920135498, "learning_rate": 3.0417171717171715e-06, "loss": 6.2873893737792965, "step": 39775 }, { "epoch": 0.0968, "grad_norm": 3.709041118621826, "learning_rate": 3.041464646464647e-06, "loss": 6.261859130859375, "step": 39780 }, { "epoch": 0.09685, "grad_norm": 6.8322434425354, "learning_rate": 3.0412121212121216e-06, "loss": 6.257903289794922, "step": 39785 }, { "epoch": 0.0969, "grad_norm": 6.866949558258057, "learning_rate": 3.0409595959595963e-06, "loss": 6.261991882324219, "step": 39790 }, { "epoch": 0.09695, "grad_norm": 8.102174758911133, "learning_rate": 3.040707070707071e-06, "loss": 6.264657211303711, "step": 39795 }, { "epoch": 0.097, "grad_norm": 20.32860565185547, "learning_rate": 3.040454545454546e-06, "loss": 6.300338363647461, "step": 39800 }, { "epoch": 0.09705, "grad_norm": 4.616422653198242, "learning_rate": 3.0402020202020206e-06, "loss": 6.3257091522216795, "step": 39805 }, { "epoch": 0.0971, "grad_norm": 5.947876930236816, "learning_rate": 3.0399494949494952e-06, "loss": 6.260201644897461, "step": 39810 }, { "epoch": 0.09715, "grad_norm": 24.24739646911621, "learning_rate": 3.03969696969697e-06, "loss": 6.49999771118164, "step": 39815 }, { "epoch": 0.0972, "grad_norm": 8.184504508972168, "learning_rate": 3.039444444444445e-06, "loss": 6.323337173461914, "step": 39820 }, { "epoch": 0.09725, "grad_norm": 6.967775821685791, "learning_rate": 3.0391919191919195e-06, "loss": 6.238442230224609, "step": 39825 }, { "epoch": 0.0973, "grad_norm": 6.290642261505127, "learning_rate": 3.038939393939394e-06, "loss": 6.265054321289062, "step": 39830 }, { "epoch": 0.09735, "grad_norm": 6.699621677398682, "learning_rate": 3.0386868686868688e-06, "loss": 6.304799270629883, "step": 39835 }, { "epoch": 0.0974, "grad_norm": 7.302486896514893, "learning_rate": 3.038434343434344e-06, "loss": 6.262015533447266, "step": 39840 }, { "epoch": 0.09745, "grad_norm": 7.259206295013428, "learning_rate": 3.0381818181818185e-06, "loss": 6.314931488037109, "step": 39845 }, { "epoch": 0.0975, "grad_norm": 5.132086753845215, "learning_rate": 3.037929292929293e-06, "loss": 6.212421417236328, "step": 39850 }, { "epoch": 0.09755, "grad_norm": 3.559826135635376, "learning_rate": 3.0376767676767677e-06, "loss": 6.272224426269531, "step": 39855 }, { "epoch": 0.0976, "grad_norm": 7.799129486083984, "learning_rate": 3.0374242424242428e-06, "loss": 6.318041229248047, "step": 39860 }, { "epoch": 0.09765, "grad_norm": 6.717422008514404, "learning_rate": 3.0371717171717174e-06, "loss": 6.272261047363282, "step": 39865 }, { "epoch": 0.0977, "grad_norm": 4.4721245765686035, "learning_rate": 3.036919191919192e-06, "loss": 6.223075485229492, "step": 39870 }, { "epoch": 0.09775, "grad_norm": 5.176158905029297, "learning_rate": 3.0366666666666667e-06, "loss": 6.252046203613281, "step": 39875 }, { "epoch": 0.0978, "grad_norm": 5.7113871574401855, "learning_rate": 3.0364141414141417e-06, "loss": 6.276599884033203, "step": 39880 }, { "epoch": 0.09785, "grad_norm": 8.806733131408691, "learning_rate": 3.0361616161616164e-06, "loss": 6.262837600708008, "step": 39885 }, { "epoch": 0.0979, "grad_norm": 7.675717353820801, "learning_rate": 3.035909090909091e-06, "loss": 6.2345436096191404, "step": 39890 }, { "epoch": 0.09795, "grad_norm": 5.5042500495910645, "learning_rate": 3.0356565656565656e-06, "loss": 6.307815933227539, "step": 39895 }, { "epoch": 0.098, "grad_norm": 6.197714805603027, "learning_rate": 3.0354040404040407e-06, "loss": 6.277433776855469, "step": 39900 }, { "epoch": 0.09805, "grad_norm": 8.494780540466309, "learning_rate": 3.0351515151515153e-06, "loss": 6.287798690795898, "step": 39905 }, { "epoch": 0.0981, "grad_norm": 8.925405502319336, "learning_rate": 3.03489898989899e-06, "loss": 6.334698867797852, "step": 39910 }, { "epoch": 0.09815, "grad_norm": 6.026486873626709, "learning_rate": 3.0346464646464646e-06, "loss": 6.285811996459961, "step": 39915 }, { "epoch": 0.0982, "grad_norm": 5.018006801605225, "learning_rate": 3.0343939393939396e-06, "loss": 6.253402328491211, "step": 39920 }, { "epoch": 0.09825, "grad_norm": 30.398101806640625, "learning_rate": 3.0341414141414142e-06, "loss": 6.214478302001953, "step": 39925 }, { "epoch": 0.0983, "grad_norm": 8.109334945678711, "learning_rate": 3.033888888888889e-06, "loss": 6.242169952392578, "step": 39930 }, { "epoch": 0.09835, "grad_norm": 4.3902268409729, "learning_rate": 3.0336363636363644e-06, "loss": 6.464027404785156, "step": 39935 }, { "epoch": 0.0984, "grad_norm": 9.082077026367188, "learning_rate": 3.0333838383838386e-06, "loss": 6.246677017211914, "step": 39940 }, { "epoch": 0.09845, "grad_norm": 4.724730491638184, "learning_rate": 3.033131313131313e-06, "loss": 6.377182388305664, "step": 39945 }, { "epoch": 0.0985, "grad_norm": 8.617257118225098, "learning_rate": 3.032878787878788e-06, "loss": 6.171651077270508, "step": 39950 }, { "epoch": 0.09855, "grad_norm": 6.759725093841553, "learning_rate": 3.0326262626262633e-06, "loss": 6.305452728271485, "step": 39955 }, { "epoch": 0.0986, "grad_norm": 6.3425774574279785, "learning_rate": 3.032373737373738e-06, "loss": 6.298806762695312, "step": 39960 }, { "epoch": 0.09865, "grad_norm": 5.258531093597412, "learning_rate": 3.0321212121212126e-06, "loss": 6.258492279052734, "step": 39965 }, { "epoch": 0.0987, "grad_norm": 8.805098533630371, "learning_rate": 3.0318686868686868e-06, "loss": 6.243976593017578, "step": 39970 }, { "epoch": 0.09875, "grad_norm": 7.223904609680176, "learning_rate": 3.0316161616161622e-06, "loss": 6.200222015380859, "step": 39975 }, { "epoch": 0.0988, "grad_norm": 8.230591773986816, "learning_rate": 3.031363636363637e-06, "loss": 6.248282623291016, "step": 39980 }, { "epoch": 0.09885, "grad_norm": 7.349647045135498, "learning_rate": 3.0311111111111115e-06, "loss": 6.236552047729492, "step": 39985 }, { "epoch": 0.0989, "grad_norm": 7.409433841705322, "learning_rate": 3.030858585858586e-06, "loss": 6.286089706420898, "step": 39990 }, { "epoch": 0.09895, "grad_norm": 4.738650321960449, "learning_rate": 3.030606060606061e-06, "loss": 6.2399028778076175, "step": 39995 }, { "epoch": 0.099, "grad_norm": 4.862697601318359, "learning_rate": 3.030353535353536e-06, "loss": 6.311080551147461, "step": 40000 }, { "epoch": 0.09905, "grad_norm": 5.479323387145996, "learning_rate": 3.0301010101010104e-06, "loss": 6.2537994384765625, "step": 40005 }, { "epoch": 0.0991, "grad_norm": 7.93714714050293, "learning_rate": 3.029848484848485e-06, "loss": 6.282717895507813, "step": 40010 }, { "epoch": 0.09915, "grad_norm": 17.208250045776367, "learning_rate": 3.02959595959596e-06, "loss": 6.340678024291992, "step": 40015 }, { "epoch": 0.0992, "grad_norm": 7.450162887573242, "learning_rate": 3.0293434343434348e-06, "loss": 6.3354747772216795, "step": 40020 }, { "epoch": 0.09925, "grad_norm": 8.217369079589844, "learning_rate": 3.0290909090909094e-06, "loss": 6.294911575317383, "step": 40025 }, { "epoch": 0.0993, "grad_norm": 5.488741874694824, "learning_rate": 3.028838383838384e-06, "loss": 6.450810241699219, "step": 40030 }, { "epoch": 0.09935, "grad_norm": 5.757836818695068, "learning_rate": 3.028585858585859e-06, "loss": 6.28453369140625, "step": 40035 }, { "epoch": 0.0994, "grad_norm": 7.625567436218262, "learning_rate": 3.0283333333333337e-06, "loss": 6.213248062133789, "step": 40040 }, { "epoch": 0.09945, "grad_norm": 25.471235275268555, "learning_rate": 3.0280808080808083e-06, "loss": 6.285023880004883, "step": 40045 }, { "epoch": 0.0995, "grad_norm": 3.417393207550049, "learning_rate": 3.027828282828283e-06, "loss": 6.390966796875, "step": 40050 }, { "epoch": 0.09955, "grad_norm": 5.66697883605957, "learning_rate": 3.027575757575758e-06, "loss": 6.223991394042969, "step": 40055 }, { "epoch": 0.0996, "grad_norm": 8.43359088897705, "learning_rate": 3.0273232323232326e-06, "loss": 6.246201705932617, "step": 40060 }, { "epoch": 0.09965, "grad_norm": 11.718748092651367, "learning_rate": 3.0270707070707073e-06, "loss": 6.316236114501953, "step": 40065 }, { "epoch": 0.0997, "grad_norm": 8.034736633300781, "learning_rate": 3.026818181818182e-06, "loss": 6.293602752685547, "step": 40070 }, { "epoch": 0.09975, "grad_norm": 19.3179874420166, "learning_rate": 3.026565656565657e-06, "loss": 6.197782135009765, "step": 40075 }, { "epoch": 0.0998, "grad_norm": 8.18592643737793, "learning_rate": 3.0263131313131316e-06, "loss": 6.205689239501953, "step": 40080 }, { "epoch": 0.09985, "grad_norm": 6.5794477462768555, "learning_rate": 3.0260606060606062e-06, "loss": 6.240242767333984, "step": 40085 }, { "epoch": 0.0999, "grad_norm": 6.7191314697265625, "learning_rate": 3.025808080808081e-06, "loss": 6.369915008544922, "step": 40090 }, { "epoch": 0.09995, "grad_norm": 6.726606845855713, "learning_rate": 3.025555555555556e-06, "loss": 6.358650207519531, "step": 40095 }, { "epoch": 0.1, "grad_norm": 4.878878593444824, "learning_rate": 3.0253030303030305e-06, "loss": 6.292672729492187, "step": 40100 }, { "epoch": 0.10005, "grad_norm": 16.77215003967285, "learning_rate": 3.025050505050505e-06, "loss": 6.33614501953125, "step": 40105 }, { "epoch": 0.1001, "grad_norm": 5.844650745391846, "learning_rate": 3.02479797979798e-06, "loss": 6.270985412597656, "step": 40110 }, { "epoch": 0.10015, "grad_norm": 4.5644049644470215, "learning_rate": 3.024545454545455e-06, "loss": 6.244332122802734, "step": 40115 }, { "epoch": 0.1002, "grad_norm": 12.9856595993042, "learning_rate": 3.0242929292929295e-06, "loss": 6.414250183105469, "step": 40120 }, { "epoch": 0.10025, "grad_norm": 7.032472133636475, "learning_rate": 3.024040404040404e-06, "loss": 6.248641586303711, "step": 40125 }, { "epoch": 0.1003, "grad_norm": 4.975386619567871, "learning_rate": 3.0237878787878787e-06, "loss": 6.285332870483399, "step": 40130 }, { "epoch": 0.10035, "grad_norm": 4.637630939483643, "learning_rate": 3.023535353535354e-06, "loss": 6.304057693481445, "step": 40135 }, { "epoch": 0.1004, "grad_norm": 7.323705196380615, "learning_rate": 3.0232828282828284e-06, "loss": 6.252719116210938, "step": 40140 }, { "epoch": 0.10045, "grad_norm": 4.6293230056762695, "learning_rate": 3.023030303030303e-06, "loss": 6.329254531860352, "step": 40145 }, { "epoch": 0.1005, "grad_norm": 7.017992973327637, "learning_rate": 3.0227777777777777e-06, "loss": 6.273103713989258, "step": 40150 }, { "epoch": 0.10055, "grad_norm": 6.718472957611084, "learning_rate": 3.022525252525253e-06, "loss": 6.323498153686524, "step": 40155 }, { "epoch": 0.1006, "grad_norm": 8.068868637084961, "learning_rate": 3.022272727272728e-06, "loss": 6.267002868652344, "step": 40160 }, { "epoch": 0.10065, "grad_norm": 6.103592395782471, "learning_rate": 3.022020202020202e-06, "loss": 6.2229759216308596, "step": 40165 }, { "epoch": 0.1007, "grad_norm": 6.439410209655762, "learning_rate": 3.0217676767676766e-06, "loss": 6.244852828979492, "step": 40170 }, { "epoch": 0.10075, "grad_norm": 4.778724193572998, "learning_rate": 3.021515151515152e-06, "loss": 6.281914138793946, "step": 40175 }, { "epoch": 0.1008, "grad_norm": 6.921771049499512, "learning_rate": 3.0212626262626267e-06, "loss": 6.225162506103516, "step": 40180 }, { "epoch": 0.10085, "grad_norm": 8.687201499938965, "learning_rate": 3.0210101010101014e-06, "loss": 6.248101425170899, "step": 40185 }, { "epoch": 0.1009, "grad_norm": 4.578341007232666, "learning_rate": 3.0207575757575756e-06, "loss": 6.315703582763672, "step": 40190 }, { "epoch": 0.10095, "grad_norm": 6.512423038482666, "learning_rate": 3.020505050505051e-06, "loss": 6.297991943359375, "step": 40195 }, { "epoch": 0.101, "grad_norm": 20.48904037475586, "learning_rate": 3.0202525252525257e-06, "loss": 6.274640655517578, "step": 40200 }, { "epoch": 0.10105, "grad_norm": 4.804588794708252, "learning_rate": 3.0200000000000003e-06, "loss": 6.26307373046875, "step": 40205 }, { "epoch": 0.1011, "grad_norm": 5.951425552368164, "learning_rate": 3.019747474747475e-06, "loss": 6.450259399414063, "step": 40210 }, { "epoch": 0.10115, "grad_norm": 6.497198104858398, "learning_rate": 3.01949494949495e-06, "loss": 6.26111068725586, "step": 40215 }, { "epoch": 0.1012, "grad_norm": 5.306216239929199, "learning_rate": 3.0192424242424246e-06, "loss": 6.322530364990234, "step": 40220 }, { "epoch": 0.10125, "grad_norm": 7.001226902008057, "learning_rate": 3.0189898989898993e-06, "loss": 6.263048553466797, "step": 40225 }, { "epoch": 0.1013, "grad_norm": 8.86793041229248, "learning_rate": 3.018737373737374e-06, "loss": 6.325202560424804, "step": 40230 }, { "epoch": 0.10135, "grad_norm": 22.28270149230957, "learning_rate": 3.018484848484849e-06, "loss": 6.2083282470703125, "step": 40235 }, { "epoch": 0.1014, "grad_norm": 6.189478874206543, "learning_rate": 3.0182323232323236e-06, "loss": 6.3904670715332035, "step": 40240 }, { "epoch": 0.10145, "grad_norm": 16.189926147460938, "learning_rate": 3.017979797979798e-06, "loss": 6.55665283203125, "step": 40245 }, { "epoch": 0.1015, "grad_norm": 9.641158103942871, "learning_rate": 3.017727272727273e-06, "loss": 6.301003265380859, "step": 40250 }, { "epoch": 0.10155, "grad_norm": 8.803508758544922, "learning_rate": 3.017474747474748e-06, "loss": 6.288997650146484, "step": 40255 }, { "epoch": 0.1016, "grad_norm": 3.7549238204956055, "learning_rate": 3.0172222222222225e-06, "loss": 6.3120674133300785, "step": 40260 }, { "epoch": 0.10165, "grad_norm": 4.841829776763916, "learning_rate": 3.016969696969697e-06, "loss": 6.243906402587891, "step": 40265 }, { "epoch": 0.1017, "grad_norm": 8.047773361206055, "learning_rate": 3.0167171717171718e-06, "loss": 6.269142913818359, "step": 40270 }, { "epoch": 0.10175, "grad_norm": 7.383278846740723, "learning_rate": 3.016464646464647e-06, "loss": 6.301046752929688, "step": 40275 }, { "epoch": 0.1018, "grad_norm": 6.089845180511475, "learning_rate": 3.0162121212121215e-06, "loss": 6.273354721069336, "step": 40280 }, { "epoch": 0.10185, "grad_norm": 5.913599014282227, "learning_rate": 3.015959595959596e-06, "loss": 6.306548309326172, "step": 40285 }, { "epoch": 0.1019, "grad_norm": 5.626622676849365, "learning_rate": 3.0157070707070707e-06, "loss": 6.264913177490234, "step": 40290 }, { "epoch": 0.10195, "grad_norm": 6.200212001800537, "learning_rate": 3.0154545454545458e-06, "loss": 6.268028640747071, "step": 40295 }, { "epoch": 0.102, "grad_norm": 7.8918962478637695, "learning_rate": 3.0152020202020204e-06, "loss": 6.300220489501953, "step": 40300 }, { "epoch": 0.10205, "grad_norm": 4.525073051452637, "learning_rate": 3.014949494949495e-06, "loss": 6.225251007080078, "step": 40305 }, { "epoch": 0.1021, "grad_norm": 4.62253999710083, "learning_rate": 3.0146969696969697e-06, "loss": 6.231533813476562, "step": 40310 }, { "epoch": 0.10215, "grad_norm": 4.348503112792969, "learning_rate": 3.0144444444444447e-06, "loss": 6.319445037841797, "step": 40315 }, { "epoch": 0.1022, "grad_norm": 6.964783191680908, "learning_rate": 3.0141919191919193e-06, "loss": 6.260996246337891, "step": 40320 }, { "epoch": 0.10225, "grad_norm": 9.668238639831543, "learning_rate": 3.013939393939394e-06, "loss": 6.277481460571289, "step": 40325 }, { "epoch": 0.1023, "grad_norm": 4.093064785003662, "learning_rate": 3.0136868686868686e-06, "loss": 6.281262588500977, "step": 40330 }, { "epoch": 0.10235, "grad_norm": 4.544749736785889, "learning_rate": 3.0134343434343437e-06, "loss": 6.257682800292969, "step": 40335 }, { "epoch": 0.1024, "grad_norm": 7.640080451965332, "learning_rate": 3.0131818181818183e-06, "loss": 6.250306701660156, "step": 40340 }, { "epoch": 0.10245, "grad_norm": 6.179773807525635, "learning_rate": 3.012929292929293e-06, "loss": 6.310747528076172, "step": 40345 }, { "epoch": 0.1025, "grad_norm": 6.104367256164551, "learning_rate": 3.0126767676767684e-06, "loss": 6.3015289306640625, "step": 40350 }, { "epoch": 0.10255, "grad_norm": 6.601818561553955, "learning_rate": 3.0124242424242426e-06, "loss": 6.239876556396484, "step": 40355 }, { "epoch": 0.1026, "grad_norm": 5.260216236114502, "learning_rate": 3.0121717171717172e-06, "loss": 6.286800765991211, "step": 40360 }, { "epoch": 0.10265, "grad_norm": 9.424042701721191, "learning_rate": 3.011919191919192e-06, "loss": 6.295162200927734, "step": 40365 }, { "epoch": 0.1027, "grad_norm": 5.457647323608398, "learning_rate": 3.0116666666666673e-06, "loss": 6.253933715820312, "step": 40370 }, { "epoch": 0.10275, "grad_norm": 5.727644443511963, "learning_rate": 3.011414141414142e-06, "loss": 6.262307739257812, "step": 40375 }, { "epoch": 0.1028, "grad_norm": 4.150975227355957, "learning_rate": 3.0111616161616166e-06, "loss": 6.2860980987548825, "step": 40380 }, { "epoch": 0.10285, "grad_norm": 7.042403697967529, "learning_rate": 3.010909090909091e-06, "loss": 6.284931945800781, "step": 40385 }, { "epoch": 0.1029, "grad_norm": 5.513954162597656, "learning_rate": 3.0106565656565663e-06, "loss": 6.239775085449219, "step": 40390 }, { "epoch": 0.10295, "grad_norm": 4.471090793609619, "learning_rate": 3.010404040404041e-06, "loss": 6.256489944458008, "step": 40395 }, { "epoch": 0.103, "grad_norm": 10.35987663269043, "learning_rate": 3.0101515151515155e-06, "loss": 6.535841369628907, "step": 40400 }, { "epoch": 0.10305, "grad_norm": 8.717995643615723, "learning_rate": 3.00989898989899e-06, "loss": 6.311105728149414, "step": 40405 }, { "epoch": 0.1031, "grad_norm": 6.937499046325684, "learning_rate": 3.0096464646464652e-06, "loss": 6.215644073486328, "step": 40410 }, { "epoch": 0.10315, "grad_norm": 7.145870685577393, "learning_rate": 3.00939393939394e-06, "loss": 6.431774139404297, "step": 40415 }, { "epoch": 0.1032, "grad_norm": 5.85584831237793, "learning_rate": 3.0091414141414145e-06, "loss": 6.26512451171875, "step": 40420 }, { "epoch": 0.10325, "grad_norm": 6.8029608726501465, "learning_rate": 3.008888888888889e-06, "loss": 6.374605560302735, "step": 40425 }, { "epoch": 0.1033, "grad_norm": 7.486248970031738, "learning_rate": 3.008636363636364e-06, "loss": 6.239413452148438, "step": 40430 }, { "epoch": 0.10335, "grad_norm": 4.869155406951904, "learning_rate": 3.008383838383839e-06, "loss": 6.300119781494141, "step": 40435 }, { "epoch": 0.1034, "grad_norm": 4.470053672790527, "learning_rate": 3.0081313131313134e-06, "loss": 6.336081695556641, "step": 40440 }, { "epoch": 0.10345, "grad_norm": 6.330890655517578, "learning_rate": 3.007878787878788e-06, "loss": 6.285746002197266, "step": 40445 }, { "epoch": 0.1035, "grad_norm": 5.0657196044921875, "learning_rate": 3.007626262626263e-06, "loss": 6.268943023681641, "step": 40450 }, { "epoch": 0.10355, "grad_norm": 5.9769206047058105, "learning_rate": 3.0073737373737377e-06, "loss": 6.297139739990234, "step": 40455 }, { "epoch": 0.1036, "grad_norm": 9.325493812561035, "learning_rate": 3.0071212121212124e-06, "loss": 6.527535247802734, "step": 40460 }, { "epoch": 0.10365, "grad_norm": 134.3438720703125, "learning_rate": 3.006868686868687e-06, "loss": 8.266242218017577, "step": 40465 }, { "epoch": 0.1037, "grad_norm": 125.16313934326172, "learning_rate": 3.006616161616162e-06, "loss": 12.953028869628906, "step": 40470 }, { "epoch": 0.10375, "grad_norm": 37.13949203491211, "learning_rate": 3.0063636363636367e-06, "loss": 10.4985107421875, "step": 40475 }, { "epoch": 0.1038, "grad_norm": 16.651309967041016, "learning_rate": 3.0061111111111113e-06, "loss": 6.477317047119141, "step": 40480 }, { "epoch": 0.10385, "grad_norm": 8.390119552612305, "learning_rate": 3.005858585858586e-06, "loss": 6.280329895019531, "step": 40485 }, { "epoch": 0.1039, "grad_norm": 5.177037715911865, "learning_rate": 3.005606060606061e-06, "loss": 6.285473251342774, "step": 40490 }, { "epoch": 0.10395, "grad_norm": 5.189109802246094, "learning_rate": 3.0053535353535356e-06, "loss": 6.261329269409179, "step": 40495 }, { "epoch": 0.104, "grad_norm": 7.729437351226807, "learning_rate": 3.0051010101010103e-06, "loss": 6.243847274780274, "step": 40500 }, { "epoch": 0.10405, "grad_norm": 4.976276397705078, "learning_rate": 3.004848484848485e-06, "loss": 6.289084625244141, "step": 40505 }, { "epoch": 0.1041, "grad_norm": 11.201910972595215, "learning_rate": 3.00459595959596e-06, "loss": 6.3343353271484375, "step": 40510 }, { "epoch": 0.10415, "grad_norm": 14.465672492980957, "learning_rate": 3.0043434343434346e-06, "loss": 6.304022216796875, "step": 40515 }, { "epoch": 0.1042, "grad_norm": 7.24795389175415, "learning_rate": 3.004090909090909e-06, "loss": 6.275774383544922, "step": 40520 }, { "epoch": 0.10425, "grad_norm": 13.336684226989746, "learning_rate": 3.003838383838384e-06, "loss": 6.267287063598633, "step": 40525 }, { "epoch": 0.1043, "grad_norm": 9.539112091064453, "learning_rate": 3.003585858585859e-06, "loss": 6.259711456298828, "step": 40530 }, { "epoch": 0.10435, "grad_norm": 10.171920776367188, "learning_rate": 3.0033333333333335e-06, "loss": 6.273975372314453, "step": 40535 }, { "epoch": 0.1044, "grad_norm": 5.766279697418213, "learning_rate": 3.003080808080808e-06, "loss": 6.283254241943359, "step": 40540 }, { "epoch": 0.10445, "grad_norm": 6.518184661865234, "learning_rate": 3.0028282828282828e-06, "loss": 6.280823135375977, "step": 40545 }, { "epoch": 0.1045, "grad_norm": 7.85758113861084, "learning_rate": 3.002575757575758e-06, "loss": 6.287916564941407, "step": 40550 }, { "epoch": 0.10455, "grad_norm": 7.479003429412842, "learning_rate": 3.0023232323232325e-06, "loss": 6.232364654541016, "step": 40555 }, { "epoch": 0.1046, "grad_norm": 5.107433319091797, "learning_rate": 3.002070707070707e-06, "loss": 6.2816120147705075, "step": 40560 }, { "epoch": 0.10465, "grad_norm": 6.287453651428223, "learning_rate": 3.0018181818181817e-06, "loss": 6.247831726074219, "step": 40565 }, { "epoch": 0.1047, "grad_norm": 9.774673461914062, "learning_rate": 3.001565656565657e-06, "loss": 6.31121597290039, "step": 40570 }, { "epoch": 0.10475, "grad_norm": 3.692631244659424, "learning_rate": 3.001313131313132e-06, "loss": 6.277284240722656, "step": 40575 }, { "epoch": 0.1048, "grad_norm": 5.0820631980896, "learning_rate": 3.001060606060606e-06, "loss": 6.253063583374024, "step": 40580 }, { "epoch": 0.10485, "grad_norm": 11.12437629699707, "learning_rate": 3.0008080808080807e-06, "loss": 6.2375740051269535, "step": 40585 }, { "epoch": 0.1049, "grad_norm": 5.935394763946533, "learning_rate": 3.000555555555556e-06, "loss": 6.246023941040039, "step": 40590 }, { "epoch": 0.10495, "grad_norm": 24.374509811401367, "learning_rate": 3.0003030303030308e-06, "loss": 6.248966979980469, "step": 40595 }, { "epoch": 0.105, "grad_norm": 7.359996795654297, "learning_rate": 3.0000505050505054e-06, "loss": 6.256580352783203, "step": 40600 }, { "epoch": 0.10505, "grad_norm": 9.23360824584961, "learning_rate": 2.9997979797979796e-06, "loss": 6.261281204223633, "step": 40605 }, { "epoch": 0.1051, "grad_norm": 6.719620227813721, "learning_rate": 2.999545454545455e-06, "loss": 6.238925552368164, "step": 40610 }, { "epoch": 0.10515, "grad_norm": 6.337646484375, "learning_rate": 2.9992929292929297e-06, "loss": 6.26183853149414, "step": 40615 }, { "epoch": 0.1052, "grad_norm": 6.444962024688721, "learning_rate": 2.9990404040404043e-06, "loss": 6.259833145141601, "step": 40620 }, { "epoch": 0.10525, "grad_norm": 18.569395065307617, "learning_rate": 2.998787878787879e-06, "loss": 6.209279632568359, "step": 40625 }, { "epoch": 0.1053, "grad_norm": 4.989202499389648, "learning_rate": 2.998535353535354e-06, "loss": 6.237330627441406, "step": 40630 }, { "epoch": 0.10535, "grad_norm": 3.8753609657287598, "learning_rate": 2.9982828282828287e-06, "loss": 6.2969917297363285, "step": 40635 }, { "epoch": 0.1054, "grad_norm": 9.704370498657227, "learning_rate": 2.9980303030303033e-06, "loss": 6.222171020507813, "step": 40640 }, { "epoch": 0.10545, "grad_norm": 7.0230207443237305, "learning_rate": 2.997777777777778e-06, "loss": 6.05952262878418, "step": 40645 }, { "epoch": 0.1055, "grad_norm": 10.156947135925293, "learning_rate": 2.997525252525253e-06, "loss": 6.341531372070312, "step": 40650 }, { "epoch": 0.10555, "grad_norm": 14.176620483398438, "learning_rate": 2.9972727272727276e-06, "loss": 6.273281097412109, "step": 40655 }, { "epoch": 0.1056, "grad_norm": 8.913905143737793, "learning_rate": 2.9970202020202022e-06, "loss": 6.289741134643554, "step": 40660 }, { "epoch": 0.10565, "grad_norm": 10.895167350769043, "learning_rate": 2.996767676767677e-06, "loss": 6.2671558380126955, "step": 40665 }, { "epoch": 0.1057, "grad_norm": 6.0147857666015625, "learning_rate": 2.996515151515152e-06, "loss": 6.2519386291503904, "step": 40670 }, { "epoch": 0.10575, "grad_norm": 6.228724956512451, "learning_rate": 2.9962626262626265e-06, "loss": 6.274525833129883, "step": 40675 }, { "epoch": 0.1058, "grad_norm": 5.969265937805176, "learning_rate": 2.996010101010101e-06, "loss": 6.267035675048828, "step": 40680 }, { "epoch": 0.10585, "grad_norm": 8.021136283874512, "learning_rate": 2.995757575757576e-06, "loss": 6.297406005859375, "step": 40685 }, { "epoch": 0.1059, "grad_norm": 6.820622444152832, "learning_rate": 2.995505050505051e-06, "loss": 6.323248291015625, "step": 40690 }, { "epoch": 0.10595, "grad_norm": 10.419938087463379, "learning_rate": 2.9952525252525255e-06, "loss": 6.284586334228516, "step": 40695 }, { "epoch": 0.106, "grad_norm": 7.659506797790527, "learning_rate": 2.995e-06, "loss": 6.346948623657227, "step": 40700 }, { "epoch": 0.10605, "grad_norm": 28.604604721069336, "learning_rate": 2.9947474747474748e-06, "loss": 6.3267333984375, "step": 40705 }, { "epoch": 0.1061, "grad_norm": 3.9632394313812256, "learning_rate": 2.99449494949495e-06, "loss": 6.2607673645019535, "step": 40710 }, { "epoch": 0.10615, "grad_norm": 4.21470832824707, "learning_rate": 2.9942424242424244e-06, "loss": 6.224674987792969, "step": 40715 }, { "epoch": 0.1062, "grad_norm": 6.9980692863464355, "learning_rate": 2.993989898989899e-06, "loss": 6.231050109863281, "step": 40720 }, { "epoch": 0.10625, "grad_norm": 6.107598781585693, "learning_rate": 2.9937373737373737e-06, "loss": 6.2468116760253904, "step": 40725 }, { "epoch": 0.1063, "grad_norm": 7.791594982147217, "learning_rate": 2.9934848484848488e-06, "loss": 6.267155456542969, "step": 40730 }, { "epoch": 0.10635, "grad_norm": 7.453476905822754, "learning_rate": 2.9932323232323234e-06, "loss": 6.256111526489258, "step": 40735 }, { "epoch": 0.1064, "grad_norm": 8.638350486755371, "learning_rate": 2.992979797979798e-06, "loss": 6.422781372070313, "step": 40740 }, { "epoch": 0.10645, "grad_norm": 9.034923553466797, "learning_rate": 2.9927272727272726e-06, "loss": 6.24482421875, "step": 40745 }, { "epoch": 0.1065, "grad_norm": 12.864320755004883, "learning_rate": 2.9924747474747477e-06, "loss": 6.4788818359375, "step": 40750 }, { "epoch": 0.10655, "grad_norm": 10.517550468444824, "learning_rate": 2.9922222222222223e-06, "loss": 6.341886901855469, "step": 40755 }, { "epoch": 0.1066, "grad_norm": 14.767560005187988, "learning_rate": 2.991969696969697e-06, "loss": 6.200382232666016, "step": 40760 }, { "epoch": 0.10665, "grad_norm": 7.320537567138672, "learning_rate": 2.9917171717171716e-06, "loss": 6.221676635742187, "step": 40765 }, { "epoch": 0.1067, "grad_norm": 7.434189796447754, "learning_rate": 2.9914646464646466e-06, "loss": 6.2841846466064455, "step": 40770 }, { "epoch": 0.10675, "grad_norm": 4.990092754364014, "learning_rate": 2.9912121212121213e-06, "loss": 6.324711608886719, "step": 40775 }, { "epoch": 0.1068, "grad_norm": 4.967430114746094, "learning_rate": 2.990959595959596e-06, "loss": 6.264008331298828, "step": 40780 }, { "epoch": 0.10685, "grad_norm": 20.42410659790039, "learning_rate": 2.9907070707070714e-06, "loss": 6.2788043975830075, "step": 40785 }, { "epoch": 0.1069, "grad_norm": 6.302306652069092, "learning_rate": 2.990454545454546e-06, "loss": 6.290393829345703, "step": 40790 }, { "epoch": 0.10695, "grad_norm": 6.929636001586914, "learning_rate": 2.9902020202020206e-06, "loss": 6.340483093261719, "step": 40795 }, { "epoch": 0.107, "grad_norm": 7.211329936981201, "learning_rate": 2.989949494949495e-06, "loss": 6.228456115722656, "step": 40800 }, { "epoch": 0.10705, "grad_norm": 12.614211082458496, "learning_rate": 2.9896969696969703e-06, "loss": 6.360259246826172, "step": 40805 }, { "epoch": 0.1071, "grad_norm": 14.20513916015625, "learning_rate": 2.989444444444445e-06, "loss": 6.334272766113282, "step": 40810 }, { "epoch": 0.10715, "grad_norm": 5.4767584800720215, "learning_rate": 2.9891919191919196e-06, "loss": 6.283755874633789, "step": 40815 }, { "epoch": 0.1072, "grad_norm": 4.442662715911865, "learning_rate": 2.988939393939394e-06, "loss": 6.245004653930664, "step": 40820 }, { "epoch": 0.10725, "grad_norm": 8.286430358886719, "learning_rate": 2.9886868686868693e-06, "loss": 6.25506820678711, "step": 40825 }, { "epoch": 0.1073, "grad_norm": 4.576002597808838, "learning_rate": 2.988434343434344e-06, "loss": 6.280361175537109, "step": 40830 }, { "epoch": 0.10735, "grad_norm": 7.373710632324219, "learning_rate": 2.9881818181818185e-06, "loss": 6.2281982421875, "step": 40835 }, { "epoch": 0.1074, "grad_norm": 7.602756977081299, "learning_rate": 2.987929292929293e-06, "loss": 6.69635009765625, "step": 40840 }, { "epoch": 0.10745, "grad_norm": 4.879395008087158, "learning_rate": 2.987676767676768e-06, "loss": 6.279949188232422, "step": 40845 }, { "epoch": 0.1075, "grad_norm": 5.522468090057373, "learning_rate": 2.987424242424243e-06, "loss": 6.2714378356933596, "step": 40850 }, { "epoch": 0.10755, "grad_norm": 5.0823822021484375, "learning_rate": 2.9871717171717175e-06, "loss": 6.273974990844726, "step": 40855 }, { "epoch": 0.1076, "grad_norm": 7.435618877410889, "learning_rate": 2.986919191919192e-06, "loss": 6.257195663452149, "step": 40860 }, { "epoch": 0.10765, "grad_norm": 6.0120673179626465, "learning_rate": 2.986666666666667e-06, "loss": 6.301453399658203, "step": 40865 }, { "epoch": 0.1077, "grad_norm": 6.851010799407959, "learning_rate": 2.9864141414141418e-06, "loss": 6.370705413818359, "step": 40870 }, { "epoch": 0.10775, "grad_norm": 2.761730432510376, "learning_rate": 2.9861616161616164e-06, "loss": 6.269339752197266, "step": 40875 }, { "epoch": 0.1078, "grad_norm": 6.472357273101807, "learning_rate": 2.985909090909091e-06, "loss": 6.264884567260742, "step": 40880 }, { "epoch": 0.10785, "grad_norm": 4.667497158050537, "learning_rate": 2.985656565656566e-06, "loss": 6.336279296875, "step": 40885 }, { "epoch": 0.1079, "grad_norm": 4.179515361785889, "learning_rate": 2.9854040404040407e-06, "loss": 6.266117095947266, "step": 40890 }, { "epoch": 0.10795, "grad_norm": 29.279041290283203, "learning_rate": 2.9851515151515154e-06, "loss": 6.178432846069336, "step": 40895 }, { "epoch": 0.108, "grad_norm": 9.64975643157959, "learning_rate": 2.98489898989899e-06, "loss": 6.34957275390625, "step": 40900 }, { "epoch": 0.10805, "grad_norm": 8.178918838500977, "learning_rate": 2.984646464646465e-06, "loss": 6.214247512817383, "step": 40905 }, { "epoch": 0.1081, "grad_norm": 5.949940204620361, "learning_rate": 2.9843939393939397e-06, "loss": 6.307320404052734, "step": 40910 }, { "epoch": 0.10815, "grad_norm": 4.296314716339111, "learning_rate": 2.9841414141414143e-06, "loss": 6.380503082275391, "step": 40915 }, { "epoch": 0.1082, "grad_norm": 5.258572101593018, "learning_rate": 2.983888888888889e-06, "loss": 6.261359024047851, "step": 40920 }, { "epoch": 0.10825, "grad_norm": 5.684201240539551, "learning_rate": 2.983636363636364e-06, "loss": 6.271492385864258, "step": 40925 }, { "epoch": 0.1083, "grad_norm": 8.404999732971191, "learning_rate": 2.9833838383838386e-06, "loss": 6.271096038818359, "step": 40930 }, { "epoch": 0.10835, "grad_norm": 5.682455539703369, "learning_rate": 2.9831313131313132e-06, "loss": 6.229798126220703, "step": 40935 }, { "epoch": 0.1084, "grad_norm": 4.5564351081848145, "learning_rate": 2.982878787878788e-06, "loss": 6.2810111999511715, "step": 40940 }, { "epoch": 0.10845, "grad_norm": 9.572443008422852, "learning_rate": 2.982626262626263e-06, "loss": 6.346748733520508, "step": 40945 }, { "epoch": 0.1085, "grad_norm": 5.528713703155518, "learning_rate": 2.9823737373737376e-06, "loss": 6.271268844604492, "step": 40950 }, { "epoch": 0.10855, "grad_norm": 4.9156036376953125, "learning_rate": 2.982121212121212e-06, "loss": 6.266257858276367, "step": 40955 }, { "epoch": 0.1086, "grad_norm": 16.79425048828125, "learning_rate": 2.981868686868687e-06, "loss": 6.286772537231445, "step": 40960 }, { "epoch": 0.10865, "grad_norm": 7.619675636291504, "learning_rate": 2.981616161616162e-06, "loss": 6.336900329589843, "step": 40965 }, { "epoch": 0.1087, "grad_norm": 7.393255233764648, "learning_rate": 2.9813636363636365e-06, "loss": 6.228494262695312, "step": 40970 }, { "epoch": 0.10875, "grad_norm": 8.764200210571289, "learning_rate": 2.981111111111111e-06, "loss": 6.292969512939453, "step": 40975 }, { "epoch": 0.1088, "grad_norm": 6.330799102783203, "learning_rate": 2.9808585858585858e-06, "loss": 6.313798522949218, "step": 40980 }, { "epoch": 0.10885, "grad_norm": 4.762684345245361, "learning_rate": 2.9806060606060612e-06, "loss": 6.279609680175781, "step": 40985 }, { "epoch": 0.1089, "grad_norm": 4.448489189147949, "learning_rate": 2.980353535353536e-06, "loss": 6.248809051513672, "step": 40990 }, { "epoch": 0.10895, "grad_norm": 6.324843883514404, "learning_rate": 2.98010101010101e-06, "loss": 6.2090202331542965, "step": 40995 }, { "epoch": 0.109, "grad_norm": 10.512880325317383, "learning_rate": 2.9798484848484847e-06, "loss": 6.3197887420654295, "step": 41000 }, { "epoch": 0.10905, "grad_norm": 4.574416160583496, "learning_rate": 2.97959595959596e-06, "loss": 6.2608287811279295, "step": 41005 }, { "epoch": 0.1091, "grad_norm": 12.36335277557373, "learning_rate": 2.979343434343435e-06, "loss": 6.373162841796875, "step": 41010 }, { "epoch": 0.10915, "grad_norm": 10.4263277053833, "learning_rate": 2.9790909090909094e-06, "loss": 6.241296005249024, "step": 41015 }, { "epoch": 0.1092, "grad_norm": 7.446870803833008, "learning_rate": 2.978838383838384e-06, "loss": 6.312519073486328, "step": 41020 }, { "epoch": 0.10925, "grad_norm": 6.928959369659424, "learning_rate": 2.978585858585859e-06, "loss": 6.255401992797852, "step": 41025 }, { "epoch": 0.1093, "grad_norm": 4.91571044921875, "learning_rate": 2.9783333333333338e-06, "loss": 6.258394622802735, "step": 41030 }, { "epoch": 0.10935, "grad_norm": 5.362354278564453, "learning_rate": 2.9780808080808084e-06, "loss": 6.264893341064453, "step": 41035 }, { "epoch": 0.1094, "grad_norm": 6.303365707397461, "learning_rate": 2.977828282828283e-06, "loss": 6.251821136474609, "step": 41040 }, { "epoch": 0.10945, "grad_norm": 6.251884460449219, "learning_rate": 2.977575757575758e-06, "loss": 6.248333358764649, "step": 41045 }, { "epoch": 0.1095, "grad_norm": 5.371576309204102, "learning_rate": 2.9773232323232327e-06, "loss": 6.262422943115235, "step": 41050 }, { "epoch": 0.10955, "grad_norm": 6.611239910125732, "learning_rate": 2.9770707070707073e-06, "loss": 6.267708587646484, "step": 41055 }, { "epoch": 0.1096, "grad_norm": 5.9880194664001465, "learning_rate": 2.976818181818182e-06, "loss": 6.266776275634766, "step": 41060 }, { "epoch": 0.10965, "grad_norm": 7.130743026733398, "learning_rate": 2.976565656565657e-06, "loss": 6.271404647827149, "step": 41065 }, { "epoch": 0.1097, "grad_norm": 12.478859901428223, "learning_rate": 2.9763131313131316e-06, "loss": 6.298145294189453, "step": 41070 }, { "epoch": 0.10975, "grad_norm": 9.258267402648926, "learning_rate": 2.9760606060606063e-06, "loss": 6.2506462097167965, "step": 41075 }, { "epoch": 0.1098, "grad_norm": 5.856215476989746, "learning_rate": 2.975808080808081e-06, "loss": 6.214831161499023, "step": 41080 }, { "epoch": 0.10985, "grad_norm": 5.476376056671143, "learning_rate": 2.975555555555556e-06, "loss": 6.36833610534668, "step": 41085 }, { "epoch": 0.1099, "grad_norm": 6.68466854095459, "learning_rate": 2.9753030303030306e-06, "loss": 6.273912811279297, "step": 41090 }, { "epoch": 0.10995, "grad_norm": 11.363049507141113, "learning_rate": 2.9750505050505052e-06, "loss": 6.247793960571289, "step": 41095 }, { "epoch": 0.11, "grad_norm": 7.614119052886963, "learning_rate": 2.97479797979798e-06, "loss": 6.280935668945313, "step": 41100 }, { "epoch": 0.11005, "grad_norm": 12.029210090637207, "learning_rate": 2.974545454545455e-06, "loss": 6.5172889709472654, "step": 41105 }, { "epoch": 0.1101, "grad_norm": 8.118119239807129, "learning_rate": 2.9742929292929295e-06, "loss": 6.268307495117187, "step": 41110 }, { "epoch": 0.11015, "grad_norm": 7.955708026885986, "learning_rate": 2.974040404040404e-06, "loss": 6.322783279418945, "step": 41115 }, { "epoch": 0.1102, "grad_norm": 3.6563146114349365, "learning_rate": 2.973787878787879e-06, "loss": 6.265524291992188, "step": 41120 }, { "epoch": 0.11025, "grad_norm": 6.1474761962890625, "learning_rate": 2.973535353535354e-06, "loss": 6.261041259765625, "step": 41125 }, { "epoch": 0.1103, "grad_norm": 4.379037380218506, "learning_rate": 2.9732828282828285e-06, "loss": 6.292686843872071, "step": 41130 }, { "epoch": 0.11035, "grad_norm": 16.48497772216797, "learning_rate": 2.973030303030303e-06, "loss": 6.535237121582031, "step": 41135 }, { "epoch": 0.1104, "grad_norm": 5.66033411026001, "learning_rate": 2.9727777777777777e-06, "loss": 6.4000297546386715, "step": 41140 }, { "epoch": 0.11045, "grad_norm": 4.6377644538879395, "learning_rate": 2.972525252525253e-06, "loss": 6.291164779663086, "step": 41145 }, { "epoch": 0.1105, "grad_norm": 5.350574970245361, "learning_rate": 2.9722727272727274e-06, "loss": 6.247637557983398, "step": 41150 }, { "epoch": 0.11055, "grad_norm": 6.493470191955566, "learning_rate": 2.972020202020202e-06, "loss": 6.254945373535156, "step": 41155 }, { "epoch": 0.1106, "grad_norm": 5.607198715209961, "learning_rate": 2.9717676767676767e-06, "loss": 6.271080398559571, "step": 41160 }, { "epoch": 0.11065, "grad_norm": 8.763639450073242, "learning_rate": 2.9715151515151517e-06, "loss": 6.272879028320313, "step": 41165 }, { "epoch": 0.1107, "grad_norm": 5.549384593963623, "learning_rate": 2.9712626262626264e-06, "loss": 6.256103515625, "step": 41170 }, { "epoch": 0.11075, "grad_norm": 8.347752571105957, "learning_rate": 2.971010101010101e-06, "loss": 6.290843963623047, "step": 41175 }, { "epoch": 0.1108, "grad_norm": 4.552396297454834, "learning_rate": 2.9707575757575756e-06, "loss": 6.280599594116211, "step": 41180 }, { "epoch": 0.11085, "grad_norm": 5.94397497177124, "learning_rate": 2.9705050505050507e-06, "loss": 6.238227844238281, "step": 41185 }, { "epoch": 0.1109, "grad_norm": 7.0707197189331055, "learning_rate": 2.9702525252525253e-06, "loss": 6.239736938476563, "step": 41190 }, { "epoch": 0.11095, "grad_norm": 6.7687907218933105, "learning_rate": 2.97e-06, "loss": 6.323059844970703, "step": 41195 }, { "epoch": 0.111, "grad_norm": 6.979925155639648, "learning_rate": 2.9697474747474754e-06, "loss": 6.277816390991211, "step": 41200 }, { "epoch": 0.11105, "grad_norm": 6.223848819732666, "learning_rate": 2.96949494949495e-06, "loss": 6.2793537139892575, "step": 41205 }, { "epoch": 0.1111, "grad_norm": 5.624636173248291, "learning_rate": 2.9692424242424247e-06, "loss": 6.268921661376953, "step": 41210 }, { "epoch": 0.11115, "grad_norm": 4.727057456970215, "learning_rate": 2.968989898989899e-06, "loss": 6.2523048400878904, "step": 41215 }, { "epoch": 0.1112, "grad_norm": 6.495463848114014, "learning_rate": 2.9687373737373744e-06, "loss": 6.220867156982422, "step": 41220 }, { "epoch": 0.11125, "grad_norm": 4.154056072235107, "learning_rate": 2.968484848484849e-06, "loss": 6.285371780395508, "step": 41225 }, { "epoch": 0.1113, "grad_norm": 6.523233890533447, "learning_rate": 2.9682323232323236e-06, "loss": 6.296580123901367, "step": 41230 }, { "epoch": 0.11135, "grad_norm": 6.064924716949463, "learning_rate": 2.9679797979797983e-06, "loss": 6.293346405029297, "step": 41235 }, { "epoch": 0.1114, "grad_norm": 6.762385368347168, "learning_rate": 2.9677272727272733e-06, "loss": 6.289669799804687, "step": 41240 }, { "epoch": 0.11145, "grad_norm": 6.701186180114746, "learning_rate": 2.967474747474748e-06, "loss": 6.2802589416503904, "step": 41245 }, { "epoch": 0.1115, "grad_norm": 4.8078460693359375, "learning_rate": 2.9672222222222226e-06, "loss": 6.222413635253906, "step": 41250 }, { "epoch": 0.11155, "grad_norm": 4.475780963897705, "learning_rate": 2.966969696969697e-06, "loss": 6.2957305908203125, "step": 41255 }, { "epoch": 0.1116, "grad_norm": 8.48910903930664, "learning_rate": 2.9667171717171722e-06, "loss": 6.272172546386718, "step": 41260 }, { "epoch": 0.11165, "grad_norm": 6.871008396148682, "learning_rate": 2.966464646464647e-06, "loss": 6.248432922363281, "step": 41265 }, { "epoch": 0.1117, "grad_norm": 19.11674690246582, "learning_rate": 2.9662121212121215e-06, "loss": 6.322248077392578, "step": 41270 }, { "epoch": 0.11175, "grad_norm": 8.00665283203125, "learning_rate": 2.965959595959596e-06, "loss": 6.262233734130859, "step": 41275 }, { "epoch": 0.1118, "grad_norm": 5.737044334411621, "learning_rate": 2.965707070707071e-06, "loss": 6.261259460449219, "step": 41280 }, { "epoch": 0.11185, "grad_norm": 8.313483238220215, "learning_rate": 2.965454545454546e-06, "loss": 6.264871978759766, "step": 41285 }, { "epoch": 0.1119, "grad_norm": 5.187436103820801, "learning_rate": 2.9652020202020205e-06, "loss": 6.2812145233154295, "step": 41290 }, { "epoch": 0.11195, "grad_norm": 4.891008377075195, "learning_rate": 2.964949494949495e-06, "loss": 6.293492126464844, "step": 41295 }, { "epoch": 0.112, "grad_norm": 4.70589542388916, "learning_rate": 2.96469696969697e-06, "loss": 6.3165332794189455, "step": 41300 }, { "epoch": 0.11205, "grad_norm": 7.696374416351318, "learning_rate": 2.9644444444444448e-06, "loss": 6.256259918212891, "step": 41305 }, { "epoch": 0.1121, "grad_norm": 4.608224868774414, "learning_rate": 2.9641919191919194e-06, "loss": 6.285852432250977, "step": 41310 }, { "epoch": 0.11215, "grad_norm": 6.0423126220703125, "learning_rate": 2.963939393939394e-06, "loss": 6.289660263061523, "step": 41315 }, { "epoch": 0.1122, "grad_norm": 7.3712592124938965, "learning_rate": 2.963686868686869e-06, "loss": 6.33595085144043, "step": 41320 }, { "epoch": 0.11225, "grad_norm": 7.778461456298828, "learning_rate": 2.9634343434343437e-06, "loss": 6.262926864624023, "step": 41325 }, { "epoch": 0.1123, "grad_norm": 6.294922828674316, "learning_rate": 2.9631818181818183e-06, "loss": 6.232413101196289, "step": 41330 }, { "epoch": 0.11235, "grad_norm": 8.397032737731934, "learning_rate": 2.962929292929293e-06, "loss": 6.295490646362305, "step": 41335 }, { "epoch": 0.1124, "grad_norm": 6.530921936035156, "learning_rate": 2.962676767676768e-06, "loss": 6.268463516235352, "step": 41340 }, { "epoch": 0.11245, "grad_norm": 34.06431198120117, "learning_rate": 2.9624242424242427e-06, "loss": 6.445970153808593, "step": 41345 }, { "epoch": 0.1125, "grad_norm": 4.121753215789795, "learning_rate": 2.9621717171717173e-06, "loss": 6.258296203613281, "step": 41350 }, { "epoch": 0.11255, "grad_norm": 3.7035129070281982, "learning_rate": 2.961919191919192e-06, "loss": 6.244090270996094, "step": 41355 }, { "epoch": 0.1126, "grad_norm": 6.9883952140808105, "learning_rate": 2.961666666666667e-06, "loss": 6.2421222686767575, "step": 41360 }, { "epoch": 0.11265, "grad_norm": 15.533695220947266, "learning_rate": 2.9614141414141416e-06, "loss": 6.309162139892578, "step": 41365 }, { "epoch": 0.1127, "grad_norm": 4.674856662750244, "learning_rate": 2.9611616161616162e-06, "loss": 6.274088668823242, "step": 41370 }, { "epoch": 0.11275, "grad_norm": 8.235374450683594, "learning_rate": 2.960909090909091e-06, "loss": 6.209796142578125, "step": 41375 }, { "epoch": 0.1128, "grad_norm": 9.513147354125977, "learning_rate": 2.960656565656566e-06, "loss": 6.251120758056641, "step": 41380 }, { "epoch": 0.11285, "grad_norm": 5.406252384185791, "learning_rate": 2.9604040404040405e-06, "loss": 6.2913658142089846, "step": 41385 }, { "epoch": 0.1129, "grad_norm": 6.208230018615723, "learning_rate": 2.960151515151515e-06, "loss": 6.293059158325195, "step": 41390 }, { "epoch": 0.11295, "grad_norm": 14.40453052520752, "learning_rate": 2.95989898989899e-06, "loss": 6.297216796875, "step": 41395 }, { "epoch": 0.113, "grad_norm": 8.947781562805176, "learning_rate": 2.9596464646464653e-06, "loss": 6.4792327880859375, "step": 41400 }, { "epoch": 0.11305, "grad_norm": 5.911376476287842, "learning_rate": 2.95939393939394e-06, "loss": 6.2596588134765625, "step": 41405 }, { "epoch": 0.1131, "grad_norm": 9.886540412902832, "learning_rate": 2.959141414141414e-06, "loss": 6.3148548126220705, "step": 41410 }, { "epoch": 0.11315, "grad_norm": 4.352113723754883, "learning_rate": 2.9588888888888887e-06, "loss": 6.285553741455078, "step": 41415 }, { "epoch": 0.1132, "grad_norm": 5.89048957824707, "learning_rate": 2.9586363636363642e-06, "loss": 6.293347549438477, "step": 41420 }, { "epoch": 0.11325, "grad_norm": 7.9286789894104, "learning_rate": 2.958383838383839e-06, "loss": 6.284070205688477, "step": 41425 }, { "epoch": 0.1133, "grad_norm": 8.105696678161621, "learning_rate": 2.9581313131313135e-06, "loss": 6.216458511352539, "step": 41430 }, { "epoch": 0.11335, "grad_norm": 8.600119590759277, "learning_rate": 2.957878787878788e-06, "loss": 6.25108757019043, "step": 41435 }, { "epoch": 0.1134, "grad_norm": 6.105043888092041, "learning_rate": 2.957626262626263e-06, "loss": 6.260860824584961, "step": 41440 }, { "epoch": 0.11345, "grad_norm": 33.616058349609375, "learning_rate": 2.957373737373738e-06, "loss": 6.410401916503906, "step": 41445 }, { "epoch": 0.1135, "grad_norm": 9.411081314086914, "learning_rate": 2.9571212121212124e-06, "loss": 6.2671043395996096, "step": 41450 }, { "epoch": 0.11355, "grad_norm": 6.475320339202881, "learning_rate": 2.956868686868687e-06, "loss": 6.258475112915039, "step": 41455 }, { "epoch": 0.1136, "grad_norm": 5.441645622253418, "learning_rate": 2.956616161616162e-06, "loss": 6.327923583984375, "step": 41460 }, { "epoch": 0.11365, "grad_norm": 7.256167888641357, "learning_rate": 2.9563636363636367e-06, "loss": 6.2333526611328125, "step": 41465 }, { "epoch": 0.1137, "grad_norm": 5.314755439758301, "learning_rate": 2.9561111111111114e-06, "loss": 6.26744270324707, "step": 41470 }, { "epoch": 0.11375, "grad_norm": 4.0301384925842285, "learning_rate": 2.955858585858586e-06, "loss": 6.282952880859375, "step": 41475 }, { "epoch": 0.1138, "grad_norm": 9.508482933044434, "learning_rate": 2.955606060606061e-06, "loss": 6.235935211181641, "step": 41480 }, { "epoch": 0.11385, "grad_norm": 5.513493537902832, "learning_rate": 2.9553535353535357e-06, "loss": 6.251705169677734, "step": 41485 }, { "epoch": 0.1139, "grad_norm": 7.165191173553467, "learning_rate": 2.9551010101010103e-06, "loss": 6.244139862060547, "step": 41490 }, { "epoch": 0.11395, "grad_norm": 16.954233169555664, "learning_rate": 2.954848484848485e-06, "loss": 6.326641845703125, "step": 41495 }, { "epoch": 0.114, "grad_norm": 6.347146987915039, "learning_rate": 2.95459595959596e-06, "loss": 6.249903869628906, "step": 41500 }, { "epoch": 0.11405, "grad_norm": 6.157090663909912, "learning_rate": 2.9543434343434346e-06, "loss": 6.218859100341797, "step": 41505 }, { "epoch": 0.1141, "grad_norm": 6.450079441070557, "learning_rate": 2.9540909090909093e-06, "loss": 6.253480529785156, "step": 41510 }, { "epoch": 0.11415, "grad_norm": 3.8873414993286133, "learning_rate": 2.953838383838384e-06, "loss": 6.235320281982422, "step": 41515 }, { "epoch": 0.1142, "grad_norm": 5.781500339508057, "learning_rate": 2.953585858585859e-06, "loss": 6.228903961181641, "step": 41520 }, { "epoch": 0.11425, "grad_norm": 25.64179039001465, "learning_rate": 2.9533333333333336e-06, "loss": 6.35697021484375, "step": 41525 }, { "epoch": 0.1143, "grad_norm": 3.9164583683013916, "learning_rate": 2.953080808080808e-06, "loss": 6.266347885131836, "step": 41530 }, { "epoch": 0.11435, "grad_norm": 13.64067554473877, "learning_rate": 2.952828282828283e-06, "loss": 6.24957504272461, "step": 41535 }, { "epoch": 0.1144, "grad_norm": 8.292939186096191, "learning_rate": 2.952575757575758e-06, "loss": 6.218635177612304, "step": 41540 }, { "epoch": 0.11445, "grad_norm": 3.032524347305298, "learning_rate": 2.9523232323232325e-06, "loss": 6.317592620849609, "step": 41545 }, { "epoch": 0.1145, "grad_norm": 6.949008464813232, "learning_rate": 2.952070707070707e-06, "loss": 6.234265899658203, "step": 41550 }, { "epoch": 0.11455, "grad_norm": 6.147146224975586, "learning_rate": 2.9518181818181818e-06, "loss": 6.3040321350097654, "step": 41555 }, { "epoch": 0.1146, "grad_norm": 6.422920227050781, "learning_rate": 2.951565656565657e-06, "loss": 6.264558410644531, "step": 41560 }, { "epoch": 0.11465, "grad_norm": 4.4102349281311035, "learning_rate": 2.9513131313131315e-06, "loss": 6.2358654022216795, "step": 41565 }, { "epoch": 0.1147, "grad_norm": 6.700959205627441, "learning_rate": 2.951060606060606e-06, "loss": 6.298392486572266, "step": 41570 }, { "epoch": 0.11475, "grad_norm": 6.390358924865723, "learning_rate": 2.9508080808080807e-06, "loss": 6.313025283813476, "step": 41575 }, { "epoch": 0.1148, "grad_norm": 5.053467750549316, "learning_rate": 2.9505555555555558e-06, "loss": 6.289539337158203, "step": 41580 }, { "epoch": 0.11485, "grad_norm": 7.006431579589844, "learning_rate": 2.9503030303030304e-06, "loss": 6.312524795532227, "step": 41585 }, { "epoch": 0.1149, "grad_norm": 5.068919658660889, "learning_rate": 2.950050505050505e-06, "loss": 6.2761962890625, "step": 41590 }, { "epoch": 0.11495, "grad_norm": 4.276952266693115, "learning_rate": 2.9497979797979797e-06, "loss": 6.298951721191406, "step": 41595 }, { "epoch": 0.115, "grad_norm": 7.9876203536987305, "learning_rate": 2.949545454545455e-06, "loss": 6.236902236938477, "step": 41600 }, { "epoch": 0.11505, "grad_norm": 6.537814617156982, "learning_rate": 2.9492929292929293e-06, "loss": 6.246965026855468, "step": 41605 }, { "epoch": 0.1151, "grad_norm": 5.264881134033203, "learning_rate": 2.949040404040404e-06, "loss": 6.269857025146484, "step": 41610 }, { "epoch": 0.11515, "grad_norm": 16.27280044555664, "learning_rate": 2.9487878787878786e-06, "loss": 6.279951477050782, "step": 41615 }, { "epoch": 0.1152, "grad_norm": 4.170151710510254, "learning_rate": 2.948535353535354e-06, "loss": 6.259539794921875, "step": 41620 }, { "epoch": 0.11525, "grad_norm": 5.9737701416015625, "learning_rate": 2.9482828282828287e-06, "loss": 6.24888801574707, "step": 41625 }, { "epoch": 0.1153, "grad_norm": 19.045974731445312, "learning_rate": 2.948030303030303e-06, "loss": 6.406942749023438, "step": 41630 }, { "epoch": 0.11535, "grad_norm": 31.222557067871094, "learning_rate": 2.9477777777777784e-06, "loss": 6.287809753417969, "step": 41635 }, { "epoch": 0.1154, "grad_norm": 5.972186088562012, "learning_rate": 2.947525252525253e-06, "loss": 6.140472412109375, "step": 41640 }, { "epoch": 0.11545, "grad_norm": 5.247162342071533, "learning_rate": 2.9472727272727277e-06, "loss": 6.250090408325195, "step": 41645 }, { "epoch": 0.1155, "grad_norm": 9.633713722229004, "learning_rate": 2.9470202020202023e-06, "loss": 6.293495178222656, "step": 41650 }, { "epoch": 0.11555, "grad_norm": 8.72830867767334, "learning_rate": 2.9467676767676773e-06, "loss": 6.391761398315429, "step": 41655 }, { "epoch": 0.1156, "grad_norm": 7.226131439208984, "learning_rate": 2.946515151515152e-06, "loss": 6.2871246337890625, "step": 41660 }, { "epoch": 0.11565, "grad_norm": 6.7605109214782715, "learning_rate": 2.9462626262626266e-06, "loss": 6.470735168457031, "step": 41665 }, { "epoch": 0.1157, "grad_norm": 6.182495594024658, "learning_rate": 2.9460101010101012e-06, "loss": 6.246529769897461, "step": 41670 }, { "epoch": 0.11575, "grad_norm": 10.815364837646484, "learning_rate": 2.9457575757575763e-06, "loss": 6.22394905090332, "step": 41675 }, { "epoch": 0.1158, "grad_norm": 4.860632419586182, "learning_rate": 2.945505050505051e-06, "loss": 6.323815155029297, "step": 41680 }, { "epoch": 0.11585, "grad_norm": 5.676327705383301, "learning_rate": 2.9452525252525255e-06, "loss": 6.338115310668945, "step": 41685 }, { "epoch": 0.1159, "grad_norm": 10.705113410949707, "learning_rate": 2.945e-06, "loss": 6.285449600219726, "step": 41690 }, { "epoch": 0.11595, "grad_norm": 6.833688735961914, "learning_rate": 2.9447474747474752e-06, "loss": 6.251067352294922, "step": 41695 }, { "epoch": 0.116, "grad_norm": 6.066240310668945, "learning_rate": 2.94449494949495e-06, "loss": 6.268013381958008, "step": 41700 }, { "epoch": 0.11605, "grad_norm": 7.623030662536621, "learning_rate": 2.9442424242424245e-06, "loss": 6.276050567626953, "step": 41705 }, { "epoch": 0.1161, "grad_norm": 6.072610855102539, "learning_rate": 2.943989898989899e-06, "loss": 6.246738052368164, "step": 41710 }, { "epoch": 0.11615, "grad_norm": 9.705352783203125, "learning_rate": 2.943737373737374e-06, "loss": 6.299085998535157, "step": 41715 }, { "epoch": 0.1162, "grad_norm": 5.936763286590576, "learning_rate": 2.943484848484849e-06, "loss": 6.273356628417969, "step": 41720 }, { "epoch": 0.11625, "grad_norm": 6.097559452056885, "learning_rate": 2.9432323232323234e-06, "loss": 6.246804428100586, "step": 41725 }, { "epoch": 0.1163, "grad_norm": 6.697414398193359, "learning_rate": 2.942979797979798e-06, "loss": 6.297693252563477, "step": 41730 }, { "epoch": 0.11635, "grad_norm": 3.742938756942749, "learning_rate": 2.942727272727273e-06, "loss": 6.267548370361328, "step": 41735 }, { "epoch": 0.1164, "grad_norm": 5.218923568725586, "learning_rate": 2.9424747474747478e-06, "loss": 6.267625045776367, "step": 41740 }, { "epoch": 0.11645, "grad_norm": 8.745288848876953, "learning_rate": 2.9422222222222224e-06, "loss": 6.240455627441406, "step": 41745 }, { "epoch": 0.1165, "grad_norm": 6.21934175491333, "learning_rate": 2.941969696969697e-06, "loss": 6.2780109405517575, "step": 41750 }, { "epoch": 0.11655, "grad_norm": 6.162660121917725, "learning_rate": 2.941717171717172e-06, "loss": 6.258321380615234, "step": 41755 }, { "epoch": 0.1166, "grad_norm": 4.199395656585693, "learning_rate": 2.9414646464646467e-06, "loss": 6.2561805725097654, "step": 41760 }, { "epoch": 0.11665, "grad_norm": 5.661691188812256, "learning_rate": 2.9412121212121213e-06, "loss": 6.29736328125, "step": 41765 }, { "epoch": 0.1167, "grad_norm": 7.298620223999023, "learning_rate": 2.940959595959596e-06, "loss": 6.25826416015625, "step": 41770 }, { "epoch": 0.11675, "grad_norm": 3.6840131282806396, "learning_rate": 2.940707070707071e-06, "loss": 6.258860015869141, "step": 41775 }, { "epoch": 0.1168, "grad_norm": 3.9901351928710938, "learning_rate": 2.9404545454545456e-06, "loss": 6.275413131713867, "step": 41780 }, { "epoch": 0.11685, "grad_norm": 7.205401420593262, "learning_rate": 2.9402020202020203e-06, "loss": 6.23487663269043, "step": 41785 }, { "epoch": 0.1169, "grad_norm": 6.002083778381348, "learning_rate": 2.939949494949495e-06, "loss": 6.383166122436523, "step": 41790 }, { "epoch": 0.11695, "grad_norm": 7.036518573760986, "learning_rate": 2.93969696969697e-06, "loss": 6.286370849609375, "step": 41795 }, { "epoch": 0.117, "grad_norm": 4.9266557693481445, "learning_rate": 2.9394444444444446e-06, "loss": 6.287410736083984, "step": 41800 }, { "epoch": 0.11705, "grad_norm": 6.005686283111572, "learning_rate": 2.9391919191919192e-06, "loss": 6.273128890991211, "step": 41805 }, { "epoch": 0.1171, "grad_norm": 6.168753623962402, "learning_rate": 2.938939393939394e-06, "loss": 6.320136260986328, "step": 41810 }, { "epoch": 0.11715, "grad_norm": 17.790475845336914, "learning_rate": 2.9386868686868693e-06, "loss": 6.0803672790527346, "step": 41815 }, { "epoch": 0.1172, "grad_norm": 21.214426040649414, "learning_rate": 2.938434343434344e-06, "loss": 6.476937866210937, "step": 41820 }, { "epoch": 0.11725, "grad_norm": 8.510736465454102, "learning_rate": 2.938181818181818e-06, "loss": 6.289985275268554, "step": 41825 }, { "epoch": 0.1173, "grad_norm": 11.571453094482422, "learning_rate": 2.9379292929292928e-06, "loss": 6.371868133544922, "step": 41830 }, { "epoch": 0.11735, "grad_norm": 6.012557029724121, "learning_rate": 2.9376767676767683e-06, "loss": 6.216759109497071, "step": 41835 }, { "epoch": 0.1174, "grad_norm": 9.455918312072754, "learning_rate": 2.937424242424243e-06, "loss": 6.232325744628906, "step": 41840 }, { "epoch": 0.11745, "grad_norm": 7.151858329772949, "learning_rate": 2.9371717171717175e-06, "loss": 6.270361328125, "step": 41845 }, { "epoch": 0.1175, "grad_norm": 3.4009106159210205, "learning_rate": 2.936919191919192e-06, "loss": 6.274752426147461, "step": 41850 }, { "epoch": 0.11755, "grad_norm": 7.662902355194092, "learning_rate": 2.936666666666667e-06, "loss": 6.269326400756836, "step": 41855 }, { "epoch": 0.1176, "grad_norm": 7.758222579956055, "learning_rate": 2.936414141414142e-06, "loss": 6.3130756378173825, "step": 41860 }, { "epoch": 0.11765, "grad_norm": 19.4472713470459, "learning_rate": 2.9361616161616165e-06, "loss": 7.126626586914062, "step": 41865 }, { "epoch": 0.1177, "grad_norm": 7.449990749359131, "learning_rate": 2.935909090909091e-06, "loss": 6.3568672180175785, "step": 41870 }, { "epoch": 0.11775, "grad_norm": 4.035237789154053, "learning_rate": 2.935656565656566e-06, "loss": 6.271065902709961, "step": 41875 }, { "epoch": 0.1178, "grad_norm": 7.576226711273193, "learning_rate": 2.9354040404040408e-06, "loss": 6.330295562744141, "step": 41880 }, { "epoch": 0.11785, "grad_norm": 6.36364221572876, "learning_rate": 2.9351515151515154e-06, "loss": 6.291706848144531, "step": 41885 }, { "epoch": 0.1179, "grad_norm": 6.437422752380371, "learning_rate": 2.93489898989899e-06, "loss": 6.229768753051758, "step": 41890 }, { "epoch": 0.11795, "grad_norm": 9.435164451599121, "learning_rate": 2.934646464646465e-06, "loss": 6.263710021972656, "step": 41895 }, { "epoch": 0.118, "grad_norm": 5.994455337524414, "learning_rate": 2.9343939393939397e-06, "loss": 6.292213439941406, "step": 41900 }, { "epoch": 0.11805, "grad_norm": 11.001198768615723, "learning_rate": 2.9341414141414144e-06, "loss": 6.254618453979492, "step": 41905 }, { "epoch": 0.1181, "grad_norm": 6.369492530822754, "learning_rate": 2.933888888888889e-06, "loss": 6.310542297363281, "step": 41910 }, { "epoch": 0.11815, "grad_norm": 6.553391456604004, "learning_rate": 2.933636363636364e-06, "loss": 6.50977783203125, "step": 41915 }, { "epoch": 0.1182, "grad_norm": 5.327044486999512, "learning_rate": 2.9333838383838387e-06, "loss": 6.261212921142578, "step": 41920 }, { "epoch": 0.11825, "grad_norm": 6.749001979827881, "learning_rate": 2.9331313131313133e-06, "loss": 6.28709602355957, "step": 41925 }, { "epoch": 0.1183, "grad_norm": 6.690602779388428, "learning_rate": 2.932878787878788e-06, "loss": 6.257435989379883, "step": 41930 }, { "epoch": 0.11835, "grad_norm": 5.989022254943848, "learning_rate": 2.932626262626263e-06, "loss": 6.247370910644531, "step": 41935 }, { "epoch": 0.1184, "grad_norm": 6.763794422149658, "learning_rate": 2.9323737373737376e-06, "loss": 6.2763671875, "step": 41940 }, { "epoch": 0.11845, "grad_norm": 7.133615493774414, "learning_rate": 2.9321212121212122e-06, "loss": 6.130401992797852, "step": 41945 }, { "epoch": 0.1185, "grad_norm": 9.321901321411133, "learning_rate": 2.931868686868687e-06, "loss": 6.34447135925293, "step": 41950 }, { "epoch": 0.11855, "grad_norm": 8.437309265136719, "learning_rate": 2.931616161616162e-06, "loss": 6.274111938476563, "step": 41955 }, { "epoch": 0.1186, "grad_norm": 15.113554954528809, "learning_rate": 2.9313636363636366e-06, "loss": 6.318172836303711, "step": 41960 }, { "epoch": 0.11865, "grad_norm": 5.277710437774658, "learning_rate": 2.931111111111111e-06, "loss": 6.405787658691406, "step": 41965 }, { "epoch": 0.1187, "grad_norm": 3.614000082015991, "learning_rate": 2.930858585858586e-06, "loss": 6.2648475646972654, "step": 41970 }, { "epoch": 0.11875, "grad_norm": 4.992995738983154, "learning_rate": 2.930606060606061e-06, "loss": 6.262630462646484, "step": 41975 }, { "epoch": 0.1188, "grad_norm": 6.358795642852783, "learning_rate": 2.9303535353535355e-06, "loss": 6.277824401855469, "step": 41980 }, { "epoch": 0.11885, "grad_norm": 5.795915126800537, "learning_rate": 2.93010101010101e-06, "loss": 6.29385986328125, "step": 41985 }, { "epoch": 0.1189, "grad_norm": 6.629597187042236, "learning_rate": 2.9298484848484848e-06, "loss": 6.287817001342773, "step": 41990 }, { "epoch": 0.11895, "grad_norm": 4.3954925537109375, "learning_rate": 2.92959595959596e-06, "loss": 6.245084381103515, "step": 41995 }, { "epoch": 0.119, "grad_norm": 6.566995620727539, "learning_rate": 2.9293434343434344e-06, "loss": 6.122325897216797, "step": 42000 }, { "epoch": 0.11905, "grad_norm": 9.632706642150879, "learning_rate": 2.929090909090909e-06, "loss": 6.304722595214844, "step": 42005 }, { "epoch": 0.1191, "grad_norm": 6.31146240234375, "learning_rate": 2.9288383838383837e-06, "loss": 6.258993148803711, "step": 42010 }, { "epoch": 0.11915, "grad_norm": 7.377710342407227, "learning_rate": 2.928585858585859e-06, "loss": 6.259601593017578, "step": 42015 }, { "epoch": 0.1192, "grad_norm": 8.357134819030762, "learning_rate": 2.9283333333333334e-06, "loss": 6.3660888671875, "step": 42020 }, { "epoch": 0.11925, "grad_norm": 28.266101837158203, "learning_rate": 2.928080808080808e-06, "loss": 6.514252471923828, "step": 42025 }, { "epoch": 0.1193, "grad_norm": 13.816862106323242, "learning_rate": 2.9278282828282826e-06, "loss": 6.391886520385742, "step": 42030 }, { "epoch": 0.11935, "grad_norm": 5.289782524108887, "learning_rate": 2.927575757575758e-06, "loss": 6.195812606811524, "step": 42035 }, { "epoch": 0.1194, "grad_norm": 10.006327629089355, "learning_rate": 2.9273232323232328e-06, "loss": 6.298834991455078, "step": 42040 }, { "epoch": 0.11945, "grad_norm": 11.424853324890137, "learning_rate": 2.927070707070707e-06, "loss": 6.314796066284179, "step": 42045 }, { "epoch": 0.1195, "grad_norm": 10.174999237060547, "learning_rate": 2.9268181818181816e-06, "loss": 6.312127304077149, "step": 42050 }, { "epoch": 0.11955, "grad_norm": 7.020429611206055, "learning_rate": 2.926565656565657e-06, "loss": 6.253873443603515, "step": 42055 }, { "epoch": 0.1196, "grad_norm": 6.960992336273193, "learning_rate": 2.9263131313131317e-06, "loss": 6.2553253173828125, "step": 42060 }, { "epoch": 0.11965, "grad_norm": 6.717005729675293, "learning_rate": 2.9260606060606063e-06, "loss": 6.431163024902344, "step": 42065 }, { "epoch": 0.1197, "grad_norm": 8.205610275268555, "learning_rate": 2.9258080808080814e-06, "loss": 6.280915069580078, "step": 42070 }, { "epoch": 0.11975, "grad_norm": 5.52952241897583, "learning_rate": 2.925555555555556e-06, "loss": 6.28996696472168, "step": 42075 }, { "epoch": 0.1198, "grad_norm": 8.674248695373535, "learning_rate": 2.9253030303030306e-06, "loss": 6.2548988342285154, "step": 42080 }, { "epoch": 0.11985, "grad_norm": 13.236615180969238, "learning_rate": 2.9250505050505053e-06, "loss": 6.253871154785156, "step": 42085 }, { "epoch": 0.1199, "grad_norm": 12.444384574890137, "learning_rate": 2.9247979797979803e-06, "loss": 6.329557800292969, "step": 42090 }, { "epoch": 0.11995, "grad_norm": 5.40335750579834, "learning_rate": 2.924545454545455e-06, "loss": 6.266738510131836, "step": 42095 }, { "epoch": 0.12, "grad_norm": 4.244021892547607, "learning_rate": 2.9242929292929296e-06, "loss": 6.293515014648437, "step": 42100 }, { "epoch": 0.12005, "grad_norm": 7.901262283325195, "learning_rate": 2.9240404040404042e-06, "loss": 6.311706161499023, "step": 42105 }, { "epoch": 0.1201, "grad_norm": 7.905302047729492, "learning_rate": 2.9237878787878793e-06, "loss": 6.300086212158203, "step": 42110 }, { "epoch": 0.12015, "grad_norm": 5.926037788391113, "learning_rate": 2.923535353535354e-06, "loss": 6.206169891357422, "step": 42115 }, { "epoch": 0.1202, "grad_norm": 42.193485260009766, "learning_rate": 2.9232828282828285e-06, "loss": 6.160120010375977, "step": 42120 }, { "epoch": 0.12025, "grad_norm": 6.718685626983643, "learning_rate": 2.923030303030303e-06, "loss": 6.2411338806152346, "step": 42125 }, { "epoch": 0.1203, "grad_norm": 7.162345886230469, "learning_rate": 2.9227777777777782e-06, "loss": 6.216580581665039, "step": 42130 }, { "epoch": 0.12035, "grad_norm": 16.545089721679688, "learning_rate": 2.922525252525253e-06, "loss": 6.337087631225586, "step": 42135 }, { "epoch": 0.1204, "grad_norm": 12.938425064086914, "learning_rate": 2.9222727272727275e-06, "loss": 6.431513977050781, "step": 42140 }, { "epoch": 0.12045, "grad_norm": 5.551322937011719, "learning_rate": 2.922020202020202e-06, "loss": 6.296406173706055, "step": 42145 }, { "epoch": 0.1205, "grad_norm": 7.0052924156188965, "learning_rate": 2.921767676767677e-06, "loss": 6.245744705200195, "step": 42150 }, { "epoch": 0.12055, "grad_norm": 3.3050875663757324, "learning_rate": 2.921515151515152e-06, "loss": 6.267864227294922, "step": 42155 }, { "epoch": 0.1206, "grad_norm": 5.261967182159424, "learning_rate": 2.9212626262626264e-06, "loss": 6.262062072753906, "step": 42160 }, { "epoch": 0.12065, "grad_norm": 8.408797264099121, "learning_rate": 2.921010101010101e-06, "loss": 6.371880340576172, "step": 42165 }, { "epoch": 0.1207, "grad_norm": 7.791537284851074, "learning_rate": 2.920757575757576e-06, "loss": 6.2565967559814455, "step": 42170 }, { "epoch": 0.12075, "grad_norm": 6.196427345275879, "learning_rate": 2.9205050505050507e-06, "loss": 6.281572341918945, "step": 42175 }, { "epoch": 0.1208, "grad_norm": 4.010687351226807, "learning_rate": 2.9202525252525254e-06, "loss": 6.256117630004883, "step": 42180 }, { "epoch": 0.12085, "grad_norm": 5.8414201736450195, "learning_rate": 2.92e-06, "loss": 6.291443634033203, "step": 42185 }, { "epoch": 0.1209, "grad_norm": 8.217637062072754, "learning_rate": 2.919747474747475e-06, "loss": 6.283063507080078, "step": 42190 }, { "epoch": 0.12095, "grad_norm": 12.896876335144043, "learning_rate": 2.9194949494949497e-06, "loss": 6.24914779663086, "step": 42195 }, { "epoch": 0.121, "grad_norm": 7.494111061096191, "learning_rate": 2.9192424242424243e-06, "loss": 6.3273578643798825, "step": 42200 }, { "epoch": 0.12105, "grad_norm": 5.507627964019775, "learning_rate": 2.918989898989899e-06, "loss": 6.276900100708008, "step": 42205 }, { "epoch": 0.1211, "grad_norm": 7.411354064941406, "learning_rate": 2.918737373737374e-06, "loss": 6.290487670898438, "step": 42210 }, { "epoch": 0.12115, "grad_norm": 12.585803985595703, "learning_rate": 2.9184848484848486e-06, "loss": 6.29024658203125, "step": 42215 }, { "epoch": 0.1212, "grad_norm": 5.846058368682861, "learning_rate": 2.9182323232323233e-06, "loss": 6.242778778076172, "step": 42220 }, { "epoch": 0.12125, "grad_norm": 9.46722412109375, "learning_rate": 2.917979797979798e-06, "loss": 6.259655380249024, "step": 42225 }, { "epoch": 0.1213, "grad_norm": 6.361361026763916, "learning_rate": 2.9177272727272734e-06, "loss": 6.244113540649414, "step": 42230 }, { "epoch": 0.12135, "grad_norm": 8.150136947631836, "learning_rate": 2.917474747474748e-06, "loss": 6.239772033691406, "step": 42235 }, { "epoch": 0.1214, "grad_norm": 6.542417049407959, "learning_rate": 2.917222222222222e-06, "loss": 6.248226928710937, "step": 42240 }, { "epoch": 0.12145, "grad_norm": 5.331295967102051, "learning_rate": 2.916969696969697e-06, "loss": 6.278302001953125, "step": 42245 }, { "epoch": 0.1215, "grad_norm": 4.84158992767334, "learning_rate": 2.9167171717171723e-06, "loss": 6.2747657775878904, "step": 42250 }, { "epoch": 0.12155, "grad_norm": 6.824895858764648, "learning_rate": 2.916464646464647e-06, "loss": 6.24006233215332, "step": 42255 }, { "epoch": 0.1216, "grad_norm": 5.1547064781188965, "learning_rate": 2.9162121212121216e-06, "loss": 6.261720657348633, "step": 42260 }, { "epoch": 0.12165, "grad_norm": 5.602167129516602, "learning_rate": 2.915959595959596e-06, "loss": 6.269976806640625, "step": 42265 }, { "epoch": 0.1217, "grad_norm": 4.121480464935303, "learning_rate": 2.9157070707070712e-06, "loss": 6.303948974609375, "step": 42270 }, { "epoch": 0.12175, "grad_norm": 3.7594988346099854, "learning_rate": 2.915454545454546e-06, "loss": 6.292327499389648, "step": 42275 }, { "epoch": 0.1218, "grad_norm": 5.351687908172607, "learning_rate": 2.9152020202020205e-06, "loss": 6.250151443481445, "step": 42280 }, { "epoch": 0.12185, "grad_norm": 3.6854746341705322, "learning_rate": 2.914949494949495e-06, "loss": 6.2632301330566404, "step": 42285 }, { "epoch": 0.1219, "grad_norm": 7.980936050415039, "learning_rate": 2.91469696969697e-06, "loss": 6.290274429321289, "step": 42290 }, { "epoch": 0.12195, "grad_norm": 6.478393077850342, "learning_rate": 2.914444444444445e-06, "loss": 6.2583869934082035, "step": 42295 }, { "epoch": 0.122, "grad_norm": 6.88121223449707, "learning_rate": 2.9141919191919195e-06, "loss": 6.263861465454101, "step": 42300 }, { "epoch": 0.12205, "grad_norm": 8.553267478942871, "learning_rate": 2.913939393939394e-06, "loss": 6.256953430175781, "step": 42305 }, { "epoch": 0.1221, "grad_norm": 7.538891315460205, "learning_rate": 2.913686868686869e-06, "loss": 6.360808181762695, "step": 42310 }, { "epoch": 0.12215, "grad_norm": 4.742193698883057, "learning_rate": 2.9134343434343438e-06, "loss": 6.293458938598633, "step": 42315 }, { "epoch": 0.1222, "grad_norm": 5.849863052368164, "learning_rate": 2.9131818181818184e-06, "loss": 6.272338104248047, "step": 42320 }, { "epoch": 0.12225, "grad_norm": 6.346260070800781, "learning_rate": 2.912929292929293e-06, "loss": 6.311685180664062, "step": 42325 }, { "epoch": 0.1223, "grad_norm": 6.832108020782471, "learning_rate": 2.912676767676768e-06, "loss": 6.317210388183594, "step": 42330 }, { "epoch": 0.12235, "grad_norm": 6.909374713897705, "learning_rate": 2.9124242424242427e-06, "loss": 6.219025039672852, "step": 42335 }, { "epoch": 0.1224, "grad_norm": 12.676931381225586, "learning_rate": 2.9121717171717173e-06, "loss": 6.374499893188476, "step": 42340 }, { "epoch": 0.12245, "grad_norm": 5.914567470550537, "learning_rate": 2.911919191919192e-06, "loss": 6.267864990234375, "step": 42345 }, { "epoch": 0.1225, "grad_norm": 10.643523216247559, "learning_rate": 2.911666666666667e-06, "loss": 6.2559814453125, "step": 42350 }, { "epoch": 0.12255, "grad_norm": 5.877878665924072, "learning_rate": 2.9114141414141417e-06, "loss": 6.268153381347656, "step": 42355 }, { "epoch": 0.1226, "grad_norm": 25.16179847717285, "learning_rate": 2.9111616161616163e-06, "loss": 6.342661285400391, "step": 42360 }, { "epoch": 0.12265, "grad_norm": 6.252614974975586, "learning_rate": 2.910909090909091e-06, "loss": 6.351363754272461, "step": 42365 }, { "epoch": 0.1227, "grad_norm": 6.756246566772461, "learning_rate": 2.910656565656566e-06, "loss": 6.274284362792969, "step": 42370 }, { "epoch": 0.12275, "grad_norm": 4.101792812347412, "learning_rate": 2.9104040404040406e-06, "loss": 6.2609600067138675, "step": 42375 }, { "epoch": 0.1228, "grad_norm": 4.61412239074707, "learning_rate": 2.9101515151515152e-06, "loss": 6.223457717895508, "step": 42380 }, { "epoch": 0.12285, "grad_norm": 19.20193862915039, "learning_rate": 2.90989898989899e-06, "loss": 6.25780143737793, "step": 42385 }, { "epoch": 0.1229, "grad_norm": 4.445740699768066, "learning_rate": 2.909646464646465e-06, "loss": 6.270601272583008, "step": 42390 }, { "epoch": 0.12295, "grad_norm": 7.418521881103516, "learning_rate": 2.9093939393939395e-06, "loss": 6.200340652465821, "step": 42395 }, { "epoch": 0.123, "grad_norm": 10.031095504760742, "learning_rate": 2.909141414141414e-06, "loss": 6.2140625, "step": 42400 }, { "epoch": 0.12305, "grad_norm": 14.830766677856445, "learning_rate": 2.908888888888889e-06, "loss": 6.303277206420899, "step": 42405 }, { "epoch": 0.1231, "grad_norm": 10.013008117675781, "learning_rate": 2.908636363636364e-06, "loss": 6.286345672607422, "step": 42410 }, { "epoch": 0.12315, "grad_norm": 5.236317157745361, "learning_rate": 2.9083838383838385e-06, "loss": 6.273377227783203, "step": 42415 }, { "epoch": 0.1232, "grad_norm": 5.961149215698242, "learning_rate": 2.908131313131313e-06, "loss": 6.293456268310547, "step": 42420 }, { "epoch": 0.12325, "grad_norm": 5.4007649421691895, "learning_rate": 2.9078787878787877e-06, "loss": 6.241150665283203, "step": 42425 }, { "epoch": 0.1233, "grad_norm": 5.8289947509765625, "learning_rate": 2.9076262626262632e-06, "loss": 6.272627258300782, "step": 42430 }, { "epoch": 0.12335, "grad_norm": 6.293877601623535, "learning_rate": 2.9073737373737374e-06, "loss": 6.2651409149169925, "step": 42435 }, { "epoch": 0.1234, "grad_norm": 5.994367599487305, "learning_rate": 2.907121212121212e-06, "loss": 6.2451427459716795, "step": 42440 }, { "epoch": 0.12345, "grad_norm": 6.879055500030518, "learning_rate": 2.9068686868686867e-06, "loss": 6.314593124389648, "step": 42445 }, { "epoch": 0.1235, "grad_norm": 6.857323169708252, "learning_rate": 2.906616161616162e-06, "loss": 6.201730728149414, "step": 42450 }, { "epoch": 0.12355, "grad_norm": 8.979195594787598, "learning_rate": 2.906363636363637e-06, "loss": 6.287280654907226, "step": 42455 }, { "epoch": 0.1236, "grad_norm": 7.177506446838379, "learning_rate": 2.9061111111111114e-06, "loss": 6.215015029907226, "step": 42460 }, { "epoch": 0.12365, "grad_norm": 6.497125625610352, "learning_rate": 2.9058585858585856e-06, "loss": 6.254578399658203, "step": 42465 }, { "epoch": 0.1237, "grad_norm": 5.76339864730835, "learning_rate": 2.905606060606061e-06, "loss": 6.2757831573486325, "step": 42470 }, { "epoch": 0.12375, "grad_norm": 7.075587749481201, "learning_rate": 2.9053535353535357e-06, "loss": 6.2665351867675785, "step": 42475 }, { "epoch": 0.1238, "grad_norm": 8.224173545837402, "learning_rate": 2.9051010101010104e-06, "loss": 6.257423400878906, "step": 42480 }, { "epoch": 0.12385, "grad_norm": 10.694587707519531, "learning_rate": 2.9048484848484854e-06, "loss": 6.286917495727539, "step": 42485 }, { "epoch": 0.1239, "grad_norm": 4.870787143707275, "learning_rate": 2.90459595959596e-06, "loss": 6.2279212951660154, "step": 42490 }, { "epoch": 0.12395, "grad_norm": 8.29523754119873, "learning_rate": 2.9043434343434347e-06, "loss": 6.261679077148438, "step": 42495 }, { "epoch": 0.124, "grad_norm": 4.587533473968506, "learning_rate": 2.9040909090909093e-06, "loss": 6.295635986328125, "step": 42500 }, { "epoch": 0.12405, "grad_norm": 7.804836273193359, "learning_rate": 2.9038383838383844e-06, "loss": 6.267850112915039, "step": 42505 }, { "epoch": 0.1241, "grad_norm": 12.540488243103027, "learning_rate": 2.903585858585859e-06, "loss": 6.295806884765625, "step": 42510 }, { "epoch": 0.12415, "grad_norm": 27.464706420898438, "learning_rate": 2.9033333333333336e-06, "loss": 5.858723449707031, "step": 42515 }, { "epoch": 0.1242, "grad_norm": 10.51535701751709, "learning_rate": 2.9030808080808083e-06, "loss": 6.186760711669922, "step": 42520 }, { "epoch": 0.12425, "grad_norm": 6.48067569732666, "learning_rate": 2.9028282828282833e-06, "loss": 6.308037567138672, "step": 42525 }, { "epoch": 0.1243, "grad_norm": 8.069540023803711, "learning_rate": 2.902575757575758e-06, "loss": 6.321952056884766, "step": 42530 }, { "epoch": 0.12435, "grad_norm": 3.4910924434661865, "learning_rate": 2.9023232323232326e-06, "loss": 6.2344623565673825, "step": 42535 }, { "epoch": 0.1244, "grad_norm": 7.2858052253723145, "learning_rate": 2.902070707070707e-06, "loss": 6.262478637695312, "step": 42540 }, { "epoch": 0.12445, "grad_norm": 12.337964057922363, "learning_rate": 2.9018181818181823e-06, "loss": 6.310026550292969, "step": 42545 }, { "epoch": 0.1245, "grad_norm": 7.5035552978515625, "learning_rate": 2.901565656565657e-06, "loss": 6.210212707519531, "step": 42550 }, { "epoch": 0.12455, "grad_norm": 5.102397441864014, "learning_rate": 2.9013131313131315e-06, "loss": 6.250746536254883, "step": 42555 }, { "epoch": 0.1246, "grad_norm": 8.889419555664062, "learning_rate": 2.901060606060606e-06, "loss": 6.2247566223144535, "step": 42560 }, { "epoch": 0.12465, "grad_norm": 7.707698345184326, "learning_rate": 2.900808080808081e-06, "loss": 6.2639415740966795, "step": 42565 }, { "epoch": 0.1247, "grad_norm": 11.751856803894043, "learning_rate": 2.900555555555556e-06, "loss": 6.280557632446289, "step": 42570 }, { "epoch": 0.12475, "grad_norm": 7.207385540008545, "learning_rate": 2.9003030303030305e-06, "loss": 6.276654434204102, "step": 42575 }, { "epoch": 0.1248, "grad_norm": 4.261576175689697, "learning_rate": 2.900050505050505e-06, "loss": 6.337315368652344, "step": 42580 }, { "epoch": 0.12485, "grad_norm": 7.735414505004883, "learning_rate": 2.89979797979798e-06, "loss": 6.297902297973633, "step": 42585 }, { "epoch": 0.1249, "grad_norm": 8.187244415283203, "learning_rate": 2.8995454545454548e-06, "loss": 6.270076751708984, "step": 42590 }, { "epoch": 0.12495, "grad_norm": 4.563602447509766, "learning_rate": 2.8992929292929294e-06, "loss": 6.249410247802734, "step": 42595 }, { "epoch": 0.125, "grad_norm": 19.703453063964844, "learning_rate": 2.899040404040404e-06, "loss": 6.5523834228515625, "step": 42600 }, { "epoch": 0.12505, "grad_norm": 7.7130279541015625, "learning_rate": 2.898787878787879e-06, "loss": 6.2471660614013675, "step": 42605 }, { "epoch": 0.1251, "grad_norm": 4.6361188888549805, "learning_rate": 2.8985353535353537e-06, "loss": 6.252828216552734, "step": 42610 }, { "epoch": 0.12515, "grad_norm": 5.752023220062256, "learning_rate": 2.8982828282828283e-06, "loss": 6.233910751342774, "step": 42615 }, { "epoch": 0.1252, "grad_norm": 4.804875373840332, "learning_rate": 2.898030303030303e-06, "loss": 6.267916107177735, "step": 42620 }, { "epoch": 0.12525, "grad_norm": 6.999718189239502, "learning_rate": 2.8977777777777785e-06, "loss": 6.3237968444824215, "step": 42625 }, { "epoch": 0.1253, "grad_norm": 5.003927230834961, "learning_rate": 2.8975252525252527e-06, "loss": 6.356344985961914, "step": 42630 }, { "epoch": 0.12535, "grad_norm": 4.552326202392578, "learning_rate": 2.8972727272727273e-06, "loss": 6.305149841308594, "step": 42635 }, { "epoch": 0.1254, "grad_norm": 8.298811912536621, "learning_rate": 2.897020202020202e-06, "loss": 6.2338310241699215, "step": 42640 }, { "epoch": 0.12545, "grad_norm": 7.080240726470947, "learning_rate": 2.8967676767676774e-06, "loss": 6.331567001342774, "step": 42645 }, { "epoch": 0.1255, "grad_norm": 3.9822092056274414, "learning_rate": 2.896515151515152e-06, "loss": 6.257165908813477, "step": 42650 }, { "epoch": 0.12555, "grad_norm": 7.006120681762695, "learning_rate": 2.8962626262626262e-06, "loss": 6.265024566650391, "step": 42655 }, { "epoch": 0.1256, "grad_norm": 9.603386878967285, "learning_rate": 2.896010101010101e-06, "loss": 6.2429145812988285, "step": 42660 }, { "epoch": 0.12565, "grad_norm": 7.18720006942749, "learning_rate": 2.8957575757575763e-06, "loss": 6.273042297363281, "step": 42665 }, { "epoch": 0.1257, "grad_norm": 6.489053249359131, "learning_rate": 2.895505050505051e-06, "loss": 6.2080322265625, "step": 42670 }, { "epoch": 0.12575, "grad_norm": 7.401553630828857, "learning_rate": 2.8952525252525256e-06, "loss": 6.263488006591797, "step": 42675 }, { "epoch": 0.1258, "grad_norm": 9.740854263305664, "learning_rate": 2.8950000000000002e-06, "loss": 6.315792846679687, "step": 42680 }, { "epoch": 0.12585, "grad_norm": 11.6298828125, "learning_rate": 2.8947474747474753e-06, "loss": 6.2214916229248045, "step": 42685 }, { "epoch": 0.1259, "grad_norm": 5.739536285400391, "learning_rate": 2.89449494949495e-06, "loss": 6.31074104309082, "step": 42690 }, { "epoch": 0.12595, "grad_norm": 5.459855079650879, "learning_rate": 2.8942424242424245e-06, "loss": 6.234065246582031, "step": 42695 }, { "epoch": 0.126, "grad_norm": 6.4340667724609375, "learning_rate": 2.893989898989899e-06, "loss": 6.245785522460937, "step": 42700 }, { "epoch": 0.12605, "grad_norm": 5.002427101135254, "learning_rate": 2.8937373737373742e-06, "loss": 6.272274017333984, "step": 42705 }, { "epoch": 0.1261, "grad_norm": 4.923975944519043, "learning_rate": 2.893484848484849e-06, "loss": 6.262678527832032, "step": 42710 }, { "epoch": 0.12615, "grad_norm": 10.005484580993652, "learning_rate": 2.8932323232323235e-06, "loss": 6.253449249267578, "step": 42715 }, { "epoch": 0.1262, "grad_norm": 6.382939338684082, "learning_rate": 2.892979797979798e-06, "loss": 6.268442153930664, "step": 42720 }, { "epoch": 0.12625, "grad_norm": 15.143803596496582, "learning_rate": 2.892727272727273e-06, "loss": 6.299002075195313, "step": 42725 }, { "epoch": 0.1263, "grad_norm": 6.313310146331787, "learning_rate": 2.892474747474748e-06, "loss": 6.24136962890625, "step": 42730 }, { "epoch": 0.12635, "grad_norm": 8.379565238952637, "learning_rate": 2.8922222222222224e-06, "loss": 6.236381530761719, "step": 42735 }, { "epoch": 0.1264, "grad_norm": 6.219910621643066, "learning_rate": 2.891969696969697e-06, "loss": 6.506138610839844, "step": 42740 }, { "epoch": 0.12645, "grad_norm": 8.371438026428223, "learning_rate": 2.891717171717172e-06, "loss": 6.537351989746094, "step": 42745 }, { "epoch": 0.1265, "grad_norm": 21.726179122924805, "learning_rate": 2.8914646464646467e-06, "loss": 6.369923400878906, "step": 42750 }, { "epoch": 0.12655, "grad_norm": 42.546897888183594, "learning_rate": 2.8912121212121214e-06, "loss": 6.532388305664062, "step": 42755 }, { "epoch": 0.1266, "grad_norm": 7.587975025177002, "learning_rate": 2.890959595959596e-06, "loss": 6.272637939453125, "step": 42760 }, { "epoch": 0.12665, "grad_norm": 10.134566307067871, "learning_rate": 2.890707070707071e-06, "loss": 6.26794319152832, "step": 42765 }, { "epoch": 0.1267, "grad_norm": 8.621458053588867, "learning_rate": 2.8904545454545457e-06, "loss": 6.213787841796875, "step": 42770 }, { "epoch": 0.12675, "grad_norm": 8.493025779724121, "learning_rate": 2.8902020202020203e-06, "loss": 6.256126403808594, "step": 42775 }, { "epoch": 0.1268, "grad_norm": 5.358027935028076, "learning_rate": 2.889949494949495e-06, "loss": 6.300741577148438, "step": 42780 }, { "epoch": 0.12685, "grad_norm": 8.285168647766113, "learning_rate": 2.88969696969697e-06, "loss": 6.281980514526367, "step": 42785 }, { "epoch": 0.1269, "grad_norm": 7.218749523162842, "learning_rate": 2.8894444444444446e-06, "loss": 6.240383148193359, "step": 42790 }, { "epoch": 0.12695, "grad_norm": 8.210326194763184, "learning_rate": 2.8891919191919193e-06, "loss": 6.2898612976074215, "step": 42795 }, { "epoch": 0.127, "grad_norm": 9.779827117919922, "learning_rate": 2.888939393939394e-06, "loss": 6.190750503540039, "step": 42800 }, { "epoch": 0.12705, "grad_norm": 6.643845081329346, "learning_rate": 2.888686868686869e-06, "loss": 6.288530731201172, "step": 42805 }, { "epoch": 0.1271, "grad_norm": 5.463345050811768, "learning_rate": 2.8884343434343436e-06, "loss": 6.249415588378906, "step": 42810 }, { "epoch": 0.12715, "grad_norm": 5.694880485534668, "learning_rate": 2.8881818181818182e-06, "loss": 6.263436126708984, "step": 42815 }, { "epoch": 0.1272, "grad_norm": 8.100613594055176, "learning_rate": 2.887929292929293e-06, "loss": 6.303643417358399, "step": 42820 }, { "epoch": 0.12725, "grad_norm": 5.663472652435303, "learning_rate": 2.887676767676768e-06, "loss": 6.318413925170899, "step": 42825 }, { "epoch": 0.1273, "grad_norm": 5.596512794494629, "learning_rate": 2.8874242424242425e-06, "loss": 6.2732292175292965, "step": 42830 }, { "epoch": 0.12735, "grad_norm": 10.1311674118042, "learning_rate": 2.887171717171717e-06, "loss": 6.269374847412109, "step": 42835 }, { "epoch": 0.1274, "grad_norm": 10.639410018920898, "learning_rate": 2.8869191919191918e-06, "loss": 6.193820571899414, "step": 42840 }, { "epoch": 0.12745, "grad_norm": 8.301587104797363, "learning_rate": 2.8866666666666673e-06, "loss": 6.382625579833984, "step": 42845 }, { "epoch": 0.1275, "grad_norm": 5.8394341468811035, "learning_rate": 2.8864141414141415e-06, "loss": 6.246498870849609, "step": 42850 }, { "epoch": 0.12755, "grad_norm": 5.935181140899658, "learning_rate": 2.886161616161616e-06, "loss": 6.228199768066406, "step": 42855 }, { "epoch": 0.1276, "grad_norm": 8.786735534667969, "learning_rate": 2.8859090909090907e-06, "loss": 6.322859573364258, "step": 42860 }, { "epoch": 0.12765, "grad_norm": 6.6526007652282715, "learning_rate": 2.885656565656566e-06, "loss": 6.2727306365966795, "step": 42865 }, { "epoch": 0.1277, "grad_norm": 9.01892375946045, "learning_rate": 2.885404040404041e-06, "loss": 6.332581329345703, "step": 42870 }, { "epoch": 0.12775, "grad_norm": 6.6025824546813965, "learning_rate": 2.8851515151515155e-06, "loss": 6.285790252685547, "step": 42875 }, { "epoch": 0.1278, "grad_norm": 7.522231578826904, "learning_rate": 2.8848989898989897e-06, "loss": 6.2671455383300785, "step": 42880 }, { "epoch": 0.12785, "grad_norm": 8.550835609436035, "learning_rate": 2.884646464646465e-06, "loss": 6.359270095825195, "step": 42885 }, { "epoch": 0.1279, "grad_norm": 4.5121002197265625, "learning_rate": 2.8843939393939398e-06, "loss": 6.278555297851563, "step": 42890 }, { "epoch": 0.12795, "grad_norm": 6.215314865112305, "learning_rate": 2.8841414141414144e-06, "loss": 6.223762893676758, "step": 42895 }, { "epoch": 0.128, "grad_norm": 7.668094635009766, "learning_rate": 2.883888888888889e-06, "loss": 6.262394714355469, "step": 42900 }, { "epoch": 0.12805, "grad_norm": 5.219274044036865, "learning_rate": 2.883636363636364e-06, "loss": 6.253418731689453, "step": 42905 }, { "epoch": 0.1281, "grad_norm": 5.327474117279053, "learning_rate": 2.8833838383838387e-06, "loss": 6.253081512451172, "step": 42910 }, { "epoch": 0.12815, "grad_norm": 8.691856384277344, "learning_rate": 2.8831313131313134e-06, "loss": 6.240176010131836, "step": 42915 }, { "epoch": 0.1282, "grad_norm": 4.250524997711182, "learning_rate": 2.8828787878787884e-06, "loss": 6.312602233886719, "step": 42920 }, { "epoch": 0.12825, "grad_norm": 7.032783508300781, "learning_rate": 2.882626262626263e-06, "loss": 6.265884780883789, "step": 42925 }, { "epoch": 0.1283, "grad_norm": 13.736678123474121, "learning_rate": 2.8823737373737377e-06, "loss": 6.335506439208984, "step": 42930 }, { "epoch": 0.12835, "grad_norm": 4.994131088256836, "learning_rate": 2.8821212121212123e-06, "loss": 6.273589706420898, "step": 42935 }, { "epoch": 0.1284, "grad_norm": 5.487616539001465, "learning_rate": 2.8818686868686874e-06, "loss": 6.25526008605957, "step": 42940 }, { "epoch": 0.12845, "grad_norm": 6.868844985961914, "learning_rate": 2.881616161616162e-06, "loss": 6.239013671875, "step": 42945 }, { "epoch": 0.1285, "grad_norm": 6.135003566741943, "learning_rate": 2.8813636363636366e-06, "loss": 6.269113540649414, "step": 42950 }, { "epoch": 0.12855, "grad_norm": 6.789272308349609, "learning_rate": 2.8811111111111112e-06, "loss": 6.209231948852539, "step": 42955 }, { "epoch": 0.1286, "grad_norm": 15.948736190795898, "learning_rate": 2.8808585858585863e-06, "loss": 6.459947967529297, "step": 42960 }, { "epoch": 0.12865, "grad_norm": 8.216059684753418, "learning_rate": 2.880606060606061e-06, "loss": 6.324349594116211, "step": 42965 }, { "epoch": 0.1287, "grad_norm": 4.411581993103027, "learning_rate": 2.8803535353535356e-06, "loss": 6.23919677734375, "step": 42970 }, { "epoch": 0.12875, "grad_norm": 3.372684955596924, "learning_rate": 2.88010101010101e-06, "loss": 6.270579147338867, "step": 42975 }, { "epoch": 0.1288, "grad_norm": 26.52436065673828, "learning_rate": 2.8798484848484852e-06, "loss": 6.187003707885742, "step": 42980 }, { "epoch": 0.12885, "grad_norm": 7.956799030303955, "learning_rate": 2.87959595959596e-06, "loss": 6.291221618652344, "step": 42985 }, { "epoch": 0.1289, "grad_norm": 26.662446975708008, "learning_rate": 2.8793434343434345e-06, "loss": 6.454537963867187, "step": 42990 }, { "epoch": 0.12895, "grad_norm": 5.597362518310547, "learning_rate": 2.879090909090909e-06, "loss": 6.2570140838623045, "step": 42995 }, { "epoch": 0.129, "grad_norm": 8.650330543518066, "learning_rate": 2.878838383838384e-06, "loss": 6.311814880371093, "step": 43000 }, { "epoch": 0.12905, "grad_norm": 5.824348449707031, "learning_rate": 2.878585858585859e-06, "loss": 6.2576416015625, "step": 43005 }, { "epoch": 0.1291, "grad_norm": 18.598297119140625, "learning_rate": 2.8783333333333334e-06, "loss": 6.293525695800781, "step": 43010 }, { "epoch": 0.12915, "grad_norm": 17.007883071899414, "learning_rate": 2.878080808080808e-06, "loss": 6.3335517883300785, "step": 43015 }, { "epoch": 0.1292, "grad_norm": 12.123918533325195, "learning_rate": 2.877828282828283e-06, "loss": 6.426605224609375, "step": 43020 }, { "epoch": 0.12925, "grad_norm": 6.5796637535095215, "learning_rate": 2.8775757575757578e-06, "loss": 6.3082275390625, "step": 43025 }, { "epoch": 0.1293, "grad_norm": 8.567082405090332, "learning_rate": 2.8773232323232324e-06, "loss": 6.284790802001953, "step": 43030 }, { "epoch": 0.12935, "grad_norm": 9.459152221679688, "learning_rate": 2.877070707070707e-06, "loss": 6.284445953369141, "step": 43035 }, { "epoch": 0.1294, "grad_norm": 6.213819980621338, "learning_rate": 2.8768181818181825e-06, "loss": 6.214144515991211, "step": 43040 }, { "epoch": 0.12945, "grad_norm": 6.97295618057251, "learning_rate": 2.8765656565656567e-06, "loss": 6.279450225830078, "step": 43045 }, { "epoch": 0.1295, "grad_norm": 22.693767547607422, "learning_rate": 2.8763131313131313e-06, "loss": 6.290313720703125, "step": 43050 }, { "epoch": 0.12955, "grad_norm": 21.894380569458008, "learning_rate": 2.876060606060606e-06, "loss": 6.350310516357422, "step": 43055 }, { "epoch": 0.1296, "grad_norm": 4.333471298217773, "learning_rate": 2.8758080808080814e-06, "loss": 6.267122650146485, "step": 43060 }, { "epoch": 0.12965, "grad_norm": 6.87057638168335, "learning_rate": 2.875555555555556e-06, "loss": 6.237315368652344, "step": 43065 }, { "epoch": 0.1297, "grad_norm": 5.2339372634887695, "learning_rate": 2.8753030303030303e-06, "loss": 6.284799194335937, "step": 43070 }, { "epoch": 0.12975, "grad_norm": 11.755865097045898, "learning_rate": 2.875050505050505e-06, "loss": 6.294019317626953, "step": 43075 }, { "epoch": 0.1298, "grad_norm": 5.884405612945557, "learning_rate": 2.8747979797979804e-06, "loss": 6.279730224609375, "step": 43080 }, { "epoch": 0.12985, "grad_norm": 14.990090370178223, "learning_rate": 2.874545454545455e-06, "loss": 6.4034782409667965, "step": 43085 }, { "epoch": 0.1299, "grad_norm": 4.535803318023682, "learning_rate": 2.8742929292929296e-06, "loss": 6.332181549072265, "step": 43090 }, { "epoch": 0.12995, "grad_norm": 6.985929489135742, "learning_rate": 2.8740404040404043e-06, "loss": 6.296395492553711, "step": 43095 }, { "epoch": 0.13, "grad_norm": 8.484833717346191, "learning_rate": 2.8737878787878793e-06, "loss": 6.28063735961914, "step": 43100 }, { "epoch": 0.13005, "grad_norm": 13.143287658691406, "learning_rate": 2.873535353535354e-06, "loss": 6.4234169006347654, "step": 43105 }, { "epoch": 0.1301, "grad_norm": 5.3821001052856445, "learning_rate": 2.8732828282828286e-06, "loss": 6.255021667480468, "step": 43110 }, { "epoch": 0.13015, "grad_norm": 5.540125846862793, "learning_rate": 2.8730303030303032e-06, "loss": 6.227041625976563, "step": 43115 }, { "epoch": 0.1302, "grad_norm": 4.4096479415893555, "learning_rate": 2.8727777777777783e-06, "loss": 6.255789566040039, "step": 43120 }, { "epoch": 0.13025, "grad_norm": 9.611748695373535, "learning_rate": 2.872525252525253e-06, "loss": 6.259996032714843, "step": 43125 }, { "epoch": 0.1303, "grad_norm": 15.381868362426758, "learning_rate": 2.8722727272727275e-06, "loss": 6.343684387207031, "step": 43130 }, { "epoch": 0.13035, "grad_norm": 10.241069793701172, "learning_rate": 2.872020202020202e-06, "loss": 6.306514739990234, "step": 43135 }, { "epoch": 0.1304, "grad_norm": 7.329665184020996, "learning_rate": 2.8717676767676772e-06, "loss": 6.296849060058594, "step": 43140 }, { "epoch": 0.13045, "grad_norm": 8.880321502685547, "learning_rate": 2.871515151515152e-06, "loss": 6.244744110107422, "step": 43145 }, { "epoch": 0.1305, "grad_norm": 5.904425144195557, "learning_rate": 2.8712626262626265e-06, "loss": 6.27349967956543, "step": 43150 }, { "epoch": 0.13055, "grad_norm": 8.924327850341797, "learning_rate": 2.871010101010101e-06, "loss": 6.236366271972656, "step": 43155 }, { "epoch": 0.1306, "grad_norm": 7.5010480880737305, "learning_rate": 2.870757575757576e-06, "loss": 6.282142257690429, "step": 43160 }, { "epoch": 0.13065, "grad_norm": 5.98451566696167, "learning_rate": 2.870505050505051e-06, "loss": 6.302898025512695, "step": 43165 }, { "epoch": 0.1307, "grad_norm": 5.7592034339904785, "learning_rate": 2.8702525252525254e-06, "loss": 6.218171691894531, "step": 43170 }, { "epoch": 0.13075, "grad_norm": 5.450937271118164, "learning_rate": 2.87e-06, "loss": 6.267374801635742, "step": 43175 }, { "epoch": 0.1308, "grad_norm": 6.0445556640625, "learning_rate": 2.869747474747475e-06, "loss": 6.275907516479492, "step": 43180 }, { "epoch": 0.13085, "grad_norm": 4.397013187408447, "learning_rate": 2.8694949494949497e-06, "loss": 6.2501873016357425, "step": 43185 }, { "epoch": 0.1309, "grad_norm": 6.410189151763916, "learning_rate": 2.8692424242424244e-06, "loss": 6.294324493408203, "step": 43190 }, { "epoch": 0.13095, "grad_norm": 4.815674781799316, "learning_rate": 2.868989898989899e-06, "loss": 6.217630386352539, "step": 43195 }, { "epoch": 0.131, "grad_norm": 6.552621841430664, "learning_rate": 2.868737373737374e-06, "loss": 6.277267837524414, "step": 43200 }, { "epoch": 0.13105, "grad_norm": 5.291956424713135, "learning_rate": 2.8684848484848487e-06, "loss": 6.259783935546875, "step": 43205 }, { "epoch": 0.1311, "grad_norm": 7.579299449920654, "learning_rate": 2.8682323232323233e-06, "loss": 6.256838989257813, "step": 43210 }, { "epoch": 0.13115, "grad_norm": 10.229508399963379, "learning_rate": 2.867979797979798e-06, "loss": 6.2678169250488285, "step": 43215 }, { "epoch": 0.1312, "grad_norm": 6.355286598205566, "learning_rate": 2.867727272727273e-06, "loss": 6.3556571960449215, "step": 43220 }, { "epoch": 0.13125, "grad_norm": 8.957408905029297, "learning_rate": 2.8674747474747476e-06, "loss": 6.273205947875977, "step": 43225 }, { "epoch": 0.1313, "grad_norm": 6.60697078704834, "learning_rate": 2.8672222222222223e-06, "loss": 6.271319961547851, "step": 43230 }, { "epoch": 0.13135, "grad_norm": 6.736981391906738, "learning_rate": 2.866969696969697e-06, "loss": 6.691302490234375, "step": 43235 }, { "epoch": 0.1314, "grad_norm": 9.466843605041504, "learning_rate": 2.866717171717172e-06, "loss": 6.486256408691406, "step": 43240 }, { "epoch": 0.13145, "grad_norm": 8.836398124694824, "learning_rate": 2.8664646464646466e-06, "loss": 6.220640563964844, "step": 43245 }, { "epoch": 0.1315, "grad_norm": 6.768199920654297, "learning_rate": 2.866212121212121e-06, "loss": 6.382104873657227, "step": 43250 }, { "epoch": 0.13155, "grad_norm": 8.878084182739258, "learning_rate": 2.865959595959596e-06, "loss": 6.279502868652344, "step": 43255 }, { "epoch": 0.1316, "grad_norm": 4.339023590087891, "learning_rate": 2.8657070707070713e-06, "loss": 6.294828414916992, "step": 43260 }, { "epoch": 0.13165, "grad_norm": 7.104442119598389, "learning_rate": 2.8654545454545455e-06, "loss": 6.244049453735352, "step": 43265 }, { "epoch": 0.1317, "grad_norm": 7.406525135040283, "learning_rate": 2.86520202020202e-06, "loss": 6.252048492431641, "step": 43270 }, { "epoch": 0.13175, "grad_norm": 25.285282135009766, "learning_rate": 2.8649494949494948e-06, "loss": 6.229248046875, "step": 43275 }, { "epoch": 0.1318, "grad_norm": 7.923766613006592, "learning_rate": 2.8646969696969702e-06, "loss": 6.269764709472656, "step": 43280 }, { "epoch": 0.13185, "grad_norm": 4.315817832946777, "learning_rate": 2.864444444444445e-06, "loss": 6.3016307830810545, "step": 43285 }, { "epoch": 0.1319, "grad_norm": 10.706947326660156, "learning_rate": 2.8641919191919195e-06, "loss": 6.286434173583984, "step": 43290 }, { "epoch": 0.13195, "grad_norm": 5.020844459533691, "learning_rate": 2.8639393939393937e-06, "loss": 6.243112564086914, "step": 43295 }, { "epoch": 0.132, "grad_norm": 18.568330764770508, "learning_rate": 2.863686868686869e-06, "loss": 6.373126220703125, "step": 43300 }, { "epoch": 0.13205, "grad_norm": 6.964751243591309, "learning_rate": 2.863434343434344e-06, "loss": 6.307568359375, "step": 43305 }, { "epoch": 0.1321, "grad_norm": 7.609381675720215, "learning_rate": 2.8631818181818185e-06, "loss": 6.238526916503906, "step": 43310 }, { "epoch": 0.13215, "grad_norm": 5.103451251983643, "learning_rate": 2.862929292929293e-06, "loss": 6.257978057861328, "step": 43315 }, { "epoch": 0.1322, "grad_norm": 5.043303966522217, "learning_rate": 2.862676767676768e-06, "loss": 6.2865032196044925, "step": 43320 }, { "epoch": 0.13225, "grad_norm": 7.471829891204834, "learning_rate": 2.8624242424242428e-06, "loss": 6.24681282043457, "step": 43325 }, { "epoch": 0.1323, "grad_norm": 5.769717216491699, "learning_rate": 2.8621717171717174e-06, "loss": 6.235546112060547, "step": 43330 }, { "epoch": 0.13235, "grad_norm": 4.684678077697754, "learning_rate": 2.8619191919191924e-06, "loss": 6.257871627807617, "step": 43335 }, { "epoch": 0.1324, "grad_norm": 7.821010589599609, "learning_rate": 2.861666666666667e-06, "loss": 6.280548095703125, "step": 43340 }, { "epoch": 0.13245, "grad_norm": 6.8559184074401855, "learning_rate": 2.8614141414141417e-06, "loss": 6.36663818359375, "step": 43345 }, { "epoch": 0.1325, "grad_norm": 4.6742682456970215, "learning_rate": 2.8611616161616163e-06, "loss": 6.276481628417969, "step": 43350 }, { "epoch": 0.13255, "grad_norm": 6.713963031768799, "learning_rate": 2.8609090909090914e-06, "loss": 6.242586517333985, "step": 43355 }, { "epoch": 0.1326, "grad_norm": 11.108776092529297, "learning_rate": 2.860656565656566e-06, "loss": 6.253023147583008, "step": 43360 }, { "epoch": 0.13265, "grad_norm": 4.8930840492248535, "learning_rate": 2.8604040404040407e-06, "loss": 6.236242294311523, "step": 43365 }, { "epoch": 0.1327, "grad_norm": 5.486450672149658, "learning_rate": 2.8601515151515153e-06, "loss": 6.283148574829101, "step": 43370 }, { "epoch": 0.13275, "grad_norm": 16.538646697998047, "learning_rate": 2.8598989898989903e-06, "loss": 6.187714385986328, "step": 43375 }, { "epoch": 0.1328, "grad_norm": 7.170218467712402, "learning_rate": 2.859646464646465e-06, "loss": 6.289879989624024, "step": 43380 }, { "epoch": 0.13285, "grad_norm": 9.803731918334961, "learning_rate": 2.8593939393939396e-06, "loss": 6.2450439453125, "step": 43385 }, { "epoch": 0.1329, "grad_norm": 5.954226970672607, "learning_rate": 2.8591414141414142e-06, "loss": 6.2174324035644535, "step": 43390 }, { "epoch": 0.13295, "grad_norm": 5.374699592590332, "learning_rate": 2.8588888888888893e-06, "loss": 6.26104507446289, "step": 43395 }, { "epoch": 0.133, "grad_norm": 7.809597969055176, "learning_rate": 2.858636363636364e-06, "loss": 6.281192779541016, "step": 43400 }, { "epoch": 0.13305, "grad_norm": 4.261511325836182, "learning_rate": 2.8583838383838385e-06, "loss": 6.2378791809082035, "step": 43405 }, { "epoch": 0.1331, "grad_norm": 6.513619899749756, "learning_rate": 2.858131313131313e-06, "loss": 6.260833740234375, "step": 43410 }, { "epoch": 0.13315, "grad_norm": 9.710037231445312, "learning_rate": 2.8578787878787882e-06, "loss": 6.354568862915039, "step": 43415 }, { "epoch": 0.1332, "grad_norm": 4.6178388595581055, "learning_rate": 2.857626262626263e-06, "loss": 6.261651611328125, "step": 43420 }, { "epoch": 0.13325, "grad_norm": 7.572020053863525, "learning_rate": 2.8573737373737375e-06, "loss": 6.233094024658203, "step": 43425 }, { "epoch": 0.1333, "grad_norm": 7.258317470550537, "learning_rate": 2.857121212121212e-06, "loss": 6.2591194152832035, "step": 43430 }, { "epoch": 0.13335, "grad_norm": 5.405412197113037, "learning_rate": 2.856868686868687e-06, "loss": 6.258555603027344, "step": 43435 }, { "epoch": 0.1334, "grad_norm": 7.302793979644775, "learning_rate": 2.856616161616162e-06, "loss": 6.341430282592773, "step": 43440 }, { "epoch": 0.13345, "grad_norm": 5.300051689147949, "learning_rate": 2.8563636363636364e-06, "loss": 6.321907043457031, "step": 43445 }, { "epoch": 0.1335, "grad_norm": 6.299030303955078, "learning_rate": 2.856111111111111e-06, "loss": 6.242219161987305, "step": 43450 }, { "epoch": 0.13355, "grad_norm": 7.227540969848633, "learning_rate": 2.8558585858585865e-06, "loss": 6.280513000488281, "step": 43455 }, { "epoch": 0.1336, "grad_norm": 4.16887903213501, "learning_rate": 2.8556060606060607e-06, "loss": 6.279356384277344, "step": 43460 }, { "epoch": 0.13365, "grad_norm": 6.142021179199219, "learning_rate": 2.8553535353535354e-06, "loss": 6.338402557373047, "step": 43465 }, { "epoch": 0.1337, "grad_norm": 34.889923095703125, "learning_rate": 2.85510101010101e-06, "loss": 6.316434478759765, "step": 43470 }, { "epoch": 0.13375, "grad_norm": 14.073229789733887, "learning_rate": 2.8548484848484855e-06, "loss": 6.268582153320312, "step": 43475 }, { "epoch": 0.1338, "grad_norm": 3.796022891998291, "learning_rate": 2.85459595959596e-06, "loss": 6.248382568359375, "step": 43480 }, { "epoch": 0.13385, "grad_norm": 4.540653228759766, "learning_rate": 2.8543434343434343e-06, "loss": 6.225812149047852, "step": 43485 }, { "epoch": 0.1339, "grad_norm": 5.1803412437438965, "learning_rate": 2.854090909090909e-06, "loss": 6.137946319580078, "step": 43490 }, { "epoch": 0.13395, "grad_norm": 4.075172424316406, "learning_rate": 2.8538383838383844e-06, "loss": 6.398261260986328, "step": 43495 }, { "epoch": 0.134, "grad_norm": 5.732426166534424, "learning_rate": 2.853585858585859e-06, "loss": 6.265174102783203, "step": 43500 }, { "epoch": 0.13405, "grad_norm": 8.545089721679688, "learning_rate": 2.8533333333333337e-06, "loss": 6.217965698242187, "step": 43505 }, { "epoch": 0.1341, "grad_norm": 4.967952251434326, "learning_rate": 2.8530808080808083e-06, "loss": 6.259419631958008, "step": 43510 }, { "epoch": 0.13415, "grad_norm": 8.858756065368652, "learning_rate": 2.8528282828282834e-06, "loss": 6.30622673034668, "step": 43515 }, { "epoch": 0.1342, "grad_norm": 5.176233291625977, "learning_rate": 2.852575757575758e-06, "loss": 6.219108581542969, "step": 43520 }, { "epoch": 0.13425, "grad_norm": 26.216751098632812, "learning_rate": 2.8523232323232326e-06, "loss": 6.069157409667969, "step": 43525 }, { "epoch": 0.1343, "grad_norm": 27.284883499145508, "learning_rate": 2.8520707070707073e-06, "loss": 5.469076538085938, "step": 43530 }, { "epoch": 0.13435, "grad_norm": 4.899468898773193, "learning_rate": 2.8518181818181823e-06, "loss": 6.1373443603515625, "step": 43535 }, { "epoch": 0.1344, "grad_norm": 8.36385440826416, "learning_rate": 2.851565656565657e-06, "loss": 6.24847412109375, "step": 43540 }, { "epoch": 0.13445, "grad_norm": 6.592554569244385, "learning_rate": 2.8513131313131316e-06, "loss": 6.235096740722656, "step": 43545 }, { "epoch": 0.1345, "grad_norm": 6.8928446769714355, "learning_rate": 2.851060606060606e-06, "loss": 6.266709518432617, "step": 43550 }, { "epoch": 0.13455, "grad_norm": 6.699801445007324, "learning_rate": 2.8508080808080813e-06, "loss": 6.322134399414063, "step": 43555 }, { "epoch": 0.1346, "grad_norm": 22.859577178955078, "learning_rate": 2.850555555555556e-06, "loss": 6.396205139160156, "step": 43560 }, { "epoch": 0.13465, "grad_norm": 6.703649520874023, "learning_rate": 2.8503030303030305e-06, "loss": 6.329948043823242, "step": 43565 }, { "epoch": 0.1347, "grad_norm": 7.4208984375, "learning_rate": 2.850050505050505e-06, "loss": 6.243439865112305, "step": 43570 }, { "epoch": 0.13475, "grad_norm": 7.553806781768799, "learning_rate": 2.84979797979798e-06, "loss": 6.189703369140625, "step": 43575 }, { "epoch": 0.1348, "grad_norm": 6.758103847503662, "learning_rate": 2.849545454545455e-06, "loss": 6.276029968261719, "step": 43580 }, { "epoch": 0.13485, "grad_norm": 2.949922561645508, "learning_rate": 2.8492929292929295e-06, "loss": 6.329407501220703, "step": 43585 }, { "epoch": 0.1349, "grad_norm": 5.194447040557861, "learning_rate": 2.849040404040404e-06, "loss": 6.270747375488281, "step": 43590 }, { "epoch": 0.13495, "grad_norm": 6.993348121643066, "learning_rate": 2.848787878787879e-06, "loss": 6.280782318115234, "step": 43595 }, { "epoch": 0.135, "grad_norm": 3.5155789852142334, "learning_rate": 2.8485353535353538e-06, "loss": 6.278750228881836, "step": 43600 }, { "epoch": 0.13505, "grad_norm": 5.948009490966797, "learning_rate": 2.8482828282828284e-06, "loss": 6.289763641357422, "step": 43605 }, { "epoch": 0.1351, "grad_norm": 15.022008895874023, "learning_rate": 2.848030303030303e-06, "loss": 6.240082550048828, "step": 43610 }, { "epoch": 0.13515, "grad_norm": 4.923147678375244, "learning_rate": 2.847777777777778e-06, "loss": 6.238047790527344, "step": 43615 }, { "epoch": 0.1352, "grad_norm": 6.05237340927124, "learning_rate": 2.8475252525252527e-06, "loss": 6.246885299682617, "step": 43620 }, { "epoch": 0.13525, "grad_norm": 5.735517501831055, "learning_rate": 2.8472727272727273e-06, "loss": 6.262055969238281, "step": 43625 }, { "epoch": 0.1353, "grad_norm": 8.023652076721191, "learning_rate": 2.847020202020202e-06, "loss": 6.247505569458008, "step": 43630 }, { "epoch": 0.13535, "grad_norm": 8.489407539367676, "learning_rate": 2.846767676767677e-06, "loss": 6.281596374511719, "step": 43635 }, { "epoch": 0.1354, "grad_norm": 6.829041957855225, "learning_rate": 2.8465151515151517e-06, "loss": 6.219001388549804, "step": 43640 }, { "epoch": 0.13545, "grad_norm": 7.7343831062316895, "learning_rate": 2.8462626262626263e-06, "loss": 6.241037368774414, "step": 43645 }, { "epoch": 0.1355, "grad_norm": 4.630633354187012, "learning_rate": 2.846010101010101e-06, "loss": 6.275534439086914, "step": 43650 }, { "epoch": 0.13555, "grad_norm": 15.093324661254883, "learning_rate": 2.845757575757576e-06, "loss": 6.3310546875, "step": 43655 }, { "epoch": 0.1356, "grad_norm": 4.71070671081543, "learning_rate": 2.8455050505050506e-06, "loss": 6.337930297851562, "step": 43660 }, { "epoch": 0.13565, "grad_norm": 5.804305553436279, "learning_rate": 2.8452525252525252e-06, "loss": 6.280965042114258, "step": 43665 }, { "epoch": 0.1357, "grad_norm": 19.624473571777344, "learning_rate": 2.845e-06, "loss": 6.429391479492187, "step": 43670 }, { "epoch": 0.13575, "grad_norm": 6.171222686767578, "learning_rate": 2.8447474747474753e-06, "loss": 6.239797210693359, "step": 43675 }, { "epoch": 0.1358, "grad_norm": 8.432456016540527, "learning_rate": 2.8444949494949495e-06, "loss": 6.260105895996094, "step": 43680 }, { "epoch": 0.13585, "grad_norm": 10.232135772705078, "learning_rate": 2.844242424242424e-06, "loss": 6.292079162597656, "step": 43685 }, { "epoch": 0.1359, "grad_norm": 5.744802474975586, "learning_rate": 2.843989898989899e-06, "loss": 6.239906311035156, "step": 43690 }, { "epoch": 0.13595, "grad_norm": 5.23682165145874, "learning_rate": 2.8437373737373743e-06, "loss": 6.2383781433105465, "step": 43695 }, { "epoch": 0.136, "grad_norm": 5.2025675773620605, "learning_rate": 2.843484848484849e-06, "loss": 6.264087295532226, "step": 43700 }, { "epoch": 0.13605, "grad_norm": 6.131991863250732, "learning_rate": 2.8432323232323235e-06, "loss": 6.255496978759766, "step": 43705 }, { "epoch": 0.1361, "grad_norm": 3.643937110900879, "learning_rate": 2.8429797979797978e-06, "loss": 6.269324874877929, "step": 43710 }, { "epoch": 0.13615, "grad_norm": 8.073681831359863, "learning_rate": 2.8427272727272732e-06, "loss": 6.278940200805664, "step": 43715 }, { "epoch": 0.1362, "grad_norm": 5.550046443939209, "learning_rate": 2.842474747474748e-06, "loss": 6.26325798034668, "step": 43720 }, { "epoch": 0.13625, "grad_norm": 6.735872745513916, "learning_rate": 2.8422222222222225e-06, "loss": 6.236701965332031, "step": 43725 }, { "epoch": 0.1363, "grad_norm": 5.902424335479736, "learning_rate": 2.841969696969697e-06, "loss": 6.1923267364501955, "step": 43730 }, { "epoch": 0.13635, "grad_norm": 6.914799213409424, "learning_rate": 2.841717171717172e-06, "loss": 6.261261749267578, "step": 43735 }, { "epoch": 0.1364, "grad_norm": 4.581704139709473, "learning_rate": 2.841464646464647e-06, "loss": 6.228335571289063, "step": 43740 }, { "epoch": 0.13645, "grad_norm": 5.931632041931152, "learning_rate": 2.8412121212121214e-06, "loss": 6.298046112060547, "step": 43745 }, { "epoch": 0.1365, "grad_norm": 8.191365242004395, "learning_rate": 2.840959595959596e-06, "loss": 6.225928497314453, "step": 43750 }, { "epoch": 0.13655, "grad_norm": 22.362194061279297, "learning_rate": 2.840707070707071e-06, "loss": 6.3614757537841795, "step": 43755 }, { "epoch": 0.1366, "grad_norm": 6.169240951538086, "learning_rate": 2.8404545454545457e-06, "loss": 6.334828948974609, "step": 43760 }, { "epoch": 0.13665, "grad_norm": 6.999136924743652, "learning_rate": 2.8402020202020204e-06, "loss": 6.2414695739746096, "step": 43765 }, { "epoch": 0.1367, "grad_norm": 4.43180513381958, "learning_rate": 2.8399494949494954e-06, "loss": 6.292564010620117, "step": 43770 }, { "epoch": 0.13675, "grad_norm": 6.282449722290039, "learning_rate": 2.83969696969697e-06, "loss": 6.222959899902344, "step": 43775 }, { "epoch": 0.1368, "grad_norm": 8.183842658996582, "learning_rate": 2.8394444444444447e-06, "loss": 6.283376693725586, "step": 43780 }, { "epoch": 0.13685, "grad_norm": 8.588669776916504, "learning_rate": 2.8391919191919193e-06, "loss": 6.217567062377929, "step": 43785 }, { "epoch": 0.1369, "grad_norm": 5.36295747756958, "learning_rate": 2.8389393939393944e-06, "loss": 6.2753547668457035, "step": 43790 }, { "epoch": 0.13695, "grad_norm": 29.703964233398438, "learning_rate": 2.838686868686869e-06, "loss": 6.470872497558593, "step": 43795 }, { "epoch": 0.137, "grad_norm": 12.096829414367676, "learning_rate": 2.8384343434343436e-06, "loss": 6.343801879882813, "step": 43800 }, { "epoch": 0.13705, "grad_norm": 5.230457782745361, "learning_rate": 2.8381818181818183e-06, "loss": 6.26167106628418, "step": 43805 }, { "epoch": 0.1371, "grad_norm": 27.020553588867188, "learning_rate": 2.8379292929292933e-06, "loss": 6.407839965820313, "step": 43810 }, { "epoch": 0.13715, "grad_norm": 3.595855474472046, "learning_rate": 2.837676767676768e-06, "loss": 6.400599670410156, "step": 43815 }, { "epoch": 0.1372, "grad_norm": 6.759857177734375, "learning_rate": 2.8374242424242426e-06, "loss": 6.254929733276367, "step": 43820 }, { "epoch": 0.13725, "grad_norm": 7.911201477050781, "learning_rate": 2.837171717171717e-06, "loss": 6.267362213134765, "step": 43825 }, { "epoch": 0.1373, "grad_norm": 6.64960241317749, "learning_rate": 2.8369191919191923e-06, "loss": 6.244205474853516, "step": 43830 }, { "epoch": 0.13735, "grad_norm": 5.265005588531494, "learning_rate": 2.836666666666667e-06, "loss": 6.278976821899414, "step": 43835 }, { "epoch": 0.1374, "grad_norm": 7.2589521408081055, "learning_rate": 2.8364141414141415e-06, "loss": 6.274136352539062, "step": 43840 }, { "epoch": 0.13745, "grad_norm": 9.822064399719238, "learning_rate": 2.836161616161616e-06, "loss": 6.264536285400391, "step": 43845 }, { "epoch": 0.1375, "grad_norm": 7.016148567199707, "learning_rate": 2.835909090909091e-06, "loss": 6.244120788574219, "step": 43850 }, { "epoch": 0.13755, "grad_norm": 6.746035575866699, "learning_rate": 2.835656565656566e-06, "loss": 6.430908203125, "step": 43855 }, { "epoch": 0.1376, "grad_norm": 5.664775848388672, "learning_rate": 2.8354040404040405e-06, "loss": 6.287171936035156, "step": 43860 }, { "epoch": 0.13765, "grad_norm": 3.7893972396850586, "learning_rate": 2.835151515151515e-06, "loss": 6.228302001953125, "step": 43865 }, { "epoch": 0.1377, "grad_norm": 11.137739181518555, "learning_rate": 2.8348989898989906e-06, "loss": 6.367694473266601, "step": 43870 }, { "epoch": 0.13775, "grad_norm": 7.342423439025879, "learning_rate": 2.8346464646464648e-06, "loss": 6.273491287231446, "step": 43875 }, { "epoch": 0.1378, "grad_norm": 5.733502388000488, "learning_rate": 2.8343939393939394e-06, "loss": 6.263868713378907, "step": 43880 }, { "epoch": 0.13785, "grad_norm": 4.293156147003174, "learning_rate": 2.834141414141414e-06, "loss": 6.2460487365722654, "step": 43885 }, { "epoch": 0.1379, "grad_norm": 7.589475631713867, "learning_rate": 2.8338888888888895e-06, "loss": 6.3057201385498045, "step": 43890 }, { "epoch": 0.13795, "grad_norm": 7.665335655212402, "learning_rate": 2.833636363636364e-06, "loss": 6.276919555664063, "step": 43895 }, { "epoch": 0.138, "grad_norm": 5.666153907775879, "learning_rate": 2.8333838383838388e-06, "loss": 6.266910552978516, "step": 43900 }, { "epoch": 0.13805, "grad_norm": 4.753661632537842, "learning_rate": 2.833131313131313e-06, "loss": 6.262953567504883, "step": 43905 }, { "epoch": 0.1381, "grad_norm": 5.5510029792785645, "learning_rate": 2.8328787878787885e-06, "loss": 6.2601982116699215, "step": 43910 }, { "epoch": 0.13815, "grad_norm": 7.07350492477417, "learning_rate": 2.832626262626263e-06, "loss": 6.294867706298828, "step": 43915 }, { "epoch": 0.1382, "grad_norm": 6.252420902252197, "learning_rate": 2.8323737373737377e-06, "loss": 6.268021774291992, "step": 43920 }, { "epoch": 0.13825, "grad_norm": 15.275386810302734, "learning_rate": 2.8321212121212124e-06, "loss": 6.308098602294922, "step": 43925 }, { "epoch": 0.1383, "grad_norm": 8.324472427368164, "learning_rate": 2.8318686868686874e-06, "loss": 6.219491577148437, "step": 43930 }, { "epoch": 0.13835, "grad_norm": 7.230158805847168, "learning_rate": 2.831616161616162e-06, "loss": 6.275179290771485, "step": 43935 }, { "epoch": 0.1384, "grad_norm": 10.057366371154785, "learning_rate": 2.8313636363636367e-06, "loss": 6.395474243164062, "step": 43940 }, { "epoch": 0.13845, "grad_norm": 9.991921424865723, "learning_rate": 2.8311111111111113e-06, "loss": 6.27286376953125, "step": 43945 }, { "epoch": 0.1385, "grad_norm": 9.384007453918457, "learning_rate": 2.8308585858585864e-06, "loss": 6.326846694946289, "step": 43950 }, { "epoch": 0.13855, "grad_norm": 4.692612648010254, "learning_rate": 2.830606060606061e-06, "loss": 6.251646423339844, "step": 43955 }, { "epoch": 0.1386, "grad_norm": 5.658445358276367, "learning_rate": 2.8303535353535356e-06, "loss": 6.274729919433594, "step": 43960 }, { "epoch": 0.13865, "grad_norm": 7.3480963706970215, "learning_rate": 2.8301010101010102e-06, "loss": 6.260699081420898, "step": 43965 }, { "epoch": 0.1387, "grad_norm": 3.87876558303833, "learning_rate": 2.8298484848484853e-06, "loss": 6.279816436767578, "step": 43970 }, { "epoch": 0.13875, "grad_norm": 9.42414665222168, "learning_rate": 2.82959595959596e-06, "loss": 6.250183486938477, "step": 43975 }, { "epoch": 0.1388, "grad_norm": 5.117704391479492, "learning_rate": 2.8293434343434346e-06, "loss": 6.249322128295899, "step": 43980 }, { "epoch": 0.13885, "grad_norm": 7.544601917266846, "learning_rate": 2.829090909090909e-06, "loss": 6.286181640625, "step": 43985 }, { "epoch": 0.1389, "grad_norm": 6.675680637359619, "learning_rate": 2.8288383838383842e-06, "loss": 6.249143981933594, "step": 43990 }, { "epoch": 0.13895, "grad_norm": 8.83263111114502, "learning_rate": 2.828585858585859e-06, "loss": 6.281089401245117, "step": 43995 }, { "epoch": 0.139, "grad_norm": 7.1394853591918945, "learning_rate": 2.8283333333333335e-06, "loss": 6.245540618896484, "step": 44000 }, { "epoch": 0.13905, "grad_norm": 5.778017044067383, "learning_rate": 2.828080808080808e-06, "loss": 6.2571464538574215, "step": 44005 }, { "epoch": 0.1391, "grad_norm": 7.488250732421875, "learning_rate": 2.827828282828283e-06, "loss": 6.311445617675782, "step": 44010 }, { "epoch": 0.13915, "grad_norm": 4.445948600769043, "learning_rate": 2.827575757575758e-06, "loss": 6.204874801635742, "step": 44015 }, { "epoch": 0.1392, "grad_norm": 6.639547348022461, "learning_rate": 2.8273232323232324e-06, "loss": 6.298545074462891, "step": 44020 }, { "epoch": 0.13925, "grad_norm": 6.445807456970215, "learning_rate": 2.827070707070707e-06, "loss": 6.284674072265625, "step": 44025 }, { "epoch": 0.1393, "grad_norm": 5.879687786102295, "learning_rate": 2.826818181818182e-06, "loss": 6.254294967651367, "step": 44030 }, { "epoch": 0.13935, "grad_norm": 5.651834964752197, "learning_rate": 2.8265656565656568e-06, "loss": 6.280343246459961, "step": 44035 }, { "epoch": 0.1394, "grad_norm": 3.8625991344451904, "learning_rate": 2.8263131313131314e-06, "loss": 6.275337219238281, "step": 44040 }, { "epoch": 0.13945, "grad_norm": 33.20541763305664, "learning_rate": 2.826060606060606e-06, "loss": 6.4258781433105465, "step": 44045 }, { "epoch": 0.1395, "grad_norm": 7.0386643409729, "learning_rate": 2.825808080808081e-06, "loss": 6.372874450683594, "step": 44050 }, { "epoch": 0.13955, "grad_norm": 4.617787837982178, "learning_rate": 2.8255555555555557e-06, "loss": 6.242399597167969, "step": 44055 }, { "epoch": 0.1396, "grad_norm": 6.365588665008545, "learning_rate": 2.8253030303030303e-06, "loss": 6.31727294921875, "step": 44060 }, { "epoch": 0.13965, "grad_norm": 5.55874490737915, "learning_rate": 2.825050505050505e-06, "loss": 6.27575798034668, "step": 44065 }, { "epoch": 0.1397, "grad_norm": 9.544164657592773, "learning_rate": 2.82479797979798e-06, "loss": 6.305755996704102, "step": 44070 }, { "epoch": 0.13975, "grad_norm": 5.957465171813965, "learning_rate": 2.8245454545454546e-06, "loss": 6.313508605957031, "step": 44075 }, { "epoch": 0.1398, "grad_norm": 34.85055160522461, "learning_rate": 2.8242929292929293e-06, "loss": 6.5276947021484375, "step": 44080 }, { "epoch": 0.13985, "grad_norm": 7.99599552154541, "learning_rate": 2.824040404040404e-06, "loss": 7.055690765380859, "step": 44085 }, { "epoch": 0.1399, "grad_norm": 9.433111190795898, "learning_rate": 2.8237878787878794e-06, "loss": 6.232430648803711, "step": 44090 }, { "epoch": 0.13995, "grad_norm": 7.656954765319824, "learning_rate": 2.8235353535353536e-06, "loss": 6.251575088500976, "step": 44095 }, { "epoch": 0.14, "grad_norm": 6.092886447906494, "learning_rate": 2.8232828282828282e-06, "loss": 6.310784912109375, "step": 44100 }, { "epoch": 0.14005, "grad_norm": 3.3574023246765137, "learning_rate": 2.823030303030303e-06, "loss": 6.236681365966797, "step": 44105 }, { "epoch": 0.1401, "grad_norm": 6.0391387939453125, "learning_rate": 2.8227777777777783e-06, "loss": 6.2394248962402346, "step": 44110 }, { "epoch": 0.14015, "grad_norm": 4.739085674285889, "learning_rate": 2.822525252525253e-06, "loss": 6.253959655761719, "step": 44115 }, { "epoch": 0.1402, "grad_norm": 6.640931129455566, "learning_rate": 2.8222727272727276e-06, "loss": 6.326102828979492, "step": 44120 }, { "epoch": 0.14025, "grad_norm": 5.8023457527160645, "learning_rate": 2.822020202020202e-06, "loss": 6.329736328125, "step": 44125 }, { "epoch": 0.1403, "grad_norm": 6.25180196762085, "learning_rate": 2.8217676767676773e-06, "loss": 6.278166198730469, "step": 44130 }, { "epoch": 0.14035, "grad_norm": 5.560831546783447, "learning_rate": 2.821515151515152e-06, "loss": 6.2579906463623045, "step": 44135 }, { "epoch": 0.1404, "grad_norm": 6.91656494140625, "learning_rate": 2.8212626262626265e-06, "loss": 6.2966255187988285, "step": 44140 }, { "epoch": 0.14045, "grad_norm": 5.392748832702637, "learning_rate": 2.821010101010101e-06, "loss": 6.342135620117188, "step": 44145 }, { "epoch": 0.1405, "grad_norm": 5.052755355834961, "learning_rate": 2.8207575757575762e-06, "loss": 6.2383171081542965, "step": 44150 }, { "epoch": 0.14055, "grad_norm": 7.55592679977417, "learning_rate": 2.820505050505051e-06, "loss": 6.303934478759766, "step": 44155 }, { "epoch": 0.1406, "grad_norm": 7.580378532409668, "learning_rate": 2.8202525252525255e-06, "loss": 6.281298828125, "step": 44160 }, { "epoch": 0.14065, "grad_norm": 6.166415691375732, "learning_rate": 2.82e-06, "loss": 6.3128711700439455, "step": 44165 }, { "epoch": 0.1407, "grad_norm": 9.64521312713623, "learning_rate": 2.819747474747475e-06, "loss": 6.316741943359375, "step": 44170 }, { "epoch": 0.14075, "grad_norm": 7.520144462585449, "learning_rate": 2.81949494949495e-06, "loss": 6.315768814086914, "step": 44175 }, { "epoch": 0.1408, "grad_norm": 5.460630893707275, "learning_rate": 2.8192424242424244e-06, "loss": 6.33502311706543, "step": 44180 }, { "epoch": 0.14085, "grad_norm": 4.530205726623535, "learning_rate": 2.818989898989899e-06, "loss": 6.318400573730469, "step": 44185 }, { "epoch": 0.1409, "grad_norm": 56.43229293823242, "learning_rate": 2.818737373737374e-06, "loss": 6.42651138305664, "step": 44190 }, { "epoch": 0.14095, "grad_norm": 8.896845817565918, "learning_rate": 2.8184848484848487e-06, "loss": 6.34825439453125, "step": 44195 }, { "epoch": 0.141, "grad_norm": 5.216614723205566, "learning_rate": 2.8182323232323234e-06, "loss": 6.293082046508789, "step": 44200 }, { "epoch": 0.14105, "grad_norm": 4.2130537033081055, "learning_rate": 2.8179797979797984e-06, "loss": 6.257293319702148, "step": 44205 }, { "epoch": 0.1411, "grad_norm": 7.427875518798828, "learning_rate": 2.817727272727273e-06, "loss": 6.243085479736328, "step": 44210 }, { "epoch": 0.14115, "grad_norm": 8.460680961608887, "learning_rate": 2.8174747474747477e-06, "loss": 6.284839630126953, "step": 44215 }, { "epoch": 0.1412, "grad_norm": 4.569086074829102, "learning_rate": 2.8172222222222223e-06, "loss": 6.256906127929687, "step": 44220 }, { "epoch": 0.14125, "grad_norm": 5.858251094818115, "learning_rate": 2.8169696969696974e-06, "loss": 6.220144653320313, "step": 44225 }, { "epoch": 0.1413, "grad_norm": 18.291927337646484, "learning_rate": 2.816717171717172e-06, "loss": 6.3586170196533205, "step": 44230 }, { "epoch": 0.14135, "grad_norm": 8.168474197387695, "learning_rate": 2.8164646464646466e-06, "loss": 6.332363891601562, "step": 44235 }, { "epoch": 0.1414, "grad_norm": 12.21086597442627, "learning_rate": 2.8162121212121213e-06, "loss": 6.3088336944580075, "step": 44240 }, { "epoch": 0.14145, "grad_norm": 18.750455856323242, "learning_rate": 2.8159595959595963e-06, "loss": 6.253476333618164, "step": 44245 }, { "epoch": 0.1415, "grad_norm": 5.907544136047363, "learning_rate": 2.815707070707071e-06, "loss": 6.254354095458984, "step": 44250 }, { "epoch": 0.14155, "grad_norm": 13.08189868927002, "learning_rate": 2.8154545454545456e-06, "loss": 6.286428833007813, "step": 44255 }, { "epoch": 0.1416, "grad_norm": 20.21051597595215, "learning_rate": 2.81520202020202e-06, "loss": 6.662921142578125, "step": 44260 }, { "epoch": 0.14165, "grad_norm": 5.372474670410156, "learning_rate": 2.8149494949494952e-06, "loss": 6.271969604492187, "step": 44265 }, { "epoch": 0.1417, "grad_norm": 6.257988929748535, "learning_rate": 2.81469696969697e-06, "loss": 6.281594085693359, "step": 44270 }, { "epoch": 0.14175, "grad_norm": 8.333292961120605, "learning_rate": 2.8144444444444445e-06, "loss": 6.3841194152832035, "step": 44275 }, { "epoch": 0.1418, "grad_norm": 5.1260600090026855, "learning_rate": 2.814191919191919e-06, "loss": 6.2451835632324215, "step": 44280 }, { "epoch": 0.14185, "grad_norm": 28.831279754638672, "learning_rate": 2.8139393939393946e-06, "loss": 6.322590637207031, "step": 44285 }, { "epoch": 0.1419, "grad_norm": 5.287070274353027, "learning_rate": 2.813686868686869e-06, "loss": 6.284501266479492, "step": 44290 }, { "epoch": 0.14195, "grad_norm": 5.87811803817749, "learning_rate": 2.8134343434343435e-06, "loss": 6.271545028686523, "step": 44295 }, { "epoch": 0.142, "grad_norm": 5.718628406524658, "learning_rate": 2.813181818181818e-06, "loss": 6.2422119140625, "step": 44300 }, { "epoch": 0.14205, "grad_norm": 4.514179706573486, "learning_rate": 2.8129292929292936e-06, "loss": 6.310243606567383, "step": 44305 }, { "epoch": 0.1421, "grad_norm": 6.052227020263672, "learning_rate": 2.812676767676768e-06, "loss": 6.374269104003906, "step": 44310 }, { "epoch": 0.14215, "grad_norm": 3.6946828365325928, "learning_rate": 2.812424242424243e-06, "loss": 6.288551330566406, "step": 44315 }, { "epoch": 0.1422, "grad_norm": 6.229543685913086, "learning_rate": 2.812171717171717e-06, "loss": 6.262970352172852, "step": 44320 }, { "epoch": 0.14225, "grad_norm": 5.616713047027588, "learning_rate": 2.8119191919191925e-06, "loss": 6.33880386352539, "step": 44325 }, { "epoch": 0.1423, "grad_norm": 11.182408332824707, "learning_rate": 2.811666666666667e-06, "loss": 6.347768783569336, "step": 44330 }, { "epoch": 0.14235, "grad_norm": 4.731259822845459, "learning_rate": 2.8114141414141418e-06, "loss": 6.225266647338867, "step": 44335 }, { "epoch": 0.1424, "grad_norm": 6.6150431632995605, "learning_rate": 2.8111616161616164e-06, "loss": 6.262294006347656, "step": 44340 }, { "epoch": 0.14245, "grad_norm": 6.48414421081543, "learning_rate": 2.8109090909090914e-06, "loss": 6.325629806518554, "step": 44345 }, { "epoch": 0.1425, "grad_norm": 8.636800765991211, "learning_rate": 2.810656565656566e-06, "loss": 6.2910011291503904, "step": 44350 }, { "epoch": 0.14255, "grad_norm": 4.018395900726318, "learning_rate": 2.8104040404040407e-06, "loss": 6.242158126831055, "step": 44355 }, { "epoch": 0.1426, "grad_norm": 5.81597900390625, "learning_rate": 2.8101515151515153e-06, "loss": 6.283693313598633, "step": 44360 }, { "epoch": 0.14265, "grad_norm": 17.631328582763672, "learning_rate": 2.8098989898989904e-06, "loss": 6.2839710235595705, "step": 44365 }, { "epoch": 0.1427, "grad_norm": 5.150351524353027, "learning_rate": 2.809646464646465e-06, "loss": 6.33551025390625, "step": 44370 }, { "epoch": 0.14275, "grad_norm": 4.469070911407471, "learning_rate": 2.8093939393939397e-06, "loss": 6.217398071289063, "step": 44375 }, { "epoch": 0.1428, "grad_norm": 6.889886856079102, "learning_rate": 2.8091414141414143e-06, "loss": 6.255786895751953, "step": 44380 }, { "epoch": 0.14285, "grad_norm": 5.906260967254639, "learning_rate": 2.8088888888888893e-06, "loss": 6.254908752441406, "step": 44385 }, { "epoch": 0.1429, "grad_norm": 5.224206924438477, "learning_rate": 2.808636363636364e-06, "loss": 6.261692810058594, "step": 44390 }, { "epoch": 0.14295, "grad_norm": 11.464712142944336, "learning_rate": 2.8083838383838386e-06, "loss": 6.2593341827392575, "step": 44395 }, { "epoch": 0.143, "grad_norm": 7.688294887542725, "learning_rate": 2.8081313131313132e-06, "loss": 6.184561920166016, "step": 44400 }, { "epoch": 0.14305, "grad_norm": 10.5582914352417, "learning_rate": 2.8078787878787883e-06, "loss": 6.454902648925781, "step": 44405 }, { "epoch": 0.1431, "grad_norm": 6.947793483734131, "learning_rate": 2.807626262626263e-06, "loss": 6.266757202148438, "step": 44410 }, { "epoch": 0.14315, "grad_norm": 5.236157417297363, "learning_rate": 2.8073737373737375e-06, "loss": 6.276475524902343, "step": 44415 }, { "epoch": 0.1432, "grad_norm": 8.816290855407715, "learning_rate": 2.807121212121212e-06, "loss": 6.218918991088867, "step": 44420 }, { "epoch": 0.14325, "grad_norm": 7.809131622314453, "learning_rate": 2.8068686868686872e-06, "loss": 6.2623847961425785, "step": 44425 }, { "epoch": 0.1433, "grad_norm": 7.078592300415039, "learning_rate": 2.806616161616162e-06, "loss": 6.2383575439453125, "step": 44430 }, { "epoch": 0.14335, "grad_norm": 4.458926677703857, "learning_rate": 2.8063636363636365e-06, "loss": 6.30372428894043, "step": 44435 }, { "epoch": 0.1434, "grad_norm": 6.701879501342773, "learning_rate": 2.806111111111111e-06, "loss": 6.3138572692871096, "step": 44440 }, { "epoch": 0.14345, "grad_norm": 5.674117088317871, "learning_rate": 2.805858585858586e-06, "loss": 6.270256042480469, "step": 44445 }, { "epoch": 0.1435, "grad_norm": 6.647220611572266, "learning_rate": 2.805606060606061e-06, "loss": 6.221831893920898, "step": 44450 }, { "epoch": 0.14355, "grad_norm": 6.186133861541748, "learning_rate": 2.8053535353535354e-06, "loss": 6.25794792175293, "step": 44455 }, { "epoch": 0.1436, "grad_norm": 5.077815532684326, "learning_rate": 2.80510101010101e-06, "loss": 6.233437347412109, "step": 44460 }, { "epoch": 0.14365, "grad_norm": 6.445928573608398, "learning_rate": 2.804848484848485e-06, "loss": 6.273016357421875, "step": 44465 }, { "epoch": 0.1437, "grad_norm": 16.777536392211914, "learning_rate": 2.8045959595959597e-06, "loss": 6.276217651367188, "step": 44470 }, { "epoch": 0.14375, "grad_norm": 5.216601848602295, "learning_rate": 2.8043434343434344e-06, "loss": 6.191600036621094, "step": 44475 }, { "epoch": 0.1438, "grad_norm": 10.874481201171875, "learning_rate": 2.804090909090909e-06, "loss": 6.2490802764892575, "step": 44480 }, { "epoch": 0.14385, "grad_norm": 6.110498905181885, "learning_rate": 2.803838383838384e-06, "loss": 6.217519378662109, "step": 44485 }, { "epoch": 0.1439, "grad_norm": 5.402491569519043, "learning_rate": 2.8035858585858587e-06, "loss": 6.247758102416992, "step": 44490 }, { "epoch": 0.14395, "grad_norm": 4.603033065795898, "learning_rate": 2.8033333333333333e-06, "loss": 6.290934753417969, "step": 44495 }, { "epoch": 0.144, "grad_norm": 8.154340744018555, "learning_rate": 2.803080808080808e-06, "loss": 6.253243255615234, "step": 44500 }, { "epoch": 0.14405, "grad_norm": 6.262698173522949, "learning_rate": 2.8028282828282834e-06, "loss": 6.277739715576172, "step": 44505 }, { "epoch": 0.1441, "grad_norm": 6.730071067810059, "learning_rate": 2.8025757575757576e-06, "loss": 6.233470153808594, "step": 44510 }, { "epoch": 0.14415, "grad_norm": 4.932652950286865, "learning_rate": 2.8023232323232323e-06, "loss": 6.273648071289062, "step": 44515 }, { "epoch": 0.1442, "grad_norm": 5.77827787399292, "learning_rate": 2.802070707070707e-06, "loss": 6.235684585571289, "step": 44520 }, { "epoch": 0.14425, "grad_norm": 17.231260299682617, "learning_rate": 2.8018181818181824e-06, "loss": 6.335435485839843, "step": 44525 }, { "epoch": 0.1443, "grad_norm": 6.264203071594238, "learning_rate": 2.801565656565657e-06, "loss": 6.435466003417969, "step": 44530 }, { "epoch": 0.14435, "grad_norm": 6.983163356781006, "learning_rate": 2.8013131313131316e-06, "loss": 6.279958343505859, "step": 44535 }, { "epoch": 0.1444, "grad_norm": 5.431467533111572, "learning_rate": 2.801060606060606e-06, "loss": 6.256022262573242, "step": 44540 }, { "epoch": 0.14445, "grad_norm": 5.568228721618652, "learning_rate": 2.8008080808080813e-06, "loss": 6.27398567199707, "step": 44545 }, { "epoch": 0.1445, "grad_norm": 6.982312202453613, "learning_rate": 2.800555555555556e-06, "loss": 6.266593170166016, "step": 44550 }, { "epoch": 0.14455, "grad_norm": 26.328989028930664, "learning_rate": 2.8003030303030306e-06, "loss": 6.467733764648438, "step": 44555 }, { "epoch": 0.1446, "grad_norm": 6.364570140838623, "learning_rate": 2.800050505050505e-06, "loss": 6.356686401367187, "step": 44560 }, { "epoch": 0.14465, "grad_norm": 7.304065227508545, "learning_rate": 2.7997979797979803e-06, "loss": 6.271440505981445, "step": 44565 }, { "epoch": 0.1447, "grad_norm": 20.01658821105957, "learning_rate": 2.799545454545455e-06, "loss": 6.272675704956055, "step": 44570 }, { "epoch": 0.14475, "grad_norm": 16.472362518310547, "learning_rate": 2.7992929292929295e-06, "loss": 6.170970153808594, "step": 44575 }, { "epoch": 0.1448, "grad_norm": 11.524031639099121, "learning_rate": 2.799040404040404e-06, "loss": 6.035786437988281, "step": 44580 }, { "epoch": 0.14485, "grad_norm": 5.667213439941406, "learning_rate": 2.798787878787879e-06, "loss": 6.240152359008789, "step": 44585 }, { "epoch": 0.1449, "grad_norm": 6.9438652992248535, "learning_rate": 2.798535353535354e-06, "loss": 6.255217361450195, "step": 44590 }, { "epoch": 0.14495, "grad_norm": 7.611233234405518, "learning_rate": 2.7982828282828285e-06, "loss": 6.296379852294922, "step": 44595 }, { "epoch": 0.145, "grad_norm": 7.492669582366943, "learning_rate": 2.798030303030303e-06, "loss": 6.412486267089844, "step": 44600 }, { "epoch": 0.14505, "grad_norm": 10.752496719360352, "learning_rate": 2.797777777777778e-06, "loss": 6.237070083618164, "step": 44605 }, { "epoch": 0.1451, "grad_norm": 5.729997634887695, "learning_rate": 2.7975252525252528e-06, "loss": 6.244730377197266, "step": 44610 }, { "epoch": 0.14515, "grad_norm": 8.387418746948242, "learning_rate": 2.7972727272727274e-06, "loss": 6.246953964233398, "step": 44615 }, { "epoch": 0.1452, "grad_norm": 6.817490577697754, "learning_rate": 2.7970202020202025e-06, "loss": 6.255287551879883, "step": 44620 }, { "epoch": 0.14525, "grad_norm": 5.772327423095703, "learning_rate": 2.796767676767677e-06, "loss": 6.314306640625, "step": 44625 }, { "epoch": 0.1453, "grad_norm": 6.116434097290039, "learning_rate": 2.7965151515151517e-06, "loss": 6.363074493408203, "step": 44630 }, { "epoch": 0.14535, "grad_norm": 5.503964424133301, "learning_rate": 2.7962626262626263e-06, "loss": 6.280364608764648, "step": 44635 }, { "epoch": 0.1454, "grad_norm": 7.717550277709961, "learning_rate": 2.7960101010101014e-06, "loss": 6.2375938415527346, "step": 44640 }, { "epoch": 0.14545, "grad_norm": 8.193074226379395, "learning_rate": 2.795757575757576e-06, "loss": 6.238637161254883, "step": 44645 }, { "epoch": 0.1455, "grad_norm": 4.384008884429932, "learning_rate": 2.7955050505050507e-06, "loss": 6.2714179992675785, "step": 44650 }, { "epoch": 0.14555, "grad_norm": 5.6202802658081055, "learning_rate": 2.7952525252525253e-06, "loss": 6.251432418823242, "step": 44655 }, { "epoch": 0.1456, "grad_norm": 7.785285949707031, "learning_rate": 2.7950000000000003e-06, "loss": 6.265673828125, "step": 44660 }, { "epoch": 0.14565, "grad_norm": 3.9858291149139404, "learning_rate": 2.794747474747475e-06, "loss": 6.301128387451172, "step": 44665 }, { "epoch": 0.1457, "grad_norm": 5.879644393920898, "learning_rate": 2.7944949494949496e-06, "loss": 6.225313186645508, "step": 44670 }, { "epoch": 0.14575, "grad_norm": 6.322900772094727, "learning_rate": 2.7942424242424242e-06, "loss": 6.228992462158203, "step": 44675 }, { "epoch": 0.1458, "grad_norm": 9.002882957458496, "learning_rate": 2.7939898989898993e-06, "loss": 6.309248352050782, "step": 44680 }, { "epoch": 0.14585, "grad_norm": 7.023865222930908, "learning_rate": 2.793737373737374e-06, "loss": 6.2896781921386715, "step": 44685 }, { "epoch": 0.1459, "grad_norm": 4.218127250671387, "learning_rate": 2.7934848484848485e-06, "loss": 6.236215591430664, "step": 44690 }, { "epoch": 0.14595, "grad_norm": 5.373611927032471, "learning_rate": 2.793232323232323e-06, "loss": 6.282855606079101, "step": 44695 }, { "epoch": 0.146, "grad_norm": 9.039627075195312, "learning_rate": 2.7929797979797987e-06, "loss": 6.286446380615234, "step": 44700 }, { "epoch": 0.14605, "grad_norm": 4.935218811035156, "learning_rate": 2.792727272727273e-06, "loss": 6.301695251464844, "step": 44705 }, { "epoch": 0.1461, "grad_norm": 8.018901824951172, "learning_rate": 2.7924747474747475e-06, "loss": 6.2106067657470705, "step": 44710 }, { "epoch": 0.14615, "grad_norm": 8.946569442749023, "learning_rate": 2.792222222222222e-06, "loss": 6.287466812133789, "step": 44715 }, { "epoch": 0.1462, "grad_norm": 5.500690460205078, "learning_rate": 2.7919696969696976e-06, "loss": 6.291099548339844, "step": 44720 }, { "epoch": 0.14625, "grad_norm": 8.342184066772461, "learning_rate": 2.7917171717171722e-06, "loss": 6.264703750610352, "step": 44725 }, { "epoch": 0.1463, "grad_norm": 5.3524556159973145, "learning_rate": 2.791464646464647e-06, "loss": 6.3577880859375, "step": 44730 }, { "epoch": 0.14635, "grad_norm": 6.888080596923828, "learning_rate": 2.791212121212121e-06, "loss": 6.269052505493164, "step": 44735 }, { "epoch": 0.1464, "grad_norm": 6.409927845001221, "learning_rate": 2.7909595959595965e-06, "loss": 6.254109191894531, "step": 44740 }, { "epoch": 0.14645, "grad_norm": 5.830942630767822, "learning_rate": 2.790707070707071e-06, "loss": 6.281002807617187, "step": 44745 }, { "epoch": 0.1465, "grad_norm": 20.258390426635742, "learning_rate": 2.790454545454546e-06, "loss": 6.365029144287109, "step": 44750 }, { "epoch": 0.14655, "grad_norm": 5.790291786193848, "learning_rate": 2.7902020202020204e-06, "loss": 6.329902267456054, "step": 44755 }, { "epoch": 0.1466, "grad_norm": 15.369828224182129, "learning_rate": 2.7899494949494955e-06, "loss": 6.413082122802734, "step": 44760 }, { "epoch": 0.14665, "grad_norm": 6.428067684173584, "learning_rate": 2.78969696969697e-06, "loss": 6.29041862487793, "step": 44765 }, { "epoch": 0.1467, "grad_norm": 4.802469253540039, "learning_rate": 2.7894444444444447e-06, "loss": 6.334333801269532, "step": 44770 }, { "epoch": 0.14675, "grad_norm": 4.5667572021484375, "learning_rate": 2.7891919191919194e-06, "loss": 6.219278335571289, "step": 44775 }, { "epoch": 0.1468, "grad_norm": 8.347817420959473, "learning_rate": 2.7889393939393944e-06, "loss": 6.262254333496093, "step": 44780 }, { "epoch": 0.14685, "grad_norm": 7.037924766540527, "learning_rate": 2.788686868686869e-06, "loss": 6.330924224853516, "step": 44785 }, { "epoch": 0.1469, "grad_norm": 8.688403129577637, "learning_rate": 2.7884343434343437e-06, "loss": 6.229383850097657, "step": 44790 }, { "epoch": 0.14695, "grad_norm": 7.913570880889893, "learning_rate": 2.7881818181818183e-06, "loss": 6.295010757446289, "step": 44795 }, { "epoch": 0.147, "grad_norm": 5.322941780090332, "learning_rate": 2.7879292929292934e-06, "loss": 6.26745376586914, "step": 44800 }, { "epoch": 0.14705, "grad_norm": 6.8106255531311035, "learning_rate": 2.787676767676768e-06, "loss": 6.211701965332031, "step": 44805 }, { "epoch": 0.1471, "grad_norm": 4.671997547149658, "learning_rate": 2.7874242424242426e-06, "loss": 6.270697021484375, "step": 44810 }, { "epoch": 0.14715, "grad_norm": 4.04005765914917, "learning_rate": 2.7871717171717173e-06, "loss": 6.2741447448730465, "step": 44815 }, { "epoch": 0.1472, "grad_norm": 5.604843616485596, "learning_rate": 2.7869191919191923e-06, "loss": 6.248669052124024, "step": 44820 }, { "epoch": 0.14725, "grad_norm": 4.368190765380859, "learning_rate": 2.786666666666667e-06, "loss": 6.251483154296875, "step": 44825 }, { "epoch": 0.1473, "grad_norm": 6.3983941078186035, "learning_rate": 2.7864141414141416e-06, "loss": 6.185977554321289, "step": 44830 }, { "epoch": 0.14735, "grad_norm": 6.676506519317627, "learning_rate": 2.786161616161616e-06, "loss": 6.332187271118164, "step": 44835 }, { "epoch": 0.1474, "grad_norm": 21.463109970092773, "learning_rate": 2.7859090909090913e-06, "loss": 6.2434642791748045, "step": 44840 }, { "epoch": 0.14745, "grad_norm": 35.744998931884766, "learning_rate": 2.785656565656566e-06, "loss": 5.6699878692626955, "step": 44845 }, { "epoch": 0.1475, "grad_norm": 8.564871788024902, "learning_rate": 2.7854040404040405e-06, "loss": 6.3250732421875, "step": 44850 }, { "epoch": 0.14755, "grad_norm": 6.605083465576172, "learning_rate": 2.785151515151515e-06, "loss": 6.260835266113281, "step": 44855 }, { "epoch": 0.1476, "grad_norm": 29.167905807495117, "learning_rate": 2.78489898989899e-06, "loss": 6.301128768920899, "step": 44860 }, { "epoch": 0.14765, "grad_norm": 4.641321182250977, "learning_rate": 2.784646464646465e-06, "loss": 6.249382781982422, "step": 44865 }, { "epoch": 0.1477, "grad_norm": 6.320239543914795, "learning_rate": 2.7843939393939395e-06, "loss": 6.245568084716797, "step": 44870 }, { "epoch": 0.14775, "grad_norm": 10.498394012451172, "learning_rate": 2.784141414141414e-06, "loss": 6.266356658935547, "step": 44875 }, { "epoch": 0.1478, "grad_norm": 5.868678092956543, "learning_rate": 2.783888888888889e-06, "loss": 6.301435089111328, "step": 44880 }, { "epoch": 0.14785, "grad_norm": 100.91871643066406, "learning_rate": 2.7836363636363638e-06, "loss": 5.306366729736328, "step": 44885 }, { "epoch": 0.1479, "grad_norm": 4.730008602142334, "learning_rate": 2.7833838383838384e-06, "loss": 6.311387634277343, "step": 44890 }, { "epoch": 0.14795, "grad_norm": 6.749942302703857, "learning_rate": 2.783131313131313e-06, "loss": 6.244385147094727, "step": 44895 }, { "epoch": 0.148, "grad_norm": 10.032923698425293, "learning_rate": 2.782878787878788e-06, "loss": 6.280511474609375, "step": 44900 }, { "epoch": 0.14805, "grad_norm": 10.292678833007812, "learning_rate": 2.7826262626262627e-06, "loss": 6.282645416259766, "step": 44905 }, { "epoch": 0.1481, "grad_norm": 7.413251876831055, "learning_rate": 2.7823737373737374e-06, "loss": 6.226106262207031, "step": 44910 }, { "epoch": 0.14815, "grad_norm": 7.957797527313232, "learning_rate": 2.782121212121212e-06, "loss": 6.280856323242188, "step": 44915 }, { "epoch": 0.1482, "grad_norm": 12.615630149841309, "learning_rate": 2.7818686868686875e-06, "loss": 6.326515579223633, "step": 44920 }, { "epoch": 0.14825, "grad_norm": 5.455124855041504, "learning_rate": 2.7816161616161617e-06, "loss": 6.368231582641601, "step": 44925 }, { "epoch": 0.1483, "grad_norm": 5.302582263946533, "learning_rate": 2.7813636363636363e-06, "loss": 6.682402038574219, "step": 44930 }, { "epoch": 0.14835, "grad_norm": 3.5725865364074707, "learning_rate": 2.781111111111111e-06, "loss": 6.2102008819580075, "step": 44935 }, { "epoch": 0.1484, "grad_norm": 8.259992599487305, "learning_rate": 2.7808585858585864e-06, "loss": 6.2805938720703125, "step": 44940 }, { "epoch": 0.14845, "grad_norm": 5.32001256942749, "learning_rate": 2.780606060606061e-06, "loss": 6.231832885742188, "step": 44945 }, { "epoch": 0.1485, "grad_norm": 5.468841075897217, "learning_rate": 2.7803535353535357e-06, "loss": 6.260396575927734, "step": 44950 }, { "epoch": 0.14855, "grad_norm": 3.866842746734619, "learning_rate": 2.78010101010101e-06, "loss": 6.229546737670899, "step": 44955 }, { "epoch": 0.1486, "grad_norm": 4.696268558502197, "learning_rate": 2.7798484848484854e-06, "loss": 6.267987442016602, "step": 44960 }, { "epoch": 0.14865, "grad_norm": 6.122270107269287, "learning_rate": 2.77959595959596e-06, "loss": 6.31391716003418, "step": 44965 }, { "epoch": 0.1487, "grad_norm": 5.881770610809326, "learning_rate": 2.7793434343434346e-06, "loss": 6.283575439453125, "step": 44970 }, { "epoch": 0.14875, "grad_norm": 6.949029445648193, "learning_rate": 2.7790909090909092e-06, "loss": 6.258554077148437, "step": 44975 }, { "epoch": 0.1488, "grad_norm": 20.40878677368164, "learning_rate": 2.7788383838383843e-06, "loss": 6.2899833679199215, "step": 44980 }, { "epoch": 0.14885, "grad_norm": 5.462978363037109, "learning_rate": 2.778585858585859e-06, "loss": 6.292649459838867, "step": 44985 }, { "epoch": 0.1489, "grad_norm": 7.023738384246826, "learning_rate": 2.7783333333333336e-06, "loss": 6.259365081787109, "step": 44990 }, { "epoch": 0.14895, "grad_norm": 7.815762042999268, "learning_rate": 2.778080808080808e-06, "loss": 6.219704437255859, "step": 44995 }, { "epoch": 0.149, "grad_norm": 4.718075752258301, "learning_rate": 2.7778282828282832e-06, "loss": 6.204832458496094, "step": 45000 }, { "epoch": 0.14905, "grad_norm": 11.00755786895752, "learning_rate": 2.777575757575758e-06, "loss": 6.2437389373779295, "step": 45005 }, { "epoch": 0.1491, "grad_norm": 7.833898544311523, "learning_rate": 2.7773232323232325e-06, "loss": 6.253872299194336, "step": 45010 }, { "epoch": 0.14915, "grad_norm": 7.990160942077637, "learning_rate": 2.777070707070707e-06, "loss": 6.246794128417969, "step": 45015 }, { "epoch": 0.1492, "grad_norm": 9.865228652954102, "learning_rate": 2.776818181818182e-06, "loss": 6.284980773925781, "step": 45020 }, { "epoch": 0.14925, "grad_norm": 7.4226226806640625, "learning_rate": 2.776565656565657e-06, "loss": 6.384543609619141, "step": 45025 }, { "epoch": 0.1493, "grad_norm": 5.700085639953613, "learning_rate": 2.7763131313131314e-06, "loss": 6.2161602020263675, "step": 45030 }, { "epoch": 0.14935, "grad_norm": 7.29028844833374, "learning_rate": 2.776060606060606e-06, "loss": 6.468390655517578, "step": 45035 }, { "epoch": 0.1494, "grad_norm": 5.4389424324035645, "learning_rate": 2.775808080808081e-06, "loss": 6.229438400268554, "step": 45040 }, { "epoch": 0.14945, "grad_norm": 7.321055889129639, "learning_rate": 2.7755555555555558e-06, "loss": 6.233223724365234, "step": 45045 }, { "epoch": 0.1495, "grad_norm": 10.94199275970459, "learning_rate": 2.7753030303030304e-06, "loss": 6.245690155029297, "step": 45050 }, { "epoch": 0.14955, "grad_norm": 6.342372417449951, "learning_rate": 2.7750505050505054e-06, "loss": 6.254891967773437, "step": 45055 }, { "epoch": 0.1496, "grad_norm": 5.814186096191406, "learning_rate": 2.77479797979798e-06, "loss": 6.354615020751953, "step": 45060 }, { "epoch": 0.14965, "grad_norm": 5.986862659454346, "learning_rate": 2.7745454545454547e-06, "loss": 6.300892639160156, "step": 45065 }, { "epoch": 0.1497, "grad_norm": 4.279307842254639, "learning_rate": 2.7742929292929293e-06, "loss": 6.244779205322265, "step": 45070 }, { "epoch": 0.14975, "grad_norm": 8.474696159362793, "learning_rate": 2.7740404040404044e-06, "loss": 6.253299331665039, "step": 45075 }, { "epoch": 0.1498, "grad_norm": 11.64578914642334, "learning_rate": 2.773787878787879e-06, "loss": 6.338528823852539, "step": 45080 }, { "epoch": 0.14985, "grad_norm": 10.351479530334473, "learning_rate": 2.7735353535353536e-06, "loss": 6.2660987854003904, "step": 45085 }, { "epoch": 0.1499, "grad_norm": 8.286526679992676, "learning_rate": 2.7732828282828283e-06, "loss": 6.281661605834961, "step": 45090 }, { "epoch": 0.14995, "grad_norm": 5.407909393310547, "learning_rate": 2.7730303030303033e-06, "loss": 6.250166320800782, "step": 45095 }, { "epoch": 0.15, "grad_norm": 7.688942909240723, "learning_rate": 2.772777777777778e-06, "loss": 6.259043502807617, "step": 45100 }, { "epoch": 0.15005, "grad_norm": 7.41702127456665, "learning_rate": 2.7725252525252526e-06, "loss": 6.289417266845703, "step": 45105 }, { "epoch": 0.1501, "grad_norm": 4.245343208312988, "learning_rate": 2.7722727272727272e-06, "loss": 6.253791809082031, "step": 45110 }, { "epoch": 0.15015, "grad_norm": 15.843767166137695, "learning_rate": 2.7720202020202027e-06, "loss": 6.558887481689453, "step": 45115 }, { "epoch": 0.1502, "grad_norm": 6.3005170822143555, "learning_rate": 2.771767676767677e-06, "loss": 6.278690338134766, "step": 45120 }, { "epoch": 0.15025, "grad_norm": 7.244434356689453, "learning_rate": 2.7715151515151515e-06, "loss": 6.247335052490234, "step": 45125 }, { "epoch": 0.1503, "grad_norm": 7.250955581665039, "learning_rate": 2.771262626262626e-06, "loss": 6.279109191894531, "step": 45130 }, { "epoch": 0.15035, "grad_norm": 7.080718994140625, "learning_rate": 2.7710101010101016e-06, "loss": 6.273427963256836, "step": 45135 }, { "epoch": 0.1504, "grad_norm": 8.17448902130127, "learning_rate": 2.7707575757575763e-06, "loss": 6.266955947875976, "step": 45140 }, { "epoch": 0.15045, "grad_norm": 4.050004959106445, "learning_rate": 2.770505050505051e-06, "loss": 6.2426399230957035, "step": 45145 }, { "epoch": 0.1505, "grad_norm": 4.764939785003662, "learning_rate": 2.770252525252525e-06, "loss": 6.293728256225586, "step": 45150 }, { "epoch": 0.15055, "grad_norm": 5.397923469543457, "learning_rate": 2.7700000000000006e-06, "loss": 6.292818069458008, "step": 45155 }, { "epoch": 0.1506, "grad_norm": 6.992273807525635, "learning_rate": 2.7697474747474752e-06, "loss": 6.300243377685547, "step": 45160 }, { "epoch": 0.15065, "grad_norm": 7.758240699768066, "learning_rate": 2.76949494949495e-06, "loss": 6.271379089355468, "step": 45165 }, { "epoch": 0.1507, "grad_norm": 6.6182966232299805, "learning_rate": 2.7692424242424245e-06, "loss": 6.28007926940918, "step": 45170 }, { "epoch": 0.15075, "grad_norm": 6.425936698913574, "learning_rate": 2.7689898989898995e-06, "loss": 6.2964935302734375, "step": 45175 }, { "epoch": 0.1508, "grad_norm": 6.473394870758057, "learning_rate": 2.768737373737374e-06, "loss": 6.269863891601562, "step": 45180 }, { "epoch": 0.15085, "grad_norm": 6.038826942443848, "learning_rate": 2.7684848484848488e-06, "loss": 6.304810333251953, "step": 45185 }, { "epoch": 0.1509, "grad_norm": 6.641604900360107, "learning_rate": 2.7682323232323234e-06, "loss": 6.225019836425782, "step": 45190 }, { "epoch": 0.15095, "grad_norm": 6.2642340660095215, "learning_rate": 2.7679797979797985e-06, "loss": 6.272534942626953, "step": 45195 }, { "epoch": 0.151, "grad_norm": 9.334985733032227, "learning_rate": 2.767727272727273e-06, "loss": 6.2659423828125, "step": 45200 }, { "epoch": 0.15105, "grad_norm": 6.215610504150391, "learning_rate": 2.7674747474747477e-06, "loss": 6.236957550048828, "step": 45205 }, { "epoch": 0.1511, "grad_norm": 12.255465507507324, "learning_rate": 2.7672222222222224e-06, "loss": 6.172434234619141, "step": 45210 }, { "epoch": 0.15115, "grad_norm": 5.875138759613037, "learning_rate": 2.7669696969696974e-06, "loss": 6.253817367553711, "step": 45215 }, { "epoch": 0.1512, "grad_norm": 6.1511921882629395, "learning_rate": 2.766717171717172e-06, "loss": 6.229208755493164, "step": 45220 }, { "epoch": 0.15125, "grad_norm": 8.65363597869873, "learning_rate": 2.7664646464646467e-06, "loss": 6.282279586791992, "step": 45225 }, { "epoch": 0.1513, "grad_norm": 7.026503562927246, "learning_rate": 2.7662121212121213e-06, "loss": 6.198213195800781, "step": 45230 }, { "epoch": 0.15135, "grad_norm": 6.766472339630127, "learning_rate": 2.7659595959595964e-06, "loss": 6.280776596069336, "step": 45235 }, { "epoch": 0.1514, "grad_norm": 7.174882888793945, "learning_rate": 2.765707070707071e-06, "loss": 6.24532470703125, "step": 45240 }, { "epoch": 0.15145, "grad_norm": 7.5785298347473145, "learning_rate": 2.7654545454545456e-06, "loss": 6.2318061828613285, "step": 45245 }, { "epoch": 0.1515, "grad_norm": 4.757777690887451, "learning_rate": 2.7652020202020203e-06, "loss": 6.264457702636719, "step": 45250 }, { "epoch": 0.15155, "grad_norm": 5.941581726074219, "learning_rate": 2.7649494949494953e-06, "loss": 6.250365447998047, "step": 45255 }, { "epoch": 0.1516, "grad_norm": 6.180167198181152, "learning_rate": 2.76469696969697e-06, "loss": 6.3065650939941404, "step": 45260 }, { "epoch": 0.15165, "grad_norm": 5.529297351837158, "learning_rate": 2.7644444444444446e-06, "loss": 6.312202072143554, "step": 45265 }, { "epoch": 0.1517, "grad_norm": 12.882007598876953, "learning_rate": 2.764191919191919e-06, "loss": 6.265976333618164, "step": 45270 }, { "epoch": 0.15175, "grad_norm": 8.22797966003418, "learning_rate": 2.7639393939393942e-06, "loss": 6.2194572448730465, "step": 45275 }, { "epoch": 0.1518, "grad_norm": 13.162986755371094, "learning_rate": 2.763686868686869e-06, "loss": 6.185873413085938, "step": 45280 }, { "epoch": 0.15185, "grad_norm": 67.87120819091797, "learning_rate": 2.7634343434343435e-06, "loss": 6.2225791931152346, "step": 45285 }, { "epoch": 0.1519, "grad_norm": 5.333173751831055, "learning_rate": 2.763181818181818e-06, "loss": 6.284839630126953, "step": 45290 }, { "epoch": 0.15195, "grad_norm": 11.018622398376465, "learning_rate": 2.762929292929293e-06, "loss": 6.264982604980469, "step": 45295 }, { "epoch": 0.152, "grad_norm": 5.303316116333008, "learning_rate": 2.762676767676768e-06, "loss": 6.280719757080078, "step": 45300 }, { "epoch": 0.15205, "grad_norm": 6.629040241241455, "learning_rate": 2.7624242424242425e-06, "loss": 6.29632797241211, "step": 45305 }, { "epoch": 0.1521, "grad_norm": 26.659711837768555, "learning_rate": 2.762171717171717e-06, "loss": 6.596957397460938, "step": 45310 }, { "epoch": 0.15215, "grad_norm": 8.190544128417969, "learning_rate": 2.761919191919192e-06, "loss": 6.350960159301758, "step": 45315 }, { "epoch": 0.1522, "grad_norm": 6.476108551025391, "learning_rate": 2.7616666666666668e-06, "loss": 6.311851119995117, "step": 45320 }, { "epoch": 0.15225, "grad_norm": 4.367987632751465, "learning_rate": 2.7614141414141414e-06, "loss": 6.323622131347657, "step": 45325 }, { "epoch": 0.1523, "grad_norm": 6.251415729522705, "learning_rate": 2.761161616161616e-06, "loss": 6.227555847167968, "step": 45330 }, { "epoch": 0.15235, "grad_norm": 7.247132301330566, "learning_rate": 2.7609090909090915e-06, "loss": 6.225238418579101, "step": 45335 }, { "epoch": 0.1524, "grad_norm": 9.7341890335083, "learning_rate": 2.760656565656566e-06, "loss": 6.291847610473633, "step": 45340 }, { "epoch": 0.15245, "grad_norm": 7.029014587402344, "learning_rate": 2.7604040404040403e-06, "loss": 6.259883880615234, "step": 45345 }, { "epoch": 0.1525, "grad_norm": 6.426142692565918, "learning_rate": 2.760151515151515e-06, "loss": 6.2796272277832035, "step": 45350 }, { "epoch": 0.15255, "grad_norm": 3.514996290206909, "learning_rate": 2.7598989898989904e-06, "loss": 6.342458724975586, "step": 45355 }, { "epoch": 0.1526, "grad_norm": 4.9895429611206055, "learning_rate": 2.759646464646465e-06, "loss": 6.268963623046875, "step": 45360 }, { "epoch": 0.15265, "grad_norm": 4.495156764984131, "learning_rate": 2.7593939393939397e-06, "loss": 6.231523895263672, "step": 45365 }, { "epoch": 0.1527, "grad_norm": 5.451263904571533, "learning_rate": 2.759141414141414e-06, "loss": 6.2554584503173825, "step": 45370 }, { "epoch": 0.15275, "grad_norm": 5.487541675567627, "learning_rate": 2.7588888888888894e-06, "loss": 6.2924652099609375, "step": 45375 }, { "epoch": 0.1528, "grad_norm": 8.630091667175293, "learning_rate": 2.758636363636364e-06, "loss": 6.392219543457031, "step": 45380 }, { "epoch": 0.15285, "grad_norm": 7.646829605102539, "learning_rate": 2.7583838383838387e-06, "loss": 6.255488586425781, "step": 45385 }, { "epoch": 0.1529, "grad_norm": 6.81560754776001, "learning_rate": 2.7581313131313133e-06, "loss": 6.266620635986328, "step": 45390 }, { "epoch": 0.15295, "grad_norm": 5.639686584472656, "learning_rate": 2.7578787878787883e-06, "loss": 6.168790817260742, "step": 45395 }, { "epoch": 0.153, "grad_norm": 6.298264980316162, "learning_rate": 2.757626262626263e-06, "loss": 6.20983772277832, "step": 45400 }, { "epoch": 0.15305, "grad_norm": 5.564199447631836, "learning_rate": 2.7573737373737376e-06, "loss": 6.249552917480469, "step": 45405 }, { "epoch": 0.1531, "grad_norm": 10.35019588470459, "learning_rate": 2.7571212121212122e-06, "loss": 6.279130554199218, "step": 45410 }, { "epoch": 0.15315, "grad_norm": 9.485368728637695, "learning_rate": 2.7568686868686873e-06, "loss": 6.275108718872071, "step": 45415 }, { "epoch": 0.1532, "grad_norm": 6.720374584197998, "learning_rate": 2.756616161616162e-06, "loss": 6.238172912597657, "step": 45420 }, { "epoch": 0.15325, "grad_norm": 6.255117416381836, "learning_rate": 2.7563636363636365e-06, "loss": 6.263439178466797, "step": 45425 }, { "epoch": 0.1533, "grad_norm": 5.569790840148926, "learning_rate": 2.756111111111111e-06, "loss": 6.250357818603516, "step": 45430 }, { "epoch": 0.15335, "grad_norm": 4.797764301300049, "learning_rate": 2.7558585858585862e-06, "loss": 6.244092559814453, "step": 45435 }, { "epoch": 0.1534, "grad_norm": 6.796642780303955, "learning_rate": 2.755606060606061e-06, "loss": 6.285055541992188, "step": 45440 }, { "epoch": 0.15345, "grad_norm": 6.612273693084717, "learning_rate": 2.7553535353535355e-06, "loss": 6.335058975219726, "step": 45445 }, { "epoch": 0.1535, "grad_norm": 17.889570236206055, "learning_rate": 2.75510101010101e-06, "loss": 6.4569145202636715, "step": 45450 }, { "epoch": 0.15355, "grad_norm": 7.3726806640625, "learning_rate": 2.754848484848485e-06, "loss": 6.290710830688477, "step": 45455 }, { "epoch": 0.1536, "grad_norm": 9.082775115966797, "learning_rate": 2.75459595959596e-06, "loss": 6.3030242919921875, "step": 45460 }, { "epoch": 0.15365, "grad_norm": 7.351465225219727, "learning_rate": 2.7543434343434344e-06, "loss": 6.260487365722656, "step": 45465 }, { "epoch": 0.1537, "grad_norm": 6.87325382232666, "learning_rate": 2.7540909090909095e-06, "loss": 6.271580505371094, "step": 45470 }, { "epoch": 0.15375, "grad_norm": 5.484936237335205, "learning_rate": 2.753838383838384e-06, "loss": 6.2407676696777346, "step": 45475 }, { "epoch": 0.1538, "grad_norm": 6.882890224456787, "learning_rate": 2.7535858585858587e-06, "loss": 6.254702377319336, "step": 45480 }, { "epoch": 0.15385, "grad_norm": 10.251642227172852, "learning_rate": 2.7533333333333334e-06, "loss": 6.294542694091797, "step": 45485 }, { "epoch": 0.1539, "grad_norm": 4.068947792053223, "learning_rate": 2.7530808080808084e-06, "loss": 6.280735778808594, "step": 45490 }, { "epoch": 0.15395, "grad_norm": 7.629263401031494, "learning_rate": 2.752828282828283e-06, "loss": 6.220434951782226, "step": 45495 }, { "epoch": 0.154, "grad_norm": 26.64723014831543, "learning_rate": 2.7525757575757577e-06, "loss": 6.613409423828125, "step": 45500 }, { "epoch": 0.15405, "grad_norm": 5.593696594238281, "learning_rate": 2.7523232323232323e-06, "loss": 6.669633483886718, "step": 45505 }, { "epoch": 0.1541, "grad_norm": 4.921168804168701, "learning_rate": 2.7520707070707074e-06, "loss": 6.241437530517578, "step": 45510 }, { "epoch": 0.15415, "grad_norm": 5.356301784515381, "learning_rate": 2.751818181818182e-06, "loss": 6.254882049560547, "step": 45515 }, { "epoch": 0.1542, "grad_norm": 11.69173812866211, "learning_rate": 2.7515656565656566e-06, "loss": 6.300406646728516, "step": 45520 }, { "epoch": 0.15425, "grad_norm": 7.191143035888672, "learning_rate": 2.7513131313131313e-06, "loss": 6.289567184448242, "step": 45525 }, { "epoch": 0.1543, "grad_norm": 4.27197790145874, "learning_rate": 2.7510606060606067e-06, "loss": 6.267335510253906, "step": 45530 }, { "epoch": 0.15435, "grad_norm": 4.36561918258667, "learning_rate": 2.750808080808081e-06, "loss": 6.295677947998047, "step": 45535 }, { "epoch": 0.1544, "grad_norm": 16.01382827758789, "learning_rate": 2.7505555555555556e-06, "loss": 6.234916305541992, "step": 45540 }, { "epoch": 0.15445, "grad_norm": 13.397002220153809, "learning_rate": 2.75030303030303e-06, "loss": 6.216877365112305, "step": 45545 }, { "epoch": 0.1545, "grad_norm": 6.915647029876709, "learning_rate": 2.7500505050505057e-06, "loss": 6.292382431030274, "step": 45550 }, { "epoch": 0.15455, "grad_norm": 7.948633670806885, "learning_rate": 2.7497979797979803e-06, "loss": 6.255560302734375, "step": 45555 }, { "epoch": 0.1546, "grad_norm": 5.448237419128418, "learning_rate": 2.749545454545455e-06, "loss": 6.178861236572265, "step": 45560 }, { "epoch": 0.15465, "grad_norm": 8.465738296508789, "learning_rate": 2.749292929292929e-06, "loss": 6.2208820343017575, "step": 45565 }, { "epoch": 0.1547, "grad_norm": 5.620838642120361, "learning_rate": 2.7490404040404046e-06, "loss": 6.2233123779296875, "step": 45570 }, { "epoch": 0.15475, "grad_norm": 8.690142631530762, "learning_rate": 2.7487878787878793e-06, "loss": 6.255314636230469, "step": 45575 }, { "epoch": 0.1548, "grad_norm": 3.8793811798095703, "learning_rate": 2.748535353535354e-06, "loss": 6.253153991699219, "step": 45580 }, { "epoch": 0.15485, "grad_norm": 13.073173522949219, "learning_rate": 2.7482828282828285e-06, "loss": 6.336008834838867, "step": 45585 }, { "epoch": 0.1549, "grad_norm": 6.635429382324219, "learning_rate": 2.7480303030303036e-06, "loss": 6.266962051391602, "step": 45590 }, { "epoch": 0.15495, "grad_norm": 48.43461608886719, "learning_rate": 2.747777777777778e-06, "loss": 6.5077972412109375, "step": 45595 }, { "epoch": 0.155, "grad_norm": 6.610507965087891, "learning_rate": 2.747525252525253e-06, "loss": 6.36711311340332, "step": 45600 }, { "epoch": 0.15505, "grad_norm": 6.2888078689575195, "learning_rate": 2.7472727272727275e-06, "loss": 6.234458541870117, "step": 45605 }, { "epoch": 0.1551, "grad_norm": 6.085524559020996, "learning_rate": 2.7470202020202025e-06, "loss": 6.253709411621093, "step": 45610 }, { "epoch": 0.15515, "grad_norm": 5.913668632507324, "learning_rate": 2.746767676767677e-06, "loss": 6.272125244140625, "step": 45615 }, { "epoch": 0.1552, "grad_norm": 7.603267192840576, "learning_rate": 2.7465151515151518e-06, "loss": 6.2269947052001955, "step": 45620 }, { "epoch": 0.15525, "grad_norm": 11.403824806213379, "learning_rate": 2.7462626262626264e-06, "loss": 6.269118881225586, "step": 45625 }, { "epoch": 0.1553, "grad_norm": 9.85342025756836, "learning_rate": 2.7460101010101015e-06, "loss": 6.321148681640625, "step": 45630 }, { "epoch": 0.15535, "grad_norm": 6.315896034240723, "learning_rate": 2.745757575757576e-06, "loss": 6.268572998046875, "step": 45635 }, { "epoch": 0.1554, "grad_norm": 9.007213592529297, "learning_rate": 2.7455050505050507e-06, "loss": 6.246690368652343, "step": 45640 }, { "epoch": 0.15545, "grad_norm": 6.744345188140869, "learning_rate": 2.7452525252525253e-06, "loss": 6.26294174194336, "step": 45645 }, { "epoch": 0.1555, "grad_norm": 6.048826217651367, "learning_rate": 2.7450000000000004e-06, "loss": 6.2901763916015625, "step": 45650 }, { "epoch": 0.15555, "grad_norm": 6.296490669250488, "learning_rate": 2.744747474747475e-06, "loss": 6.2590576171875, "step": 45655 }, { "epoch": 0.1556, "grad_norm": 6.531980514526367, "learning_rate": 2.7444949494949497e-06, "loss": 6.221875, "step": 45660 }, { "epoch": 0.15565, "grad_norm": 7.764468669891357, "learning_rate": 2.7442424242424243e-06, "loss": 6.314701843261719, "step": 45665 }, { "epoch": 0.1557, "grad_norm": 29.59735870361328, "learning_rate": 2.7439898989898993e-06, "loss": 6.324686431884766, "step": 45670 }, { "epoch": 0.15575, "grad_norm": 5.054245948791504, "learning_rate": 2.743737373737374e-06, "loss": 6.258861541748047, "step": 45675 }, { "epoch": 0.1558, "grad_norm": 4.995138168334961, "learning_rate": 2.7434848484848486e-06, "loss": 6.239112472534179, "step": 45680 }, { "epoch": 0.15585, "grad_norm": 4.692570686340332, "learning_rate": 2.7432323232323232e-06, "loss": 6.215349578857422, "step": 45685 }, { "epoch": 0.1559, "grad_norm": 7.817834854125977, "learning_rate": 2.7429797979797983e-06, "loss": 6.267639923095703, "step": 45690 }, { "epoch": 0.15595, "grad_norm": 15.695502281188965, "learning_rate": 2.742727272727273e-06, "loss": 6.249320602416992, "step": 45695 }, { "epoch": 0.156, "grad_norm": 4.694500923156738, "learning_rate": 2.7424747474747475e-06, "loss": 6.323519134521485, "step": 45700 }, { "epoch": 0.15605, "grad_norm": 3.49898362159729, "learning_rate": 2.742222222222222e-06, "loss": 6.261234283447266, "step": 45705 }, { "epoch": 0.1561, "grad_norm": 5.940157413482666, "learning_rate": 2.7419696969696972e-06, "loss": 6.254181671142578, "step": 45710 }, { "epoch": 0.15615, "grad_norm": 6.628899574279785, "learning_rate": 2.741717171717172e-06, "loss": 6.259916687011719, "step": 45715 }, { "epoch": 0.1562, "grad_norm": 7.390396595001221, "learning_rate": 2.7414646464646465e-06, "loss": 6.349094390869141, "step": 45720 }, { "epoch": 0.15625, "grad_norm": 5.810349941253662, "learning_rate": 2.741212121212121e-06, "loss": 6.248091125488282, "step": 45725 }, { "epoch": 0.1563, "grad_norm": 8.089156150817871, "learning_rate": 2.740959595959596e-06, "loss": 6.272583389282227, "step": 45730 }, { "epoch": 0.15635, "grad_norm": 5.500136852264404, "learning_rate": 2.740707070707071e-06, "loss": 6.26732063293457, "step": 45735 }, { "epoch": 0.1564, "grad_norm": 6.426730632781982, "learning_rate": 2.7404545454545454e-06, "loss": 6.217781066894531, "step": 45740 }, { "epoch": 0.15645, "grad_norm": 3.88439679145813, "learning_rate": 2.74020202020202e-06, "loss": 6.236204528808594, "step": 45745 }, { "epoch": 0.1565, "grad_norm": 7.631826400756836, "learning_rate": 2.7399494949494955e-06, "loss": 6.250545501708984, "step": 45750 }, { "epoch": 0.15655, "grad_norm": 21.547183990478516, "learning_rate": 2.73969696969697e-06, "loss": 6.709126281738281, "step": 45755 }, { "epoch": 0.1566, "grad_norm": 5.165591239929199, "learning_rate": 2.7394444444444444e-06, "loss": 6.216460418701172, "step": 45760 }, { "epoch": 0.15665, "grad_norm": 10.562091827392578, "learning_rate": 2.739191919191919e-06, "loss": 6.228884887695313, "step": 45765 }, { "epoch": 0.1567, "grad_norm": 6.778519630432129, "learning_rate": 2.7389393939393945e-06, "loss": 6.656229400634766, "step": 45770 }, { "epoch": 0.15675, "grad_norm": 8.265875816345215, "learning_rate": 2.738686868686869e-06, "loss": 6.351311111450196, "step": 45775 }, { "epoch": 0.1568, "grad_norm": 5.699514865875244, "learning_rate": 2.7384343434343437e-06, "loss": 6.263597106933593, "step": 45780 }, { "epoch": 0.15685, "grad_norm": 9.122086524963379, "learning_rate": 2.738181818181818e-06, "loss": 6.246063613891602, "step": 45785 }, { "epoch": 0.1569, "grad_norm": 7.526129722595215, "learning_rate": 2.7379292929292934e-06, "loss": 6.285683441162109, "step": 45790 }, { "epoch": 0.15695, "grad_norm": 5.7187819480896, "learning_rate": 2.737676767676768e-06, "loss": 6.271430206298828, "step": 45795 }, { "epoch": 0.157, "grad_norm": 6.6965556144714355, "learning_rate": 2.7374242424242427e-06, "loss": 6.417866516113281, "step": 45800 }, { "epoch": 0.15705, "grad_norm": 5.017265796661377, "learning_rate": 2.7371717171717173e-06, "loss": 6.306255340576172, "step": 45805 }, { "epoch": 0.1571, "grad_norm": 5.243404388427734, "learning_rate": 2.7369191919191924e-06, "loss": 6.342928314208985, "step": 45810 }, { "epoch": 0.15715, "grad_norm": 8.395263671875, "learning_rate": 2.736666666666667e-06, "loss": 6.263788223266602, "step": 45815 }, { "epoch": 0.1572, "grad_norm": 4.96975564956665, "learning_rate": 2.7364141414141416e-06, "loss": 6.277923583984375, "step": 45820 }, { "epoch": 0.15725, "grad_norm": 5.136043071746826, "learning_rate": 2.7361616161616163e-06, "loss": 6.234904479980469, "step": 45825 }, { "epoch": 0.1573, "grad_norm": 4.559699535369873, "learning_rate": 2.7359090909090913e-06, "loss": 6.183169937133789, "step": 45830 }, { "epoch": 0.15735, "grad_norm": 5.624218463897705, "learning_rate": 2.735656565656566e-06, "loss": 6.2541145324707035, "step": 45835 }, { "epoch": 0.1574, "grad_norm": 6.570965766906738, "learning_rate": 2.7354040404040406e-06, "loss": 6.269226455688477, "step": 45840 }, { "epoch": 0.15745, "grad_norm": 5.847921371459961, "learning_rate": 2.735151515151515e-06, "loss": 6.253045654296875, "step": 45845 }, { "epoch": 0.1575, "grad_norm": 5.5157318115234375, "learning_rate": 2.7348989898989903e-06, "loss": 6.296728134155273, "step": 45850 }, { "epoch": 0.15755, "grad_norm": 9.268404960632324, "learning_rate": 2.734646464646465e-06, "loss": 6.199578094482422, "step": 45855 }, { "epoch": 0.1576, "grad_norm": 7.728217601776123, "learning_rate": 2.7343939393939395e-06, "loss": 6.265856170654297, "step": 45860 }, { "epoch": 0.15765, "grad_norm": 3.7824366092681885, "learning_rate": 2.734141414141414e-06, "loss": 6.223073577880859, "step": 45865 }, { "epoch": 0.1577, "grad_norm": 7.480978965759277, "learning_rate": 2.733888888888889e-06, "loss": 6.311048126220703, "step": 45870 }, { "epoch": 0.15775, "grad_norm": 3.9733169078826904, "learning_rate": 2.733636363636364e-06, "loss": 6.292192840576172, "step": 45875 }, { "epoch": 0.1578, "grad_norm": 5.505402088165283, "learning_rate": 2.7333838383838385e-06, "loss": 6.269226837158203, "step": 45880 }, { "epoch": 0.15785, "grad_norm": 5.96746826171875, "learning_rate": 2.733131313131313e-06, "loss": 6.282860183715821, "step": 45885 }, { "epoch": 0.1579, "grad_norm": 6.947137355804443, "learning_rate": 2.732878787878788e-06, "loss": 6.267567825317383, "step": 45890 }, { "epoch": 0.15795, "grad_norm": 6.577126979827881, "learning_rate": 2.7326262626262628e-06, "loss": 6.30109977722168, "step": 45895 }, { "epoch": 0.158, "grad_norm": 5.820128440856934, "learning_rate": 2.7323737373737374e-06, "loss": 6.259189224243164, "step": 45900 }, { "epoch": 0.15805, "grad_norm": 5.726507663726807, "learning_rate": 2.7321212121212125e-06, "loss": 6.266120910644531, "step": 45905 }, { "epoch": 0.1581, "grad_norm": 10.277961730957031, "learning_rate": 2.731868686868687e-06, "loss": 6.209300994873047, "step": 45910 }, { "epoch": 0.15815, "grad_norm": 3.499379873275757, "learning_rate": 2.7316161616161617e-06, "loss": 6.3102569580078125, "step": 45915 }, { "epoch": 0.1582, "grad_norm": 5.9250168800354, "learning_rate": 2.7313636363636364e-06, "loss": 6.300728607177734, "step": 45920 }, { "epoch": 0.15825, "grad_norm": 4.585822582244873, "learning_rate": 2.7311111111111114e-06, "loss": 6.267549896240235, "step": 45925 }, { "epoch": 0.1583, "grad_norm": 4.158531665802002, "learning_rate": 2.730858585858586e-06, "loss": 6.215413665771484, "step": 45930 }, { "epoch": 0.15835, "grad_norm": 5.687535762786865, "learning_rate": 2.7306060606060607e-06, "loss": 6.2782642364501955, "step": 45935 }, { "epoch": 0.1584, "grad_norm": 9.52270793914795, "learning_rate": 2.7303535353535353e-06, "loss": 6.329633331298828, "step": 45940 }, { "epoch": 0.15845, "grad_norm": 5.664690971374512, "learning_rate": 2.7301010101010108e-06, "loss": 6.269956970214844, "step": 45945 }, { "epoch": 0.1585, "grad_norm": 6.1131439208984375, "learning_rate": 2.729848484848485e-06, "loss": 6.26043472290039, "step": 45950 }, { "epoch": 0.15855, "grad_norm": 7.068819522857666, "learning_rate": 2.7295959595959596e-06, "loss": 6.256476211547851, "step": 45955 }, { "epoch": 0.1586, "grad_norm": 8.931595802307129, "learning_rate": 2.7293434343434342e-06, "loss": 6.278238677978516, "step": 45960 }, { "epoch": 0.15865, "grad_norm": 7.492035865783691, "learning_rate": 2.7290909090909097e-06, "loss": 6.347361373901367, "step": 45965 }, { "epoch": 0.1587, "grad_norm": 8.39216423034668, "learning_rate": 2.7288383838383844e-06, "loss": 6.2448680877685545, "step": 45970 }, { "epoch": 0.15875, "grad_norm": 6.067298412322998, "learning_rate": 2.728585858585859e-06, "loss": 6.320822906494141, "step": 45975 }, { "epoch": 0.1588, "grad_norm": 7.287589073181152, "learning_rate": 2.728333333333333e-06, "loss": 6.277625656127929, "step": 45980 }, { "epoch": 0.15885, "grad_norm": 4.870453834533691, "learning_rate": 2.7280808080808087e-06, "loss": 6.280265808105469, "step": 45985 }, { "epoch": 0.1589, "grad_norm": 5.409716606140137, "learning_rate": 2.7278282828282833e-06, "loss": 6.285784149169922, "step": 45990 }, { "epoch": 0.15895, "grad_norm": 7.923301696777344, "learning_rate": 2.727575757575758e-06, "loss": 6.268531417846679, "step": 45995 }, { "epoch": 0.159, "grad_norm": 7.195972919464111, "learning_rate": 2.7273232323232326e-06, "loss": 6.256218719482422, "step": 46000 }, { "epoch": 0.15905, "grad_norm": 7.993651390075684, "learning_rate": 2.7270707070707076e-06, "loss": 6.295466613769531, "step": 46005 }, { "epoch": 0.1591, "grad_norm": 16.022260665893555, "learning_rate": 2.7268181818181822e-06, "loss": 6.342257308959961, "step": 46010 }, { "epoch": 0.15915, "grad_norm": 6.131389617919922, "learning_rate": 2.726565656565657e-06, "loss": 6.533112335205078, "step": 46015 }, { "epoch": 0.1592, "grad_norm": 6.485421657562256, "learning_rate": 2.7263131313131315e-06, "loss": 6.297810363769531, "step": 46020 }, { "epoch": 0.15925, "grad_norm": 3.9858853816986084, "learning_rate": 2.7260606060606066e-06, "loss": 6.256057357788086, "step": 46025 }, { "epoch": 0.1593, "grad_norm": 5.856050491333008, "learning_rate": 2.725808080808081e-06, "loss": 6.294332885742188, "step": 46030 }, { "epoch": 0.15935, "grad_norm": 6.426441669464111, "learning_rate": 2.725555555555556e-06, "loss": 6.292838287353516, "step": 46035 }, { "epoch": 0.1594, "grad_norm": 7.87262487411499, "learning_rate": 2.7253030303030304e-06, "loss": 6.289400100708008, "step": 46040 }, { "epoch": 0.15945, "grad_norm": 9.292779922485352, "learning_rate": 2.7250505050505055e-06, "loss": 6.225447082519532, "step": 46045 }, { "epoch": 0.1595, "grad_norm": 6.177308082580566, "learning_rate": 2.72479797979798e-06, "loss": 6.246398162841797, "step": 46050 }, { "epoch": 0.15955, "grad_norm": 6.297784805297852, "learning_rate": 2.7245454545454548e-06, "loss": 6.312096786499024, "step": 46055 }, { "epoch": 0.1596, "grad_norm": 5.0504536628723145, "learning_rate": 2.7242929292929294e-06, "loss": 6.276942443847656, "step": 46060 }, { "epoch": 0.15965, "grad_norm": 4.5579833984375, "learning_rate": 2.7240404040404044e-06, "loss": 6.248768615722656, "step": 46065 }, { "epoch": 0.1597, "grad_norm": 10.091920852661133, "learning_rate": 2.723787878787879e-06, "loss": 6.246856689453125, "step": 46070 }, { "epoch": 0.15975, "grad_norm": 4.116532325744629, "learning_rate": 2.7235353535353537e-06, "loss": 6.292824935913086, "step": 46075 }, { "epoch": 0.1598, "grad_norm": 3.8468024730682373, "learning_rate": 2.7232828282828283e-06, "loss": 6.350613403320312, "step": 46080 }, { "epoch": 0.15985, "grad_norm": 8.178323745727539, "learning_rate": 2.7230303030303034e-06, "loss": 6.271936416625977, "step": 46085 }, { "epoch": 0.1599, "grad_norm": 5.0372114181518555, "learning_rate": 2.722777777777778e-06, "loss": 6.393745422363281, "step": 46090 }, { "epoch": 0.15995, "grad_norm": 5.450539588928223, "learning_rate": 2.7225252525252526e-06, "loss": 6.283059692382812, "step": 46095 }, { "epoch": 0.16, "grad_norm": 4.793132781982422, "learning_rate": 2.7222727272727273e-06, "loss": 6.287249755859375, "step": 46100 }, { "epoch": 0.16005, "grad_norm": 25.666645050048828, "learning_rate": 2.7220202020202023e-06, "loss": 6.357717514038086, "step": 46105 }, { "epoch": 0.1601, "grad_norm": 7.385717868804932, "learning_rate": 2.721767676767677e-06, "loss": 6.356494903564453, "step": 46110 }, { "epoch": 0.16015, "grad_norm": 3.884598731994629, "learning_rate": 2.7215151515151516e-06, "loss": 6.232147598266602, "step": 46115 }, { "epoch": 0.1602, "grad_norm": 8.309121131896973, "learning_rate": 2.7212626262626262e-06, "loss": 6.267416763305664, "step": 46120 }, { "epoch": 0.16025, "grad_norm": 8.813039779663086, "learning_rate": 2.7210101010101013e-06, "loss": 6.277158737182617, "step": 46125 }, { "epoch": 0.1603, "grad_norm": 6.706832408905029, "learning_rate": 2.720757575757576e-06, "loss": 6.2300361633300785, "step": 46130 }, { "epoch": 0.16035, "grad_norm": 9.096871376037598, "learning_rate": 2.7205050505050505e-06, "loss": 6.253242492675781, "step": 46135 }, { "epoch": 0.1604, "grad_norm": 5.244923114776611, "learning_rate": 2.720252525252525e-06, "loss": 6.246356201171875, "step": 46140 }, { "epoch": 0.16045, "grad_norm": 5.208483695983887, "learning_rate": 2.7200000000000002e-06, "loss": 6.311319732666016, "step": 46145 }, { "epoch": 0.1605, "grad_norm": 4.70627498626709, "learning_rate": 2.719747474747475e-06, "loss": 6.2824653625488285, "step": 46150 }, { "epoch": 0.16055, "grad_norm": 3.743726968765259, "learning_rate": 2.7194949494949495e-06, "loss": 6.350189208984375, "step": 46155 }, { "epoch": 0.1606, "grad_norm": 3.733832597732544, "learning_rate": 2.719242424242424e-06, "loss": 6.247168350219726, "step": 46160 }, { "epoch": 0.16065, "grad_norm": 10.818426132202148, "learning_rate": 2.7189898989898996e-06, "loss": 6.2261615753173825, "step": 46165 }, { "epoch": 0.1607, "grad_norm": 8.118481636047363, "learning_rate": 2.7187373737373742e-06, "loss": 6.248129653930664, "step": 46170 }, { "epoch": 0.16075, "grad_norm": 6.7805094718933105, "learning_rate": 2.7184848484848484e-06, "loss": 6.222587966918946, "step": 46175 }, { "epoch": 0.1608, "grad_norm": 8.526678085327148, "learning_rate": 2.718232323232323e-06, "loss": 6.325622177124023, "step": 46180 }, { "epoch": 0.16085, "grad_norm": 5.2164106369018555, "learning_rate": 2.7179797979797985e-06, "loss": 6.2672370910644535, "step": 46185 }, { "epoch": 0.1609, "grad_norm": 7.349819183349609, "learning_rate": 2.717727272727273e-06, "loss": 6.307279586791992, "step": 46190 }, { "epoch": 0.16095, "grad_norm": 4.121012210845947, "learning_rate": 2.7174747474747478e-06, "loss": 6.305840301513672, "step": 46195 }, { "epoch": 0.161, "grad_norm": 9.100900650024414, "learning_rate": 2.717222222222222e-06, "loss": 6.2936759948730465, "step": 46200 }, { "epoch": 0.16105, "grad_norm": 5.641716957092285, "learning_rate": 2.7169696969696975e-06, "loss": 6.263376617431641, "step": 46205 }, { "epoch": 0.1611, "grad_norm": 6.1295623779296875, "learning_rate": 2.716717171717172e-06, "loss": 6.286444473266601, "step": 46210 }, { "epoch": 0.16115, "grad_norm": 7.149283409118652, "learning_rate": 2.7164646464646467e-06, "loss": 6.286502075195313, "step": 46215 }, { "epoch": 0.1612, "grad_norm": 3.362788677215576, "learning_rate": 2.7162121212121214e-06, "loss": 6.389106750488281, "step": 46220 }, { "epoch": 0.16125, "grad_norm": 6.165426254272461, "learning_rate": 2.7159595959595964e-06, "loss": 6.27380485534668, "step": 46225 }, { "epoch": 0.1613, "grad_norm": 8.27341365814209, "learning_rate": 2.715707070707071e-06, "loss": 6.287337875366211, "step": 46230 }, { "epoch": 0.16135, "grad_norm": 5.109869003295898, "learning_rate": 2.7154545454545457e-06, "loss": 6.27685775756836, "step": 46235 }, { "epoch": 0.1614, "grad_norm": 3.7289345264434814, "learning_rate": 2.7152020202020203e-06, "loss": 6.239474487304688, "step": 46240 }, { "epoch": 0.16145, "grad_norm": 14.128501892089844, "learning_rate": 2.7149494949494954e-06, "loss": 6.332586669921875, "step": 46245 }, { "epoch": 0.1615, "grad_norm": 4.3774733543396, "learning_rate": 2.71469696969697e-06, "loss": 6.323312759399414, "step": 46250 }, { "epoch": 0.16155, "grad_norm": 5.424648284912109, "learning_rate": 2.7144444444444446e-06, "loss": 6.408489227294922, "step": 46255 }, { "epoch": 0.1616, "grad_norm": 4.015381336212158, "learning_rate": 2.7141919191919192e-06, "loss": 6.254777908325195, "step": 46260 }, { "epoch": 0.16165, "grad_norm": 4.6576738357543945, "learning_rate": 2.7139393939393943e-06, "loss": 6.282418823242187, "step": 46265 }, { "epoch": 0.1617, "grad_norm": 5.81601095199585, "learning_rate": 2.713686868686869e-06, "loss": 6.305488967895508, "step": 46270 }, { "epoch": 0.16175, "grad_norm": 7.3689284324646, "learning_rate": 2.7134343434343436e-06, "loss": 6.216822814941406, "step": 46275 }, { "epoch": 0.1618, "grad_norm": 5.484920978546143, "learning_rate": 2.713181818181818e-06, "loss": 6.2383983612060545, "step": 46280 }, { "epoch": 0.16185, "grad_norm": 7.165423393249512, "learning_rate": 2.7129292929292932e-06, "loss": 6.4825439453125, "step": 46285 }, { "epoch": 0.1619, "grad_norm": 7.157301902770996, "learning_rate": 2.712676767676768e-06, "loss": 6.36384162902832, "step": 46290 }, { "epoch": 0.16195, "grad_norm": 11.404439926147461, "learning_rate": 2.7124242424242425e-06, "loss": 6.2334739685058596, "step": 46295 }, { "epoch": 0.162, "grad_norm": 6.00426721572876, "learning_rate": 2.712171717171717e-06, "loss": 6.207920837402344, "step": 46300 }, { "epoch": 0.16205, "grad_norm": 7.8412885665893555, "learning_rate": 2.711919191919192e-06, "loss": 6.265785980224609, "step": 46305 }, { "epoch": 0.1621, "grad_norm": 5.582731246948242, "learning_rate": 2.711666666666667e-06, "loss": 6.303151321411133, "step": 46310 }, { "epoch": 0.16215, "grad_norm": 7.075326919555664, "learning_rate": 2.7114141414141415e-06, "loss": 6.292263031005859, "step": 46315 }, { "epoch": 0.1622, "grad_norm": 5.8795576095581055, "learning_rate": 2.7111616161616165e-06, "loss": 6.288729858398438, "step": 46320 }, { "epoch": 0.16225, "grad_norm": 19.464229583740234, "learning_rate": 2.710909090909091e-06, "loss": 6.702177429199219, "step": 46325 }, { "epoch": 0.1623, "grad_norm": 11.204289436340332, "learning_rate": 2.7106565656565658e-06, "loss": 6.297915267944336, "step": 46330 }, { "epoch": 0.16235, "grad_norm": 4.657784938812256, "learning_rate": 2.7104040404040404e-06, "loss": 6.239188385009766, "step": 46335 }, { "epoch": 0.1624, "grad_norm": 7.413614749908447, "learning_rate": 2.7101515151515154e-06, "loss": 6.263623809814453, "step": 46340 }, { "epoch": 0.16245, "grad_norm": 6.949495792388916, "learning_rate": 2.70989898989899e-06, "loss": 6.19805679321289, "step": 46345 }, { "epoch": 0.1625, "grad_norm": 11.199972152709961, "learning_rate": 2.7096464646464647e-06, "loss": 6.245381164550781, "step": 46350 }, { "epoch": 0.16255, "grad_norm": 6.587199687957764, "learning_rate": 2.7093939393939393e-06, "loss": 6.240771484375, "step": 46355 }, { "epoch": 0.1626, "grad_norm": 4.83115816116333, "learning_rate": 2.709141414141415e-06, "loss": 6.239324951171875, "step": 46360 }, { "epoch": 0.16265, "grad_norm": 6.9347710609436035, "learning_rate": 2.708888888888889e-06, "loss": 6.276221084594726, "step": 46365 }, { "epoch": 0.1627, "grad_norm": 7.538914203643799, "learning_rate": 2.7086363636363637e-06, "loss": 6.307289123535156, "step": 46370 }, { "epoch": 0.16275, "grad_norm": 4.4879913330078125, "learning_rate": 2.7083838383838383e-06, "loss": 6.330704116821289, "step": 46375 }, { "epoch": 0.1628, "grad_norm": 3.4831314086914062, "learning_rate": 2.7081313131313138e-06, "loss": 6.28766975402832, "step": 46380 }, { "epoch": 0.16285, "grad_norm": 4.245772838592529, "learning_rate": 2.7078787878787884e-06, "loss": 6.267551422119141, "step": 46385 }, { "epoch": 0.1629, "grad_norm": 6.916069984436035, "learning_rate": 2.707626262626263e-06, "loss": 6.199790954589844, "step": 46390 }, { "epoch": 0.16295, "grad_norm": 5.316991329193115, "learning_rate": 2.7073737373737372e-06, "loss": 6.310803604125977, "step": 46395 }, { "epoch": 0.163, "grad_norm": 7.9739227294921875, "learning_rate": 2.7071212121212127e-06, "loss": 6.299285888671875, "step": 46400 }, { "epoch": 0.16305, "grad_norm": 15.130101203918457, "learning_rate": 2.7068686868686873e-06, "loss": 6.364262390136719, "step": 46405 }, { "epoch": 0.1631, "grad_norm": 8.641922950744629, "learning_rate": 2.706616161616162e-06, "loss": 6.326145172119141, "step": 46410 }, { "epoch": 0.16315, "grad_norm": 5.703354835510254, "learning_rate": 2.7063636363636366e-06, "loss": 6.321566772460938, "step": 46415 }, { "epoch": 0.1632, "grad_norm": 6.1122145652771, "learning_rate": 2.7061111111111116e-06, "loss": 6.326084136962891, "step": 46420 }, { "epoch": 0.16325, "grad_norm": 4.721333026885986, "learning_rate": 2.7058585858585863e-06, "loss": 6.24793701171875, "step": 46425 }, { "epoch": 0.1633, "grad_norm": 8.024808883666992, "learning_rate": 2.705606060606061e-06, "loss": 6.275682830810547, "step": 46430 }, { "epoch": 0.16335, "grad_norm": 6.1483588218688965, "learning_rate": 2.7053535353535355e-06, "loss": 6.275567626953125, "step": 46435 }, { "epoch": 0.1634, "grad_norm": 7.213583469390869, "learning_rate": 2.7051010101010106e-06, "loss": 6.3659309387207035, "step": 46440 }, { "epoch": 0.16345, "grad_norm": 7.2201337814331055, "learning_rate": 2.7048484848484852e-06, "loss": 6.255638885498047, "step": 46445 }, { "epoch": 0.1635, "grad_norm": 8.284965515136719, "learning_rate": 2.70459595959596e-06, "loss": 6.267876815795899, "step": 46450 }, { "epoch": 0.16355, "grad_norm": 5.463566303253174, "learning_rate": 2.7043434343434345e-06, "loss": 6.262792587280273, "step": 46455 }, { "epoch": 0.1636, "grad_norm": 25.854286193847656, "learning_rate": 2.7040909090909095e-06, "loss": 6.397969818115234, "step": 46460 }, { "epoch": 0.16365, "grad_norm": 6.65406608581543, "learning_rate": 2.703838383838384e-06, "loss": 6.343595886230469, "step": 46465 }, { "epoch": 0.1637, "grad_norm": 4.4420294761657715, "learning_rate": 2.703585858585859e-06, "loss": 6.225080108642578, "step": 46470 }, { "epoch": 0.16375, "grad_norm": 6.799612522125244, "learning_rate": 2.7033333333333334e-06, "loss": 6.2386474609375, "step": 46475 }, { "epoch": 0.1638, "grad_norm": 6.069149017333984, "learning_rate": 2.7030808080808085e-06, "loss": 6.2604930877685545, "step": 46480 }, { "epoch": 0.16385, "grad_norm": 11.114018440246582, "learning_rate": 2.702828282828283e-06, "loss": 6.295671844482422, "step": 46485 }, { "epoch": 0.1639, "grad_norm": 4.317625999450684, "learning_rate": 2.7025757575757577e-06, "loss": 6.209845352172851, "step": 46490 }, { "epoch": 0.16395, "grad_norm": 6.574362277984619, "learning_rate": 2.7023232323232324e-06, "loss": 6.254465484619141, "step": 46495 }, { "epoch": 0.164, "grad_norm": 5.086489677429199, "learning_rate": 2.7020707070707074e-06, "loss": 6.3522483825683596, "step": 46500 }, { "epoch": 0.16405, "grad_norm": 15.905435562133789, "learning_rate": 2.701818181818182e-06, "loss": 6.483385467529297, "step": 46505 }, { "epoch": 0.1641, "grad_norm": 6.8352460861206055, "learning_rate": 2.7015656565656567e-06, "loss": 6.264405822753906, "step": 46510 }, { "epoch": 0.16415, "grad_norm": 7.894382953643799, "learning_rate": 2.7013131313131313e-06, "loss": 6.279173278808594, "step": 46515 }, { "epoch": 0.1642, "grad_norm": 7.49547004699707, "learning_rate": 2.7010606060606064e-06, "loss": 6.257052230834961, "step": 46520 }, { "epoch": 0.16425, "grad_norm": 8.441426277160645, "learning_rate": 2.700808080808081e-06, "loss": 6.370332336425781, "step": 46525 }, { "epoch": 0.1643, "grad_norm": 5.404785633087158, "learning_rate": 2.7005555555555556e-06, "loss": 6.328109359741211, "step": 46530 }, { "epoch": 0.16435, "grad_norm": 5.773766994476318, "learning_rate": 2.7003030303030303e-06, "loss": 6.311983871459961, "step": 46535 }, { "epoch": 0.1644, "grad_norm": 6.174339294433594, "learning_rate": 2.7000505050505053e-06, "loss": 6.2651123046875, "step": 46540 }, { "epoch": 0.16445, "grad_norm": 8.440091133117676, "learning_rate": 2.69979797979798e-06, "loss": 6.231924438476563, "step": 46545 }, { "epoch": 0.1645, "grad_norm": 7.178924560546875, "learning_rate": 2.6995454545454546e-06, "loss": 6.260216903686524, "step": 46550 }, { "epoch": 0.16455, "grad_norm": 11.699836730957031, "learning_rate": 2.699292929292929e-06, "loss": 6.264885711669922, "step": 46555 }, { "epoch": 0.1646, "grad_norm": 6.431806564331055, "learning_rate": 2.6990404040404043e-06, "loss": 6.3214256286621096, "step": 46560 }, { "epoch": 0.16465, "grad_norm": 7.629232883453369, "learning_rate": 2.698787878787879e-06, "loss": 6.2335670471191404, "step": 46565 }, { "epoch": 0.1647, "grad_norm": 5.159815788269043, "learning_rate": 2.6985353535353535e-06, "loss": 6.261320495605469, "step": 46570 }, { "epoch": 0.16475, "grad_norm": 5.986253261566162, "learning_rate": 2.698282828282828e-06, "loss": 6.261339569091797, "step": 46575 }, { "epoch": 0.1648, "grad_norm": 4.157566070556641, "learning_rate": 2.6980303030303036e-06, "loss": 6.247779846191406, "step": 46580 }, { "epoch": 0.16485, "grad_norm": 10.076870918273926, "learning_rate": 2.6977777777777783e-06, "loss": 6.272740173339844, "step": 46585 }, { "epoch": 0.1649, "grad_norm": 4.734520435333252, "learning_rate": 2.6975252525252525e-06, "loss": 6.220353317260742, "step": 46590 }, { "epoch": 0.16495, "grad_norm": 8.064689636230469, "learning_rate": 2.697272727272727e-06, "loss": 6.181978607177735, "step": 46595 }, { "epoch": 0.165, "grad_norm": 8.734370231628418, "learning_rate": 2.6970202020202026e-06, "loss": 6.249336242675781, "step": 46600 }, { "epoch": 0.16505, "grad_norm": 8.201236724853516, "learning_rate": 2.696767676767677e-06, "loss": 6.211967849731446, "step": 46605 }, { "epoch": 0.1651, "grad_norm": 4.407055854797363, "learning_rate": 2.696515151515152e-06, "loss": 6.322299194335938, "step": 46610 }, { "epoch": 0.16515, "grad_norm": 8.702316284179688, "learning_rate": 2.6962626262626265e-06, "loss": 6.288990020751953, "step": 46615 }, { "epoch": 0.1652, "grad_norm": 8.985153198242188, "learning_rate": 2.6960101010101015e-06, "loss": 6.2819053649902346, "step": 46620 }, { "epoch": 0.16525, "grad_norm": 6.656138896942139, "learning_rate": 2.695757575757576e-06, "loss": 6.255428314208984, "step": 46625 }, { "epoch": 0.1653, "grad_norm": 18.32538414001465, "learning_rate": 2.6955050505050508e-06, "loss": 6.693630981445312, "step": 46630 }, { "epoch": 0.16535, "grad_norm": 4.716052055358887, "learning_rate": 2.6952525252525254e-06, "loss": 6.253234481811523, "step": 46635 }, { "epoch": 0.1654, "grad_norm": 4.194809436798096, "learning_rate": 2.6950000000000005e-06, "loss": 6.312934875488281, "step": 46640 }, { "epoch": 0.16545, "grad_norm": 10.009414672851562, "learning_rate": 2.694747474747475e-06, "loss": 6.450945281982422, "step": 46645 }, { "epoch": 0.1655, "grad_norm": 7.179514408111572, "learning_rate": 2.6944949494949497e-06, "loss": 6.333229064941406, "step": 46650 }, { "epoch": 0.16555, "grad_norm": 7.354435443878174, "learning_rate": 2.6942424242424243e-06, "loss": 6.249530410766601, "step": 46655 }, { "epoch": 0.1656, "grad_norm": 5.606266498565674, "learning_rate": 2.6939898989898994e-06, "loss": 6.298655700683594, "step": 46660 }, { "epoch": 0.16565, "grad_norm": 6.070713043212891, "learning_rate": 2.693737373737374e-06, "loss": 6.259365081787109, "step": 46665 }, { "epoch": 0.1657, "grad_norm": 9.76517105102539, "learning_rate": 2.6934848484848487e-06, "loss": 6.239619445800781, "step": 46670 }, { "epoch": 0.16575, "grad_norm": 8.144340515136719, "learning_rate": 2.6932323232323233e-06, "loss": 6.231914901733399, "step": 46675 }, { "epoch": 0.1658, "grad_norm": 8.67623519897461, "learning_rate": 2.6929797979797983e-06, "loss": 6.251303482055664, "step": 46680 }, { "epoch": 0.16585, "grad_norm": 4.442389011383057, "learning_rate": 2.692727272727273e-06, "loss": 6.243590545654297, "step": 46685 }, { "epoch": 0.1659, "grad_norm": 7.291537284851074, "learning_rate": 2.6924747474747476e-06, "loss": 6.266853713989258, "step": 46690 }, { "epoch": 0.16595, "grad_norm": 4.015828609466553, "learning_rate": 2.6922222222222222e-06, "loss": 6.2374114990234375, "step": 46695 }, { "epoch": 0.166, "grad_norm": 5.816522598266602, "learning_rate": 2.6919696969696973e-06, "loss": 6.2614189147949215, "step": 46700 }, { "epoch": 0.16605, "grad_norm": 4.077749729156494, "learning_rate": 2.691717171717172e-06, "loss": 6.258872222900391, "step": 46705 }, { "epoch": 0.1661, "grad_norm": 7.331685543060303, "learning_rate": 2.6914646464646465e-06, "loss": 6.260650634765625, "step": 46710 }, { "epoch": 0.16615, "grad_norm": 3.888369560241699, "learning_rate": 2.691212121212121e-06, "loss": 6.2425285339355465, "step": 46715 }, { "epoch": 0.1662, "grad_norm": 4.465662956237793, "learning_rate": 2.6909595959595962e-06, "loss": 6.303583145141602, "step": 46720 }, { "epoch": 0.16625, "grad_norm": 5.495327949523926, "learning_rate": 2.690707070707071e-06, "loss": 6.259243011474609, "step": 46725 }, { "epoch": 0.1663, "grad_norm": 7.891727447509766, "learning_rate": 2.6904545454545455e-06, "loss": 6.472219848632813, "step": 46730 }, { "epoch": 0.16635, "grad_norm": 3.5701961517333984, "learning_rate": 2.69020202020202e-06, "loss": 6.680817413330078, "step": 46735 }, { "epoch": 0.1664, "grad_norm": 6.376168727874756, "learning_rate": 2.689949494949495e-06, "loss": 6.235052490234375, "step": 46740 }, { "epoch": 0.16645, "grad_norm": 7.788154125213623, "learning_rate": 2.68969696969697e-06, "loss": 6.238928985595703, "step": 46745 }, { "epoch": 0.1665, "grad_norm": 8.14344596862793, "learning_rate": 2.6894444444444444e-06, "loss": 6.212486648559571, "step": 46750 }, { "epoch": 0.16655, "grad_norm": 7.93447732925415, "learning_rate": 2.6891919191919195e-06, "loss": 6.289857482910156, "step": 46755 }, { "epoch": 0.1666, "grad_norm": 8.32631778717041, "learning_rate": 2.688939393939394e-06, "loss": 6.292717742919922, "step": 46760 }, { "epoch": 0.16665, "grad_norm": 5.661523342132568, "learning_rate": 2.6886868686868687e-06, "loss": 6.230666732788086, "step": 46765 }, { "epoch": 0.1667, "grad_norm": 4.273640155792236, "learning_rate": 2.6884343434343434e-06, "loss": 6.286236572265625, "step": 46770 }, { "epoch": 0.16675, "grad_norm": 22.110660552978516, "learning_rate": 2.688181818181819e-06, "loss": 6.429768371582031, "step": 46775 }, { "epoch": 0.1668, "grad_norm": 8.16692066192627, "learning_rate": 2.6879292929292935e-06, "loss": 6.442893981933594, "step": 46780 }, { "epoch": 0.16685, "grad_norm": 9.477338790893555, "learning_rate": 2.6876767676767677e-06, "loss": 6.298275375366211, "step": 46785 }, { "epoch": 0.1669, "grad_norm": 14.949170112609863, "learning_rate": 2.6874242424242423e-06, "loss": 6.299665832519532, "step": 46790 }, { "epoch": 0.16695, "grad_norm": 4.480638027191162, "learning_rate": 2.687171717171718e-06, "loss": 6.282746887207031, "step": 46795 }, { "epoch": 0.167, "grad_norm": 12.715507507324219, "learning_rate": 2.6869191919191924e-06, "loss": 6.395935440063477, "step": 46800 }, { "epoch": 0.16705, "grad_norm": 13.247716903686523, "learning_rate": 2.686666666666667e-06, "loss": 6.3251182556152346, "step": 46805 }, { "epoch": 0.1671, "grad_norm": 6.6491851806640625, "learning_rate": 2.6864141414141413e-06, "loss": 6.285381317138672, "step": 46810 }, { "epoch": 0.16715, "grad_norm": 8.114096641540527, "learning_rate": 2.6861616161616167e-06, "loss": 6.209886169433593, "step": 46815 }, { "epoch": 0.1672, "grad_norm": 6.088117599487305, "learning_rate": 2.6859090909090914e-06, "loss": 6.323033142089844, "step": 46820 }, { "epoch": 0.16725, "grad_norm": 5.569652557373047, "learning_rate": 2.685656565656566e-06, "loss": 6.3071739196777346, "step": 46825 }, { "epoch": 0.1673, "grad_norm": 5.376466274261475, "learning_rate": 2.6854040404040406e-06, "loss": 6.288654327392578, "step": 46830 }, { "epoch": 0.16735, "grad_norm": 11.529086112976074, "learning_rate": 2.6851515151515157e-06, "loss": 6.221612167358399, "step": 46835 }, { "epoch": 0.1674, "grad_norm": 4.539047718048096, "learning_rate": 2.6848989898989903e-06, "loss": 6.23243522644043, "step": 46840 }, { "epoch": 0.16745, "grad_norm": 6.175563812255859, "learning_rate": 2.684646464646465e-06, "loss": 6.286183166503906, "step": 46845 }, { "epoch": 0.1675, "grad_norm": 7.018819808959961, "learning_rate": 2.6843939393939396e-06, "loss": 6.285189437866211, "step": 46850 }, { "epoch": 0.16755, "grad_norm": 5.559396266937256, "learning_rate": 2.6841414141414146e-06, "loss": 6.163778305053711, "step": 46855 }, { "epoch": 0.1676, "grad_norm": 5.754761219024658, "learning_rate": 2.6838888888888893e-06, "loss": 6.241651153564453, "step": 46860 }, { "epoch": 0.16765, "grad_norm": 6.274162769317627, "learning_rate": 2.683636363636364e-06, "loss": 6.232267761230469, "step": 46865 }, { "epoch": 0.1677, "grad_norm": 6.256450653076172, "learning_rate": 2.6833838383838385e-06, "loss": 6.329586410522461, "step": 46870 }, { "epoch": 0.16775, "grad_norm": 8.218653678894043, "learning_rate": 2.6831313131313136e-06, "loss": 6.260939025878907, "step": 46875 }, { "epoch": 0.1678, "grad_norm": 4.546922206878662, "learning_rate": 2.682878787878788e-06, "loss": 6.252448272705078, "step": 46880 }, { "epoch": 0.16785, "grad_norm": 7.145508289337158, "learning_rate": 2.682626262626263e-06, "loss": 6.233911514282227, "step": 46885 }, { "epoch": 0.1679, "grad_norm": 6.507478713989258, "learning_rate": 2.6823737373737375e-06, "loss": 6.230224609375, "step": 46890 }, { "epoch": 0.16795, "grad_norm": 3.6727824211120605, "learning_rate": 2.6821212121212125e-06, "loss": 6.29645004272461, "step": 46895 }, { "epoch": 0.168, "grad_norm": 5.696251392364502, "learning_rate": 2.681868686868687e-06, "loss": 6.2632183074951175, "step": 46900 }, { "epoch": 0.16805, "grad_norm": 7.772921085357666, "learning_rate": 2.6816161616161618e-06, "loss": 6.274506378173828, "step": 46905 }, { "epoch": 0.1681, "grad_norm": 10.565302848815918, "learning_rate": 2.6813636363636364e-06, "loss": 6.259999847412109, "step": 46910 }, { "epoch": 0.16815, "grad_norm": 7.947033405303955, "learning_rate": 2.6811111111111115e-06, "loss": 6.2491508483886715, "step": 46915 }, { "epoch": 0.1682, "grad_norm": 4.8226542472839355, "learning_rate": 2.680858585858586e-06, "loss": 6.2375751495361325, "step": 46920 }, { "epoch": 0.16825, "grad_norm": 6.458552837371826, "learning_rate": 2.6806060606060607e-06, "loss": 6.25087890625, "step": 46925 }, { "epoch": 0.1683, "grad_norm": 7.137932300567627, "learning_rate": 2.6803535353535354e-06, "loss": 6.338753128051758, "step": 46930 }, { "epoch": 0.16835, "grad_norm": 4.068889617919922, "learning_rate": 2.6801010101010104e-06, "loss": 6.486201477050781, "step": 46935 }, { "epoch": 0.1684, "grad_norm": 10.795719146728516, "learning_rate": 2.679848484848485e-06, "loss": 6.220412063598633, "step": 46940 }, { "epoch": 0.16845, "grad_norm": 5.937544345855713, "learning_rate": 2.6795959595959597e-06, "loss": 6.2291206359863285, "step": 46945 }, { "epoch": 0.1685, "grad_norm": 6.560097694396973, "learning_rate": 2.6793434343434343e-06, "loss": 6.233093643188477, "step": 46950 }, { "epoch": 0.16855, "grad_norm": 5.830186367034912, "learning_rate": 2.6790909090909094e-06, "loss": 6.2498291015625, "step": 46955 }, { "epoch": 0.1686, "grad_norm": 3.865319013595581, "learning_rate": 2.678838383838384e-06, "loss": 6.269269561767578, "step": 46960 }, { "epoch": 0.16865, "grad_norm": 10.453680038452148, "learning_rate": 2.6785858585858586e-06, "loss": 6.301636505126953, "step": 46965 }, { "epoch": 0.1687, "grad_norm": 4.266781330108643, "learning_rate": 2.6783333333333332e-06, "loss": 6.250969314575196, "step": 46970 }, { "epoch": 0.16875, "grad_norm": 6.557286262512207, "learning_rate": 2.6780808080808083e-06, "loss": 6.2304634094238285, "step": 46975 }, { "epoch": 0.1688, "grad_norm": 6.170468807220459, "learning_rate": 2.677828282828283e-06, "loss": 6.268167495727539, "step": 46980 }, { "epoch": 0.16885, "grad_norm": 5.088135719299316, "learning_rate": 2.6775757575757576e-06, "loss": 6.298971176147461, "step": 46985 }, { "epoch": 0.1689, "grad_norm": 15.341100692749023, "learning_rate": 2.677323232323232e-06, "loss": 6.253636169433594, "step": 46990 }, { "epoch": 0.16895, "grad_norm": 4.313477993011475, "learning_rate": 2.6770707070707077e-06, "loss": 6.227603912353516, "step": 46995 }, { "epoch": 0.169, "grad_norm": 10.796445846557617, "learning_rate": 2.6768181818181823e-06, "loss": 6.346114730834961, "step": 47000 }, { "epoch": 0.16905, "grad_norm": 6.610363960266113, "learning_rate": 2.6765656565656565e-06, "loss": 6.299123382568359, "step": 47005 }, { "epoch": 0.1691, "grad_norm": 9.215063095092773, "learning_rate": 2.676313131313131e-06, "loss": 6.25313720703125, "step": 47010 }, { "epoch": 0.16915, "grad_norm": 9.41675090789795, "learning_rate": 2.6760606060606066e-06, "loss": 6.277120971679688, "step": 47015 }, { "epoch": 0.1692, "grad_norm": 4.320587635040283, "learning_rate": 2.6758080808080812e-06, "loss": 6.231455230712891, "step": 47020 }, { "epoch": 0.16925, "grad_norm": 6.5948662757873535, "learning_rate": 2.675555555555556e-06, "loss": 6.247121429443359, "step": 47025 }, { "epoch": 0.1693, "grad_norm": 5.852809906005859, "learning_rate": 2.6753030303030305e-06, "loss": 6.2626953125, "step": 47030 }, { "epoch": 0.16935, "grad_norm": 4.573475360870361, "learning_rate": 2.6750505050505056e-06, "loss": 6.394488525390625, "step": 47035 }, { "epoch": 0.1694, "grad_norm": 4.63215446472168, "learning_rate": 2.67479797979798e-06, "loss": 6.272662353515625, "step": 47040 }, { "epoch": 0.16945, "grad_norm": 4.745767116546631, "learning_rate": 2.674545454545455e-06, "loss": 6.2595478057861325, "step": 47045 }, { "epoch": 0.1695, "grad_norm": 8.48840618133545, "learning_rate": 2.6742929292929294e-06, "loss": 6.305591201782226, "step": 47050 }, { "epoch": 0.16955, "grad_norm": 10.109371185302734, "learning_rate": 2.6740404040404045e-06, "loss": 6.535638427734375, "step": 47055 }, { "epoch": 0.1696, "grad_norm": 9.948399543762207, "learning_rate": 2.673787878787879e-06, "loss": 6.260902023315429, "step": 47060 }, { "epoch": 0.16965, "grad_norm": 4.2262797355651855, "learning_rate": 2.6735353535353538e-06, "loss": 6.267045211791992, "step": 47065 }, { "epoch": 0.1697, "grad_norm": 6.319637775421143, "learning_rate": 2.6732828282828284e-06, "loss": 6.256875610351562, "step": 47070 }, { "epoch": 0.16975, "grad_norm": 8.862424850463867, "learning_rate": 2.6730303030303034e-06, "loss": 6.333828353881836, "step": 47075 }, { "epoch": 0.1698, "grad_norm": 6.296072483062744, "learning_rate": 2.672777777777778e-06, "loss": 6.265655517578125, "step": 47080 }, { "epoch": 0.16985, "grad_norm": 6.1489739418029785, "learning_rate": 2.6725252525252527e-06, "loss": 6.265882873535157, "step": 47085 }, { "epoch": 0.1699, "grad_norm": 3.8873496055603027, "learning_rate": 2.6722727272727273e-06, "loss": 6.249206161499023, "step": 47090 }, { "epoch": 0.16995, "grad_norm": 7.093249797821045, "learning_rate": 2.6720202020202024e-06, "loss": 6.2395271301269535, "step": 47095 }, { "epoch": 0.17, "grad_norm": 13.732516288757324, "learning_rate": 2.671767676767677e-06, "loss": 6.327323150634766, "step": 47100 }, { "epoch": 0.17005, "grad_norm": 8.731586456298828, "learning_rate": 2.6715151515151516e-06, "loss": 6.333403015136719, "step": 47105 }, { "epoch": 0.1701, "grad_norm": 6.988713264465332, "learning_rate": 2.6712626262626263e-06, "loss": 6.288128662109375, "step": 47110 }, { "epoch": 0.17015, "grad_norm": 9.946455001831055, "learning_rate": 2.6710101010101013e-06, "loss": 6.334548568725586, "step": 47115 }, { "epoch": 0.1702, "grad_norm": 6.224346160888672, "learning_rate": 2.670757575757576e-06, "loss": 6.272863388061523, "step": 47120 }, { "epoch": 0.17025, "grad_norm": 4.754438877105713, "learning_rate": 2.6705050505050506e-06, "loss": 6.271454620361328, "step": 47125 }, { "epoch": 0.1703, "grad_norm": 5.078529357910156, "learning_rate": 2.6702525252525252e-06, "loss": 6.243340301513672, "step": 47130 }, { "epoch": 0.17035, "grad_norm": 42.64674377441406, "learning_rate": 2.6700000000000003e-06, "loss": 6.312604141235352, "step": 47135 }, { "epoch": 0.1704, "grad_norm": 5.895768642425537, "learning_rate": 2.669747474747475e-06, "loss": 6.25867919921875, "step": 47140 }, { "epoch": 0.17045, "grad_norm": 10.404007911682129, "learning_rate": 2.6694949494949495e-06, "loss": 6.248200988769531, "step": 47145 }, { "epoch": 0.1705, "grad_norm": 7.134000301361084, "learning_rate": 2.669242424242424e-06, "loss": 6.308859252929688, "step": 47150 }, { "epoch": 0.17055, "grad_norm": 6.813919544219971, "learning_rate": 2.6689898989898992e-06, "loss": 6.257285690307617, "step": 47155 }, { "epoch": 0.1706, "grad_norm": 5.451106071472168, "learning_rate": 2.668737373737374e-06, "loss": 6.191630554199219, "step": 47160 }, { "epoch": 0.17065, "grad_norm": 4.488504409790039, "learning_rate": 2.6684848484848485e-06, "loss": 6.240713119506836, "step": 47165 }, { "epoch": 0.1707, "grad_norm": 6.545334339141846, "learning_rate": 2.668232323232323e-06, "loss": 6.299153900146484, "step": 47170 }, { "epoch": 0.17075, "grad_norm": 4.643325328826904, "learning_rate": 2.667979797979798e-06, "loss": 6.243710327148437, "step": 47175 }, { "epoch": 0.1708, "grad_norm": 7.252976894378662, "learning_rate": 2.667727272727273e-06, "loss": 6.241100311279297, "step": 47180 }, { "epoch": 0.17085, "grad_norm": 4.4150071144104, "learning_rate": 2.6674747474747474e-06, "loss": 6.254134750366211, "step": 47185 }, { "epoch": 0.1709, "grad_norm": 6.972575664520264, "learning_rate": 2.667222222222223e-06, "loss": 6.30244255065918, "step": 47190 }, { "epoch": 0.17095, "grad_norm": 8.03272533416748, "learning_rate": 2.6669696969696975e-06, "loss": 6.239234161376953, "step": 47195 }, { "epoch": 0.171, "grad_norm": 24.549503326416016, "learning_rate": 2.6667171717171717e-06, "loss": 6.2270454406738285, "step": 47200 }, { "epoch": 0.17105, "grad_norm": 7.18546724319458, "learning_rate": 2.6664646464646464e-06, "loss": 6.268075561523437, "step": 47205 }, { "epoch": 0.1711, "grad_norm": 6.794158935546875, "learning_rate": 2.666212121212122e-06, "loss": 6.334200286865235, "step": 47210 }, { "epoch": 0.17115, "grad_norm": 4.1235737800598145, "learning_rate": 2.6659595959595965e-06, "loss": 6.249919128417969, "step": 47215 }, { "epoch": 0.1712, "grad_norm": 8.385812759399414, "learning_rate": 2.665707070707071e-06, "loss": 6.322608566284179, "step": 47220 }, { "epoch": 0.17125, "grad_norm": 7.377440929412842, "learning_rate": 2.6654545454545453e-06, "loss": 6.2116962432861325, "step": 47225 }, { "epoch": 0.1713, "grad_norm": 6.202000617980957, "learning_rate": 2.6652020202020208e-06, "loss": 6.260379791259766, "step": 47230 }, { "epoch": 0.17135, "grad_norm": 16.10968017578125, "learning_rate": 2.6649494949494954e-06, "loss": 6.648375701904297, "step": 47235 }, { "epoch": 0.1714, "grad_norm": 13.456192016601562, "learning_rate": 2.66469696969697e-06, "loss": 6.298786926269531, "step": 47240 }, { "epoch": 0.17145, "grad_norm": 5.7567219734191895, "learning_rate": 2.6644444444444447e-06, "loss": 6.217862319946289, "step": 47245 }, { "epoch": 0.1715, "grad_norm": 6.193387985229492, "learning_rate": 2.6641919191919197e-06, "loss": 6.270679473876953, "step": 47250 }, { "epoch": 0.17155, "grad_norm": 4.538349151611328, "learning_rate": 2.6639393939393944e-06, "loss": 6.29577751159668, "step": 47255 }, { "epoch": 0.1716, "grad_norm": 13.8416109085083, "learning_rate": 2.663686868686869e-06, "loss": 6.667152404785156, "step": 47260 }, { "epoch": 0.17165, "grad_norm": 20.935375213623047, "learning_rate": 2.6634343434343436e-06, "loss": 6.478862762451172, "step": 47265 }, { "epoch": 0.1717, "grad_norm": 5.539295196533203, "learning_rate": 2.6631818181818187e-06, "loss": 6.275627899169922, "step": 47270 }, { "epoch": 0.17175, "grad_norm": 27.027080535888672, "learning_rate": 2.6629292929292933e-06, "loss": 6.159566497802734, "step": 47275 }, { "epoch": 0.1718, "grad_norm": 9.729592323303223, "learning_rate": 2.662676767676768e-06, "loss": 6.244743728637696, "step": 47280 }, { "epoch": 0.17185, "grad_norm": 5.296539783477783, "learning_rate": 2.6624242424242426e-06, "loss": 6.275522613525391, "step": 47285 }, { "epoch": 0.1719, "grad_norm": 5.13979959487915, "learning_rate": 2.6621717171717176e-06, "loss": 6.322090148925781, "step": 47290 }, { "epoch": 0.17195, "grad_norm": 22.095827102661133, "learning_rate": 2.6619191919191922e-06, "loss": 6.305195617675781, "step": 47295 }, { "epoch": 0.172, "grad_norm": 5.939340591430664, "learning_rate": 2.661666666666667e-06, "loss": 6.260313415527344, "step": 47300 }, { "epoch": 0.17205, "grad_norm": 5.706669330596924, "learning_rate": 2.6614141414141415e-06, "loss": 6.224989318847657, "step": 47305 }, { "epoch": 0.1721, "grad_norm": 5.8331451416015625, "learning_rate": 2.6611616161616166e-06, "loss": 6.268434143066406, "step": 47310 }, { "epoch": 0.17215, "grad_norm": 5.756060600280762, "learning_rate": 2.660909090909091e-06, "loss": 6.254639053344727, "step": 47315 }, { "epoch": 0.1722, "grad_norm": 9.055944442749023, "learning_rate": 2.660656565656566e-06, "loss": 6.298771667480469, "step": 47320 }, { "epoch": 0.17225, "grad_norm": 4.253583908081055, "learning_rate": 2.6604040404040405e-06, "loss": 6.315514373779297, "step": 47325 }, { "epoch": 0.1723, "grad_norm": 8.592488288879395, "learning_rate": 2.6601515151515155e-06, "loss": 6.379167938232422, "step": 47330 }, { "epoch": 0.17235, "grad_norm": 8.748068809509277, "learning_rate": 2.65989898989899e-06, "loss": 6.275400543212891, "step": 47335 }, { "epoch": 0.1724, "grad_norm": 5.0398783683776855, "learning_rate": 2.6596464646464648e-06, "loss": 6.249453735351563, "step": 47340 }, { "epoch": 0.17245, "grad_norm": 15.184754371643066, "learning_rate": 2.6593939393939394e-06, "loss": 6.366634750366211, "step": 47345 }, { "epoch": 0.1725, "grad_norm": 4.681080341339111, "learning_rate": 2.6591414141414144e-06, "loss": 6.28521957397461, "step": 47350 }, { "epoch": 0.17255, "grad_norm": 6.658570766448975, "learning_rate": 2.658888888888889e-06, "loss": 6.268695449829101, "step": 47355 }, { "epoch": 0.1726, "grad_norm": 9.570307731628418, "learning_rate": 2.6586363636363637e-06, "loss": 6.191267395019532, "step": 47360 }, { "epoch": 0.17265, "grad_norm": 8.531479835510254, "learning_rate": 2.6583838383838383e-06, "loss": 6.233112335205078, "step": 47365 }, { "epoch": 0.1727, "grad_norm": 3.9260778427124023, "learning_rate": 2.6581313131313134e-06, "loss": 6.279864501953125, "step": 47370 }, { "epoch": 0.17275, "grad_norm": 9.470434188842773, "learning_rate": 2.657878787878788e-06, "loss": 6.262665176391602, "step": 47375 }, { "epoch": 0.1728, "grad_norm": 8.866920471191406, "learning_rate": 2.6576262626262627e-06, "loss": 6.273202514648437, "step": 47380 }, { "epoch": 0.17285, "grad_norm": 7.32393217086792, "learning_rate": 2.6573737373737373e-06, "loss": 6.3455760955810545, "step": 47385 }, { "epoch": 0.1729, "grad_norm": 8.965706825256348, "learning_rate": 2.6571212121212123e-06, "loss": 6.332775115966797, "step": 47390 }, { "epoch": 0.17295, "grad_norm": 8.936612129211426, "learning_rate": 2.656868686868687e-06, "loss": 6.296910095214844, "step": 47395 }, { "epoch": 0.173, "grad_norm": 15.726349830627441, "learning_rate": 2.6566161616161616e-06, "loss": 6.271052551269531, "step": 47400 }, { "epoch": 0.17305, "grad_norm": 7.828895568847656, "learning_rate": 2.6563636363636362e-06, "loss": 6.324835205078125, "step": 47405 }, { "epoch": 0.1731, "grad_norm": 6.816806316375732, "learning_rate": 2.6561111111111117e-06, "loss": 6.238269805908203, "step": 47410 }, { "epoch": 0.17315, "grad_norm": 4.814878463745117, "learning_rate": 2.6558585858585863e-06, "loss": 6.274610137939453, "step": 47415 }, { "epoch": 0.1732, "grad_norm": 6.599291801452637, "learning_rate": 2.6556060606060605e-06, "loss": 6.359944534301758, "step": 47420 }, { "epoch": 0.17325, "grad_norm": 5.71464204788208, "learning_rate": 2.655353535353535e-06, "loss": 6.2392009735107425, "step": 47425 }, { "epoch": 0.1733, "grad_norm": 5.5507965087890625, "learning_rate": 2.6551010101010106e-06, "loss": 6.211863327026367, "step": 47430 }, { "epoch": 0.17335, "grad_norm": 9.266764640808105, "learning_rate": 2.6548484848484853e-06, "loss": 6.271177291870117, "step": 47435 }, { "epoch": 0.1734, "grad_norm": 15.166016578674316, "learning_rate": 2.65459595959596e-06, "loss": 6.304856872558593, "step": 47440 }, { "epoch": 0.17345, "grad_norm": 26.23482322692871, "learning_rate": 2.6543434343434345e-06, "loss": 6.21984977722168, "step": 47445 }, { "epoch": 0.1735, "grad_norm": 7.818333148956299, "learning_rate": 2.6540909090909096e-06, "loss": 6.1928855895996096, "step": 47450 }, { "epoch": 0.17355, "grad_norm": 5.183565616607666, "learning_rate": 2.6538383838383842e-06, "loss": 6.249717712402344, "step": 47455 }, { "epoch": 0.1736, "grad_norm": 8.456808090209961, "learning_rate": 2.653585858585859e-06, "loss": 6.2873188018798825, "step": 47460 }, { "epoch": 0.17365, "grad_norm": 7.607513904571533, "learning_rate": 2.6533333333333335e-06, "loss": 6.278399276733398, "step": 47465 }, { "epoch": 0.1737, "grad_norm": 6.025649070739746, "learning_rate": 2.6530808080808085e-06, "loss": 6.294619369506836, "step": 47470 }, { "epoch": 0.17375, "grad_norm": 8.025557518005371, "learning_rate": 2.652828282828283e-06, "loss": 6.341694259643555, "step": 47475 }, { "epoch": 0.1738, "grad_norm": 7.793490886688232, "learning_rate": 2.652575757575758e-06, "loss": 6.309343338012695, "step": 47480 }, { "epoch": 0.17385, "grad_norm": 6.445230007171631, "learning_rate": 2.6523232323232324e-06, "loss": 6.265369033813476, "step": 47485 }, { "epoch": 0.1739, "grad_norm": 6.140890121459961, "learning_rate": 2.6520707070707075e-06, "loss": 6.306983184814453, "step": 47490 }, { "epoch": 0.17395, "grad_norm": 18.246091842651367, "learning_rate": 2.651818181818182e-06, "loss": 6.521483612060547, "step": 47495 }, { "epoch": 0.174, "grad_norm": 5.236179351806641, "learning_rate": 2.6515656565656567e-06, "loss": 6.425864410400391, "step": 47500 }, { "epoch": 0.17405, "grad_norm": 4.087677001953125, "learning_rate": 2.6513131313131314e-06, "loss": 6.268887329101562, "step": 47505 }, { "epoch": 0.1741, "grad_norm": 3.5132699012756348, "learning_rate": 2.6510606060606064e-06, "loss": 6.265235900878906, "step": 47510 }, { "epoch": 0.17415, "grad_norm": 10.034432411193848, "learning_rate": 2.650808080808081e-06, "loss": 6.271514129638672, "step": 47515 }, { "epoch": 0.1742, "grad_norm": 6.0167694091796875, "learning_rate": 2.6505555555555557e-06, "loss": 6.255805969238281, "step": 47520 }, { "epoch": 0.17425, "grad_norm": 6.136348724365234, "learning_rate": 2.6503030303030303e-06, "loss": 6.270484161376953, "step": 47525 }, { "epoch": 0.1743, "grad_norm": 22.373666763305664, "learning_rate": 2.6500505050505054e-06, "loss": 6.498587799072266, "step": 47530 }, { "epoch": 0.17435, "grad_norm": 7.57023286819458, "learning_rate": 2.64979797979798e-06, "loss": 6.264840698242187, "step": 47535 }, { "epoch": 0.1744, "grad_norm": 4.978572368621826, "learning_rate": 2.6495454545454546e-06, "loss": 6.246053695678711, "step": 47540 }, { "epoch": 0.17445, "grad_norm": 13.049735069274902, "learning_rate": 2.6492929292929293e-06, "loss": 6.270341491699218, "step": 47545 }, { "epoch": 0.1745, "grad_norm": 7.1946306228637695, "learning_rate": 2.6490404040404043e-06, "loss": 6.203516006469727, "step": 47550 }, { "epoch": 0.17455, "grad_norm": 5.335668563842773, "learning_rate": 2.648787878787879e-06, "loss": 6.271541976928711, "step": 47555 }, { "epoch": 0.1746, "grad_norm": 5.738757610321045, "learning_rate": 2.6485353535353536e-06, "loss": 6.182346343994141, "step": 47560 }, { "epoch": 0.17465, "grad_norm": 4.051938533782959, "learning_rate": 2.648282828282828e-06, "loss": 6.226968002319336, "step": 47565 }, { "epoch": 0.1747, "grad_norm": 10.219626426696777, "learning_rate": 2.6480303030303033e-06, "loss": 6.222340393066406, "step": 47570 }, { "epoch": 0.17475, "grad_norm": 21.511680603027344, "learning_rate": 2.647777777777778e-06, "loss": 6.275871658325196, "step": 47575 }, { "epoch": 0.1748, "grad_norm": 6.837258815765381, "learning_rate": 2.6475252525252525e-06, "loss": 6.298134231567383, "step": 47580 }, { "epoch": 0.17485, "grad_norm": 8.175602912902832, "learning_rate": 2.647272727272727e-06, "loss": 6.266039276123047, "step": 47585 }, { "epoch": 0.1749, "grad_norm": 13.757307052612305, "learning_rate": 2.647020202020202e-06, "loss": 6.267093658447266, "step": 47590 }, { "epoch": 0.17495, "grad_norm": 8.710346221923828, "learning_rate": 2.646767676767677e-06, "loss": 6.267892456054687, "step": 47595 }, { "epoch": 0.175, "grad_norm": 5.32004451751709, "learning_rate": 2.6465151515151515e-06, "loss": 6.285144424438476, "step": 47600 }, { "epoch": 0.17505, "grad_norm": 8.614721298217773, "learning_rate": 2.646262626262627e-06, "loss": 6.250714874267578, "step": 47605 }, { "epoch": 0.1751, "grad_norm": 4.099207401275635, "learning_rate": 2.6460101010101016e-06, "loss": 6.250260925292968, "step": 47610 }, { "epoch": 0.17515, "grad_norm": 4.783846855163574, "learning_rate": 2.6457575757575758e-06, "loss": 6.301868438720703, "step": 47615 }, { "epoch": 0.1752, "grad_norm": 5.114558219909668, "learning_rate": 2.6455050505050504e-06, "loss": 6.2480918884277346, "step": 47620 }, { "epoch": 0.17525, "grad_norm": 3.9396958351135254, "learning_rate": 2.645252525252526e-06, "loss": 6.2717853546142575, "step": 47625 }, { "epoch": 0.1753, "grad_norm": 6.844432353973389, "learning_rate": 2.6450000000000005e-06, "loss": 6.2056629180908205, "step": 47630 }, { "epoch": 0.17535, "grad_norm": 8.014008522033691, "learning_rate": 2.644747474747475e-06, "loss": 6.290700531005859, "step": 47635 }, { "epoch": 0.1754, "grad_norm": 5.798371315002441, "learning_rate": 2.6444949494949498e-06, "loss": 6.264681243896485, "step": 47640 }, { "epoch": 0.17545, "grad_norm": 7.012979030609131, "learning_rate": 2.644242424242425e-06, "loss": 6.238762664794922, "step": 47645 }, { "epoch": 0.1755, "grad_norm": 3.5868847370147705, "learning_rate": 2.6439898989898995e-06, "loss": 6.246706008911133, "step": 47650 }, { "epoch": 0.17555, "grad_norm": 7.368686676025391, "learning_rate": 2.643737373737374e-06, "loss": 6.236088562011719, "step": 47655 }, { "epoch": 0.1756, "grad_norm": 5.073850631713867, "learning_rate": 2.6434848484848487e-06, "loss": 6.316257095336914, "step": 47660 }, { "epoch": 0.17565, "grad_norm": 7.164087772369385, "learning_rate": 2.6432323232323238e-06, "loss": 6.295541763305664, "step": 47665 }, { "epoch": 0.1757, "grad_norm": 7.024266242980957, "learning_rate": 2.6429797979797984e-06, "loss": 6.290009307861328, "step": 47670 }, { "epoch": 0.17575, "grad_norm": 5.369235992431641, "learning_rate": 2.642727272727273e-06, "loss": 6.243041229248047, "step": 47675 }, { "epoch": 0.1758, "grad_norm": 4.002133369445801, "learning_rate": 2.6424747474747477e-06, "loss": 6.2692512512207035, "step": 47680 }, { "epoch": 0.17585, "grad_norm": 15.928533554077148, "learning_rate": 2.6422222222222227e-06, "loss": 6.305919647216797, "step": 47685 }, { "epoch": 0.1759, "grad_norm": 10.939151763916016, "learning_rate": 2.6419696969696973e-06, "loss": 6.290536117553711, "step": 47690 }, { "epoch": 0.17595, "grad_norm": 7.684718608856201, "learning_rate": 2.641717171717172e-06, "loss": 6.245788955688477, "step": 47695 }, { "epoch": 0.176, "grad_norm": 5.038311958312988, "learning_rate": 2.6414646464646466e-06, "loss": 6.1863452911376955, "step": 47700 }, { "epoch": 0.17605, "grad_norm": 7.494833469390869, "learning_rate": 2.6412121212121217e-06, "loss": 6.244598388671875, "step": 47705 }, { "epoch": 0.1761, "grad_norm": 17.5210018157959, "learning_rate": 2.6409595959595963e-06, "loss": 6.240899276733399, "step": 47710 }, { "epoch": 0.17615, "grad_norm": 7.9723405838012695, "learning_rate": 2.640707070707071e-06, "loss": 6.250740432739258, "step": 47715 }, { "epoch": 0.1762, "grad_norm": 8.893933296203613, "learning_rate": 2.6404545454545455e-06, "loss": 6.285586547851563, "step": 47720 }, { "epoch": 0.17625, "grad_norm": 7.768322944641113, "learning_rate": 2.6402020202020206e-06, "loss": 6.267386245727539, "step": 47725 }, { "epoch": 0.1763, "grad_norm": 8.654402732849121, "learning_rate": 2.6399494949494952e-06, "loss": 6.249360656738281, "step": 47730 }, { "epoch": 0.17635, "grad_norm": 72.4115219116211, "learning_rate": 2.63969696969697e-06, "loss": 5.806149291992187, "step": 47735 }, { "epoch": 0.1764, "grad_norm": 18.25945472717285, "learning_rate": 2.6394444444444445e-06, "loss": 5.745714569091797, "step": 47740 }, { "epoch": 0.17645, "grad_norm": 6.664707183837891, "learning_rate": 2.6391919191919195e-06, "loss": 6.279105758666992, "step": 47745 }, { "epoch": 0.1765, "grad_norm": 22.607494354248047, "learning_rate": 2.638939393939394e-06, "loss": 6.290519714355469, "step": 47750 }, { "epoch": 0.17655, "grad_norm": 11.0410795211792, "learning_rate": 2.638686868686869e-06, "loss": 6.274182891845703, "step": 47755 }, { "epoch": 0.1766, "grad_norm": 5.112051963806152, "learning_rate": 2.6384343434343434e-06, "loss": 6.273715591430664, "step": 47760 }, { "epoch": 0.17665, "grad_norm": 6.5331711769104, "learning_rate": 2.6381818181818185e-06, "loss": 6.234976577758789, "step": 47765 }, { "epoch": 0.1767, "grad_norm": 10.753203392028809, "learning_rate": 2.637929292929293e-06, "loss": 6.2847434997558596, "step": 47770 }, { "epoch": 0.17675, "grad_norm": 9.004714965820312, "learning_rate": 2.6376767676767677e-06, "loss": 6.215074920654297, "step": 47775 }, { "epoch": 0.1768, "grad_norm": 10.99597454071045, "learning_rate": 2.6374242424242424e-06, "loss": 6.2553260803222654, "step": 47780 }, { "epoch": 0.17685, "grad_norm": 7.867167949676514, "learning_rate": 2.6371717171717174e-06, "loss": 6.266901016235352, "step": 47785 }, { "epoch": 0.1769, "grad_norm": 6.762842655181885, "learning_rate": 2.636919191919192e-06, "loss": 6.229890441894531, "step": 47790 }, { "epoch": 0.17695, "grad_norm": 6.076344966888428, "learning_rate": 2.6366666666666667e-06, "loss": 6.268466186523438, "step": 47795 }, { "epoch": 0.177, "grad_norm": 8.895609855651855, "learning_rate": 2.6364141414141413e-06, "loss": 6.218725967407226, "step": 47800 }, { "epoch": 0.17705, "grad_norm": 4.337243556976318, "learning_rate": 2.636161616161617e-06, "loss": 6.226114654541016, "step": 47805 }, { "epoch": 0.1771, "grad_norm": 5.147741317749023, "learning_rate": 2.635909090909091e-06, "loss": 6.255978012084961, "step": 47810 }, { "epoch": 0.17715, "grad_norm": 3.7334115505218506, "learning_rate": 2.6356565656565656e-06, "loss": 6.24000129699707, "step": 47815 }, { "epoch": 0.1772, "grad_norm": 7.9374470710754395, "learning_rate": 2.6354040404040403e-06, "loss": 6.294181442260742, "step": 47820 }, { "epoch": 0.17725, "grad_norm": 3.8998446464538574, "learning_rate": 2.6351515151515157e-06, "loss": 6.281140518188477, "step": 47825 }, { "epoch": 0.1773, "grad_norm": 6.497595310211182, "learning_rate": 2.6348989898989904e-06, "loss": 6.289561462402344, "step": 47830 }, { "epoch": 0.17735, "grad_norm": 6.9055070877075195, "learning_rate": 2.6346464646464646e-06, "loss": 6.27686767578125, "step": 47835 }, { "epoch": 0.1774, "grad_norm": 3.242454767227173, "learning_rate": 2.634393939393939e-06, "loss": 6.250998687744141, "step": 47840 }, { "epoch": 0.17745, "grad_norm": 6.499072551727295, "learning_rate": 2.6341414141414147e-06, "loss": 6.2536975860595705, "step": 47845 }, { "epoch": 0.1775, "grad_norm": 5.936182975769043, "learning_rate": 2.6338888888888893e-06, "loss": 6.256839752197266, "step": 47850 }, { "epoch": 0.17755, "grad_norm": 5.262515068054199, "learning_rate": 2.633636363636364e-06, "loss": 6.232771301269532, "step": 47855 }, { "epoch": 0.1776, "grad_norm": 7.631040573120117, "learning_rate": 2.6333838383838386e-06, "loss": 6.251808929443359, "step": 47860 }, { "epoch": 0.17765, "grad_norm": 4.27992582321167, "learning_rate": 2.6331313131313136e-06, "loss": 6.2478477478027346, "step": 47865 }, { "epoch": 0.1777, "grad_norm": 8.00022029876709, "learning_rate": 2.6328787878787883e-06, "loss": 6.295926284790039, "step": 47870 }, { "epoch": 0.17775, "grad_norm": 8.023147583007812, "learning_rate": 2.632626262626263e-06, "loss": 6.24711799621582, "step": 47875 }, { "epoch": 0.1778, "grad_norm": 5.588597774505615, "learning_rate": 2.6323737373737375e-06, "loss": 6.225086212158203, "step": 47880 }, { "epoch": 0.17785, "grad_norm": 8.898357391357422, "learning_rate": 2.6321212121212126e-06, "loss": 6.28184814453125, "step": 47885 }, { "epoch": 0.1779, "grad_norm": 7.23288106918335, "learning_rate": 2.631868686868687e-06, "loss": 6.312599563598633, "step": 47890 }, { "epoch": 0.17795, "grad_norm": 6.0015339851379395, "learning_rate": 2.631616161616162e-06, "loss": 6.353037261962891, "step": 47895 }, { "epoch": 0.178, "grad_norm": 3.8909544944763184, "learning_rate": 2.6313636363636365e-06, "loss": 6.299990463256836, "step": 47900 }, { "epoch": 0.17805, "grad_norm": 6.664587497711182, "learning_rate": 2.6311111111111115e-06, "loss": 6.291679763793946, "step": 47905 }, { "epoch": 0.1781, "grad_norm": 5.711031436920166, "learning_rate": 2.630858585858586e-06, "loss": 6.288218688964844, "step": 47910 }, { "epoch": 0.17815, "grad_norm": 13.36122989654541, "learning_rate": 2.6306060606060608e-06, "loss": 6.329861450195312, "step": 47915 }, { "epoch": 0.1782, "grad_norm": 6.93322229385376, "learning_rate": 2.6303535353535354e-06, "loss": 6.292606353759766, "step": 47920 }, { "epoch": 0.17825, "grad_norm": 4.575633525848389, "learning_rate": 2.6301010101010105e-06, "loss": 6.467820739746093, "step": 47925 }, { "epoch": 0.1783, "grad_norm": 4.939698696136475, "learning_rate": 2.629848484848485e-06, "loss": 6.218785095214844, "step": 47930 }, { "epoch": 0.17835, "grad_norm": 5.220640182495117, "learning_rate": 2.6295959595959597e-06, "loss": 6.242573547363281, "step": 47935 }, { "epoch": 0.1784, "grad_norm": 4.879406452178955, "learning_rate": 2.6293434343434344e-06, "loss": 6.219522094726562, "step": 47940 }, { "epoch": 0.17845, "grad_norm": 5.922840118408203, "learning_rate": 2.6290909090909094e-06, "loss": 6.239421081542969, "step": 47945 }, { "epoch": 0.1785, "grad_norm": 8.302436828613281, "learning_rate": 2.628838383838384e-06, "loss": 6.240216064453125, "step": 47950 }, { "epoch": 0.17855, "grad_norm": 7.600788116455078, "learning_rate": 2.6285858585858587e-06, "loss": 6.28115234375, "step": 47955 }, { "epoch": 0.1786, "grad_norm": 5.259443283081055, "learning_rate": 2.6283333333333333e-06, "loss": 6.241807556152343, "step": 47960 }, { "epoch": 0.17865, "grad_norm": 5.600335597991943, "learning_rate": 2.6280808080808084e-06, "loss": 6.250723648071289, "step": 47965 }, { "epoch": 0.1787, "grad_norm": 10.571996688842773, "learning_rate": 2.627828282828283e-06, "loss": 6.307792282104492, "step": 47970 }, { "epoch": 0.17875, "grad_norm": 11.651870727539062, "learning_rate": 2.6275757575757576e-06, "loss": 6.31889762878418, "step": 47975 }, { "epoch": 0.1788, "grad_norm": 9.045377731323242, "learning_rate": 2.6273232323232322e-06, "loss": 6.292393493652344, "step": 47980 }, { "epoch": 0.17885, "grad_norm": 5.730259895324707, "learning_rate": 2.6270707070707073e-06, "loss": 6.235333251953125, "step": 47985 }, { "epoch": 0.1789, "grad_norm": 5.916414260864258, "learning_rate": 2.626818181818182e-06, "loss": 6.281492614746094, "step": 47990 }, { "epoch": 0.17895, "grad_norm": 7.37360143661499, "learning_rate": 2.6265656565656566e-06, "loss": 6.209481048583984, "step": 47995 }, { "epoch": 0.179, "grad_norm": 6.093311309814453, "learning_rate": 2.626313131313131e-06, "loss": 6.235608291625977, "step": 48000 }, { "epoch": 0.17905, "grad_norm": 5.324979782104492, "learning_rate": 2.6260606060606062e-06, "loss": 6.213168334960938, "step": 48005 }, { "epoch": 0.1791, "grad_norm": 9.054213523864746, "learning_rate": 2.625808080808081e-06, "loss": 6.274770736694336, "step": 48010 }, { "epoch": 0.17915, "grad_norm": 4.6502461433410645, "learning_rate": 2.6255555555555555e-06, "loss": 6.229012680053711, "step": 48015 }, { "epoch": 0.1792, "grad_norm": 7.449853420257568, "learning_rate": 2.62530303030303e-06, "loss": 6.2473796844482425, "step": 48020 }, { "epoch": 0.17925, "grad_norm": 7.092662811279297, "learning_rate": 2.6250505050505056e-06, "loss": 6.310564041137695, "step": 48025 }, { "epoch": 0.1793, "grad_norm": 6.1560587882995605, "learning_rate": 2.62479797979798e-06, "loss": 6.515811157226563, "step": 48030 }, { "epoch": 0.17935, "grad_norm": 8.912825584411621, "learning_rate": 2.6245454545454544e-06, "loss": 6.222552490234375, "step": 48035 }, { "epoch": 0.1794, "grad_norm": 6.4607930183410645, "learning_rate": 2.62429292929293e-06, "loss": 6.275106048583984, "step": 48040 }, { "epoch": 0.17945, "grad_norm": 8.48847484588623, "learning_rate": 2.6240404040404046e-06, "loss": 6.231845474243164, "step": 48045 }, { "epoch": 0.1795, "grad_norm": 11.00533676147461, "learning_rate": 2.623787878787879e-06, "loss": 6.283255767822266, "step": 48050 }, { "epoch": 0.17955, "grad_norm": 5.8433427810668945, "learning_rate": 2.623535353535354e-06, "loss": 6.317218017578125, "step": 48055 }, { "epoch": 0.1796, "grad_norm": 7.158637046813965, "learning_rate": 2.623282828282829e-06, "loss": 6.296661758422852, "step": 48060 }, { "epoch": 0.17965, "grad_norm": 6.191345691680908, "learning_rate": 2.6230303030303035e-06, "loss": 6.246175003051758, "step": 48065 }, { "epoch": 0.1797, "grad_norm": 5.713347434997559, "learning_rate": 2.622777777777778e-06, "loss": 6.280545043945312, "step": 48070 }, { "epoch": 0.17975, "grad_norm": 8.603072166442871, "learning_rate": 2.6225252525252528e-06, "loss": 6.317818069458008, "step": 48075 }, { "epoch": 0.1798, "grad_norm": 7.589648246765137, "learning_rate": 2.622272727272728e-06, "loss": 6.278591537475586, "step": 48080 }, { "epoch": 0.17985, "grad_norm": 6.885838508605957, "learning_rate": 2.6220202020202024e-06, "loss": 6.244407272338867, "step": 48085 }, { "epoch": 0.1799, "grad_norm": 6.983613014221191, "learning_rate": 2.621767676767677e-06, "loss": 6.284003829956054, "step": 48090 }, { "epoch": 0.17995, "grad_norm": 34.798831939697266, "learning_rate": 2.6215151515151517e-06, "loss": 6.452660369873047, "step": 48095 }, { "epoch": 0.18, "grad_norm": 9.109298706054688, "learning_rate": 2.6212626262626268e-06, "loss": 6.2943168640136715, "step": 48100 }, { "epoch": 0.18005, "grad_norm": 6.893949031829834, "learning_rate": 2.6210101010101014e-06, "loss": 6.289973449707031, "step": 48105 }, { "epoch": 0.1801, "grad_norm": 9.77614974975586, "learning_rate": 2.620757575757576e-06, "loss": 6.2471466064453125, "step": 48110 }, { "epoch": 0.18015, "grad_norm": 4.86616325378418, "learning_rate": 2.6205050505050506e-06, "loss": 6.267417907714844, "step": 48115 }, { "epoch": 0.1802, "grad_norm": 12.917620658874512, "learning_rate": 2.6202525252525257e-06, "loss": 6.612276458740235, "step": 48120 }, { "epoch": 0.18025, "grad_norm": 4.852736949920654, "learning_rate": 2.6200000000000003e-06, "loss": 6.272792053222656, "step": 48125 }, { "epoch": 0.1803, "grad_norm": 11.241585731506348, "learning_rate": 2.619747474747475e-06, "loss": 6.44803466796875, "step": 48130 }, { "epoch": 0.18035, "grad_norm": 65.67726135253906, "learning_rate": 2.6194949494949496e-06, "loss": 6.650630950927734, "step": 48135 }, { "epoch": 0.1804, "grad_norm": 46.76616668701172, "learning_rate": 2.6192424242424246e-06, "loss": 8.337892150878906, "step": 48140 }, { "epoch": 0.18045, "grad_norm": 17.641292572021484, "learning_rate": 2.6189898989898993e-06, "loss": 6.656027984619141, "step": 48145 }, { "epoch": 0.1805, "grad_norm": 10.723069190979004, "learning_rate": 2.618737373737374e-06, "loss": 6.307731246948242, "step": 48150 }, { "epoch": 0.18055, "grad_norm": 5.6367974281311035, "learning_rate": 2.6184848484848485e-06, "loss": 6.2702991485595705, "step": 48155 }, { "epoch": 0.1806, "grad_norm": 12.778246879577637, "learning_rate": 2.6182323232323236e-06, "loss": 6.255522918701172, "step": 48160 }, { "epoch": 0.18065, "grad_norm": 19.859859466552734, "learning_rate": 2.6179797979797982e-06, "loss": 6.6698249816894535, "step": 48165 }, { "epoch": 0.1807, "grad_norm": 6.035888671875, "learning_rate": 2.617727272727273e-06, "loss": 6.259794235229492, "step": 48170 }, { "epoch": 0.18075, "grad_norm": 6.837201118469238, "learning_rate": 2.6174747474747475e-06, "loss": 6.226263427734375, "step": 48175 }, { "epoch": 0.1808, "grad_norm": 4.683717727661133, "learning_rate": 2.6172222222222225e-06, "loss": 6.279007339477539, "step": 48180 }, { "epoch": 0.18085, "grad_norm": 7.638728141784668, "learning_rate": 2.616969696969697e-06, "loss": 6.294086074829101, "step": 48185 }, { "epoch": 0.1809, "grad_norm": 3.691983938217163, "learning_rate": 2.6167171717171718e-06, "loss": 6.351461791992188, "step": 48190 }, { "epoch": 0.18095, "grad_norm": 4.592685222625732, "learning_rate": 2.6164646464646464e-06, "loss": 6.273931503295898, "step": 48195 }, { "epoch": 0.181, "grad_norm": 9.297545433044434, "learning_rate": 2.6162121212121215e-06, "loss": 6.280540084838867, "step": 48200 }, { "epoch": 0.18105, "grad_norm": 4.770380973815918, "learning_rate": 2.615959595959596e-06, "loss": 6.232170104980469, "step": 48205 }, { "epoch": 0.1811, "grad_norm": 4.011338710784912, "learning_rate": 2.6157070707070707e-06, "loss": 6.283312225341797, "step": 48210 }, { "epoch": 0.18115, "grad_norm": 4.543745517730713, "learning_rate": 2.6154545454545454e-06, "loss": 6.274518585205078, "step": 48215 }, { "epoch": 0.1812, "grad_norm": 8.880348205566406, "learning_rate": 2.615202020202021e-06, "loss": 6.232537460327149, "step": 48220 }, { "epoch": 0.18125, "grad_norm": 5.506641387939453, "learning_rate": 2.614949494949495e-06, "loss": 6.287428665161133, "step": 48225 }, { "epoch": 0.1813, "grad_norm": 5.396836757659912, "learning_rate": 2.6146969696969697e-06, "loss": 6.248798751831055, "step": 48230 }, { "epoch": 0.18135, "grad_norm": 10.18353271484375, "learning_rate": 2.6144444444444443e-06, "loss": 6.225008010864258, "step": 48235 }, { "epoch": 0.1814, "grad_norm": 6.2977375984191895, "learning_rate": 2.6141919191919198e-06, "loss": 6.2637794494628904, "step": 48240 }, { "epoch": 0.18145, "grad_norm": 6.100552082061768, "learning_rate": 2.6139393939393944e-06, "loss": 6.2712348937988285, "step": 48245 }, { "epoch": 0.1815, "grad_norm": 10.218465805053711, "learning_rate": 2.6136868686868686e-06, "loss": 6.251220703125, "step": 48250 }, { "epoch": 0.18155, "grad_norm": 5.597082614898682, "learning_rate": 2.6134343434343433e-06, "loss": 6.24815673828125, "step": 48255 }, { "epoch": 0.1816, "grad_norm": 9.717144012451172, "learning_rate": 2.6131818181818187e-06, "loss": 6.350579452514649, "step": 48260 }, { "epoch": 0.18165, "grad_norm": 5.8371100425720215, "learning_rate": 2.6129292929292934e-06, "loss": 6.2830039978027346, "step": 48265 }, { "epoch": 0.1817, "grad_norm": 5.892655372619629, "learning_rate": 2.612676767676768e-06, "loss": 6.258025741577148, "step": 48270 }, { "epoch": 0.18175, "grad_norm": 4.0733466148376465, "learning_rate": 2.6124242424242426e-06, "loss": 6.2617546081542965, "step": 48275 }, { "epoch": 0.1818, "grad_norm": 3.7014617919921875, "learning_rate": 2.6121717171717177e-06, "loss": 6.281090545654297, "step": 48280 }, { "epoch": 0.18185, "grad_norm": 9.8246431350708, "learning_rate": 2.6119191919191923e-06, "loss": 6.260322189331054, "step": 48285 }, { "epoch": 0.1819, "grad_norm": 5.4751811027526855, "learning_rate": 2.611666666666667e-06, "loss": 6.244245910644532, "step": 48290 }, { "epoch": 0.18195, "grad_norm": 28.53630828857422, "learning_rate": 2.6114141414141416e-06, "loss": 6.1483509063720705, "step": 48295 }, { "epoch": 0.182, "grad_norm": 7.886487007141113, "learning_rate": 2.6111616161616166e-06, "loss": 6.288107681274414, "step": 48300 }, { "epoch": 0.18205, "grad_norm": 6.2996368408203125, "learning_rate": 2.6109090909090912e-06, "loss": 6.229745101928711, "step": 48305 }, { "epoch": 0.1821, "grad_norm": 6.60714054107666, "learning_rate": 2.610656565656566e-06, "loss": 6.129868698120117, "step": 48310 }, { "epoch": 0.18215, "grad_norm": 4.8839640617370605, "learning_rate": 2.6104040404040405e-06, "loss": 6.253520584106445, "step": 48315 }, { "epoch": 0.1822, "grad_norm": 3.882784366607666, "learning_rate": 2.6101515151515156e-06, "loss": 6.239481735229492, "step": 48320 }, { "epoch": 0.18225, "grad_norm": 6.165802955627441, "learning_rate": 2.60989898989899e-06, "loss": 6.2263236999511715, "step": 48325 }, { "epoch": 0.1823, "grad_norm": 25.807262420654297, "learning_rate": 2.609646464646465e-06, "loss": 6.718295288085938, "step": 48330 }, { "epoch": 0.18235, "grad_norm": 5.936855316162109, "learning_rate": 2.6093939393939394e-06, "loss": 6.258689880371094, "step": 48335 }, { "epoch": 0.1824, "grad_norm": 7.943792819976807, "learning_rate": 2.6091414141414145e-06, "loss": 6.272266006469726, "step": 48340 }, { "epoch": 0.18245, "grad_norm": 4.897097110748291, "learning_rate": 2.608888888888889e-06, "loss": 6.312567138671875, "step": 48345 }, { "epoch": 0.1825, "grad_norm": 4.8942646980285645, "learning_rate": 2.6086363636363638e-06, "loss": 6.213748168945313, "step": 48350 }, { "epoch": 0.18255, "grad_norm": 8.319792747497559, "learning_rate": 2.6083838383838384e-06, "loss": 6.236863327026367, "step": 48355 }, { "epoch": 0.1826, "grad_norm": 6.477847099304199, "learning_rate": 2.6081313131313134e-06, "loss": 6.227033996582032, "step": 48360 }, { "epoch": 0.18265, "grad_norm": 5.5258026123046875, "learning_rate": 2.607878787878788e-06, "loss": 6.288137817382813, "step": 48365 }, { "epoch": 0.1827, "grad_norm": 8.185297012329102, "learning_rate": 2.6076262626262627e-06, "loss": 6.278008270263672, "step": 48370 }, { "epoch": 0.18275, "grad_norm": 6.713562965393066, "learning_rate": 2.6073737373737373e-06, "loss": 6.2511962890625, "step": 48375 }, { "epoch": 0.1828, "grad_norm": 36.011634826660156, "learning_rate": 2.6071212121212124e-06, "loss": 6.288155364990234, "step": 48380 }, { "epoch": 0.18285, "grad_norm": 20.03618621826172, "learning_rate": 2.606868686868687e-06, "loss": 6.375435256958008, "step": 48385 }, { "epoch": 0.1829, "grad_norm": 8.57990837097168, "learning_rate": 2.6066161616161617e-06, "loss": 6.29247817993164, "step": 48390 }, { "epoch": 0.18295, "grad_norm": 4.2938761711120605, "learning_rate": 2.6063636363636363e-06, "loss": 6.230765151977539, "step": 48395 }, { "epoch": 0.183, "grad_norm": 7.590606212615967, "learning_rate": 2.6061111111111113e-06, "loss": 6.2109733581542965, "step": 48400 }, { "epoch": 0.18305, "grad_norm": 14.99771499633789, "learning_rate": 2.605858585858586e-06, "loss": 6.288570404052734, "step": 48405 }, { "epoch": 0.1831, "grad_norm": 7.39285945892334, "learning_rate": 2.6056060606060606e-06, "loss": 6.246884536743164, "step": 48410 }, { "epoch": 0.18315, "grad_norm": 8.00991439819336, "learning_rate": 2.6053535353535352e-06, "loss": 6.2502998352050785, "step": 48415 }, { "epoch": 0.1832, "grad_norm": 6.57605504989624, "learning_rate": 2.6051010101010103e-06, "loss": 6.153263092041016, "step": 48420 }, { "epoch": 0.18325, "grad_norm": 9.317729949951172, "learning_rate": 2.604848484848485e-06, "loss": 6.286091995239258, "step": 48425 }, { "epoch": 0.1833, "grad_norm": 6.233064651489258, "learning_rate": 2.6045959595959595e-06, "loss": 6.195788955688476, "step": 48430 }, { "epoch": 0.18335, "grad_norm": 5.572995185852051, "learning_rate": 2.604343434343434e-06, "loss": 6.2682548522949215, "step": 48435 }, { "epoch": 0.1834, "grad_norm": 12.352928161621094, "learning_rate": 2.6040909090909096e-06, "loss": 6.3266864776611325, "step": 48440 }, { "epoch": 0.18345, "grad_norm": 5.938622951507568, "learning_rate": 2.603838383838384e-06, "loss": 6.361819458007813, "step": 48445 }, { "epoch": 0.1835, "grad_norm": 13.396903038024902, "learning_rate": 2.6035858585858585e-06, "loss": 6.351467895507812, "step": 48450 }, { "epoch": 0.18355, "grad_norm": 6.301222324371338, "learning_rate": 2.603333333333334e-06, "loss": 6.244200134277344, "step": 48455 }, { "epoch": 0.1836, "grad_norm": 5.913710117340088, "learning_rate": 2.6030808080808086e-06, "loss": 6.262540435791015, "step": 48460 }, { "epoch": 0.18365, "grad_norm": 4.329402446746826, "learning_rate": 2.6028282828282832e-06, "loss": 6.223360443115235, "step": 48465 }, { "epoch": 0.1837, "grad_norm": 8.29628849029541, "learning_rate": 2.602575757575758e-06, "loss": 6.300723266601563, "step": 48470 }, { "epoch": 0.18375, "grad_norm": 24.32602882385254, "learning_rate": 2.602323232323233e-06, "loss": 6.20184326171875, "step": 48475 }, { "epoch": 0.1838, "grad_norm": 7.647665023803711, "learning_rate": 2.6020707070707075e-06, "loss": 6.253891372680664, "step": 48480 }, { "epoch": 0.18385, "grad_norm": 8.034605979919434, "learning_rate": 2.601818181818182e-06, "loss": 6.292695617675781, "step": 48485 }, { "epoch": 0.1839, "grad_norm": 4.2255635261535645, "learning_rate": 2.601565656565657e-06, "loss": 6.339519119262695, "step": 48490 }, { "epoch": 0.18395, "grad_norm": 5.973121166229248, "learning_rate": 2.601313131313132e-06, "loss": 6.2767284393310545, "step": 48495 }, { "epoch": 0.184, "grad_norm": 9.747757911682129, "learning_rate": 2.6010606060606065e-06, "loss": 6.285752487182617, "step": 48500 }, { "epoch": 0.18405, "grad_norm": 6.519881248474121, "learning_rate": 2.600808080808081e-06, "loss": 6.244038772583008, "step": 48505 }, { "epoch": 0.1841, "grad_norm": 8.095598220825195, "learning_rate": 2.6005555555555557e-06, "loss": 6.23590316772461, "step": 48510 }, { "epoch": 0.18415, "grad_norm": 5.66994571685791, "learning_rate": 2.600303030303031e-06, "loss": 6.281515121459961, "step": 48515 }, { "epoch": 0.1842, "grad_norm": 7.038925647735596, "learning_rate": 2.6000505050505054e-06, "loss": 6.282519912719726, "step": 48520 }, { "epoch": 0.18425, "grad_norm": 9.214323043823242, "learning_rate": 2.59979797979798e-06, "loss": 6.27685317993164, "step": 48525 }, { "epoch": 0.1843, "grad_norm": 6.425102233886719, "learning_rate": 2.5995454545454547e-06, "loss": 6.279011917114258, "step": 48530 }, { "epoch": 0.18435, "grad_norm": 8.823031425476074, "learning_rate": 2.5992929292929297e-06, "loss": 6.205877685546875, "step": 48535 }, { "epoch": 0.1844, "grad_norm": 6.056334972381592, "learning_rate": 2.5990404040404044e-06, "loss": 6.2910816192626955, "step": 48540 }, { "epoch": 0.18445, "grad_norm": 5.318397045135498, "learning_rate": 2.598787878787879e-06, "loss": 6.268881225585938, "step": 48545 }, { "epoch": 0.1845, "grad_norm": 16.25691032409668, "learning_rate": 2.5985353535353536e-06, "loss": 6.356953811645508, "step": 48550 }, { "epoch": 0.18455, "grad_norm": 6.1977739334106445, "learning_rate": 2.5982828282828287e-06, "loss": 6.282588958740234, "step": 48555 }, { "epoch": 0.1846, "grad_norm": 5.8424506187438965, "learning_rate": 2.5980303030303033e-06, "loss": 6.255816268920898, "step": 48560 }, { "epoch": 0.18465, "grad_norm": 3.963996171951294, "learning_rate": 2.597777777777778e-06, "loss": 6.286187744140625, "step": 48565 }, { "epoch": 0.1847, "grad_norm": 43.519737243652344, "learning_rate": 2.5975252525252526e-06, "loss": 6.266797637939453, "step": 48570 }, { "epoch": 0.18475, "grad_norm": 8.03644847869873, "learning_rate": 2.5972727272727276e-06, "loss": 6.253041076660156, "step": 48575 }, { "epoch": 0.1848, "grad_norm": 4.326549053192139, "learning_rate": 2.5970202020202023e-06, "loss": 6.205309295654297, "step": 48580 }, { "epoch": 0.18485, "grad_norm": 6.785407543182373, "learning_rate": 2.596767676767677e-06, "loss": 6.296359634399414, "step": 48585 }, { "epoch": 0.1849, "grad_norm": 6.993212699890137, "learning_rate": 2.5965151515151515e-06, "loss": 6.291777038574219, "step": 48590 }, { "epoch": 0.18495, "grad_norm": 6.141006946563721, "learning_rate": 2.5962626262626266e-06, "loss": 6.242097473144531, "step": 48595 }, { "epoch": 0.185, "grad_norm": 7.309471130371094, "learning_rate": 2.596010101010101e-06, "loss": 6.263505554199218, "step": 48600 }, { "epoch": 0.18505, "grad_norm": 4.708620548248291, "learning_rate": 2.595757575757576e-06, "loss": 6.265798950195313, "step": 48605 }, { "epoch": 0.1851, "grad_norm": 18.27180290222168, "learning_rate": 2.5955050505050505e-06, "loss": 6.3953899383544925, "step": 48610 }, { "epoch": 0.18515, "grad_norm": 4.59319543838501, "learning_rate": 2.5952525252525255e-06, "loss": 6.281650543212891, "step": 48615 }, { "epoch": 0.1852, "grad_norm": 3.920062303543091, "learning_rate": 2.595e-06, "loss": 6.2684684753417965, "step": 48620 }, { "epoch": 0.18525, "grad_norm": 15.268338203430176, "learning_rate": 2.5947474747474748e-06, "loss": 6.351996612548828, "step": 48625 }, { "epoch": 0.1853, "grad_norm": 9.803231239318848, "learning_rate": 2.5944949494949494e-06, "loss": 6.17854118347168, "step": 48630 }, { "epoch": 0.18535, "grad_norm": 6.423607349395752, "learning_rate": 2.594242424242425e-06, "loss": 6.273408126831055, "step": 48635 }, { "epoch": 0.1854, "grad_norm": 8.787635803222656, "learning_rate": 2.593989898989899e-06, "loss": 6.407449340820312, "step": 48640 }, { "epoch": 0.18545, "grad_norm": 6.539122581481934, "learning_rate": 2.5937373737373737e-06, "loss": 6.236711883544922, "step": 48645 }, { "epoch": 0.1855, "grad_norm": 25.96674919128418, "learning_rate": 2.5934848484848483e-06, "loss": 6.244915771484375, "step": 48650 }, { "epoch": 0.18555, "grad_norm": 4.660292148590088, "learning_rate": 2.593232323232324e-06, "loss": 6.27751579284668, "step": 48655 }, { "epoch": 0.1856, "grad_norm": 4.730471611022949, "learning_rate": 2.5929797979797985e-06, "loss": 6.319283294677734, "step": 48660 }, { "epoch": 0.18565, "grad_norm": 11.085516929626465, "learning_rate": 2.5927272727272727e-06, "loss": 6.183718872070313, "step": 48665 }, { "epoch": 0.1857, "grad_norm": 4.9343390464782715, "learning_rate": 2.5924747474747473e-06, "loss": 6.312179946899414, "step": 48670 }, { "epoch": 0.18575, "grad_norm": 5.6558685302734375, "learning_rate": 2.5922222222222228e-06, "loss": 6.224059677124023, "step": 48675 }, { "epoch": 0.1858, "grad_norm": 7.1425933837890625, "learning_rate": 2.5919696969696974e-06, "loss": 6.267584991455078, "step": 48680 }, { "epoch": 0.18585, "grad_norm": 6.397035121917725, "learning_rate": 2.591717171717172e-06, "loss": 6.314906311035156, "step": 48685 }, { "epoch": 0.1859, "grad_norm": 13.54969596862793, "learning_rate": 2.5914646464646467e-06, "loss": 6.364657974243164, "step": 48690 }, { "epoch": 0.18595, "grad_norm": 9.52370548248291, "learning_rate": 2.5912121212121217e-06, "loss": 6.350161361694336, "step": 48695 }, { "epoch": 0.186, "grad_norm": 4.5291595458984375, "learning_rate": 2.5909595959595963e-06, "loss": 6.2265159606933596, "step": 48700 }, { "epoch": 0.18605, "grad_norm": 9.520949363708496, "learning_rate": 2.590707070707071e-06, "loss": 6.258673477172851, "step": 48705 }, { "epoch": 0.1861, "grad_norm": 5.476080894470215, "learning_rate": 2.5904545454545456e-06, "loss": 6.296039962768555, "step": 48710 }, { "epoch": 0.18615, "grad_norm": 3.8783457279205322, "learning_rate": 2.5902020202020207e-06, "loss": 6.295009994506836, "step": 48715 }, { "epoch": 0.1862, "grad_norm": 4.13353967666626, "learning_rate": 2.5899494949494953e-06, "loss": 6.317186737060547, "step": 48720 }, { "epoch": 0.18625, "grad_norm": 6.12823486328125, "learning_rate": 2.58969696969697e-06, "loss": 6.2579090118408205, "step": 48725 }, { "epoch": 0.1863, "grad_norm": 4.812850475311279, "learning_rate": 2.5894444444444445e-06, "loss": 6.283192443847656, "step": 48730 }, { "epoch": 0.18635, "grad_norm": 10.12570571899414, "learning_rate": 2.5891919191919196e-06, "loss": 6.299571990966797, "step": 48735 }, { "epoch": 0.1864, "grad_norm": 4.629065990447998, "learning_rate": 2.5889393939393942e-06, "loss": 6.2183067321777346, "step": 48740 }, { "epoch": 0.18645, "grad_norm": 6.286792755126953, "learning_rate": 2.588686868686869e-06, "loss": 6.324052047729492, "step": 48745 }, { "epoch": 0.1865, "grad_norm": 4.833723068237305, "learning_rate": 2.5884343434343435e-06, "loss": 6.2407478332519535, "step": 48750 }, { "epoch": 0.18655, "grad_norm": 7.813973903656006, "learning_rate": 2.5881818181818185e-06, "loss": 6.248383331298828, "step": 48755 }, { "epoch": 0.1866, "grad_norm": 6.6759934425354, "learning_rate": 2.587929292929293e-06, "loss": 6.267718505859375, "step": 48760 }, { "epoch": 0.18665, "grad_norm": 5.854223728179932, "learning_rate": 2.587676767676768e-06, "loss": 6.195526123046875, "step": 48765 }, { "epoch": 0.1867, "grad_norm": 7.827494144439697, "learning_rate": 2.5874242424242424e-06, "loss": 6.186255264282226, "step": 48770 }, { "epoch": 0.18675, "grad_norm": 8.332564353942871, "learning_rate": 2.5871717171717175e-06, "loss": 6.27496109008789, "step": 48775 }, { "epoch": 0.1868, "grad_norm": 7.343414783477783, "learning_rate": 2.586919191919192e-06, "loss": 6.236605072021485, "step": 48780 }, { "epoch": 0.18685, "grad_norm": 3.538496732711792, "learning_rate": 2.5866666666666667e-06, "loss": 6.246809387207032, "step": 48785 }, { "epoch": 0.1869, "grad_norm": 5.750917911529541, "learning_rate": 2.5864141414141414e-06, "loss": 6.2322853088378904, "step": 48790 }, { "epoch": 0.18695, "grad_norm": 29.018627166748047, "learning_rate": 2.5861616161616164e-06, "loss": 6.637998962402344, "step": 48795 }, { "epoch": 0.187, "grad_norm": 38.493988037109375, "learning_rate": 2.585909090909091e-06, "loss": 6.694502258300782, "step": 48800 }, { "epoch": 0.18705, "grad_norm": 28.788293838500977, "learning_rate": 2.5856565656565657e-06, "loss": 6.569867706298828, "step": 48805 }, { "epoch": 0.1871, "grad_norm": 9.71681022644043, "learning_rate": 2.5854040404040403e-06, "loss": 6.4670654296875, "step": 48810 }, { "epoch": 0.18715, "grad_norm": 9.823171615600586, "learning_rate": 2.5851515151515154e-06, "loss": 6.251391220092773, "step": 48815 }, { "epoch": 0.1872, "grad_norm": 5.7379865646362305, "learning_rate": 2.58489898989899e-06, "loss": 6.204817199707032, "step": 48820 }, { "epoch": 0.18725, "grad_norm": 7.781379222869873, "learning_rate": 2.5846464646464646e-06, "loss": 6.227052307128906, "step": 48825 }, { "epoch": 0.1873, "grad_norm": 4.968623161315918, "learning_rate": 2.5843939393939393e-06, "loss": 6.318147659301758, "step": 48830 }, { "epoch": 0.18735, "grad_norm": 4.763797760009766, "learning_rate": 2.5841414141414143e-06, "loss": 6.189499282836914, "step": 48835 }, { "epoch": 0.1874, "grad_norm": 4.019498825073242, "learning_rate": 2.583888888888889e-06, "loss": 6.268447494506836, "step": 48840 }, { "epoch": 0.18745, "grad_norm": 6.574753284454346, "learning_rate": 2.5836363636363636e-06, "loss": 6.249187850952149, "step": 48845 }, { "epoch": 0.1875, "grad_norm": 6.905210971832275, "learning_rate": 2.583383838383838e-06, "loss": 6.297005462646484, "step": 48850 }, { "epoch": 0.18755, "grad_norm": 8.396756172180176, "learning_rate": 2.5831313131313137e-06, "loss": 6.25494613647461, "step": 48855 }, { "epoch": 0.1876, "grad_norm": 7.540562152862549, "learning_rate": 2.582878787878788e-06, "loss": 6.191415405273437, "step": 48860 }, { "epoch": 0.18765, "grad_norm": 5.578557014465332, "learning_rate": 2.5826262626262625e-06, "loss": 6.262314987182617, "step": 48865 }, { "epoch": 0.1877, "grad_norm": 5.299802303314209, "learning_rate": 2.582373737373737e-06, "loss": 6.259005737304688, "step": 48870 }, { "epoch": 0.18775, "grad_norm": 6.532639503479004, "learning_rate": 2.5821212121212126e-06, "loss": 6.274296569824219, "step": 48875 }, { "epoch": 0.1878, "grad_norm": 11.265859603881836, "learning_rate": 2.5818686868686873e-06, "loss": 6.2865955352783205, "step": 48880 }, { "epoch": 0.18785, "grad_norm": 5.160705089569092, "learning_rate": 2.581616161616162e-06, "loss": 6.209521102905273, "step": 48885 }, { "epoch": 0.1879, "grad_norm": 4.39208459854126, "learning_rate": 2.581363636363637e-06, "loss": 6.231343841552734, "step": 48890 }, { "epoch": 0.18795, "grad_norm": 7.619685173034668, "learning_rate": 2.5811111111111116e-06, "loss": 6.267190551757812, "step": 48895 }, { "epoch": 0.188, "grad_norm": 9.34493350982666, "learning_rate": 2.580858585858586e-06, "loss": 6.260499572753906, "step": 48900 }, { "epoch": 0.18805, "grad_norm": 10.23723316192627, "learning_rate": 2.580606060606061e-06, "loss": 6.2354591369628904, "step": 48905 }, { "epoch": 0.1881, "grad_norm": 14.397795677185059, "learning_rate": 2.580353535353536e-06, "loss": 6.309638977050781, "step": 48910 }, { "epoch": 0.18815, "grad_norm": 4.888641834259033, "learning_rate": 2.5801010101010105e-06, "loss": 6.306062316894531, "step": 48915 }, { "epoch": 0.1882, "grad_norm": 5.830529689788818, "learning_rate": 2.579848484848485e-06, "loss": 6.29931869506836, "step": 48920 }, { "epoch": 0.18825, "grad_norm": 9.750974655151367, "learning_rate": 2.5795959595959598e-06, "loss": 6.272991180419922, "step": 48925 }, { "epoch": 0.1883, "grad_norm": 11.02366828918457, "learning_rate": 2.579343434343435e-06, "loss": 6.348286437988281, "step": 48930 }, { "epoch": 0.18835, "grad_norm": 8.45097541809082, "learning_rate": 2.5790909090909095e-06, "loss": 6.479514312744141, "step": 48935 }, { "epoch": 0.1884, "grad_norm": 6.713593482971191, "learning_rate": 2.578838383838384e-06, "loss": 6.27366943359375, "step": 48940 }, { "epoch": 0.18845, "grad_norm": 12.236699104309082, "learning_rate": 2.5785858585858587e-06, "loss": 6.16339111328125, "step": 48945 }, { "epoch": 0.1885, "grad_norm": 5.614632606506348, "learning_rate": 2.5783333333333338e-06, "loss": 6.242253112792969, "step": 48950 }, { "epoch": 0.18855, "grad_norm": 5.477415084838867, "learning_rate": 2.5780808080808084e-06, "loss": 6.264701080322266, "step": 48955 }, { "epoch": 0.1886, "grad_norm": 8.267935752868652, "learning_rate": 2.577828282828283e-06, "loss": 6.219635391235352, "step": 48960 }, { "epoch": 0.18865, "grad_norm": 11.048696517944336, "learning_rate": 2.5775757575757577e-06, "loss": 6.2633613586425785, "step": 48965 }, { "epoch": 0.1887, "grad_norm": 6.541975021362305, "learning_rate": 2.5773232323232327e-06, "loss": 6.261846542358398, "step": 48970 }, { "epoch": 0.18875, "grad_norm": 4.8001275062561035, "learning_rate": 2.5770707070707074e-06, "loss": 6.269416046142578, "step": 48975 }, { "epoch": 0.1888, "grad_norm": 10.730286598205566, "learning_rate": 2.576818181818182e-06, "loss": 6.248748016357422, "step": 48980 }, { "epoch": 0.18885, "grad_norm": 3.4705069065093994, "learning_rate": 2.5765656565656566e-06, "loss": 6.3053031921386715, "step": 48985 }, { "epoch": 0.1889, "grad_norm": 11.79560375213623, "learning_rate": 2.5763131313131317e-06, "loss": 6.349605941772461, "step": 48990 }, { "epoch": 0.18895, "grad_norm": 6.676380634307861, "learning_rate": 2.5760606060606063e-06, "loss": 6.269178009033203, "step": 48995 }, { "epoch": 0.189, "grad_norm": 5.1521782875061035, "learning_rate": 2.575808080808081e-06, "loss": 6.194595718383789, "step": 49000 }, { "epoch": 0.18905, "grad_norm": 6.293644428253174, "learning_rate": 2.5755555555555556e-06, "loss": 6.325164031982422, "step": 49005 }, { "epoch": 0.1891, "grad_norm": 4.515174388885498, "learning_rate": 2.5753030303030306e-06, "loss": 6.217018508911133, "step": 49010 }, { "epoch": 0.18915, "grad_norm": 15.09038257598877, "learning_rate": 2.5750505050505052e-06, "loss": 6.277073669433594, "step": 49015 }, { "epoch": 0.1892, "grad_norm": 4.879905700683594, "learning_rate": 2.57479797979798e-06, "loss": 6.241439437866211, "step": 49020 }, { "epoch": 0.18925, "grad_norm": 9.150812149047852, "learning_rate": 2.5745454545454545e-06, "loss": 6.193593597412109, "step": 49025 }, { "epoch": 0.1893, "grad_norm": 14.173181533813477, "learning_rate": 2.5742929292929296e-06, "loss": 6.223388671875, "step": 49030 }, { "epoch": 0.18935, "grad_norm": 14.158151626586914, "learning_rate": 2.574040404040404e-06, "loss": 6.228081893920899, "step": 49035 }, { "epoch": 0.1894, "grad_norm": 5.278214931488037, "learning_rate": 2.573787878787879e-06, "loss": 6.290943908691406, "step": 49040 }, { "epoch": 0.18945, "grad_norm": 6.5858259201049805, "learning_rate": 2.5735353535353534e-06, "loss": 6.433800506591797, "step": 49045 }, { "epoch": 0.1895, "grad_norm": 4.420543193817139, "learning_rate": 2.573282828282829e-06, "loss": 6.27868881225586, "step": 49050 }, { "epoch": 0.18955, "grad_norm": 20.557491302490234, "learning_rate": 2.573030303030303e-06, "loss": 6.372946548461914, "step": 49055 }, { "epoch": 0.1896, "grad_norm": 5.960613250732422, "learning_rate": 2.5727777777777778e-06, "loss": 6.292752075195312, "step": 49060 }, { "epoch": 0.18965, "grad_norm": 5.148416996002197, "learning_rate": 2.5725252525252524e-06, "loss": 6.270134735107422, "step": 49065 }, { "epoch": 0.1897, "grad_norm": 4.971664905548096, "learning_rate": 2.572272727272728e-06, "loss": 6.211388015747071, "step": 49070 }, { "epoch": 0.18975, "grad_norm": 6.771578788757324, "learning_rate": 2.5720202020202025e-06, "loss": 6.269935226440429, "step": 49075 }, { "epoch": 0.1898, "grad_norm": 7.280397415161133, "learning_rate": 2.571767676767677e-06, "loss": 6.2415016174316404, "step": 49080 }, { "epoch": 0.18985, "grad_norm": 5.153993606567383, "learning_rate": 2.5715151515151513e-06, "loss": 6.25068244934082, "step": 49085 }, { "epoch": 0.1899, "grad_norm": 5.687320709228516, "learning_rate": 2.571262626262627e-06, "loss": 6.194783020019531, "step": 49090 }, { "epoch": 0.18995, "grad_norm": 9.53076171875, "learning_rate": 2.5710101010101014e-06, "loss": 6.244087982177734, "step": 49095 }, { "epoch": 0.19, "grad_norm": 11.363621711730957, "learning_rate": 2.570757575757576e-06, "loss": 6.267411804199218, "step": 49100 }, { "epoch": 0.19005, "grad_norm": 7.7662129402160645, "learning_rate": 2.5705050505050507e-06, "loss": 6.237572097778321, "step": 49105 }, { "epoch": 0.1901, "grad_norm": 4.544632911682129, "learning_rate": 2.5702525252525258e-06, "loss": 6.2455902099609375, "step": 49110 }, { "epoch": 0.19015, "grad_norm": 6.281473636627197, "learning_rate": 2.5700000000000004e-06, "loss": 6.268207550048828, "step": 49115 }, { "epoch": 0.1902, "grad_norm": 8.69561767578125, "learning_rate": 2.569747474747475e-06, "loss": 6.330655288696289, "step": 49120 }, { "epoch": 0.19025, "grad_norm": 8.793510437011719, "learning_rate": 2.5694949494949496e-06, "loss": 6.302120971679687, "step": 49125 }, { "epoch": 0.1903, "grad_norm": 4.904294967651367, "learning_rate": 2.5692424242424247e-06, "loss": 6.290804672241211, "step": 49130 }, { "epoch": 0.19035, "grad_norm": 10.867995262145996, "learning_rate": 2.5689898989898993e-06, "loss": 6.2886394500732425, "step": 49135 }, { "epoch": 0.1904, "grad_norm": 10.549060821533203, "learning_rate": 2.568737373737374e-06, "loss": 6.656362915039063, "step": 49140 }, { "epoch": 0.19045, "grad_norm": 6.009684085845947, "learning_rate": 2.5684848484848486e-06, "loss": 6.515086364746094, "step": 49145 }, { "epoch": 0.1905, "grad_norm": 4.500693321228027, "learning_rate": 2.5682323232323236e-06, "loss": 6.407121276855468, "step": 49150 }, { "epoch": 0.19055, "grad_norm": 8.462690353393555, "learning_rate": 2.5679797979797983e-06, "loss": 6.2667686462402346, "step": 49155 }, { "epoch": 0.1906, "grad_norm": 8.697178840637207, "learning_rate": 2.567727272727273e-06, "loss": 6.270337677001953, "step": 49160 }, { "epoch": 0.19065, "grad_norm": 4.600515365600586, "learning_rate": 2.5674747474747475e-06, "loss": 6.259711456298828, "step": 49165 }, { "epoch": 0.1907, "grad_norm": 5.580930709838867, "learning_rate": 2.5672222222222226e-06, "loss": 6.185636520385742, "step": 49170 }, { "epoch": 0.19075, "grad_norm": 6.207088470458984, "learning_rate": 2.5669696969696972e-06, "loss": 6.266794586181641, "step": 49175 }, { "epoch": 0.1908, "grad_norm": 8.738142013549805, "learning_rate": 2.566717171717172e-06, "loss": 6.285282897949219, "step": 49180 }, { "epoch": 0.19085, "grad_norm": 5.864974498748779, "learning_rate": 2.5664646464646465e-06, "loss": 6.264548110961914, "step": 49185 }, { "epoch": 0.1909, "grad_norm": 5.345311641693115, "learning_rate": 2.5662121212121215e-06, "loss": 6.2092643737792965, "step": 49190 }, { "epoch": 0.19095, "grad_norm": 3.3796541690826416, "learning_rate": 2.565959595959596e-06, "loss": 6.170139312744141, "step": 49195 }, { "epoch": 0.191, "grad_norm": 4.290106773376465, "learning_rate": 2.5657070707070708e-06, "loss": 6.330483245849609, "step": 49200 }, { "epoch": 0.19105, "grad_norm": 4.907315731048584, "learning_rate": 2.5654545454545454e-06, "loss": 6.236188125610352, "step": 49205 }, { "epoch": 0.1911, "grad_norm": 8.493396759033203, "learning_rate": 2.5652020202020205e-06, "loss": 6.247052764892578, "step": 49210 }, { "epoch": 0.19115, "grad_norm": 6.215777397155762, "learning_rate": 2.564949494949495e-06, "loss": 6.245053482055664, "step": 49215 }, { "epoch": 0.1912, "grad_norm": 5.318109035491943, "learning_rate": 2.5646969696969697e-06, "loss": 6.218050384521485, "step": 49220 }, { "epoch": 0.19125, "grad_norm": 5.583347320556641, "learning_rate": 2.5644444444444444e-06, "loss": 6.335743713378906, "step": 49225 }, { "epoch": 0.1913, "grad_norm": 5.131632328033447, "learning_rate": 2.5641919191919194e-06, "loss": 6.503478240966797, "step": 49230 }, { "epoch": 0.19135, "grad_norm": 11.268439292907715, "learning_rate": 2.563939393939394e-06, "loss": 6.234709167480469, "step": 49235 }, { "epoch": 0.1914, "grad_norm": 19.993053436279297, "learning_rate": 2.5636868686868687e-06, "loss": 6.189196014404297, "step": 49240 }, { "epoch": 0.19145, "grad_norm": 3.650047779083252, "learning_rate": 2.5634343434343433e-06, "loss": 6.158535003662109, "step": 49245 }, { "epoch": 0.1915, "grad_norm": 10.079051971435547, "learning_rate": 2.5631818181818184e-06, "loss": 6.279978561401367, "step": 49250 }, { "epoch": 0.19155, "grad_norm": 7.439229965209961, "learning_rate": 2.562929292929293e-06, "loss": 6.2404930114746096, "step": 49255 }, { "epoch": 0.1916, "grad_norm": 6.238080024719238, "learning_rate": 2.5626767676767676e-06, "loss": 6.264157485961914, "step": 49260 }, { "epoch": 0.19165, "grad_norm": 10.82448959350586, "learning_rate": 2.5624242424242422e-06, "loss": 6.33929214477539, "step": 49265 }, { "epoch": 0.1917, "grad_norm": 3.8104515075683594, "learning_rate": 2.5621717171717177e-06, "loss": 6.252438354492187, "step": 49270 }, { "epoch": 0.19175, "grad_norm": 5.194665431976318, "learning_rate": 2.561919191919192e-06, "loss": 6.245418930053711, "step": 49275 }, { "epoch": 0.1918, "grad_norm": 4.629490852355957, "learning_rate": 2.5616666666666666e-06, "loss": 6.360482406616211, "step": 49280 }, { "epoch": 0.19185, "grad_norm": 6.012112140655518, "learning_rate": 2.561414141414141e-06, "loss": 6.233753204345703, "step": 49285 }, { "epoch": 0.1919, "grad_norm": 7.1700897216796875, "learning_rate": 2.5611616161616167e-06, "loss": 6.298054504394531, "step": 49290 }, { "epoch": 0.19195, "grad_norm": 3.7842917442321777, "learning_rate": 2.5609090909090913e-06, "loss": 6.315502548217774, "step": 49295 }, { "epoch": 0.192, "grad_norm": 5.015597820281982, "learning_rate": 2.560656565656566e-06, "loss": 6.244719696044922, "step": 49300 }, { "epoch": 0.19205, "grad_norm": 5.807133197784424, "learning_rate": 2.56040404040404e-06, "loss": 6.268914794921875, "step": 49305 }, { "epoch": 0.1921, "grad_norm": 8.493719100952148, "learning_rate": 2.5601515151515156e-06, "loss": 6.224559020996094, "step": 49310 }, { "epoch": 0.19215, "grad_norm": 10.17265796661377, "learning_rate": 2.5598989898989902e-06, "loss": 6.392619705200195, "step": 49315 }, { "epoch": 0.1922, "grad_norm": 4.521240234375, "learning_rate": 2.559646464646465e-06, "loss": 6.198482513427734, "step": 49320 }, { "epoch": 0.19225, "grad_norm": 23.955596923828125, "learning_rate": 2.55939393939394e-06, "loss": 6.194322967529297, "step": 49325 }, { "epoch": 0.1923, "grad_norm": 4.718942165374756, "learning_rate": 2.5591414141414146e-06, "loss": 6.292372512817383, "step": 49330 }, { "epoch": 0.19235, "grad_norm": 5.672336101531982, "learning_rate": 2.558888888888889e-06, "loss": 6.235918426513672, "step": 49335 }, { "epoch": 0.1924, "grad_norm": 14.888675689697266, "learning_rate": 2.558636363636364e-06, "loss": 6.3594318389892575, "step": 49340 }, { "epoch": 0.19245, "grad_norm": 5.659474849700928, "learning_rate": 2.558383838383839e-06, "loss": 6.490545654296875, "step": 49345 }, { "epoch": 0.1925, "grad_norm": 5.204984664916992, "learning_rate": 2.5581313131313135e-06, "loss": 6.255197525024414, "step": 49350 }, { "epoch": 0.19255, "grad_norm": 6.772719860076904, "learning_rate": 2.557878787878788e-06, "loss": 6.287393188476562, "step": 49355 }, { "epoch": 0.1926, "grad_norm": 8.988268852233887, "learning_rate": 2.5576262626262628e-06, "loss": 6.2579387664794925, "step": 49360 }, { "epoch": 0.19265, "grad_norm": 5.236077785491943, "learning_rate": 2.557373737373738e-06, "loss": 6.256295013427734, "step": 49365 }, { "epoch": 0.1927, "grad_norm": 12.332254409790039, "learning_rate": 2.5571212121212124e-06, "loss": 6.248375320434571, "step": 49370 }, { "epoch": 0.19275, "grad_norm": 7.010597229003906, "learning_rate": 2.556868686868687e-06, "loss": 6.2750202178955075, "step": 49375 }, { "epoch": 0.1928, "grad_norm": 4.9375481605529785, "learning_rate": 2.5566161616161617e-06, "loss": 6.319250106811523, "step": 49380 }, { "epoch": 0.19285, "grad_norm": 15.469962120056152, "learning_rate": 2.5563636363636368e-06, "loss": 6.384063720703125, "step": 49385 }, { "epoch": 0.1929, "grad_norm": 7.703574180603027, "learning_rate": 2.5561111111111114e-06, "loss": 6.241492080688476, "step": 49390 }, { "epoch": 0.19295, "grad_norm": 4.531920909881592, "learning_rate": 2.555858585858586e-06, "loss": 6.29424819946289, "step": 49395 }, { "epoch": 0.193, "grad_norm": 11.125548362731934, "learning_rate": 2.5556060606060607e-06, "loss": 6.317346572875977, "step": 49400 }, { "epoch": 0.19305, "grad_norm": 6.369386672973633, "learning_rate": 2.5553535353535357e-06, "loss": 6.289251327514648, "step": 49405 }, { "epoch": 0.1931, "grad_norm": 7.824663162231445, "learning_rate": 2.5551010101010103e-06, "loss": 6.236985778808593, "step": 49410 }, { "epoch": 0.19315, "grad_norm": 5.468588829040527, "learning_rate": 2.554848484848485e-06, "loss": 6.2594562530517575, "step": 49415 }, { "epoch": 0.1932, "grad_norm": 7.105429172515869, "learning_rate": 2.5545959595959596e-06, "loss": 6.276838302612305, "step": 49420 }, { "epoch": 0.19325, "grad_norm": 4.1598005294799805, "learning_rate": 2.5543434343434346e-06, "loss": 6.246601867675781, "step": 49425 }, { "epoch": 0.1933, "grad_norm": 5.277898788452148, "learning_rate": 2.5540909090909093e-06, "loss": 6.2564445495605465, "step": 49430 }, { "epoch": 0.19335, "grad_norm": 5.050108909606934, "learning_rate": 2.553838383838384e-06, "loss": 6.229592514038086, "step": 49435 }, { "epoch": 0.1934, "grad_norm": 8.038198471069336, "learning_rate": 2.5535858585858585e-06, "loss": 6.296820831298828, "step": 49440 }, { "epoch": 0.19345, "grad_norm": 8.222665786743164, "learning_rate": 2.5533333333333336e-06, "loss": 6.2468517303466795, "step": 49445 }, { "epoch": 0.1935, "grad_norm": 3.33221435546875, "learning_rate": 2.5530808080808082e-06, "loss": 6.259613037109375, "step": 49450 }, { "epoch": 0.19355, "grad_norm": 6.035661697387695, "learning_rate": 2.552828282828283e-06, "loss": 6.253533935546875, "step": 49455 }, { "epoch": 0.1936, "grad_norm": 7.573888301849365, "learning_rate": 2.5525757575757575e-06, "loss": 6.274067306518555, "step": 49460 }, { "epoch": 0.19365, "grad_norm": 38.818241119384766, "learning_rate": 2.552323232323233e-06, "loss": 6.169465637207031, "step": 49465 }, { "epoch": 0.1937, "grad_norm": 9.56222152709961, "learning_rate": 2.552070707070707e-06, "loss": 6.040264892578125, "step": 49470 }, { "epoch": 0.19375, "grad_norm": 5.517727851867676, "learning_rate": 2.551818181818182e-06, "loss": 6.2547859191894535, "step": 49475 }, { "epoch": 0.1938, "grad_norm": 6.0000834465026855, "learning_rate": 2.5515656565656564e-06, "loss": 6.285161972045898, "step": 49480 }, { "epoch": 0.19385, "grad_norm": 4.462721824645996, "learning_rate": 2.551313131313132e-06, "loss": 6.353302001953125, "step": 49485 }, { "epoch": 0.1939, "grad_norm": 4.639883518218994, "learning_rate": 2.5510606060606065e-06, "loss": 6.252019882202148, "step": 49490 }, { "epoch": 0.19395, "grad_norm": 9.594871520996094, "learning_rate": 2.550808080808081e-06, "loss": 6.275057601928711, "step": 49495 }, { "epoch": 0.194, "grad_norm": 6.619569778442383, "learning_rate": 2.5505555555555554e-06, "loss": 6.288486480712891, "step": 49500 }, { "epoch": 0.19405, "grad_norm": 7.626273155212402, "learning_rate": 2.550303030303031e-06, "loss": 6.259918975830078, "step": 49505 }, { "epoch": 0.1941, "grad_norm": 5.342676162719727, "learning_rate": 2.5500505050505055e-06, "loss": 6.2594043731689455, "step": 49510 }, { "epoch": 0.19415, "grad_norm": 5.9844560623168945, "learning_rate": 2.54979797979798e-06, "loss": 6.283139419555664, "step": 49515 }, { "epoch": 0.1942, "grad_norm": 7.267798900604248, "learning_rate": 2.5495454545454547e-06, "loss": 6.290557098388672, "step": 49520 }, { "epoch": 0.19425, "grad_norm": 13.667284965515137, "learning_rate": 2.54929292929293e-06, "loss": 6.206590270996093, "step": 49525 }, { "epoch": 0.1943, "grad_norm": 6.455989837646484, "learning_rate": 2.5490404040404044e-06, "loss": 6.237411880493164, "step": 49530 }, { "epoch": 0.19435, "grad_norm": 3.722670555114746, "learning_rate": 2.548787878787879e-06, "loss": 6.2253467559814455, "step": 49535 }, { "epoch": 0.1944, "grad_norm": 7.678994655609131, "learning_rate": 2.5485353535353537e-06, "loss": 6.228109359741211, "step": 49540 }, { "epoch": 0.19445, "grad_norm": 8.735237121582031, "learning_rate": 2.5482828282828287e-06, "loss": 6.260719299316406, "step": 49545 }, { "epoch": 0.1945, "grad_norm": 6.092301368713379, "learning_rate": 2.5480303030303034e-06, "loss": 6.247369384765625, "step": 49550 }, { "epoch": 0.19455, "grad_norm": 4.378487586975098, "learning_rate": 2.547777777777778e-06, "loss": 6.306856536865235, "step": 49555 }, { "epoch": 0.1946, "grad_norm": 6.51929235458374, "learning_rate": 2.5475252525252526e-06, "loss": 6.316536331176758, "step": 49560 }, { "epoch": 0.19465, "grad_norm": 9.74860954284668, "learning_rate": 2.5472727272727277e-06, "loss": 6.305975723266601, "step": 49565 }, { "epoch": 0.1947, "grad_norm": 7.420664310455322, "learning_rate": 2.5470202020202023e-06, "loss": 6.2628173828125, "step": 49570 }, { "epoch": 0.19475, "grad_norm": 4.948880672454834, "learning_rate": 2.546767676767677e-06, "loss": 6.535305786132812, "step": 49575 }, { "epoch": 0.1948, "grad_norm": 6.307788372039795, "learning_rate": 2.5465151515151516e-06, "loss": 6.331264495849609, "step": 49580 }, { "epoch": 0.19485, "grad_norm": 5.2758283615112305, "learning_rate": 2.5462626262626266e-06, "loss": 6.233275604248047, "step": 49585 }, { "epoch": 0.1949, "grad_norm": 4.821529865264893, "learning_rate": 2.5460101010101013e-06, "loss": 6.315261459350586, "step": 49590 }, { "epoch": 0.19495, "grad_norm": 9.81965446472168, "learning_rate": 2.545757575757576e-06, "loss": 6.252927017211914, "step": 49595 }, { "epoch": 0.195, "grad_norm": 5.461148262023926, "learning_rate": 2.5455050505050505e-06, "loss": 6.26063232421875, "step": 49600 }, { "epoch": 0.19505, "grad_norm": 5.8232035636901855, "learning_rate": 2.5452525252525256e-06, "loss": 6.275896072387695, "step": 49605 }, { "epoch": 0.1951, "grad_norm": 6.748569488525391, "learning_rate": 2.545e-06, "loss": 6.220322799682617, "step": 49610 }, { "epoch": 0.19515, "grad_norm": 34.90372085571289, "learning_rate": 2.544747474747475e-06, "loss": 6.264942932128906, "step": 49615 }, { "epoch": 0.1952, "grad_norm": 5.820657730102539, "learning_rate": 2.5444949494949495e-06, "loss": 6.239437866210937, "step": 49620 }, { "epoch": 0.19525, "grad_norm": 21.731359481811523, "learning_rate": 2.5442424242424245e-06, "loss": 6.290864944458008, "step": 49625 }, { "epoch": 0.1953, "grad_norm": 10.041280746459961, "learning_rate": 2.543989898989899e-06, "loss": 6.251887512207031, "step": 49630 }, { "epoch": 0.19535, "grad_norm": 11.2418212890625, "learning_rate": 2.5437373737373738e-06, "loss": 6.233210754394531, "step": 49635 }, { "epoch": 0.1954, "grad_norm": 7.633718967437744, "learning_rate": 2.5434848484848484e-06, "loss": 6.206011581420898, "step": 49640 }, { "epoch": 0.19545, "grad_norm": 29.50637435913086, "learning_rate": 2.5432323232323235e-06, "loss": 6.41741714477539, "step": 49645 }, { "epoch": 0.1955, "grad_norm": 5.431766510009766, "learning_rate": 2.542979797979798e-06, "loss": 6.306104278564453, "step": 49650 }, { "epoch": 0.19555, "grad_norm": 8.060921669006348, "learning_rate": 2.5427272727272727e-06, "loss": 6.216228866577149, "step": 49655 }, { "epoch": 0.1956, "grad_norm": 6.418784141540527, "learning_rate": 2.5424747474747473e-06, "loss": 6.258552169799804, "step": 49660 }, { "epoch": 0.19565, "grad_norm": 4.755044460296631, "learning_rate": 2.5422222222222224e-06, "loss": 6.255119323730469, "step": 49665 }, { "epoch": 0.1957, "grad_norm": 8.122811317443848, "learning_rate": 2.541969696969697e-06, "loss": 6.219739151000977, "step": 49670 }, { "epoch": 0.19575, "grad_norm": 4.895333290100098, "learning_rate": 2.5417171717171717e-06, "loss": 6.2493537902832035, "step": 49675 }, { "epoch": 0.1958, "grad_norm": 6.294668674468994, "learning_rate": 2.5414646464646463e-06, "loss": 6.246758270263672, "step": 49680 }, { "epoch": 0.19585, "grad_norm": 14.956772804260254, "learning_rate": 2.5412121212121218e-06, "loss": 6.25451431274414, "step": 49685 }, { "epoch": 0.1959, "grad_norm": 6.240623474121094, "learning_rate": 2.540959595959596e-06, "loss": 6.256995773315429, "step": 49690 }, { "epoch": 0.19595, "grad_norm": 12.748623847961426, "learning_rate": 2.5407070707070706e-06, "loss": 6.294410705566406, "step": 49695 }, { "epoch": 0.196, "grad_norm": 6.457271575927734, "learning_rate": 2.5404545454545452e-06, "loss": 6.252241134643555, "step": 49700 }, { "epoch": 0.19605, "grad_norm": 7.037939071655273, "learning_rate": 2.5402020202020207e-06, "loss": 6.28576774597168, "step": 49705 }, { "epoch": 0.1961, "grad_norm": 5.942524433135986, "learning_rate": 2.5399494949494953e-06, "loss": 6.274676513671875, "step": 49710 }, { "epoch": 0.19615, "grad_norm": 4.092289447784424, "learning_rate": 2.53969696969697e-06, "loss": 6.314986801147461, "step": 49715 }, { "epoch": 0.1962, "grad_norm": 8.213889122009277, "learning_rate": 2.539444444444444e-06, "loss": 6.342423629760742, "step": 49720 }, { "epoch": 0.19625, "grad_norm": 9.967015266418457, "learning_rate": 2.5391919191919197e-06, "loss": 6.391808319091797, "step": 49725 }, { "epoch": 0.1963, "grad_norm": 9.924222946166992, "learning_rate": 2.5389393939393943e-06, "loss": 6.271770477294922, "step": 49730 }, { "epoch": 0.19635, "grad_norm": 28.80489158630371, "learning_rate": 2.538686868686869e-06, "loss": 6.265848922729492, "step": 49735 }, { "epoch": 0.1964, "grad_norm": 5.889763832092285, "learning_rate": 2.538434343434344e-06, "loss": 6.28631706237793, "step": 49740 }, { "epoch": 0.19645, "grad_norm": 7.259796619415283, "learning_rate": 2.5381818181818186e-06, "loss": 6.300747680664062, "step": 49745 }, { "epoch": 0.1965, "grad_norm": 4.813076496124268, "learning_rate": 2.5379292929292932e-06, "loss": 6.257456970214844, "step": 49750 }, { "epoch": 0.19655, "grad_norm": 4.39386510848999, "learning_rate": 2.537676767676768e-06, "loss": 6.288967895507812, "step": 49755 }, { "epoch": 0.1966, "grad_norm": 5.40500020980835, "learning_rate": 2.537424242424243e-06, "loss": 6.373891448974609, "step": 49760 }, { "epoch": 0.19665, "grad_norm": 6.475799083709717, "learning_rate": 2.5371717171717175e-06, "loss": 6.336884689331055, "step": 49765 }, { "epoch": 0.1967, "grad_norm": 6.880368709564209, "learning_rate": 2.536919191919192e-06, "loss": 6.294986724853516, "step": 49770 }, { "epoch": 0.19675, "grad_norm": 19.850482940673828, "learning_rate": 2.536666666666667e-06, "loss": 6.231388092041016, "step": 49775 }, { "epoch": 0.1968, "grad_norm": 6.302379608154297, "learning_rate": 2.536414141414142e-06, "loss": 6.268606948852539, "step": 49780 }, { "epoch": 0.19685, "grad_norm": 6.698851108551025, "learning_rate": 2.5361616161616165e-06, "loss": 6.219470596313476, "step": 49785 }, { "epoch": 0.1969, "grad_norm": 22.631744384765625, "learning_rate": 2.535909090909091e-06, "loss": 6.480807495117188, "step": 49790 }, { "epoch": 0.19695, "grad_norm": 5.3607869148254395, "learning_rate": 2.5356565656565657e-06, "loss": 6.281029891967774, "step": 49795 }, { "epoch": 0.197, "grad_norm": 4.6973490715026855, "learning_rate": 2.535404040404041e-06, "loss": 6.255810546875, "step": 49800 }, { "epoch": 0.19705, "grad_norm": 10.91834545135498, "learning_rate": 2.5351515151515154e-06, "loss": 6.178659820556641, "step": 49805 }, { "epoch": 0.1971, "grad_norm": 5.362521171569824, "learning_rate": 2.53489898989899e-06, "loss": 6.197890853881836, "step": 49810 }, { "epoch": 0.19715, "grad_norm": 7.690623760223389, "learning_rate": 2.5346464646464647e-06, "loss": 6.237416076660156, "step": 49815 }, { "epoch": 0.1972, "grad_norm": 4.992671966552734, "learning_rate": 2.5343939393939397e-06, "loss": 6.240348815917969, "step": 49820 }, { "epoch": 0.19725, "grad_norm": 6.229156494140625, "learning_rate": 2.5341414141414144e-06, "loss": 6.249267196655273, "step": 49825 }, { "epoch": 0.1973, "grad_norm": 4.727314472198486, "learning_rate": 2.533888888888889e-06, "loss": 6.259444808959961, "step": 49830 }, { "epoch": 0.19735, "grad_norm": 5.506601333618164, "learning_rate": 2.5336363636363636e-06, "loss": 6.199823379516602, "step": 49835 }, { "epoch": 0.1974, "grad_norm": 7.88227653503418, "learning_rate": 2.5333838383838387e-06, "loss": 6.36942138671875, "step": 49840 }, { "epoch": 0.19745, "grad_norm": 6.831554889678955, "learning_rate": 2.5331313131313133e-06, "loss": 6.245549011230469, "step": 49845 }, { "epoch": 0.1975, "grad_norm": 3.877190589904785, "learning_rate": 2.532878787878788e-06, "loss": 6.237216186523438, "step": 49850 }, { "epoch": 0.19755, "grad_norm": 4.887547492980957, "learning_rate": 2.5326262626262626e-06, "loss": 6.2291206359863285, "step": 49855 }, { "epoch": 0.1976, "grad_norm": 6.2290754318237305, "learning_rate": 2.5323737373737376e-06, "loss": 6.2375, "step": 49860 }, { "epoch": 0.19765, "grad_norm": 5.0437822341918945, "learning_rate": 2.5321212121212123e-06, "loss": 6.27685317993164, "step": 49865 }, { "epoch": 0.1977, "grad_norm": 6.677088737487793, "learning_rate": 2.531868686868687e-06, "loss": 6.315046310424805, "step": 49870 }, { "epoch": 0.19775, "grad_norm": 7.430237293243408, "learning_rate": 2.5316161616161615e-06, "loss": 6.248124694824218, "step": 49875 }, { "epoch": 0.1978, "grad_norm": 6.285028457641602, "learning_rate": 2.531363636363637e-06, "loss": 6.247674179077149, "step": 49880 }, { "epoch": 0.19785, "grad_norm": 7.107331275939941, "learning_rate": 2.531111111111111e-06, "loss": 6.260677337646484, "step": 49885 }, { "epoch": 0.1979, "grad_norm": 4.4166154861450195, "learning_rate": 2.530858585858586e-06, "loss": 6.245711898803711, "step": 49890 }, { "epoch": 0.19795, "grad_norm": 6.733027458190918, "learning_rate": 2.5306060606060605e-06, "loss": 6.308213806152343, "step": 49895 }, { "epoch": 0.198, "grad_norm": 6.973963260650635, "learning_rate": 2.530353535353536e-06, "loss": 6.24494514465332, "step": 49900 }, { "epoch": 0.19805, "grad_norm": 5.454674243927002, "learning_rate": 2.5301010101010106e-06, "loss": 6.277666473388672, "step": 49905 }, { "epoch": 0.1981, "grad_norm": 4.718550682067871, "learning_rate": 2.529848484848485e-06, "loss": 6.293357849121094, "step": 49910 }, { "epoch": 0.19815, "grad_norm": 69.0890884399414, "learning_rate": 2.5295959595959594e-06, "loss": 6.093275451660157, "step": 49915 }, { "epoch": 0.1982, "grad_norm": 9.348984718322754, "learning_rate": 2.529343434343435e-06, "loss": 6.265186309814453, "step": 49920 }, { "epoch": 0.19825, "grad_norm": 10.963966369628906, "learning_rate": 2.5290909090909095e-06, "loss": 6.320544052124023, "step": 49925 }, { "epoch": 0.1983, "grad_norm": 6.6288957595825195, "learning_rate": 2.528838383838384e-06, "loss": 6.278413772583008, "step": 49930 }, { "epoch": 0.19835, "grad_norm": 20.232759475708008, "learning_rate": 2.5285858585858588e-06, "loss": 6.292303466796875, "step": 49935 }, { "epoch": 0.1984, "grad_norm": 5.470400333404541, "learning_rate": 2.528333333333334e-06, "loss": 6.231870651245117, "step": 49940 }, { "epoch": 0.19845, "grad_norm": 7.617183208465576, "learning_rate": 2.5280808080808085e-06, "loss": 6.278086853027344, "step": 49945 }, { "epoch": 0.1985, "grad_norm": 5.602214336395264, "learning_rate": 2.527828282828283e-06, "loss": 6.274223327636719, "step": 49950 }, { "epoch": 0.19855, "grad_norm": 5.300981521606445, "learning_rate": 2.5275757575757577e-06, "loss": 6.283240509033203, "step": 49955 }, { "epoch": 0.1986, "grad_norm": 10.157596588134766, "learning_rate": 2.5273232323232328e-06, "loss": 6.297488021850586, "step": 49960 }, { "epoch": 0.19865, "grad_norm": 12.647420883178711, "learning_rate": 2.5270707070707074e-06, "loss": 6.51298828125, "step": 49965 }, { "epoch": 0.1987, "grad_norm": 4.380114555358887, "learning_rate": 2.526818181818182e-06, "loss": 6.249011993408203, "step": 49970 }, { "epoch": 0.19875, "grad_norm": 10.956653594970703, "learning_rate": 2.5265656565656567e-06, "loss": 6.285200881958008, "step": 49975 }, { "epoch": 0.1988, "grad_norm": 6.14968729019165, "learning_rate": 2.5263131313131317e-06, "loss": 6.417530822753906, "step": 49980 }, { "epoch": 0.19885, "grad_norm": 4.659569263458252, "learning_rate": 2.5260606060606063e-06, "loss": 6.2325592041015625, "step": 49985 }, { "epoch": 0.1989, "grad_norm": 3.6402084827423096, "learning_rate": 2.525808080808081e-06, "loss": 6.277042007446289, "step": 49990 }, { "epoch": 0.19895, "grad_norm": 4.528951644897461, "learning_rate": 2.5255555555555556e-06, "loss": 6.227789306640625, "step": 49995 }, { "epoch": 0.199, "grad_norm": 4.44421911239624, "learning_rate": 2.5253030303030307e-06, "loss": 6.242911529541016, "step": 50000 }, { "epoch": 0.19905, "grad_norm": 4.784400463104248, "learning_rate": 2.5250505050505053e-06, "loss": 6.225411605834961, "step": 50005 }, { "epoch": 0.1991, "grad_norm": 6.0246357917785645, "learning_rate": 2.52479797979798e-06, "loss": 6.264725112915039, "step": 50010 }, { "epoch": 0.19915, "grad_norm": 6.142419338226318, "learning_rate": 2.5245454545454546e-06, "loss": 6.271052551269531, "step": 50015 }, { "epoch": 0.1992, "grad_norm": 4.694523811340332, "learning_rate": 2.5242929292929296e-06, "loss": 6.269665908813477, "step": 50020 }, { "epoch": 0.19925, "grad_norm": 5.481181621551514, "learning_rate": 2.5240404040404042e-06, "loss": 6.294979858398437, "step": 50025 }, { "epoch": 0.1993, "grad_norm": 5.57726526260376, "learning_rate": 2.523787878787879e-06, "loss": 6.281116104125976, "step": 50030 }, { "epoch": 0.19935, "grad_norm": 5.455478191375732, "learning_rate": 2.5235353535353535e-06, "loss": 6.259362411499024, "step": 50035 }, { "epoch": 0.1994, "grad_norm": 17.223485946655273, "learning_rate": 2.5232828282828286e-06, "loss": 6.644518280029297, "step": 50040 }, { "epoch": 0.19945, "grad_norm": 4.775510787963867, "learning_rate": 2.523030303030303e-06, "loss": 6.271084976196289, "step": 50045 }, { "epoch": 0.1995, "grad_norm": 7.860464096069336, "learning_rate": 2.522777777777778e-06, "loss": 6.2708477020263675, "step": 50050 }, { "epoch": 0.19955, "grad_norm": 9.58796215057373, "learning_rate": 2.5225252525252524e-06, "loss": 6.262348937988281, "step": 50055 }, { "epoch": 0.1996, "grad_norm": 7.631652355194092, "learning_rate": 2.5222727272727275e-06, "loss": 6.253146743774414, "step": 50060 }, { "epoch": 0.19965, "grad_norm": 4.74073600769043, "learning_rate": 2.522020202020202e-06, "loss": 6.232496643066407, "step": 50065 }, { "epoch": 0.1997, "grad_norm": 8.534119606018066, "learning_rate": 2.5217676767676768e-06, "loss": 6.2247779846191404, "step": 50070 }, { "epoch": 0.19975, "grad_norm": 18.228424072265625, "learning_rate": 2.5215151515151514e-06, "loss": 6.537854766845703, "step": 50075 }, { "epoch": 0.1998, "grad_norm": 6.15476131439209, "learning_rate": 2.5212626262626264e-06, "loss": 6.306629943847656, "step": 50080 }, { "epoch": 0.19985, "grad_norm": 7.709009647369385, "learning_rate": 2.521010101010101e-06, "loss": 6.328121948242187, "step": 50085 }, { "epoch": 0.1999, "grad_norm": 7.7438836097717285, "learning_rate": 2.5207575757575757e-06, "loss": 6.251179504394531, "step": 50090 }, { "epoch": 0.19995, "grad_norm": 4.380277633666992, "learning_rate": 2.5205050505050503e-06, "loss": 6.264029693603516, "step": 50095 }, { "epoch": 0.2, "grad_norm": 3.4444587230682373, "learning_rate": 2.520252525252526e-06, "loss": 6.27406005859375, "step": 50100 }, { "epoch": 0.20005, "grad_norm": 5.817159652709961, "learning_rate": 2.52e-06, "loss": 6.274032974243164, "step": 50105 }, { "epoch": 0.2001, "grad_norm": 9.877912521362305, "learning_rate": 2.5197474747474746e-06, "loss": 6.260358047485352, "step": 50110 }, { "epoch": 0.20015, "grad_norm": 7.125256538391113, "learning_rate": 2.5194949494949493e-06, "loss": 6.244119262695312, "step": 50115 }, { "epoch": 0.2002, "grad_norm": 5.224100112915039, "learning_rate": 2.5192424242424248e-06, "loss": 6.295536804199219, "step": 50120 }, { "epoch": 0.20025, "grad_norm": 5.170662879943848, "learning_rate": 2.5189898989898994e-06, "loss": 6.305734252929687, "step": 50125 }, { "epoch": 0.2003, "grad_norm": 3.760510206222534, "learning_rate": 2.518737373737374e-06, "loss": 6.305615234375, "step": 50130 }, { "epoch": 0.20035, "grad_norm": 11.115021705627441, "learning_rate": 2.5184848484848482e-06, "loss": 6.411235809326172, "step": 50135 }, { "epoch": 0.2004, "grad_norm": 4.338646411895752, "learning_rate": 2.5182323232323237e-06, "loss": 6.269055938720703, "step": 50140 }, { "epoch": 0.20045, "grad_norm": 9.082942008972168, "learning_rate": 2.5179797979797983e-06, "loss": 6.286923217773437, "step": 50145 }, { "epoch": 0.2005, "grad_norm": 9.44038200378418, "learning_rate": 2.517727272727273e-06, "loss": 6.258395767211914, "step": 50150 }, { "epoch": 0.20055, "grad_norm": 6.797711372375488, "learning_rate": 2.5174747474747476e-06, "loss": 6.268379592895508, "step": 50155 }, { "epoch": 0.2006, "grad_norm": 17.411474227905273, "learning_rate": 2.5172222222222226e-06, "loss": 6.335862731933593, "step": 50160 }, { "epoch": 0.20065, "grad_norm": 4.963981628417969, "learning_rate": 2.5169696969696973e-06, "loss": 6.261465454101563, "step": 50165 }, { "epoch": 0.2007, "grad_norm": 6.525044918060303, "learning_rate": 2.516717171717172e-06, "loss": 6.23322639465332, "step": 50170 }, { "epoch": 0.20075, "grad_norm": 14.596953392028809, "learning_rate": 2.516464646464647e-06, "loss": 6.228923034667969, "step": 50175 }, { "epoch": 0.2008, "grad_norm": 5.492651462554932, "learning_rate": 2.5162121212121216e-06, "loss": 6.173895263671875, "step": 50180 }, { "epoch": 0.20085, "grad_norm": 16.341468811035156, "learning_rate": 2.5159595959595962e-06, "loss": 6.3403984069824215, "step": 50185 }, { "epoch": 0.2009, "grad_norm": 11.532891273498535, "learning_rate": 2.515707070707071e-06, "loss": 6.2327827453613285, "step": 50190 }, { "epoch": 0.20095, "grad_norm": 4.14927339553833, "learning_rate": 2.515454545454546e-06, "loss": 6.327689361572266, "step": 50195 }, { "epoch": 0.201, "grad_norm": 6.413699626922607, "learning_rate": 2.5152020202020205e-06, "loss": 6.299884033203125, "step": 50200 }, { "epoch": 0.20105, "grad_norm": 5.843972682952881, "learning_rate": 2.514949494949495e-06, "loss": 6.253699493408203, "step": 50205 }, { "epoch": 0.2011, "grad_norm": 6.022891521453857, "learning_rate": 2.5146969696969698e-06, "loss": 6.304072570800781, "step": 50210 }, { "epoch": 0.20115, "grad_norm": 8.609193801879883, "learning_rate": 2.514444444444445e-06, "loss": 6.188970947265625, "step": 50215 }, { "epoch": 0.2012, "grad_norm": 12.187967300415039, "learning_rate": 2.5141919191919195e-06, "loss": 6.333774566650391, "step": 50220 }, { "epoch": 0.20125, "grad_norm": 6.067849636077881, "learning_rate": 2.513939393939394e-06, "loss": 6.427268218994141, "step": 50225 }, { "epoch": 0.2013, "grad_norm": 7.214718818664551, "learning_rate": 2.5136868686868687e-06, "loss": 6.29016227722168, "step": 50230 }, { "epoch": 0.20135, "grad_norm": 12.508318901062012, "learning_rate": 2.5134343434343438e-06, "loss": 6.302031707763672, "step": 50235 }, { "epoch": 0.2014, "grad_norm": 5.794668197631836, "learning_rate": 2.5131818181818184e-06, "loss": 6.268351745605469, "step": 50240 }, { "epoch": 0.20145, "grad_norm": 7.039858818054199, "learning_rate": 2.512929292929293e-06, "loss": 6.320880126953125, "step": 50245 }, { "epoch": 0.2015, "grad_norm": 6.126570701599121, "learning_rate": 2.5126767676767677e-06, "loss": 6.290330505371093, "step": 50250 }, { "epoch": 0.20155, "grad_norm": 5.634411334991455, "learning_rate": 2.5124242424242427e-06, "loss": 6.280659866333008, "step": 50255 }, { "epoch": 0.2016, "grad_norm": 7.395592212677002, "learning_rate": 2.5121717171717174e-06, "loss": 6.256629943847656, "step": 50260 }, { "epoch": 0.20165, "grad_norm": 4.17498779296875, "learning_rate": 2.511919191919192e-06, "loss": 6.3099628448486325, "step": 50265 }, { "epoch": 0.2017, "grad_norm": 11.847820281982422, "learning_rate": 2.5116666666666666e-06, "loss": 6.225931930541992, "step": 50270 }, { "epoch": 0.20175, "grad_norm": 10.638266563415527, "learning_rate": 2.5114141414141417e-06, "loss": 6.287227630615234, "step": 50275 }, { "epoch": 0.2018, "grad_norm": 6.0432305335998535, "learning_rate": 2.5111616161616163e-06, "loss": 6.241815185546875, "step": 50280 }, { "epoch": 0.20185, "grad_norm": 4.520680904388428, "learning_rate": 2.510909090909091e-06, "loss": 6.288633728027344, "step": 50285 }, { "epoch": 0.2019, "grad_norm": 7.6169047355651855, "learning_rate": 2.5106565656565656e-06, "loss": 6.242688751220703, "step": 50290 }, { "epoch": 0.20195, "grad_norm": 12.426961898803711, "learning_rate": 2.510404040404041e-06, "loss": 6.249090576171875, "step": 50295 }, { "epoch": 0.202, "grad_norm": 11.57026195526123, "learning_rate": 2.5101515151515152e-06, "loss": 6.283724975585938, "step": 50300 }, { "epoch": 0.20205, "grad_norm": 6.877190113067627, "learning_rate": 2.50989898989899e-06, "loss": 6.290302658081055, "step": 50305 }, { "epoch": 0.2021, "grad_norm": 4.7463507652282715, "learning_rate": 2.5096464646464645e-06, "loss": 6.263843536376953, "step": 50310 }, { "epoch": 0.20215, "grad_norm": 8.984270095825195, "learning_rate": 2.50939393939394e-06, "loss": 6.2411754608154295, "step": 50315 }, { "epoch": 0.2022, "grad_norm": 12.736248970031738, "learning_rate": 2.5091414141414146e-06, "loss": 6.2800437927246096, "step": 50320 }, { "epoch": 0.20225, "grad_norm": 6.36100959777832, "learning_rate": 2.5088888888888892e-06, "loss": 6.247788619995117, "step": 50325 }, { "epoch": 0.2023, "grad_norm": 6.488335609436035, "learning_rate": 2.5086363636363635e-06, "loss": 6.228033447265625, "step": 50330 }, { "epoch": 0.20235, "grad_norm": 8.022052764892578, "learning_rate": 2.508383838383839e-06, "loss": 6.212002944946289, "step": 50335 }, { "epoch": 0.2024, "grad_norm": 5.708883762359619, "learning_rate": 2.5081313131313136e-06, "loss": 6.280640411376953, "step": 50340 }, { "epoch": 0.20245, "grad_norm": 13.014446258544922, "learning_rate": 2.507878787878788e-06, "loss": 6.265510559082031, "step": 50345 }, { "epoch": 0.2025, "grad_norm": 8.831148147583008, "learning_rate": 2.507626262626263e-06, "loss": 6.246980667114258, "step": 50350 }, { "epoch": 0.20255, "grad_norm": 7.502352714538574, "learning_rate": 2.507373737373738e-06, "loss": 6.313706970214843, "step": 50355 }, { "epoch": 0.2026, "grad_norm": 7.295192241668701, "learning_rate": 2.5071212121212125e-06, "loss": 6.225182723999024, "step": 50360 }, { "epoch": 0.20265, "grad_norm": 6.376360893249512, "learning_rate": 2.506868686868687e-06, "loss": 6.260831451416015, "step": 50365 }, { "epoch": 0.2027, "grad_norm": 6.644468784332275, "learning_rate": 2.5066161616161618e-06, "loss": 6.256346893310547, "step": 50370 }, { "epoch": 0.20275, "grad_norm": 3.9910199642181396, "learning_rate": 2.506363636363637e-06, "loss": 6.228023910522461, "step": 50375 }, { "epoch": 0.2028, "grad_norm": 8.688261032104492, "learning_rate": 2.5061111111111114e-06, "loss": 6.345419311523438, "step": 50380 }, { "epoch": 0.20285, "grad_norm": 7.819876670837402, "learning_rate": 2.505858585858586e-06, "loss": 6.260973358154297, "step": 50385 }, { "epoch": 0.2029, "grad_norm": 8.700668334960938, "learning_rate": 2.5056060606060607e-06, "loss": 6.246576309204102, "step": 50390 }, { "epoch": 0.20295, "grad_norm": 6.576988697052002, "learning_rate": 2.5053535353535358e-06, "loss": 6.207525634765625, "step": 50395 }, { "epoch": 0.203, "grad_norm": 8.206652641296387, "learning_rate": 2.5051010101010104e-06, "loss": 6.298147201538086, "step": 50400 }, { "epoch": 0.20305, "grad_norm": 7.315264701843262, "learning_rate": 2.504848484848485e-06, "loss": 6.272156143188477, "step": 50405 }, { "epoch": 0.2031, "grad_norm": 96.8521957397461, "learning_rate": 2.5045959595959596e-06, "loss": 11.190609741210938, "step": 50410 }, { "epoch": 0.20315, "grad_norm": 6.585971832275391, "learning_rate": 2.5043434343434347e-06, "loss": 6.300156021118164, "step": 50415 }, { "epoch": 0.2032, "grad_norm": 8.244462013244629, "learning_rate": 2.5040909090909093e-06, "loss": 6.296827697753907, "step": 50420 }, { "epoch": 0.20325, "grad_norm": 7.2557854652404785, "learning_rate": 2.503838383838384e-06, "loss": 6.2336265563964846, "step": 50425 }, { "epoch": 0.2033, "grad_norm": 4.800219535827637, "learning_rate": 2.5035858585858586e-06, "loss": 6.263823699951172, "step": 50430 }, { "epoch": 0.20335, "grad_norm": 5.132396221160889, "learning_rate": 2.5033333333333336e-06, "loss": 6.1454322814941404, "step": 50435 }, { "epoch": 0.2034, "grad_norm": 11.72627067565918, "learning_rate": 2.5030808080808083e-06, "loss": 6.207251739501953, "step": 50440 }, { "epoch": 0.20345, "grad_norm": 6.949639320373535, "learning_rate": 2.502828282828283e-06, "loss": 6.250994873046875, "step": 50445 }, { "epoch": 0.2035, "grad_norm": 4.519877910614014, "learning_rate": 2.5025757575757575e-06, "loss": 6.224867248535157, "step": 50450 }, { "epoch": 0.20355, "grad_norm": 33.779605865478516, "learning_rate": 2.5023232323232326e-06, "loss": 6.5008491516113285, "step": 50455 }, { "epoch": 0.2036, "grad_norm": 36.74120330810547, "learning_rate": 2.5020707070707072e-06, "loss": 6.360050201416016, "step": 50460 }, { "epoch": 0.20365, "grad_norm": 5.83580207824707, "learning_rate": 2.501818181818182e-06, "loss": 6.3456367492675785, "step": 50465 }, { "epoch": 0.2037, "grad_norm": 4.592923641204834, "learning_rate": 2.5015656565656565e-06, "loss": 6.2611034393310545, "step": 50470 }, { "epoch": 0.20375, "grad_norm": 21.383007049560547, "learning_rate": 2.5013131313131315e-06, "loss": 6.659938049316406, "step": 50475 }, { "epoch": 0.2038, "grad_norm": 6.011013031005859, "learning_rate": 2.501060606060606e-06, "loss": 6.261945343017578, "step": 50480 }, { "epoch": 0.20385, "grad_norm": 6.758711338043213, "learning_rate": 2.500808080808081e-06, "loss": 6.305209732055664, "step": 50485 }, { "epoch": 0.2039, "grad_norm": 7.053555488586426, "learning_rate": 2.5005555555555554e-06, "loss": 6.265921783447266, "step": 50490 }, { "epoch": 0.20395, "grad_norm": 16.356122970581055, "learning_rate": 2.5003030303030305e-06, "loss": 6.257657623291015, "step": 50495 }, { "epoch": 0.204, "grad_norm": 24.292421340942383, "learning_rate": 2.500050505050505e-06, "loss": 6.384332275390625, "step": 50500 }, { "epoch": 0.20405, "grad_norm": 6.64277982711792, "learning_rate": 2.4997979797979797e-06, "loss": 6.300903701782227, "step": 50505 }, { "epoch": 0.2041, "grad_norm": 5.477458953857422, "learning_rate": 2.499545454545455e-06, "loss": 6.234635925292968, "step": 50510 }, { "epoch": 0.20415, "grad_norm": 10.506377220153809, "learning_rate": 2.4992929292929294e-06, "loss": 6.220009231567383, "step": 50515 }, { "epoch": 0.2042, "grad_norm": 6.875919818878174, "learning_rate": 2.4990404040404045e-06, "loss": 6.209540939331054, "step": 50520 }, { "epoch": 0.20425, "grad_norm": 4.474767684936523, "learning_rate": 2.4987878787878787e-06, "loss": 6.2156318664550785, "step": 50525 }, { "epoch": 0.2043, "grad_norm": 4.631478786468506, "learning_rate": 2.4985353535353537e-06, "loss": 6.167596054077149, "step": 50530 }, { "epoch": 0.20435, "grad_norm": 5.7960710525512695, "learning_rate": 2.4982828282828284e-06, "loss": 6.253596496582031, "step": 50535 }, { "epoch": 0.2044, "grad_norm": 5.214746952056885, "learning_rate": 2.4980303030303034e-06, "loss": 6.222603988647461, "step": 50540 }, { "epoch": 0.20445, "grad_norm": 18.205684661865234, "learning_rate": 2.497777777777778e-06, "loss": 6.4908088684082035, "step": 50545 }, { "epoch": 0.2045, "grad_norm": 20.864898681640625, "learning_rate": 2.4975252525252527e-06, "loss": 6.392955017089844, "step": 50550 }, { "epoch": 0.20455, "grad_norm": 8.08456802368164, "learning_rate": 2.4972727272727273e-06, "loss": 6.257477569580078, "step": 50555 }, { "epoch": 0.2046, "grad_norm": 9.643841743469238, "learning_rate": 2.4970202020202024e-06, "loss": 6.209708023071289, "step": 50560 }, { "epoch": 0.20465, "grad_norm": 4.205894947052002, "learning_rate": 2.496767676767677e-06, "loss": 6.243091583251953, "step": 50565 }, { "epoch": 0.2047, "grad_norm": 8.269043922424316, "learning_rate": 2.4965151515151516e-06, "loss": 6.257061386108399, "step": 50570 }, { "epoch": 0.20475, "grad_norm": 5.522393226623535, "learning_rate": 2.4962626262626263e-06, "loss": 6.27593879699707, "step": 50575 }, { "epoch": 0.2048, "grad_norm": 6.009525299072266, "learning_rate": 2.4960101010101013e-06, "loss": 6.243310928344727, "step": 50580 }, { "epoch": 0.20485, "grad_norm": 6.506129264831543, "learning_rate": 2.495757575757576e-06, "loss": 6.27514762878418, "step": 50585 }, { "epoch": 0.2049, "grad_norm": 6.060525417327881, "learning_rate": 2.4955050505050506e-06, "loss": 6.239144134521484, "step": 50590 }, { "epoch": 0.20495, "grad_norm": 4.891116619110107, "learning_rate": 2.4952525252525256e-06, "loss": 6.254197692871093, "step": 50595 }, { "epoch": 0.205, "grad_norm": 5.731975555419922, "learning_rate": 2.4950000000000003e-06, "loss": 6.246897888183594, "step": 50600 }, { "epoch": 0.20505, "grad_norm": 4.666231632232666, "learning_rate": 2.494747474747475e-06, "loss": 6.256787109375, "step": 50605 }, { "epoch": 0.2051, "grad_norm": 6.4136457443237305, "learning_rate": 2.4944949494949495e-06, "loss": 6.389812850952149, "step": 50610 }, { "epoch": 0.20515, "grad_norm": 12.604668617248535, "learning_rate": 2.4942424242424246e-06, "loss": 6.246990966796875, "step": 50615 }, { "epoch": 0.2052, "grad_norm": 4.499661445617676, "learning_rate": 2.493989898989899e-06, "loss": 6.249847412109375, "step": 50620 }, { "epoch": 0.20525, "grad_norm": 6.092418193817139, "learning_rate": 2.4937373737373742e-06, "loss": 6.285489654541015, "step": 50625 }, { "epoch": 0.2053, "grad_norm": 7.834766864776611, "learning_rate": 2.493484848484849e-06, "loss": 6.2256828308105465, "step": 50630 }, { "epoch": 0.20535, "grad_norm": 15.215644836425781, "learning_rate": 2.4932323232323235e-06, "loss": 6.567056274414062, "step": 50635 }, { "epoch": 0.2054, "grad_norm": 4.482730388641357, "learning_rate": 2.492979797979798e-06, "loss": 6.280728912353515, "step": 50640 }, { "epoch": 0.20545, "grad_norm": 3.7118756771087646, "learning_rate": 2.492727272727273e-06, "loss": 6.252156066894531, "step": 50645 }, { "epoch": 0.2055, "grad_norm": 5.24172306060791, "learning_rate": 2.492474747474748e-06, "loss": 6.268701171875, "step": 50650 }, { "epoch": 0.20555, "grad_norm": 5.709250450134277, "learning_rate": 2.4922222222222225e-06, "loss": 6.260549163818359, "step": 50655 }, { "epoch": 0.2056, "grad_norm": 6.374555587768555, "learning_rate": 2.491969696969697e-06, "loss": 6.287192153930664, "step": 50660 }, { "epoch": 0.20565, "grad_norm": 4.0030035972595215, "learning_rate": 2.491717171717172e-06, "loss": 6.260825729370117, "step": 50665 }, { "epoch": 0.2057, "grad_norm": 4.644779682159424, "learning_rate": 2.4914646464646468e-06, "loss": 6.174428939819336, "step": 50670 }, { "epoch": 0.20575, "grad_norm": 10.596787452697754, "learning_rate": 2.4912121212121214e-06, "loss": 6.255480575561523, "step": 50675 }, { "epoch": 0.2058, "grad_norm": 20.568246841430664, "learning_rate": 2.490959595959596e-06, "loss": 6.198371124267578, "step": 50680 }, { "epoch": 0.20585, "grad_norm": 8.221068382263184, "learning_rate": 2.490707070707071e-06, "loss": 6.201399612426758, "step": 50685 }, { "epoch": 0.2059, "grad_norm": 30.213520050048828, "learning_rate": 2.4904545454545457e-06, "loss": 6.450743103027344, "step": 50690 }, { "epoch": 0.20595, "grad_norm": 5.987983226776123, "learning_rate": 2.4902020202020203e-06, "loss": 6.354723358154297, "step": 50695 }, { "epoch": 0.206, "grad_norm": 9.869277954101562, "learning_rate": 2.489949494949495e-06, "loss": 6.248409271240234, "step": 50700 }, { "epoch": 0.20605, "grad_norm": 4.571915149688721, "learning_rate": 2.48969696969697e-06, "loss": 6.2736351013183596, "step": 50705 }, { "epoch": 0.2061, "grad_norm": 5.853895664215088, "learning_rate": 2.4894444444444447e-06, "loss": 6.267697525024414, "step": 50710 }, { "epoch": 0.20615, "grad_norm": 4.332423210144043, "learning_rate": 2.4891919191919193e-06, "loss": 6.352606964111328, "step": 50715 }, { "epoch": 0.2062, "grad_norm": 8.72915267944336, "learning_rate": 2.488939393939394e-06, "loss": 6.263322448730468, "step": 50720 }, { "epoch": 0.20625, "grad_norm": 8.045281410217285, "learning_rate": 2.488686868686869e-06, "loss": 6.2755279541015625, "step": 50725 }, { "epoch": 0.2063, "grad_norm": 5.083230972290039, "learning_rate": 2.4884343434343436e-06, "loss": 6.2021942138671875, "step": 50730 }, { "epoch": 0.20635, "grad_norm": 4.313807487487793, "learning_rate": 2.4881818181818187e-06, "loss": 6.285644149780273, "step": 50735 }, { "epoch": 0.2064, "grad_norm": 5.484350681304932, "learning_rate": 2.4879292929292933e-06, "loss": 6.231778717041015, "step": 50740 }, { "epoch": 0.20645, "grad_norm": 7.026946544647217, "learning_rate": 2.487676767676768e-06, "loss": 6.2568622589111325, "step": 50745 }, { "epoch": 0.2065, "grad_norm": 9.437230110168457, "learning_rate": 2.4874242424242425e-06, "loss": 6.249218368530274, "step": 50750 }, { "epoch": 0.20655, "grad_norm": 10.734655380249023, "learning_rate": 2.4871717171717176e-06, "loss": 6.2982025146484375, "step": 50755 }, { "epoch": 0.2066, "grad_norm": 7.496886253356934, "learning_rate": 2.4869191919191922e-06, "loss": 6.258639526367188, "step": 50760 }, { "epoch": 0.20665, "grad_norm": 6.3916192054748535, "learning_rate": 2.486666666666667e-06, "loss": 6.256439971923828, "step": 50765 }, { "epoch": 0.2067, "grad_norm": 7.533149242401123, "learning_rate": 2.4864141414141415e-06, "loss": 6.274566268920898, "step": 50770 }, { "epoch": 0.20675, "grad_norm": 4.55153226852417, "learning_rate": 2.4861616161616165e-06, "loss": 6.246253585815429, "step": 50775 }, { "epoch": 0.2068, "grad_norm": 3.6581223011016846, "learning_rate": 2.485909090909091e-06, "loss": 6.273342132568359, "step": 50780 }, { "epoch": 0.20685, "grad_norm": 6.641258239746094, "learning_rate": 2.485656565656566e-06, "loss": 6.230439758300781, "step": 50785 }, { "epoch": 0.2069, "grad_norm": 8.055048942565918, "learning_rate": 2.4854040404040404e-06, "loss": 6.323073196411133, "step": 50790 }, { "epoch": 0.20695, "grad_norm": 5.384231090545654, "learning_rate": 2.4851515151515155e-06, "loss": 6.313836669921875, "step": 50795 }, { "epoch": 0.207, "grad_norm": 6.543908596038818, "learning_rate": 2.48489898989899e-06, "loss": 6.248139953613281, "step": 50800 }, { "epoch": 0.20705, "grad_norm": 11.802889823913574, "learning_rate": 2.4846464646464647e-06, "loss": 6.244521713256836, "step": 50805 }, { "epoch": 0.2071, "grad_norm": 6.70557975769043, "learning_rate": 2.4843939393939394e-06, "loss": 6.264381408691406, "step": 50810 }, { "epoch": 0.20715, "grad_norm": 17.66782569885254, "learning_rate": 2.4841414141414144e-06, "loss": 6.268556213378906, "step": 50815 }, { "epoch": 0.2072, "grad_norm": 8.923881530761719, "learning_rate": 2.483888888888889e-06, "loss": 6.246881103515625, "step": 50820 }, { "epoch": 0.20725, "grad_norm": 6.764798164367676, "learning_rate": 2.4836363636363637e-06, "loss": 6.260829162597656, "step": 50825 }, { "epoch": 0.2073, "grad_norm": 4.728209018707275, "learning_rate": 2.4833838383838383e-06, "loss": 6.27118148803711, "step": 50830 }, { "epoch": 0.20735, "grad_norm": 6.626766681671143, "learning_rate": 2.4831313131313134e-06, "loss": 6.240925216674805, "step": 50835 }, { "epoch": 0.2074, "grad_norm": 7.254841327667236, "learning_rate": 2.482878787878788e-06, "loss": 6.233589172363281, "step": 50840 }, { "epoch": 0.20745, "grad_norm": 6.887106418609619, "learning_rate": 2.482626262626263e-06, "loss": 6.3235939025878904, "step": 50845 }, { "epoch": 0.2075, "grad_norm": 5.913443088531494, "learning_rate": 2.4823737373737377e-06, "loss": 6.2978981018066404, "step": 50850 }, { "epoch": 0.20755, "grad_norm": 8.726728439331055, "learning_rate": 2.4821212121212123e-06, "loss": 6.237708663940429, "step": 50855 }, { "epoch": 0.2076, "grad_norm": 3.2534782886505127, "learning_rate": 2.481868686868687e-06, "loss": 6.239460372924805, "step": 50860 }, { "epoch": 0.20765, "grad_norm": 9.068036079406738, "learning_rate": 2.481616161616162e-06, "loss": 6.279609680175781, "step": 50865 }, { "epoch": 0.2077, "grad_norm": 7.105849742889404, "learning_rate": 2.4813636363636366e-06, "loss": 6.244961547851562, "step": 50870 }, { "epoch": 0.20775, "grad_norm": 5.342262268066406, "learning_rate": 2.4811111111111113e-06, "loss": 6.256070709228515, "step": 50875 }, { "epoch": 0.2078, "grad_norm": 5.988504409790039, "learning_rate": 2.480858585858586e-06, "loss": 6.234971237182617, "step": 50880 }, { "epoch": 0.20785, "grad_norm": 12.673774719238281, "learning_rate": 2.480606060606061e-06, "loss": 6.312339019775391, "step": 50885 }, { "epoch": 0.2079, "grad_norm": 6.936028480529785, "learning_rate": 2.4803535353535356e-06, "loss": 6.251005172729492, "step": 50890 }, { "epoch": 0.20795, "grad_norm": 6.15684175491333, "learning_rate": 2.48010101010101e-06, "loss": 6.479811096191407, "step": 50895 }, { "epoch": 0.208, "grad_norm": 21.64484405517578, "learning_rate": 2.479848484848485e-06, "loss": 6.307085418701172, "step": 50900 }, { "epoch": 0.20805, "grad_norm": 5.751492977142334, "learning_rate": 2.47959595959596e-06, "loss": 6.261022567749023, "step": 50905 }, { "epoch": 0.2081, "grad_norm": 9.31032943725586, "learning_rate": 2.4793434343434345e-06, "loss": 6.1938323974609375, "step": 50910 }, { "epoch": 0.20815, "grad_norm": 5.881419658660889, "learning_rate": 2.479090909090909e-06, "loss": 6.230329132080078, "step": 50915 }, { "epoch": 0.2082, "grad_norm": 6.861217498779297, "learning_rate": 2.4788383838383838e-06, "loss": 6.243490219116211, "step": 50920 }, { "epoch": 0.20825, "grad_norm": 7.396465301513672, "learning_rate": 2.478585858585859e-06, "loss": 6.320063781738281, "step": 50925 }, { "epoch": 0.2083, "grad_norm": 5.599546432495117, "learning_rate": 2.4783333333333335e-06, "loss": 6.22918815612793, "step": 50930 }, { "epoch": 0.20835, "grad_norm": 4.476082801818848, "learning_rate": 2.4780808080808085e-06, "loss": 6.213483428955078, "step": 50935 }, { "epoch": 0.2084, "grad_norm": 6.848724842071533, "learning_rate": 2.4778282828282827e-06, "loss": 6.394439697265625, "step": 50940 }, { "epoch": 0.20845, "grad_norm": 5.901665687561035, "learning_rate": 2.4775757575757578e-06, "loss": 6.208905029296875, "step": 50945 }, { "epoch": 0.2085, "grad_norm": 6.262967586517334, "learning_rate": 2.4773232323232324e-06, "loss": 6.315032196044922, "step": 50950 }, { "epoch": 0.20855, "grad_norm": 6.620224475860596, "learning_rate": 2.4770707070707075e-06, "loss": 6.262773895263672, "step": 50955 }, { "epoch": 0.2086, "grad_norm": 5.9481730461120605, "learning_rate": 2.476818181818182e-06, "loss": 6.234036254882812, "step": 50960 }, { "epoch": 0.20865, "grad_norm": 4.727889537811279, "learning_rate": 2.4765656565656567e-06, "loss": 6.2423759460449215, "step": 50965 }, { "epoch": 0.2087, "grad_norm": 5.7111430168151855, "learning_rate": 2.4763131313131314e-06, "loss": 6.399767303466797, "step": 50970 }, { "epoch": 0.20875, "grad_norm": 6.529165267944336, "learning_rate": 2.4760606060606064e-06, "loss": 6.234956741333008, "step": 50975 }, { "epoch": 0.2088, "grad_norm": 5.659181118011475, "learning_rate": 2.475808080808081e-06, "loss": 6.260486602783203, "step": 50980 }, { "epoch": 0.20885, "grad_norm": 7.417525768280029, "learning_rate": 2.4755555555555557e-06, "loss": 6.229229354858399, "step": 50985 }, { "epoch": 0.2089, "grad_norm": 7.555321216583252, "learning_rate": 2.4753030303030303e-06, "loss": 6.253195190429688, "step": 50990 }, { "epoch": 0.20895, "grad_norm": 3.240656614303589, "learning_rate": 2.4750505050505053e-06, "loss": 6.259796142578125, "step": 50995 }, { "epoch": 0.209, "grad_norm": 5.363713264465332, "learning_rate": 2.47479797979798e-06, "loss": 6.255817031860351, "step": 51000 }, { "epoch": 0.20905, "grad_norm": 8.100377082824707, "learning_rate": 2.4745454545454546e-06, "loss": 6.272492218017578, "step": 51005 }, { "epoch": 0.2091, "grad_norm": 25.11182403564453, "learning_rate": 2.4742929292929292e-06, "loss": 6.583322143554687, "step": 51010 }, { "epoch": 0.20915, "grad_norm": 7.402913570404053, "learning_rate": 2.4740404040404043e-06, "loss": 6.241371154785156, "step": 51015 }, { "epoch": 0.2092, "grad_norm": 6.886812686920166, "learning_rate": 2.473787878787879e-06, "loss": 6.248114013671875, "step": 51020 }, { "epoch": 0.20925, "grad_norm": 6.077034950256348, "learning_rate": 2.4735353535353536e-06, "loss": 6.217455291748047, "step": 51025 }, { "epoch": 0.2093, "grad_norm": 3.740452289581299, "learning_rate": 2.4732828282828286e-06, "loss": 6.270792007446289, "step": 51030 }, { "epoch": 0.20935, "grad_norm": 13.0534029006958, "learning_rate": 2.4730303030303032e-06, "loss": 6.279194259643555, "step": 51035 }, { "epoch": 0.2094, "grad_norm": 4.478507041931152, "learning_rate": 2.4727777777777783e-06, "loss": 6.292996597290039, "step": 51040 }, { "epoch": 0.20945, "grad_norm": 19.015886306762695, "learning_rate": 2.472525252525253e-06, "loss": 6.611051940917969, "step": 51045 }, { "epoch": 0.2095, "grad_norm": 5.104238033294678, "learning_rate": 2.4722727272727276e-06, "loss": 6.262798309326172, "step": 51050 }, { "epoch": 0.20955, "grad_norm": 6.318634510040283, "learning_rate": 2.472020202020202e-06, "loss": 6.2688743591308596, "step": 51055 }, { "epoch": 0.2096, "grad_norm": 7.17677116394043, "learning_rate": 2.4717676767676772e-06, "loss": 6.235248947143555, "step": 51060 }, { "epoch": 0.20965, "grad_norm": 7.401055812835693, "learning_rate": 2.471515151515152e-06, "loss": 6.332347106933594, "step": 51065 }, { "epoch": 0.2097, "grad_norm": 7.4810709953308105, "learning_rate": 2.4712626262626265e-06, "loss": 6.318766784667969, "step": 51070 }, { "epoch": 0.20975, "grad_norm": 6.1330742835998535, "learning_rate": 2.471010101010101e-06, "loss": 6.258554840087891, "step": 51075 }, { "epoch": 0.2098, "grad_norm": 2.9578516483306885, "learning_rate": 2.470757575757576e-06, "loss": 6.234726333618164, "step": 51080 }, { "epoch": 0.20985, "grad_norm": 6.655963897705078, "learning_rate": 2.470505050505051e-06, "loss": 6.262426376342773, "step": 51085 }, { "epoch": 0.2099, "grad_norm": 5.589348793029785, "learning_rate": 2.4702525252525254e-06, "loss": 6.344485855102539, "step": 51090 }, { "epoch": 0.20995, "grad_norm": 7.6157708168029785, "learning_rate": 2.47e-06, "loss": 6.286137390136719, "step": 51095 }, { "epoch": 0.21, "grad_norm": 7.973703861236572, "learning_rate": 2.469747474747475e-06, "loss": 6.2888328552246096, "step": 51100 }, { "epoch": 0.21005, "grad_norm": 7.350414276123047, "learning_rate": 2.4694949494949498e-06, "loss": 6.262303161621094, "step": 51105 }, { "epoch": 0.2101, "grad_norm": 9.059775352478027, "learning_rate": 2.4692424242424244e-06, "loss": 6.247030639648438, "step": 51110 }, { "epoch": 0.21015, "grad_norm": 5.57716178894043, "learning_rate": 2.468989898989899e-06, "loss": 6.314599609375, "step": 51115 }, { "epoch": 0.2102, "grad_norm": 8.359626770019531, "learning_rate": 2.468737373737374e-06, "loss": 6.287981033325195, "step": 51120 }, { "epoch": 0.21025, "grad_norm": 5.0562944412231445, "learning_rate": 2.4684848484848487e-06, "loss": 6.359325790405274, "step": 51125 }, { "epoch": 0.2103, "grad_norm": 4.50576639175415, "learning_rate": 2.4682323232323233e-06, "loss": 6.225663757324218, "step": 51130 }, { "epoch": 0.21035, "grad_norm": 5.423783779144287, "learning_rate": 2.467979797979798e-06, "loss": 6.23482666015625, "step": 51135 }, { "epoch": 0.2104, "grad_norm": 6.113128185272217, "learning_rate": 2.467727272727273e-06, "loss": 6.283076477050781, "step": 51140 }, { "epoch": 0.21045, "grad_norm": 7.281699180603027, "learning_rate": 2.4674747474747476e-06, "loss": 6.216949462890625, "step": 51145 }, { "epoch": 0.2105, "grad_norm": 9.338159561157227, "learning_rate": 2.4672222222222227e-06, "loss": 6.239673233032226, "step": 51150 }, { "epoch": 0.21055, "grad_norm": 7.933487892150879, "learning_rate": 2.4669696969696973e-06, "loss": 6.2519981384277346, "step": 51155 }, { "epoch": 0.2106, "grad_norm": 5.633018493652344, "learning_rate": 2.466717171717172e-06, "loss": 6.278321838378906, "step": 51160 }, { "epoch": 0.21065, "grad_norm": 8.894936561584473, "learning_rate": 2.4664646464646466e-06, "loss": 6.248413848876953, "step": 51165 }, { "epoch": 0.2107, "grad_norm": 5.463344573974609, "learning_rate": 2.4662121212121216e-06, "loss": 6.240948867797852, "step": 51170 }, { "epoch": 0.21075, "grad_norm": 20.715492248535156, "learning_rate": 2.4659595959595963e-06, "loss": 6.551706695556641, "step": 51175 }, { "epoch": 0.2108, "grad_norm": 4.767124652862549, "learning_rate": 2.465707070707071e-06, "loss": 6.281942749023438, "step": 51180 }, { "epoch": 0.21085, "grad_norm": 5.242769718170166, "learning_rate": 2.4654545454545455e-06, "loss": 6.229162979125976, "step": 51185 }, { "epoch": 0.2109, "grad_norm": 7.9157633781433105, "learning_rate": 2.4652020202020206e-06, "loss": 6.250652313232422, "step": 51190 }, { "epoch": 0.21095, "grad_norm": 7.005685806274414, "learning_rate": 2.464949494949495e-06, "loss": 6.306711959838867, "step": 51195 }, { "epoch": 0.211, "grad_norm": 6.090155124664307, "learning_rate": 2.46469696969697e-06, "loss": 6.2252552032470705, "step": 51200 }, { "epoch": 0.21105, "grad_norm": 6.868363857269287, "learning_rate": 2.4644444444444445e-06, "loss": 6.243553543090821, "step": 51205 }, { "epoch": 0.2111, "grad_norm": 11.406401634216309, "learning_rate": 2.4641919191919195e-06, "loss": 6.052018356323242, "step": 51210 }, { "epoch": 0.21115, "grad_norm": 13.65166187286377, "learning_rate": 2.463939393939394e-06, "loss": 6.246839141845703, "step": 51215 }, { "epoch": 0.2112, "grad_norm": 3.9746711254119873, "learning_rate": 2.4636868686868688e-06, "loss": 6.2879890441894535, "step": 51220 }, { "epoch": 0.21125, "grad_norm": 5.510758876800537, "learning_rate": 2.4634343434343434e-06, "loss": 6.283474349975586, "step": 51225 }, { "epoch": 0.2113, "grad_norm": 6.260151386260986, "learning_rate": 2.4631818181818185e-06, "loss": 6.2638896942138675, "step": 51230 }, { "epoch": 0.21135, "grad_norm": 5.294466495513916, "learning_rate": 2.462929292929293e-06, "loss": 6.231114196777344, "step": 51235 }, { "epoch": 0.2114, "grad_norm": 3.597033739089966, "learning_rate": 2.462676767676768e-06, "loss": 6.245709228515625, "step": 51240 }, { "epoch": 0.21145, "grad_norm": 3.557478666305542, "learning_rate": 2.4624242424242424e-06, "loss": 6.292256927490234, "step": 51245 }, { "epoch": 0.2115, "grad_norm": 7.972006320953369, "learning_rate": 2.4621717171717174e-06, "loss": 6.264240264892578, "step": 51250 }, { "epoch": 0.21155, "grad_norm": 5.154393672943115, "learning_rate": 2.461919191919192e-06, "loss": 6.22901496887207, "step": 51255 }, { "epoch": 0.2116, "grad_norm": 3.7968223094940186, "learning_rate": 2.461666666666667e-06, "loss": 6.221169281005859, "step": 51260 }, { "epoch": 0.21165, "grad_norm": 5.698702812194824, "learning_rate": 2.4614141414141417e-06, "loss": 6.278104019165039, "step": 51265 }, { "epoch": 0.2117, "grad_norm": 7.324695587158203, "learning_rate": 2.4611616161616164e-06, "loss": 6.222004699707031, "step": 51270 }, { "epoch": 0.21175, "grad_norm": 5.646127223968506, "learning_rate": 2.460909090909091e-06, "loss": 6.256063842773438, "step": 51275 }, { "epoch": 0.2118, "grad_norm": 4.340987205505371, "learning_rate": 2.460656565656566e-06, "loss": 6.340274429321289, "step": 51280 }, { "epoch": 0.21185, "grad_norm": 6.185873508453369, "learning_rate": 2.4604040404040407e-06, "loss": 6.247739791870117, "step": 51285 }, { "epoch": 0.2119, "grad_norm": 6.869162559509277, "learning_rate": 2.4601515151515153e-06, "loss": 6.253234481811523, "step": 51290 }, { "epoch": 0.21195, "grad_norm": 5.638926982879639, "learning_rate": 2.45989898989899e-06, "loss": 6.242322158813477, "step": 51295 }, { "epoch": 0.212, "grad_norm": 10.032388687133789, "learning_rate": 2.459646464646465e-06, "loss": 6.2530677795410154, "step": 51300 }, { "epoch": 0.21205, "grad_norm": 7.064962863922119, "learning_rate": 2.4593939393939396e-06, "loss": 6.274679946899414, "step": 51305 }, { "epoch": 0.2121, "grad_norm": 5.170938014984131, "learning_rate": 2.4591414141414142e-06, "loss": 6.2827003479003904, "step": 51310 }, { "epoch": 0.21215, "grad_norm": 15.785543441772461, "learning_rate": 2.458888888888889e-06, "loss": 6.293438720703125, "step": 51315 }, { "epoch": 0.2122, "grad_norm": 6.064589500427246, "learning_rate": 2.458636363636364e-06, "loss": 6.233417510986328, "step": 51320 }, { "epoch": 0.21225, "grad_norm": 14.839214324951172, "learning_rate": 2.4583838383838386e-06, "loss": 6.335691452026367, "step": 51325 }, { "epoch": 0.2123, "grad_norm": 5.582024574279785, "learning_rate": 2.458131313131313e-06, "loss": 6.549031066894531, "step": 51330 }, { "epoch": 0.21235, "grad_norm": 11.80196475982666, "learning_rate": 2.457878787878788e-06, "loss": 6.319637680053711, "step": 51335 }, { "epoch": 0.2124, "grad_norm": 5.333680629730225, "learning_rate": 2.457626262626263e-06, "loss": 6.223167800903321, "step": 51340 }, { "epoch": 0.21245, "grad_norm": 10.341134071350098, "learning_rate": 2.4573737373737375e-06, "loss": 6.210519409179687, "step": 51345 }, { "epoch": 0.2125, "grad_norm": 8.239900588989258, "learning_rate": 2.4571212121212126e-06, "loss": 6.304873657226563, "step": 51350 }, { "epoch": 0.21255, "grad_norm": 5.85414457321167, "learning_rate": 2.4568686868686868e-06, "loss": 6.266398620605469, "step": 51355 }, { "epoch": 0.2126, "grad_norm": 6.342520713806152, "learning_rate": 2.456616161616162e-06, "loss": 6.341128540039063, "step": 51360 }, { "epoch": 0.21265, "grad_norm": 4.657464027404785, "learning_rate": 2.4563636363636364e-06, "loss": 6.214262771606445, "step": 51365 }, { "epoch": 0.2127, "grad_norm": 5.213290214538574, "learning_rate": 2.4561111111111115e-06, "loss": 6.292685317993164, "step": 51370 }, { "epoch": 0.21275, "grad_norm": 5.39146089553833, "learning_rate": 2.455858585858586e-06, "loss": 6.293395233154297, "step": 51375 }, { "epoch": 0.2128, "grad_norm": 8.82789134979248, "learning_rate": 2.4556060606060608e-06, "loss": 6.291364288330078, "step": 51380 }, { "epoch": 0.21285, "grad_norm": 3.5243186950683594, "learning_rate": 2.4553535353535354e-06, "loss": 6.243863677978515, "step": 51385 }, { "epoch": 0.2129, "grad_norm": 7.234767913818359, "learning_rate": 2.4551010101010104e-06, "loss": 6.2834014892578125, "step": 51390 }, { "epoch": 0.21295, "grad_norm": 15.487224578857422, "learning_rate": 2.454848484848485e-06, "loss": 6.2790061950683596, "step": 51395 }, { "epoch": 0.213, "grad_norm": 4.161523818969727, "learning_rate": 2.4545959595959597e-06, "loss": 6.226154708862305, "step": 51400 }, { "epoch": 0.21305, "grad_norm": 5.875275611877441, "learning_rate": 2.4543434343434343e-06, "loss": 6.250581359863281, "step": 51405 }, { "epoch": 0.2131, "grad_norm": 5.35794734954834, "learning_rate": 2.4540909090909094e-06, "loss": 6.220343399047851, "step": 51410 }, { "epoch": 0.21315, "grad_norm": 21.896791458129883, "learning_rate": 2.453838383838384e-06, "loss": 6.2194969177246096, "step": 51415 }, { "epoch": 0.2132, "grad_norm": 7.9488139152526855, "learning_rate": 2.4535858585858586e-06, "loss": 6.266121673583984, "step": 51420 }, { "epoch": 0.21325, "grad_norm": 5.777406692504883, "learning_rate": 2.4533333333333333e-06, "loss": 6.204006195068359, "step": 51425 }, { "epoch": 0.2133, "grad_norm": 4.49384880065918, "learning_rate": 2.4530808080808083e-06, "loss": 6.25079116821289, "step": 51430 }, { "epoch": 0.21335, "grad_norm": 5.0816826820373535, "learning_rate": 2.452828282828283e-06, "loss": 6.264274978637696, "step": 51435 }, { "epoch": 0.2134, "grad_norm": 12.778704643249512, "learning_rate": 2.4525757575757576e-06, "loss": 6.2124076843261715, "step": 51440 }, { "epoch": 0.21345, "grad_norm": 5.872251510620117, "learning_rate": 2.4523232323232326e-06, "loss": 6.249198532104492, "step": 51445 }, { "epoch": 0.2135, "grad_norm": 8.335023880004883, "learning_rate": 2.4520707070707073e-06, "loss": 6.246257781982422, "step": 51450 }, { "epoch": 0.21355, "grad_norm": 9.092503547668457, "learning_rate": 2.4518181818181823e-06, "loss": 6.204987716674805, "step": 51455 }, { "epoch": 0.2136, "grad_norm": 7.842629909515381, "learning_rate": 2.451565656565657e-06, "loss": 6.2819580078125, "step": 51460 }, { "epoch": 0.21365, "grad_norm": 6.360104560852051, "learning_rate": 2.4513131313131316e-06, "loss": 6.198761749267578, "step": 51465 }, { "epoch": 0.2137, "grad_norm": 7.053884029388428, "learning_rate": 2.4510606060606062e-06, "loss": 6.2600749969482425, "step": 51470 }, { "epoch": 0.21375, "grad_norm": 6.185999870300293, "learning_rate": 2.4508080808080813e-06, "loss": 6.248574447631836, "step": 51475 }, { "epoch": 0.2138, "grad_norm": 5.655125617980957, "learning_rate": 2.450555555555556e-06, "loss": 6.251945495605469, "step": 51480 }, { "epoch": 0.21385, "grad_norm": 9.984302520751953, "learning_rate": 2.4503030303030305e-06, "loss": 6.272206878662109, "step": 51485 }, { "epoch": 0.2139, "grad_norm": 19.93279266357422, "learning_rate": 2.450050505050505e-06, "loss": 6.152031326293946, "step": 51490 }, { "epoch": 0.21395, "grad_norm": 4.629660606384277, "learning_rate": 2.4497979797979802e-06, "loss": 6.256863021850586, "step": 51495 }, { "epoch": 0.214, "grad_norm": 10.875117301940918, "learning_rate": 2.449545454545455e-06, "loss": 6.296335601806641, "step": 51500 }, { "epoch": 0.21405, "grad_norm": 4.486212253570557, "learning_rate": 2.4492929292929295e-06, "loss": 6.250394439697265, "step": 51505 }, { "epoch": 0.2141, "grad_norm": 5.56361722946167, "learning_rate": 2.449040404040404e-06, "loss": 6.244961547851562, "step": 51510 }, { "epoch": 0.21415, "grad_norm": 6.107804298400879, "learning_rate": 2.448787878787879e-06, "loss": 6.310741424560547, "step": 51515 }, { "epoch": 0.2142, "grad_norm": 5.597900867462158, "learning_rate": 2.448535353535354e-06, "loss": 6.241750717163086, "step": 51520 }, { "epoch": 0.21425, "grad_norm": 8.29653263092041, "learning_rate": 2.4482828282828284e-06, "loss": 6.281385803222657, "step": 51525 }, { "epoch": 0.2143, "grad_norm": 6.49276876449585, "learning_rate": 2.448030303030303e-06, "loss": 6.250420379638672, "step": 51530 }, { "epoch": 0.21435, "grad_norm": 6.147127151489258, "learning_rate": 2.447777777777778e-06, "loss": 6.234955596923828, "step": 51535 }, { "epoch": 0.2144, "grad_norm": 5.759504795074463, "learning_rate": 2.4475252525252527e-06, "loss": 6.227988815307617, "step": 51540 }, { "epoch": 0.21445, "grad_norm": 7.2107930183410645, "learning_rate": 2.4472727272727274e-06, "loss": 6.270477676391602, "step": 51545 }, { "epoch": 0.2145, "grad_norm": 8.96046257019043, "learning_rate": 2.447020202020202e-06, "loss": 6.299123764038086, "step": 51550 }, { "epoch": 0.21455, "grad_norm": 8.708113670349121, "learning_rate": 2.446767676767677e-06, "loss": 6.330586242675781, "step": 51555 }, { "epoch": 0.2146, "grad_norm": 5.979678153991699, "learning_rate": 2.4465151515151517e-06, "loss": 6.247374725341797, "step": 51560 }, { "epoch": 0.21465, "grad_norm": 5.504483699798584, "learning_rate": 2.4462626262626267e-06, "loss": 6.226418304443359, "step": 51565 }, { "epoch": 0.2147, "grad_norm": 27.281719207763672, "learning_rate": 2.4460101010101014e-06, "loss": 6.505309295654297, "step": 51570 }, { "epoch": 0.21475, "grad_norm": 6.117286205291748, "learning_rate": 2.445757575757576e-06, "loss": 6.261387634277344, "step": 51575 }, { "epoch": 0.2148, "grad_norm": 7.863574028015137, "learning_rate": 2.4455050505050506e-06, "loss": 6.2833709716796875, "step": 51580 }, { "epoch": 0.21485, "grad_norm": 5.184615135192871, "learning_rate": 2.4452525252525257e-06, "loss": 6.231716156005859, "step": 51585 }, { "epoch": 0.2149, "grad_norm": 11.608134269714355, "learning_rate": 2.4450000000000003e-06, "loss": 6.277294158935547, "step": 51590 }, { "epoch": 0.21495, "grad_norm": 9.80497932434082, "learning_rate": 2.444747474747475e-06, "loss": 6.220586776733398, "step": 51595 }, { "epoch": 0.215, "grad_norm": 7.324535846710205, "learning_rate": 2.4444949494949496e-06, "loss": 6.406243896484375, "step": 51600 }, { "epoch": 0.21505, "grad_norm": 5.4823102951049805, "learning_rate": 2.4442424242424246e-06, "loss": 6.274078369140625, "step": 51605 }, { "epoch": 0.2151, "grad_norm": 7.361666202545166, "learning_rate": 2.4439898989898993e-06, "loss": 6.205318832397461, "step": 51610 }, { "epoch": 0.21515, "grad_norm": 39.872467041015625, "learning_rate": 2.443737373737374e-06, "loss": 6.134758758544922, "step": 51615 }, { "epoch": 0.2152, "grad_norm": 35.62089538574219, "learning_rate": 2.4434848484848485e-06, "loss": 5.657844161987304, "step": 51620 }, { "epoch": 0.21525, "grad_norm": 57.202491760253906, "learning_rate": 2.4432323232323236e-06, "loss": 5.6944324493408205, "step": 51625 }, { "epoch": 0.2153, "grad_norm": 7.114380836486816, "learning_rate": 2.442979797979798e-06, "loss": 6.331563186645508, "step": 51630 }, { "epoch": 0.21535, "grad_norm": 10.373966217041016, "learning_rate": 2.442727272727273e-06, "loss": 6.255901718139649, "step": 51635 }, { "epoch": 0.2154, "grad_norm": 9.035956382751465, "learning_rate": 2.4424747474747475e-06, "loss": 6.2159374237060545, "step": 51640 }, { "epoch": 0.21545, "grad_norm": 7.673954486846924, "learning_rate": 2.4422222222222225e-06, "loss": 6.286244583129883, "step": 51645 }, { "epoch": 0.2155, "grad_norm": 10.140018463134766, "learning_rate": 2.441969696969697e-06, "loss": 6.255821609497071, "step": 51650 }, { "epoch": 0.21555, "grad_norm": 12.393857955932617, "learning_rate": 2.441717171717172e-06, "loss": 6.306755447387696, "step": 51655 }, { "epoch": 0.2156, "grad_norm": 9.28051471710205, "learning_rate": 2.4414646464646464e-06, "loss": 6.286399078369141, "step": 51660 }, { "epoch": 0.21565, "grad_norm": 6.818021297454834, "learning_rate": 2.4412121212121215e-06, "loss": 6.334869766235352, "step": 51665 }, { "epoch": 0.2157, "grad_norm": 10.329705238342285, "learning_rate": 2.440959595959596e-06, "loss": 6.253346633911133, "step": 51670 }, { "epoch": 0.21575, "grad_norm": 5.367230415344238, "learning_rate": 2.440707070707071e-06, "loss": 6.289541244506836, "step": 51675 }, { "epoch": 0.2158, "grad_norm": 5.318551063537598, "learning_rate": 2.4404545454545458e-06, "loss": 6.436114501953125, "step": 51680 }, { "epoch": 0.21585, "grad_norm": 7.016967296600342, "learning_rate": 2.4402020202020204e-06, "loss": 6.231676864624023, "step": 51685 }, { "epoch": 0.2159, "grad_norm": 7.2251362800598145, "learning_rate": 2.439949494949495e-06, "loss": 6.268294143676758, "step": 51690 }, { "epoch": 0.21595, "grad_norm": 8.76424503326416, "learning_rate": 2.43969696969697e-06, "loss": 6.219085693359375, "step": 51695 }, { "epoch": 0.216, "grad_norm": 4.175989627838135, "learning_rate": 2.4394444444444447e-06, "loss": 6.244248962402343, "step": 51700 }, { "epoch": 0.21605, "grad_norm": 8.37943172454834, "learning_rate": 2.4391919191919193e-06, "loss": 6.244679641723633, "step": 51705 }, { "epoch": 0.2161, "grad_norm": 8.187246322631836, "learning_rate": 2.438939393939394e-06, "loss": 6.264329147338867, "step": 51710 }, { "epoch": 0.21615, "grad_norm": 3.7950942516326904, "learning_rate": 2.438686868686869e-06, "loss": 6.211478042602539, "step": 51715 }, { "epoch": 0.2162, "grad_norm": 5.600079536437988, "learning_rate": 2.4384343434343437e-06, "loss": 6.272982788085938, "step": 51720 }, { "epoch": 0.21625, "grad_norm": 6.2233052253723145, "learning_rate": 2.4381818181818183e-06, "loss": 6.27648696899414, "step": 51725 }, { "epoch": 0.2163, "grad_norm": 3.850778579711914, "learning_rate": 2.437929292929293e-06, "loss": 6.5286613464355465, "step": 51730 }, { "epoch": 0.21635, "grad_norm": 7.182032585144043, "learning_rate": 2.437676767676768e-06, "loss": 6.2474933624267575, "step": 51735 }, { "epoch": 0.2164, "grad_norm": 6.1254777908325195, "learning_rate": 2.4374242424242426e-06, "loss": 6.283548736572266, "step": 51740 }, { "epoch": 0.21645, "grad_norm": 3.8990466594696045, "learning_rate": 2.4371717171717172e-06, "loss": 6.185603332519531, "step": 51745 }, { "epoch": 0.2165, "grad_norm": 6.335047721862793, "learning_rate": 2.436919191919192e-06, "loss": 6.303973770141601, "step": 51750 }, { "epoch": 0.21655, "grad_norm": 6.938802719116211, "learning_rate": 2.436666666666667e-06, "loss": 6.212868499755859, "step": 51755 }, { "epoch": 0.2166, "grad_norm": 5.338647842407227, "learning_rate": 2.4364141414141415e-06, "loss": 6.2805023193359375, "step": 51760 }, { "epoch": 0.21665, "grad_norm": 12.086352348327637, "learning_rate": 2.4361616161616166e-06, "loss": 6.531468200683594, "step": 51765 }, { "epoch": 0.2167, "grad_norm": 8.061637878417969, "learning_rate": 2.435909090909091e-06, "loss": 6.243861389160156, "step": 51770 }, { "epoch": 0.21675, "grad_norm": 3.9192330837249756, "learning_rate": 2.435656565656566e-06, "loss": 6.264580535888672, "step": 51775 }, { "epoch": 0.2168, "grad_norm": 10.635513305664062, "learning_rate": 2.4354040404040405e-06, "loss": 6.211273574829102, "step": 51780 }, { "epoch": 0.21685, "grad_norm": 14.31887435913086, "learning_rate": 2.4351515151515155e-06, "loss": 6.260458374023438, "step": 51785 }, { "epoch": 0.2169, "grad_norm": 6.969208240509033, "learning_rate": 2.43489898989899e-06, "loss": 6.407927703857422, "step": 51790 }, { "epoch": 0.21695, "grad_norm": 7.219388008117676, "learning_rate": 2.434646464646465e-06, "loss": 6.227739715576172, "step": 51795 }, { "epoch": 0.217, "grad_norm": 4.3910417556762695, "learning_rate": 2.4343939393939394e-06, "loss": 6.304022598266601, "step": 51800 }, { "epoch": 0.21705, "grad_norm": 5.578765869140625, "learning_rate": 2.4341414141414145e-06, "loss": 6.259788894653321, "step": 51805 }, { "epoch": 0.2171, "grad_norm": 7.24212121963501, "learning_rate": 2.433888888888889e-06, "loss": 6.255804443359375, "step": 51810 }, { "epoch": 0.21715, "grad_norm": 4.884197235107422, "learning_rate": 2.4336363636363637e-06, "loss": 6.180574035644531, "step": 51815 }, { "epoch": 0.2172, "grad_norm": 10.879237174987793, "learning_rate": 2.4333838383838384e-06, "loss": 6.281976318359375, "step": 51820 }, { "epoch": 0.21725, "grad_norm": 7.292664527893066, "learning_rate": 2.4331313131313134e-06, "loss": 6.325645446777344, "step": 51825 }, { "epoch": 0.2173, "grad_norm": 5.065341472625732, "learning_rate": 2.432878787878788e-06, "loss": 6.342150115966797, "step": 51830 }, { "epoch": 0.21735, "grad_norm": 3.2572689056396484, "learning_rate": 2.4326262626262627e-06, "loss": 6.239029312133789, "step": 51835 }, { "epoch": 0.2174, "grad_norm": 5.8653740882873535, "learning_rate": 2.4323737373737373e-06, "loss": 6.242805480957031, "step": 51840 }, { "epoch": 0.21745, "grad_norm": 5.021176338195801, "learning_rate": 2.4321212121212124e-06, "loss": 6.273922729492187, "step": 51845 }, { "epoch": 0.2175, "grad_norm": 8.923537254333496, "learning_rate": 2.431868686868687e-06, "loss": 6.265521240234375, "step": 51850 }, { "epoch": 0.21755, "grad_norm": 5.627609729766846, "learning_rate": 2.4316161616161616e-06, "loss": 6.260851669311523, "step": 51855 }, { "epoch": 0.2176, "grad_norm": 4.166629314422607, "learning_rate": 2.4313636363636363e-06, "loss": 6.244822311401367, "step": 51860 }, { "epoch": 0.21765, "grad_norm": 11.774360656738281, "learning_rate": 2.4311111111111113e-06, "loss": 6.187360000610352, "step": 51865 }, { "epoch": 0.2177, "grad_norm": 4.543371200561523, "learning_rate": 2.4308585858585864e-06, "loss": 6.2107189178466795, "step": 51870 }, { "epoch": 0.21775, "grad_norm": 10.893338203430176, "learning_rate": 2.430606060606061e-06, "loss": 6.232725524902344, "step": 51875 }, { "epoch": 0.2178, "grad_norm": 4.246333599090576, "learning_rate": 2.4303535353535356e-06, "loss": 6.2673297882080075, "step": 51880 }, { "epoch": 0.21785, "grad_norm": 4.192732334136963, "learning_rate": 2.4301010101010103e-06, "loss": 6.296896362304688, "step": 51885 }, { "epoch": 0.2179, "grad_norm": 4.907535076141357, "learning_rate": 2.4298484848484853e-06, "loss": 6.224988555908203, "step": 51890 }, { "epoch": 0.21795, "grad_norm": 6.952675819396973, "learning_rate": 2.42959595959596e-06, "loss": 6.221608734130859, "step": 51895 }, { "epoch": 0.218, "grad_norm": 5.547224998474121, "learning_rate": 2.4293434343434346e-06, "loss": 6.257294464111328, "step": 51900 }, { "epoch": 0.21805, "grad_norm": 5.8282904624938965, "learning_rate": 2.429090909090909e-06, "loss": 6.26077880859375, "step": 51905 }, { "epoch": 0.2181, "grad_norm": 9.379003524780273, "learning_rate": 2.4288383838383843e-06, "loss": 6.447900390625, "step": 51910 }, { "epoch": 0.21815, "grad_norm": 5.2447333335876465, "learning_rate": 2.428585858585859e-06, "loss": 6.2692218780517575, "step": 51915 }, { "epoch": 0.2182, "grad_norm": 7.544928550720215, "learning_rate": 2.4283333333333335e-06, "loss": 6.275684356689453, "step": 51920 }, { "epoch": 0.21825, "grad_norm": 4.871537685394287, "learning_rate": 2.428080808080808e-06, "loss": 6.2388160705566404, "step": 51925 }, { "epoch": 0.2183, "grad_norm": 12.420343399047852, "learning_rate": 2.427828282828283e-06, "loss": 6.3106742858886715, "step": 51930 }, { "epoch": 0.21835, "grad_norm": 8.969200134277344, "learning_rate": 2.427575757575758e-06, "loss": 6.325507354736328, "step": 51935 }, { "epoch": 0.2184, "grad_norm": 6.964548110961914, "learning_rate": 2.4273232323232325e-06, "loss": 6.245747375488281, "step": 51940 }, { "epoch": 0.21845, "grad_norm": 10.385884284973145, "learning_rate": 2.427070707070707e-06, "loss": 6.290007781982422, "step": 51945 }, { "epoch": 0.2185, "grad_norm": 13.474695205688477, "learning_rate": 2.426818181818182e-06, "loss": 6.314355850219727, "step": 51950 }, { "epoch": 0.21855, "grad_norm": 7.357687473297119, "learning_rate": 2.4265656565656568e-06, "loss": 6.257258605957031, "step": 51955 }, { "epoch": 0.2186, "grad_norm": 4.997838020324707, "learning_rate": 2.426313131313132e-06, "loss": 6.281138610839844, "step": 51960 }, { "epoch": 0.21865, "grad_norm": 3.8668034076690674, "learning_rate": 2.426060606060606e-06, "loss": 6.272480773925781, "step": 51965 }, { "epoch": 0.2187, "grad_norm": 8.707681655883789, "learning_rate": 2.425808080808081e-06, "loss": 6.2568412780761715, "step": 51970 }, { "epoch": 0.21875, "grad_norm": 4.933715343475342, "learning_rate": 2.4255555555555557e-06, "loss": 6.243733978271484, "step": 51975 }, { "epoch": 0.2188, "grad_norm": 3.7529752254486084, "learning_rate": 2.4253030303030308e-06, "loss": 6.216053771972656, "step": 51980 }, { "epoch": 0.21885, "grad_norm": 5.028703212738037, "learning_rate": 2.4250505050505054e-06, "loss": 6.387085723876953, "step": 51985 }, { "epoch": 0.2189, "grad_norm": 7.808895111083984, "learning_rate": 2.42479797979798e-06, "loss": 6.236086654663086, "step": 51990 }, { "epoch": 0.21895, "grad_norm": 7.691891193389893, "learning_rate": 2.4245454545454547e-06, "loss": 6.255646514892578, "step": 51995 }, { "epoch": 0.219, "grad_norm": 6.964582920074463, "learning_rate": 2.4242929292929297e-06, "loss": 6.202505874633789, "step": 52000 }, { "epoch": 0.21905, "grad_norm": 6.327816486358643, "learning_rate": 2.4240404040404043e-06, "loss": 6.228401184082031, "step": 52005 }, { "epoch": 0.2191, "grad_norm": 7.0051589012146, "learning_rate": 2.423787878787879e-06, "loss": 6.273062133789063, "step": 52010 }, { "epoch": 0.21915, "grad_norm": 3.8174264430999756, "learning_rate": 2.4235353535353536e-06, "loss": 6.198336791992188, "step": 52015 }, { "epoch": 0.2192, "grad_norm": 4.746889591217041, "learning_rate": 2.4232828282828287e-06, "loss": 6.249980545043945, "step": 52020 }, { "epoch": 0.21925, "grad_norm": 3.8145382404327393, "learning_rate": 2.4230303030303033e-06, "loss": 6.286892318725586, "step": 52025 }, { "epoch": 0.2193, "grad_norm": 5.756411552429199, "learning_rate": 2.422777777777778e-06, "loss": 6.256526947021484, "step": 52030 }, { "epoch": 0.21935, "grad_norm": 10.363317489624023, "learning_rate": 2.4225252525252526e-06, "loss": 6.2958831787109375, "step": 52035 }, { "epoch": 0.2194, "grad_norm": 5.343479633331299, "learning_rate": 2.4222727272727276e-06, "loss": 6.311480331420898, "step": 52040 }, { "epoch": 0.21945, "grad_norm": 12.020803451538086, "learning_rate": 2.4220202020202022e-06, "loss": 6.264813232421875, "step": 52045 }, { "epoch": 0.2195, "grad_norm": 21.358219146728516, "learning_rate": 2.421767676767677e-06, "loss": 6.338143157958984, "step": 52050 }, { "epoch": 0.21955, "grad_norm": 8.32059097290039, "learning_rate": 2.4215151515151515e-06, "loss": 6.26324462890625, "step": 52055 }, { "epoch": 0.2196, "grad_norm": 9.86745548248291, "learning_rate": 2.4212626262626265e-06, "loss": 6.215025329589844, "step": 52060 }, { "epoch": 0.21965, "grad_norm": 6.410556316375732, "learning_rate": 2.421010101010101e-06, "loss": 6.27424201965332, "step": 52065 }, { "epoch": 0.2197, "grad_norm": 5.593894004821777, "learning_rate": 2.4207575757575762e-06, "loss": 6.30566177368164, "step": 52070 }, { "epoch": 0.21975, "grad_norm": 4.042796611785889, "learning_rate": 2.4205050505050504e-06, "loss": 6.214342880249023, "step": 52075 }, { "epoch": 0.2198, "grad_norm": 4.265280723571777, "learning_rate": 2.4202525252525255e-06, "loss": 6.262898635864258, "step": 52080 }, { "epoch": 0.21985, "grad_norm": 6.81882905960083, "learning_rate": 2.42e-06, "loss": 6.1835895538330075, "step": 52085 }, { "epoch": 0.2199, "grad_norm": 20.23203468322754, "learning_rate": 2.419747474747475e-06, "loss": 6.45819320678711, "step": 52090 }, { "epoch": 0.21995, "grad_norm": 9.20993709564209, "learning_rate": 2.41949494949495e-06, "loss": 6.216003036499023, "step": 52095 }, { "epoch": 0.22, "grad_norm": 4.597198486328125, "learning_rate": 2.4192424242424244e-06, "loss": 6.273167419433594, "step": 52100 }, { "epoch": 0.22005, "grad_norm": 7.306369781494141, "learning_rate": 2.418989898989899e-06, "loss": 6.232064437866211, "step": 52105 }, { "epoch": 0.2201, "grad_norm": 6.78074836730957, "learning_rate": 2.418737373737374e-06, "loss": 6.214913177490234, "step": 52110 }, { "epoch": 0.22015, "grad_norm": 19.741369247436523, "learning_rate": 2.4184848484848488e-06, "loss": 6.3640399932861325, "step": 52115 }, { "epoch": 0.2202, "grad_norm": 30.509815216064453, "learning_rate": 2.4182323232323234e-06, "loss": 6.3917686462402346, "step": 52120 }, { "epoch": 0.22025, "grad_norm": 15.272809982299805, "learning_rate": 2.417979797979798e-06, "loss": 6.301815032958984, "step": 52125 }, { "epoch": 0.2203, "grad_norm": 12.077404975891113, "learning_rate": 2.417727272727273e-06, "loss": 6.355253982543945, "step": 52130 }, { "epoch": 0.22035, "grad_norm": 6.065465927124023, "learning_rate": 2.4174747474747477e-06, "loss": 6.260669708251953, "step": 52135 }, { "epoch": 0.2204, "grad_norm": 8.035653114318848, "learning_rate": 2.4172222222222223e-06, "loss": 6.2587028503417965, "step": 52140 }, { "epoch": 0.22045, "grad_norm": 49.724571228027344, "learning_rate": 2.416969696969697e-06, "loss": 6.473872375488281, "step": 52145 }, { "epoch": 0.2205, "grad_norm": 7.193949222564697, "learning_rate": 2.416717171717172e-06, "loss": 6.229759979248047, "step": 52150 }, { "epoch": 0.22055, "grad_norm": 5.63744592666626, "learning_rate": 2.4164646464646466e-06, "loss": 6.2224983215332035, "step": 52155 }, { "epoch": 0.2206, "grad_norm": 6.566977024078369, "learning_rate": 2.4162121212121213e-06, "loss": 6.255902862548828, "step": 52160 }, { "epoch": 0.22065, "grad_norm": 5.562075138092041, "learning_rate": 2.415959595959596e-06, "loss": 6.3541923522949215, "step": 52165 }, { "epoch": 0.2207, "grad_norm": 5.391906261444092, "learning_rate": 2.415707070707071e-06, "loss": 6.262410354614258, "step": 52170 }, { "epoch": 0.22075, "grad_norm": 13.744502067565918, "learning_rate": 2.4154545454545456e-06, "loss": 6.277798461914062, "step": 52175 }, { "epoch": 0.2208, "grad_norm": 16.643953323364258, "learning_rate": 2.4152020202020206e-06, "loss": 6.292957305908203, "step": 52180 }, { "epoch": 0.22085, "grad_norm": 7.645695209503174, "learning_rate": 2.414949494949495e-06, "loss": 6.3662971496582035, "step": 52185 }, { "epoch": 0.2209, "grad_norm": 5.700942516326904, "learning_rate": 2.41469696969697e-06, "loss": 6.237003326416016, "step": 52190 }, { "epoch": 0.22095, "grad_norm": 7.921265602111816, "learning_rate": 2.4144444444444445e-06, "loss": 6.292901229858399, "step": 52195 }, { "epoch": 0.221, "grad_norm": 14.05720043182373, "learning_rate": 2.4141919191919196e-06, "loss": 6.388654327392578, "step": 52200 }, { "epoch": 0.22105, "grad_norm": 26.146926879882812, "learning_rate": 2.413939393939394e-06, "loss": 6.324885559082031, "step": 52205 }, { "epoch": 0.2211, "grad_norm": 4.943215847015381, "learning_rate": 2.413686868686869e-06, "loss": 6.1567119598388675, "step": 52210 }, { "epoch": 0.22115, "grad_norm": 4.00844669342041, "learning_rate": 2.4134343434343435e-06, "loss": 6.226755523681641, "step": 52215 }, { "epoch": 0.2212, "grad_norm": 17.918228149414062, "learning_rate": 2.4131818181818185e-06, "loss": 6.357913589477539, "step": 52220 }, { "epoch": 0.22125, "grad_norm": 6.752430438995361, "learning_rate": 2.412929292929293e-06, "loss": 6.309234619140625, "step": 52225 }, { "epoch": 0.2213, "grad_norm": 9.448173522949219, "learning_rate": 2.4126767676767678e-06, "loss": 6.20665168762207, "step": 52230 }, { "epoch": 0.22135, "grad_norm": 5.233664512634277, "learning_rate": 2.4124242424242424e-06, "loss": 6.250830841064453, "step": 52235 }, { "epoch": 0.2214, "grad_norm": 6.172515392303467, "learning_rate": 2.4121717171717175e-06, "loss": 6.280794906616211, "step": 52240 }, { "epoch": 0.22145, "grad_norm": 5.997997760772705, "learning_rate": 2.411919191919192e-06, "loss": 6.246550750732422, "step": 52245 }, { "epoch": 0.2215, "grad_norm": 5.694090843200684, "learning_rate": 2.4116666666666667e-06, "loss": 6.284852981567383, "step": 52250 }, { "epoch": 0.22155, "grad_norm": 9.08346176147461, "learning_rate": 2.4114141414141414e-06, "loss": 6.272308349609375, "step": 52255 }, { "epoch": 0.2216, "grad_norm": 8.893693923950195, "learning_rate": 2.4111616161616164e-06, "loss": 6.297735977172851, "step": 52260 }, { "epoch": 0.22165, "grad_norm": 4.681769371032715, "learning_rate": 2.410909090909091e-06, "loss": 6.336763381958008, "step": 52265 }, { "epoch": 0.2217, "grad_norm": 4.781570911407471, "learning_rate": 2.4106565656565657e-06, "loss": 6.288786315917969, "step": 52270 }, { "epoch": 0.22175, "grad_norm": 8.420703887939453, "learning_rate": 2.4104040404040403e-06, "loss": 6.302719497680664, "step": 52275 }, { "epoch": 0.2218, "grad_norm": 5.59239387512207, "learning_rate": 2.4101515151515154e-06, "loss": 6.2652019500732425, "step": 52280 }, { "epoch": 0.22185, "grad_norm": 9.888558387756348, "learning_rate": 2.40989898989899e-06, "loss": 6.179604721069336, "step": 52285 }, { "epoch": 0.2219, "grad_norm": 7.0468244552612305, "learning_rate": 2.409646464646465e-06, "loss": 6.2230377197265625, "step": 52290 }, { "epoch": 0.22195, "grad_norm": 19.38671112060547, "learning_rate": 2.4093939393939397e-06, "loss": 6.278894424438477, "step": 52295 }, { "epoch": 0.222, "grad_norm": 3.208070755004883, "learning_rate": 2.4091414141414143e-06, "loss": 6.273501205444336, "step": 52300 }, { "epoch": 0.22205, "grad_norm": 6.409883499145508, "learning_rate": 2.4088888888888894e-06, "loss": 6.227059936523437, "step": 52305 }, { "epoch": 0.2221, "grad_norm": 4.8205952644348145, "learning_rate": 2.408636363636364e-06, "loss": 6.272169494628907, "step": 52310 }, { "epoch": 0.22215, "grad_norm": 4.420736789703369, "learning_rate": 2.4083838383838386e-06, "loss": 6.273876190185547, "step": 52315 }, { "epoch": 0.2222, "grad_norm": 6.323145866394043, "learning_rate": 2.4081313131313132e-06, "loss": 6.275417327880859, "step": 52320 }, { "epoch": 0.22225, "grad_norm": 6.058994293212891, "learning_rate": 2.4078787878787883e-06, "loss": 6.248202514648438, "step": 52325 }, { "epoch": 0.2223, "grad_norm": 6.7446489334106445, "learning_rate": 2.407626262626263e-06, "loss": 6.2396892547607425, "step": 52330 }, { "epoch": 0.22235, "grad_norm": 6.229366779327393, "learning_rate": 2.4073737373737376e-06, "loss": 6.284923553466797, "step": 52335 }, { "epoch": 0.2224, "grad_norm": 7.116120338439941, "learning_rate": 2.407121212121212e-06, "loss": 6.239521026611328, "step": 52340 }, { "epoch": 0.22245, "grad_norm": 7.445160388946533, "learning_rate": 2.4068686868686872e-06, "loss": 6.2539207458496096, "step": 52345 }, { "epoch": 0.2225, "grad_norm": 7.2294602394104, "learning_rate": 2.406616161616162e-06, "loss": 6.21704330444336, "step": 52350 }, { "epoch": 0.22255, "grad_norm": 6.60988187789917, "learning_rate": 2.4063636363636365e-06, "loss": 6.283073806762696, "step": 52355 }, { "epoch": 0.2226, "grad_norm": 6.505133152008057, "learning_rate": 2.406111111111111e-06, "loss": 6.230160522460937, "step": 52360 }, { "epoch": 0.22265, "grad_norm": 7.273616790771484, "learning_rate": 2.405858585858586e-06, "loss": 6.249753952026367, "step": 52365 }, { "epoch": 0.2227, "grad_norm": 8.286860466003418, "learning_rate": 2.405606060606061e-06, "loss": 6.322494125366211, "step": 52370 }, { "epoch": 0.22275, "grad_norm": 4.053256034851074, "learning_rate": 2.405353535353536e-06, "loss": 6.23491096496582, "step": 52375 }, { "epoch": 0.2228, "grad_norm": 7.525416374206543, "learning_rate": 2.40510101010101e-06, "loss": 6.296028137207031, "step": 52380 }, { "epoch": 0.22285, "grad_norm": 10.349153518676758, "learning_rate": 2.404848484848485e-06, "loss": 6.262926483154297, "step": 52385 }, { "epoch": 0.2229, "grad_norm": 3.75951886177063, "learning_rate": 2.4045959595959598e-06, "loss": 6.235081481933594, "step": 52390 }, { "epoch": 0.22295, "grad_norm": 6.216642379760742, "learning_rate": 2.404343434343435e-06, "loss": 6.787730407714844, "step": 52395 }, { "epoch": 0.223, "grad_norm": 4.174787521362305, "learning_rate": 2.4040909090909094e-06, "loss": 6.245485305786133, "step": 52400 }, { "epoch": 0.22305, "grad_norm": 6.476919651031494, "learning_rate": 2.403838383838384e-06, "loss": 6.24810676574707, "step": 52405 }, { "epoch": 0.2231, "grad_norm": 4.582688808441162, "learning_rate": 2.4035858585858587e-06, "loss": 6.246949005126953, "step": 52410 }, { "epoch": 0.22315, "grad_norm": 30.439302444458008, "learning_rate": 2.4033333333333338e-06, "loss": 6.451053619384766, "step": 52415 }, { "epoch": 0.2232, "grad_norm": 6.949732303619385, "learning_rate": 2.4030808080808084e-06, "loss": 6.227011108398438, "step": 52420 }, { "epoch": 0.22325, "grad_norm": 11.689817428588867, "learning_rate": 2.402828282828283e-06, "loss": 6.30840835571289, "step": 52425 }, { "epoch": 0.2233, "grad_norm": 7.20681095123291, "learning_rate": 2.4025757575757576e-06, "loss": 6.332405853271484, "step": 52430 }, { "epoch": 0.22335, "grad_norm": 5.264900207519531, "learning_rate": 2.4023232323232327e-06, "loss": 6.244171142578125, "step": 52435 }, { "epoch": 0.2234, "grad_norm": 6.060609340667725, "learning_rate": 2.4020707070707073e-06, "loss": 6.253787231445313, "step": 52440 }, { "epoch": 0.22345, "grad_norm": 7.827465057373047, "learning_rate": 2.401818181818182e-06, "loss": 6.194804763793945, "step": 52445 }, { "epoch": 0.2235, "grad_norm": 6.772429466247559, "learning_rate": 2.4015656565656566e-06, "loss": 6.292319488525391, "step": 52450 }, { "epoch": 0.22355, "grad_norm": 4.152464866638184, "learning_rate": 2.4013131313131316e-06, "loss": 6.315704345703125, "step": 52455 }, { "epoch": 0.2236, "grad_norm": 4.157100200653076, "learning_rate": 2.4010606060606063e-06, "loss": 6.338651275634765, "step": 52460 }, { "epoch": 0.22365, "grad_norm": 3.6980674266815186, "learning_rate": 2.400808080808081e-06, "loss": 6.241244888305664, "step": 52465 }, { "epoch": 0.2237, "grad_norm": 10.671826362609863, "learning_rate": 2.4005555555555555e-06, "loss": 6.20008544921875, "step": 52470 }, { "epoch": 0.22375, "grad_norm": 5.250607967376709, "learning_rate": 2.4003030303030306e-06, "loss": 6.218241119384766, "step": 52475 }, { "epoch": 0.2238, "grad_norm": 10.552903175354004, "learning_rate": 2.4000505050505052e-06, "loss": 6.247449493408203, "step": 52480 }, { "epoch": 0.22385, "grad_norm": 7.499432563781738, "learning_rate": 2.3997979797979803e-06, "loss": 6.285411834716797, "step": 52485 }, { "epoch": 0.2239, "grad_norm": 14.826802253723145, "learning_rate": 2.3995454545454545e-06, "loss": 6.227261734008789, "step": 52490 }, { "epoch": 0.22395, "grad_norm": 6.328614711761475, "learning_rate": 2.3992929292929295e-06, "loss": 6.244483184814453, "step": 52495 }, { "epoch": 0.224, "grad_norm": 7.11382532119751, "learning_rate": 2.399040404040404e-06, "loss": 6.3098197937011715, "step": 52500 }, { "epoch": 0.22405, "grad_norm": 9.61953067779541, "learning_rate": 2.3987878787878792e-06, "loss": 6.271946716308594, "step": 52505 }, { "epoch": 0.2241, "grad_norm": 10.57318115234375, "learning_rate": 2.398535353535354e-06, "loss": 6.249549102783203, "step": 52510 }, { "epoch": 0.22415, "grad_norm": 7.503575801849365, "learning_rate": 2.3982828282828285e-06, "loss": 6.266286849975586, "step": 52515 }, { "epoch": 0.2242, "grad_norm": 15.080215454101562, "learning_rate": 2.398030303030303e-06, "loss": 6.271240997314453, "step": 52520 }, { "epoch": 0.22425, "grad_norm": 7.887917995452881, "learning_rate": 2.397777777777778e-06, "loss": 6.234103012084961, "step": 52525 }, { "epoch": 0.2243, "grad_norm": 5.501219272613525, "learning_rate": 2.397525252525253e-06, "loss": 6.294393920898438, "step": 52530 }, { "epoch": 0.22435, "grad_norm": 4.195410251617432, "learning_rate": 2.3972727272727274e-06, "loss": 6.2273204803466795, "step": 52535 }, { "epoch": 0.2244, "grad_norm": 8.947026252746582, "learning_rate": 2.397020202020202e-06, "loss": 6.2533424377441404, "step": 52540 }, { "epoch": 0.22445, "grad_norm": 5.914705753326416, "learning_rate": 2.396767676767677e-06, "loss": 6.238822174072266, "step": 52545 }, { "epoch": 0.2245, "grad_norm": 4.751138687133789, "learning_rate": 2.3965151515151517e-06, "loss": 6.259105682373047, "step": 52550 }, { "epoch": 0.22455, "grad_norm": 6.736328601837158, "learning_rate": 2.3962626262626264e-06, "loss": 6.256004714965821, "step": 52555 }, { "epoch": 0.2246, "grad_norm": 14.608474731445312, "learning_rate": 2.396010101010101e-06, "loss": 6.287939834594726, "step": 52560 }, { "epoch": 0.22465, "grad_norm": 17.376588821411133, "learning_rate": 2.395757575757576e-06, "loss": 6.255207824707031, "step": 52565 }, { "epoch": 0.2247, "grad_norm": 8.189092636108398, "learning_rate": 2.3955050505050507e-06, "loss": 6.239899063110352, "step": 52570 }, { "epoch": 0.22475, "grad_norm": 14.042427062988281, "learning_rate": 2.3952525252525253e-06, "loss": 6.235666275024414, "step": 52575 }, { "epoch": 0.2248, "grad_norm": 9.25808048248291, "learning_rate": 2.395e-06, "loss": 6.364173889160156, "step": 52580 }, { "epoch": 0.22485, "grad_norm": 7.4218573570251465, "learning_rate": 2.394747474747475e-06, "loss": 6.29021987915039, "step": 52585 }, { "epoch": 0.2249, "grad_norm": 4.790729999542236, "learning_rate": 2.3944949494949496e-06, "loss": 6.234141540527344, "step": 52590 }, { "epoch": 0.22495, "grad_norm": 7.637439727783203, "learning_rate": 2.3942424242424247e-06, "loss": 6.238411331176758, "step": 52595 }, { "epoch": 0.225, "grad_norm": 9.547316551208496, "learning_rate": 2.393989898989899e-06, "loss": 6.263449859619141, "step": 52600 }, { "epoch": 0.22505, "grad_norm": 4.560067176818848, "learning_rate": 2.393737373737374e-06, "loss": 6.326532363891602, "step": 52605 }, { "epoch": 0.2251, "grad_norm": 11.905479431152344, "learning_rate": 2.3934848484848486e-06, "loss": 6.2998603820800785, "step": 52610 }, { "epoch": 0.22515, "grad_norm": 13.626678466796875, "learning_rate": 2.3932323232323236e-06, "loss": 6.165707778930664, "step": 52615 }, { "epoch": 0.2252, "grad_norm": 6.242610931396484, "learning_rate": 2.3929797979797983e-06, "loss": 6.224074935913086, "step": 52620 }, { "epoch": 0.22525, "grad_norm": 5.712303638458252, "learning_rate": 2.392727272727273e-06, "loss": 6.296954727172851, "step": 52625 }, { "epoch": 0.2253, "grad_norm": 4.94373083114624, "learning_rate": 2.3924747474747475e-06, "loss": 6.256424331665039, "step": 52630 }, { "epoch": 0.22535, "grad_norm": 6.344763278961182, "learning_rate": 2.3922222222222226e-06, "loss": 6.1864372253417965, "step": 52635 }, { "epoch": 0.2254, "grad_norm": 5.548081398010254, "learning_rate": 2.391969696969697e-06, "loss": 6.2420166015625, "step": 52640 }, { "epoch": 0.22545, "grad_norm": 4.895356178283691, "learning_rate": 2.391717171717172e-06, "loss": 6.2474109649658205, "step": 52645 }, { "epoch": 0.2255, "grad_norm": 11.65735149383545, "learning_rate": 2.3914646464646465e-06, "loss": 6.27282485961914, "step": 52650 }, { "epoch": 0.22555, "grad_norm": 42.26799011230469, "learning_rate": 2.3912121212121215e-06, "loss": 6.296435928344726, "step": 52655 }, { "epoch": 0.2256, "grad_norm": 6.696413040161133, "learning_rate": 2.390959595959596e-06, "loss": 6.260096740722656, "step": 52660 }, { "epoch": 0.22565, "grad_norm": 16.01081657409668, "learning_rate": 2.3907070707070708e-06, "loss": 6.362845611572266, "step": 52665 }, { "epoch": 0.2257, "grad_norm": 6.042580604553223, "learning_rate": 2.3904545454545454e-06, "loss": 6.283415985107422, "step": 52670 }, { "epoch": 0.22575, "grad_norm": 5.382635116577148, "learning_rate": 2.3902020202020205e-06, "loss": 6.203421401977539, "step": 52675 }, { "epoch": 0.2258, "grad_norm": 6.98234748840332, "learning_rate": 2.389949494949495e-06, "loss": 6.241590881347657, "step": 52680 }, { "epoch": 0.22585, "grad_norm": 5.9295220375061035, "learning_rate": 2.3896969696969697e-06, "loss": 6.2411048889160154, "step": 52685 }, { "epoch": 0.2259, "grad_norm": 12.870237350463867, "learning_rate": 2.3894444444444443e-06, "loss": 6.284702301025391, "step": 52690 }, { "epoch": 0.22595, "grad_norm": 5.940826892852783, "learning_rate": 2.3891919191919194e-06, "loss": 6.289900207519532, "step": 52695 }, { "epoch": 0.226, "grad_norm": 6.892583847045898, "learning_rate": 2.388939393939394e-06, "loss": 6.264262390136719, "step": 52700 }, { "epoch": 0.22605, "grad_norm": 4.7045817375183105, "learning_rate": 2.388686868686869e-06, "loss": 6.310009384155274, "step": 52705 }, { "epoch": 0.2261, "grad_norm": 5.509156703948975, "learning_rate": 2.3884343434343433e-06, "loss": 6.217286682128906, "step": 52710 }, { "epoch": 0.22615, "grad_norm": 5.08131742477417, "learning_rate": 2.3881818181818183e-06, "loss": 6.24029312133789, "step": 52715 }, { "epoch": 0.2262, "grad_norm": 2.949183225631714, "learning_rate": 2.3879292929292934e-06, "loss": 6.287076568603515, "step": 52720 }, { "epoch": 0.22625, "grad_norm": 4.732574462890625, "learning_rate": 2.387676767676768e-06, "loss": 6.259012603759766, "step": 52725 }, { "epoch": 0.2263, "grad_norm": 6.7884416580200195, "learning_rate": 2.3874242424242427e-06, "loss": 6.093778991699219, "step": 52730 }, { "epoch": 0.22635, "grad_norm": 5.899266719818115, "learning_rate": 2.3871717171717173e-06, "loss": 6.253596496582031, "step": 52735 }, { "epoch": 0.2264, "grad_norm": 3.816175937652588, "learning_rate": 2.3869191919191923e-06, "loss": 6.228401565551758, "step": 52740 }, { "epoch": 0.22645, "grad_norm": 6.019454479217529, "learning_rate": 2.386666666666667e-06, "loss": 6.255288696289062, "step": 52745 }, { "epoch": 0.2265, "grad_norm": 3.8804025650024414, "learning_rate": 2.3864141414141416e-06, "loss": 6.24297866821289, "step": 52750 }, { "epoch": 0.22655, "grad_norm": 4.743251800537109, "learning_rate": 2.3861616161616162e-06, "loss": 6.253774261474609, "step": 52755 }, { "epoch": 0.2266, "grad_norm": 4.305018901824951, "learning_rate": 2.3859090909090913e-06, "loss": 6.274639129638672, "step": 52760 }, { "epoch": 0.22665, "grad_norm": 3.2332372665405273, "learning_rate": 2.385656565656566e-06, "loss": 6.238309097290039, "step": 52765 }, { "epoch": 0.2267, "grad_norm": 19.700927734375, "learning_rate": 2.3854040404040405e-06, "loss": 6.268733215332031, "step": 52770 }, { "epoch": 0.22675, "grad_norm": 10.268705368041992, "learning_rate": 2.385151515151515e-06, "loss": 6.306632995605469, "step": 52775 }, { "epoch": 0.2268, "grad_norm": 7.965517044067383, "learning_rate": 2.3848989898989902e-06, "loss": 6.271483612060547, "step": 52780 }, { "epoch": 0.22685, "grad_norm": 9.208451271057129, "learning_rate": 2.384646464646465e-06, "loss": 6.544108581542969, "step": 52785 }, { "epoch": 0.2269, "grad_norm": 6.381935119628906, "learning_rate": 2.38439393939394e-06, "loss": 6.268777847290039, "step": 52790 }, { "epoch": 0.22695, "grad_norm": 5.593729496002197, "learning_rate": 2.384141414141414e-06, "loss": 6.272514724731446, "step": 52795 }, { "epoch": 0.227, "grad_norm": 7.758678436279297, "learning_rate": 2.383888888888889e-06, "loss": 6.2687225341796875, "step": 52800 }, { "epoch": 0.22705, "grad_norm": 6.1352667808532715, "learning_rate": 2.383636363636364e-06, "loss": 6.276731491088867, "step": 52805 }, { "epoch": 0.2271, "grad_norm": 18.912921905517578, "learning_rate": 2.383383838383839e-06, "loss": 6.365979385375977, "step": 52810 }, { "epoch": 0.22715, "grad_norm": 5.295742511749268, "learning_rate": 2.3831313131313135e-06, "loss": 6.232402420043945, "step": 52815 }, { "epoch": 0.2272, "grad_norm": 6.725299835205078, "learning_rate": 2.382878787878788e-06, "loss": 6.249851226806641, "step": 52820 }, { "epoch": 0.22725, "grad_norm": 8.051712036132812, "learning_rate": 2.3826262626262627e-06, "loss": 6.280692672729492, "step": 52825 }, { "epoch": 0.2273, "grad_norm": 6.897154808044434, "learning_rate": 2.382373737373738e-06, "loss": 6.256839370727539, "step": 52830 }, { "epoch": 0.22735, "grad_norm": 4.368690490722656, "learning_rate": 2.3821212121212124e-06, "loss": 6.1983589172363285, "step": 52835 }, { "epoch": 0.2274, "grad_norm": 7.282412052154541, "learning_rate": 2.381868686868687e-06, "loss": 6.211429595947266, "step": 52840 }, { "epoch": 0.22745, "grad_norm": 7.026226043701172, "learning_rate": 2.3816161616161617e-06, "loss": 6.24368896484375, "step": 52845 }, { "epoch": 0.2275, "grad_norm": 15.75161075592041, "learning_rate": 2.3813636363636367e-06, "loss": 6.237217712402344, "step": 52850 }, { "epoch": 0.22755, "grad_norm": 6.978920936584473, "learning_rate": 2.3811111111111114e-06, "loss": 6.2814075469970705, "step": 52855 }, { "epoch": 0.2276, "grad_norm": 5.276544094085693, "learning_rate": 2.380858585858586e-06, "loss": 6.234728240966797, "step": 52860 }, { "epoch": 0.22765, "grad_norm": 9.062911987304688, "learning_rate": 2.3806060606060606e-06, "loss": 6.330411911010742, "step": 52865 }, { "epoch": 0.2277, "grad_norm": 6.460860729217529, "learning_rate": 2.3803535353535357e-06, "loss": 6.295930099487305, "step": 52870 }, { "epoch": 0.22775, "grad_norm": 7.033974647521973, "learning_rate": 2.3801010101010103e-06, "loss": 6.249566650390625, "step": 52875 }, { "epoch": 0.2278, "grad_norm": 5.7318010330200195, "learning_rate": 2.379848484848485e-06, "loss": 6.244935989379883, "step": 52880 }, { "epoch": 0.22785, "grad_norm": 11.033978462219238, "learning_rate": 2.3795959595959596e-06, "loss": 6.241049194335938, "step": 52885 }, { "epoch": 0.2279, "grad_norm": 8.877074241638184, "learning_rate": 2.3793434343434346e-06, "loss": 6.316382980346679, "step": 52890 }, { "epoch": 0.22795, "grad_norm": 4.7265944480896, "learning_rate": 2.3790909090909093e-06, "loss": 6.270899963378906, "step": 52895 }, { "epoch": 0.228, "grad_norm": 10.41481876373291, "learning_rate": 2.3788383838383843e-06, "loss": 6.263525009155273, "step": 52900 }, { "epoch": 0.22805, "grad_norm": 14.487459182739258, "learning_rate": 2.3785858585858585e-06, "loss": 6.360616302490234, "step": 52905 }, { "epoch": 0.2281, "grad_norm": 8.722671508789062, "learning_rate": 2.3783333333333336e-06, "loss": 6.2024188995361325, "step": 52910 }, { "epoch": 0.22815, "grad_norm": 5.225551128387451, "learning_rate": 2.378080808080808e-06, "loss": 6.219288635253906, "step": 52915 }, { "epoch": 0.2282, "grad_norm": 7.050472736358643, "learning_rate": 2.3778282828282833e-06, "loss": 6.2775623321533205, "step": 52920 }, { "epoch": 0.22825, "grad_norm": 6.2340474128723145, "learning_rate": 2.377575757575758e-06, "loss": 6.271038818359375, "step": 52925 }, { "epoch": 0.2283, "grad_norm": 22.0024471282959, "learning_rate": 2.3773232323232325e-06, "loss": 6.297608947753906, "step": 52930 }, { "epoch": 0.22835, "grad_norm": 8.659248352050781, "learning_rate": 2.377070707070707e-06, "loss": 6.1948810577392575, "step": 52935 }, { "epoch": 0.2284, "grad_norm": 7.3813629150390625, "learning_rate": 2.376818181818182e-06, "loss": 6.309517669677734, "step": 52940 }, { "epoch": 0.22845, "grad_norm": 7.237553596496582, "learning_rate": 2.376565656565657e-06, "loss": 6.318824005126953, "step": 52945 }, { "epoch": 0.2285, "grad_norm": 5.929116725921631, "learning_rate": 2.3763131313131315e-06, "loss": 6.292051696777344, "step": 52950 }, { "epoch": 0.22855, "grad_norm": 5.5734405517578125, "learning_rate": 2.376060606060606e-06, "loss": 6.246417236328125, "step": 52955 }, { "epoch": 0.2286, "grad_norm": 7.698760032653809, "learning_rate": 2.375808080808081e-06, "loss": 6.254170227050781, "step": 52960 }, { "epoch": 0.22865, "grad_norm": 3.9026644229888916, "learning_rate": 2.3755555555555558e-06, "loss": 6.257820892333984, "step": 52965 }, { "epoch": 0.2287, "grad_norm": 7.1406731605529785, "learning_rate": 2.3753030303030304e-06, "loss": 6.272229766845703, "step": 52970 }, { "epoch": 0.22875, "grad_norm": 4.415365695953369, "learning_rate": 2.375050505050505e-06, "loss": 6.238120651245117, "step": 52975 }, { "epoch": 0.2288, "grad_norm": 8.99929141998291, "learning_rate": 2.37479797979798e-06, "loss": 6.270860290527343, "step": 52980 }, { "epoch": 0.22885, "grad_norm": 7.235668182373047, "learning_rate": 2.3745454545454547e-06, "loss": 6.243309783935547, "step": 52985 }, { "epoch": 0.2289, "grad_norm": 3.673187732696533, "learning_rate": 2.3742929292929293e-06, "loss": 6.24022331237793, "step": 52990 }, { "epoch": 0.22895, "grad_norm": 3.975358247756958, "learning_rate": 2.374040404040404e-06, "loss": 6.247119903564453, "step": 52995 }, { "epoch": 0.229, "grad_norm": 22.765533447265625, "learning_rate": 2.373787878787879e-06, "loss": 6.247616958618164, "step": 53000 }, { "epoch": 0.22905, "grad_norm": 4.613255023956299, "learning_rate": 2.3735353535353537e-06, "loss": 6.272881698608399, "step": 53005 }, { "epoch": 0.2291, "grad_norm": 10.496294021606445, "learning_rate": 2.3732828282828287e-06, "loss": 6.2448783874511715, "step": 53010 }, { "epoch": 0.22915, "grad_norm": 5.510524749755859, "learning_rate": 2.373030303030303e-06, "loss": 6.244981384277343, "step": 53015 }, { "epoch": 0.2292, "grad_norm": 7.0647077560424805, "learning_rate": 2.372777777777778e-06, "loss": 6.289427185058594, "step": 53020 }, { "epoch": 0.22925, "grad_norm": 6.648190975189209, "learning_rate": 2.3725252525252526e-06, "loss": 6.268463516235352, "step": 53025 }, { "epoch": 0.2293, "grad_norm": 4.701155185699463, "learning_rate": 2.3722727272727277e-06, "loss": 6.218289184570312, "step": 53030 }, { "epoch": 0.22935, "grad_norm": 11.808871269226074, "learning_rate": 2.3720202020202023e-06, "loss": 6.221478271484375, "step": 53035 }, { "epoch": 0.2294, "grad_norm": 8.761188507080078, "learning_rate": 2.371767676767677e-06, "loss": 6.223453140258789, "step": 53040 }, { "epoch": 0.22945, "grad_norm": 7.281553745269775, "learning_rate": 2.3715151515151516e-06, "loss": 6.233318710327149, "step": 53045 }, { "epoch": 0.2295, "grad_norm": 7.76390266418457, "learning_rate": 2.3712626262626266e-06, "loss": 6.28691291809082, "step": 53050 }, { "epoch": 0.22955, "grad_norm": 6.370750904083252, "learning_rate": 2.3710101010101012e-06, "loss": 6.213009643554687, "step": 53055 }, { "epoch": 0.2296, "grad_norm": 7.755419731140137, "learning_rate": 2.370757575757576e-06, "loss": 6.260792541503906, "step": 53060 }, { "epoch": 0.22965, "grad_norm": 8.515199661254883, "learning_rate": 2.3705050505050505e-06, "loss": 6.2456916809082035, "step": 53065 }, { "epoch": 0.2297, "grad_norm": 6.36236572265625, "learning_rate": 2.3702525252525255e-06, "loss": 6.2869110107421875, "step": 53070 }, { "epoch": 0.22975, "grad_norm": 8.818909645080566, "learning_rate": 2.37e-06, "loss": 6.225117874145508, "step": 53075 }, { "epoch": 0.2298, "grad_norm": 5.314656734466553, "learning_rate": 2.369747474747475e-06, "loss": 6.240492248535157, "step": 53080 }, { "epoch": 0.22985, "grad_norm": 21.82257652282715, "learning_rate": 2.3694949494949494e-06, "loss": 6.348095703125, "step": 53085 }, { "epoch": 0.2299, "grad_norm": 5.875729560852051, "learning_rate": 2.3692424242424245e-06, "loss": 6.307929992675781, "step": 53090 }, { "epoch": 0.22995, "grad_norm": 8.912537574768066, "learning_rate": 2.368989898989899e-06, "loss": 6.235069656372071, "step": 53095 }, { "epoch": 0.23, "grad_norm": 17.402099609375, "learning_rate": 2.3687373737373738e-06, "loss": 6.496294403076172, "step": 53100 }, { "epoch": 0.23005, "grad_norm": 8.835875511169434, "learning_rate": 2.3684848484848484e-06, "loss": 6.345256805419922, "step": 53105 }, { "epoch": 0.2301, "grad_norm": 8.79249095916748, "learning_rate": 2.3682323232323234e-06, "loss": 6.302012252807617, "step": 53110 }, { "epoch": 0.23015, "grad_norm": 7.064168930053711, "learning_rate": 2.367979797979798e-06, "loss": 6.245014572143555, "step": 53115 }, { "epoch": 0.2302, "grad_norm": 6.717340469360352, "learning_rate": 2.367727272727273e-06, "loss": 6.2440650939941404, "step": 53120 }, { "epoch": 0.23025, "grad_norm": 6.546894073486328, "learning_rate": 2.3674747474747473e-06, "loss": 6.0848442077636715, "step": 53125 }, { "epoch": 0.2303, "grad_norm": 7.849869251251221, "learning_rate": 2.3672222222222224e-06, "loss": 6.210033416748047, "step": 53130 }, { "epoch": 0.23035, "grad_norm": 8.853202819824219, "learning_rate": 2.366969696969697e-06, "loss": 6.308766937255859, "step": 53135 }, { "epoch": 0.2304, "grad_norm": 8.410033226013184, "learning_rate": 2.366717171717172e-06, "loss": 6.237578964233398, "step": 53140 }, { "epoch": 0.23045, "grad_norm": 5.213444709777832, "learning_rate": 2.3664646464646467e-06, "loss": 6.262342834472657, "step": 53145 }, { "epoch": 0.2305, "grad_norm": 7.316669464111328, "learning_rate": 2.3662121212121213e-06, "loss": 6.256608581542968, "step": 53150 }, { "epoch": 0.23055, "grad_norm": 9.157419204711914, "learning_rate": 2.3659595959595964e-06, "loss": 6.269996261596679, "step": 53155 }, { "epoch": 0.2306, "grad_norm": 5.823645114898682, "learning_rate": 2.365707070707071e-06, "loss": 6.515411376953125, "step": 53160 }, { "epoch": 0.23065, "grad_norm": 7.314699172973633, "learning_rate": 2.3654545454545456e-06, "loss": 6.3220161437988285, "step": 53165 }, { "epoch": 0.2307, "grad_norm": 8.216435432434082, "learning_rate": 2.3652020202020203e-06, "loss": 6.295036315917969, "step": 53170 }, { "epoch": 0.23075, "grad_norm": 7.999406337738037, "learning_rate": 2.3649494949494953e-06, "loss": 6.289308166503906, "step": 53175 }, { "epoch": 0.2308, "grad_norm": 12.799994468688965, "learning_rate": 2.36469696969697e-06, "loss": 6.24447021484375, "step": 53180 }, { "epoch": 0.23085, "grad_norm": 4.691068172454834, "learning_rate": 2.3644444444444446e-06, "loss": 6.273946380615234, "step": 53185 }, { "epoch": 0.2309, "grad_norm": 7.560248851776123, "learning_rate": 2.3641919191919192e-06, "loss": 6.263804244995117, "step": 53190 }, { "epoch": 0.23095, "grad_norm": 4.034327983856201, "learning_rate": 2.3639393939393943e-06, "loss": 6.273004531860352, "step": 53195 }, { "epoch": 0.231, "grad_norm": 14.23069953918457, "learning_rate": 2.363686868686869e-06, "loss": 6.483370208740235, "step": 53200 }, { "epoch": 0.23105, "grad_norm": 8.343323707580566, "learning_rate": 2.363434343434344e-06, "loss": 6.366194534301758, "step": 53205 }, { "epoch": 0.2311, "grad_norm": 7.490577220916748, "learning_rate": 2.363181818181818e-06, "loss": 6.240438842773438, "step": 53210 }, { "epoch": 0.23115, "grad_norm": 6.203300952911377, "learning_rate": 2.362929292929293e-06, "loss": 6.272561264038086, "step": 53215 }, { "epoch": 0.2312, "grad_norm": 5.420987606048584, "learning_rate": 2.362676767676768e-06, "loss": 6.201140594482422, "step": 53220 }, { "epoch": 0.23125, "grad_norm": 13.277322769165039, "learning_rate": 2.362424242424243e-06, "loss": 6.397774124145508, "step": 53225 }, { "epoch": 0.2313, "grad_norm": 8.926644325256348, "learning_rate": 2.3621717171717175e-06, "loss": 6.6491546630859375, "step": 53230 }, { "epoch": 0.23135, "grad_norm": 6.889772891998291, "learning_rate": 2.361919191919192e-06, "loss": 6.256970977783203, "step": 53235 }, { "epoch": 0.2314, "grad_norm": 14.16237735748291, "learning_rate": 2.3616666666666668e-06, "loss": 6.280641555786133, "step": 53240 }, { "epoch": 0.23145, "grad_norm": 8.60301399230957, "learning_rate": 2.361414141414142e-06, "loss": 6.255502319335937, "step": 53245 }, { "epoch": 0.2315, "grad_norm": 4.302947044372559, "learning_rate": 2.3611616161616165e-06, "loss": 6.234688949584961, "step": 53250 }, { "epoch": 0.23155, "grad_norm": 15.809090614318848, "learning_rate": 2.360909090909091e-06, "loss": 6.212442016601562, "step": 53255 }, { "epoch": 0.2316, "grad_norm": 6.971460819244385, "learning_rate": 2.3606565656565657e-06, "loss": 6.226432800292969, "step": 53260 }, { "epoch": 0.23165, "grad_norm": 7.485438823699951, "learning_rate": 2.3604040404040408e-06, "loss": 6.2315673828125, "step": 53265 }, { "epoch": 0.2317, "grad_norm": 27.556230545043945, "learning_rate": 2.3601515151515154e-06, "loss": 6.384100341796875, "step": 53270 }, { "epoch": 0.23175, "grad_norm": 10.860330581665039, "learning_rate": 2.35989898989899e-06, "loss": 6.253578948974609, "step": 53275 }, { "epoch": 0.2318, "grad_norm": 5.014923095703125, "learning_rate": 2.3596464646464647e-06, "loss": 6.239463806152344, "step": 53280 }, { "epoch": 0.23185, "grad_norm": 4.233177185058594, "learning_rate": 2.3593939393939397e-06, "loss": 6.257968139648438, "step": 53285 }, { "epoch": 0.2319, "grad_norm": 5.430302619934082, "learning_rate": 2.3591414141414144e-06, "loss": 6.268344497680664, "step": 53290 }, { "epoch": 0.23195, "grad_norm": 5.219926357269287, "learning_rate": 2.358888888888889e-06, "loss": 6.2741447448730465, "step": 53295 }, { "epoch": 0.232, "grad_norm": 7.933003902435303, "learning_rate": 2.3586363636363636e-06, "loss": 6.268058395385742, "step": 53300 }, { "epoch": 0.23205, "grad_norm": 9.07319164276123, "learning_rate": 2.3583838383838387e-06, "loss": 6.267370223999023, "step": 53305 }, { "epoch": 0.2321, "grad_norm": 5.728250503540039, "learning_rate": 2.3581313131313133e-06, "loss": 6.198454284667969, "step": 53310 }, { "epoch": 0.23215, "grad_norm": 4.031686305999756, "learning_rate": 2.3578787878787884e-06, "loss": 6.272827911376953, "step": 53315 }, { "epoch": 0.2322, "grad_norm": 7.460416316986084, "learning_rate": 2.3576262626262626e-06, "loss": 6.270056533813476, "step": 53320 }, { "epoch": 0.23225, "grad_norm": 5.062841892242432, "learning_rate": 2.3573737373737376e-06, "loss": 6.266344451904297, "step": 53325 }, { "epoch": 0.2323, "grad_norm": 5.634907245635986, "learning_rate": 2.3571212121212122e-06, "loss": 6.27098388671875, "step": 53330 }, { "epoch": 0.23235, "grad_norm": 36.617557525634766, "learning_rate": 2.3568686868686873e-06, "loss": 6.216738891601563, "step": 53335 }, { "epoch": 0.2324, "grad_norm": 17.140655517578125, "learning_rate": 2.356616161616162e-06, "loss": 6.545231628417969, "step": 53340 }, { "epoch": 0.23245, "grad_norm": 13.684148788452148, "learning_rate": 2.3563636363636366e-06, "loss": 6.297846984863281, "step": 53345 }, { "epoch": 0.2325, "grad_norm": 4.586150646209717, "learning_rate": 2.356111111111111e-06, "loss": 6.223745727539063, "step": 53350 }, { "epoch": 0.23255, "grad_norm": 5.163801193237305, "learning_rate": 2.3558585858585862e-06, "loss": 6.255415725708008, "step": 53355 }, { "epoch": 0.2326, "grad_norm": 7.0720109939575195, "learning_rate": 2.355606060606061e-06, "loss": 6.270806884765625, "step": 53360 }, { "epoch": 0.23265, "grad_norm": 4.6654791831970215, "learning_rate": 2.3553535353535355e-06, "loss": 6.296715545654297, "step": 53365 }, { "epoch": 0.2327, "grad_norm": 4.055288791656494, "learning_rate": 2.35510101010101e-06, "loss": 6.220745849609375, "step": 53370 }, { "epoch": 0.23275, "grad_norm": 4.538575649261475, "learning_rate": 2.354848484848485e-06, "loss": 6.284283828735352, "step": 53375 }, { "epoch": 0.2328, "grad_norm": 6.011362552642822, "learning_rate": 2.35459595959596e-06, "loss": 6.260654449462891, "step": 53380 }, { "epoch": 0.23285, "grad_norm": 6.445408821105957, "learning_rate": 2.3543434343434344e-06, "loss": 6.230744171142578, "step": 53385 }, { "epoch": 0.2329, "grad_norm": 6.240857124328613, "learning_rate": 2.354090909090909e-06, "loss": 6.475697326660156, "step": 53390 }, { "epoch": 0.23295, "grad_norm": 27.557514190673828, "learning_rate": 2.353838383838384e-06, "loss": 6.204234313964844, "step": 53395 }, { "epoch": 0.233, "grad_norm": 5.340032577514648, "learning_rate": 2.3535858585858588e-06, "loss": 6.253090286254883, "step": 53400 }, { "epoch": 0.23305, "grad_norm": 6.5885233879089355, "learning_rate": 2.3533333333333334e-06, "loss": 6.234671401977539, "step": 53405 }, { "epoch": 0.2331, "grad_norm": 5.60505485534668, "learning_rate": 2.353080808080808e-06, "loss": 6.212288665771484, "step": 53410 }, { "epoch": 0.23315, "grad_norm": 5.821695804595947, "learning_rate": 2.352828282828283e-06, "loss": 6.2543998718261715, "step": 53415 }, { "epoch": 0.2332, "grad_norm": 5.7717390060424805, "learning_rate": 2.3525757575757577e-06, "loss": 6.273482131958008, "step": 53420 }, { "epoch": 0.23325, "grad_norm": 5.381186485290527, "learning_rate": 2.3523232323232328e-06, "loss": 6.309925842285156, "step": 53425 }, { "epoch": 0.2333, "grad_norm": 6.150279998779297, "learning_rate": 2.352070707070707e-06, "loss": 6.257486724853516, "step": 53430 }, { "epoch": 0.23335, "grad_norm": 8.3312349319458, "learning_rate": 2.351818181818182e-06, "loss": 6.242471313476562, "step": 53435 }, { "epoch": 0.2334, "grad_norm": 6.206791400909424, "learning_rate": 2.3515656565656566e-06, "loss": 6.236491394042969, "step": 53440 }, { "epoch": 0.23345, "grad_norm": 13.355551719665527, "learning_rate": 2.3513131313131317e-06, "loss": 6.2409820556640625, "step": 53445 }, { "epoch": 0.2335, "grad_norm": 9.038858413696289, "learning_rate": 2.3510606060606063e-06, "loss": 6.2145835876464846, "step": 53450 }, { "epoch": 0.23355, "grad_norm": 4.17866849899292, "learning_rate": 2.350808080808081e-06, "loss": 6.226856994628906, "step": 53455 }, { "epoch": 0.2336, "grad_norm": 7.830767631530762, "learning_rate": 2.3505555555555556e-06, "loss": 6.276983642578125, "step": 53460 }, { "epoch": 0.23365, "grad_norm": 5.117328643798828, "learning_rate": 2.3503030303030306e-06, "loss": 6.231433486938476, "step": 53465 }, { "epoch": 0.2337, "grad_norm": 7.067992210388184, "learning_rate": 2.3500505050505053e-06, "loss": 6.251094055175781, "step": 53470 }, { "epoch": 0.23375, "grad_norm": 7.036153793334961, "learning_rate": 2.34979797979798e-06, "loss": 6.30317497253418, "step": 53475 }, { "epoch": 0.2338, "grad_norm": 6.098345756530762, "learning_rate": 2.3495454545454545e-06, "loss": 6.250276184082031, "step": 53480 }, { "epoch": 0.23385, "grad_norm": 10.543997764587402, "learning_rate": 2.3492929292929296e-06, "loss": 6.298635482788086, "step": 53485 }, { "epoch": 0.2339, "grad_norm": 5.900421142578125, "learning_rate": 2.3490404040404042e-06, "loss": 6.36578254699707, "step": 53490 }, { "epoch": 0.23395, "grad_norm": 8.256733894348145, "learning_rate": 2.348787878787879e-06, "loss": 6.274597930908203, "step": 53495 }, { "epoch": 0.234, "grad_norm": 8.044977188110352, "learning_rate": 2.3485353535353535e-06, "loss": 6.297975158691406, "step": 53500 }, { "epoch": 0.23405, "grad_norm": 5.961170673370361, "learning_rate": 2.3482828282828285e-06, "loss": 6.4507499694824215, "step": 53505 }, { "epoch": 0.2341, "grad_norm": 6.538655757904053, "learning_rate": 2.348030303030303e-06, "loss": 6.283943939208984, "step": 53510 }, { "epoch": 0.23415, "grad_norm": 6.557984828948975, "learning_rate": 2.347777777777778e-06, "loss": 6.237919616699219, "step": 53515 }, { "epoch": 0.2342, "grad_norm": 14.916029930114746, "learning_rate": 2.3475252525252524e-06, "loss": 6.294883346557617, "step": 53520 }, { "epoch": 0.23425, "grad_norm": 4.879173278808594, "learning_rate": 2.3472727272727275e-06, "loss": 6.272137069702149, "step": 53525 }, { "epoch": 0.2343, "grad_norm": 8.087883949279785, "learning_rate": 2.347020202020202e-06, "loss": 6.2599845886230465, "step": 53530 }, { "epoch": 0.23435, "grad_norm": 27.278026580810547, "learning_rate": 2.346767676767677e-06, "loss": 6.351028060913086, "step": 53535 }, { "epoch": 0.2344, "grad_norm": 5.802356719970703, "learning_rate": 2.346515151515152e-06, "loss": 6.185101699829102, "step": 53540 }, { "epoch": 0.23445, "grad_norm": 14.441225051879883, "learning_rate": 2.3462626262626264e-06, "loss": 6.327689743041992, "step": 53545 }, { "epoch": 0.2345, "grad_norm": 10.661688804626465, "learning_rate": 2.346010101010101e-06, "loss": 6.228810119628906, "step": 53550 }, { "epoch": 0.23455, "grad_norm": 6.834971904754639, "learning_rate": 2.345757575757576e-06, "loss": 6.219322586059571, "step": 53555 }, { "epoch": 0.2346, "grad_norm": 5.088314056396484, "learning_rate": 2.3455050505050507e-06, "loss": 6.208656311035156, "step": 53560 }, { "epoch": 0.23465, "grad_norm": 5.115019798278809, "learning_rate": 2.3452525252525254e-06, "loss": 6.2860267639160154, "step": 53565 }, { "epoch": 0.2347, "grad_norm": 6.357446670532227, "learning_rate": 2.345e-06, "loss": 6.278659820556641, "step": 53570 }, { "epoch": 0.23475, "grad_norm": 7.40872049331665, "learning_rate": 2.344747474747475e-06, "loss": 6.296005630493164, "step": 53575 }, { "epoch": 0.2348, "grad_norm": 8.839702606201172, "learning_rate": 2.3444949494949497e-06, "loss": 6.209235382080078, "step": 53580 }, { "epoch": 0.23485, "grad_norm": 4.300047397613525, "learning_rate": 2.3442424242424243e-06, "loss": 6.24438591003418, "step": 53585 }, { "epoch": 0.2349, "grad_norm": 10.100625991821289, "learning_rate": 2.3439898989898994e-06, "loss": 6.413852691650391, "step": 53590 }, { "epoch": 0.23495, "grad_norm": 10.618470191955566, "learning_rate": 2.343737373737374e-06, "loss": 6.210538864135742, "step": 53595 }, { "epoch": 0.235, "grad_norm": 5.6654462814331055, "learning_rate": 2.3434848484848486e-06, "loss": 6.287140655517578, "step": 53600 }, { "epoch": 0.23505, "grad_norm": 6.122907638549805, "learning_rate": 2.3432323232323233e-06, "loss": 6.261139678955078, "step": 53605 }, { "epoch": 0.2351, "grad_norm": 6.306246757507324, "learning_rate": 2.3429797979797983e-06, "loss": 6.240013122558594, "step": 53610 }, { "epoch": 0.23515, "grad_norm": 8.00345516204834, "learning_rate": 2.342727272727273e-06, "loss": 6.235994720458985, "step": 53615 }, { "epoch": 0.2352, "grad_norm": 5.697434902191162, "learning_rate": 2.342474747474748e-06, "loss": 6.2341758728027346, "step": 53620 }, { "epoch": 0.23525, "grad_norm": 5.042847156524658, "learning_rate": 2.342222222222222e-06, "loss": 6.25274429321289, "step": 53625 }, { "epoch": 0.2353, "grad_norm": 7.294451713562012, "learning_rate": 2.3419696969696972e-06, "loss": 6.249192428588867, "step": 53630 }, { "epoch": 0.23535, "grad_norm": 5.517917156219482, "learning_rate": 2.341717171717172e-06, "loss": 6.251516723632813, "step": 53635 }, { "epoch": 0.2354, "grad_norm": 6.75472354888916, "learning_rate": 2.341464646464647e-06, "loss": 6.254238128662109, "step": 53640 }, { "epoch": 0.23545, "grad_norm": 16.74461555480957, "learning_rate": 2.3412121212121216e-06, "loss": 6.557623291015625, "step": 53645 }, { "epoch": 0.2355, "grad_norm": 10.20921516418457, "learning_rate": 2.340959595959596e-06, "loss": 6.428852844238281, "step": 53650 }, { "epoch": 0.23555, "grad_norm": 5.756483554840088, "learning_rate": 2.340707070707071e-06, "loss": 6.304438781738281, "step": 53655 }, { "epoch": 0.2356, "grad_norm": 8.824699401855469, "learning_rate": 2.340454545454546e-06, "loss": 6.268002700805664, "step": 53660 }, { "epoch": 0.23565, "grad_norm": 6.253388404846191, "learning_rate": 2.3402020202020205e-06, "loss": 6.283495330810547, "step": 53665 }, { "epoch": 0.2357, "grad_norm": 7.616441249847412, "learning_rate": 2.339949494949495e-06, "loss": 6.225893020629883, "step": 53670 }, { "epoch": 0.23575, "grad_norm": 6.605254650115967, "learning_rate": 2.3396969696969698e-06, "loss": 6.310692977905274, "step": 53675 }, { "epoch": 0.2358, "grad_norm": 9.558233261108398, "learning_rate": 2.339444444444445e-06, "loss": 6.255867004394531, "step": 53680 }, { "epoch": 0.23585, "grad_norm": 4.679188251495361, "learning_rate": 2.3391919191919195e-06, "loss": 6.25818977355957, "step": 53685 }, { "epoch": 0.2359, "grad_norm": 5.517607688903809, "learning_rate": 2.338939393939394e-06, "loss": 6.2270050048828125, "step": 53690 }, { "epoch": 0.23595, "grad_norm": 3.0695221424102783, "learning_rate": 2.3386868686868687e-06, "loss": 6.223728179931641, "step": 53695 }, { "epoch": 0.236, "grad_norm": 15.22093391418457, "learning_rate": 2.3384343434343438e-06, "loss": 6.23736343383789, "step": 53700 }, { "epoch": 0.23605, "grad_norm": 4.818389892578125, "learning_rate": 2.3381818181818184e-06, "loss": 6.47191162109375, "step": 53705 }, { "epoch": 0.2361, "grad_norm": 4.1582441329956055, "learning_rate": 2.337929292929293e-06, "loss": 6.293943786621094, "step": 53710 }, { "epoch": 0.23615, "grad_norm": 5.113410949707031, "learning_rate": 2.3376767676767677e-06, "loss": 6.249391174316406, "step": 53715 }, { "epoch": 0.2362, "grad_norm": 23.384199142456055, "learning_rate": 2.3374242424242427e-06, "loss": 6.253348922729492, "step": 53720 }, { "epoch": 0.23625, "grad_norm": 4.60028076171875, "learning_rate": 2.3371717171717173e-06, "loss": 6.210304641723633, "step": 53725 }, { "epoch": 0.2363, "grad_norm": 9.484696388244629, "learning_rate": 2.3369191919191924e-06, "loss": 6.265382385253906, "step": 53730 }, { "epoch": 0.23635, "grad_norm": 7.537496089935303, "learning_rate": 2.3366666666666666e-06, "loss": 6.313232803344727, "step": 53735 }, { "epoch": 0.2364, "grad_norm": 11.54015827178955, "learning_rate": 2.3364141414141417e-06, "loss": 6.268046569824219, "step": 53740 }, { "epoch": 0.23645, "grad_norm": 4.382897853851318, "learning_rate": 2.3361616161616163e-06, "loss": 6.268429565429687, "step": 53745 }, { "epoch": 0.2365, "grad_norm": 5.609687328338623, "learning_rate": 2.3359090909090913e-06, "loss": 6.334974670410157, "step": 53750 }, { "epoch": 0.23655, "grad_norm": 8.48486328125, "learning_rate": 2.335656565656566e-06, "loss": 6.284505081176758, "step": 53755 }, { "epoch": 0.2366, "grad_norm": 3.6077966690063477, "learning_rate": 2.3354040404040406e-06, "loss": 6.343404388427734, "step": 53760 }, { "epoch": 0.23665, "grad_norm": 5.21568489074707, "learning_rate": 2.3351515151515152e-06, "loss": 6.256167221069336, "step": 53765 }, { "epoch": 0.2367, "grad_norm": 5.782641410827637, "learning_rate": 2.3348989898989903e-06, "loss": 6.233061218261719, "step": 53770 }, { "epoch": 0.23675, "grad_norm": 5.2553582191467285, "learning_rate": 2.334646464646465e-06, "loss": 6.2958229064941404, "step": 53775 }, { "epoch": 0.2368, "grad_norm": 4.044824600219727, "learning_rate": 2.3343939393939395e-06, "loss": 6.273386383056641, "step": 53780 }, { "epoch": 0.23685, "grad_norm": 17.71698570251465, "learning_rate": 2.334141414141414e-06, "loss": 6.630094909667969, "step": 53785 }, { "epoch": 0.2369, "grad_norm": 7.396774768829346, "learning_rate": 2.3338888888888892e-06, "loss": 6.229325485229492, "step": 53790 }, { "epoch": 0.23695, "grad_norm": 13.644354820251465, "learning_rate": 2.333636363636364e-06, "loss": 6.3495849609375, "step": 53795 }, { "epoch": 0.237, "grad_norm": 7.941227912902832, "learning_rate": 2.3333838383838385e-06, "loss": 6.3047935485839846, "step": 53800 }, { "epoch": 0.23705, "grad_norm": 10.639864921569824, "learning_rate": 2.333131313131313e-06, "loss": 6.283601379394531, "step": 53805 }, { "epoch": 0.2371, "grad_norm": 6.333852291107178, "learning_rate": 2.332878787878788e-06, "loss": 6.273969650268555, "step": 53810 }, { "epoch": 0.23715, "grad_norm": 4.345081806182861, "learning_rate": 2.332626262626263e-06, "loss": 6.215480041503906, "step": 53815 }, { "epoch": 0.2372, "grad_norm": 4.324114799499512, "learning_rate": 2.3323737373737374e-06, "loss": 6.237178802490234, "step": 53820 }, { "epoch": 0.23725, "grad_norm": 6.730421543121338, "learning_rate": 2.332121212121212e-06, "loss": 6.274043273925781, "step": 53825 }, { "epoch": 0.2373, "grad_norm": 8.933131217956543, "learning_rate": 2.331868686868687e-06, "loss": 6.2575328826904295, "step": 53830 }, { "epoch": 0.23735, "grad_norm": 8.134991645812988, "learning_rate": 2.3316161616161617e-06, "loss": 6.325914764404297, "step": 53835 }, { "epoch": 0.2374, "grad_norm": 6.449033260345459, "learning_rate": 2.331363636363637e-06, "loss": 6.189609146118164, "step": 53840 }, { "epoch": 0.23745, "grad_norm": 4.400453090667725, "learning_rate": 2.331111111111111e-06, "loss": 6.226021575927734, "step": 53845 }, { "epoch": 0.2375, "grad_norm": 3.587913990020752, "learning_rate": 2.330858585858586e-06, "loss": 6.284579849243164, "step": 53850 }, { "epoch": 0.23755, "grad_norm": 10.993630409240723, "learning_rate": 2.3306060606060607e-06, "loss": 6.247707366943359, "step": 53855 }, { "epoch": 0.2376, "grad_norm": 4.112060070037842, "learning_rate": 2.3303535353535357e-06, "loss": 6.235470962524414, "step": 53860 }, { "epoch": 0.23765, "grad_norm": 10.123368263244629, "learning_rate": 2.3301010101010104e-06, "loss": 6.396506118774414, "step": 53865 }, { "epoch": 0.2377, "grad_norm": 7.985734939575195, "learning_rate": 2.329848484848485e-06, "loss": 6.266719818115234, "step": 53870 }, { "epoch": 0.23775, "grad_norm": 10.179147720336914, "learning_rate": 2.3295959595959596e-06, "loss": 6.277497100830078, "step": 53875 }, { "epoch": 0.2378, "grad_norm": 6.554091930389404, "learning_rate": 2.3293434343434347e-06, "loss": 6.2710823059082035, "step": 53880 }, { "epoch": 0.23785, "grad_norm": 8.674412727355957, "learning_rate": 2.3290909090909093e-06, "loss": 6.227017974853515, "step": 53885 }, { "epoch": 0.2379, "grad_norm": 4.430182933807373, "learning_rate": 2.328838383838384e-06, "loss": 6.258274841308594, "step": 53890 }, { "epoch": 0.23795, "grad_norm": 7.172296524047852, "learning_rate": 2.3285858585858586e-06, "loss": 6.2916618347167965, "step": 53895 }, { "epoch": 0.238, "grad_norm": 4.493840217590332, "learning_rate": 2.3283333333333336e-06, "loss": 6.270866394042969, "step": 53900 }, { "epoch": 0.23805, "grad_norm": 4.622440814971924, "learning_rate": 2.3280808080808083e-06, "loss": 6.2221229553222654, "step": 53905 }, { "epoch": 0.2381, "grad_norm": 11.331378936767578, "learning_rate": 2.327828282828283e-06, "loss": 6.302344512939453, "step": 53910 }, { "epoch": 0.23815, "grad_norm": 5.214025020599365, "learning_rate": 2.3275757575757575e-06, "loss": 6.291595840454102, "step": 53915 }, { "epoch": 0.2382, "grad_norm": 5.889056205749512, "learning_rate": 2.3273232323232326e-06, "loss": 6.242242431640625, "step": 53920 }, { "epoch": 0.23825, "grad_norm": 11.176382064819336, "learning_rate": 2.327070707070707e-06, "loss": 6.241460800170898, "step": 53925 }, { "epoch": 0.2383, "grad_norm": 14.163167953491211, "learning_rate": 2.326818181818182e-06, "loss": 6.21971549987793, "step": 53930 }, { "epoch": 0.23835, "grad_norm": 18.547279357910156, "learning_rate": 2.3265656565656565e-06, "loss": 6.31804084777832, "step": 53935 }, { "epoch": 0.2384, "grad_norm": 6.636751174926758, "learning_rate": 2.3263131313131315e-06, "loss": 6.249064636230469, "step": 53940 }, { "epoch": 0.23845, "grad_norm": 8.776169776916504, "learning_rate": 2.326060606060606e-06, "loss": 6.2491310119628904, "step": 53945 }, { "epoch": 0.2385, "grad_norm": 5.8105387687683105, "learning_rate": 2.325808080808081e-06, "loss": 6.212085342407226, "step": 53950 }, { "epoch": 0.23855, "grad_norm": 3.9938108921051025, "learning_rate": 2.325555555555556e-06, "loss": 6.241356658935547, "step": 53955 }, { "epoch": 0.2386, "grad_norm": 4.4737772941589355, "learning_rate": 2.3253030303030305e-06, "loss": 6.274858856201172, "step": 53960 }, { "epoch": 0.23865, "grad_norm": 6.167301177978516, "learning_rate": 2.325050505050505e-06, "loss": 6.244707107543945, "step": 53965 }, { "epoch": 0.2387, "grad_norm": 8.420236587524414, "learning_rate": 2.32479797979798e-06, "loss": 6.257106781005859, "step": 53970 }, { "epoch": 0.23875, "grad_norm": 6.479395389556885, "learning_rate": 2.3245454545454548e-06, "loss": 6.24738655090332, "step": 53975 }, { "epoch": 0.2388, "grad_norm": 3.7795615196228027, "learning_rate": 2.3242929292929294e-06, "loss": 6.205750274658203, "step": 53980 }, { "epoch": 0.23885, "grad_norm": 6.135209083557129, "learning_rate": 2.324040404040404e-06, "loss": 6.252893447875977, "step": 53985 }, { "epoch": 0.2389, "grad_norm": 5.759137153625488, "learning_rate": 2.323787878787879e-06, "loss": 6.234038925170898, "step": 53990 }, { "epoch": 0.23895, "grad_norm": 6.266923427581787, "learning_rate": 2.3235353535353537e-06, "loss": 6.269160461425781, "step": 53995 }, { "epoch": 0.239, "grad_norm": 10.378273963928223, "learning_rate": 2.3232828282828283e-06, "loss": 6.256836700439453, "step": 54000 }, { "epoch": 0.23905, "grad_norm": 5.138885498046875, "learning_rate": 2.3230303030303034e-06, "loss": 6.201865005493164, "step": 54005 }, { "epoch": 0.2391, "grad_norm": 8.255350112915039, "learning_rate": 2.322777777777778e-06, "loss": 6.258716583251953, "step": 54010 }, { "epoch": 0.23915, "grad_norm": 5.332558631896973, "learning_rate": 2.3225252525252527e-06, "loss": 6.220664978027344, "step": 54015 }, { "epoch": 0.2392, "grad_norm": 6.060151100158691, "learning_rate": 2.3222727272727273e-06, "loss": 6.260067367553711, "step": 54020 }, { "epoch": 0.23925, "grad_norm": 17.02968978881836, "learning_rate": 2.3220202020202023e-06, "loss": 6.397225952148437, "step": 54025 }, { "epoch": 0.2393, "grad_norm": 9.280436515808105, "learning_rate": 2.321767676767677e-06, "loss": 6.277406311035156, "step": 54030 }, { "epoch": 0.23935, "grad_norm": 5.769432544708252, "learning_rate": 2.321515151515152e-06, "loss": 6.311225509643554, "step": 54035 }, { "epoch": 0.2394, "grad_norm": 9.689005851745605, "learning_rate": 2.3212626262626262e-06, "loss": 6.30213737487793, "step": 54040 }, { "epoch": 0.23945, "grad_norm": 4.2035722732543945, "learning_rate": 2.3210101010101013e-06, "loss": 6.334579849243164, "step": 54045 }, { "epoch": 0.2395, "grad_norm": 4.162347316741943, "learning_rate": 2.320757575757576e-06, "loss": 6.26548957824707, "step": 54050 }, { "epoch": 0.23955, "grad_norm": 6.0266547203063965, "learning_rate": 2.320505050505051e-06, "loss": 6.2528221130371096, "step": 54055 }, { "epoch": 0.2396, "grad_norm": 11.734352111816406, "learning_rate": 2.3202525252525256e-06, "loss": 6.276518249511719, "step": 54060 }, { "epoch": 0.23965, "grad_norm": 3.313575267791748, "learning_rate": 2.3200000000000002e-06, "loss": 6.252543258666992, "step": 54065 }, { "epoch": 0.2397, "grad_norm": 10.135144233703613, "learning_rate": 2.319747474747475e-06, "loss": 6.369168090820312, "step": 54070 }, { "epoch": 0.23975, "grad_norm": 8.298754692077637, "learning_rate": 2.31949494949495e-06, "loss": 6.187963485717773, "step": 54075 }, { "epoch": 0.2398, "grad_norm": 5.034638404846191, "learning_rate": 2.3192424242424245e-06, "loss": 6.262112426757812, "step": 54080 }, { "epoch": 0.23985, "grad_norm": 5.816622257232666, "learning_rate": 2.318989898989899e-06, "loss": 6.24847183227539, "step": 54085 }, { "epoch": 0.2399, "grad_norm": 7.638916492462158, "learning_rate": 2.318737373737374e-06, "loss": 6.2224372863769535, "step": 54090 }, { "epoch": 0.23995, "grad_norm": 7.559815406799316, "learning_rate": 2.318484848484849e-06, "loss": 6.254177856445312, "step": 54095 }, { "epoch": 0.24, "grad_norm": 10.595671653747559, "learning_rate": 2.3182323232323235e-06, "loss": 6.266017913818359, "step": 54100 }, { "epoch": 0.24005, "grad_norm": 7.086364269256592, "learning_rate": 2.317979797979798e-06, "loss": 6.2464958190917965, "step": 54105 }, { "epoch": 0.2401, "grad_norm": 5.964311122894287, "learning_rate": 2.3177272727272728e-06, "loss": 6.279021453857422, "step": 54110 }, { "epoch": 0.24015, "grad_norm": 3.789287805557251, "learning_rate": 2.317474747474748e-06, "loss": 6.216695022583008, "step": 54115 }, { "epoch": 0.2402, "grad_norm": 4.747391223907471, "learning_rate": 2.3172222222222224e-06, "loss": 6.221460342407227, "step": 54120 }, { "epoch": 0.24025, "grad_norm": 8.921984672546387, "learning_rate": 2.316969696969697e-06, "loss": 6.297597122192383, "step": 54125 }, { "epoch": 0.2403, "grad_norm": 5.730370998382568, "learning_rate": 2.3167171717171717e-06, "loss": 6.310346984863282, "step": 54130 }, { "epoch": 0.24035, "grad_norm": 7.733698844909668, "learning_rate": 2.3164646464646467e-06, "loss": 6.299485397338867, "step": 54135 }, { "epoch": 0.2404, "grad_norm": 26.391876220703125, "learning_rate": 2.3162121212121214e-06, "loss": 6.545154571533203, "step": 54140 }, { "epoch": 0.24045, "grad_norm": 6.179319381713867, "learning_rate": 2.3159595959595964e-06, "loss": 6.209276580810547, "step": 54145 }, { "epoch": 0.2405, "grad_norm": 18.558542251586914, "learning_rate": 2.3157070707070706e-06, "loss": 6.238677978515625, "step": 54150 }, { "epoch": 0.24055, "grad_norm": 4.286371231079102, "learning_rate": 2.3154545454545457e-06, "loss": 6.3641510009765625, "step": 54155 }, { "epoch": 0.2406, "grad_norm": 5.454794883728027, "learning_rate": 2.3152020202020203e-06, "loss": 6.234812927246094, "step": 54160 }, { "epoch": 0.24065, "grad_norm": 4.623608112335205, "learning_rate": 2.3149494949494954e-06, "loss": 6.214862823486328, "step": 54165 }, { "epoch": 0.2407, "grad_norm": 5.216143608093262, "learning_rate": 2.31469696969697e-06, "loss": 6.253755569458008, "step": 54170 }, { "epoch": 0.24075, "grad_norm": 7.523550510406494, "learning_rate": 2.3144444444444446e-06, "loss": 6.257445907592773, "step": 54175 }, { "epoch": 0.2408, "grad_norm": 5.366828918457031, "learning_rate": 2.3141919191919193e-06, "loss": 6.2696186065673825, "step": 54180 }, { "epoch": 0.24085, "grad_norm": 4.476797580718994, "learning_rate": 2.3139393939393943e-06, "loss": 6.229270935058594, "step": 54185 }, { "epoch": 0.2409, "grad_norm": 3.682495355606079, "learning_rate": 2.313686868686869e-06, "loss": 6.291003799438476, "step": 54190 }, { "epoch": 0.24095, "grad_norm": 7.527865886688232, "learning_rate": 2.3134343434343436e-06, "loss": 6.243564605712891, "step": 54195 }, { "epoch": 0.241, "grad_norm": 7.06549596786499, "learning_rate": 2.313181818181818e-06, "loss": 6.218463134765625, "step": 54200 }, { "epoch": 0.24105, "grad_norm": 7.657577991485596, "learning_rate": 2.3129292929292933e-06, "loss": 6.2290290832519535, "step": 54205 }, { "epoch": 0.2411, "grad_norm": 6.490534782409668, "learning_rate": 2.312676767676768e-06, "loss": 6.260775756835938, "step": 54210 }, { "epoch": 0.24115, "grad_norm": 5.2901082038879395, "learning_rate": 2.3124242424242425e-06, "loss": 6.22188720703125, "step": 54215 }, { "epoch": 0.2412, "grad_norm": 23.15847396850586, "learning_rate": 2.312171717171717e-06, "loss": 6.292121124267578, "step": 54220 }, { "epoch": 0.24125, "grad_norm": 7.512003421783447, "learning_rate": 2.311919191919192e-06, "loss": 6.248308563232422, "step": 54225 }, { "epoch": 0.2413, "grad_norm": 6.429965972900391, "learning_rate": 2.311666666666667e-06, "loss": 6.257514190673828, "step": 54230 }, { "epoch": 0.24135, "grad_norm": 7.338369369506836, "learning_rate": 2.3114141414141415e-06, "loss": 6.256681823730469, "step": 54235 }, { "epoch": 0.2414, "grad_norm": 8.623291969299316, "learning_rate": 2.311161616161616e-06, "loss": 6.215118408203125, "step": 54240 }, { "epoch": 0.24145, "grad_norm": 9.423702239990234, "learning_rate": 2.310909090909091e-06, "loss": 6.299012756347656, "step": 54245 }, { "epoch": 0.2415, "grad_norm": 5.745853424072266, "learning_rate": 2.3106565656565658e-06, "loss": 6.433404541015625, "step": 54250 }, { "epoch": 0.24155, "grad_norm": 5.300278663635254, "learning_rate": 2.310404040404041e-06, "loss": 6.253190994262695, "step": 54255 }, { "epoch": 0.2416, "grad_norm": 7.08789587020874, "learning_rate": 2.3101515151515155e-06, "loss": 6.282626724243164, "step": 54260 }, { "epoch": 0.24165, "grad_norm": 8.74560546875, "learning_rate": 2.30989898989899e-06, "loss": 6.2659858703613285, "step": 54265 }, { "epoch": 0.2417, "grad_norm": 7.761630058288574, "learning_rate": 2.3096464646464647e-06, "loss": 6.365752410888672, "step": 54270 }, { "epoch": 0.24175, "grad_norm": 6.183999061584473, "learning_rate": 2.3093939393939398e-06, "loss": 6.271831893920899, "step": 54275 }, { "epoch": 0.2418, "grad_norm": 5.851568222045898, "learning_rate": 2.3091414141414144e-06, "loss": 6.210723114013672, "step": 54280 }, { "epoch": 0.24185, "grad_norm": 8.729694366455078, "learning_rate": 2.308888888888889e-06, "loss": 6.269488143920898, "step": 54285 }, { "epoch": 0.2419, "grad_norm": 5.708519458770752, "learning_rate": 2.3086363636363637e-06, "loss": 6.243638610839843, "step": 54290 }, { "epoch": 0.24195, "grad_norm": 9.722355842590332, "learning_rate": 2.3083838383838387e-06, "loss": 6.2360282897949215, "step": 54295 }, { "epoch": 0.242, "grad_norm": 5.225597381591797, "learning_rate": 2.3081313131313134e-06, "loss": 6.263617706298828, "step": 54300 }, { "epoch": 0.24205, "grad_norm": 7.330676555633545, "learning_rate": 2.307878787878788e-06, "loss": 6.334558486938477, "step": 54305 }, { "epoch": 0.2421, "grad_norm": 5.412632465362549, "learning_rate": 2.3076262626262626e-06, "loss": 6.2247966766357425, "step": 54310 }, { "epoch": 0.24215, "grad_norm": 18.0510196685791, "learning_rate": 2.3073737373737377e-06, "loss": 6.2482868194580075, "step": 54315 }, { "epoch": 0.2422, "grad_norm": 8.87410831451416, "learning_rate": 2.3071212121212123e-06, "loss": 6.212980651855469, "step": 54320 }, { "epoch": 0.24225, "grad_norm": 10.447481155395508, "learning_rate": 2.306868686868687e-06, "loss": 6.241486740112305, "step": 54325 }, { "epoch": 0.2423, "grad_norm": 6.136219024658203, "learning_rate": 2.3066161616161616e-06, "loss": 6.264598846435547, "step": 54330 }, { "epoch": 0.24235, "grad_norm": 6.3508100509643555, "learning_rate": 2.3063636363636366e-06, "loss": 6.220231628417968, "step": 54335 }, { "epoch": 0.2424, "grad_norm": 9.7012939453125, "learning_rate": 2.3061111111111112e-06, "loss": 6.2447151184082035, "step": 54340 }, { "epoch": 0.24245, "grad_norm": 4.2761549949646, "learning_rate": 2.305858585858586e-06, "loss": 6.229434967041016, "step": 54345 }, { "epoch": 0.2425, "grad_norm": 12.442429542541504, "learning_rate": 2.3056060606060605e-06, "loss": 6.236570739746094, "step": 54350 }, { "epoch": 0.24255, "grad_norm": 3.438687562942505, "learning_rate": 2.3053535353535356e-06, "loss": 6.27539291381836, "step": 54355 }, { "epoch": 0.2426, "grad_norm": 8.220053672790527, "learning_rate": 2.30510101010101e-06, "loss": 6.334363555908203, "step": 54360 }, { "epoch": 0.24265, "grad_norm": 6.333807468414307, "learning_rate": 2.3048484848484852e-06, "loss": 6.330289840698242, "step": 54365 }, { "epoch": 0.2427, "grad_norm": 3.922124147415161, "learning_rate": 2.30459595959596e-06, "loss": 6.2615306854248045, "step": 54370 }, { "epoch": 0.24275, "grad_norm": 5.935579299926758, "learning_rate": 2.3043434343434345e-06, "loss": 6.34754638671875, "step": 54375 }, { "epoch": 0.2428, "grad_norm": 3.636408805847168, "learning_rate": 2.304090909090909e-06, "loss": 6.253291320800781, "step": 54380 }, { "epoch": 0.24285, "grad_norm": 23.25623321533203, "learning_rate": 2.303838383838384e-06, "loss": 6.307843780517578, "step": 54385 }, { "epoch": 0.2429, "grad_norm": 6.810715675354004, "learning_rate": 2.303585858585859e-06, "loss": 6.284465026855469, "step": 54390 }, { "epoch": 0.24295, "grad_norm": 6.5209879875183105, "learning_rate": 2.3033333333333334e-06, "loss": 6.227023315429688, "step": 54395 }, { "epoch": 0.243, "grad_norm": 8.230334281921387, "learning_rate": 2.303080808080808e-06, "loss": 6.2622123718261715, "step": 54400 }, { "epoch": 0.24305, "grad_norm": 6.233260154724121, "learning_rate": 2.302828282828283e-06, "loss": 6.231059646606445, "step": 54405 }, { "epoch": 0.2431, "grad_norm": 5.023292541503906, "learning_rate": 2.3025757575757578e-06, "loss": 6.15326156616211, "step": 54410 }, { "epoch": 0.24315, "grad_norm": 5.721960067749023, "learning_rate": 2.3023232323232324e-06, "loss": 6.274486541748047, "step": 54415 }, { "epoch": 0.2432, "grad_norm": 5.227303504943848, "learning_rate": 2.302070707070707e-06, "loss": 6.331974029541016, "step": 54420 }, { "epoch": 0.24325, "grad_norm": 13.070014953613281, "learning_rate": 2.301818181818182e-06, "loss": 6.0893512725830075, "step": 54425 }, { "epoch": 0.2433, "grad_norm": 6.446201324462891, "learning_rate": 2.3015656565656567e-06, "loss": 6.331809997558594, "step": 54430 }, { "epoch": 0.24335, "grad_norm": 4.327701091766357, "learning_rate": 2.3013131313131313e-06, "loss": 6.217610549926758, "step": 54435 }, { "epoch": 0.2434, "grad_norm": 29.40936279296875, "learning_rate": 2.3010606060606064e-06, "loss": 6.458628082275391, "step": 54440 }, { "epoch": 0.24345, "grad_norm": 5.174143314361572, "learning_rate": 2.300808080808081e-06, "loss": 6.273553466796875, "step": 54445 }, { "epoch": 0.2435, "grad_norm": 6.280145168304443, "learning_rate": 2.300555555555556e-06, "loss": 6.265470886230469, "step": 54450 }, { "epoch": 0.24355, "grad_norm": 8.580892562866211, "learning_rate": 2.3003030303030303e-06, "loss": 6.284563446044922, "step": 54455 }, { "epoch": 0.2436, "grad_norm": 6.639371871948242, "learning_rate": 2.3000505050505053e-06, "loss": 6.4615318298339846, "step": 54460 }, { "epoch": 0.24365, "grad_norm": 7.327179431915283, "learning_rate": 2.29979797979798e-06, "loss": 6.247771453857422, "step": 54465 }, { "epoch": 0.2437, "grad_norm": 7.390702247619629, "learning_rate": 2.299545454545455e-06, "loss": 6.373773574829102, "step": 54470 }, { "epoch": 0.24375, "grad_norm": 7.567170143127441, "learning_rate": 2.2992929292929296e-06, "loss": 6.244853210449219, "step": 54475 }, { "epoch": 0.2438, "grad_norm": 11.274394035339355, "learning_rate": 2.2990404040404043e-06, "loss": 6.344989013671875, "step": 54480 }, { "epoch": 0.24385, "grad_norm": 5.095001220703125, "learning_rate": 2.298787878787879e-06, "loss": 6.229573440551758, "step": 54485 }, { "epoch": 0.2439, "grad_norm": 7.160693645477295, "learning_rate": 2.298535353535354e-06, "loss": 6.242762756347656, "step": 54490 }, { "epoch": 0.24395, "grad_norm": 9.472740173339844, "learning_rate": 2.2982828282828286e-06, "loss": 6.302507019042968, "step": 54495 }, { "epoch": 0.244, "grad_norm": 14.866138458251953, "learning_rate": 2.2980303030303032e-06, "loss": 6.247676849365234, "step": 54500 }, { "epoch": 0.24405, "grad_norm": 19.42043685913086, "learning_rate": 2.297777777777778e-06, "loss": 6.277056503295898, "step": 54505 }, { "epoch": 0.2441, "grad_norm": 5.575567722320557, "learning_rate": 2.297525252525253e-06, "loss": 6.284452819824219, "step": 54510 }, { "epoch": 0.24415, "grad_norm": 7.007678508758545, "learning_rate": 2.2972727272727275e-06, "loss": 6.25257682800293, "step": 54515 }, { "epoch": 0.2442, "grad_norm": 8.029012680053711, "learning_rate": 2.297020202020202e-06, "loss": 6.23291244506836, "step": 54520 }, { "epoch": 0.24425, "grad_norm": 19.660436630249023, "learning_rate": 2.296767676767677e-06, "loss": 6.536753845214844, "step": 54525 }, { "epoch": 0.2443, "grad_norm": 10.802935600280762, "learning_rate": 2.296515151515152e-06, "loss": 6.432315826416016, "step": 54530 }, { "epoch": 0.24435, "grad_norm": 4.361486434936523, "learning_rate": 2.2962626262626265e-06, "loss": 6.2656597137451175, "step": 54535 }, { "epoch": 0.2444, "grad_norm": 6.339912414550781, "learning_rate": 2.296010101010101e-06, "loss": 6.239645385742188, "step": 54540 }, { "epoch": 0.24445, "grad_norm": 7.31729793548584, "learning_rate": 2.2957575757575757e-06, "loss": 6.264350509643554, "step": 54545 }, { "epoch": 0.2445, "grad_norm": 7.358636379241943, "learning_rate": 2.295505050505051e-06, "loss": 6.184801864624023, "step": 54550 }, { "epoch": 0.24455, "grad_norm": 13.089837074279785, "learning_rate": 2.2952525252525254e-06, "loss": 6.249306488037109, "step": 54555 }, { "epoch": 0.2446, "grad_norm": 7.430058479309082, "learning_rate": 2.2950000000000005e-06, "loss": 6.288786315917969, "step": 54560 }, { "epoch": 0.24465, "grad_norm": 8.623957633972168, "learning_rate": 2.2947474747474747e-06, "loss": 6.2592418670654295, "step": 54565 }, { "epoch": 0.2447, "grad_norm": 10.623812675476074, "learning_rate": 2.2944949494949497e-06, "loss": 6.222797012329101, "step": 54570 }, { "epoch": 0.24475, "grad_norm": 6.3067708015441895, "learning_rate": 2.2942424242424244e-06, "loss": 6.288557815551758, "step": 54575 }, { "epoch": 0.2448, "grad_norm": 3.996670722961426, "learning_rate": 2.2939898989898994e-06, "loss": 6.3318336486816404, "step": 54580 }, { "epoch": 0.24485, "grad_norm": 6.032015800476074, "learning_rate": 2.293737373737374e-06, "loss": 6.221044540405273, "step": 54585 }, { "epoch": 0.2449, "grad_norm": 7.231252670288086, "learning_rate": 2.2934848484848487e-06, "loss": 6.2357124328613285, "step": 54590 }, { "epoch": 0.24495, "grad_norm": 7.085239410400391, "learning_rate": 2.2932323232323233e-06, "loss": 6.276969909667969, "step": 54595 }, { "epoch": 0.245, "grad_norm": 6.146199703216553, "learning_rate": 2.2929797979797984e-06, "loss": 6.214895629882813, "step": 54600 }, { "epoch": 5e-05, "grad_norm": 8.667359352111816, "learning_rate": 2.292727272727273e-06, "loss": 6.2552745819091795, "step": 54605 }, { "epoch": 0.0001, "grad_norm": 6.880889415740967, "learning_rate": 2.2924747474747476e-06, "loss": 6.187526702880859, "step": 54610 }, { "epoch": 0.00015, "grad_norm": 8.492877006530762, "learning_rate": 2.2922222222222223e-06, "loss": 6.317396545410157, "step": 54615 }, { "epoch": 0.0002, "grad_norm": 5.901414394378662, "learning_rate": 2.2919696969696973e-06, "loss": 6.271659088134766, "step": 54620 }, { "epoch": 0.00025, "grad_norm": 15.037792205810547, "learning_rate": 2.291717171717172e-06, "loss": 6.252686309814453, "step": 54625 }, { "epoch": 0.0003, "grad_norm": 3.866588830947876, "learning_rate": 2.2914646464646466e-06, "loss": 6.254892349243164, "step": 54630 }, { "epoch": 0.00035, "grad_norm": 8.403800010681152, "learning_rate": 2.291212121212121e-06, "loss": 6.238437271118164, "step": 54635 }, { "epoch": 0.0004, "grad_norm": 3.925560235977173, "learning_rate": 2.2909595959595962e-06, "loss": 6.294836044311523, "step": 54640 }, { "epoch": 0.00045, "grad_norm": 8.383685111999512, "learning_rate": 2.290707070707071e-06, "loss": 6.299832153320312, "step": 54645 }, { "epoch": 0.0005, "grad_norm": 4.139650821685791, "learning_rate": 2.2904545454545455e-06, "loss": 6.24766845703125, "step": 54650 }, { "epoch": 0.00055, "grad_norm": 5.442991733551025, "learning_rate": 2.29020202020202e-06, "loss": 6.22370719909668, "step": 54655 }, { "epoch": 0.0006, "grad_norm": 10.17952823638916, "learning_rate": 2.289949494949495e-06, "loss": 6.252803802490234, "step": 54660 }, { "epoch": 0.00065, "grad_norm": 7.074005603790283, "learning_rate": 2.28969696969697e-06, "loss": 6.245901870727539, "step": 54665 }, { "epoch": 0.0007, "grad_norm": 3.6935203075408936, "learning_rate": 2.289444444444445e-06, "loss": 6.276023101806641, "step": 54670 }, { "epoch": 0.00075, "grad_norm": 7.216061115264893, "learning_rate": 2.2891919191919195e-06, "loss": 6.251096725463867, "step": 54675 }, { "epoch": 0.0008, "grad_norm": 8.030790328979492, "learning_rate": 2.288939393939394e-06, "loss": 6.274227142333984, "step": 54680 }, { "epoch": 0.00085, "grad_norm": 6.306556701660156, "learning_rate": 2.2886868686868688e-06, "loss": 6.23253059387207, "step": 54685 }, { "epoch": 0.0009, "grad_norm": 8.118612289428711, "learning_rate": 2.288434343434344e-06, "loss": 6.273513412475586, "step": 54690 }, { "epoch": 0.00095, "grad_norm": 3.2857518196105957, "learning_rate": 2.2881818181818185e-06, "loss": 6.238336181640625, "step": 54695 }, { "epoch": 0.001, "grad_norm": 4.610288143157959, "learning_rate": 2.287929292929293e-06, "loss": 6.2268821716308596, "step": 54700 }, { "epoch": 0.00105, "grad_norm": 5.977020740509033, "learning_rate": 2.2876767676767677e-06, "loss": 6.251573944091797, "step": 54705 }, { "epoch": 0.0011, "grad_norm": 6.355082035064697, "learning_rate": 2.2874242424242428e-06, "loss": 6.228936004638672, "step": 54710 }, { "epoch": 0.00115, "grad_norm": 4.595903396606445, "learning_rate": 2.2871717171717174e-06, "loss": 6.249454498291016, "step": 54715 }, { "epoch": 0.0012, "grad_norm": 5.0032172203063965, "learning_rate": 2.286919191919192e-06, "loss": 6.277130889892578, "step": 54720 }, { "epoch": 0.00125, "grad_norm": 3.5580525398254395, "learning_rate": 2.2866666666666667e-06, "loss": 6.218618392944336, "step": 54725 }, { "epoch": 0.0013, "grad_norm": 3.637784242630005, "learning_rate": 2.2864141414141417e-06, "loss": 6.242452239990234, "step": 54730 }, { "epoch": 0.00135, "grad_norm": 6.026608943939209, "learning_rate": 2.2861616161616163e-06, "loss": 6.044623565673828, "step": 54735 }, { "epoch": 0.0014, "grad_norm": 5.174718379974365, "learning_rate": 2.285909090909091e-06, "loss": 6.195711517333985, "step": 54740 }, { "epoch": 0.00145, "grad_norm": 8.587543487548828, "learning_rate": 2.2856565656565656e-06, "loss": 6.309786224365235, "step": 54745 }, { "epoch": 0.0015, "grad_norm": 11.33362102508545, "learning_rate": 2.2854040404040407e-06, "loss": 6.267345809936524, "step": 54750 }, { "epoch": 0.00155, "grad_norm": 14.85449504852295, "learning_rate": 2.2851515151515153e-06, "loss": 6.290382003784179, "step": 54755 }, { "epoch": 0.0016, "grad_norm": 9.999930381774902, "learning_rate": 2.28489898989899e-06, "loss": 6.490119934082031, "step": 54760 }, { "epoch": 0.00165, "grad_norm": 3.8619210720062256, "learning_rate": 2.2846464646464645e-06, "loss": 6.247728729248047, "step": 54765 }, { "epoch": 0.0017, "grad_norm": 7.10304069519043, "learning_rate": 2.2843939393939396e-06, "loss": 6.165635681152343, "step": 54770 }, { "epoch": 0.00175, "grad_norm": 12.169333457946777, "learning_rate": 2.2841414141414142e-06, "loss": 6.309274291992187, "step": 54775 }, { "epoch": 0.0018, "grad_norm": 3.91489839553833, "learning_rate": 2.2838888888888893e-06, "loss": 6.2473876953125, "step": 54780 }, { "epoch": 0.00185, "grad_norm": 9.755937576293945, "learning_rate": 2.283636363636364e-06, "loss": 6.266640853881836, "step": 54785 }, { "epoch": 0.0019, "grad_norm": 6.3620734214782715, "learning_rate": 2.2833838383838385e-06, "loss": 6.2204734802246096, "step": 54790 }, { "epoch": 0.00195, "grad_norm": 4.316466331481934, "learning_rate": 2.283131313131313e-06, "loss": 6.261576080322266, "step": 54795 }, { "epoch": 0.002, "grad_norm": 11.780734062194824, "learning_rate": 2.2828787878787882e-06, "loss": 6.196191024780274, "step": 54800 }, { "epoch": 0.00205, "grad_norm": 36.48588943481445, "learning_rate": 2.282626262626263e-06, "loss": 6.231214141845703, "step": 54805 }, { "epoch": 0.0021, "grad_norm": 4.631005764007568, "learning_rate": 2.2823737373737375e-06, "loss": 6.23334846496582, "step": 54810 }, { "epoch": 0.00215, "grad_norm": 3.7439708709716797, "learning_rate": 2.282121212121212e-06, "loss": 6.273107528686523, "step": 54815 }, { "epoch": 0.0022, "grad_norm": 6.718963623046875, "learning_rate": 2.281868686868687e-06, "loss": 6.24505386352539, "step": 54820 }, { "epoch": 0.00225, "grad_norm": 9.173001289367676, "learning_rate": 2.281616161616162e-06, "loss": 6.290366363525391, "step": 54825 }, { "epoch": 0.0023, "grad_norm": 4.786609172821045, "learning_rate": 2.2813636363636364e-06, "loss": 6.249900054931641, "step": 54830 }, { "epoch": 0.00235, "grad_norm": 5.17202091217041, "learning_rate": 2.281111111111111e-06, "loss": 6.218070983886719, "step": 54835 }, { "epoch": 0.0024, "grad_norm": 5.247699737548828, "learning_rate": 2.280858585858586e-06, "loss": 6.2280220031738285, "step": 54840 }, { "epoch": 0.00245, "grad_norm": 5.127541542053223, "learning_rate": 2.2806060606060607e-06, "loss": 6.248575210571289, "step": 54845 }, { "epoch": 0.0025, "grad_norm": 5.5622429847717285, "learning_rate": 2.2803535353535354e-06, "loss": 6.245406723022461, "step": 54850 }, { "epoch": 0.00255, "grad_norm": 6.496392250061035, "learning_rate": 2.2801010101010104e-06, "loss": 6.303318023681641, "step": 54855 }, { "epoch": 0.0026, "grad_norm": 3.9946389198303223, "learning_rate": 2.279848484848485e-06, "loss": 6.261206817626953, "step": 54860 }, { "epoch": 0.00265, "grad_norm": 4.9725341796875, "learning_rate": 2.27959595959596e-06, "loss": 6.255594635009766, "step": 54865 }, { "epoch": 0.0027, "grad_norm": 6.890835762023926, "learning_rate": 2.2793434343434343e-06, "loss": 6.294471359252929, "step": 54870 }, { "epoch": 0.00275, "grad_norm": 6.5957231521606445, "learning_rate": 2.2790909090909094e-06, "loss": 6.276836395263672, "step": 54875 }, { "epoch": 0.0028, "grad_norm": 5.338305950164795, "learning_rate": 2.278838383838384e-06, "loss": 6.244186019897461, "step": 54880 }, { "epoch": 0.00285, "grad_norm": 5.23325252532959, "learning_rate": 2.278585858585859e-06, "loss": 6.2466590881347654, "step": 54885 }, { "epoch": 0.0029, "grad_norm": 14.115734100341797, "learning_rate": 2.2783333333333337e-06, "loss": 6.1939849853515625, "step": 54890 }, { "epoch": 0.00295, "grad_norm": 6.165467739105225, "learning_rate": 2.2780808080808083e-06, "loss": 6.259896850585937, "step": 54895 }, { "epoch": 0.003, "grad_norm": 5.721373558044434, "learning_rate": 2.277828282828283e-06, "loss": 6.272023391723633, "step": 54900 }, { "epoch": 0.00305, "grad_norm": 6.757893085479736, "learning_rate": 2.277575757575758e-06, "loss": 6.258411407470703, "step": 54905 }, { "epoch": 0.0031, "grad_norm": 18.8227596282959, "learning_rate": 2.2773232323232326e-06, "loss": 6.147814559936523, "step": 54910 }, { "epoch": 0.00315, "grad_norm": 9.504530906677246, "learning_rate": 2.2770707070707073e-06, "loss": 6.20399169921875, "step": 54915 }, { "epoch": 0.0032, "grad_norm": 3.660651445388794, "learning_rate": 2.276818181818182e-06, "loss": 6.205107116699219, "step": 54920 }, { "epoch": 0.00325, "grad_norm": 6.010078430175781, "learning_rate": 2.276565656565657e-06, "loss": 6.2350013732910154, "step": 54925 }, { "epoch": 0.0033, "grad_norm": 4.8154520988464355, "learning_rate": 2.2763131313131316e-06, "loss": 6.241425323486328, "step": 54930 }, { "epoch": 0.00335, "grad_norm": 6.215099334716797, "learning_rate": 2.276060606060606e-06, "loss": 6.216873550415039, "step": 54935 }, { "epoch": 0.0034, "grad_norm": 6.8554558753967285, "learning_rate": 2.275808080808081e-06, "loss": 6.360025405883789, "step": 54940 }, { "epoch": 0.00345, "grad_norm": 6.5399580001831055, "learning_rate": 2.275555555555556e-06, "loss": 6.244036865234375, "step": 54945 }, { "epoch": 0.0035, "grad_norm": 7.331899642944336, "learning_rate": 2.2753030303030305e-06, "loss": 6.255379104614258, "step": 54950 }, { "epoch": 0.00355, "grad_norm": 24.243358612060547, "learning_rate": 2.275050505050505e-06, "loss": 6.247638320922851, "step": 54955 }, { "epoch": 0.0036, "grad_norm": 11.918036460876465, "learning_rate": 2.2747979797979798e-06, "loss": 6.261985778808594, "step": 54960 }, { "epoch": 0.00365, "grad_norm": 6.854541301727295, "learning_rate": 2.274545454545455e-06, "loss": 6.2419921875, "step": 54965 }, { "epoch": 0.0037, "grad_norm": 5.312015056610107, "learning_rate": 2.2742929292929295e-06, "loss": 6.243400955200196, "step": 54970 }, { "epoch": 0.00375, "grad_norm": 4.277090549468994, "learning_rate": 2.2740404040404045e-06, "loss": 6.242910385131836, "step": 54975 }, { "epoch": 0.0038, "grad_norm": 8.733593940734863, "learning_rate": 2.273787878787879e-06, "loss": 6.3093524932861325, "step": 54980 }, { "epoch": 0.00385, "grad_norm": 5.733173847198486, "learning_rate": 2.2735353535353538e-06, "loss": 6.249327087402344, "step": 54985 }, { "epoch": 0.0039, "grad_norm": 30.900985717773438, "learning_rate": 2.2732828282828284e-06, "loss": 6.223883056640625, "step": 54990 }, { "epoch": 0.00395, "grad_norm": 4.482702255249023, "learning_rate": 2.2730303030303035e-06, "loss": 6.2325695037841795, "step": 54995 }, { "epoch": 0.004, "grad_norm": 15.063060760498047, "learning_rate": 2.272777777777778e-06, "loss": 6.30181655883789, "step": 55000 }, { "epoch": 0.00405, "grad_norm": 7.808401107788086, "learning_rate": 2.2725252525252527e-06, "loss": 6.253874969482422, "step": 55005 }, { "epoch": 0.0041, "grad_norm": 6.066341876983643, "learning_rate": 2.2722727272727273e-06, "loss": 6.277171325683594, "step": 55010 }, { "epoch": 0.00415, "grad_norm": 6.820338726043701, "learning_rate": 2.2720202020202024e-06, "loss": 6.377825164794922, "step": 55015 }, { "epoch": 0.0042, "grad_norm": 6.969101905822754, "learning_rate": 2.271767676767677e-06, "loss": 6.238267517089843, "step": 55020 }, { "epoch": 0.00425, "grad_norm": 10.046394348144531, "learning_rate": 2.2715151515151517e-06, "loss": 6.310213088989258, "step": 55025 }, { "epoch": 0.0043, "grad_norm": 5.784063816070557, "learning_rate": 2.2712626262626263e-06, "loss": 6.261535263061523, "step": 55030 }, { "epoch": 0.00435, "grad_norm": 5.242814064025879, "learning_rate": 2.2710101010101013e-06, "loss": 6.240102386474609, "step": 55035 }, { "epoch": 0.0044, "grad_norm": 3.766476631164551, "learning_rate": 2.270757575757576e-06, "loss": 6.252990341186523, "step": 55040 }, { "epoch": 0.00445, "grad_norm": 4.9936628341674805, "learning_rate": 2.2705050505050506e-06, "loss": 6.229459762573242, "step": 55045 }, { "epoch": 0.0045, "grad_norm": 4.488040447235107, "learning_rate": 2.2702525252525252e-06, "loss": 6.254853057861328, "step": 55050 }, { "epoch": 0.00455, "grad_norm": 3.9554026126861572, "learning_rate": 2.2700000000000003e-06, "loss": 6.280983734130859, "step": 55055 }, { "epoch": 0.0046, "grad_norm": 4.240971088409424, "learning_rate": 2.269747474747475e-06, "loss": 6.2050636291503904, "step": 55060 }, { "epoch": 0.00465, "grad_norm": 8.210212707519531, "learning_rate": 2.2694949494949495e-06, "loss": 6.233924865722656, "step": 55065 }, { "epoch": 0.0047, "grad_norm": 7.226136207580566, "learning_rate": 2.269242424242424e-06, "loss": 6.24578857421875, "step": 55070 }, { "epoch": 0.00475, "grad_norm": 7.635437488555908, "learning_rate": 2.2689898989898992e-06, "loss": 6.230142593383789, "step": 55075 }, { "epoch": 0.0048, "grad_norm": 10.491456031799316, "learning_rate": 2.268737373737374e-06, "loss": 6.246035766601563, "step": 55080 }, { "epoch": 0.00485, "grad_norm": 7.606631755828857, "learning_rate": 2.268484848484849e-06, "loss": 6.160263442993164, "step": 55085 }, { "epoch": 0.0049, "grad_norm": 5.03801965713501, "learning_rate": 2.2682323232323235e-06, "loss": 6.289277267456055, "step": 55090 }, { "epoch": 0.00495, "grad_norm": 7.092830181121826, "learning_rate": 2.267979797979798e-06, "loss": 6.225572204589843, "step": 55095 }, { "epoch": 0.005, "grad_norm": 4.373117446899414, "learning_rate": 2.267727272727273e-06, "loss": 6.2409004211425785, "step": 55100 }, { "epoch": 0.00505, "grad_norm": 6.496281623840332, "learning_rate": 2.267474747474748e-06, "loss": 6.269579315185547, "step": 55105 }, { "epoch": 0.0051, "grad_norm": 6.99586820602417, "learning_rate": 2.2672222222222225e-06, "loss": 6.27057113647461, "step": 55110 }, { "epoch": 0.00515, "grad_norm": 8.136173248291016, "learning_rate": 2.266969696969697e-06, "loss": 6.259733963012695, "step": 55115 }, { "epoch": 0.0052, "grad_norm": 6.117746829986572, "learning_rate": 2.2667171717171718e-06, "loss": 6.261005401611328, "step": 55120 }, { "epoch": 0.00525, "grad_norm": 12.281745910644531, "learning_rate": 2.266464646464647e-06, "loss": 6.277224731445313, "step": 55125 }, { "epoch": 0.0053, "grad_norm": 4.958805561065674, "learning_rate": 2.2662121212121214e-06, "loss": 6.234795379638672, "step": 55130 }, { "epoch": 0.00535, "grad_norm": 5.982492446899414, "learning_rate": 2.265959595959596e-06, "loss": 6.258764266967773, "step": 55135 }, { "epoch": 0.0054, "grad_norm": 5.139750003814697, "learning_rate": 2.2657070707070707e-06, "loss": 6.235709762573242, "step": 55140 }, { "epoch": 0.00545, "grad_norm": 6.022611141204834, "learning_rate": 2.2654545454545457e-06, "loss": 6.272317504882812, "step": 55145 }, { "epoch": 0.0055, "grad_norm": 8.309843063354492, "learning_rate": 2.2652020202020204e-06, "loss": 6.509901428222657, "step": 55150 }, { "epoch": 0.00555, "grad_norm": 4.480137348175049, "learning_rate": 2.264949494949495e-06, "loss": 6.264294815063477, "step": 55155 }, { "epoch": 0.0056, "grad_norm": 7.308412075042725, "learning_rate": 2.2646969696969696e-06, "loss": 6.247863006591797, "step": 55160 }, { "epoch": 0.00565, "grad_norm": 5.223592281341553, "learning_rate": 2.2644444444444447e-06, "loss": 6.239296722412109, "step": 55165 }, { "epoch": 0.0057, "grad_norm": 5.895315647125244, "learning_rate": 2.2641919191919193e-06, "loss": 6.262178802490235, "step": 55170 }, { "epoch": 0.00575, "grad_norm": 5.936789512634277, "learning_rate": 2.263939393939394e-06, "loss": 6.267056274414062, "step": 55175 }, { "epoch": 0.0058, "grad_norm": 5.548442840576172, "learning_rate": 2.2636868686868686e-06, "loss": 6.254130935668945, "step": 55180 }, { "epoch": 0.00585, "grad_norm": 9.55578327178955, "learning_rate": 2.2634343434343436e-06, "loss": 6.218416595458985, "step": 55185 }, { "epoch": 0.0059, "grad_norm": 8.633305549621582, "learning_rate": 2.2631818181818183e-06, "loss": 6.314426040649414, "step": 55190 }, { "epoch": 0.00595, "grad_norm": 4.720498561859131, "learning_rate": 2.2629292929292933e-06, "loss": 6.327108764648438, "step": 55195 }, { "epoch": 0.006, "grad_norm": 9.213533401489258, "learning_rate": 2.262676767676768e-06, "loss": 6.221530914306641, "step": 55200 }, { "epoch": 0.00605, "grad_norm": 7.84896183013916, "learning_rate": 2.2624242424242426e-06, "loss": 6.228617858886719, "step": 55205 }, { "epoch": 0.0061, "grad_norm": 7.087899684906006, "learning_rate": 2.262171717171717e-06, "loss": 6.324297332763672, "step": 55210 }, { "epoch": 0.00615, "grad_norm": 5.860839366912842, "learning_rate": 2.2619191919191923e-06, "loss": 6.349237060546875, "step": 55215 }, { "epoch": 0.0062, "grad_norm": 3.6862337589263916, "learning_rate": 2.261666666666667e-06, "loss": 6.229368209838867, "step": 55220 }, { "epoch": 0.00625, "grad_norm": 4.4494853019714355, "learning_rate": 2.2614141414141415e-06, "loss": 6.269309997558594, "step": 55225 }, { "epoch": 0.0063, "grad_norm": 6.226541996002197, "learning_rate": 2.261161616161616e-06, "loss": 6.693595886230469, "step": 55230 }, { "epoch": 0.00635, "grad_norm": 6.027578353881836, "learning_rate": 2.260909090909091e-06, "loss": 6.184881591796875, "step": 55235 }, { "epoch": 0.0064, "grad_norm": 7.537810325622559, "learning_rate": 2.260656565656566e-06, "loss": 6.232533645629883, "step": 55240 }, { "epoch": 0.00645, "grad_norm": 15.059515953063965, "learning_rate": 2.2604040404040405e-06, "loss": 6.310016632080078, "step": 55245 }, { "epoch": 0.0065, "grad_norm": 6.3501410484313965, "learning_rate": 2.260151515151515e-06, "loss": 6.276541900634766, "step": 55250 }, { "epoch": 0.00655, "grad_norm": 5.2333526611328125, "learning_rate": 2.25989898989899e-06, "loss": 6.244926452636719, "step": 55255 }, { "epoch": 0.0066, "grad_norm": 3.2637815475463867, "learning_rate": 2.2596464646464648e-06, "loss": 6.27104606628418, "step": 55260 }, { "epoch": 0.00665, "grad_norm": 7.893517017364502, "learning_rate": 2.2593939393939394e-06, "loss": 6.241421508789062, "step": 55265 }, { "epoch": 0.0067, "grad_norm": 6.872492790222168, "learning_rate": 2.259141414141414e-06, "loss": 6.296711730957031, "step": 55270 }, { "epoch": 0.00675, "grad_norm": 5.399633407592773, "learning_rate": 2.258888888888889e-06, "loss": 6.246369171142578, "step": 55275 }, { "epoch": 0.0068, "grad_norm": 5.301826477050781, "learning_rate": 2.2586363636363637e-06, "loss": 6.188702392578125, "step": 55280 }, { "epoch": 0.00685, "grad_norm": 5.178252696990967, "learning_rate": 2.2583838383838384e-06, "loss": 6.296396636962891, "step": 55285 }, { "epoch": 0.0069, "grad_norm": 7.981398582458496, "learning_rate": 2.2581313131313134e-06, "loss": 6.263945007324219, "step": 55290 }, { "epoch": 0.00695, "grad_norm": 7.400506019592285, "learning_rate": 2.257878787878788e-06, "loss": 6.2022865295410154, "step": 55295 }, { "epoch": 0.007, "grad_norm": 7.374297142028809, "learning_rate": 2.257626262626263e-06, "loss": 6.256303405761718, "step": 55300 }, { "epoch": 0.00705, "grad_norm": 6.856837272644043, "learning_rate": 2.2573737373737377e-06, "loss": 6.224153137207031, "step": 55305 }, { "epoch": 0.0071, "grad_norm": 18.192886352539062, "learning_rate": 2.2571212121212124e-06, "loss": 6.479226684570312, "step": 55310 }, { "epoch": 0.00715, "grad_norm": 4.121739387512207, "learning_rate": 2.256868686868687e-06, "loss": 6.291316986083984, "step": 55315 }, { "epoch": 0.0072, "grad_norm": 7.528702259063721, "learning_rate": 2.256616161616162e-06, "loss": 6.2385406494140625, "step": 55320 }, { "epoch": 0.00725, "grad_norm": 18.970834732055664, "learning_rate": 2.2563636363636367e-06, "loss": 6.595360565185547, "step": 55325 }, { "epoch": 0.0073, "grad_norm": 12.347230911254883, "learning_rate": 2.2561111111111113e-06, "loss": 6.4917236328125, "step": 55330 }, { "epoch": 0.00735, "grad_norm": 29.93522071838379, "learning_rate": 2.255858585858586e-06, "loss": 6.388611602783203, "step": 55335 }, { "epoch": 0.0074, "grad_norm": 25.18149757385254, "learning_rate": 2.255606060606061e-06, "loss": 6.376119995117188, "step": 55340 }, { "epoch": 0.00745, "grad_norm": 15.759343147277832, "learning_rate": 2.2553535353535356e-06, "loss": 6.36231689453125, "step": 55345 }, { "epoch": 0.0075, "grad_norm": 29.542295455932617, "learning_rate": 2.2551010101010102e-06, "loss": 6.331520462036133, "step": 55350 }, { "epoch": 0.00755, "grad_norm": 16.19760513305664, "learning_rate": 2.254848484848485e-06, "loss": 6.247932052612304, "step": 55355 }, { "epoch": 0.0076, "grad_norm": 29.239559173583984, "learning_rate": 2.25459595959596e-06, "loss": 6.1978614807128904, "step": 55360 }, { "epoch": 0.00765, "grad_norm": 10.361992835998535, "learning_rate": 2.2543434343434346e-06, "loss": 6.30879020690918, "step": 55365 }, { "epoch": 0.0077, "grad_norm": 7.1812214851379395, "learning_rate": 2.254090909090909e-06, "loss": 6.284302520751953, "step": 55370 }, { "epoch": 0.00775, "grad_norm": 8.303197860717773, "learning_rate": 2.253838383838384e-06, "loss": 6.235615539550781, "step": 55375 }, { "epoch": 0.0078, "grad_norm": 12.640412330627441, "learning_rate": 2.253585858585859e-06, "loss": 6.269161224365234, "step": 55380 }, { "epoch": 0.00785, "grad_norm": 10.521970748901367, "learning_rate": 2.2533333333333335e-06, "loss": 6.394633483886719, "step": 55385 }, { "epoch": 0.0079, "grad_norm": 11.708281517028809, "learning_rate": 2.2530808080808086e-06, "loss": 6.307267761230468, "step": 55390 }, { "epoch": 0.00795, "grad_norm": 14.165722846984863, "learning_rate": 2.252828282828283e-06, "loss": 6.231996154785156, "step": 55395 }, { "epoch": 0.008, "grad_norm": 9.04283618927002, "learning_rate": 2.252575757575758e-06, "loss": 6.2630775451660154, "step": 55400 }, { "epoch": 0.00805, "grad_norm": 9.241861343383789, "learning_rate": 2.2523232323232324e-06, "loss": 6.0924335479736325, "step": 55405 }, { "epoch": 0.0081, "grad_norm": 9.500019073486328, "learning_rate": 2.2520707070707075e-06, "loss": 6.289955139160156, "step": 55410 }, { "epoch": 0.00815, "grad_norm": 11.915101051330566, "learning_rate": 2.251818181818182e-06, "loss": 6.45520248413086, "step": 55415 }, { "epoch": 0.0082, "grad_norm": 8.25200366973877, "learning_rate": 2.2515656565656568e-06, "loss": 6.2834831237792965, "step": 55420 }, { "epoch": 0.00825, "grad_norm": 9.403433799743652, "learning_rate": 2.2513131313131314e-06, "loss": 6.2515419006347654, "step": 55425 }, { "epoch": 0.0083, "grad_norm": 5.849364280700684, "learning_rate": 2.2510606060606064e-06, "loss": 6.280702209472656, "step": 55430 }, { "epoch": 0.00835, "grad_norm": 7.245185375213623, "learning_rate": 2.250808080808081e-06, "loss": 6.236693572998047, "step": 55435 }, { "epoch": 0.0084, "grad_norm": 3.4506876468658447, "learning_rate": 2.2505555555555557e-06, "loss": 6.265388107299804, "step": 55440 }, { "epoch": 0.00845, "grad_norm": 9.338947296142578, "learning_rate": 2.2503030303030303e-06, "loss": 6.3565937042236325, "step": 55445 }, { "epoch": 0.0085, "grad_norm": 5.707555770874023, "learning_rate": 2.2500505050505054e-06, "loss": 6.234836578369141, "step": 55450 }, { "epoch": 0.00855, "grad_norm": 6.283568859100342, "learning_rate": 2.24979797979798e-06, "loss": 6.243525695800781, "step": 55455 }, { "epoch": 0.0086, "grad_norm": 4.828361988067627, "learning_rate": 2.2495454545454546e-06, "loss": 6.2716209411621096, "step": 55460 }, { "epoch": 0.00865, "grad_norm": 7.324448108673096, "learning_rate": 2.2492929292929293e-06, "loss": 6.294651794433594, "step": 55465 }, { "epoch": 0.0087, "grad_norm": 8.61438274383545, "learning_rate": 2.2490404040404043e-06, "loss": 6.2911731719970705, "step": 55470 }, { "epoch": 0.00875, "grad_norm": 9.766899108886719, "learning_rate": 2.248787878787879e-06, "loss": 6.231574249267578, "step": 55475 }, { "epoch": 0.0088, "grad_norm": 10.589253425598145, "learning_rate": 2.2485353535353536e-06, "loss": 6.292388534545898, "step": 55480 }, { "epoch": 0.00885, "grad_norm": 4.112590312957764, "learning_rate": 2.2482828282828282e-06, "loss": 6.264006423950195, "step": 55485 }, { "epoch": 0.0089, "grad_norm": 5.228659152984619, "learning_rate": 2.2480303030303033e-06, "loss": 6.473737335205078, "step": 55490 }, { "epoch": 0.00895, "grad_norm": 6.604508876800537, "learning_rate": 2.247777777777778e-06, "loss": 6.28828125, "step": 55495 }, { "epoch": 0.009, "grad_norm": 7.759131908416748, "learning_rate": 2.247525252525253e-06, "loss": 6.256314849853515, "step": 55500 }, { "epoch": 0.00905, "grad_norm": 5.7659831047058105, "learning_rate": 2.2472727272727276e-06, "loss": 6.225281143188477, "step": 55505 }, { "epoch": 0.0091, "grad_norm": 6.5819268226623535, "learning_rate": 2.2470202020202022e-06, "loss": 6.275871276855469, "step": 55510 }, { "epoch": 0.00915, "grad_norm": 7.221209526062012, "learning_rate": 2.246767676767677e-06, "loss": 6.260999298095703, "step": 55515 }, { "epoch": 0.0092, "grad_norm": 6.685068130493164, "learning_rate": 2.246515151515152e-06, "loss": 6.272079849243164, "step": 55520 }, { "epoch": 0.00925, "grad_norm": 7.395543575286865, "learning_rate": 2.2462626262626265e-06, "loss": 6.416314697265625, "step": 55525 }, { "epoch": 0.0093, "grad_norm": 8.722616195678711, "learning_rate": 2.246010101010101e-06, "loss": 6.357500839233398, "step": 55530 }, { "epoch": 0.00935, "grad_norm": 10.637331008911133, "learning_rate": 2.245757575757576e-06, "loss": 6.405256652832032, "step": 55535 }, { "epoch": 0.0094, "grad_norm": 5.426046848297119, "learning_rate": 2.245505050505051e-06, "loss": 6.270625305175781, "step": 55540 }, { "epoch": 0.00945, "grad_norm": 6.7551140785217285, "learning_rate": 2.2452525252525255e-06, "loss": 6.2552742004394535, "step": 55545 }, { "epoch": 0.0095, "grad_norm": 15.486772537231445, "learning_rate": 2.245e-06, "loss": 6.381095886230469, "step": 55550 }, { "epoch": 0.00955, "grad_norm": 5.917387962341309, "learning_rate": 2.2447474747474747e-06, "loss": 6.270011138916016, "step": 55555 }, { "epoch": 0.0096, "grad_norm": 9.036145210266113, "learning_rate": 2.24449494949495e-06, "loss": 6.293013000488282, "step": 55560 }, { "epoch": 0.00965, "grad_norm": 5.255424976348877, "learning_rate": 2.2442424242424244e-06, "loss": 6.25639533996582, "step": 55565 }, { "epoch": 0.0097, "grad_norm": 4.822597980499268, "learning_rate": 2.243989898989899e-06, "loss": 6.238325500488282, "step": 55570 }, { "epoch": 0.00975, "grad_norm": 7.821551322937012, "learning_rate": 2.2437373737373737e-06, "loss": 6.439323425292969, "step": 55575 }, { "epoch": 0.0098, "grad_norm": 22.266645431518555, "learning_rate": 2.2434848484848487e-06, "loss": 6.52354736328125, "step": 55580 }, { "epoch": 0.00985, "grad_norm": 10.085411071777344, "learning_rate": 2.2432323232323234e-06, "loss": 6.192454528808594, "step": 55585 }, { "epoch": 0.0099, "grad_norm": 26.464080810546875, "learning_rate": 2.242979797979798e-06, "loss": 6.354348373413086, "step": 55590 }, { "epoch": 0.00995, "grad_norm": 4.733357906341553, "learning_rate": 2.2427272727272726e-06, "loss": 6.266166687011719, "step": 55595 }, { "epoch": 0.01, "grad_norm": 5.584670543670654, "learning_rate": 2.2424747474747477e-06, "loss": 6.2719261169433596, "step": 55600 }, { "epoch": 0.01005, "grad_norm": 4.149133682250977, "learning_rate": 2.2422222222222223e-06, "loss": 6.226016235351563, "step": 55605 }, { "epoch": 0.0101, "grad_norm": 6.471023082733154, "learning_rate": 2.2419696969696974e-06, "loss": 6.262131881713867, "step": 55610 }, { "epoch": 0.01015, "grad_norm": 8.406002044677734, "learning_rate": 2.241717171717172e-06, "loss": 6.327808380126953, "step": 55615 }, { "epoch": 0.0102, "grad_norm": 6.3015899658203125, "learning_rate": 2.2414646464646466e-06, "loss": 6.301862335205078, "step": 55620 }, { "epoch": 0.01025, "grad_norm": 5.128692626953125, "learning_rate": 2.2412121212121213e-06, "loss": 6.237133407592774, "step": 55625 }, { "epoch": 0.0103, "grad_norm": 6.281396865844727, "learning_rate": 2.2409595959595963e-06, "loss": 6.281701278686524, "step": 55630 }, { "epoch": 0.01035, "grad_norm": 6.0756635665893555, "learning_rate": 2.240707070707071e-06, "loss": 6.501734924316406, "step": 55635 }, { "epoch": 0.0104, "grad_norm": 6.375938892364502, "learning_rate": 2.2404545454545456e-06, "loss": 6.380347442626953, "step": 55640 }, { "epoch": 0.01045, "grad_norm": 8.579413414001465, "learning_rate": 2.24020202020202e-06, "loss": 6.219499969482422, "step": 55645 }, { "epoch": 0.0105, "grad_norm": 6.860259532928467, "learning_rate": 2.2399494949494952e-06, "loss": 6.2419178009033205, "step": 55650 }, { "epoch": 0.01055, "grad_norm": 4.61507511138916, "learning_rate": 2.23969696969697e-06, "loss": 6.303063201904297, "step": 55655 }, { "epoch": 0.0106, "grad_norm": 4.355443477630615, "learning_rate": 2.2394444444444445e-06, "loss": 6.324828338623047, "step": 55660 }, { "epoch": 0.01065, "grad_norm": 4.687447547912598, "learning_rate": 2.239191919191919e-06, "loss": 6.232057571411133, "step": 55665 }, { "epoch": 0.0107, "grad_norm": 5.155239105224609, "learning_rate": 2.238939393939394e-06, "loss": 6.220396423339844, "step": 55670 }, { "epoch": 0.01075, "grad_norm": 5.026395797729492, "learning_rate": 2.238686868686869e-06, "loss": 6.216888046264648, "step": 55675 }, { "epoch": 0.0108, "grad_norm": 9.557104110717773, "learning_rate": 2.2384343434343435e-06, "loss": 6.288691711425781, "step": 55680 }, { "epoch": 0.01085, "grad_norm": 5.355003356933594, "learning_rate": 2.238181818181818e-06, "loss": 6.280705261230469, "step": 55685 }, { "epoch": 0.0109, "grad_norm": 4.7892327308654785, "learning_rate": 2.237929292929293e-06, "loss": 6.296241378784179, "step": 55690 }, { "epoch": 0.01095, "grad_norm": 3.5528500080108643, "learning_rate": 2.2376767676767678e-06, "loss": 6.269623565673828, "step": 55695 }, { "epoch": 0.011, "grad_norm": 5.978426456451416, "learning_rate": 2.237424242424243e-06, "loss": 6.2224475860595705, "step": 55700 }, { "epoch": 0.01105, "grad_norm": 7.050121784210205, "learning_rate": 2.237171717171717e-06, "loss": 6.251303100585938, "step": 55705 }, { "epoch": 0.0111, "grad_norm": 6.782392501831055, "learning_rate": 2.236919191919192e-06, "loss": 6.2935127258300785, "step": 55710 }, { "epoch": 0.01115, "grad_norm": 4.797961235046387, "learning_rate": 2.236666666666667e-06, "loss": 6.249351882934571, "step": 55715 }, { "epoch": 0.0112, "grad_norm": 6.292905807495117, "learning_rate": 2.2364141414141418e-06, "loss": 6.251496124267578, "step": 55720 }, { "epoch": 0.01125, "grad_norm": 9.276315689086914, "learning_rate": 2.2361616161616164e-06, "loss": 6.2979389190673825, "step": 55725 }, { "epoch": 0.0113, "grad_norm": 5.2532429695129395, "learning_rate": 2.235909090909091e-06, "loss": 6.247525787353515, "step": 55730 }, { "epoch": 0.01135, "grad_norm": 6.802566051483154, "learning_rate": 2.235656565656566e-06, "loss": 6.259909439086914, "step": 55735 }, { "epoch": 0.0114, "grad_norm": 6.939477443695068, "learning_rate": 2.2354040404040407e-06, "loss": 6.217877578735352, "step": 55740 }, { "epoch": 0.01145, "grad_norm": 11.636287689208984, "learning_rate": 2.2351515151515153e-06, "loss": 6.433662414550781, "step": 55745 }, { "epoch": 0.0115, "grad_norm": 4.105189800262451, "learning_rate": 2.23489898989899e-06, "loss": 6.259777069091797, "step": 55750 }, { "epoch": 0.01155, "grad_norm": 10.7062349319458, "learning_rate": 2.234646464646465e-06, "loss": 6.321342468261719, "step": 55755 }, { "epoch": 0.0116, "grad_norm": 10.434945106506348, "learning_rate": 2.2343939393939397e-06, "loss": 6.278896713256836, "step": 55760 }, { "epoch": 0.01165, "grad_norm": 5.0144362449646, "learning_rate": 2.2341414141414143e-06, "loss": 6.278501892089844, "step": 55765 }, { "epoch": 0.0117, "grad_norm": 8.33598518371582, "learning_rate": 2.233888888888889e-06, "loss": 6.28941764831543, "step": 55770 }, { "epoch": 0.01175, "grad_norm": 4.891776084899902, "learning_rate": 2.233636363636364e-06, "loss": 6.269562530517578, "step": 55775 }, { "epoch": 0.0118, "grad_norm": 4.617874622344971, "learning_rate": 2.2333838383838386e-06, "loss": 6.220915985107422, "step": 55780 }, { "epoch": 0.01185, "grad_norm": 7.857678413391113, "learning_rate": 2.2331313131313132e-06, "loss": 6.255865478515625, "step": 55785 }, { "epoch": 0.0119, "grad_norm": 6.629216194152832, "learning_rate": 2.232878787878788e-06, "loss": 6.301713562011718, "step": 55790 }, { "epoch": 0.01195, "grad_norm": 5.192300796508789, "learning_rate": 2.232626262626263e-06, "loss": 6.269006729125977, "step": 55795 }, { "epoch": 0.012, "grad_norm": 2.9285635948181152, "learning_rate": 2.2323737373737375e-06, "loss": 6.231029510498047, "step": 55800 }, { "epoch": 0.01205, "grad_norm": 5.054631233215332, "learning_rate": 2.2321212121212126e-06, "loss": 6.344683074951172, "step": 55805 }, { "epoch": 0.0121, "grad_norm": 5.0027079582214355, "learning_rate": 2.2318686868686872e-06, "loss": 6.271974182128906, "step": 55810 }, { "epoch": 0.01215, "grad_norm": 5.288295269012451, "learning_rate": 2.231616161616162e-06, "loss": 6.337760543823242, "step": 55815 }, { "epoch": 0.0122, "grad_norm": 4.7464776039123535, "learning_rate": 2.2313636363636365e-06, "loss": 6.184886169433594, "step": 55820 }, { "epoch": 0.01225, "grad_norm": 8.326811790466309, "learning_rate": 2.2311111111111115e-06, "loss": 6.243676376342774, "step": 55825 }, { "epoch": 0.0123, "grad_norm": 15.001068115234375, "learning_rate": 2.230858585858586e-06, "loss": 6.206618881225586, "step": 55830 }, { "epoch": 0.01235, "grad_norm": 6.832383155822754, "learning_rate": 2.230606060606061e-06, "loss": 6.258468246459961, "step": 55835 }, { "epoch": 0.0124, "grad_norm": 5.340137958526611, "learning_rate": 2.2303535353535354e-06, "loss": 6.2405647277832035, "step": 55840 }, { "epoch": 0.01245, "grad_norm": 6.207499980926514, "learning_rate": 2.2301010101010105e-06, "loss": 6.231080627441406, "step": 55845 }, { "epoch": 0.0125, "grad_norm": 8.055034637451172, "learning_rate": 2.229848484848485e-06, "loss": 6.21954116821289, "step": 55850 }, { "epoch": 0.01255, "grad_norm": 6.74752950668335, "learning_rate": 2.2295959595959597e-06, "loss": 6.252195739746094, "step": 55855 }, { "epoch": 0.0126, "grad_norm": 5.151163101196289, "learning_rate": 2.2293434343434344e-06, "loss": 6.4318092346191404, "step": 55860 }, { "epoch": 0.01265, "grad_norm": 5.5406599044799805, "learning_rate": 2.2290909090909094e-06, "loss": 6.646670532226563, "step": 55865 }, { "epoch": 0.0127, "grad_norm": 4.918658256530762, "learning_rate": 2.228838383838384e-06, "loss": 6.2828105926513675, "step": 55870 }, { "epoch": 0.01275, "grad_norm": 7.724645614624023, "learning_rate": 2.2285858585858587e-06, "loss": 6.219248199462891, "step": 55875 }, { "epoch": 0.0128, "grad_norm": 7.5981125831604, "learning_rate": 2.2283333333333333e-06, "loss": 6.3004802703857425, "step": 55880 }, { "epoch": 0.01285, "grad_norm": 11.174755096435547, "learning_rate": 2.2280808080808084e-06, "loss": 6.2702068328857425, "step": 55885 }, { "epoch": 0.0129, "grad_norm": 45.38349151611328, "learning_rate": 2.227828282828283e-06, "loss": 6.431885528564453, "step": 55890 }, { "epoch": 0.01295, "grad_norm": 7.753589630126953, "learning_rate": 2.2275757575757576e-06, "loss": 6.277084732055664, "step": 55895 }, { "epoch": 0.013, "grad_norm": 4.424330711364746, "learning_rate": 2.2273232323232323e-06, "loss": 6.2295490264892575, "step": 55900 }, { "epoch": 0.01305, "grad_norm": 6.27789831161499, "learning_rate": 2.2270707070707073e-06, "loss": 6.246075439453125, "step": 55905 }, { "epoch": 0.0131, "grad_norm": 5.17119836807251, "learning_rate": 2.226818181818182e-06, "loss": 6.237952804565429, "step": 55910 }, { "epoch": 0.01315, "grad_norm": 6.317302227020264, "learning_rate": 2.226565656565657e-06, "loss": 6.2562202453613285, "step": 55915 }, { "epoch": 0.0132, "grad_norm": 7.922659873962402, "learning_rate": 2.2263131313131316e-06, "loss": 6.387746810913086, "step": 55920 }, { "epoch": 0.01325, "grad_norm": 6.001702785491943, "learning_rate": 2.2260606060606063e-06, "loss": 6.528140258789063, "step": 55925 }, { "epoch": 0.0133, "grad_norm": 7.7512946128845215, "learning_rate": 2.225808080808081e-06, "loss": 6.261476135253906, "step": 55930 }, { "epoch": 0.01335, "grad_norm": 4.93289041519165, "learning_rate": 2.225555555555556e-06, "loss": 6.275044250488281, "step": 55935 }, { "epoch": 0.0134, "grad_norm": 4.640034198760986, "learning_rate": 2.2253030303030306e-06, "loss": 6.232272720336914, "step": 55940 }, { "epoch": 0.01345, "grad_norm": 5.831015586853027, "learning_rate": 2.225050505050505e-06, "loss": 6.28984489440918, "step": 55945 }, { "epoch": 0.0135, "grad_norm": 7.133537769317627, "learning_rate": 2.22479797979798e-06, "loss": 6.447408294677734, "step": 55950 }, { "epoch": 0.01355, "grad_norm": 6.0780816078186035, "learning_rate": 2.224545454545455e-06, "loss": 6.3011024475097654, "step": 55955 }, { "epoch": 0.0136, "grad_norm": 7.034844398498535, "learning_rate": 2.2242929292929295e-06, "loss": 6.297059631347656, "step": 55960 }, { "epoch": 0.01365, "grad_norm": 30.152130126953125, "learning_rate": 2.224040404040404e-06, "loss": 6.282871246337891, "step": 55965 }, { "epoch": 0.0137, "grad_norm": 8.641244888305664, "learning_rate": 2.2237878787878788e-06, "loss": 6.273617935180664, "step": 55970 }, { "epoch": 0.01375, "grad_norm": 5.335292816162109, "learning_rate": 2.223535353535354e-06, "loss": 6.269273376464843, "step": 55975 }, { "epoch": 0.0138, "grad_norm": 5.559218406677246, "learning_rate": 2.2232828282828285e-06, "loss": 6.357691955566406, "step": 55980 }, { "epoch": 0.01385, "grad_norm": 4.955188751220703, "learning_rate": 2.223030303030303e-06, "loss": 6.247386932373047, "step": 55985 }, { "epoch": 0.0139, "grad_norm": 6.906797409057617, "learning_rate": 2.2227777777777777e-06, "loss": 6.247916030883789, "step": 55990 }, { "epoch": 0.01395, "grad_norm": 5.973461151123047, "learning_rate": 2.2225252525252528e-06, "loss": 6.2576850891113285, "step": 55995 }, { "epoch": 0.014, "grad_norm": 5.548551082611084, "learning_rate": 2.2222727272727274e-06, "loss": 6.218695068359375, "step": 56000 }, { "epoch": 0.01405, "grad_norm": 5.774709701538086, "learning_rate": 2.222020202020202e-06, "loss": 6.221891021728515, "step": 56005 }, { "epoch": 0.0141, "grad_norm": 5.841799259185791, "learning_rate": 2.2217676767676767e-06, "loss": 6.248642349243164, "step": 56010 }, { "epoch": 0.01415, "grad_norm": 4.387969970703125, "learning_rate": 2.2215151515151517e-06, "loss": 6.252950286865234, "step": 56015 }, { "epoch": 0.0142, "grad_norm": 7.644568920135498, "learning_rate": 2.2212626262626263e-06, "loss": 6.225751495361328, "step": 56020 }, { "epoch": 0.01425, "grad_norm": 4.468446731567383, "learning_rate": 2.2210101010101014e-06, "loss": 6.237131118774414, "step": 56025 }, { "epoch": 0.0143, "grad_norm": 4.616331577301025, "learning_rate": 2.220757575757576e-06, "loss": 6.260491943359375, "step": 56030 }, { "epoch": 0.01435, "grad_norm": 5.1265740394592285, "learning_rate": 2.2205050505050507e-06, "loss": 6.2943767547607425, "step": 56035 }, { "epoch": 0.0144, "grad_norm": 27.995742797851562, "learning_rate": 2.2202525252525253e-06, "loss": 6.542456817626953, "step": 56040 }, { "epoch": 0.01445, "grad_norm": 9.208803176879883, "learning_rate": 2.2200000000000003e-06, "loss": 6.234098052978515, "step": 56045 }, { "epoch": 0.0145, "grad_norm": 8.574763298034668, "learning_rate": 2.219747474747475e-06, "loss": 6.272102737426758, "step": 56050 }, { "epoch": 0.01455, "grad_norm": 6.889362335205078, "learning_rate": 2.2194949494949496e-06, "loss": 6.243094635009766, "step": 56055 }, { "epoch": 0.0146, "grad_norm": 22.008548736572266, "learning_rate": 2.2192424242424242e-06, "loss": 6.314971923828125, "step": 56060 }, { "epoch": 0.01465, "grad_norm": 5.1897664070129395, "learning_rate": 2.2189898989898993e-06, "loss": 6.275286483764648, "step": 56065 }, { "epoch": 0.0147, "grad_norm": 6.433852195739746, "learning_rate": 2.218737373737374e-06, "loss": 6.227709197998047, "step": 56070 }, { "epoch": 0.01475, "grad_norm": 5.240730285644531, "learning_rate": 2.2184848484848485e-06, "loss": 6.250267028808594, "step": 56075 }, { "epoch": 0.0148, "grad_norm": 11.434518814086914, "learning_rate": 2.218232323232323e-06, "loss": 6.303483963012695, "step": 56080 }, { "epoch": 0.01485, "grad_norm": 4.418013572692871, "learning_rate": 2.2179797979797982e-06, "loss": 6.251652526855469, "step": 56085 }, { "epoch": 0.0149, "grad_norm": 7.154845714569092, "learning_rate": 2.217727272727273e-06, "loss": 6.255281066894531, "step": 56090 }, { "epoch": 0.01495, "grad_norm": 5.689499855041504, "learning_rate": 2.2174747474747475e-06, "loss": 6.253896331787109, "step": 56095 }, { "epoch": 0.015, "grad_norm": 5.953764915466309, "learning_rate": 2.217222222222222e-06, "loss": 6.242992401123047, "step": 56100 }, { "epoch": 0.01505, "grad_norm": 6.862473964691162, "learning_rate": 2.216969696969697e-06, "loss": 6.245672225952148, "step": 56105 }, { "epoch": 0.0151, "grad_norm": 9.366861343383789, "learning_rate": 2.216717171717172e-06, "loss": 6.2787025451660154, "step": 56110 }, { "epoch": 0.01515, "grad_norm": 8.541157722473145, "learning_rate": 2.216464646464647e-06, "loss": 6.272380828857422, "step": 56115 }, { "epoch": 0.0152, "grad_norm": 6.680261135101318, "learning_rate": 2.216212121212121e-06, "loss": 6.265886688232422, "step": 56120 }, { "epoch": 0.01525, "grad_norm": 10.582525253295898, "learning_rate": 2.215959595959596e-06, "loss": 6.265020370483398, "step": 56125 }, { "epoch": 0.0153, "grad_norm": 5.738116264343262, "learning_rate": 2.2157070707070708e-06, "loss": 6.304922103881836, "step": 56130 }, { "epoch": 0.01535, "grad_norm": 7.566983222961426, "learning_rate": 2.215454545454546e-06, "loss": 6.217292785644531, "step": 56135 }, { "epoch": 0.0154, "grad_norm": 5.794374942779541, "learning_rate": 2.2152020202020204e-06, "loss": 6.226314544677734, "step": 56140 }, { "epoch": 0.01545, "grad_norm": 6.036230087280273, "learning_rate": 2.214949494949495e-06, "loss": 6.233723449707031, "step": 56145 }, { "epoch": 0.0155, "grad_norm": 17.565690994262695, "learning_rate": 2.21469696969697e-06, "loss": 6.274483108520508, "step": 56150 }, { "epoch": 0.01555, "grad_norm": 5.602835655212402, "learning_rate": 2.2144444444444447e-06, "loss": 6.270854568481445, "step": 56155 }, { "epoch": 0.0156, "grad_norm": 6.423490524291992, "learning_rate": 2.2141919191919194e-06, "loss": 6.304653930664062, "step": 56160 }, { "epoch": 0.01565, "grad_norm": 8.593656539916992, "learning_rate": 2.213939393939394e-06, "loss": 6.247600936889649, "step": 56165 }, { "epoch": 0.0157, "grad_norm": 5.6598429679870605, "learning_rate": 2.213686868686869e-06, "loss": 6.231233215332031, "step": 56170 }, { "epoch": 0.01575, "grad_norm": 12.528000831604004, "learning_rate": 2.2134343434343437e-06, "loss": 6.243989562988281, "step": 56175 }, { "epoch": 0.0158, "grad_norm": 6.603438377380371, "learning_rate": 2.2131818181818183e-06, "loss": 6.285524368286133, "step": 56180 }, { "epoch": 0.01585, "grad_norm": 8.53148078918457, "learning_rate": 2.212929292929293e-06, "loss": 6.275897598266601, "step": 56185 }, { "epoch": 0.0159, "grad_norm": 5.651914596557617, "learning_rate": 2.212676767676768e-06, "loss": 6.342752456665039, "step": 56190 }, { "epoch": 0.01595, "grad_norm": 15.179117202758789, "learning_rate": 2.2124242424242426e-06, "loss": 6.313951873779297, "step": 56195 }, { "epoch": 0.016, "grad_norm": 4.815338611602783, "learning_rate": 2.2121717171717173e-06, "loss": 6.429928588867187, "step": 56200 }, { "epoch": 0.01605, "grad_norm": 4.423083305358887, "learning_rate": 2.211919191919192e-06, "loss": 6.265618896484375, "step": 56205 }, { "epoch": 0.0161, "grad_norm": 7.693796157836914, "learning_rate": 2.211666666666667e-06, "loss": 6.270309066772461, "step": 56210 }, { "epoch": 0.01615, "grad_norm": 5.606104373931885, "learning_rate": 2.2114141414141416e-06, "loss": 6.202603149414062, "step": 56215 }, { "epoch": 0.0162, "grad_norm": 5.449652671813965, "learning_rate": 2.2111616161616166e-06, "loss": 6.286528015136719, "step": 56220 }, { "epoch": 0.01625, "grad_norm": 7.289516925811768, "learning_rate": 2.2109090909090913e-06, "loss": 6.264170837402344, "step": 56225 }, { "epoch": 0.0163, "grad_norm": 6.239193916320801, "learning_rate": 2.210656565656566e-06, "loss": 6.237446212768555, "step": 56230 }, { "epoch": 0.01635, "grad_norm": 5.507393836975098, "learning_rate": 2.2104040404040405e-06, "loss": 6.350254821777344, "step": 56235 }, { "epoch": 0.0164, "grad_norm": 4.21250057220459, "learning_rate": 2.2101515151515156e-06, "loss": 6.491481781005859, "step": 56240 }, { "epoch": 0.01645, "grad_norm": 8.710219383239746, "learning_rate": 2.20989898989899e-06, "loss": 6.3232063293457035, "step": 56245 }, { "epoch": 0.0165, "grad_norm": 5.609013080596924, "learning_rate": 2.209646464646465e-06, "loss": 6.43717041015625, "step": 56250 }, { "epoch": 0.01655, "grad_norm": 6.827110767364502, "learning_rate": 2.2093939393939395e-06, "loss": 6.288896942138672, "step": 56255 }, { "epoch": 0.0166, "grad_norm": 5.624223232269287, "learning_rate": 2.2091414141414145e-06, "loss": 6.319085693359375, "step": 56260 }, { "epoch": 0.01665, "grad_norm": 10.714980125427246, "learning_rate": 2.208888888888889e-06, "loss": 6.303820037841797, "step": 56265 }, { "epoch": 0.0167, "grad_norm": 4.713624477386475, "learning_rate": 2.2086363636363638e-06, "loss": 6.207563018798828, "step": 56270 }, { "epoch": 0.01675, "grad_norm": 5.169791221618652, "learning_rate": 2.2083838383838384e-06, "loss": 6.290684509277344, "step": 56275 }, { "epoch": 0.0168, "grad_norm": 10.977018356323242, "learning_rate": 2.2081313131313135e-06, "loss": 6.305189514160157, "step": 56280 }, { "epoch": 0.01685, "grad_norm": 6.148150444030762, "learning_rate": 2.207878787878788e-06, "loss": 6.214566040039062, "step": 56285 }, { "epoch": 0.0169, "grad_norm": 6.305717945098877, "learning_rate": 2.2076262626262627e-06, "loss": 6.2659049987792965, "step": 56290 }, { "epoch": 0.01695, "grad_norm": 4.725083351135254, "learning_rate": 2.2073737373737374e-06, "loss": 6.291797637939453, "step": 56295 }, { "epoch": 0.017, "grad_norm": 7.99234676361084, "learning_rate": 2.2071212121212124e-06, "loss": 6.2130687713623045, "step": 56300 }, { "epoch": 0.01705, "grad_norm": 27.747163772583008, "learning_rate": 2.206868686868687e-06, "loss": 6.392393493652344, "step": 56305 }, { "epoch": 0.0171, "grad_norm": 11.946158409118652, "learning_rate": 2.2066161616161617e-06, "loss": 6.304953002929688, "step": 56310 }, { "epoch": 0.01715, "grad_norm": 4.6901631355285645, "learning_rate": 2.2063636363636363e-06, "loss": 6.1656852722167965, "step": 56315 }, { "epoch": 0.0172, "grad_norm": 7.018066883087158, "learning_rate": 2.2061111111111114e-06, "loss": 6.245234680175781, "step": 56320 }, { "epoch": 0.01725, "grad_norm": 5.0930376052856445, "learning_rate": 2.205858585858586e-06, "loss": 6.253126525878907, "step": 56325 }, { "epoch": 0.0173, "grad_norm": 8.263676643371582, "learning_rate": 2.205606060606061e-06, "loss": 6.267493057250976, "step": 56330 }, { "epoch": 0.01735, "grad_norm": 29.04561996459961, "learning_rate": 2.2053535353535357e-06, "loss": 6.210452651977539, "step": 56335 }, { "epoch": 0.0174, "grad_norm": 10.832197189331055, "learning_rate": 2.2051010101010103e-06, "loss": 6.315834045410156, "step": 56340 }, { "epoch": 0.01745, "grad_norm": 32.904300689697266, "learning_rate": 2.204848484848485e-06, "loss": 6.831727600097656, "step": 56345 }, { "epoch": 0.0175, "grad_norm": 5.386364936828613, "learning_rate": 2.20459595959596e-06, "loss": 6.264115524291992, "step": 56350 }, { "epoch": 0.01755, "grad_norm": 13.3474760055542, "learning_rate": 2.2043434343434346e-06, "loss": 6.234984970092773, "step": 56355 }, { "epoch": 0.0176, "grad_norm": 9.349204063415527, "learning_rate": 2.2040909090909092e-06, "loss": 6.629777526855468, "step": 56360 }, { "epoch": 0.01765, "grad_norm": 4.664176940917969, "learning_rate": 2.203838383838384e-06, "loss": 6.251838302612304, "step": 56365 }, { "epoch": 0.0177, "grad_norm": 9.667182922363281, "learning_rate": 2.203585858585859e-06, "loss": 6.334435272216797, "step": 56370 }, { "epoch": 0.01775, "grad_norm": 5.434082508087158, "learning_rate": 2.2033333333333336e-06, "loss": 6.296847534179688, "step": 56375 }, { "epoch": 0.0178, "grad_norm": 5.3119215965271, "learning_rate": 2.203080808080808e-06, "loss": 6.308899688720703, "step": 56380 }, { "epoch": 0.01785, "grad_norm": 10.73472785949707, "learning_rate": 2.202828282828283e-06, "loss": 6.415345764160156, "step": 56385 }, { "epoch": 0.0179, "grad_norm": 4.2962517738342285, "learning_rate": 2.202575757575758e-06, "loss": 6.258444213867188, "step": 56390 }, { "epoch": 0.01795, "grad_norm": 6.252317905426025, "learning_rate": 2.2023232323232325e-06, "loss": 6.2463523864746096, "step": 56395 }, { "epoch": 0.018, "grad_norm": 12.177088737487793, "learning_rate": 2.202070707070707e-06, "loss": 6.207799911499023, "step": 56400 }, { "epoch": 0.01805, "grad_norm": 9.933431625366211, "learning_rate": 2.2018181818181818e-06, "loss": 6.2551513671875, "step": 56405 }, { "epoch": 0.0181, "grad_norm": 7.736628532409668, "learning_rate": 2.201565656565657e-06, "loss": 6.27818717956543, "step": 56410 }, { "epoch": 0.01815, "grad_norm": 5.777061939239502, "learning_rate": 2.2013131313131314e-06, "loss": 6.1745258331298825, "step": 56415 }, { "epoch": 0.0182, "grad_norm": 5.971953868865967, "learning_rate": 2.2010606060606065e-06, "loss": 6.213777542114258, "step": 56420 }, { "epoch": 0.01825, "grad_norm": 3.63030743598938, "learning_rate": 2.2008080808080807e-06, "loss": 6.220179748535156, "step": 56425 }, { "epoch": 0.0183, "grad_norm": 11.09650707244873, "learning_rate": 2.2005555555555558e-06, "loss": 6.209419631958008, "step": 56430 }, { "epoch": 0.01835, "grad_norm": 7.7327494621276855, "learning_rate": 2.2003030303030304e-06, "loss": 6.257767105102539, "step": 56435 }, { "epoch": 0.0184, "grad_norm": 9.169635772705078, "learning_rate": 2.2000505050505054e-06, "loss": 6.2422138214111325, "step": 56440 }, { "epoch": 0.01845, "grad_norm": 4.946480751037598, "learning_rate": 2.19979797979798e-06, "loss": 6.24376335144043, "step": 56445 }, { "epoch": 0.0185, "grad_norm": 7.901193618774414, "learning_rate": 2.1995454545454547e-06, "loss": 6.2757526397705075, "step": 56450 }, { "epoch": 0.01855, "grad_norm": 3.5459532737731934, "learning_rate": 2.1992929292929293e-06, "loss": 6.2063041687011715, "step": 56455 }, { "epoch": 0.0186, "grad_norm": 5.003751277923584, "learning_rate": 2.1990404040404044e-06, "loss": 6.296712493896484, "step": 56460 }, { "epoch": 0.01865, "grad_norm": 33.13979721069336, "learning_rate": 2.198787878787879e-06, "loss": 6.426461791992187, "step": 56465 }, { "epoch": 0.0187, "grad_norm": 19.891019821166992, "learning_rate": 2.1985353535353536e-06, "loss": 6.456195831298828, "step": 56470 }, { "epoch": 0.01875, "grad_norm": 19.02680206298828, "learning_rate": 2.1982828282828283e-06, "loss": 6.1930591583251955, "step": 56475 }, { "epoch": 0.0188, "grad_norm": 24.68562126159668, "learning_rate": 2.1980303030303033e-06, "loss": 6.145984649658203, "step": 56480 }, { "epoch": 0.01885, "grad_norm": 25.356266021728516, "learning_rate": 2.197777777777778e-06, "loss": 6.176475906372071, "step": 56485 }, { "epoch": 0.0189, "grad_norm": 13.972722053527832, "learning_rate": 2.1975252525252526e-06, "loss": 6.368863677978515, "step": 56490 }, { "epoch": 0.01895, "grad_norm": 8.342623710632324, "learning_rate": 2.1972727272727272e-06, "loss": 6.29973030090332, "step": 56495 }, { "epoch": 0.019, "grad_norm": 5.124757289886475, "learning_rate": 2.1970202020202023e-06, "loss": 6.239696502685547, "step": 56500 }, { "epoch": 0.01905, "grad_norm": 5.052002906799316, "learning_rate": 2.196767676767677e-06, "loss": 6.22376594543457, "step": 56505 }, { "epoch": 0.0191, "grad_norm": 11.282360076904297, "learning_rate": 2.1965151515151515e-06, "loss": 6.353377151489258, "step": 56510 }, { "epoch": 0.01915, "grad_norm": 8.631482124328613, "learning_rate": 2.196262626262626e-06, "loss": 6.233985519409179, "step": 56515 }, { "epoch": 0.0192, "grad_norm": 8.084610939025879, "learning_rate": 2.1960101010101012e-06, "loss": 6.235418319702148, "step": 56520 }, { "epoch": 0.01925, "grad_norm": 7.765193462371826, "learning_rate": 2.195757575757576e-06, "loss": 6.303272247314453, "step": 56525 }, { "epoch": 0.0193, "grad_norm": 4.954274654388428, "learning_rate": 2.195505050505051e-06, "loss": 6.333013534545898, "step": 56530 }, { "epoch": 0.01935, "grad_norm": 9.450386047363281, "learning_rate": 2.195252525252525e-06, "loss": 6.5044189453125, "step": 56535 }, { "epoch": 0.0194, "grad_norm": 9.702848434448242, "learning_rate": 2.195e-06, "loss": 6.228793334960938, "step": 56540 }, { "epoch": 0.01945, "grad_norm": 6.987534523010254, "learning_rate": 2.194747474747475e-06, "loss": 6.247286224365235, "step": 56545 }, { "epoch": 0.0195, "grad_norm": 6.362093925476074, "learning_rate": 2.19449494949495e-06, "loss": 6.183530044555664, "step": 56550 }, { "epoch": 0.01955, "grad_norm": 4.83483362197876, "learning_rate": 2.1942424242424245e-06, "loss": 6.26739730834961, "step": 56555 }, { "epoch": 0.0196, "grad_norm": 7.113860607147217, "learning_rate": 2.193989898989899e-06, "loss": 6.250343704223633, "step": 56560 }, { "epoch": 0.01965, "grad_norm": 6.9123735427856445, "learning_rate": 2.193737373737374e-06, "loss": 6.277646255493164, "step": 56565 }, { "epoch": 0.0197, "grad_norm": 5.9693708419799805, "learning_rate": 2.1934848484848488e-06, "loss": 6.226507568359375, "step": 56570 }, { "epoch": 0.01975, "grad_norm": 4.883331775665283, "learning_rate": 2.1932323232323234e-06, "loss": 6.173248291015625, "step": 56575 }, { "epoch": 0.0198, "grad_norm": 28.732290267944336, "learning_rate": 2.192979797979798e-06, "loss": 6.212598419189453, "step": 56580 }, { "epoch": 0.01985, "grad_norm": 10.63304615020752, "learning_rate": 2.192727272727273e-06, "loss": 6.310305404663086, "step": 56585 }, { "epoch": 0.0199, "grad_norm": 4.618294715881348, "learning_rate": 2.1924747474747477e-06, "loss": 6.2559459686279295, "step": 56590 }, { "epoch": 0.01995, "grad_norm": 9.302359580993652, "learning_rate": 2.1922222222222224e-06, "loss": 6.261070632934571, "step": 56595 }, { "epoch": 0.02, "grad_norm": 6.732449531555176, "learning_rate": 2.191969696969697e-06, "loss": 6.282602691650391, "step": 56600 }, { "epoch": 0.02005, "grad_norm": 7.4979047775268555, "learning_rate": 2.191717171717172e-06, "loss": 6.204848098754883, "step": 56605 }, { "epoch": 0.0201, "grad_norm": 20.155841827392578, "learning_rate": 2.1914646464646467e-06, "loss": 6.303626632690429, "step": 56610 }, { "epoch": 0.02015, "grad_norm": 4.66737699508667, "learning_rate": 2.1912121212121213e-06, "loss": 6.2649993896484375, "step": 56615 }, { "epoch": 0.0202, "grad_norm": 5.1524338722229, "learning_rate": 2.190959595959596e-06, "loss": 6.2193553924560545, "step": 56620 }, { "epoch": 0.02025, "grad_norm": 4.854858875274658, "learning_rate": 2.190707070707071e-06, "loss": 6.2004859924316404, "step": 56625 }, { "epoch": 0.0203, "grad_norm": 13.123251914978027, "learning_rate": 2.1904545454545456e-06, "loss": 6.2335765838623045, "step": 56630 }, { "epoch": 0.02035, "grad_norm": 7.020074367523193, "learning_rate": 2.1902020202020207e-06, "loss": 6.2836753845214846, "step": 56635 }, { "epoch": 0.0204, "grad_norm": 5.617977619171143, "learning_rate": 2.1899494949494953e-06, "loss": 6.252784729003906, "step": 56640 }, { "epoch": 0.02045, "grad_norm": 10.676392555236816, "learning_rate": 2.18969696969697e-06, "loss": 6.260383605957031, "step": 56645 }, { "epoch": 0.0205, "grad_norm": 6.071321487426758, "learning_rate": 2.1894444444444446e-06, "loss": 6.223605728149414, "step": 56650 }, { "epoch": 0.02055, "grad_norm": 4.9353814125061035, "learning_rate": 2.1891919191919196e-06, "loss": 6.367713546752929, "step": 56655 }, { "epoch": 0.0206, "grad_norm": 7.123733997344971, "learning_rate": 2.1889393939393942e-06, "loss": 6.25396499633789, "step": 56660 }, { "epoch": 0.02065, "grad_norm": 6.326788425445557, "learning_rate": 2.188686868686869e-06, "loss": 6.262447357177734, "step": 56665 }, { "epoch": 0.0207, "grad_norm": 5.682342529296875, "learning_rate": 2.1884343434343435e-06, "loss": 6.3853801727294925, "step": 56670 }, { "epoch": 0.02075, "grad_norm": 6.251222610473633, "learning_rate": 2.1881818181818186e-06, "loss": 6.232818603515625, "step": 56675 }, { "epoch": 0.0208, "grad_norm": 5.737123012542725, "learning_rate": 2.187929292929293e-06, "loss": 6.166588592529297, "step": 56680 }, { "epoch": 0.02085, "grad_norm": 6.462324142456055, "learning_rate": 2.187676767676768e-06, "loss": 6.266089630126953, "step": 56685 }, { "epoch": 0.0209, "grad_norm": 6.380520820617676, "learning_rate": 2.1874242424242425e-06, "loss": 6.206441497802734, "step": 56690 }, { "epoch": 0.02095, "grad_norm": 5.260365009307861, "learning_rate": 2.1871717171717175e-06, "loss": 6.311648559570313, "step": 56695 }, { "epoch": 0.021, "grad_norm": 13.808819770812988, "learning_rate": 2.186919191919192e-06, "loss": 6.3949737548828125, "step": 56700 }, { "epoch": 0.02105, "grad_norm": 4.725271224975586, "learning_rate": 2.1866666666666668e-06, "loss": 6.261779403686523, "step": 56705 }, { "epoch": 0.0211, "grad_norm": 13.096083641052246, "learning_rate": 2.1864141414141414e-06, "loss": 6.346367645263672, "step": 56710 }, { "epoch": 0.02115, "grad_norm": 16.146408081054688, "learning_rate": 2.1861616161616164e-06, "loss": 6.500582122802735, "step": 56715 }, { "epoch": 0.0212, "grad_norm": 12.14456558227539, "learning_rate": 2.185909090909091e-06, "loss": 6.396576690673828, "step": 56720 }, { "epoch": 0.02125, "grad_norm": 7.85613489151001, "learning_rate": 2.1856565656565657e-06, "loss": 6.2496498107910154, "step": 56725 }, { "epoch": 0.0213, "grad_norm": 4.610645294189453, "learning_rate": 2.1854040404040403e-06, "loss": 6.258939743041992, "step": 56730 }, { "epoch": 0.02135, "grad_norm": 7.149487018585205, "learning_rate": 2.1851515151515154e-06, "loss": 6.211525726318359, "step": 56735 }, { "epoch": 0.0214, "grad_norm": 4.722102642059326, "learning_rate": 2.18489898989899e-06, "loss": 6.253692626953125, "step": 56740 }, { "epoch": 0.02145, "grad_norm": 4.639364719390869, "learning_rate": 2.184646464646465e-06, "loss": 6.255459213256836, "step": 56745 }, { "epoch": 0.0215, "grad_norm": 6.087833881378174, "learning_rate": 2.1843939393939397e-06, "loss": 6.307667541503906, "step": 56750 }, { "epoch": 0.02155, "grad_norm": 6.794540882110596, "learning_rate": 2.1841414141414143e-06, "loss": 6.213315963745117, "step": 56755 }, { "epoch": 0.0216, "grad_norm": 8.968896865844727, "learning_rate": 2.183888888888889e-06, "loss": 6.241004180908203, "step": 56760 }, { "epoch": 0.02165, "grad_norm": 4.902349948883057, "learning_rate": 2.183636363636364e-06, "loss": 6.240460205078125, "step": 56765 }, { "epoch": 0.0217, "grad_norm": 5.027188777923584, "learning_rate": 2.1833838383838387e-06, "loss": 6.2621612548828125, "step": 56770 }, { "epoch": 0.02175, "grad_norm": 5.341808795928955, "learning_rate": 2.1831313131313133e-06, "loss": 6.233772277832031, "step": 56775 }, { "epoch": 0.0218, "grad_norm": 4.725331783294678, "learning_rate": 2.182878787878788e-06, "loss": 6.235933685302735, "step": 56780 }, { "epoch": 0.02185, "grad_norm": 6.237729549407959, "learning_rate": 2.182626262626263e-06, "loss": 6.258322143554688, "step": 56785 }, { "epoch": 0.0219, "grad_norm": 4.94667387008667, "learning_rate": 2.1823737373737376e-06, "loss": 6.332482147216797, "step": 56790 }, { "epoch": 0.02195, "grad_norm": 7.296596527099609, "learning_rate": 2.1821212121212122e-06, "loss": 6.374029922485351, "step": 56795 }, { "epoch": 0.022, "grad_norm": 4.666719436645508, "learning_rate": 2.181868686868687e-06, "loss": 6.319889068603516, "step": 56800 }, { "epoch": 0.02205, "grad_norm": 12.965262413024902, "learning_rate": 2.181616161616162e-06, "loss": 6.236655426025391, "step": 56805 }, { "epoch": 0.0221, "grad_norm": 5.704078674316406, "learning_rate": 2.1813636363636365e-06, "loss": 6.268844985961914, "step": 56810 }, { "epoch": 0.02215, "grad_norm": 7.981620788574219, "learning_rate": 2.181111111111111e-06, "loss": 6.221818161010742, "step": 56815 }, { "epoch": 0.0222, "grad_norm": 8.233094215393066, "learning_rate": 2.180858585858586e-06, "loss": 6.247924041748047, "step": 56820 }, { "epoch": 0.02225, "grad_norm": 4.245344638824463, "learning_rate": 2.180606060606061e-06, "loss": 6.228361129760742, "step": 56825 }, { "epoch": 0.0223, "grad_norm": 5.694827079772949, "learning_rate": 2.1803535353535355e-06, "loss": 6.23225212097168, "step": 56830 }, { "epoch": 0.02235, "grad_norm": 8.137443542480469, "learning_rate": 2.1801010101010105e-06, "loss": 6.299345016479492, "step": 56835 }, { "epoch": 0.0224, "grad_norm": 6.5206828117370605, "learning_rate": 2.1798484848484847e-06, "loss": 6.269544219970703, "step": 56840 }, { "epoch": 0.02245, "grad_norm": 41.435184478759766, "learning_rate": 2.17959595959596e-06, "loss": 6.655552673339844, "step": 56845 }, { "epoch": 0.0225, "grad_norm": 10.843306541442871, "learning_rate": 2.1793434343434344e-06, "loss": 6.3352409362792965, "step": 56850 }, { "epoch": 0.02255, "grad_norm": 3.8337149620056152, "learning_rate": 2.1790909090909095e-06, "loss": 6.231926345825196, "step": 56855 }, { "epoch": 0.0226, "grad_norm": 6.825244426727295, "learning_rate": 2.178838383838384e-06, "loss": 6.279751586914062, "step": 56860 }, { "epoch": 0.02265, "grad_norm": 4.636751174926758, "learning_rate": 2.1785858585858587e-06, "loss": 6.282017517089844, "step": 56865 }, { "epoch": 0.0227, "grad_norm": 8.680952072143555, "learning_rate": 2.1783333333333334e-06, "loss": 6.2662353515625, "step": 56870 }, { "epoch": 0.02275, "grad_norm": 34.41004943847656, "learning_rate": 2.1780808080808084e-06, "loss": 6.237017059326172, "step": 56875 }, { "epoch": 0.0228, "grad_norm": 11.168266296386719, "learning_rate": 2.177828282828283e-06, "loss": 6.245069122314453, "step": 56880 }, { "epoch": 0.02285, "grad_norm": 4.659790515899658, "learning_rate": 2.1775757575757577e-06, "loss": 6.251874542236328, "step": 56885 }, { "epoch": 0.0229, "grad_norm": 26.789499282836914, "learning_rate": 2.1773232323232323e-06, "loss": 6.320080184936524, "step": 56890 }, { "epoch": 0.02295, "grad_norm": 4.560754776000977, "learning_rate": 2.1770707070707074e-06, "loss": 6.398975372314453, "step": 56895 }, { "epoch": 0.023, "grad_norm": 3.9939608573913574, "learning_rate": 2.176818181818182e-06, "loss": 6.2169548034667965, "step": 56900 }, { "epoch": 0.02305, "grad_norm": 6.74412727355957, "learning_rate": 2.1765656565656566e-06, "loss": 6.235523986816406, "step": 56905 }, { "epoch": 0.0231, "grad_norm": 5.620357513427734, "learning_rate": 2.1763131313131313e-06, "loss": 6.249933242797852, "step": 56910 }, { "epoch": 0.02315, "grad_norm": 5.1259565353393555, "learning_rate": 2.1760606060606063e-06, "loss": 6.2476142883300785, "step": 56915 }, { "epoch": 0.0232, "grad_norm": 6.593298435211182, "learning_rate": 2.175808080808081e-06, "loss": 6.28267822265625, "step": 56920 }, { "epoch": 0.02325, "grad_norm": 4.657433032989502, "learning_rate": 2.1755555555555556e-06, "loss": 6.220175933837891, "step": 56925 }, { "epoch": 0.0233, "grad_norm": 6.058197021484375, "learning_rate": 2.17530303030303e-06, "loss": 6.526991271972657, "step": 56930 }, { "epoch": 0.02335, "grad_norm": 6.339434623718262, "learning_rate": 2.1750505050505053e-06, "loss": 6.349195861816407, "step": 56935 }, { "epoch": 0.0234, "grad_norm": 5.389250755310059, "learning_rate": 2.17479797979798e-06, "loss": 6.232083129882812, "step": 56940 }, { "epoch": 0.02345, "grad_norm": 5.055088520050049, "learning_rate": 2.174545454545455e-06, "loss": 6.231699371337891, "step": 56945 }, { "epoch": 0.0235, "grad_norm": 6.338824272155762, "learning_rate": 2.174292929292929e-06, "loss": 6.251812744140625, "step": 56950 }, { "epoch": 0.02355, "grad_norm": 4.331363201141357, "learning_rate": 2.174040404040404e-06, "loss": 6.251773834228516, "step": 56955 }, { "epoch": 0.0236, "grad_norm": 6.418778896331787, "learning_rate": 2.173787878787879e-06, "loss": 6.249342346191407, "step": 56960 }, { "epoch": 0.02365, "grad_norm": 4.898882865905762, "learning_rate": 2.173535353535354e-06, "loss": 6.273783493041992, "step": 56965 }, { "epoch": 0.0237, "grad_norm": 5.059467792510986, "learning_rate": 2.1732828282828285e-06, "loss": 6.259314346313476, "step": 56970 }, { "epoch": 0.02375, "grad_norm": 9.253665924072266, "learning_rate": 2.173030303030303e-06, "loss": 6.220629119873047, "step": 56975 }, { "epoch": 0.0238, "grad_norm": 3.6291162967681885, "learning_rate": 2.1727777777777778e-06, "loss": 6.2479499816894535, "step": 56980 }, { "epoch": 0.02385, "grad_norm": 5.295804023742676, "learning_rate": 2.172525252525253e-06, "loss": 6.2650402069091795, "step": 56985 }, { "epoch": 0.0239, "grad_norm": 6.930663108825684, "learning_rate": 2.1722727272727275e-06, "loss": 6.296942520141601, "step": 56990 }, { "epoch": 0.02395, "grad_norm": 5.940296173095703, "learning_rate": 2.172020202020202e-06, "loss": 6.2613975524902346, "step": 56995 }, { "epoch": 0.024, "grad_norm": 7.201550483703613, "learning_rate": 2.171767676767677e-06, "loss": 6.259256744384766, "step": 57000 }, { "epoch": 5e-05, "grad_norm": 4.282774925231934, "learning_rate": 2.1715151515151518e-06, "loss": 6.244738388061523, "step": 57005 }, { "epoch": 0.0001, "grad_norm": 7.519387245178223, "learning_rate": 2.1712626262626264e-06, "loss": 6.25084114074707, "step": 57010 }, { "epoch": 0.00015, "grad_norm": 5.746885299682617, "learning_rate": 2.171010101010101e-06, "loss": 6.2451011657714846, "step": 57015 }, { "epoch": 0.0002, "grad_norm": 11.02363395690918, "learning_rate": 2.170757575757576e-06, "loss": 6.199062347412109, "step": 57020 }, { "epoch": 0.00025, "grad_norm": 9.03878116607666, "learning_rate": 2.1705050505050507e-06, "loss": 6.316693115234375, "step": 57025 }, { "epoch": 0.0003, "grad_norm": 9.672118186950684, "learning_rate": 2.1702525252525253e-06, "loss": 6.286570358276367, "step": 57030 }, { "epoch": 0.00035, "grad_norm": 3.7820138931274414, "learning_rate": 2.17e-06, "loss": 6.212762832641602, "step": 57035 }, { "epoch": 0.0004, "grad_norm": 5.979037761688232, "learning_rate": 2.169747474747475e-06, "loss": 6.207011413574219, "step": 57040 }, { "epoch": 0.00045, "grad_norm": 5.479885101318359, "learning_rate": 2.1694949494949497e-06, "loss": 6.279594421386719, "step": 57045 }, { "epoch": 0.0005, "grad_norm": 5.923830509185791, "learning_rate": 2.1692424242424247e-06, "loss": 6.2910301208496096, "step": 57050 }, { "epoch": 0.00055, "grad_norm": 5.095530986785889, "learning_rate": 2.1689898989898993e-06, "loss": 6.247453689575195, "step": 57055 }, { "epoch": 0.0006, "grad_norm": 6.011431694030762, "learning_rate": 2.168737373737374e-06, "loss": 6.269033050537109, "step": 57060 }, { "epoch": 0.00065, "grad_norm": 5.517580032348633, "learning_rate": 2.1684848484848486e-06, "loss": 6.322472000122071, "step": 57065 }, { "epoch": 0.0007, "grad_norm": 10.842967987060547, "learning_rate": 2.1682323232323237e-06, "loss": 6.285588073730469, "step": 57070 }, { "epoch": 0.00075, "grad_norm": 5.932394027709961, "learning_rate": 2.1679797979797983e-06, "loss": 6.2316429138183596, "step": 57075 }, { "epoch": 0.0008, "grad_norm": 9.306964874267578, "learning_rate": 2.167727272727273e-06, "loss": 6.346364593505859, "step": 57080 }, { "epoch": 0.00085, "grad_norm": 7.195550918579102, "learning_rate": 2.1674747474747475e-06, "loss": 6.2651618957519535, "step": 57085 }, { "epoch": 0.0009, "grad_norm": 4.79799222946167, "learning_rate": 2.1672222222222226e-06, "loss": 6.228447723388672, "step": 57090 }, { "epoch": 0.00095, "grad_norm": 5.767879962921143, "learning_rate": 2.1669696969696972e-06, "loss": 6.300688552856445, "step": 57095 }, { "epoch": 0.001, "grad_norm": 5.380273342132568, "learning_rate": 2.166717171717172e-06, "loss": 6.230257797241211, "step": 57100 }, { "epoch": 0.00105, "grad_norm": 6.152207851409912, "learning_rate": 2.1664646464646465e-06, "loss": 6.254022216796875, "step": 57105 }, { "epoch": 0.0011, "grad_norm": 6.509500980377197, "learning_rate": 2.1662121212121215e-06, "loss": 6.201196670532227, "step": 57110 }, { "epoch": 0.00115, "grad_norm": 8.299946784973145, "learning_rate": 2.165959595959596e-06, "loss": 6.311287689208984, "step": 57115 }, { "epoch": 0.0012, "grad_norm": 24.564090728759766, "learning_rate": 2.165707070707071e-06, "loss": 6.282321929931641, "step": 57120 }, { "epoch": 0.00125, "grad_norm": 16.22191047668457, "learning_rate": 2.1654545454545454e-06, "loss": 6.515467834472656, "step": 57125 }, { "epoch": 0.0013, "grad_norm": 24.314109802246094, "learning_rate": 2.1652020202020205e-06, "loss": 6.31835823059082, "step": 57130 }, { "epoch": 0.00135, "grad_norm": 8.00788402557373, "learning_rate": 2.164949494949495e-06, "loss": 6.272206497192383, "step": 57135 }, { "epoch": 0.0014, "grad_norm": 7.461885452270508, "learning_rate": 2.16469696969697e-06, "loss": 6.2249900817871096, "step": 57140 }, { "epoch": 0.00145, "grad_norm": 6.63645076751709, "learning_rate": 2.1644444444444444e-06, "loss": 6.268244934082031, "step": 57145 }, { "epoch": 0.0015, "grad_norm": 4.513003826141357, "learning_rate": 2.1641919191919194e-06, "loss": 6.2727302551269535, "step": 57150 }, { "epoch": 0.00155, "grad_norm": 33.23722839355469, "learning_rate": 2.163939393939394e-06, "loss": 6.170813369750976, "step": 57155 }, { "epoch": 0.0016, "grad_norm": 22.657512664794922, "learning_rate": 2.163686868686869e-06, "loss": 6.066714477539063, "step": 57160 }, { "epoch": 0.00165, "grad_norm": 6.274256706237793, "learning_rate": 2.1634343434343437e-06, "loss": 6.2844280242919925, "step": 57165 }, { "epoch": 0.0017, "grad_norm": 8.483368873596191, "learning_rate": 2.1631818181818184e-06, "loss": 6.244488906860352, "step": 57170 }, { "epoch": 0.00175, "grad_norm": 7.16425895690918, "learning_rate": 2.162929292929293e-06, "loss": 6.271718215942383, "step": 57175 }, { "epoch": 0.0018, "grad_norm": 5.100413799285889, "learning_rate": 2.162676767676768e-06, "loss": 6.33733901977539, "step": 57180 }, { "epoch": 0.00185, "grad_norm": 5.884507179260254, "learning_rate": 2.1624242424242427e-06, "loss": 6.238667297363281, "step": 57185 }, { "epoch": 0.0019, "grad_norm": 20.600208282470703, "learning_rate": 2.1621717171717173e-06, "loss": 6.451020812988281, "step": 57190 }, { "epoch": 0.00195, "grad_norm": 6.312302589416504, "learning_rate": 2.161919191919192e-06, "loss": 6.260741424560547, "step": 57195 }, { "epoch": 0.002, "grad_norm": 27.682859420776367, "learning_rate": 2.161666666666667e-06, "loss": 6.341499328613281, "step": 57200 }, { "epoch": 0.00205, "grad_norm": 11.340911865234375, "learning_rate": 2.1614141414141416e-06, "loss": 6.229825592041015, "step": 57205 }, { "epoch": 0.0021, "grad_norm": 6.9808220863342285, "learning_rate": 2.1611616161616163e-06, "loss": 6.210009765625, "step": 57210 }, { "epoch": 0.00215, "grad_norm": 17.304174423217773, "learning_rate": 2.160909090909091e-06, "loss": 6.111972045898438, "step": 57215 }, { "epoch": 0.0022, "grad_norm": 4.810727119445801, "learning_rate": 2.160656565656566e-06, "loss": 6.286716461181641, "step": 57220 }, { "epoch": 0.00225, "grad_norm": 4.986473083496094, "learning_rate": 2.1604040404040406e-06, "loss": 6.255978393554687, "step": 57225 }, { "epoch": 0.0023, "grad_norm": 6.781486988067627, "learning_rate": 2.160151515151515e-06, "loss": 6.233836364746094, "step": 57230 }, { "epoch": 0.00235, "grad_norm": 7.4454569816589355, "learning_rate": 2.15989898989899e-06, "loss": 6.2422538757324215, "step": 57235 }, { "epoch": 0.0024, "grad_norm": 9.683294296264648, "learning_rate": 2.159646464646465e-06, "loss": 6.337147521972656, "step": 57240 }, { "epoch": 0.00245, "grad_norm": 8.377121925354004, "learning_rate": 2.1593939393939395e-06, "loss": 6.254204177856446, "step": 57245 }, { "epoch": 0.0025, "grad_norm": 23.129545211791992, "learning_rate": 2.1591414141414146e-06, "loss": 6.298862075805664, "step": 57250 }, { "epoch": 0.00255, "grad_norm": 4.146770477294922, "learning_rate": 2.1588888888888888e-06, "loss": 6.278744125366211, "step": 57255 }, { "epoch": 0.0026, "grad_norm": 10.163594245910645, "learning_rate": 2.158636363636364e-06, "loss": 6.346676254272461, "step": 57260 }, { "epoch": 0.00265, "grad_norm": 3.7849700450897217, "learning_rate": 2.1583838383838385e-06, "loss": 6.256533813476563, "step": 57265 }, { "epoch": 0.0027, "grad_norm": 11.052412033081055, "learning_rate": 2.1581313131313135e-06, "loss": 6.335914611816406, "step": 57270 }, { "epoch": 0.00275, "grad_norm": 7.940736293792725, "learning_rate": 2.157878787878788e-06, "loss": 6.355176544189453, "step": 57275 }, { "epoch": 0.0028, "grad_norm": 8.346489906311035, "learning_rate": 2.1576262626262628e-06, "loss": 6.1874237060546875, "step": 57280 }, { "epoch": 0.00285, "grad_norm": 18.960525512695312, "learning_rate": 2.1573737373737374e-06, "loss": 6.405329895019531, "step": 57285 }, { "epoch": 0.0029, "grad_norm": 8.223164558410645, "learning_rate": 2.1571212121212125e-06, "loss": 6.30670394897461, "step": 57290 }, { "epoch": 0.00295, "grad_norm": 4.434844493865967, "learning_rate": 2.156868686868687e-06, "loss": 6.2330169677734375, "step": 57295 }, { "epoch": 0.003, "grad_norm": 7.404333591461182, "learning_rate": 2.1566161616161617e-06, "loss": 6.325947570800781, "step": 57300 }, { "epoch": 0.00305, "grad_norm": 3.484861135482788, "learning_rate": 2.1563636363636364e-06, "loss": 6.250049591064453, "step": 57305 }, { "epoch": 0.0031, "grad_norm": 4.014464378356934, "learning_rate": 2.1561111111111114e-06, "loss": 6.184513092041016, "step": 57310 }, { "epoch": 0.00315, "grad_norm": 5.762004852294922, "learning_rate": 2.155858585858586e-06, "loss": 6.237563705444336, "step": 57315 }, { "epoch": 0.0032, "grad_norm": 9.251988410949707, "learning_rate": 2.1556060606060607e-06, "loss": 6.189826965332031, "step": 57320 }, { "epoch": 0.00325, "grad_norm": 4.959934234619141, "learning_rate": 2.1553535353535353e-06, "loss": 6.258472442626953, "step": 57325 }, { "epoch": 0.0033, "grad_norm": 31.60756492614746, "learning_rate": 2.1551010101010104e-06, "loss": 6.291977310180664, "step": 57330 }, { "epoch": 0.00335, "grad_norm": 14.957826614379883, "learning_rate": 2.154848484848485e-06, "loss": 6.498627471923828, "step": 57335 }, { "epoch": 0.0034, "grad_norm": 35.1486701965332, "learning_rate": 2.1545959595959596e-06, "loss": 6.396542358398437, "step": 57340 }, { "epoch": 0.00345, "grad_norm": 33.19749069213867, "learning_rate": 2.1543434343434342e-06, "loss": 6.316601943969727, "step": 57345 }, { "epoch": 0.0035, "grad_norm": 33.894317626953125, "learning_rate": 2.1540909090909093e-06, "loss": 6.416358947753906, "step": 57350 }, { "epoch": 0.00355, "grad_norm": 24.031370162963867, "learning_rate": 2.153838383838384e-06, "loss": 6.5346221923828125, "step": 57355 }, { "epoch": 0.0036, "grad_norm": 19.11751365661621, "learning_rate": 2.153585858585859e-06, "loss": 6.402764129638672, "step": 57360 }, { "epoch": 0.00365, "grad_norm": 18.106021881103516, "learning_rate": 2.153333333333333e-06, "loss": 6.374507141113281, "step": 57365 }, { "epoch": 0.0037, "grad_norm": 6.729419231414795, "learning_rate": 2.1530808080808082e-06, "loss": 6.296858596801758, "step": 57370 }, { "epoch": 0.00375, "grad_norm": 8.662023544311523, "learning_rate": 2.152828282828283e-06, "loss": 6.248391723632812, "step": 57375 }, { "epoch": 0.0038, "grad_norm": 5.646495342254639, "learning_rate": 2.152575757575758e-06, "loss": 6.348509216308594, "step": 57380 }, { "epoch": 0.00385, "grad_norm": 14.461926460266113, "learning_rate": 2.1523232323232326e-06, "loss": 6.283998107910156, "step": 57385 }, { "epoch": 0.0039, "grad_norm": 6.2899556159973145, "learning_rate": 2.152070707070707e-06, "loss": 6.212150955200196, "step": 57390 }, { "epoch": 0.00395, "grad_norm": 10.758445739746094, "learning_rate": 2.151818181818182e-06, "loss": 6.265097045898438, "step": 57395 }, { "epoch": 0.004, "grad_norm": 9.28139591217041, "learning_rate": 2.151565656565657e-06, "loss": 6.262187194824219, "step": 57400 }, { "epoch": 0.00405, "grad_norm": 8.636842727661133, "learning_rate": 2.1513131313131315e-06, "loss": 6.235008621215821, "step": 57405 }, { "epoch": 0.0041, "grad_norm": 5.5772809982299805, "learning_rate": 2.151060606060606e-06, "loss": 6.237874603271484, "step": 57410 }, { "epoch": 0.00415, "grad_norm": 5.217741012573242, "learning_rate": 2.150808080808081e-06, "loss": 6.21484146118164, "step": 57415 }, { "epoch": 0.0042, "grad_norm": 12.52638053894043, "learning_rate": 2.150555555555556e-06, "loss": 6.247842407226562, "step": 57420 }, { "epoch": 0.00425, "grad_norm": 16.30757713317871, "learning_rate": 2.1503030303030304e-06, "loss": 6.2397621154785154, "step": 57425 }, { "epoch": 0.0043, "grad_norm": 9.998513221740723, "learning_rate": 2.150050505050505e-06, "loss": 6.2782642364501955, "step": 57430 }, { "epoch": 0.00435, "grad_norm": 8.67658519744873, "learning_rate": 2.14979797979798e-06, "loss": 6.277956771850586, "step": 57435 }, { "epoch": 0.0044, "grad_norm": 7.868230819702148, "learning_rate": 2.1495454545454548e-06, "loss": 6.296639633178711, "step": 57440 }, { "epoch": 0.00445, "grad_norm": 7.605706214904785, "learning_rate": 2.1492929292929294e-06, "loss": 6.1597145080566404, "step": 57445 }, { "epoch": 0.0045, "grad_norm": 4.166457176208496, "learning_rate": 2.149040404040404e-06, "loss": 6.206688308715821, "step": 57450 }, { "epoch": 0.00455, "grad_norm": 9.350994110107422, "learning_rate": 2.148787878787879e-06, "loss": 6.327695083618164, "step": 57455 }, { "epoch": 0.0046, "grad_norm": 6.832162857055664, "learning_rate": 2.1485353535353537e-06, "loss": 6.230829238891602, "step": 57460 }, { "epoch": 0.00465, "grad_norm": 5.264383316040039, "learning_rate": 2.1482828282828288e-06, "loss": 6.267271041870117, "step": 57465 }, { "epoch": 0.0047, "grad_norm": 5.63324499130249, "learning_rate": 2.1480303030303034e-06, "loss": 6.2340087890625, "step": 57470 }, { "epoch": 0.00475, "grad_norm": 9.067317962646484, "learning_rate": 2.147777777777778e-06, "loss": 6.221051788330078, "step": 57475 }, { "epoch": 0.0048, "grad_norm": 7.468384742736816, "learning_rate": 2.1475252525252526e-06, "loss": 6.279523086547852, "step": 57480 }, { "epoch": 0.00485, "grad_norm": 6.490098476409912, "learning_rate": 2.1472727272727277e-06, "loss": 6.327327728271484, "step": 57485 }, { "epoch": 0.0049, "grad_norm": 7.579957962036133, "learning_rate": 2.1470202020202023e-06, "loss": 6.2321319580078125, "step": 57490 }, { "epoch": 0.00495, "grad_norm": 6.544208526611328, "learning_rate": 2.146767676767677e-06, "loss": 6.273623657226563, "step": 57495 }, { "epoch": 0.005, "grad_norm": 8.996082305908203, "learning_rate": 2.1465151515151516e-06, "loss": 6.224722290039063, "step": 57500 }, { "epoch": 0.00505, "grad_norm": 5.7146477699279785, "learning_rate": 2.1462626262626266e-06, "loss": 6.30357780456543, "step": 57505 }, { "epoch": 0.0051, "grad_norm": 36.72578048706055, "learning_rate": 2.1460101010101013e-06, "loss": 6.402689361572266, "step": 57510 }, { "epoch": 0.00515, "grad_norm": 5.255067825317383, "learning_rate": 2.145757575757576e-06, "loss": 6.368858337402344, "step": 57515 }, { "epoch": 0.0052, "grad_norm": 13.248156547546387, "learning_rate": 2.1455050505050505e-06, "loss": 6.496542358398438, "step": 57520 }, { "epoch": 0.00525, "grad_norm": 7.759362697601318, "learning_rate": 2.1452525252525256e-06, "loss": 6.242947387695312, "step": 57525 }, { "epoch": 0.0053, "grad_norm": 6.109527587890625, "learning_rate": 2.1450000000000002e-06, "loss": 6.308911895751953, "step": 57530 }, { "epoch": 0.00535, "grad_norm": 18.862545013427734, "learning_rate": 2.144747474747475e-06, "loss": 6.250395584106445, "step": 57535 }, { "epoch": 0.0054, "grad_norm": 9.131087303161621, "learning_rate": 2.1444949494949495e-06, "loss": 6.23051643371582, "step": 57540 }, { "epoch": 0.00545, "grad_norm": 7.014010906219482, "learning_rate": 2.1442424242424245e-06, "loss": 6.241117858886719, "step": 57545 }, { "epoch": 0.0055, "grad_norm": 10.482648849487305, "learning_rate": 2.143989898989899e-06, "loss": 6.255389785766601, "step": 57550 }, { "epoch": 0.00555, "grad_norm": 3.617630958557129, "learning_rate": 2.1437373737373742e-06, "loss": 6.214511871337891, "step": 57555 }, { "epoch": 0.0056, "grad_norm": 6.260634422302246, "learning_rate": 2.1434848484848484e-06, "loss": 6.249582290649414, "step": 57560 }, { "epoch": 0.00565, "grad_norm": 6.730203628540039, "learning_rate": 2.1432323232323235e-06, "loss": 6.240464401245117, "step": 57565 }, { "epoch": 0.0057, "grad_norm": 7.097848892211914, "learning_rate": 2.142979797979798e-06, "loss": 6.223667144775391, "step": 57570 }, { "epoch": 0.00575, "grad_norm": 4.867612838745117, "learning_rate": 2.142727272727273e-06, "loss": 6.212516784667969, "step": 57575 }, { "epoch": 0.0058, "grad_norm": 6.892399787902832, "learning_rate": 2.1424747474747478e-06, "loss": 6.251478576660157, "step": 57580 }, { "epoch": 0.00585, "grad_norm": 5.232946395874023, "learning_rate": 2.1422222222222224e-06, "loss": 6.237205886840821, "step": 57585 }, { "epoch": 0.0059, "grad_norm": 3.583643913269043, "learning_rate": 2.141969696969697e-06, "loss": 6.220135879516602, "step": 57590 }, { "epoch": 0.00595, "grad_norm": 11.475839614868164, "learning_rate": 2.141717171717172e-06, "loss": 6.235987854003906, "step": 57595 }, { "epoch": 0.006, "grad_norm": 6.815097332000732, "learning_rate": 2.1414646464646467e-06, "loss": 6.2451423645019535, "step": 57600 }, { "epoch": 0.00605, "grad_norm": 6.120882034301758, "learning_rate": 2.1412121212121214e-06, "loss": 6.2720947265625, "step": 57605 }, { "epoch": 0.0061, "grad_norm": 5.324512004852295, "learning_rate": 2.140959595959596e-06, "loss": 6.269615936279297, "step": 57610 }, { "epoch": 0.00615, "grad_norm": 5.380223751068115, "learning_rate": 2.140707070707071e-06, "loss": 6.263793182373047, "step": 57615 }, { "epoch": 0.0062, "grad_norm": 7.598857879638672, "learning_rate": 2.1404545454545457e-06, "loss": 6.278217315673828, "step": 57620 }, { "epoch": 0.00625, "grad_norm": 5.8030548095703125, "learning_rate": 2.1402020202020203e-06, "loss": 6.264385986328125, "step": 57625 }, { "epoch": 0.0063, "grad_norm": 6.698049545288086, "learning_rate": 2.139949494949495e-06, "loss": 6.2402996063232425, "step": 57630 }, { "epoch": 0.00635, "grad_norm": 5.1362152099609375, "learning_rate": 2.13969696969697e-06, "loss": 6.247291564941406, "step": 57635 }, { "epoch": 0.0064, "grad_norm": 5.990663528442383, "learning_rate": 2.1394444444444446e-06, "loss": 6.228567886352539, "step": 57640 }, { "epoch": 0.00645, "grad_norm": 4.494534015655518, "learning_rate": 2.1391919191919192e-06, "loss": 6.263867950439453, "step": 57645 }, { "epoch": 0.0065, "grad_norm": 14.586382865905762, "learning_rate": 2.138939393939394e-06, "loss": 6.274836730957031, "step": 57650 }, { "epoch": 0.00655, "grad_norm": 6.153763771057129, "learning_rate": 2.138686868686869e-06, "loss": 6.278524017333984, "step": 57655 }, { "epoch": 0.0066, "grad_norm": 6.498415946960449, "learning_rate": 2.1384343434343436e-06, "loss": 6.265385437011719, "step": 57660 }, { "epoch": 0.00665, "grad_norm": 4.083136558532715, "learning_rate": 2.1381818181818186e-06, "loss": 6.283138275146484, "step": 57665 }, { "epoch": 0.0067, "grad_norm": 13.105978965759277, "learning_rate": 2.137929292929293e-06, "loss": 6.455508422851563, "step": 57670 }, { "epoch": 0.00675, "grad_norm": 5.504289150238037, "learning_rate": 2.137676767676768e-06, "loss": 6.291826629638672, "step": 57675 }, { "epoch": 0.0068, "grad_norm": 5.32451057434082, "learning_rate": 2.1374242424242425e-06, "loss": 6.410931396484375, "step": 57680 }, { "epoch": 0.00685, "grad_norm": 6.495606422424316, "learning_rate": 2.1371717171717176e-06, "loss": 6.199198913574219, "step": 57685 }, { "epoch": 0.0069, "grad_norm": 10.67115306854248, "learning_rate": 2.136919191919192e-06, "loss": 6.219937515258789, "step": 57690 }, { "epoch": 0.00695, "grad_norm": 8.492120742797852, "learning_rate": 2.136666666666667e-06, "loss": 6.275556182861328, "step": 57695 }, { "epoch": 0.007, "grad_norm": 8.735687255859375, "learning_rate": 2.1364141414141415e-06, "loss": 6.279151916503906, "step": 57700 }, { "epoch": 0.00705, "grad_norm": 5.293882369995117, "learning_rate": 2.1361616161616165e-06, "loss": 6.2287555694580075, "step": 57705 }, { "epoch": 0.0071, "grad_norm": 10.498445510864258, "learning_rate": 2.135909090909091e-06, "loss": 6.324429321289062, "step": 57710 }, { "epoch": 0.00715, "grad_norm": 8.062755584716797, "learning_rate": 2.1356565656565658e-06, "loss": 6.288285827636718, "step": 57715 }, { "epoch": 0.0072, "grad_norm": 4.20753812789917, "learning_rate": 2.1354040404040404e-06, "loss": 6.546878051757813, "step": 57720 }, { "epoch": 0.00725, "grad_norm": 5.710077285766602, "learning_rate": 2.1351515151515154e-06, "loss": 6.2677734375, "step": 57725 }, { "epoch": 0.0073, "grad_norm": 8.753872871398926, "learning_rate": 2.13489898989899e-06, "loss": 6.229185485839844, "step": 57730 }, { "epoch": 0.00735, "grad_norm": 10.824677467346191, "learning_rate": 2.1346464646464647e-06, "loss": 6.245944213867188, "step": 57735 }, { "epoch": 0.0074, "grad_norm": 5.482248783111572, "learning_rate": 2.1343939393939393e-06, "loss": 6.298693466186523, "step": 57740 }, { "epoch": 0.00745, "grad_norm": 6.6611223220825195, "learning_rate": 2.1341414141414144e-06, "loss": 6.248198318481445, "step": 57745 }, { "epoch": 0.0075, "grad_norm": 10.734625816345215, "learning_rate": 2.133888888888889e-06, "loss": 6.362401962280273, "step": 57750 }, { "epoch": 0.00755, "grad_norm": 8.503034591674805, "learning_rate": 2.1336363636363637e-06, "loss": 6.232796859741211, "step": 57755 }, { "epoch": 0.0076, "grad_norm": 12.369526863098145, "learning_rate": 2.1333838383838383e-06, "loss": 6.262522888183594, "step": 57760 }, { "epoch": 0.00765, "grad_norm": 8.523037910461426, "learning_rate": 2.1331313131313133e-06, "loss": 6.255483245849609, "step": 57765 }, { "epoch": 0.0077, "grad_norm": 5.391900539398193, "learning_rate": 2.132878787878788e-06, "loss": 6.300029754638672, "step": 57770 }, { "epoch": 0.00775, "grad_norm": 7.900488376617432, "learning_rate": 2.132626262626263e-06, "loss": 6.26008415222168, "step": 57775 }, { "epoch": 0.0078, "grad_norm": 5.793471336364746, "learning_rate": 2.1323737373737372e-06, "loss": 6.2571979522705075, "step": 57780 }, { "epoch": 0.00785, "grad_norm": 3.9351823329925537, "learning_rate": 2.1321212121212123e-06, "loss": 6.242363739013672, "step": 57785 }, { "epoch": 0.0079, "grad_norm": 6.076104164123535, "learning_rate": 2.131868686868687e-06, "loss": 6.226163101196289, "step": 57790 }, { "epoch": 0.00795, "grad_norm": 5.415538787841797, "learning_rate": 2.131616161616162e-06, "loss": 6.226616287231446, "step": 57795 }, { "epoch": 0.008, "grad_norm": 4.780411243438721, "learning_rate": 2.1313636363636366e-06, "loss": 6.226288223266602, "step": 57800 }, { "epoch": 0.00805, "grad_norm": 4.696537971496582, "learning_rate": 2.1311111111111112e-06, "loss": 6.230506515502929, "step": 57805 }, { "epoch": 0.0081, "grad_norm": 10.238042831420898, "learning_rate": 2.130858585858586e-06, "loss": 6.1916648864746096, "step": 57810 }, { "epoch": 0.00815, "grad_norm": 7.5218024253845215, "learning_rate": 2.130606060606061e-06, "loss": 6.2723346710205075, "step": 57815 }, { "epoch": 0.0082, "grad_norm": 5.094533443450928, "learning_rate": 2.1303535353535355e-06, "loss": 6.251858901977539, "step": 57820 }, { "epoch": 0.00825, "grad_norm": 8.665769577026367, "learning_rate": 2.13010101010101e-06, "loss": 6.257060623168945, "step": 57825 }, { "epoch": 0.0083, "grad_norm": 6.849853515625, "learning_rate": 2.129848484848485e-06, "loss": 6.204507827758789, "step": 57830 }, { "epoch": 0.00835, "grad_norm": 7.253697395324707, "learning_rate": 2.12959595959596e-06, "loss": 6.188078308105469, "step": 57835 }, { "epoch": 0.0084, "grad_norm": 13.0919771194458, "learning_rate": 2.1293434343434345e-06, "loss": 6.2707069396972654, "step": 57840 }, { "epoch": 0.00845, "grad_norm": 11.882155418395996, "learning_rate": 2.129090909090909e-06, "loss": 6.1709442138671875, "step": 57845 }, { "epoch": 0.0085, "grad_norm": 4.024233341217041, "learning_rate": 2.128838383838384e-06, "loss": 6.320576095581055, "step": 57850 }, { "epoch": 0.00855, "grad_norm": 6.154964447021484, "learning_rate": 2.128585858585859e-06, "loss": 6.258798599243164, "step": 57855 }, { "epoch": 0.0086, "grad_norm": 8.962190628051758, "learning_rate": 2.128333333333334e-06, "loss": 6.281869506835937, "step": 57860 }, { "epoch": 0.00865, "grad_norm": 10.367720603942871, "learning_rate": 2.128080808080808e-06, "loss": 6.2227012634277346, "step": 57865 }, { "epoch": 0.0087, "grad_norm": 4.704100608825684, "learning_rate": 2.127828282828283e-06, "loss": 6.238421630859375, "step": 57870 }, { "epoch": 0.00875, "grad_norm": 11.986065864562988, "learning_rate": 2.1275757575757577e-06, "loss": 6.219827651977539, "step": 57875 }, { "epoch": 0.0088, "grad_norm": 5.217957019805908, "learning_rate": 2.127323232323233e-06, "loss": 6.2715599060058596, "step": 57880 }, { "epoch": 0.00885, "grad_norm": 4.87359619140625, "learning_rate": 2.1270707070707074e-06, "loss": 6.225852966308594, "step": 57885 }, { "epoch": 0.0089, "grad_norm": 5.804010391235352, "learning_rate": 2.126818181818182e-06, "loss": 6.200912094116211, "step": 57890 }, { "epoch": 0.00895, "grad_norm": 15.18005657196045, "learning_rate": 2.1265656565656567e-06, "loss": 6.240496826171875, "step": 57895 }, { "epoch": 0.009, "grad_norm": 6.341934680938721, "learning_rate": 2.1263131313131317e-06, "loss": 6.22321891784668, "step": 57900 }, { "epoch": 0.00905, "grad_norm": 5.750938892364502, "learning_rate": 2.1260606060606064e-06, "loss": 6.22126350402832, "step": 57905 }, { "epoch": 0.0091, "grad_norm": 5.308298110961914, "learning_rate": 2.125808080808081e-06, "loss": 6.236571884155273, "step": 57910 }, { "epoch": 0.00915, "grad_norm": 7.710617542266846, "learning_rate": 2.1255555555555556e-06, "loss": 6.209450149536133, "step": 57915 }, { "epoch": 0.0092, "grad_norm": 4.5286173820495605, "learning_rate": 2.1253030303030307e-06, "loss": 6.262691879272461, "step": 57920 }, { "epoch": 0.00925, "grad_norm": 18.392377853393555, "learning_rate": 2.1250505050505053e-06, "loss": 6.306187438964844, "step": 57925 }, { "epoch": 0.0093, "grad_norm": 6.770992755889893, "learning_rate": 2.12479797979798e-06, "loss": 6.2508899688720705, "step": 57930 }, { "epoch": 0.00935, "grad_norm": 4.592305660247803, "learning_rate": 2.1245454545454546e-06, "loss": 6.244648361206055, "step": 57935 }, { "epoch": 0.0094, "grad_norm": 3.8710126876831055, "learning_rate": 2.1242929292929296e-06, "loss": 6.20844612121582, "step": 57940 }, { "epoch": 0.00945, "grad_norm": 7.858886241912842, "learning_rate": 2.1240404040404043e-06, "loss": 6.272440338134766, "step": 57945 }, { "epoch": 0.0095, "grad_norm": 6.074739456176758, "learning_rate": 2.123787878787879e-06, "loss": 6.236060333251953, "step": 57950 }, { "epoch": 0.00955, "grad_norm": 78.62846374511719, "learning_rate": 2.1235353535353535e-06, "loss": 7.238417053222657, "step": 57955 }, { "epoch": 0.0096, "grad_norm": 8.709741592407227, "learning_rate": 2.1232828282828286e-06, "loss": 7.625296020507813, "step": 57960 }, { "epoch": 0.00965, "grad_norm": 5.981181621551514, "learning_rate": 2.123030303030303e-06, "loss": 6.3077842712402346, "step": 57965 }, { "epoch": 0.0097, "grad_norm": 6.159051895141602, "learning_rate": 2.1227777777777783e-06, "loss": 6.213894271850586, "step": 57970 }, { "epoch": 0.00975, "grad_norm": 6.19509220123291, "learning_rate": 2.1225252525252525e-06, "loss": 6.203979873657227, "step": 57975 }, { "epoch": 0.0098, "grad_norm": 6.304594039916992, "learning_rate": 2.1222727272727275e-06, "loss": 6.24237060546875, "step": 57980 }, { "epoch": 0.00985, "grad_norm": 7.9375386238098145, "learning_rate": 2.122020202020202e-06, "loss": 6.317525863647461, "step": 57985 }, { "epoch": 0.0099, "grad_norm": 31.9854679107666, "learning_rate": 2.121767676767677e-06, "loss": 6.314503479003906, "step": 57990 }, { "epoch": 0.00995, "grad_norm": 7.97206974029541, "learning_rate": 2.121515151515152e-06, "loss": 6.247580337524414, "step": 57995 }, { "epoch": 0.01, "grad_norm": 4.957515239715576, "learning_rate": 2.1212626262626265e-06, "loss": 6.286428070068359, "step": 58000 }, { "epoch": 0.01005, "grad_norm": 5.560095310211182, "learning_rate": 2.121010101010101e-06, "loss": 6.210926818847656, "step": 58005 }, { "epoch": 0.0101, "grad_norm": 7.213435173034668, "learning_rate": 2.120757575757576e-06, "loss": 6.2536369323730465, "step": 58010 }, { "epoch": 0.01015, "grad_norm": 7.462596416473389, "learning_rate": 2.1205050505050508e-06, "loss": 6.235035705566406, "step": 58015 }, { "epoch": 0.0102, "grad_norm": 5.113105773925781, "learning_rate": 2.1202525252525254e-06, "loss": 6.2731201171875, "step": 58020 }, { "epoch": 0.01025, "grad_norm": 4.3406171798706055, "learning_rate": 2.12e-06, "loss": 6.26531753540039, "step": 58025 }, { "epoch": 0.0103, "grad_norm": 8.363198280334473, "learning_rate": 2.119747474747475e-06, "loss": 6.224143981933594, "step": 58030 }, { "epoch": 0.01035, "grad_norm": 34.131385803222656, "learning_rate": 2.1194949494949497e-06, "loss": 6.3892566680908205, "step": 58035 }, { "epoch": 0.0104, "grad_norm": 8.165680885314941, "learning_rate": 2.1192424242424243e-06, "loss": 6.258678436279297, "step": 58040 }, { "epoch": 0.01045, "grad_norm": 6.1087493896484375, "learning_rate": 2.118989898989899e-06, "loss": 6.254599761962891, "step": 58045 }, { "epoch": 0.0105, "grad_norm": 9.535125732421875, "learning_rate": 2.118737373737374e-06, "loss": 6.257398986816407, "step": 58050 }, { "epoch": 0.01055, "grad_norm": 9.556593894958496, "learning_rate": 2.1184848484848487e-06, "loss": 6.262100601196289, "step": 58055 }, { "epoch": 0.0106, "grad_norm": 8.248480796813965, "learning_rate": 2.1182323232323233e-06, "loss": 6.241403579711914, "step": 58060 }, { "epoch": 0.01065, "grad_norm": 5.240483283996582, "learning_rate": 2.117979797979798e-06, "loss": 6.224145889282227, "step": 58065 }, { "epoch": 0.0107, "grad_norm": 7.164853572845459, "learning_rate": 2.117727272727273e-06, "loss": 6.270367050170899, "step": 58070 }, { "epoch": 0.01075, "grad_norm": 7.421360969543457, "learning_rate": 2.1174747474747476e-06, "loss": 6.282764434814453, "step": 58075 }, { "epoch": 0.0108, "grad_norm": 9.64595890045166, "learning_rate": 2.1172222222222227e-06, "loss": 6.239479446411133, "step": 58080 }, { "epoch": 0.01085, "grad_norm": 5.243997097015381, "learning_rate": 2.116969696969697e-06, "loss": 6.266201019287109, "step": 58085 }, { "epoch": 0.0109, "grad_norm": 4.614142417907715, "learning_rate": 2.116717171717172e-06, "loss": 6.212181854248047, "step": 58090 }, { "epoch": 0.01095, "grad_norm": 4.7479939460754395, "learning_rate": 2.1164646464646465e-06, "loss": 6.244168853759765, "step": 58095 }, { "epoch": 0.011, "grad_norm": 5.097177982330322, "learning_rate": 2.1162121212121216e-06, "loss": 6.260212707519531, "step": 58100 }, { "epoch": 0.01105, "grad_norm": 3.5909769535064697, "learning_rate": 2.1159595959595962e-06, "loss": 6.284008026123047, "step": 58105 }, { "epoch": 0.0111, "grad_norm": 12.920626640319824, "learning_rate": 2.115707070707071e-06, "loss": 6.310934066772461, "step": 58110 }, { "epoch": 0.01115, "grad_norm": 5.913266658782959, "learning_rate": 2.1154545454545455e-06, "loss": 6.272121429443359, "step": 58115 }, { "epoch": 0.0112, "grad_norm": 9.860183715820312, "learning_rate": 2.1152020202020205e-06, "loss": 6.313437652587891, "step": 58120 }, { "epoch": 0.01125, "grad_norm": 7.044949531555176, "learning_rate": 2.114949494949495e-06, "loss": 6.255780410766602, "step": 58125 }, { "epoch": 0.0113, "grad_norm": 6.33245325088501, "learning_rate": 2.11469696969697e-06, "loss": 6.234459686279297, "step": 58130 }, { "epoch": 0.01135, "grad_norm": 5.069886684417725, "learning_rate": 2.1144444444444444e-06, "loss": 6.256400680541992, "step": 58135 }, { "epoch": 0.0114, "grad_norm": 5.122408866882324, "learning_rate": 2.1141919191919195e-06, "loss": 6.239034271240234, "step": 58140 }, { "epoch": 0.01145, "grad_norm": 6.764098644256592, "learning_rate": 2.113939393939394e-06, "loss": 6.236656951904297, "step": 58145 }, { "epoch": 0.0115, "grad_norm": 4.425897598266602, "learning_rate": 2.1136868686868687e-06, "loss": 6.210929870605469, "step": 58150 }, { "epoch": 0.01155, "grad_norm": 4.856262683868408, "learning_rate": 2.1134343434343434e-06, "loss": 6.279885864257812, "step": 58155 }, { "epoch": 0.0116, "grad_norm": 7.009607315063477, "learning_rate": 2.1131818181818184e-06, "loss": 6.253388977050781, "step": 58160 }, { "epoch": 0.01165, "grad_norm": 6.373064041137695, "learning_rate": 2.112929292929293e-06, "loss": 6.24598274230957, "step": 58165 }, { "epoch": 0.0117, "grad_norm": 6.512892246246338, "learning_rate": 2.1126767676767677e-06, "loss": 6.2370342254638675, "step": 58170 }, { "epoch": 0.01175, "grad_norm": 4.857904434204102, "learning_rate": 2.1124242424242423e-06, "loss": 6.262361145019531, "step": 58175 }, { "epoch": 0.0118, "grad_norm": 5.170569896697998, "learning_rate": 2.1121717171717174e-06, "loss": 6.220446395874023, "step": 58180 }, { "epoch": 0.01185, "grad_norm": 4.993494987487793, "learning_rate": 2.111919191919192e-06, "loss": 6.241411590576172, "step": 58185 }, { "epoch": 0.0119, "grad_norm": 7.719913959503174, "learning_rate": 2.111666666666667e-06, "loss": 6.237577819824219, "step": 58190 }, { "epoch": 0.01195, "grad_norm": 6.527461051940918, "learning_rate": 2.1114141414141413e-06, "loss": 6.240265655517578, "step": 58195 }, { "epoch": 0.012, "grad_norm": 3.570730209350586, "learning_rate": 2.1111616161616163e-06, "loss": 6.244887924194336, "step": 58200 }, { "epoch": 0.01205, "grad_norm": 4.268143177032471, "learning_rate": 2.110909090909091e-06, "loss": 6.27551383972168, "step": 58205 }, { "epoch": 0.0121, "grad_norm": 6.935873031616211, "learning_rate": 2.110656565656566e-06, "loss": 6.308805084228515, "step": 58210 }, { "epoch": 0.01215, "grad_norm": 5.55388879776001, "learning_rate": 2.1104040404040406e-06, "loss": 6.294983673095703, "step": 58215 }, { "epoch": 0.0122, "grad_norm": 5.8228607177734375, "learning_rate": 2.1101515151515153e-06, "loss": 6.289671325683594, "step": 58220 }, { "epoch": 0.01225, "grad_norm": 6.290399074554443, "learning_rate": 2.10989898989899e-06, "loss": 6.179206085205078, "step": 58225 }, { "epoch": 0.0123, "grad_norm": 3.5091724395751953, "learning_rate": 2.109646464646465e-06, "loss": 6.2284423828125, "step": 58230 }, { "epoch": 0.01235, "grad_norm": 7.509869575500488, "learning_rate": 2.1093939393939396e-06, "loss": 6.189321899414063, "step": 58235 }, { "epoch": 0.0124, "grad_norm": 6.210942268371582, "learning_rate": 2.109141414141414e-06, "loss": 6.221383666992187, "step": 58240 }, { "epoch": 0.01245, "grad_norm": 8.037761688232422, "learning_rate": 2.108888888888889e-06, "loss": 6.250811767578125, "step": 58245 }, { "epoch": 0.0125, "grad_norm": 5.191162109375, "learning_rate": 2.108636363636364e-06, "loss": 6.258404541015625, "step": 58250 }, { "epoch": 0.01255, "grad_norm": 5.741734027862549, "learning_rate": 2.1083838383838385e-06, "loss": 6.246728897094727, "step": 58255 }, { "epoch": 0.0126, "grad_norm": 3.6430931091308594, "learning_rate": 2.108131313131313e-06, "loss": 6.233056259155274, "step": 58260 }, { "epoch": 0.01265, "grad_norm": 6.176042556762695, "learning_rate": 2.1078787878787878e-06, "loss": 6.229666900634766, "step": 58265 }, { "epoch": 0.0127, "grad_norm": 4.598489761352539, "learning_rate": 2.107626262626263e-06, "loss": 6.271142196655274, "step": 58270 }, { "epoch": 0.01275, "grad_norm": 4.0911688804626465, "learning_rate": 2.107373737373738e-06, "loss": 6.241697692871094, "step": 58275 }, { "epoch": 0.0128, "grad_norm": 6.526788711547852, "learning_rate": 2.107121212121212e-06, "loss": 6.2447509765625, "step": 58280 }, { "epoch": 0.01285, "grad_norm": 33.78144454956055, "learning_rate": 2.106868686868687e-06, "loss": 6.260301208496093, "step": 58285 }, { "epoch": 0.0129, "grad_norm": 7.0569868087768555, "learning_rate": 2.1066161616161618e-06, "loss": 6.2584999084472654, "step": 58290 }, { "epoch": 0.01295, "grad_norm": 5.797595024108887, "learning_rate": 2.106363636363637e-06, "loss": 6.242031478881836, "step": 58295 }, { "epoch": 0.013, "grad_norm": 15.979517936706543, "learning_rate": 2.1061111111111115e-06, "loss": 6.3183849334716795, "step": 58300 }, { "epoch": 5e-05, "grad_norm": 10.071053504943848, "learning_rate": 2.105858585858586e-06, "loss": 6.331989669799805, "step": 58305 }, { "epoch": 0.0001, "grad_norm": 4.531308174133301, "learning_rate": 2.1056060606060607e-06, "loss": 6.259286499023437, "step": 58310 }, { "epoch": 0.00015, "grad_norm": 8.129968643188477, "learning_rate": 2.1053535353535358e-06, "loss": 6.236580657958984, "step": 58315 }, { "epoch": 0.0002, "grad_norm": 5.623079776763916, "learning_rate": 2.1051010101010104e-06, "loss": 6.240610122680664, "step": 58320 }, { "epoch": 0.00025, "grad_norm": 5.000452995300293, "learning_rate": 2.104848484848485e-06, "loss": 6.316400146484375, "step": 58325 }, { "epoch": 0.0003, "grad_norm": 6.52099084854126, "learning_rate": 2.1045959595959597e-06, "loss": 6.237596893310547, "step": 58330 }, { "epoch": 0.00035, "grad_norm": 7.906565189361572, "learning_rate": 2.1043434343434347e-06, "loss": 6.243443298339844, "step": 58335 }, { "epoch": 0.0004, "grad_norm": 5.749167442321777, "learning_rate": 2.1040909090909094e-06, "loss": 6.255088043212891, "step": 58340 }, { "epoch": 0.00045, "grad_norm": 12.501587867736816, "learning_rate": 2.103838383838384e-06, "loss": 6.276029205322265, "step": 58345 }, { "epoch": 0.0005, "grad_norm": 5.397604465484619, "learning_rate": 2.1035858585858586e-06, "loss": 6.204943084716797, "step": 58350 }, { "epoch": 0.00055, "grad_norm": 3.538440704345703, "learning_rate": 2.1033333333333337e-06, "loss": 6.249106216430664, "step": 58355 }, { "epoch": 0.0006, "grad_norm": 11.428462028503418, "learning_rate": 2.1030808080808083e-06, "loss": 6.31140251159668, "step": 58360 }, { "epoch": 0.00065, "grad_norm": 7.811578750610352, "learning_rate": 2.102828282828283e-06, "loss": 6.223845672607422, "step": 58365 }, { "epoch": 0.0007, "grad_norm": 10.911348342895508, "learning_rate": 2.1025757575757576e-06, "loss": 6.291919708251953, "step": 58370 }, { "epoch": 0.00075, "grad_norm": 7.340371131896973, "learning_rate": 2.1023232323232326e-06, "loss": 6.2730049133300785, "step": 58375 }, { "epoch": 0.0008, "grad_norm": 6.834494590759277, "learning_rate": 2.1020707070707072e-06, "loss": 6.244667816162109, "step": 58380 }, { "epoch": 0.00085, "grad_norm": 5.782610893249512, "learning_rate": 2.1018181818181823e-06, "loss": 6.269546890258789, "step": 58385 }, { "epoch": 0.0009, "grad_norm": 5.7828803062438965, "learning_rate": 2.1015656565656565e-06, "loss": 6.186442565917969, "step": 58390 }, { "epoch": 0.00095, "grad_norm": 6.747575283050537, "learning_rate": 2.1013131313131316e-06, "loss": 6.262683868408203, "step": 58395 }, { "epoch": 0.001, "grad_norm": 5.424884796142578, "learning_rate": 2.101060606060606e-06, "loss": 6.297396087646485, "step": 58400 }, { "epoch": 0.00105, "grad_norm": 7.843175411224365, "learning_rate": 2.1008080808080812e-06, "loss": 6.238264465332032, "step": 58405 }, { "epoch": 0.0011, "grad_norm": 5.421750545501709, "learning_rate": 2.100555555555556e-06, "loss": 6.199758911132813, "step": 58410 }, { "epoch": 0.00115, "grad_norm": 6.446488857269287, "learning_rate": 2.1003030303030305e-06, "loss": 6.298728179931641, "step": 58415 }, { "epoch": 0.0012, "grad_norm": 3.2627923488616943, "learning_rate": 2.100050505050505e-06, "loss": 6.24609489440918, "step": 58420 }, { "epoch": 0.00125, "grad_norm": 6.473231315612793, "learning_rate": 2.09979797979798e-06, "loss": 6.23774299621582, "step": 58425 }, { "epoch": 0.0013, "grad_norm": 8.408305168151855, "learning_rate": 2.099545454545455e-06, "loss": 6.31666259765625, "step": 58430 }, { "epoch": 0.00135, "grad_norm": 4.442447662353516, "learning_rate": 2.0992929292929294e-06, "loss": 6.246646499633789, "step": 58435 }, { "epoch": 0.0014, "grad_norm": 8.77422046661377, "learning_rate": 2.099040404040404e-06, "loss": 6.315263366699218, "step": 58440 }, { "epoch": 0.00145, "grad_norm": 14.109367370605469, "learning_rate": 2.098787878787879e-06, "loss": 6.22675895690918, "step": 58445 }, { "epoch": 0.0015, "grad_norm": 4.954206943511963, "learning_rate": 2.0985353535353538e-06, "loss": 6.265231323242188, "step": 58450 }, { "epoch": 0.00155, "grad_norm": 11.917671203613281, "learning_rate": 2.0982828282828284e-06, "loss": 6.33629035949707, "step": 58455 }, { "epoch": 0.0016, "grad_norm": 5.52129602432251, "learning_rate": 2.098030303030303e-06, "loss": 6.270717620849609, "step": 58460 }, { "epoch": 0.00165, "grad_norm": 13.145471572875977, "learning_rate": 2.097777777777778e-06, "loss": 6.223827743530274, "step": 58465 }, { "epoch": 0.0017, "grad_norm": 11.223716735839844, "learning_rate": 2.0975252525252527e-06, "loss": 6.274269866943359, "step": 58470 }, { "epoch": 0.00175, "grad_norm": 4.794831275939941, "learning_rate": 2.0972727272727273e-06, "loss": 6.240202331542969, "step": 58475 }, { "epoch": 0.0018, "grad_norm": 5.3662567138671875, "learning_rate": 2.097020202020202e-06, "loss": 6.256021118164062, "step": 58480 }, { "epoch": 0.00185, "grad_norm": 6.130589962005615, "learning_rate": 2.096767676767677e-06, "loss": 6.226803207397461, "step": 58485 }, { "epoch": 0.0019, "grad_norm": 5.712375164031982, "learning_rate": 2.0965151515151516e-06, "loss": 6.261801910400391, "step": 58490 }, { "epoch": 0.00195, "grad_norm": 6.38789176940918, "learning_rate": 2.0962626262626267e-06, "loss": 6.222469329833984, "step": 58495 }, { "epoch": 0.002, "grad_norm": 37.815147399902344, "learning_rate": 2.096010101010101e-06, "loss": 6.271954345703125, "step": 58500 }, { "epoch": 0.00205, "grad_norm": 7.986257553100586, "learning_rate": 2.095757575757576e-06, "loss": 6.253636169433594, "step": 58505 }, { "epoch": 0.0021, "grad_norm": 7.071159362792969, "learning_rate": 2.0955050505050506e-06, "loss": 6.305369186401367, "step": 58510 }, { "epoch": 0.00215, "grad_norm": 7.555908203125, "learning_rate": 2.0952525252525256e-06, "loss": 6.225598907470703, "step": 58515 }, { "epoch": 0.0022, "grad_norm": 3.803361177444458, "learning_rate": 2.0950000000000003e-06, "loss": 6.2151538848876955, "step": 58520 }, { "epoch": 0.00225, "grad_norm": 6.78621244430542, "learning_rate": 2.094747474747475e-06, "loss": 6.266167068481446, "step": 58525 }, { "epoch": 0.0023, "grad_norm": 6.25847864151001, "learning_rate": 2.0944949494949495e-06, "loss": 6.268407821655273, "step": 58530 }, { "epoch": 0.00235, "grad_norm": 4.788536548614502, "learning_rate": 2.0942424242424246e-06, "loss": 6.2767993927001955, "step": 58535 }, { "epoch": 0.0024, "grad_norm": 5.617190837860107, "learning_rate": 2.0939898989898992e-06, "loss": 6.296378707885742, "step": 58540 }, { "epoch": 0.00245, "grad_norm": 6.182650566101074, "learning_rate": 2.093737373737374e-06, "loss": 6.2470146179199215, "step": 58545 }, { "epoch": 0.0025, "grad_norm": 6.380931854248047, "learning_rate": 2.0934848484848485e-06, "loss": 6.2162425994873045, "step": 58550 }, { "epoch": 0.00255, "grad_norm": 4.785590171813965, "learning_rate": 2.0932323232323235e-06, "loss": 6.225432968139648, "step": 58555 }, { "epoch": 0.0026, "grad_norm": 4.8684587478637695, "learning_rate": 2.092979797979798e-06, "loss": 6.276217651367188, "step": 58560 }, { "epoch": 0.00265, "grad_norm": 9.265609741210938, "learning_rate": 2.092727272727273e-06, "loss": 6.318246841430664, "step": 58565 }, { "epoch": 0.0027, "grad_norm": 4.073276996612549, "learning_rate": 2.0924747474747474e-06, "loss": 6.27374382019043, "step": 58570 }, { "epoch": 0.00275, "grad_norm": 4.098637104034424, "learning_rate": 2.0922222222222225e-06, "loss": 6.264242935180664, "step": 58575 }, { "epoch": 0.0028, "grad_norm": 11.025843620300293, "learning_rate": 2.091969696969697e-06, "loss": 6.200717163085938, "step": 58580 }, { "epoch": 0.00285, "grad_norm": 21.123376846313477, "learning_rate": 2.0917171717171717e-06, "loss": 6.673307800292969, "step": 58585 }, { "epoch": 0.0029, "grad_norm": 3.5849268436431885, "learning_rate": 2.0914646464646464e-06, "loss": 6.2893516540527346, "step": 58590 }, { "epoch": 0.00295, "grad_norm": 7.158766746520996, "learning_rate": 2.0912121212121214e-06, "loss": 6.263116836547852, "step": 58595 }, { "epoch": 0.003, "grad_norm": 7.203574180603027, "learning_rate": 2.090959595959596e-06, "loss": 6.325869750976563, "step": 58600 }, { "epoch": 0.00305, "grad_norm": 6.050680160522461, "learning_rate": 2.090707070707071e-06, "loss": 6.15042724609375, "step": 58605 }, { "epoch": 0.0031, "grad_norm": 6.012096405029297, "learning_rate": 2.0904545454545453e-06, "loss": 6.265053558349609, "step": 58610 }, { "epoch": 0.00315, "grad_norm": 13.423612594604492, "learning_rate": 2.0902020202020204e-06, "loss": 6.2467914581298825, "step": 58615 }, { "epoch": 0.0032, "grad_norm": 6.9048357009887695, "learning_rate": 2.089949494949495e-06, "loss": 6.247209167480468, "step": 58620 }, { "epoch": 0.00325, "grad_norm": 4.742342948913574, "learning_rate": 2.08969696969697e-06, "loss": 6.272213363647461, "step": 58625 }, { "epoch": 0.0033, "grad_norm": 11.981206893920898, "learning_rate": 2.0894444444444447e-06, "loss": 6.285174560546875, "step": 58630 }, { "epoch": 0.00335, "grad_norm": 3.941469669342041, "learning_rate": 2.0891919191919193e-06, "loss": 6.289917373657227, "step": 58635 }, { "epoch": 0.0034, "grad_norm": 3.8885302543640137, "learning_rate": 2.088939393939394e-06, "loss": 6.273854827880859, "step": 58640 }, { "epoch": 0.00345, "grad_norm": 7.319693565368652, "learning_rate": 2.088686868686869e-06, "loss": 6.4630859375, "step": 58645 }, { "epoch": 0.0035, "grad_norm": 3.7721338272094727, "learning_rate": 2.0884343434343436e-06, "loss": 6.247396087646484, "step": 58650 }, { "epoch": 0.00355, "grad_norm": 7.353837966918945, "learning_rate": 2.0881818181818182e-06, "loss": 6.337646484375, "step": 58655 }, { "epoch": 0.0036, "grad_norm": 4.04203987121582, "learning_rate": 2.087929292929293e-06, "loss": 6.397121429443359, "step": 58660 }, { "epoch": 0.00365, "grad_norm": 6.539156436920166, "learning_rate": 2.087676767676768e-06, "loss": 6.223913955688476, "step": 58665 }, { "epoch": 0.0037, "grad_norm": 5.225399971008301, "learning_rate": 2.0874242424242426e-06, "loss": 6.295248413085938, "step": 58670 }, { "epoch": 0.00375, "grad_norm": 13.436102867126465, "learning_rate": 2.087171717171717e-06, "loss": 6.444306182861328, "step": 58675 }, { "epoch": 0.0038, "grad_norm": 4.631586074829102, "learning_rate": 2.086919191919192e-06, "loss": 6.268806457519531, "step": 58680 }, { "epoch": 0.00385, "grad_norm": 4.335426330566406, "learning_rate": 2.086666666666667e-06, "loss": 6.221486663818359, "step": 58685 }, { "epoch": 0.0039, "grad_norm": 7.3512163162231445, "learning_rate": 2.0864141414141415e-06, "loss": 6.259012985229492, "step": 58690 }, { "epoch": 0.00395, "grad_norm": 7.603573799133301, "learning_rate": 2.086161616161616e-06, "loss": 6.288476181030274, "step": 58695 }, { "epoch": 0.004, "grad_norm": 4.89739990234375, "learning_rate": 2.085909090909091e-06, "loss": 6.317535781860352, "step": 58700 }, { "epoch": 0.00405, "grad_norm": 6.278373718261719, "learning_rate": 2.085656565656566e-06, "loss": 6.239259338378906, "step": 58705 }, { "epoch": 0.0041, "grad_norm": 7.481472492218018, "learning_rate": 2.085404040404041e-06, "loss": 6.2266357421875, "step": 58710 }, { "epoch": 0.00415, "grad_norm": 4.563352584838867, "learning_rate": 2.0851515151515155e-06, "loss": 6.293328857421875, "step": 58715 }, { "epoch": 0.0042, "grad_norm": 5.56203031539917, "learning_rate": 2.08489898989899e-06, "loss": 6.221408081054688, "step": 58720 }, { "epoch": 0.00425, "grad_norm": 21.003725051879883, "learning_rate": 2.0846464646464648e-06, "loss": 6.500498199462891, "step": 58725 }, { "epoch": 0.0043, "grad_norm": 8.92598819732666, "learning_rate": 2.08439393939394e-06, "loss": 6.324872970581055, "step": 58730 }, { "epoch": 0.00435, "grad_norm": 9.277141571044922, "learning_rate": 2.0841414141414144e-06, "loss": 6.2321723937988285, "step": 58735 }, { "epoch": 0.0044, "grad_norm": 14.299290657043457, "learning_rate": 2.083888888888889e-06, "loss": 6.307971572875976, "step": 58740 }, { "epoch": 0.00445, "grad_norm": 9.133995056152344, "learning_rate": 2.0836363636363637e-06, "loss": 6.245074462890625, "step": 58745 }, { "epoch": 0.0045, "grad_norm": 8.481698036193848, "learning_rate": 2.0833838383838388e-06, "loss": 6.1005207061767575, "step": 58750 }, { "epoch": 0.00455, "grad_norm": 4.731527805328369, "learning_rate": 2.0831313131313134e-06, "loss": 6.24229736328125, "step": 58755 }, { "epoch": 0.0046, "grad_norm": 11.300670623779297, "learning_rate": 2.082878787878788e-06, "loss": 6.244376373291016, "step": 58760 }, { "epoch": 0.00465, "grad_norm": 5.929791450500488, "learning_rate": 2.0826262626262627e-06, "loss": 6.259492874145508, "step": 58765 }, { "epoch": 0.0047, "grad_norm": 6.689615726470947, "learning_rate": 2.0823737373737377e-06, "loss": 6.297813415527344, "step": 58770 }, { "epoch": 0.00475, "grad_norm": 3.904517889022827, "learning_rate": 2.0821212121212123e-06, "loss": 6.177570724487305, "step": 58775 }, { "epoch": 0.0048, "grad_norm": 6.369487762451172, "learning_rate": 2.081868686868687e-06, "loss": 6.2642356872558596, "step": 58780 }, { "epoch": 0.00485, "grad_norm": 4.5872039794921875, "learning_rate": 2.0816161616161616e-06, "loss": 6.240995025634765, "step": 58785 }, { "epoch": 0.0049, "grad_norm": 6.004894733428955, "learning_rate": 2.0813636363636366e-06, "loss": 6.245631408691406, "step": 58790 }, { "epoch": 0.00495, "grad_norm": 6.2296366691589355, "learning_rate": 2.0811111111111113e-06, "loss": 6.2500263214111325, "step": 58795 }, { "epoch": 0.005, "grad_norm": 6.170069217681885, "learning_rate": 2.0808585858585863e-06, "loss": 6.241983795166016, "step": 58800 }, { "epoch": 0.00505, "grad_norm": 6.995090007781982, "learning_rate": 2.0806060606060605e-06, "loss": 6.239134979248047, "step": 58805 }, { "epoch": 0.0051, "grad_norm": 5.230010509490967, "learning_rate": 2.0803535353535356e-06, "loss": 6.2425537109375, "step": 58810 }, { "epoch": 0.00515, "grad_norm": 4.865457057952881, "learning_rate": 2.0801010101010102e-06, "loss": 6.296752548217773, "step": 58815 }, { "epoch": 0.0052, "grad_norm": 6.243901252746582, "learning_rate": 2.0798484848484853e-06, "loss": 6.306088256835937, "step": 58820 }, { "epoch": 0.00525, "grad_norm": 6.144627571105957, "learning_rate": 2.07959595959596e-06, "loss": 6.2522834777832035, "step": 58825 }, { "epoch": 0.0053, "grad_norm": 5.954376697540283, "learning_rate": 2.0793434343434345e-06, "loss": 6.229249572753906, "step": 58830 }, { "epoch": 0.00535, "grad_norm": 5.864498615264893, "learning_rate": 2.079090909090909e-06, "loss": 6.24625244140625, "step": 58835 }, { "epoch": 0.0054, "grad_norm": 4.244873523712158, "learning_rate": 2.0788383838383842e-06, "loss": 6.246694183349609, "step": 58840 }, { "epoch": 0.00545, "grad_norm": 4.151030540466309, "learning_rate": 2.078585858585859e-06, "loss": 6.250484848022461, "step": 58845 }, { "epoch": 0.0055, "grad_norm": 7.175971508026123, "learning_rate": 2.0783333333333335e-06, "loss": 6.275516510009766, "step": 58850 }, { "epoch": 0.00555, "grad_norm": 4.066742420196533, "learning_rate": 2.078080808080808e-06, "loss": 6.294928741455078, "step": 58855 }, { "epoch": 0.0056, "grad_norm": 5.917944431304932, "learning_rate": 2.077828282828283e-06, "loss": 6.2820686340332035, "step": 58860 }, { "epoch": 0.00565, "grad_norm": 8.397479057312012, "learning_rate": 2.077575757575758e-06, "loss": 6.257723236083985, "step": 58865 }, { "epoch": 0.0057, "grad_norm": 6.1954803466796875, "learning_rate": 2.0773232323232324e-06, "loss": 6.28545913696289, "step": 58870 }, { "epoch": 0.00575, "grad_norm": 4.7637739181518555, "learning_rate": 2.077070707070707e-06, "loss": 6.259957122802734, "step": 58875 }, { "epoch": 0.0058, "grad_norm": 6.437526226043701, "learning_rate": 2.076818181818182e-06, "loss": 6.227023315429688, "step": 58880 }, { "epoch": 0.00585, "grad_norm": 5.302072048187256, "learning_rate": 2.0765656565656567e-06, "loss": 6.227678680419922, "step": 58885 }, { "epoch": 0.0059, "grad_norm": 14.1445951461792, "learning_rate": 2.0763131313131314e-06, "loss": 6.295816421508789, "step": 58890 }, { "epoch": 0.00595, "grad_norm": 11.199939727783203, "learning_rate": 2.076060606060606e-06, "loss": 6.210978698730469, "step": 58895 }, { "epoch": 0.006, "grad_norm": 7.514898300170898, "learning_rate": 2.075808080808081e-06, "loss": 6.242209625244141, "step": 58900 }, { "epoch": 0.00605, "grad_norm": 14.117866516113281, "learning_rate": 2.0755555555555557e-06, "loss": 6.376760864257813, "step": 58905 }, { "epoch": 0.0061, "grad_norm": 8.217406272888184, "learning_rate": 2.0753030303030307e-06, "loss": 6.203324508666992, "step": 58910 }, { "epoch": 0.00615, "grad_norm": 7.745266437530518, "learning_rate": 2.075050505050505e-06, "loss": 6.230584716796875, "step": 58915 }, { "epoch": 0.0062, "grad_norm": 5.548164367675781, "learning_rate": 2.07479797979798e-06, "loss": 6.29118537902832, "step": 58920 }, { "epoch": 0.00625, "grad_norm": 10.940072059631348, "learning_rate": 2.0745454545454546e-06, "loss": 6.217774200439453, "step": 58925 }, { "epoch": 0.0063, "grad_norm": 5.968781471252441, "learning_rate": 2.0742929292929297e-06, "loss": 6.280405044555664, "step": 58930 }, { "epoch": 0.00635, "grad_norm": 8.195931434631348, "learning_rate": 2.0740404040404043e-06, "loss": 6.245719146728516, "step": 58935 }, { "epoch": 0.0064, "grad_norm": 11.496380805969238, "learning_rate": 2.073787878787879e-06, "loss": 6.2779380798339846, "step": 58940 }, { "epoch": 0.00645, "grad_norm": 7.541276931762695, "learning_rate": 2.0735353535353536e-06, "loss": 6.215398788452148, "step": 58945 }, { "epoch": 0.0065, "grad_norm": 4.501926422119141, "learning_rate": 2.0732828282828286e-06, "loss": 6.245157623291016, "step": 58950 }, { "epoch": 0.00655, "grad_norm": 8.668429374694824, "learning_rate": 2.0730303030303033e-06, "loss": 6.2467296600341795, "step": 58955 }, { "epoch": 0.0066, "grad_norm": 7.481866836547852, "learning_rate": 2.072777777777778e-06, "loss": 6.214098358154297, "step": 58960 }, { "epoch": 0.00665, "grad_norm": 7.55385684967041, "learning_rate": 2.0725252525252525e-06, "loss": 6.257595825195312, "step": 58965 }, { "epoch": 0.0067, "grad_norm": 9.489137649536133, "learning_rate": 2.0722727272727276e-06, "loss": 6.227631378173828, "step": 58970 }, { "epoch": 0.00675, "grad_norm": 4.820658206939697, "learning_rate": 2.072020202020202e-06, "loss": 6.22340087890625, "step": 58975 }, { "epoch": 0.0068, "grad_norm": 5.687729835510254, "learning_rate": 2.071767676767677e-06, "loss": 6.225769805908203, "step": 58980 }, { "epoch": 0.00685, "grad_norm": 10.89376449584961, "learning_rate": 2.0715151515151515e-06, "loss": 6.288202285766602, "step": 58985 }, { "epoch": 0.0069, "grad_norm": 8.388337135314941, "learning_rate": 2.0712626262626265e-06, "loss": 6.239315414428711, "step": 58990 }, { "epoch": 0.00695, "grad_norm": 5.8432722091674805, "learning_rate": 2.071010101010101e-06, "loss": 6.175540924072266, "step": 58995 }, { "epoch": 0.007, "grad_norm": 6.630270957946777, "learning_rate": 2.0707575757575758e-06, "loss": 6.240454864501953, "step": 59000 }, { "epoch": 0.00705, "grad_norm": 7.407249450683594, "learning_rate": 2.0705050505050504e-06, "loss": 6.261737823486328, "step": 59005 }, { "epoch": 0.0071, "grad_norm": 6.996264457702637, "learning_rate": 2.0702525252525255e-06, "loss": 6.208383941650391, "step": 59010 }, { "epoch": 0.00715, "grad_norm": 7.77965784072876, "learning_rate": 2.07e-06, "loss": 6.242526245117188, "step": 59015 }, { "epoch": 0.0072, "grad_norm": 8.68140697479248, "learning_rate": 2.069747474747475e-06, "loss": 6.296956634521484, "step": 59020 }, { "epoch": 0.00725, "grad_norm": 5.560882568359375, "learning_rate": 2.0694949494949493e-06, "loss": 6.243286895751953, "step": 59025 }, { "epoch": 0.0073, "grad_norm": 8.774682998657227, "learning_rate": 2.0692424242424244e-06, "loss": 6.250191497802734, "step": 59030 }, { "epoch": 0.00735, "grad_norm": 6.194063186645508, "learning_rate": 2.068989898989899e-06, "loss": 6.242560195922851, "step": 59035 }, { "epoch": 0.0074, "grad_norm": 10.61329174041748, "learning_rate": 2.068737373737374e-06, "loss": 6.387549972534179, "step": 59040 }, { "epoch": 0.00745, "grad_norm": 6.626562595367432, "learning_rate": 2.0684848484848487e-06, "loss": 6.2502586364746096, "step": 59045 }, { "epoch": 0.0075, "grad_norm": 4.035065174102783, "learning_rate": 2.0682323232323233e-06, "loss": 6.232435607910157, "step": 59050 }, { "epoch": 0.00755, "grad_norm": 7.419530391693115, "learning_rate": 2.067979797979798e-06, "loss": 6.246858215332031, "step": 59055 }, { "epoch": 0.0076, "grad_norm": 6.5832319259643555, "learning_rate": 2.067727272727273e-06, "loss": 6.27789421081543, "step": 59060 }, { "epoch": 0.00765, "grad_norm": 6.354876518249512, "learning_rate": 2.0674747474747477e-06, "loss": 6.240566635131836, "step": 59065 }, { "epoch": 0.0077, "grad_norm": 4.257768154144287, "learning_rate": 2.0672222222222223e-06, "loss": 6.236518096923828, "step": 59070 }, { "epoch": 0.00775, "grad_norm": 8.047410011291504, "learning_rate": 2.066969696969697e-06, "loss": 6.398063659667969, "step": 59075 }, { "epoch": 0.0078, "grad_norm": 6.120869159698486, "learning_rate": 2.066717171717172e-06, "loss": 6.20064697265625, "step": 59080 }, { "epoch": 0.00785, "grad_norm": 4.9740142822265625, "learning_rate": 2.0664646464646466e-06, "loss": 6.226462173461914, "step": 59085 }, { "epoch": 0.0079, "grad_norm": 7.381185531616211, "learning_rate": 2.0662121212121212e-06, "loss": 6.277249908447265, "step": 59090 }, { "epoch": 0.00795, "grad_norm": 7.203022480010986, "learning_rate": 2.065959595959596e-06, "loss": 6.244776916503906, "step": 59095 }, { "epoch": 0.008, "grad_norm": 6.161643028259277, "learning_rate": 2.065707070707071e-06, "loss": 6.271998596191406, "step": 59100 }, { "epoch": 0.00805, "grad_norm": 4.114317893981934, "learning_rate": 2.0654545454545455e-06, "loss": 6.339406585693359, "step": 59105 }, { "epoch": 0.0081, "grad_norm": 5.319577217102051, "learning_rate": 2.06520202020202e-06, "loss": 6.258371353149414, "step": 59110 }, { "epoch": 0.00815, "grad_norm": 7.645069599151611, "learning_rate": 2.064949494949495e-06, "loss": 6.185433578491211, "step": 59115 }, { "epoch": 0.0082, "grad_norm": 5.884204387664795, "learning_rate": 2.06469696969697e-06, "loss": 6.262901306152344, "step": 59120 }, { "epoch": 0.00825, "grad_norm": 5.214514255523682, "learning_rate": 2.064444444444445e-06, "loss": 6.195463943481445, "step": 59125 }, { "epoch": 0.0083, "grad_norm": 7.237389087677002, "learning_rate": 2.0641919191919195e-06, "loss": 6.104991912841797, "step": 59130 }, { "epoch": 0.00835, "grad_norm": 6.023920059204102, "learning_rate": 2.063939393939394e-06, "loss": 6.2403724670410154, "step": 59135 }, { "epoch": 0.0084, "grad_norm": 5.039715766906738, "learning_rate": 2.063686868686869e-06, "loss": 6.185231399536133, "step": 59140 }, { "epoch": 0.00845, "grad_norm": 9.01639175415039, "learning_rate": 2.063434343434344e-06, "loss": 6.261359024047851, "step": 59145 }, { "epoch": 0.0085, "grad_norm": 5.853230953216553, "learning_rate": 2.0631818181818185e-06, "loss": 6.225215148925781, "step": 59150 }, { "epoch": 0.00855, "grad_norm": 7.653721809387207, "learning_rate": 2.062929292929293e-06, "loss": 6.56225357055664, "step": 59155 }, { "epoch": 0.0086, "grad_norm": 4.256361961364746, "learning_rate": 2.0626767676767677e-06, "loss": 6.247796630859375, "step": 59160 }, { "epoch": 0.00865, "grad_norm": 3.34798002243042, "learning_rate": 2.062424242424243e-06, "loss": 6.281182098388672, "step": 59165 }, { "epoch": 0.0087, "grad_norm": 11.940790176391602, "learning_rate": 2.0621717171717174e-06, "loss": 6.456704711914062, "step": 59170 }, { "epoch": 0.00875, "grad_norm": 5.696875095367432, "learning_rate": 2.061919191919192e-06, "loss": 6.37960205078125, "step": 59175 }, { "epoch": 0.0088, "grad_norm": 4.6442484855651855, "learning_rate": 2.0616666666666667e-06, "loss": 6.230566787719726, "step": 59180 }, { "epoch": 0.00885, "grad_norm": 8.425728797912598, "learning_rate": 2.0614141414141417e-06, "loss": 6.189837646484375, "step": 59185 }, { "epoch": 0.0089, "grad_norm": 8.15460205078125, "learning_rate": 2.0611616161616164e-06, "loss": 6.209268951416016, "step": 59190 }, { "epoch": 0.00895, "grad_norm": 4.069056034088135, "learning_rate": 2.060909090909091e-06, "loss": 6.283362579345703, "step": 59195 }, { "epoch": 0.009, "grad_norm": 7.074182033538818, "learning_rate": 2.0606565656565656e-06, "loss": 6.2827095031738285, "step": 59200 }, { "epoch": 0.00905, "grad_norm": 23.030685424804688, "learning_rate": 2.0604040404040407e-06, "loss": 6.443669128417969, "step": 59205 }, { "epoch": 0.0091, "grad_norm": 4.0687994956970215, "learning_rate": 2.0601515151515153e-06, "loss": 6.2302001953125, "step": 59210 }, { "epoch": 0.00915, "grad_norm": 4.404995441436768, "learning_rate": 2.0598989898989904e-06, "loss": 6.183870697021485, "step": 59215 }, { "epoch": 0.0092, "grad_norm": 6.84672212600708, "learning_rate": 2.0596464646464646e-06, "loss": 6.280952072143554, "step": 59220 }, { "epoch": 0.00925, "grad_norm": 14.95130729675293, "learning_rate": 2.0593939393939396e-06, "loss": 6.221119689941406, "step": 59225 }, { "epoch": 0.0093, "grad_norm": 5.483609676361084, "learning_rate": 2.0591414141414143e-06, "loss": 6.461077880859375, "step": 59230 }, { "epoch": 0.00935, "grad_norm": 6.021291732788086, "learning_rate": 2.0588888888888893e-06, "loss": 6.2539722442626955, "step": 59235 }, { "epoch": 0.0094, "grad_norm": 5.072850227355957, "learning_rate": 2.058636363636364e-06, "loss": 6.211588668823242, "step": 59240 }, { "epoch": 0.00945, "grad_norm": 8.354129791259766, "learning_rate": 2.0583838383838386e-06, "loss": 6.215970230102539, "step": 59245 }, { "epoch": 0.0095, "grad_norm": 5.599398612976074, "learning_rate": 2.058131313131313e-06, "loss": 6.245159530639649, "step": 59250 }, { "epoch": 0.00955, "grad_norm": 13.706000328063965, "learning_rate": 2.0578787878787883e-06, "loss": 6.308541870117187, "step": 59255 }, { "epoch": 0.0096, "grad_norm": 6.657070636749268, "learning_rate": 2.057626262626263e-06, "loss": 6.3773963928222654, "step": 59260 }, { "epoch": 0.00965, "grad_norm": 12.861201286315918, "learning_rate": 2.0573737373737375e-06, "loss": 6.297513961791992, "step": 59265 }, { "epoch": 0.0097, "grad_norm": 6.077773571014404, "learning_rate": 2.057121212121212e-06, "loss": 6.253102111816406, "step": 59270 }, { "epoch": 0.00975, "grad_norm": 7.472360134124756, "learning_rate": 2.056868686868687e-06, "loss": 6.310331726074219, "step": 59275 }, { "epoch": 0.0098, "grad_norm": 5.121665954589844, "learning_rate": 2.056616161616162e-06, "loss": 6.230461120605469, "step": 59280 }, { "epoch": 0.00985, "grad_norm": 8.061515808105469, "learning_rate": 2.0563636363636365e-06, "loss": 6.265071868896484, "step": 59285 }, { "epoch": 0.0099, "grad_norm": 8.860408782958984, "learning_rate": 2.056111111111111e-06, "loss": 6.211807632446289, "step": 59290 }, { "epoch": 0.00995, "grad_norm": 24.444046020507812, "learning_rate": 2.055858585858586e-06, "loss": 6.369590377807617, "step": 59295 }, { "epoch": 0.01, "grad_norm": 4.001605033874512, "learning_rate": 2.0556060606060608e-06, "loss": 6.312588882446289, "step": 59300 }, { "epoch": 0.01005, "grad_norm": 6.2984724044799805, "learning_rate": 2.0553535353535354e-06, "loss": 6.410684204101562, "step": 59305 }, { "epoch": 0.0101, "grad_norm": 8.05012321472168, "learning_rate": 2.05510101010101e-06, "loss": 6.2490699768066404, "step": 59310 }, { "epoch": 0.01015, "grad_norm": 5.546839714050293, "learning_rate": 2.054848484848485e-06, "loss": 6.516550445556641, "step": 59315 }, { "epoch": 0.0102, "grad_norm": 8.235722541809082, "learning_rate": 2.0545959595959597e-06, "loss": 6.2799217224121096, "step": 59320 }, { "epoch": 0.01025, "grad_norm": 8.669271469116211, "learning_rate": 2.0543434343434348e-06, "loss": 6.226139831542969, "step": 59325 }, { "epoch": 0.0103, "grad_norm": 11.2813081741333, "learning_rate": 2.054090909090909e-06, "loss": 6.320725631713867, "step": 59330 }, { "epoch": 0.01035, "grad_norm": 18.08547019958496, "learning_rate": 2.053838383838384e-06, "loss": 6.248417663574219, "step": 59335 }, { "epoch": 0.0104, "grad_norm": 5.3775835037231445, "learning_rate": 2.0535858585858587e-06, "loss": 6.2202411651611325, "step": 59340 }, { "epoch": 0.01045, "grad_norm": 4.200712203979492, "learning_rate": 2.0533333333333337e-06, "loss": 6.254697036743164, "step": 59345 }, { "epoch": 0.0105, "grad_norm": 5.73336124420166, "learning_rate": 2.0530808080808084e-06, "loss": 6.25636100769043, "step": 59350 }, { "epoch": 0.01055, "grad_norm": 5.717238426208496, "learning_rate": 2.052828282828283e-06, "loss": 6.251474761962891, "step": 59355 }, { "epoch": 0.0106, "grad_norm": 5.027848720550537, "learning_rate": 2.0525757575757576e-06, "loss": 6.381567001342773, "step": 59360 }, { "epoch": 0.01065, "grad_norm": 5.138001441955566, "learning_rate": 2.0523232323232327e-06, "loss": 6.260155487060547, "step": 59365 }, { "epoch": 0.0107, "grad_norm": 5.387721538543701, "learning_rate": 2.0520707070707073e-06, "loss": 6.319944763183594, "step": 59370 }, { "epoch": 0.01075, "grad_norm": 6.613460063934326, "learning_rate": 2.051818181818182e-06, "loss": 6.251589965820313, "step": 59375 }, { "epoch": 0.0108, "grad_norm": 11.720845222473145, "learning_rate": 2.0515656565656566e-06, "loss": 6.278641891479492, "step": 59380 }, { "epoch": 0.01085, "grad_norm": 4.2058634757995605, "learning_rate": 2.0513131313131316e-06, "loss": 6.293550872802735, "step": 59385 }, { "epoch": 0.0109, "grad_norm": 3.868006706237793, "learning_rate": 2.0510606060606062e-06, "loss": 6.299354553222656, "step": 59390 }, { "epoch": 0.01095, "grad_norm": 4.243855953216553, "learning_rate": 2.050808080808081e-06, "loss": 6.268869781494141, "step": 59395 }, { "epoch": 0.011, "grad_norm": 7.821079730987549, "learning_rate": 2.0505555555555555e-06, "loss": 6.215383911132813, "step": 59400 }, { "epoch": 0.01105, "grad_norm": 4.728719234466553, "learning_rate": 2.0503030303030306e-06, "loss": 6.283706283569336, "step": 59405 }, { "epoch": 0.0111, "grad_norm": 7.089285850524902, "learning_rate": 2.050050505050505e-06, "loss": 6.2631370544433596, "step": 59410 }, { "epoch": 0.01115, "grad_norm": 5.649655818939209, "learning_rate": 2.04979797979798e-06, "loss": 6.2102203369140625, "step": 59415 }, { "epoch": 0.0112, "grad_norm": 7.316986560821533, "learning_rate": 2.0495454545454544e-06, "loss": 6.271542739868164, "step": 59420 }, { "epoch": 0.01125, "grad_norm": 10.58299446105957, "learning_rate": 2.0492929292929295e-06, "loss": 6.2690174102783205, "step": 59425 }, { "epoch": 0.0113, "grad_norm": 7.133983612060547, "learning_rate": 2.049040404040404e-06, "loss": 6.2369026184082035, "step": 59430 }, { "epoch": 0.01135, "grad_norm": 3.673866033554077, "learning_rate": 2.048787878787879e-06, "loss": 6.2385498046875, "step": 59435 }, { "epoch": 0.0114, "grad_norm": 5.4545369148254395, "learning_rate": 2.048535353535354e-06, "loss": 6.269493865966797, "step": 59440 }, { "epoch": 0.01145, "grad_norm": 4.063849925994873, "learning_rate": 2.0482828282828284e-06, "loss": 6.26872673034668, "step": 59445 }, { "epoch": 0.0115, "grad_norm": 4.720810413360596, "learning_rate": 2.048030303030303e-06, "loss": 6.230175018310547, "step": 59450 }, { "epoch": 0.01155, "grad_norm": 3.442251443862915, "learning_rate": 2.047777777777778e-06, "loss": 6.26927490234375, "step": 59455 }, { "epoch": 0.0116, "grad_norm": 3.4552061557769775, "learning_rate": 2.0475252525252528e-06, "loss": 6.241130828857422, "step": 59460 }, { "epoch": 0.01165, "grad_norm": 7.5036301612854, "learning_rate": 2.0472727272727274e-06, "loss": 6.283591461181641, "step": 59465 }, { "epoch": 0.0117, "grad_norm": 6.505871295928955, "learning_rate": 2.047020202020202e-06, "loss": 6.247476196289062, "step": 59470 }, { "epoch": 0.01175, "grad_norm": 4.837193965911865, "learning_rate": 2.046767676767677e-06, "loss": 6.295603561401367, "step": 59475 }, { "epoch": 0.0118, "grad_norm": 7.26710844039917, "learning_rate": 2.0465151515151517e-06, "loss": 6.306903076171875, "step": 59480 }, { "epoch": 0.01185, "grad_norm": 10.514891624450684, "learning_rate": 2.0462626262626263e-06, "loss": 6.236568832397461, "step": 59485 }, { "epoch": 0.0119, "grad_norm": 7.005358695983887, "learning_rate": 2.046010101010101e-06, "loss": 6.236373519897461, "step": 59490 }, { "epoch": 0.01195, "grad_norm": 8.819540023803711, "learning_rate": 2.045757575757576e-06, "loss": 6.158717346191406, "step": 59495 }, { "epoch": 0.012, "grad_norm": 14.289708137512207, "learning_rate": 2.0455050505050506e-06, "loss": 6.3417106628417965, "step": 59500 }, { "epoch": 0.01205, "grad_norm": 29.90252113342285, "learning_rate": 2.0452525252525253e-06, "loss": 6.445090484619141, "step": 59505 }, { "epoch": 0.0121, "grad_norm": 4.061923980712891, "learning_rate": 2.045e-06, "loss": 6.270865631103516, "step": 59510 }, { "epoch": 0.01215, "grad_norm": 4.928311347961426, "learning_rate": 2.044747474747475e-06, "loss": 6.197664642333985, "step": 59515 }, { "epoch": 0.0122, "grad_norm": 7.304781913757324, "learning_rate": 2.0444949494949496e-06, "loss": 6.371374130249023, "step": 59520 }, { "epoch": 0.01225, "grad_norm": 9.379595756530762, "learning_rate": 2.0442424242424242e-06, "loss": 6.230873870849609, "step": 59525 }, { "epoch": 0.0123, "grad_norm": 9.393119812011719, "learning_rate": 2.043989898989899e-06, "loss": 6.2115520477294925, "step": 59530 }, { "epoch": 0.01235, "grad_norm": 8.150581359863281, "learning_rate": 2.043737373737374e-06, "loss": 6.252452850341797, "step": 59535 }, { "epoch": 0.0124, "grad_norm": 4.230960845947266, "learning_rate": 2.0434848484848485e-06, "loss": 6.222021865844726, "step": 59540 }, { "epoch": 0.01245, "grad_norm": 7.788488864898682, "learning_rate": 2.0432323232323236e-06, "loss": 6.220973205566406, "step": 59545 }, { "epoch": 0.0125, "grad_norm": 5.754029273986816, "learning_rate": 2.0429797979797982e-06, "loss": 6.228139495849609, "step": 59550 }, { "epoch": 0.01255, "grad_norm": 16.65953826904297, "learning_rate": 2.042727272727273e-06, "loss": 6.35261344909668, "step": 59555 }, { "epoch": 0.0126, "grad_norm": 7.600273132324219, "learning_rate": 2.042474747474748e-06, "loss": 6.2491302490234375, "step": 59560 }, { "epoch": 0.01265, "grad_norm": 7.213146686553955, "learning_rate": 2.0422222222222225e-06, "loss": 6.26468505859375, "step": 59565 }, { "epoch": 0.0127, "grad_norm": 10.1974458694458, "learning_rate": 2.041969696969697e-06, "loss": 6.288265228271484, "step": 59570 }, { "epoch": 0.01275, "grad_norm": 8.215903282165527, "learning_rate": 2.0417171717171718e-06, "loss": 6.201254272460938, "step": 59575 }, { "epoch": 0.0128, "grad_norm": 4.651272773742676, "learning_rate": 2.041464646464647e-06, "loss": 6.311289596557617, "step": 59580 }, { "epoch": 0.01285, "grad_norm": 4.360549449920654, "learning_rate": 2.0412121212121215e-06, "loss": 6.23725700378418, "step": 59585 }, { "epoch": 0.0129, "grad_norm": 8.401599884033203, "learning_rate": 2.040959595959596e-06, "loss": 6.222241973876953, "step": 59590 }, { "epoch": 0.01295, "grad_norm": 8.613070487976074, "learning_rate": 2.0407070707070707e-06, "loss": 6.30540657043457, "step": 59595 }, { "epoch": 0.013, "grad_norm": 7.509093761444092, "learning_rate": 2.0404545454545458e-06, "loss": 6.280400085449219, "step": 59600 }, { "epoch": 5e-05, "grad_norm": 4.9458746910095215, "learning_rate": 2.0402020202020204e-06, "loss": 6.245373916625977, "step": 59605 }, { "epoch": 0.0001, "grad_norm": 6.579072952270508, "learning_rate": 2.039949494949495e-06, "loss": 6.235141754150391, "step": 59610 }, { "epoch": 0.00015, "grad_norm": 6.200655460357666, "learning_rate": 2.0396969696969697e-06, "loss": 6.201121139526367, "step": 59615 }, { "epoch": 0.0002, "grad_norm": 6.340612888336182, "learning_rate": 2.0394444444444447e-06, "loss": 6.235136413574219, "step": 59620 }, { "epoch": 0.00025, "grad_norm": 5.245672702789307, "learning_rate": 2.0391919191919194e-06, "loss": 6.287697601318359, "step": 59625 }, { "epoch": 0.0003, "grad_norm": 3.7286078929901123, "learning_rate": 2.0389393939393944e-06, "loss": 6.259527587890625, "step": 59630 }, { "epoch": 0.00035, "grad_norm": 5.604214191436768, "learning_rate": 2.0386868686868686e-06, "loss": 6.213122177124023, "step": 59635 }, { "epoch": 0.0004, "grad_norm": 6.735153675079346, "learning_rate": 2.0384343434343437e-06, "loss": 6.250712966918945, "step": 59640 }, { "epoch": 0.00045, "grad_norm": 5.005904197692871, "learning_rate": 2.0381818181818183e-06, "loss": 6.2330787658691404, "step": 59645 }, { "epoch": 0.0005, "grad_norm": 5.3076348304748535, "learning_rate": 2.0379292929292934e-06, "loss": 6.229384994506836, "step": 59650 }, { "epoch": 0.00055, "grad_norm": 6.4799323081970215, "learning_rate": 2.037676767676768e-06, "loss": 6.235152053833008, "step": 59655 }, { "epoch": 0.0006, "grad_norm": 8.953072547912598, "learning_rate": 2.0374242424242426e-06, "loss": 6.2888744354248045, "step": 59660 }, { "epoch": 0.00065, "grad_norm": 6.9857258796691895, "learning_rate": 2.0371717171717172e-06, "loss": 6.2419891357421875, "step": 59665 }, { "epoch": 0.0007, "grad_norm": 16.383989334106445, "learning_rate": 2.0369191919191923e-06, "loss": 6.313602447509766, "step": 59670 }, { "epoch": 0.00075, "grad_norm": 4.819504261016846, "learning_rate": 2.036666666666667e-06, "loss": 6.3113140106201175, "step": 59675 }, { "epoch": 0.0008, "grad_norm": 4.399473667144775, "learning_rate": 2.0364141414141416e-06, "loss": 6.238159942626953, "step": 59680 }, { "epoch": 0.00085, "grad_norm": 15.481772422790527, "learning_rate": 2.036161616161616e-06, "loss": 6.225191116333008, "step": 59685 }, { "epoch": 0.0009, "grad_norm": 6.649672031402588, "learning_rate": 2.0359090909090912e-06, "loss": 6.225462341308594, "step": 59690 }, { "epoch": 0.00095, "grad_norm": 3.5958218574523926, "learning_rate": 2.035656565656566e-06, "loss": 6.281636428833008, "step": 59695 }, { "epoch": 0.001, "grad_norm": 8.453826904296875, "learning_rate": 2.0354040404040405e-06, "loss": 6.178199386596679, "step": 59700 }, { "epoch": 0.00105, "grad_norm": 8.893611907958984, "learning_rate": 2.035151515151515e-06, "loss": 6.227024841308594, "step": 59705 }, { "epoch": 0.0011, "grad_norm": 8.087532043457031, "learning_rate": 2.03489898989899e-06, "loss": 6.222898483276367, "step": 59710 }, { "epoch": 0.00115, "grad_norm": 9.734390258789062, "learning_rate": 2.034646464646465e-06, "loss": 6.226521301269531, "step": 59715 }, { "epoch": 0.0012, "grad_norm": 6.947019577026367, "learning_rate": 2.0343939393939394e-06, "loss": 6.249479675292969, "step": 59720 }, { "epoch": 0.00125, "grad_norm": 6.1662821769714355, "learning_rate": 2.034141414141414e-06, "loss": 6.256335830688476, "step": 59725 }, { "epoch": 0.0013, "grad_norm": 13.549443244934082, "learning_rate": 2.033888888888889e-06, "loss": 6.271659851074219, "step": 59730 }, { "epoch": 0.00135, "grad_norm": 6.757068634033203, "learning_rate": 2.0336363636363638e-06, "loss": 6.226794052124023, "step": 59735 }, { "epoch": 0.0014, "grad_norm": 4.839574337005615, "learning_rate": 2.033383838383839e-06, "loss": 6.226781845092773, "step": 59740 }, { "epoch": 0.00145, "grad_norm": 7.421070098876953, "learning_rate": 2.033131313131313e-06, "loss": 6.244200134277344, "step": 59745 }, { "epoch": 0.0015, "grad_norm": 5.935271263122559, "learning_rate": 2.032878787878788e-06, "loss": 6.2296100616455075, "step": 59750 }, { "epoch": 0.00155, "grad_norm": 6.38646936416626, "learning_rate": 2.0326262626262627e-06, "loss": 6.279094314575195, "step": 59755 }, { "epoch": 0.0016, "grad_norm": 5.865725040435791, "learning_rate": 2.0323737373737378e-06, "loss": 6.2385303497314455, "step": 59760 }, { "epoch": 0.00165, "grad_norm": 9.36620044708252, "learning_rate": 2.0321212121212124e-06, "loss": 6.243671035766601, "step": 59765 }, { "epoch": 0.0017, "grad_norm": 5.349783420562744, "learning_rate": 2.031868686868687e-06, "loss": 6.22540512084961, "step": 59770 }, { "epoch": 0.00175, "grad_norm": 14.827681541442871, "learning_rate": 2.0316161616161617e-06, "loss": 6.603054046630859, "step": 59775 }, { "epoch": 0.0018, "grad_norm": 10.38107967376709, "learning_rate": 2.0313636363636367e-06, "loss": 6.379862976074219, "step": 59780 }, { "epoch": 0.00185, "grad_norm": 7.574892520904541, "learning_rate": 2.0311111111111113e-06, "loss": 6.2152149200439455, "step": 59785 }, { "epoch": 0.0019, "grad_norm": 3.2589337825775146, "learning_rate": 2.030858585858586e-06, "loss": 6.238515090942383, "step": 59790 }, { "epoch": 0.00195, "grad_norm": 8.448206901550293, "learning_rate": 2.0306060606060606e-06, "loss": 6.222703170776367, "step": 59795 }, { "epoch": 0.002, "grad_norm": 8.218720436096191, "learning_rate": 2.0303535353535356e-06, "loss": 6.2609916687011715, "step": 59800 }, { "epoch": 0.00205, "grad_norm": 4.718255519866943, "learning_rate": 2.0301010101010103e-06, "loss": 6.2771343231201175, "step": 59805 }, { "epoch": 0.0021, "grad_norm": 6.30755615234375, "learning_rate": 2.029848484848485e-06, "loss": 6.275223922729492, "step": 59810 }, { "epoch": 0.00215, "grad_norm": 6.164546966552734, "learning_rate": 2.0295959595959595e-06, "loss": 6.3953086853027346, "step": 59815 }, { "epoch": 0.0022, "grad_norm": 5.780943393707275, "learning_rate": 2.0293434343434346e-06, "loss": 6.257404708862305, "step": 59820 }, { "epoch": 0.00225, "grad_norm": 3.8344035148620605, "learning_rate": 2.0290909090909092e-06, "loss": 6.263419342041016, "step": 59825 }, { "epoch": 0.0023, "grad_norm": 7.261663436889648, "learning_rate": 2.028838383838384e-06, "loss": 6.238826751708984, "step": 59830 }, { "epoch": 0.00235, "grad_norm": 6.593562602996826, "learning_rate": 2.0285858585858585e-06, "loss": 6.257351684570312, "step": 59835 }, { "epoch": 0.0024, "grad_norm": 12.700066566467285, "learning_rate": 2.0283333333333335e-06, "loss": 6.240587615966797, "step": 59840 }, { "epoch": 0.00245, "grad_norm": 10.010307312011719, "learning_rate": 2.028080808080808e-06, "loss": 6.2245220184326175, "step": 59845 }, { "epoch": 0.0025, "grad_norm": 8.966395378112793, "learning_rate": 2.0278282828282832e-06, "loss": 6.241418075561524, "step": 59850 }, { "epoch": 0.00255, "grad_norm": 4.632090091705322, "learning_rate": 2.027575757575758e-06, "loss": 6.203256225585937, "step": 59855 }, { "epoch": 0.0026, "grad_norm": 4.753121852874756, "learning_rate": 2.0273232323232325e-06, "loss": 6.282467651367187, "step": 59860 }, { "epoch": 0.00265, "grad_norm": 7.422281265258789, "learning_rate": 2.027070707070707e-06, "loss": 6.284918975830078, "step": 59865 }, { "epoch": 0.0027, "grad_norm": 4.985781192779541, "learning_rate": 2.026818181818182e-06, "loss": 6.249455261230469, "step": 59870 }, { "epoch": 0.00275, "grad_norm": 6.608810901641846, "learning_rate": 2.026565656565657e-06, "loss": 6.2374622344970705, "step": 59875 }, { "epoch": 0.0028, "grad_norm": 4.5697340965271, "learning_rate": 2.0263131313131314e-06, "loss": 6.282121276855468, "step": 59880 }, { "epoch": 0.00285, "grad_norm": 12.852723121643066, "learning_rate": 2.026060606060606e-06, "loss": 6.432363128662109, "step": 59885 }, { "epoch": 0.0029, "grad_norm": 4.338818073272705, "learning_rate": 2.025808080808081e-06, "loss": 6.270924758911133, "step": 59890 }, { "epoch": 0.00295, "grad_norm": 6.760826587677002, "learning_rate": 2.0255555555555557e-06, "loss": 6.252292251586914, "step": 59895 }, { "epoch": 0.003, "grad_norm": 77.43193817138672, "learning_rate": 2.0253030303030304e-06, "loss": 9.314897918701172, "step": 59900 }, { "epoch": 0.00305, "grad_norm": 21.71402931213379, "learning_rate": 2.025050505050505e-06, "loss": 9.21207275390625, "step": 59905 }, { "epoch": 0.0031, "grad_norm": 5.709657669067383, "learning_rate": 2.02479797979798e-06, "loss": 6.2602581024169925, "step": 59910 }, { "epoch": 0.00315, "grad_norm": 5.010549545288086, "learning_rate": 2.0245454545454547e-06, "loss": 6.288776779174805, "step": 59915 }, { "epoch": 0.0032, "grad_norm": 6.869956970214844, "learning_rate": 2.0242929292929293e-06, "loss": 6.2321117401123045, "step": 59920 }, { "epoch": 0.00325, "grad_norm": 13.989686012268066, "learning_rate": 2.024040404040404e-06, "loss": 6.50732650756836, "step": 59925 }, { "epoch": 0.0033, "grad_norm": 9.272558212280273, "learning_rate": 2.023787878787879e-06, "loss": 6.274212646484375, "step": 59930 }, { "epoch": 0.00335, "grad_norm": 7.050143241882324, "learning_rate": 2.0235353535353536e-06, "loss": 6.311806106567383, "step": 59935 }, { "epoch": 0.0034, "grad_norm": 7.0291924476623535, "learning_rate": 2.0232828282828283e-06, "loss": 6.214493179321289, "step": 59940 }, { "epoch": 0.00345, "grad_norm": 5.257014751434326, "learning_rate": 2.023030303030303e-06, "loss": 6.243858337402344, "step": 59945 }, { "epoch": 0.0035, "grad_norm": 5.125995635986328, "learning_rate": 2.022777777777778e-06, "loss": 6.2663532257080075, "step": 59950 }, { "epoch": 0.00355, "grad_norm": 8.277807235717773, "learning_rate": 2.0225252525252526e-06, "loss": 6.26971435546875, "step": 59955 }, { "epoch": 0.0036, "grad_norm": 4.126095771789551, "learning_rate": 2.0222727272727276e-06, "loss": 6.244985198974609, "step": 59960 }, { "epoch": 0.00365, "grad_norm": 7.981186389923096, "learning_rate": 2.0220202020202023e-06, "loss": 6.218435668945313, "step": 59965 }, { "epoch": 0.0037, "grad_norm": 8.124320983886719, "learning_rate": 2.021767676767677e-06, "loss": 6.247775268554688, "step": 59970 }, { "epoch": 0.00375, "grad_norm": 8.51883316040039, "learning_rate": 2.021515151515152e-06, "loss": 6.274928283691406, "step": 59975 }, { "epoch": 0.0038, "grad_norm": 6.059136390686035, "learning_rate": 2.0212626262626266e-06, "loss": 6.3264610290527346, "step": 59980 }, { "epoch": 0.00385, "grad_norm": 7.581604480743408, "learning_rate": 2.021010101010101e-06, "loss": 6.245943450927735, "step": 59985 }, { "epoch": 0.0039, "grad_norm": 16.726388931274414, "learning_rate": 2.020757575757576e-06, "loss": 6.241855621337891, "step": 59990 }, { "epoch": 0.00395, "grad_norm": 7.465245246887207, "learning_rate": 2.020505050505051e-06, "loss": 6.260879135131836, "step": 59995 }, { "epoch": 0.004, "grad_norm": 4.996988773345947, "learning_rate": 2.0202525252525255e-06, "loss": 6.261257934570312, "step": 60000 }, { "epoch": 0.00405, "grad_norm": 6.171202659606934, "learning_rate": 2.02e-06, "loss": 6.222555923461914, "step": 60005 }, { "epoch": 0.0041, "grad_norm": 7.252633094787598, "learning_rate": 2.0197474747474748e-06, "loss": 6.287104797363281, "step": 60010 }, { "epoch": 0.00415, "grad_norm": 6.054281234741211, "learning_rate": 2.01949494949495e-06, "loss": 6.177817535400391, "step": 60015 }, { "epoch": 0.0042, "grad_norm": 9.523404121398926, "learning_rate": 2.0192424242424245e-06, "loss": 6.288098907470703, "step": 60020 }, { "epoch": 0.00425, "grad_norm": 12.537303924560547, "learning_rate": 2.018989898989899e-06, "loss": 6.329929351806641, "step": 60025 }, { "epoch": 0.0043, "grad_norm": 11.161575317382812, "learning_rate": 2.0187373737373737e-06, "loss": 6.269992065429688, "step": 60030 }, { "epoch": 0.00435, "grad_norm": 6.9387593269348145, "learning_rate": 2.0184848484848488e-06, "loss": 6.25155029296875, "step": 60035 }, { "epoch": 0.0044, "grad_norm": 6.724871635437012, "learning_rate": 2.0182323232323234e-06, "loss": 6.301598739624024, "step": 60040 }, { "epoch": 0.00445, "grad_norm": 4.521924018859863, "learning_rate": 2.0179797979797985e-06, "loss": 6.236788558959961, "step": 60045 }, { "epoch": 0.0045, "grad_norm": 13.769444465637207, "learning_rate": 2.0177272727272727e-06, "loss": 6.416142272949219, "step": 60050 }, { "epoch": 0.00455, "grad_norm": 10.463642120361328, "learning_rate": 2.0174747474747477e-06, "loss": 6.235598754882813, "step": 60055 }, { "epoch": 0.0046, "grad_norm": 6.326048851013184, "learning_rate": 2.0172222222222223e-06, "loss": 6.173157501220703, "step": 60060 }, { "epoch": 0.00465, "grad_norm": 6.822091102600098, "learning_rate": 2.0169696969696974e-06, "loss": 6.33012809753418, "step": 60065 }, { "epoch": 0.0047, "grad_norm": 5.950543403625488, "learning_rate": 2.016717171717172e-06, "loss": 6.277599716186524, "step": 60070 }, { "epoch": 0.00475, "grad_norm": 6.905148029327393, "learning_rate": 2.0164646464646467e-06, "loss": 6.243090057373047, "step": 60075 }, { "epoch": 0.0048, "grad_norm": 8.327091217041016, "learning_rate": 2.0162121212121213e-06, "loss": 6.226247024536133, "step": 60080 }, { "epoch": 0.00485, "grad_norm": 10.265803337097168, "learning_rate": 2.0159595959595963e-06, "loss": 6.230131530761719, "step": 60085 }, { "epoch": 0.0049, "grad_norm": 6.534043788909912, "learning_rate": 2.015707070707071e-06, "loss": 6.281332397460938, "step": 60090 }, { "epoch": 0.00495, "grad_norm": 8.848848342895508, "learning_rate": 2.0154545454545456e-06, "loss": 6.2816730499267575, "step": 60095 }, { "epoch": 0.005, "grad_norm": 5.33677339553833, "learning_rate": 2.0152020202020202e-06, "loss": 6.262407684326172, "step": 60100 }, { "epoch": 0.00505, "grad_norm": 4.994957447052002, "learning_rate": 2.0149494949494953e-06, "loss": 6.271898651123047, "step": 60105 }, { "epoch": 0.0051, "grad_norm": 7.089867115020752, "learning_rate": 2.01469696969697e-06, "loss": 6.2675727844238285, "step": 60110 }, { "epoch": 0.00515, "grad_norm": 6.446727275848389, "learning_rate": 2.0144444444444445e-06, "loss": 6.255917739868164, "step": 60115 }, { "epoch": 0.0052, "grad_norm": 12.923054695129395, "learning_rate": 2.014191919191919e-06, "loss": 6.235004806518555, "step": 60120 }, { "epoch": 0.00525, "grad_norm": 11.632373809814453, "learning_rate": 2.0139393939393942e-06, "loss": 6.225760269165039, "step": 60125 }, { "epoch": 0.0053, "grad_norm": 5.69319486618042, "learning_rate": 2.013686868686869e-06, "loss": 6.265936279296875, "step": 60130 }, { "epoch": 0.00535, "grad_norm": 3.9598515033721924, "learning_rate": 2.0134343434343435e-06, "loss": 6.250956344604492, "step": 60135 }, { "epoch": 0.0054, "grad_norm": 6.504842281341553, "learning_rate": 2.013181818181818e-06, "loss": 6.25641975402832, "step": 60140 }, { "epoch": 0.00545, "grad_norm": 15.156455039978027, "learning_rate": 2.012929292929293e-06, "loss": 6.238505554199219, "step": 60145 }, { "epoch": 0.0055, "grad_norm": 6.301206588745117, "learning_rate": 2.012676767676768e-06, "loss": 6.34530029296875, "step": 60150 }, { "epoch": 0.00555, "grad_norm": 7.3131279945373535, "learning_rate": 2.012424242424243e-06, "loss": 6.230226516723633, "step": 60155 }, { "epoch": 0.0056, "grad_norm": 24.909421920776367, "learning_rate": 2.0121717171717175e-06, "loss": 6.2019702911376955, "step": 60160 }, { "epoch": 0.00565, "grad_norm": 6.771926403045654, "learning_rate": 2.011919191919192e-06, "loss": 6.258347702026367, "step": 60165 }, { "epoch": 0.0057, "grad_norm": 6.397648811340332, "learning_rate": 2.0116666666666667e-06, "loss": 6.244483566284179, "step": 60170 }, { "epoch": 0.00575, "grad_norm": 6.347175598144531, "learning_rate": 2.011414141414142e-06, "loss": 6.259653472900391, "step": 60175 }, { "epoch": 0.0058, "grad_norm": 11.093610763549805, "learning_rate": 2.0111616161616164e-06, "loss": 6.292001342773437, "step": 60180 }, { "epoch": 0.00585, "grad_norm": 12.047823905944824, "learning_rate": 2.010909090909091e-06, "loss": 6.200966644287109, "step": 60185 }, { "epoch": 0.0059, "grad_norm": 4.255692481994629, "learning_rate": 2.0106565656565657e-06, "loss": 6.215106201171875, "step": 60190 }, { "epoch": 0.00595, "grad_norm": 15.39276123046875, "learning_rate": 2.0104040404040407e-06, "loss": 6.346302032470703, "step": 60195 }, { "epoch": 0.006, "grad_norm": 4.610053062438965, "learning_rate": 2.0101515151515154e-06, "loss": 6.21270523071289, "step": 60200 }, { "epoch": 0.00605, "grad_norm": 7.643107891082764, "learning_rate": 2.00989898989899e-06, "loss": 6.325513076782227, "step": 60205 }, { "epoch": 0.0061, "grad_norm": 6.443765163421631, "learning_rate": 2.0096464646464646e-06, "loss": 6.24625244140625, "step": 60210 }, { "epoch": 0.00615, "grad_norm": 6.429826736450195, "learning_rate": 2.0093939393939397e-06, "loss": 6.231423187255859, "step": 60215 }, { "epoch": 0.0062, "grad_norm": 6.8534016609191895, "learning_rate": 2.0091414141414143e-06, "loss": 6.255816650390625, "step": 60220 }, { "epoch": 0.00625, "grad_norm": 5.815697193145752, "learning_rate": 2.008888888888889e-06, "loss": 6.20636100769043, "step": 60225 }, { "epoch": 0.0063, "grad_norm": 5.52397346496582, "learning_rate": 2.0086363636363636e-06, "loss": 6.208478927612305, "step": 60230 }, { "epoch": 0.00635, "grad_norm": 7.683022499084473, "learning_rate": 2.0083838383838386e-06, "loss": 6.226814270019531, "step": 60235 }, { "epoch": 0.0064, "grad_norm": 5.193235397338867, "learning_rate": 2.0081313131313133e-06, "loss": 6.231290054321289, "step": 60240 }, { "epoch": 0.00645, "grad_norm": 4.641952991485596, "learning_rate": 2.007878787878788e-06, "loss": 6.4659889221191404, "step": 60245 }, { "epoch": 0.0065, "grad_norm": 15.077449798583984, "learning_rate": 2.0076262626262625e-06, "loss": 6.231301116943359, "step": 60250 }, { "epoch": 0.00655, "grad_norm": 9.206083297729492, "learning_rate": 2.0073737373737376e-06, "loss": 6.288860321044922, "step": 60255 }, { "epoch": 0.0066, "grad_norm": 5.910830497741699, "learning_rate": 2.007121212121212e-06, "loss": 6.2178489685058596, "step": 60260 }, { "epoch": 0.00665, "grad_norm": 5.643435001373291, "learning_rate": 2.0068686868686873e-06, "loss": 6.241171264648438, "step": 60265 }, { "epoch": 0.0067, "grad_norm": 6.373255252838135, "learning_rate": 2.006616161616162e-06, "loss": 6.220059967041015, "step": 60270 }, { "epoch": 0.00675, "grad_norm": 6.569882869720459, "learning_rate": 2.0063636363636365e-06, "loss": 6.289024734497071, "step": 60275 }, { "epoch": 0.0068, "grad_norm": 7.0203022956848145, "learning_rate": 2.006111111111111e-06, "loss": 6.260668563842773, "step": 60280 }, { "epoch": 0.00685, "grad_norm": 4.479996204376221, "learning_rate": 2.005858585858586e-06, "loss": 6.209384155273438, "step": 60285 }, { "epoch": 0.0069, "grad_norm": 6.043213844299316, "learning_rate": 2.005606060606061e-06, "loss": 6.265098953247071, "step": 60290 }, { "epoch": 0.00695, "grad_norm": 5.4271368980407715, "learning_rate": 2.0053535353535355e-06, "loss": 6.273579025268555, "step": 60295 }, { "epoch": 0.007, "grad_norm": 6.797764301300049, "learning_rate": 2.00510101010101e-06, "loss": 6.22705307006836, "step": 60300 }, { "epoch": 0.00705, "grad_norm": 9.559730529785156, "learning_rate": 2.004848484848485e-06, "loss": 6.228847503662109, "step": 60305 }, { "epoch": 0.0071, "grad_norm": 5.022701263427734, "learning_rate": 2.0045959595959598e-06, "loss": 6.237430191040039, "step": 60310 }, { "epoch": 0.00715, "grad_norm": 7.107255458831787, "learning_rate": 2.0043434343434344e-06, "loss": 6.243033981323242, "step": 60315 }, { "epoch": 0.0072, "grad_norm": 6.131774425506592, "learning_rate": 2.004090909090909e-06, "loss": 6.221183776855469, "step": 60320 }, { "epoch": 0.00725, "grad_norm": 11.75971508026123, "learning_rate": 2.003838383838384e-06, "loss": 6.243957901000977, "step": 60325 }, { "epoch": 0.0073, "grad_norm": 7.2638959884643555, "learning_rate": 2.0035858585858587e-06, "loss": 6.22242431640625, "step": 60330 }, { "epoch": 0.00735, "grad_norm": 7.119653224945068, "learning_rate": 2.0033333333333334e-06, "loss": 6.5706642150878904, "step": 60335 }, { "epoch": 0.0074, "grad_norm": 4.83133602142334, "learning_rate": 2.003080808080808e-06, "loss": 6.252746963500977, "step": 60340 }, { "epoch": 0.00745, "grad_norm": 5.236952304840088, "learning_rate": 2.002828282828283e-06, "loss": 6.4335884094238285, "step": 60345 }, { "epoch": 0.0075, "grad_norm": 8.259407997131348, "learning_rate": 2.0025757575757577e-06, "loss": 6.218289947509765, "step": 60350 }, { "epoch": 0.00755, "grad_norm": 2.9876673221588135, "learning_rate": 2.0023232323232323e-06, "loss": 6.292863845825195, "step": 60355 }, { "epoch": 0.0076, "grad_norm": 5.527287006378174, "learning_rate": 2.002070707070707e-06, "loss": 6.3529094696044925, "step": 60360 }, { "epoch": 0.00765, "grad_norm": 14.29595947265625, "learning_rate": 2.001818181818182e-06, "loss": 6.214306640625, "step": 60365 }, { "epoch": 0.0077, "grad_norm": 7.4387688636779785, "learning_rate": 2.0015656565656566e-06, "loss": 6.2567626953125, "step": 60370 }, { "epoch": 0.00775, "grad_norm": 10.032439231872559, "learning_rate": 2.0013131313131317e-06, "loss": 6.311246490478515, "step": 60375 }, { "epoch": 0.0078, "grad_norm": 3.9964418411254883, "learning_rate": 2.0010606060606063e-06, "loss": 6.221828460693359, "step": 60380 }, { "epoch": 0.00785, "grad_norm": 3.7931039333343506, "learning_rate": 2.000808080808081e-06, "loss": 6.2532707214355465, "step": 60385 }, { "epoch": 0.0079, "grad_norm": 5.531356334686279, "learning_rate": 2.0005555555555556e-06, "loss": 6.284926986694336, "step": 60390 }, { "epoch": 0.00795, "grad_norm": 5.465713024139404, "learning_rate": 2.0003030303030306e-06, "loss": 6.235190582275391, "step": 60395 }, { "epoch": 0.008, "grad_norm": 26.469636917114258, "learning_rate": 2.0000505050505052e-06, "loss": 6.261493301391601, "step": 60400 }, { "epoch": 0.00805, "grad_norm": 12.369901657104492, "learning_rate": 1.99979797979798e-06, "loss": 6.281419372558593, "step": 60405 }, { "epoch": 0.0081, "grad_norm": 7.636369705200195, "learning_rate": 1.999545454545455e-06, "loss": 6.244049072265625, "step": 60410 }, { "epoch": 0.00815, "grad_norm": 8.567362785339355, "learning_rate": 1.9992929292929296e-06, "loss": 6.239253234863281, "step": 60415 }, { "epoch": 0.0082, "grad_norm": 7.124471187591553, "learning_rate": 1.999040404040404e-06, "loss": 6.256874847412109, "step": 60420 }, { "epoch": 0.00825, "grad_norm": 9.46297550201416, "learning_rate": 1.998787878787879e-06, "loss": 6.2257530212402346, "step": 60425 }, { "epoch": 0.0083, "grad_norm": 4.428560733795166, "learning_rate": 1.998535353535354e-06, "loss": 6.232695770263672, "step": 60430 }, { "epoch": 0.00835, "grad_norm": 4.473817825317383, "learning_rate": 1.9982828282828285e-06, "loss": 6.25379524230957, "step": 60435 }, { "epoch": 0.0084, "grad_norm": 5.506465435028076, "learning_rate": 1.998030303030303e-06, "loss": 6.293743896484375, "step": 60440 }, { "epoch": 0.00845, "grad_norm": 5.7820000648498535, "learning_rate": 1.9977777777777778e-06, "loss": 6.2140850067138675, "step": 60445 }, { "epoch": 0.0085, "grad_norm": 5.462398052215576, "learning_rate": 1.997525252525253e-06, "loss": 6.28991470336914, "step": 60450 }, { "epoch": 0.00855, "grad_norm": 8.032393455505371, "learning_rate": 1.9972727272727274e-06, "loss": 6.2767189025878904, "step": 60455 }, { "epoch": 0.0086, "grad_norm": 6.902390003204346, "learning_rate": 1.9970202020202025e-06, "loss": 6.20337142944336, "step": 60460 }, { "epoch": 0.00865, "grad_norm": 6.40850305557251, "learning_rate": 1.9967676767676767e-06, "loss": 6.2323345184326175, "step": 60465 }, { "epoch": 0.0087, "grad_norm": 5.9168477058410645, "learning_rate": 1.9965151515151518e-06, "loss": 6.231288146972656, "step": 60470 }, { "epoch": 0.00875, "grad_norm": 5.216104030609131, "learning_rate": 1.9962626262626264e-06, "loss": 6.195757293701172, "step": 60475 }, { "epoch": 0.0088, "grad_norm": 6.119329929351807, "learning_rate": 1.9960101010101014e-06, "loss": 6.369736099243164, "step": 60480 }, { "epoch": 0.00885, "grad_norm": 6.043377876281738, "learning_rate": 1.995757575757576e-06, "loss": 6.215894317626953, "step": 60485 }, { "epoch": 0.0089, "grad_norm": 5.906756401062012, "learning_rate": 1.9955050505050507e-06, "loss": 6.242370223999023, "step": 60490 }, { "epoch": 0.00895, "grad_norm": 6.157674312591553, "learning_rate": 1.9952525252525253e-06, "loss": 6.296054840087891, "step": 60495 }, { "epoch": 0.009, "grad_norm": 36.200233459472656, "learning_rate": 1.9950000000000004e-06, "loss": 6.289266967773438, "step": 60500 }, { "epoch": 0.00905, "grad_norm": 6.483680248260498, "learning_rate": 1.994747474747475e-06, "loss": 6.228556823730469, "step": 60505 }, { "epoch": 0.0091, "grad_norm": 5.807936668395996, "learning_rate": 1.9944949494949496e-06, "loss": 6.239947509765625, "step": 60510 }, { "epoch": 0.00915, "grad_norm": 4.65435791015625, "learning_rate": 1.9942424242424243e-06, "loss": 6.181417083740234, "step": 60515 }, { "epoch": 0.0092, "grad_norm": 17.837310791015625, "learning_rate": 1.9939898989898993e-06, "loss": 6.286280822753906, "step": 60520 }, { "epoch": 0.00925, "grad_norm": 45.70423889160156, "learning_rate": 1.993737373737374e-06, "loss": 6.392487335205078, "step": 60525 }, { "epoch": 0.0093, "grad_norm": 6.2581963539123535, "learning_rate": 1.9934848484848486e-06, "loss": 6.355695343017578, "step": 60530 }, { "epoch": 0.00935, "grad_norm": 3.0929348468780518, "learning_rate": 1.9932323232323232e-06, "loss": 6.25744400024414, "step": 60535 }, { "epoch": 0.0094, "grad_norm": 7.831854343414307, "learning_rate": 1.9929797979797983e-06, "loss": 6.222169494628906, "step": 60540 }, { "epoch": 0.00945, "grad_norm": 5.140584468841553, "learning_rate": 1.992727272727273e-06, "loss": 6.266685485839844, "step": 60545 }, { "epoch": 0.0095, "grad_norm": 5.776650905609131, "learning_rate": 1.9924747474747475e-06, "loss": 6.232190704345703, "step": 60550 }, { "epoch": 0.00955, "grad_norm": 6.661998748779297, "learning_rate": 1.992222222222222e-06, "loss": 6.0201061248779295, "step": 60555 }, { "epoch": 0.0096, "grad_norm": 8.122564315795898, "learning_rate": 1.9919696969696972e-06, "loss": 6.2887725830078125, "step": 60560 }, { "epoch": 0.00965, "grad_norm": 11.944304466247559, "learning_rate": 1.991717171717172e-06, "loss": 6.307456207275391, "step": 60565 }, { "epoch": 0.0097, "grad_norm": 10.974482536315918, "learning_rate": 1.991464646464647e-06, "loss": 6.268170928955078, "step": 60570 }, { "epoch": 0.00975, "grad_norm": 12.80008602142334, "learning_rate": 1.9912121212121215e-06, "loss": 6.320352935791016, "step": 60575 }, { "epoch": 0.0098, "grad_norm": 10.840353965759277, "learning_rate": 1.990959595959596e-06, "loss": 6.218272018432617, "step": 60580 }, { "epoch": 0.00985, "grad_norm": 7.079646110534668, "learning_rate": 1.9907070707070708e-06, "loss": 6.2582744598388675, "step": 60585 }, { "epoch": 0.0099, "grad_norm": 6.2472124099731445, "learning_rate": 1.990454545454546e-06, "loss": 6.276491928100586, "step": 60590 }, { "epoch": 0.00995, "grad_norm": 4.6305365562438965, "learning_rate": 1.9902020202020205e-06, "loss": 6.297570037841797, "step": 60595 }, { "epoch": 0.01, "grad_norm": 7.899741172790527, "learning_rate": 1.989949494949495e-06, "loss": 6.255580139160156, "step": 60600 }, { "epoch": 0.01005, "grad_norm": 11.585744857788086, "learning_rate": 1.9896969696969697e-06, "loss": 6.269823455810547, "step": 60605 }, { "epoch": 0.0101, "grad_norm": 8.503395080566406, "learning_rate": 1.9894444444444448e-06, "loss": 6.229752731323242, "step": 60610 }, { "epoch": 0.01015, "grad_norm": 10.491551399230957, "learning_rate": 1.9891919191919194e-06, "loss": 6.226107788085938, "step": 60615 }, { "epoch": 0.0102, "grad_norm": 4.6829833984375, "learning_rate": 1.988939393939394e-06, "loss": 6.230603790283203, "step": 60620 }, { "epoch": 0.01025, "grad_norm": 6.6746320724487305, "learning_rate": 1.9886868686868687e-06, "loss": 6.268526458740235, "step": 60625 }, { "epoch": 0.0103, "grad_norm": 5.729833126068115, "learning_rate": 1.9884343434343437e-06, "loss": 6.224689483642578, "step": 60630 }, { "epoch": 0.01035, "grad_norm": 9.361841201782227, "learning_rate": 1.9881818181818184e-06, "loss": 6.19384994506836, "step": 60635 }, { "epoch": 0.0104, "grad_norm": 6.529880523681641, "learning_rate": 1.987929292929293e-06, "loss": 6.556993103027343, "step": 60640 }, { "epoch": 0.01045, "grad_norm": 7.6001973152160645, "learning_rate": 1.9876767676767676e-06, "loss": 6.283411026000977, "step": 60645 }, { "epoch": 0.0105, "grad_norm": 6.009276390075684, "learning_rate": 1.9874242424242427e-06, "loss": 6.22357177734375, "step": 60650 }, { "epoch": 0.01055, "grad_norm": 6.773976802825928, "learning_rate": 1.9871717171717173e-06, "loss": 6.256934356689453, "step": 60655 }, { "epoch": 0.0106, "grad_norm": 4.642366409301758, "learning_rate": 1.986919191919192e-06, "loss": 6.23255615234375, "step": 60660 }, { "epoch": 0.01065, "grad_norm": 6.0892014503479, "learning_rate": 1.9866666666666666e-06, "loss": 6.278923797607422, "step": 60665 }, { "epoch": 0.0107, "grad_norm": 6.796353816986084, "learning_rate": 1.9864141414141416e-06, "loss": 6.269960021972656, "step": 60670 }, { "epoch": 0.01075, "grad_norm": 8.000864028930664, "learning_rate": 1.9861616161616162e-06, "loss": 6.2492225646972654, "step": 60675 }, { "epoch": 0.0108, "grad_norm": 8.959948539733887, "learning_rate": 1.9859090909090913e-06, "loss": 6.299065399169922, "step": 60680 }, { "epoch": 0.01085, "grad_norm": 8.231047630310059, "learning_rate": 1.985656565656566e-06, "loss": 6.350974273681641, "step": 60685 }, { "epoch": 0.0109, "grad_norm": 3.9421379566192627, "learning_rate": 1.9854040404040406e-06, "loss": 6.284710693359375, "step": 60690 }, { "epoch": 0.01095, "grad_norm": 7.266407012939453, "learning_rate": 1.985151515151515e-06, "loss": 6.264046478271484, "step": 60695 }, { "epoch": 0.011, "grad_norm": 4.170773029327393, "learning_rate": 1.9848989898989902e-06, "loss": 6.263513565063477, "step": 60700 }, { "epoch": 0.01105, "grad_norm": 5.349092483520508, "learning_rate": 1.984646464646465e-06, "loss": 6.2103118896484375, "step": 60705 }, { "epoch": 0.0111, "grad_norm": 5.523039817810059, "learning_rate": 1.9843939393939395e-06, "loss": 6.4494880676269535, "step": 60710 }, { "epoch": 0.01115, "grad_norm": 8.66810131072998, "learning_rate": 1.984141414141414e-06, "loss": 6.268756484985351, "step": 60715 }, { "epoch": 0.0112, "grad_norm": 8.47950267791748, "learning_rate": 1.983888888888889e-06, "loss": 6.22501220703125, "step": 60720 }, { "epoch": 0.01125, "grad_norm": 5.457543849945068, "learning_rate": 1.983636363636364e-06, "loss": 6.204750823974609, "step": 60725 }, { "epoch": 0.0113, "grad_norm": 4.667806625366211, "learning_rate": 1.9833838383838384e-06, "loss": 6.272332000732422, "step": 60730 }, { "epoch": 0.01135, "grad_norm": 5.816122055053711, "learning_rate": 1.983131313131313e-06, "loss": 6.313468170166016, "step": 60735 }, { "epoch": 0.0114, "grad_norm": 5.834235668182373, "learning_rate": 1.982878787878788e-06, "loss": 6.212045288085937, "step": 60740 }, { "epoch": 0.01145, "grad_norm": 7.278931140899658, "learning_rate": 1.9826262626262628e-06, "loss": 6.240943908691406, "step": 60745 }, { "epoch": 0.0115, "grad_norm": 7.393906593322754, "learning_rate": 1.9823737373737374e-06, "loss": 6.258908462524414, "step": 60750 }, { "epoch": 0.01155, "grad_norm": 7.47499418258667, "learning_rate": 1.982121212121212e-06, "loss": 6.202984619140625, "step": 60755 }, { "epoch": 0.0116, "grad_norm": 5.429882049560547, "learning_rate": 1.981868686868687e-06, "loss": 6.190528869628906, "step": 60760 }, { "epoch": 0.01165, "grad_norm": 10.82950210571289, "learning_rate": 1.9816161616161617e-06, "loss": 6.340422821044922, "step": 60765 }, { "epoch": 0.0117, "grad_norm": 6.817320823669434, "learning_rate": 1.9813636363636363e-06, "loss": 6.258853530883789, "step": 60770 }, { "epoch": 0.01175, "grad_norm": 5.612744331359863, "learning_rate": 1.981111111111111e-06, "loss": 6.226914978027343, "step": 60775 }, { "epoch": 0.0118, "grad_norm": 8.702081680297852, "learning_rate": 1.980858585858586e-06, "loss": 6.331951141357422, "step": 60780 }, { "epoch": 0.01185, "grad_norm": 8.235336303710938, "learning_rate": 1.9806060606060606e-06, "loss": 6.283246612548828, "step": 60785 }, { "epoch": 0.0119, "grad_norm": 6.59804630279541, "learning_rate": 1.9803535353535357e-06, "loss": 6.235300445556641, "step": 60790 }, { "epoch": 0.01195, "grad_norm": 9.56743049621582, "learning_rate": 1.9801010101010103e-06, "loss": 6.216157531738281, "step": 60795 }, { "epoch": 0.012, "grad_norm": 5.223670482635498, "learning_rate": 1.979848484848485e-06, "loss": 6.275536727905274, "step": 60800 }, { "epoch": 0.01205, "grad_norm": 11.709803581237793, "learning_rate": 1.9795959595959596e-06, "loss": 6.273484802246093, "step": 60805 }, { "epoch": 0.0121, "grad_norm": 5.715659141540527, "learning_rate": 1.9793434343434346e-06, "loss": 6.249721527099609, "step": 60810 }, { "epoch": 0.01215, "grad_norm": 4.231480121612549, "learning_rate": 1.9790909090909093e-06, "loss": 6.277945327758789, "step": 60815 }, { "epoch": 0.0122, "grad_norm": 8.180102348327637, "learning_rate": 1.978838383838384e-06, "loss": 6.236728668212891, "step": 60820 }, { "epoch": 0.01225, "grad_norm": 5.081385612487793, "learning_rate": 1.9785858585858585e-06, "loss": 6.230796432495117, "step": 60825 }, { "epoch": 0.0123, "grad_norm": 5.7273430824279785, "learning_rate": 1.9783333333333336e-06, "loss": 6.309064102172852, "step": 60830 }, { "epoch": 0.01235, "grad_norm": 23.83836555480957, "learning_rate": 1.9780808080808082e-06, "loss": 6.239939498901367, "step": 60835 }, { "epoch": 0.0124, "grad_norm": 6.262448787689209, "learning_rate": 1.977828282828283e-06, "loss": 6.277952194213867, "step": 60840 }, { "epoch": 0.01245, "grad_norm": 6.221514701843262, "learning_rate": 1.977575757575758e-06, "loss": 6.244728469848633, "step": 60845 }, { "epoch": 0.0125, "grad_norm": 6.298793792724609, "learning_rate": 1.9773232323232325e-06, "loss": 6.2583152770996096, "step": 60850 }, { "epoch": 0.01255, "grad_norm": 7.83601713180542, "learning_rate": 1.977070707070707e-06, "loss": 6.282139587402344, "step": 60855 }, { "epoch": 0.0126, "grad_norm": 27.3237361907959, "learning_rate": 1.976818181818182e-06, "loss": 6.40284423828125, "step": 60860 }, { "epoch": 0.01265, "grad_norm": 18.165090560913086, "learning_rate": 1.976565656565657e-06, "loss": 6.237545394897461, "step": 60865 }, { "epoch": 0.0127, "grad_norm": 5.933725357055664, "learning_rate": 1.9763131313131315e-06, "loss": 6.274195861816406, "step": 60870 }, { "epoch": 0.01275, "grad_norm": 17.821800231933594, "learning_rate": 1.9760606060606065e-06, "loss": 6.346512985229492, "step": 60875 }, { "epoch": 0.0128, "grad_norm": 3.730544090270996, "learning_rate": 1.975808080808081e-06, "loss": 6.273693084716797, "step": 60880 }, { "epoch": 0.01285, "grad_norm": 13.370499610900879, "learning_rate": 1.975555555555556e-06, "loss": 6.332862854003906, "step": 60885 }, { "epoch": 0.0129, "grad_norm": 3.9764621257781982, "learning_rate": 1.9753030303030304e-06, "loss": 6.2769218444824215, "step": 60890 }, { "epoch": 0.01295, "grad_norm": 10.856285095214844, "learning_rate": 1.9750505050505055e-06, "loss": 6.279421997070313, "step": 60895 }, { "epoch": 0.013, "grad_norm": 8.861839294433594, "learning_rate": 1.97479797979798e-06, "loss": 6.118195343017578, "step": 60900 }, { "epoch": 0.01305, "grad_norm": 8.741432189941406, "learning_rate": 1.9745454545454547e-06, "loss": 6.212464904785156, "step": 60905 }, { "epoch": 0.0131, "grad_norm": 10.367667198181152, "learning_rate": 1.9742929292929294e-06, "loss": 6.213652038574219, "step": 60910 }, { "epoch": 0.01315, "grad_norm": 9.280802726745605, "learning_rate": 1.9740404040404044e-06, "loss": 6.267676544189453, "step": 60915 }, { "epoch": 0.0132, "grad_norm": 6.423469066619873, "learning_rate": 1.973787878787879e-06, "loss": 6.259995651245117, "step": 60920 }, { "epoch": 0.01325, "grad_norm": 6.7366180419921875, "learning_rate": 1.9735353535353537e-06, "loss": 7.04517822265625, "step": 60925 }, { "epoch": 0.0133, "grad_norm": 22.47977066040039, "learning_rate": 1.9732828282828283e-06, "loss": 6.3927349090576175, "step": 60930 }, { "epoch": 0.01335, "grad_norm": 8.344408988952637, "learning_rate": 1.9730303030303034e-06, "loss": 6.288350296020508, "step": 60935 }, { "epoch": 0.0134, "grad_norm": 10.705971717834473, "learning_rate": 1.972777777777778e-06, "loss": 6.211843872070313, "step": 60940 }, { "epoch": 0.01345, "grad_norm": 5.872666358947754, "learning_rate": 1.9725252525252526e-06, "loss": 6.271096038818359, "step": 60945 }, { "epoch": 0.0135, "grad_norm": 5.434274673461914, "learning_rate": 1.9722727272727273e-06, "loss": 6.2392021179199215, "step": 60950 }, { "epoch": 0.01355, "grad_norm": 5.090490341186523, "learning_rate": 1.9720202020202023e-06, "loss": 6.474132537841797, "step": 60955 }, { "epoch": 0.0136, "grad_norm": 7.486021518707275, "learning_rate": 1.971767676767677e-06, "loss": 6.243103790283203, "step": 60960 }, { "epoch": 0.01365, "grad_norm": 4.96299409866333, "learning_rate": 1.9715151515151516e-06, "loss": 6.221944427490234, "step": 60965 }, { "epoch": 0.0137, "grad_norm": 5.648576736450195, "learning_rate": 1.971262626262626e-06, "loss": 6.304189300537109, "step": 60970 }, { "epoch": 0.01375, "grad_norm": 4.215732574462891, "learning_rate": 1.9710101010101013e-06, "loss": 6.3655242919921875, "step": 60975 }, { "epoch": 0.0138, "grad_norm": 7.619739532470703, "learning_rate": 1.970757575757576e-06, "loss": 6.271966552734375, "step": 60980 }, { "epoch": 0.01385, "grad_norm": 8.520320892333984, "learning_rate": 1.970505050505051e-06, "loss": 6.250365829467773, "step": 60985 }, { "epoch": 0.0139, "grad_norm": 6.148305416107178, "learning_rate": 1.9702525252525256e-06, "loss": 6.244662094116211, "step": 60990 }, { "epoch": 0.01395, "grad_norm": 4.868682384490967, "learning_rate": 1.97e-06, "loss": 6.3282421112060545, "step": 60995 }, { "epoch": 0.014, "grad_norm": 6.870163917541504, "learning_rate": 1.969747474747475e-06, "loss": 6.297433471679687, "step": 61000 }, { "epoch": 0.01405, "grad_norm": 4.963321685791016, "learning_rate": 1.96949494949495e-06, "loss": 6.239634704589844, "step": 61005 }, { "epoch": 0.0141, "grad_norm": 5.765824317932129, "learning_rate": 1.9692424242424245e-06, "loss": 6.259420013427734, "step": 61010 }, { "epoch": 0.01415, "grad_norm": 9.140663146972656, "learning_rate": 1.968989898989899e-06, "loss": 6.230980682373047, "step": 61015 }, { "epoch": 0.0142, "grad_norm": 29.48866844177246, "learning_rate": 1.9687373737373738e-06, "loss": 6.4918052673339846, "step": 61020 }, { "epoch": 0.01425, "grad_norm": 17.52800178527832, "learning_rate": 1.968484848484849e-06, "loss": 6.206296920776367, "step": 61025 }, { "epoch": 0.0143, "grad_norm": 4.271504878997803, "learning_rate": 1.9682323232323235e-06, "loss": 6.385227203369141, "step": 61030 }, { "epoch": 0.01435, "grad_norm": 5.432065010070801, "learning_rate": 1.967979797979798e-06, "loss": 6.209176254272461, "step": 61035 }, { "epoch": 0.0144, "grad_norm": 8.290299415588379, "learning_rate": 1.9677272727272727e-06, "loss": 6.303031539916992, "step": 61040 }, { "epoch": 0.01445, "grad_norm": 4.586382865905762, "learning_rate": 1.9674747474747478e-06, "loss": 6.246053314208984, "step": 61045 }, { "epoch": 0.0145, "grad_norm": 7.7480645179748535, "learning_rate": 1.9672222222222224e-06, "loss": 6.279686355590821, "step": 61050 }, { "epoch": 0.01455, "grad_norm": 5.018340587615967, "learning_rate": 1.966969696969697e-06, "loss": 6.269371414184571, "step": 61055 }, { "epoch": 0.0146, "grad_norm": 7.234313011169434, "learning_rate": 1.9667171717171717e-06, "loss": 6.343376541137696, "step": 61060 }, { "epoch": 0.01465, "grad_norm": 7.07390832901001, "learning_rate": 1.9664646464646467e-06, "loss": 6.252316284179687, "step": 61065 }, { "epoch": 0.0147, "grad_norm": 6.568362236022949, "learning_rate": 1.9662121212121213e-06, "loss": 6.241109466552734, "step": 61070 }, { "epoch": 0.01475, "grad_norm": 6.508078098297119, "learning_rate": 1.965959595959596e-06, "loss": 6.238957214355469, "step": 61075 }, { "epoch": 0.0148, "grad_norm": 6.145944118499756, "learning_rate": 1.9657070707070706e-06, "loss": 6.258496475219727, "step": 61080 }, { "epoch": 0.01485, "grad_norm": 7.486818313598633, "learning_rate": 1.9654545454545457e-06, "loss": 6.2622119903564455, "step": 61085 }, { "epoch": 0.0149, "grad_norm": 20.24607276916504, "learning_rate": 1.9652020202020203e-06, "loss": 6.274840545654297, "step": 61090 }, { "epoch": 0.01495, "grad_norm": 7.916991710662842, "learning_rate": 1.9649494949494953e-06, "loss": 6.277845001220703, "step": 61095 }, { "epoch": 0.015, "grad_norm": 6.320483684539795, "learning_rate": 1.96469696969697e-06, "loss": 6.196188354492188, "step": 61100 }, { "epoch": 0.01505, "grad_norm": 8.552095413208008, "learning_rate": 1.9644444444444446e-06, "loss": 6.24039306640625, "step": 61105 }, { "epoch": 0.0151, "grad_norm": 5.940351963043213, "learning_rate": 1.9641919191919192e-06, "loss": 6.195687484741211, "step": 61110 }, { "epoch": 0.01515, "grad_norm": 4.1484270095825195, "learning_rate": 1.9639393939393943e-06, "loss": 6.259931945800782, "step": 61115 }, { "epoch": 0.0152, "grad_norm": 8.740252494812012, "learning_rate": 1.963686868686869e-06, "loss": 6.240729904174804, "step": 61120 }, { "epoch": 0.01525, "grad_norm": 18.98682975769043, "learning_rate": 1.9634343434343435e-06, "loss": 6.606785583496094, "step": 61125 }, { "epoch": 0.0153, "grad_norm": 12.902613639831543, "learning_rate": 1.963181818181818e-06, "loss": 6.462968444824218, "step": 61130 }, { "epoch": 0.01535, "grad_norm": 7.9222493171691895, "learning_rate": 1.9629292929292932e-06, "loss": 6.277954864501953, "step": 61135 }, { "epoch": 0.0154, "grad_norm": 10.02670669555664, "learning_rate": 1.962676767676768e-06, "loss": 6.2491096496582035, "step": 61140 }, { "epoch": 0.01545, "grad_norm": 3.3952555656433105, "learning_rate": 1.9624242424242425e-06, "loss": 6.2495983123779295, "step": 61145 }, { "epoch": 0.0155, "grad_norm": 6.913630485534668, "learning_rate": 1.962171717171717e-06, "loss": 6.200589752197265, "step": 61150 }, { "epoch": 0.01555, "grad_norm": 6.814145088195801, "learning_rate": 1.961919191919192e-06, "loss": 6.249624252319336, "step": 61155 }, { "epoch": 0.0156, "grad_norm": 3.7733800411224365, "learning_rate": 1.961666666666667e-06, "loss": 6.282230377197266, "step": 61160 }, { "epoch": 0.01565, "grad_norm": 6.1585259437561035, "learning_rate": 1.9614141414141414e-06, "loss": 6.226895141601562, "step": 61165 }, { "epoch": 0.0157, "grad_norm": 5.8989949226379395, "learning_rate": 1.961161616161616e-06, "loss": 6.266678237915039, "step": 61170 }, { "epoch": 0.01575, "grad_norm": 8.671382904052734, "learning_rate": 1.960909090909091e-06, "loss": 6.250029373168945, "step": 61175 }, { "epoch": 0.0158, "grad_norm": 8.636626243591309, "learning_rate": 1.9606565656565657e-06, "loss": 6.351825714111328, "step": 61180 }, { "epoch": 0.01585, "grad_norm": 12.288698196411133, "learning_rate": 1.9604040404040404e-06, "loss": 6.231764602661133, "step": 61185 }, { "epoch": 0.0159, "grad_norm": 9.077890396118164, "learning_rate": 1.960151515151515e-06, "loss": 6.2196098327636715, "step": 61190 }, { "epoch": 0.01595, "grad_norm": 15.452398300170898, "learning_rate": 1.95989898989899e-06, "loss": 6.2748157501220705, "step": 61195 }, { "epoch": 0.016, "grad_norm": 5.283596515655518, "learning_rate": 1.9596464646464647e-06, "loss": 6.2949989318847654, "step": 61200 }, { "epoch": 0.01605, "grad_norm": 9.215930938720703, "learning_rate": 1.9593939393939397e-06, "loss": 6.17376708984375, "step": 61205 }, { "epoch": 0.0161, "grad_norm": 7.631320953369141, "learning_rate": 1.9591414141414144e-06, "loss": 6.206688308715821, "step": 61210 }, { "epoch": 0.01615, "grad_norm": 4.491982936859131, "learning_rate": 1.958888888888889e-06, "loss": 6.205833816528321, "step": 61215 }, { "epoch": 0.0162, "grad_norm": 9.624600410461426, "learning_rate": 1.9586363636363636e-06, "loss": 6.339756774902344, "step": 61220 }, { "epoch": 0.01625, "grad_norm": 7.85897159576416, "learning_rate": 1.9583838383838387e-06, "loss": 6.315199661254883, "step": 61225 }, { "epoch": 0.0163, "grad_norm": 5.664154529571533, "learning_rate": 1.9581313131313133e-06, "loss": 6.298531723022461, "step": 61230 }, { "epoch": 0.01635, "grad_norm": 4.653364658355713, "learning_rate": 1.957878787878788e-06, "loss": 6.28577880859375, "step": 61235 }, { "epoch": 0.0164, "grad_norm": 6.0546956062316895, "learning_rate": 1.9576262626262626e-06, "loss": 6.291125869750976, "step": 61240 }, { "epoch": 0.01645, "grad_norm": 6.361539363861084, "learning_rate": 1.9573737373737376e-06, "loss": 6.23552360534668, "step": 61245 }, { "epoch": 0.0165, "grad_norm": 9.9686861038208, "learning_rate": 1.9571212121212123e-06, "loss": 6.242179870605469, "step": 61250 }, { "epoch": 0.01655, "grad_norm": 4.378689765930176, "learning_rate": 1.956868686868687e-06, "loss": 6.21802978515625, "step": 61255 }, { "epoch": 0.0166, "grad_norm": 7.030879974365234, "learning_rate": 1.956616161616162e-06, "loss": 6.2537086486816404, "step": 61260 }, { "epoch": 0.01665, "grad_norm": 12.64846134185791, "learning_rate": 1.9563636363636366e-06, "loss": 6.298160171508789, "step": 61265 }, { "epoch": 0.0167, "grad_norm": 6.216583251953125, "learning_rate": 1.956111111111111e-06, "loss": 6.2561698913574215, "step": 61270 }, { "epoch": 0.01675, "grad_norm": 5.1598687171936035, "learning_rate": 1.955858585858586e-06, "loss": 6.277424240112305, "step": 61275 }, { "epoch": 0.0168, "grad_norm": 7.809713363647461, "learning_rate": 1.955606060606061e-06, "loss": 6.354208374023438, "step": 61280 }, { "epoch": 0.01685, "grad_norm": 5.174133777618408, "learning_rate": 1.9553535353535355e-06, "loss": 6.214371871948242, "step": 61285 }, { "epoch": 0.0169, "grad_norm": 3.9287097454071045, "learning_rate": 1.9551010101010106e-06, "loss": 6.266456985473633, "step": 61290 }, { "epoch": 0.01695, "grad_norm": 7.26790714263916, "learning_rate": 1.954848484848485e-06, "loss": 6.2532798767089846, "step": 61295 }, { "epoch": 0.017, "grad_norm": 7.2275285720825195, "learning_rate": 1.95459595959596e-06, "loss": 6.2603607177734375, "step": 61300 }, { "epoch": 0.01705, "grad_norm": 3.835983991622925, "learning_rate": 1.9543434343434345e-06, "loss": 6.249977493286133, "step": 61305 }, { "epoch": 0.0171, "grad_norm": 4.869104862213135, "learning_rate": 1.9540909090909095e-06, "loss": 6.226443481445313, "step": 61310 }, { "epoch": 0.01715, "grad_norm": 6.6758503913879395, "learning_rate": 1.953838383838384e-06, "loss": 6.227153015136719, "step": 61315 }, { "epoch": 0.0172, "grad_norm": 4.540431022644043, "learning_rate": 1.9535858585858588e-06, "loss": 6.252095031738281, "step": 61320 }, { "epoch": 0.01725, "grad_norm": 4.527101039886475, "learning_rate": 1.9533333333333334e-06, "loss": 6.236965942382812, "step": 61325 }, { "epoch": 0.0173, "grad_norm": 7.01650333404541, "learning_rate": 1.9530808080808085e-06, "loss": 6.2501380920410154, "step": 61330 }, { "epoch": 0.01735, "grad_norm": 4.949934959411621, "learning_rate": 1.952828282828283e-06, "loss": 6.223410797119141, "step": 61335 }, { "epoch": 0.0174, "grad_norm": 6.10227632522583, "learning_rate": 1.9525757575757577e-06, "loss": 6.129615783691406, "step": 61340 }, { "epoch": 0.01745, "grad_norm": 5.5046210289001465, "learning_rate": 1.9523232323232324e-06, "loss": 6.286882400512695, "step": 61345 }, { "epoch": 0.0175, "grad_norm": 5.89518404006958, "learning_rate": 1.9520707070707074e-06, "loss": 6.292240905761719, "step": 61350 }, { "epoch": 0.01755, "grad_norm": 7.987174034118652, "learning_rate": 1.951818181818182e-06, "loss": 6.239162445068359, "step": 61355 }, { "epoch": 0.0176, "grad_norm": 6.682633399963379, "learning_rate": 1.9515656565656567e-06, "loss": 6.278237533569336, "step": 61360 }, { "epoch": 0.01765, "grad_norm": 5.755557060241699, "learning_rate": 1.9513131313131313e-06, "loss": 6.2776939392089846, "step": 61365 }, { "epoch": 0.0177, "grad_norm": 6.745285987854004, "learning_rate": 1.9510606060606063e-06, "loss": 6.228536224365234, "step": 61370 }, { "epoch": 0.01775, "grad_norm": 5.551852703094482, "learning_rate": 1.950808080808081e-06, "loss": 6.249359512329102, "step": 61375 }, { "epoch": 0.0178, "grad_norm": 9.563072204589844, "learning_rate": 1.9505555555555556e-06, "loss": 6.31646957397461, "step": 61380 }, { "epoch": 0.01785, "grad_norm": 3.599745750427246, "learning_rate": 1.9503030303030302e-06, "loss": 6.248042297363281, "step": 61385 }, { "epoch": 0.0179, "grad_norm": 3.1189823150634766, "learning_rate": 1.9500505050505053e-06, "loss": 6.242288208007812, "step": 61390 }, { "epoch": 0.01795, "grad_norm": 6.692440032958984, "learning_rate": 1.94979797979798e-06, "loss": 6.266823577880859, "step": 61395 }, { "epoch": 0.018, "grad_norm": 4.841796875, "learning_rate": 1.949545454545455e-06, "loss": 6.245434188842774, "step": 61400 }, { "epoch": 0.01805, "grad_norm": 5.50950813293457, "learning_rate": 1.9492929292929296e-06, "loss": 6.279673004150391, "step": 61405 }, { "epoch": 0.0181, "grad_norm": 5.189558506011963, "learning_rate": 1.9490404040404042e-06, "loss": 6.215266418457031, "step": 61410 }, { "epoch": 0.01815, "grad_norm": 42.00397872924805, "learning_rate": 1.948787878787879e-06, "loss": 6.1592857360839846, "step": 61415 }, { "epoch": 0.0182, "grad_norm": 22.234539031982422, "learning_rate": 1.948535353535354e-06, "loss": 6.311231231689453, "step": 61420 }, { "epoch": 0.01825, "grad_norm": 3.9721574783325195, "learning_rate": 1.9482828282828286e-06, "loss": 6.302267837524414, "step": 61425 }, { "epoch": 0.0183, "grad_norm": 5.625781536102295, "learning_rate": 1.948030303030303e-06, "loss": 6.249305725097656, "step": 61430 }, { "epoch": 0.01835, "grad_norm": 6.223180770874023, "learning_rate": 1.947777777777778e-06, "loss": 6.282767105102539, "step": 61435 }, { "epoch": 0.0184, "grad_norm": 4.846621513366699, "learning_rate": 1.947525252525253e-06, "loss": 6.293905258178711, "step": 61440 }, { "epoch": 0.01845, "grad_norm": 5.970860004425049, "learning_rate": 1.9472727272727275e-06, "loss": 6.286924743652344, "step": 61445 }, { "epoch": 0.0185, "grad_norm": 4.048754692077637, "learning_rate": 1.947020202020202e-06, "loss": 6.2968284606933596, "step": 61450 }, { "epoch": 0.01855, "grad_norm": 7.790341854095459, "learning_rate": 1.9467676767676768e-06, "loss": 6.321151351928711, "step": 61455 }, { "epoch": 0.0186, "grad_norm": 3.7478649616241455, "learning_rate": 1.946515151515152e-06, "loss": 6.294407653808594, "step": 61460 }, { "epoch": 0.01865, "grad_norm": 8.140721321105957, "learning_rate": 1.9462626262626264e-06, "loss": 6.205355072021485, "step": 61465 }, { "epoch": 0.0187, "grad_norm": 24.883724212646484, "learning_rate": 1.946010101010101e-06, "loss": 6.281058883666992, "step": 61470 }, { "epoch": 0.01875, "grad_norm": 6.2435126304626465, "learning_rate": 1.9457575757575757e-06, "loss": 6.257168197631836, "step": 61475 }, { "epoch": 0.0188, "grad_norm": 3.840346097946167, "learning_rate": 1.9455050505050508e-06, "loss": 6.28880729675293, "step": 61480 }, { "epoch": 0.01885, "grad_norm": 6.704296112060547, "learning_rate": 1.9452525252525254e-06, "loss": 6.2451332092285154, "step": 61485 }, { "epoch": 0.0189, "grad_norm": 8.649197578430176, "learning_rate": 1.945e-06, "loss": 6.262692260742187, "step": 61490 }, { "epoch": 0.01895, "grad_norm": 5.336084842681885, "learning_rate": 1.9447474747474746e-06, "loss": 6.256069183349609, "step": 61495 }, { "epoch": 0.019, "grad_norm": 5.044567108154297, "learning_rate": 1.9444949494949497e-06, "loss": 6.313071441650391, "step": 61500 }, { "epoch": 0.01905, "grad_norm": 3.126112699508667, "learning_rate": 1.9442424242424243e-06, "loss": 6.301910781860352, "step": 61505 }, { "epoch": 0.0191, "grad_norm": 6.032655715942383, "learning_rate": 1.9439898989898994e-06, "loss": 6.250076675415039, "step": 61510 }, { "epoch": 0.01915, "grad_norm": 14.7001314163208, "learning_rate": 1.943737373737374e-06, "loss": 6.245975494384766, "step": 61515 }, { "epoch": 0.0192, "grad_norm": 5.747743606567383, "learning_rate": 1.9434848484848486e-06, "loss": 6.327332305908203, "step": 61520 }, { "epoch": 0.01925, "grad_norm": 11.085129737854004, "learning_rate": 1.9432323232323233e-06, "loss": 6.2676513671875, "step": 61525 }, { "epoch": 0.0193, "grad_norm": 7.937230110168457, "learning_rate": 1.9429797979797983e-06, "loss": 6.233176040649414, "step": 61530 }, { "epoch": 0.01935, "grad_norm": 8.465858459472656, "learning_rate": 1.942727272727273e-06, "loss": 6.256004333496094, "step": 61535 }, { "epoch": 0.0194, "grad_norm": 4.647256851196289, "learning_rate": 1.9424747474747476e-06, "loss": 6.235884094238282, "step": 61540 }, { "epoch": 0.01945, "grad_norm": 10.896171569824219, "learning_rate": 1.9422222222222222e-06, "loss": 6.370201492309571, "step": 61545 }, { "epoch": 0.0195, "grad_norm": 24.029809951782227, "learning_rate": 1.9419696969696973e-06, "loss": 6.283937072753906, "step": 61550 }, { "epoch": 0.01955, "grad_norm": 5.491039276123047, "learning_rate": 1.941717171717172e-06, "loss": 6.304247283935547, "step": 61555 }, { "epoch": 0.0196, "grad_norm": 8.935555458068848, "learning_rate": 1.9414646464646465e-06, "loss": 6.209046173095703, "step": 61560 }, { "epoch": 0.01965, "grad_norm": 4.10166597366333, "learning_rate": 1.941212121212121e-06, "loss": 6.227637863159179, "step": 61565 }, { "epoch": 0.0197, "grad_norm": 4.717437267303467, "learning_rate": 1.9409595959595962e-06, "loss": 6.253296661376953, "step": 61570 }, { "epoch": 0.01975, "grad_norm": 4.183695316314697, "learning_rate": 1.940707070707071e-06, "loss": 6.2250110626220705, "step": 61575 }, { "epoch": 0.0198, "grad_norm": 6.231771469116211, "learning_rate": 1.9404545454545455e-06, "loss": 6.271195602416992, "step": 61580 }, { "epoch": 0.01985, "grad_norm": 8.122112274169922, "learning_rate": 1.94020202020202e-06, "loss": 6.250812149047851, "step": 61585 }, { "epoch": 0.0199, "grad_norm": 9.013800621032715, "learning_rate": 1.939949494949495e-06, "loss": 6.282105255126953, "step": 61590 }, { "epoch": 0.01995, "grad_norm": 8.199732780456543, "learning_rate": 1.9396969696969698e-06, "loss": 6.286434173583984, "step": 61595 }, { "epoch": 0.02, "grad_norm": 5.709147930145264, "learning_rate": 1.939444444444445e-06, "loss": 6.214973068237304, "step": 61600 }, { "epoch": 0.02005, "grad_norm": 6.143204689025879, "learning_rate": 1.939191919191919e-06, "loss": 6.274457931518555, "step": 61605 }, { "epoch": 0.0201, "grad_norm": 5.981971263885498, "learning_rate": 1.938939393939394e-06, "loss": 6.22220458984375, "step": 61610 }, { "epoch": 0.02015, "grad_norm": 4.69464635848999, "learning_rate": 1.9386868686868687e-06, "loss": 6.217979812622071, "step": 61615 }, { "epoch": 0.0202, "grad_norm": 3.884500503540039, "learning_rate": 1.9384343434343438e-06, "loss": 6.194882202148437, "step": 61620 }, { "epoch": 0.02025, "grad_norm": 6.779564380645752, "learning_rate": 1.9381818181818184e-06, "loss": 6.2675025939941404, "step": 61625 }, { "epoch": 0.0203, "grad_norm": 9.418289184570312, "learning_rate": 1.937929292929293e-06, "loss": 6.2377464294433596, "step": 61630 }, { "epoch": 0.02035, "grad_norm": 11.848189353942871, "learning_rate": 1.9376767676767677e-06, "loss": 6.436459350585937, "step": 61635 }, { "epoch": 0.0204, "grad_norm": 6.030075550079346, "learning_rate": 1.9374242424242427e-06, "loss": 6.286392593383789, "step": 61640 }, { "epoch": 0.02045, "grad_norm": 6.008823394775391, "learning_rate": 1.9371717171717174e-06, "loss": 6.292548751831054, "step": 61645 }, { "epoch": 0.0205, "grad_norm": 5.7092509269714355, "learning_rate": 1.936919191919192e-06, "loss": 6.198352432250976, "step": 61650 }, { "epoch": 0.02055, "grad_norm": 6.07267951965332, "learning_rate": 1.9366666666666666e-06, "loss": 6.279766845703125, "step": 61655 }, { "epoch": 0.0206, "grad_norm": 5.346611976623535, "learning_rate": 1.9364141414141417e-06, "loss": 6.198014831542968, "step": 61660 }, { "epoch": 0.02065, "grad_norm": 6.156179428100586, "learning_rate": 1.9361616161616163e-06, "loss": 6.219424438476563, "step": 61665 }, { "epoch": 0.0207, "grad_norm": 7.8975982666015625, "learning_rate": 1.935909090909091e-06, "loss": 6.238116836547851, "step": 61670 }, { "epoch": 0.02075, "grad_norm": 3.1565701961517334, "learning_rate": 1.9356565656565656e-06, "loss": 6.22462272644043, "step": 61675 }, { "epoch": 0.0208, "grad_norm": 6.538504123687744, "learning_rate": 1.9354040404040406e-06, "loss": 6.307218170166015, "step": 61680 }, { "epoch": 0.02085, "grad_norm": 4.47265625, "learning_rate": 1.9351515151515152e-06, "loss": 6.242021942138672, "step": 61685 }, { "epoch": 0.0209, "grad_norm": 6.992912292480469, "learning_rate": 1.93489898989899e-06, "loss": 6.218651962280274, "step": 61690 }, { "epoch": 0.02095, "grad_norm": 3.7009317874908447, "learning_rate": 1.934646464646465e-06, "loss": 6.3001960754394535, "step": 61695 }, { "epoch": 0.021, "grad_norm": 6.0685343742370605, "learning_rate": 1.9343939393939396e-06, "loss": 6.245230484008789, "step": 61700 }, { "epoch": 0.02105, "grad_norm": 5.606391429901123, "learning_rate": 1.9341414141414146e-06, "loss": 6.2077678680419925, "step": 61705 }, { "epoch": 0.0211, "grad_norm": 8.75272274017334, "learning_rate": 1.9338888888888892e-06, "loss": 6.263013076782227, "step": 61710 }, { "epoch": 0.02115, "grad_norm": 5.203914165496826, "learning_rate": 1.933636363636364e-06, "loss": 6.251709365844727, "step": 61715 }, { "epoch": 0.0212, "grad_norm": 6.909201145172119, "learning_rate": 1.9333838383838385e-06, "loss": 6.2318061828613285, "step": 61720 }, { "epoch": 0.02125, "grad_norm": 21.401166915893555, "learning_rate": 1.9331313131313136e-06, "loss": 6.20640983581543, "step": 61725 }, { "epoch": 0.0213, "grad_norm": 8.283108711242676, "learning_rate": 1.932878787878788e-06, "loss": 6.262865829467773, "step": 61730 }, { "epoch": 0.02135, "grad_norm": 8.976180076599121, "learning_rate": 1.932626262626263e-06, "loss": 6.19238510131836, "step": 61735 }, { "epoch": 0.0214, "grad_norm": 5.278120040893555, "learning_rate": 1.9323737373737374e-06, "loss": 6.388739776611328, "step": 61740 }, { "epoch": 0.02145, "grad_norm": 9.951393127441406, "learning_rate": 1.9321212121212125e-06, "loss": 6.289361572265625, "step": 61745 }, { "epoch": 0.0215, "grad_norm": 6.602175712585449, "learning_rate": 1.931868686868687e-06, "loss": 6.238614654541015, "step": 61750 }, { "epoch": 0.02155, "grad_norm": 6.3720784187316895, "learning_rate": 1.9316161616161618e-06, "loss": 6.279434585571289, "step": 61755 }, { "epoch": 0.0216, "grad_norm": 4.6925482749938965, "learning_rate": 1.9313636363636364e-06, "loss": 6.260642242431641, "step": 61760 }, { "epoch": 0.02165, "grad_norm": 5.205041408538818, "learning_rate": 1.9311111111111114e-06, "loss": 6.261301422119141, "step": 61765 }, { "epoch": 0.0217, "grad_norm": 4.751651763916016, "learning_rate": 1.930858585858586e-06, "loss": 6.291161346435547, "step": 61770 }, { "epoch": 0.02175, "grad_norm": 6.124937534332275, "learning_rate": 1.9306060606060607e-06, "loss": 6.288378524780273, "step": 61775 }, { "epoch": 0.0218, "grad_norm": 9.878928184509277, "learning_rate": 1.9303535353535353e-06, "loss": 6.21301498413086, "step": 61780 }, { "epoch": 0.02185, "grad_norm": 6.701279640197754, "learning_rate": 1.9301010101010104e-06, "loss": 6.218178176879883, "step": 61785 }, { "epoch": 0.0219, "grad_norm": 6.244784355163574, "learning_rate": 1.929848484848485e-06, "loss": 6.279876708984375, "step": 61790 }, { "epoch": 0.02195, "grad_norm": 6.256587028503418, "learning_rate": 1.9295959595959596e-06, "loss": 6.267554092407226, "step": 61795 }, { "epoch": 0.022, "grad_norm": 6.190325736999512, "learning_rate": 1.9293434343434343e-06, "loss": 6.254549026489258, "step": 61800 }, { "epoch": 0.02205, "grad_norm": 10.282642364501953, "learning_rate": 1.9290909090909093e-06, "loss": 6.226092910766601, "step": 61805 }, { "epoch": 0.0221, "grad_norm": 14.925570487976074, "learning_rate": 1.928838383838384e-06, "loss": 6.397134399414062, "step": 61810 }, { "epoch": 0.02215, "grad_norm": 6.977142810821533, "learning_rate": 1.928585858585859e-06, "loss": 6.220896911621094, "step": 61815 }, { "epoch": 0.0222, "grad_norm": 4.9232869148254395, "learning_rate": 1.9283333333333336e-06, "loss": 6.319570159912109, "step": 61820 }, { "epoch": 0.02225, "grad_norm": 3.727400064468384, "learning_rate": 1.9280808080808083e-06, "loss": 6.319313812255859, "step": 61825 }, { "epoch": 0.0223, "grad_norm": 17.58816146850586, "learning_rate": 1.927828282828283e-06, "loss": 6.4946449279785154, "step": 61830 }, { "epoch": 0.02235, "grad_norm": 10.620623588562012, "learning_rate": 1.927575757575758e-06, "loss": 6.3991554260253904, "step": 61835 }, { "epoch": 0.0224, "grad_norm": 5.123876094818115, "learning_rate": 1.9273232323232326e-06, "loss": 6.226839828491211, "step": 61840 }, { "epoch": 0.02245, "grad_norm": 5.374415874481201, "learning_rate": 1.9270707070707072e-06, "loss": 6.266827774047852, "step": 61845 }, { "epoch": 0.0225, "grad_norm": 5.971323013305664, "learning_rate": 1.926818181818182e-06, "loss": 6.286631393432617, "step": 61850 }, { "epoch": 0.02255, "grad_norm": 5.941018581390381, "learning_rate": 1.926565656565657e-06, "loss": 6.2928119659423825, "step": 61855 }, { "epoch": 0.0226, "grad_norm": 6.759298801422119, "learning_rate": 1.9263131313131315e-06, "loss": 6.250281524658203, "step": 61860 }, { "epoch": 0.02265, "grad_norm": 7.127414226531982, "learning_rate": 1.926060606060606e-06, "loss": 6.233768081665039, "step": 61865 }, { "epoch": 0.0227, "grad_norm": 4.722720623016357, "learning_rate": 1.925808080808081e-06, "loss": 6.2406166076660154, "step": 61870 }, { "epoch": 0.02275, "grad_norm": 8.772945404052734, "learning_rate": 1.925555555555556e-06, "loss": 6.26063232421875, "step": 61875 }, { "epoch": 0.0228, "grad_norm": 7.427657604217529, "learning_rate": 1.9253030303030305e-06, "loss": 6.236722564697265, "step": 61880 }, { "epoch": 0.02285, "grad_norm": 6.803843975067139, "learning_rate": 1.925050505050505e-06, "loss": 6.295931243896485, "step": 61885 }, { "epoch": 0.0229, "grad_norm": 6.515902042388916, "learning_rate": 1.9247979797979797e-06, "loss": 6.273680114746094, "step": 61890 }, { "epoch": 0.02295, "grad_norm": 5.2738447189331055, "learning_rate": 1.924545454545455e-06, "loss": 6.246062850952148, "step": 61895 }, { "epoch": 0.023, "grad_norm": 6.175250053405762, "learning_rate": 1.9242929292929294e-06, "loss": 6.244352340698242, "step": 61900 }, { "epoch": 0.02305, "grad_norm": 23.273672103881836, "learning_rate": 1.924040404040404e-06, "loss": 6.355327606201172, "step": 61905 }, { "epoch": 0.0231, "grad_norm": 6.186550140380859, "learning_rate": 1.9237878787878787e-06, "loss": 6.255298614501953, "step": 61910 }, { "epoch": 0.02315, "grad_norm": 4.951099872589111, "learning_rate": 1.9235353535353537e-06, "loss": 6.26000747680664, "step": 61915 }, { "epoch": 0.0232, "grad_norm": 13.800759315490723, "learning_rate": 1.9232828282828284e-06, "loss": 6.232140350341797, "step": 61920 }, { "epoch": 0.02325, "grad_norm": 7.828312397003174, "learning_rate": 1.9230303030303034e-06, "loss": 6.289051055908203, "step": 61925 }, { "epoch": 0.0233, "grad_norm": 15.248344421386719, "learning_rate": 1.922777777777778e-06, "loss": 6.237822341918945, "step": 61930 }, { "epoch": 0.02335, "grad_norm": 6.613580226898193, "learning_rate": 1.9225252525252527e-06, "loss": 6.359769058227539, "step": 61935 }, { "epoch": 0.0234, "grad_norm": 8.175440788269043, "learning_rate": 1.9222727272727273e-06, "loss": 6.227716064453125, "step": 61940 }, { "epoch": 0.02345, "grad_norm": 5.862253189086914, "learning_rate": 1.9220202020202024e-06, "loss": 6.290945434570313, "step": 61945 }, { "epoch": 0.0235, "grad_norm": 3.361764669418335, "learning_rate": 1.921767676767677e-06, "loss": 6.27653923034668, "step": 61950 }, { "epoch": 0.02355, "grad_norm": 4.757517337799072, "learning_rate": 1.9215151515151516e-06, "loss": 6.224477386474609, "step": 61955 }, { "epoch": 0.0236, "grad_norm": 9.488749504089355, "learning_rate": 1.9212626262626263e-06, "loss": 6.205791091918945, "step": 61960 }, { "epoch": 0.02365, "grad_norm": 4.83558988571167, "learning_rate": 1.9210101010101013e-06, "loss": 6.2749778747558596, "step": 61965 }, { "epoch": 0.0237, "grad_norm": 4.518133640289307, "learning_rate": 1.920757575757576e-06, "loss": 6.2604621887207035, "step": 61970 }, { "epoch": 0.02375, "grad_norm": 5.616275310516357, "learning_rate": 1.9205050505050506e-06, "loss": 6.260063171386719, "step": 61975 }, { "epoch": 0.0238, "grad_norm": 6.206455230712891, "learning_rate": 1.920252525252525e-06, "loss": 6.232390975952148, "step": 61980 }, { "epoch": 0.02385, "grad_norm": 12.338299751281738, "learning_rate": 1.9200000000000003e-06, "loss": 6.336439514160157, "step": 61985 }, { "epoch": 0.0239, "grad_norm": 3.878108263015747, "learning_rate": 1.919747474747475e-06, "loss": 6.228158187866211, "step": 61990 }, { "epoch": 0.02395, "grad_norm": 8.768754005432129, "learning_rate": 1.9194949494949495e-06, "loss": 6.261961364746094, "step": 61995 }, { "epoch": 0.024, "grad_norm": 5.549461841583252, "learning_rate": 1.919242424242424e-06, "loss": 6.3027793884277346, "step": 62000 }, { "epoch": 0.02405, "grad_norm": 7.1424880027771, "learning_rate": 1.918989898989899e-06, "loss": 6.263557434082031, "step": 62005 }, { "epoch": 0.0241, "grad_norm": 5.786823749542236, "learning_rate": 1.918737373737374e-06, "loss": 6.2421833038330075, "step": 62010 }, { "epoch": 0.02415, "grad_norm": 4.902639865875244, "learning_rate": 1.918484848484849e-06, "loss": 6.261465072631836, "step": 62015 }, { "epoch": 0.0242, "grad_norm": 4.81596565246582, "learning_rate": 1.918232323232323e-06, "loss": 6.27205810546875, "step": 62020 }, { "epoch": 0.02425, "grad_norm": 5.978924751281738, "learning_rate": 1.917979797979798e-06, "loss": 6.230600738525391, "step": 62025 }, { "epoch": 0.0243, "grad_norm": 5.704242706298828, "learning_rate": 1.9177272727272728e-06, "loss": 6.257980728149414, "step": 62030 }, { "epoch": 0.02435, "grad_norm": 7.736745357513428, "learning_rate": 1.917474747474748e-06, "loss": 6.3867240905761715, "step": 62035 }, { "epoch": 0.0244, "grad_norm": 4.6141157150268555, "learning_rate": 1.9172222222222225e-06, "loss": 6.297271347045898, "step": 62040 }, { "epoch": 0.02445, "grad_norm": 10.416067123413086, "learning_rate": 1.916969696969697e-06, "loss": 6.474153137207031, "step": 62045 }, { "epoch": 0.0245, "grad_norm": 7.163963794708252, "learning_rate": 1.9167171717171717e-06, "loss": 6.230937576293945, "step": 62050 }, { "epoch": 0.02455, "grad_norm": 12.32824993133545, "learning_rate": 1.9164646464646468e-06, "loss": 6.233685302734375, "step": 62055 }, { "epoch": 0.0246, "grad_norm": 7.68446683883667, "learning_rate": 1.9162121212121214e-06, "loss": 6.288049697875977, "step": 62060 }, { "epoch": 0.02465, "grad_norm": 3.817000150680542, "learning_rate": 1.915959595959596e-06, "loss": 6.283369445800782, "step": 62065 }, { "epoch": 0.0247, "grad_norm": 5.948622703552246, "learning_rate": 1.9157070707070707e-06, "loss": 6.28637580871582, "step": 62070 }, { "epoch": 0.02475, "grad_norm": 6.980443000793457, "learning_rate": 1.9154545454545457e-06, "loss": 6.259815979003906, "step": 62075 }, { "epoch": 0.0248, "grad_norm": 4.461381435394287, "learning_rate": 1.9152020202020203e-06, "loss": 6.1970672607421875, "step": 62080 }, { "epoch": 0.02485, "grad_norm": 5.8962626457214355, "learning_rate": 1.914949494949495e-06, "loss": 6.274631118774414, "step": 62085 }, { "epoch": 0.0249, "grad_norm": 11.72916316986084, "learning_rate": 1.9146969696969696e-06, "loss": 6.232873916625977, "step": 62090 }, { "epoch": 0.02495, "grad_norm": 5.8649678230285645, "learning_rate": 1.9144444444444447e-06, "loss": 6.29283332824707, "step": 62095 }, { "epoch": 0.025, "grad_norm": 7.566601276397705, "learning_rate": 1.9141919191919193e-06, "loss": 6.226845550537109, "step": 62100 }, { "epoch": 0.02505, "grad_norm": 11.646729469299316, "learning_rate": 1.913939393939394e-06, "loss": 6.047228622436523, "step": 62105 }, { "epoch": 0.0251, "grad_norm": 6.043450355529785, "learning_rate": 1.913686868686869e-06, "loss": 6.258866500854492, "step": 62110 }, { "epoch": 0.02515, "grad_norm": 7.806583404541016, "learning_rate": 1.9134343434343436e-06, "loss": 6.258141326904297, "step": 62115 }, { "epoch": 0.0252, "grad_norm": 4.805337905883789, "learning_rate": 1.9131818181818187e-06, "loss": 6.216482543945313, "step": 62120 }, { "epoch": 0.02525, "grad_norm": 9.091444969177246, "learning_rate": 1.9129292929292933e-06, "loss": 6.249955749511718, "step": 62125 }, { "epoch": 0.0253, "grad_norm": 3.679234027862549, "learning_rate": 1.912676767676768e-06, "loss": 6.247993087768554, "step": 62130 }, { "epoch": 0.02535, "grad_norm": 4.803818702697754, "learning_rate": 1.9124242424242425e-06, "loss": 6.244263458251953, "step": 62135 }, { "epoch": 0.0254, "grad_norm": 4.430932998657227, "learning_rate": 1.9121717171717176e-06, "loss": 6.236671066284179, "step": 62140 }, { "epoch": 0.02545, "grad_norm": 5.70927619934082, "learning_rate": 1.9119191919191922e-06, "loss": 6.215878677368164, "step": 62145 }, { "epoch": 0.0255, "grad_norm": 16.420671463012695, "learning_rate": 1.911666666666667e-06, "loss": 6.205671310424805, "step": 62150 }, { "epoch": 0.02555, "grad_norm": 10.058878898620605, "learning_rate": 1.9114141414141415e-06, "loss": 6.273883056640625, "step": 62155 }, { "epoch": 0.0256, "grad_norm": 6.590727806091309, "learning_rate": 1.9111616161616165e-06, "loss": 6.223825073242187, "step": 62160 }, { "epoch": 0.02565, "grad_norm": 6.853649139404297, "learning_rate": 1.910909090909091e-06, "loss": 6.245399093627929, "step": 62165 }, { "epoch": 0.0257, "grad_norm": 7.075381755828857, "learning_rate": 1.910656565656566e-06, "loss": 6.255885314941406, "step": 62170 }, { "epoch": 0.02575, "grad_norm": 5.533735752105713, "learning_rate": 1.9104040404040404e-06, "loss": 6.243045043945313, "step": 62175 }, { "epoch": 0.0258, "grad_norm": 4.350753307342529, "learning_rate": 1.9101515151515155e-06, "loss": 6.237922668457031, "step": 62180 }, { "epoch": 0.02585, "grad_norm": 5.734804153442383, "learning_rate": 1.90989898989899e-06, "loss": 6.225393676757813, "step": 62185 }, { "epoch": 0.0259, "grad_norm": 12.700912475585938, "learning_rate": 1.9096464646464647e-06, "loss": 6.266473388671875, "step": 62190 }, { "epoch": 0.02595, "grad_norm": 8.333916664123535, "learning_rate": 1.9093939393939394e-06, "loss": 6.253589248657226, "step": 62195 }, { "epoch": 0.026, "grad_norm": 3.3213086128234863, "learning_rate": 1.9091414141414144e-06, "loss": 6.401548767089844, "step": 62200 }, { "epoch": 0.02605, "grad_norm": 8.22468090057373, "learning_rate": 1.908888888888889e-06, "loss": 6.229007339477539, "step": 62205 }, { "epoch": 0.0261, "grad_norm": 8.443292617797852, "learning_rate": 1.9086363636363637e-06, "loss": 6.28258171081543, "step": 62210 }, { "epoch": 0.02615, "grad_norm": 15.681092262268066, "learning_rate": 1.9083838383838383e-06, "loss": 6.269416046142578, "step": 62215 }, { "epoch": 0.0262, "grad_norm": 8.588384628295898, "learning_rate": 1.9081313131313134e-06, "loss": 6.3749542236328125, "step": 62220 }, { "epoch": 0.02625, "grad_norm": 5.015449523925781, "learning_rate": 1.907878787878788e-06, "loss": 6.284757995605469, "step": 62225 }, { "epoch": 0.0263, "grad_norm": 14.487648963928223, "learning_rate": 1.907626262626263e-06, "loss": 6.260013580322266, "step": 62230 }, { "epoch": 0.02635, "grad_norm": 3.982177257537842, "learning_rate": 1.9073737373737377e-06, "loss": 6.285531616210937, "step": 62235 }, { "epoch": 0.0264, "grad_norm": 25.296480178833008, "learning_rate": 1.9071212121212123e-06, "loss": 6.3536632537841795, "step": 62240 }, { "epoch": 0.02645, "grad_norm": 10.046147346496582, "learning_rate": 1.906868686868687e-06, "loss": 6.2960655212402346, "step": 62245 }, { "epoch": 0.0265, "grad_norm": 7.2062859535217285, "learning_rate": 1.9066161616161618e-06, "loss": 6.272810363769532, "step": 62250 }, { "epoch": 0.02655, "grad_norm": 9.377452850341797, "learning_rate": 1.9063636363636364e-06, "loss": 6.228921508789062, "step": 62255 }, { "epoch": 0.0266, "grad_norm": 8.327960968017578, "learning_rate": 1.9061111111111113e-06, "loss": 6.321055221557617, "step": 62260 }, { "epoch": 0.02665, "grad_norm": 8.070571899414062, "learning_rate": 1.9058585858585859e-06, "loss": 6.349167251586914, "step": 62265 }, { "epoch": 0.0267, "grad_norm": 13.41532039642334, "learning_rate": 1.905606060606061e-06, "loss": 6.371846771240234, "step": 62270 }, { "epoch": 0.02675, "grad_norm": 6.4769439697265625, "learning_rate": 1.9053535353535354e-06, "loss": 6.222506713867188, "step": 62275 }, { "epoch": 0.0268, "grad_norm": 13.056445121765137, "learning_rate": 1.9051010101010104e-06, "loss": 6.228475952148438, "step": 62280 }, { "epoch": 0.02685, "grad_norm": 13.471928596496582, "learning_rate": 1.904848484848485e-06, "loss": 6.339588928222656, "step": 62285 }, { "epoch": 0.0269, "grad_norm": 11.52112102508545, "learning_rate": 1.9045959595959599e-06, "loss": 6.219236755371094, "step": 62290 }, { "epoch": 0.02695, "grad_norm": 3.216845750808716, "learning_rate": 1.9043434343434345e-06, "loss": 6.200608825683593, "step": 62295 }, { "epoch": 0.027, "grad_norm": 15.345287322998047, "learning_rate": 1.9040909090909094e-06, "loss": 6.22989501953125, "step": 62300 }, { "epoch": 0.02705, "grad_norm": 5.245137691497803, "learning_rate": 1.903838383838384e-06, "loss": 6.232751846313477, "step": 62305 }, { "epoch": 0.0271, "grad_norm": 4.800722122192383, "learning_rate": 1.9035858585858588e-06, "loss": 6.330547714233399, "step": 62310 }, { "epoch": 0.02715, "grad_norm": 6.387035369873047, "learning_rate": 1.9033333333333335e-06, "loss": 6.268454360961914, "step": 62315 }, { "epoch": 0.0272, "grad_norm": 9.972704887390137, "learning_rate": 1.9030808080808083e-06, "loss": 6.3202354431152346, "step": 62320 }, { "epoch": 0.02725, "grad_norm": 4.787247180938721, "learning_rate": 1.902828282828283e-06, "loss": 6.259056854248047, "step": 62325 }, { "epoch": 0.0273, "grad_norm": 6.9118571281433105, "learning_rate": 1.9025757575757578e-06, "loss": 6.269126129150391, "step": 62330 }, { "epoch": 0.02735, "grad_norm": 9.22292423248291, "learning_rate": 1.9023232323232324e-06, "loss": 6.571133422851562, "step": 62335 }, { "epoch": 0.0274, "grad_norm": 14.658958435058594, "learning_rate": 1.9020707070707072e-06, "loss": 6.267678833007812, "step": 62340 }, { "epoch": 0.02745, "grad_norm": 21.688621520996094, "learning_rate": 1.9018181818181819e-06, "loss": 6.1356555938720705, "step": 62345 }, { "epoch": 0.0275, "grad_norm": 7.203467845916748, "learning_rate": 1.9015656565656567e-06, "loss": 6.215506744384766, "step": 62350 }, { "epoch": 0.02755, "grad_norm": 6.4397125244140625, "learning_rate": 1.9013131313131314e-06, "loss": 6.233172607421875, "step": 62355 }, { "epoch": 0.0276, "grad_norm": 8.97672176361084, "learning_rate": 1.9010606060606062e-06, "loss": 6.26298828125, "step": 62360 }, { "epoch": 0.02765, "grad_norm": 5.140129089355469, "learning_rate": 1.9008080808080808e-06, "loss": 6.302192687988281, "step": 62365 }, { "epoch": 0.0277, "grad_norm": 4.425889492034912, "learning_rate": 1.9005555555555557e-06, "loss": 6.221224975585938, "step": 62370 }, { "epoch": 0.02775, "grad_norm": 7.122312545776367, "learning_rate": 1.9003030303030303e-06, "loss": 6.309427642822266, "step": 62375 }, { "epoch": 0.0278, "grad_norm": 8.101231575012207, "learning_rate": 1.9000505050505053e-06, "loss": 6.253390121459961, "step": 62380 }, { "epoch": 0.02785, "grad_norm": 6.043264389038086, "learning_rate": 1.8997979797979798e-06, "loss": 6.2268013000488285, "step": 62385 }, { "epoch": 0.0279, "grad_norm": 14.180349349975586, "learning_rate": 1.8995454545454548e-06, "loss": 6.341179656982422, "step": 62390 }, { "epoch": 0.02795, "grad_norm": 8.012643814086914, "learning_rate": 1.8992929292929295e-06, "loss": 6.3863780975341795, "step": 62395 }, { "epoch": 0.028, "grad_norm": 5.554315567016602, "learning_rate": 1.8990404040404043e-06, "loss": 6.245582580566406, "step": 62400 }, { "epoch": 0.02805, "grad_norm": 6.849372863769531, "learning_rate": 1.898787878787879e-06, "loss": 6.219215011596679, "step": 62405 }, { "epoch": 0.0281, "grad_norm": 6.9014058113098145, "learning_rate": 1.8985353535353538e-06, "loss": 6.266943359375, "step": 62410 }, { "epoch": 0.02815, "grad_norm": 9.870644569396973, "learning_rate": 1.8982828282828284e-06, "loss": 6.3021797180175785, "step": 62415 }, { "epoch": 0.0282, "grad_norm": 12.132411003112793, "learning_rate": 1.8980303030303032e-06, "loss": 6.276427459716797, "step": 62420 }, { "epoch": 0.02825, "grad_norm": 5.277702808380127, "learning_rate": 1.8977777777777779e-06, "loss": 6.2724853515625, "step": 62425 }, { "epoch": 0.0283, "grad_norm": 8.306949615478516, "learning_rate": 1.8975252525252527e-06, "loss": 6.210828018188477, "step": 62430 }, { "epoch": 0.02835, "grad_norm": 24.117225646972656, "learning_rate": 1.8972727272727273e-06, "loss": 6.303520965576172, "step": 62435 }, { "epoch": 0.0284, "grad_norm": 7.112334251403809, "learning_rate": 1.8970202020202022e-06, "loss": 6.1880126953125, "step": 62440 }, { "epoch": 0.02845, "grad_norm": 6.102401256561279, "learning_rate": 1.8967676767676768e-06, "loss": 6.229666900634766, "step": 62445 }, { "epoch": 0.0285, "grad_norm": 9.191045761108398, "learning_rate": 1.8965151515151517e-06, "loss": 6.298749542236328, "step": 62450 }, { "epoch": 0.02855, "grad_norm": 5.626814365386963, "learning_rate": 1.8962626262626263e-06, "loss": 6.327130126953125, "step": 62455 }, { "epoch": 0.0286, "grad_norm": 4.8786725997924805, "learning_rate": 1.8960101010101011e-06, "loss": 6.213555526733399, "step": 62460 }, { "epoch": 0.02865, "grad_norm": 5.205348491668701, "learning_rate": 1.8957575757575758e-06, "loss": 6.281430053710937, "step": 62465 }, { "epoch": 0.0287, "grad_norm": 5.960262775421143, "learning_rate": 1.8955050505050506e-06, "loss": 6.287853622436524, "step": 62470 }, { "epoch": 0.02875, "grad_norm": 9.148475646972656, "learning_rate": 1.8952525252525252e-06, "loss": 6.28929557800293, "step": 62475 }, { "epoch": 0.0288, "grad_norm": 4.81479549407959, "learning_rate": 1.895e-06, "loss": 6.306608581542969, "step": 62480 }, { "epoch": 0.02885, "grad_norm": 24.291471481323242, "learning_rate": 1.8947474747474747e-06, "loss": 6.324548721313477, "step": 62485 }, { "epoch": 0.0289, "grad_norm": 25.420801162719727, "learning_rate": 1.8944949494949498e-06, "loss": 6.180773544311523, "step": 62490 }, { "epoch": 0.02895, "grad_norm": 5.628326416015625, "learning_rate": 1.8942424242424242e-06, "loss": 6.304501342773437, "step": 62495 }, { "epoch": 0.029, "grad_norm": 21.377071380615234, "learning_rate": 1.8939898989898992e-06, "loss": 6.275628662109375, "step": 62500 }, { "epoch": 0.02905, "grad_norm": 5.3338847160339355, "learning_rate": 1.8937373737373739e-06, "loss": 6.238192749023438, "step": 62505 }, { "epoch": 0.0291, "grad_norm": 4.034029960632324, "learning_rate": 1.8934848484848487e-06, "loss": 6.320311737060547, "step": 62510 }, { "epoch": 0.02915, "grad_norm": 5.4108147621154785, "learning_rate": 1.8932323232323233e-06, "loss": 6.230154418945313, "step": 62515 }, { "epoch": 0.0292, "grad_norm": 5.564615726470947, "learning_rate": 1.8929797979797982e-06, "loss": 6.25451545715332, "step": 62520 }, { "epoch": 0.02925, "grad_norm": 6.226053237915039, "learning_rate": 1.8927272727272728e-06, "loss": 6.1721549987792965, "step": 62525 }, { "epoch": 0.0293, "grad_norm": 6.799611568450928, "learning_rate": 1.8924747474747476e-06, "loss": 6.229994201660157, "step": 62530 }, { "epoch": 0.02935, "grad_norm": 18.338869094848633, "learning_rate": 1.8922222222222225e-06, "loss": 6.423902893066407, "step": 62535 }, { "epoch": 0.0294, "grad_norm": 6.119081497192383, "learning_rate": 1.8919696969696971e-06, "loss": 6.399499893188477, "step": 62540 }, { "epoch": 0.02945, "grad_norm": 6.500056266784668, "learning_rate": 1.891717171717172e-06, "loss": 6.220116424560547, "step": 62545 }, { "epoch": 0.0295, "grad_norm": 7.1745100021362305, "learning_rate": 1.8914646464646466e-06, "loss": 6.210031890869141, "step": 62550 }, { "epoch": 0.02955, "grad_norm": 6.271444320678711, "learning_rate": 1.8912121212121214e-06, "loss": 6.271413421630859, "step": 62555 }, { "epoch": 0.0296, "grad_norm": 4.670347690582275, "learning_rate": 1.890959595959596e-06, "loss": 6.313129043579101, "step": 62560 }, { "epoch": 0.02965, "grad_norm": 5.4525604248046875, "learning_rate": 1.890707070707071e-06, "loss": 6.230620193481445, "step": 62565 }, { "epoch": 0.0297, "grad_norm": 4.861306667327881, "learning_rate": 1.8904545454545455e-06, "loss": 6.2237693786621096, "step": 62570 }, { "epoch": 0.02975, "grad_norm": 6.077636241912842, "learning_rate": 1.8902020202020206e-06, "loss": 6.28429946899414, "step": 62575 }, { "epoch": 0.0298, "grad_norm": 6.289973258972168, "learning_rate": 1.889949494949495e-06, "loss": 6.346307754516602, "step": 62580 }, { "epoch": 0.02985, "grad_norm": 4.6981611251831055, "learning_rate": 1.88969696969697e-06, "loss": 6.280269622802734, "step": 62585 }, { "epoch": 0.0299, "grad_norm": 3.6391735076904297, "learning_rate": 1.8894444444444447e-06, "loss": 6.227191925048828, "step": 62590 }, { "epoch": 0.02995, "grad_norm": 6.85783052444458, "learning_rate": 1.8891919191919195e-06, "loss": 6.243787384033203, "step": 62595 }, { "epoch": 0.03, "grad_norm": 5.612940311431885, "learning_rate": 1.8889393939393942e-06, "loss": 6.195180511474609, "step": 62600 }, { "epoch": 0.03005, "grad_norm": 3.9319005012512207, "learning_rate": 1.888686868686869e-06, "loss": 6.250684356689453, "step": 62605 }, { "epoch": 0.0301, "grad_norm": 3.945385694503784, "learning_rate": 1.8884343434343436e-06, "loss": 6.196837997436523, "step": 62610 }, { "epoch": 0.03015, "grad_norm": 5.286442279815674, "learning_rate": 1.8881818181818185e-06, "loss": 6.425250244140625, "step": 62615 }, { "epoch": 0.0302, "grad_norm": 4.681535720825195, "learning_rate": 1.887929292929293e-06, "loss": 6.203900909423828, "step": 62620 }, { "epoch": 0.03025, "grad_norm": 5.293543815612793, "learning_rate": 1.887676767676768e-06, "loss": 6.215904998779297, "step": 62625 }, { "epoch": 0.0303, "grad_norm": 9.437519073486328, "learning_rate": 1.8874242424242426e-06, "loss": 6.252590560913086, "step": 62630 }, { "epoch": 0.03035, "grad_norm": 5.709925651550293, "learning_rate": 1.8871717171717174e-06, "loss": 6.214181137084961, "step": 62635 }, { "epoch": 0.0304, "grad_norm": 6.1027069091796875, "learning_rate": 1.886919191919192e-06, "loss": 6.235104751586914, "step": 62640 }, { "epoch": 0.03045, "grad_norm": 5.396678447723389, "learning_rate": 1.8866666666666669e-06, "loss": 6.229944229125977, "step": 62645 }, { "epoch": 0.0305, "grad_norm": 4.656377792358398, "learning_rate": 1.8864141414141415e-06, "loss": 6.266973876953125, "step": 62650 }, { "epoch": 0.03055, "grad_norm": 4.172024250030518, "learning_rate": 1.8861616161616164e-06, "loss": 6.203564453125, "step": 62655 }, { "epoch": 0.0306, "grad_norm": 4.6647443771362305, "learning_rate": 1.885909090909091e-06, "loss": 6.244805145263672, "step": 62660 }, { "epoch": 0.03065, "grad_norm": 6.313098907470703, "learning_rate": 1.8856565656565658e-06, "loss": 6.248321533203125, "step": 62665 }, { "epoch": 0.0307, "grad_norm": 4.372366905212402, "learning_rate": 1.8854040404040405e-06, "loss": 6.2128559112548825, "step": 62670 }, { "epoch": 0.03075, "grad_norm": 4.608881950378418, "learning_rate": 1.8851515151515153e-06, "loss": 6.22448501586914, "step": 62675 }, { "epoch": 0.0308, "grad_norm": 15.128547668457031, "learning_rate": 1.88489898989899e-06, "loss": 6.283994674682617, "step": 62680 }, { "epoch": 0.03085, "grad_norm": 6.840599536895752, "learning_rate": 1.884646464646465e-06, "loss": 6.234212493896484, "step": 62685 }, { "epoch": 0.0309, "grad_norm": 8.03896427154541, "learning_rate": 1.8843939393939394e-06, "loss": 6.2204429626464846, "step": 62690 }, { "epoch": 0.03095, "grad_norm": 7.480104923248291, "learning_rate": 1.8841414141414145e-06, "loss": 6.209047317504883, "step": 62695 }, { "epoch": 0.031, "grad_norm": 7.1753249168396, "learning_rate": 1.883888888888889e-06, "loss": 6.218749618530273, "step": 62700 }, { "epoch": 0.03105, "grad_norm": 4.749782562255859, "learning_rate": 1.883636363636364e-06, "loss": 6.198015213012695, "step": 62705 }, { "epoch": 0.0311, "grad_norm": 6.275830268859863, "learning_rate": 1.8833838383838386e-06, "loss": 6.227254104614258, "step": 62710 }, { "epoch": 0.03115, "grad_norm": 4.785470962524414, "learning_rate": 1.8831313131313134e-06, "loss": 6.253419494628906, "step": 62715 }, { "epoch": 0.0312, "grad_norm": 57.57500076293945, "learning_rate": 1.882878787878788e-06, "loss": 6.55755615234375, "step": 62720 }, { "epoch": 0.03125, "grad_norm": 4.011620998382568, "learning_rate": 1.8826262626262629e-06, "loss": 6.287051773071289, "step": 62725 }, { "epoch": 0.0313, "grad_norm": 3.9420838356018066, "learning_rate": 1.8823737373737375e-06, "loss": 6.226145172119141, "step": 62730 }, { "epoch": 0.03135, "grad_norm": 5.443935871124268, "learning_rate": 1.8821212121212123e-06, "loss": 6.272207641601563, "step": 62735 }, { "epoch": 0.0314, "grad_norm": 6.777082920074463, "learning_rate": 1.881868686868687e-06, "loss": 6.24189453125, "step": 62740 }, { "epoch": 0.03145, "grad_norm": 5.9978928565979, "learning_rate": 1.8816161616161618e-06, "loss": 6.246139907836914, "step": 62745 }, { "epoch": 0.0315, "grad_norm": 4.859372615814209, "learning_rate": 1.8813636363636364e-06, "loss": 6.218721389770508, "step": 62750 }, { "epoch": 0.03155, "grad_norm": 8.483358383178711, "learning_rate": 1.8811111111111113e-06, "loss": 6.268097686767578, "step": 62755 }, { "epoch": 0.0316, "grad_norm": 12.237682342529297, "learning_rate": 1.880858585858586e-06, "loss": 6.293423461914062, "step": 62760 }, { "epoch": 0.03165, "grad_norm": 17.202882766723633, "learning_rate": 1.8806060606060608e-06, "loss": 6.500018310546875, "step": 62765 }, { "epoch": 0.0317, "grad_norm": 9.674863815307617, "learning_rate": 1.8803535353535354e-06, "loss": 6.246653747558594, "step": 62770 }, { "epoch": 0.03175, "grad_norm": 6.9159159660339355, "learning_rate": 1.8801010101010102e-06, "loss": 6.230746459960938, "step": 62775 }, { "epoch": 0.0318, "grad_norm": 10.242659568786621, "learning_rate": 1.8798484848484849e-06, "loss": 6.255489349365234, "step": 62780 }, { "epoch": 0.03185, "grad_norm": 8.988558769226074, "learning_rate": 1.8795959595959597e-06, "loss": 6.216098022460938, "step": 62785 }, { "epoch": 0.0319, "grad_norm": 7.258554458618164, "learning_rate": 1.8793434343434343e-06, "loss": 6.1946971893310545, "step": 62790 }, { "epoch": 0.03195, "grad_norm": 10.848084449768066, "learning_rate": 1.8790909090909094e-06, "loss": 6.237393188476562, "step": 62795 }, { "epoch": 0.032, "grad_norm": 9.772246360778809, "learning_rate": 1.8788383838383838e-06, "loss": 6.249011611938476, "step": 62800 }, { "epoch": 0.03205, "grad_norm": 5.0418171882629395, "learning_rate": 1.8785858585858589e-06, "loss": 6.456198120117188, "step": 62805 }, { "epoch": 0.0321, "grad_norm": 6.0337090492248535, "learning_rate": 1.8783333333333335e-06, "loss": 6.241810989379883, "step": 62810 }, { "epoch": 0.03215, "grad_norm": 7.385995388031006, "learning_rate": 1.8780808080808083e-06, "loss": 6.265116119384766, "step": 62815 }, { "epoch": 0.0322, "grad_norm": 4.770933151245117, "learning_rate": 1.877828282828283e-06, "loss": 6.248974227905274, "step": 62820 }, { "epoch": 0.03225, "grad_norm": 5.716039657592773, "learning_rate": 1.8775757575757578e-06, "loss": 6.214493179321289, "step": 62825 }, { "epoch": 0.0323, "grad_norm": 6.895079135894775, "learning_rate": 1.8773232323232324e-06, "loss": 6.243717956542969, "step": 62830 }, { "epoch": 0.03235, "grad_norm": 5.05888032913208, "learning_rate": 1.8770707070707073e-06, "loss": 6.331613540649414, "step": 62835 }, { "epoch": 0.0324, "grad_norm": 6.510175704956055, "learning_rate": 1.876818181818182e-06, "loss": 6.2291419982910154, "step": 62840 }, { "epoch": 0.03245, "grad_norm": 5.007889270782471, "learning_rate": 1.8765656565656567e-06, "loss": 6.247008514404297, "step": 62845 }, { "epoch": 0.0325, "grad_norm": 9.776569366455078, "learning_rate": 1.8763131313131314e-06, "loss": 6.215446090698242, "step": 62850 }, { "epoch": 0.03255, "grad_norm": 4.612044334411621, "learning_rate": 1.8760606060606062e-06, "loss": 6.270708084106445, "step": 62855 }, { "epoch": 0.0326, "grad_norm": 5.462477684020996, "learning_rate": 1.8758080808080808e-06, "loss": 6.217986679077148, "step": 62860 }, { "epoch": 0.03265, "grad_norm": 6.706149101257324, "learning_rate": 1.8755555555555557e-06, "loss": 6.257896423339844, "step": 62865 }, { "epoch": 0.0327, "grad_norm": 7.255356311798096, "learning_rate": 1.8753030303030303e-06, "loss": 6.233326721191406, "step": 62870 }, { "epoch": 0.03275, "grad_norm": 5.95625638961792, "learning_rate": 1.8750505050505052e-06, "loss": 6.228143310546875, "step": 62875 }, { "epoch": 0.0328, "grad_norm": 4.806092739105225, "learning_rate": 1.8747979797979798e-06, "loss": 6.2368816375732425, "step": 62880 }, { "epoch": 0.03285, "grad_norm": 36.272335052490234, "learning_rate": 1.8745454545454546e-06, "loss": 6.414154052734375, "step": 62885 }, { "epoch": 0.0329, "grad_norm": 7.309157371520996, "learning_rate": 1.8742929292929293e-06, "loss": 6.314483642578125, "step": 62890 }, { "epoch": 0.03295, "grad_norm": 6.959654331207275, "learning_rate": 1.8740404040404043e-06, "loss": 6.2446044921875, "step": 62895 }, { "epoch": 0.033, "grad_norm": 4.979482173919678, "learning_rate": 1.8737878787878787e-06, "loss": 6.291908645629883, "step": 62900 }, { "epoch": 0.03305, "grad_norm": 3.694979667663574, "learning_rate": 1.8735353535353538e-06, "loss": 6.343397903442383, "step": 62905 }, { "epoch": 0.0331, "grad_norm": 4.921634674072266, "learning_rate": 1.8732828282828282e-06, "loss": 6.258416748046875, "step": 62910 }, { "epoch": 0.03315, "grad_norm": 6.81943416595459, "learning_rate": 1.8730303030303033e-06, "loss": 6.21172866821289, "step": 62915 }, { "epoch": 0.0332, "grad_norm": 4.89826774597168, "learning_rate": 1.8727777777777779e-06, "loss": 6.241236877441406, "step": 62920 }, { "epoch": 0.03325, "grad_norm": 7.4008378982543945, "learning_rate": 1.8725252525252527e-06, "loss": 6.217368316650391, "step": 62925 }, { "epoch": 0.0333, "grad_norm": 4.267000675201416, "learning_rate": 1.8722727272727274e-06, "loss": 6.297184753417969, "step": 62930 }, { "epoch": 0.03335, "grad_norm": 6.468404293060303, "learning_rate": 1.8720202020202022e-06, "loss": 6.276628494262695, "step": 62935 }, { "epoch": 0.0334, "grad_norm": 4.509921073913574, "learning_rate": 1.8717676767676768e-06, "loss": 6.271505355834961, "step": 62940 }, { "epoch": 0.03345, "grad_norm": 9.127167701721191, "learning_rate": 1.8715151515151517e-06, "loss": 6.1982170104980465, "step": 62945 }, { "epoch": 0.0335, "grad_norm": 6.066013336181641, "learning_rate": 1.8712626262626263e-06, "loss": 6.233052825927734, "step": 62950 }, { "epoch": 0.03355, "grad_norm": 10.72216796875, "learning_rate": 1.8710101010101012e-06, "loss": 6.21356086730957, "step": 62955 }, { "epoch": 0.0336, "grad_norm": 26.21099090576172, "learning_rate": 1.8707575757575758e-06, "loss": 6.305002593994141, "step": 62960 }, { "epoch": 0.03365, "grad_norm": 9.450539588928223, "learning_rate": 1.8705050505050506e-06, "loss": 6.240253829956055, "step": 62965 }, { "epoch": 0.0337, "grad_norm": 6.456004619598389, "learning_rate": 1.8702525252525255e-06, "loss": 6.281785583496093, "step": 62970 }, { "epoch": 0.03375, "grad_norm": 4.762877464294434, "learning_rate": 1.87e-06, "loss": 6.226884460449218, "step": 62975 }, { "epoch": 0.0338, "grad_norm": 5.89227819442749, "learning_rate": 1.869747474747475e-06, "loss": 6.247333526611328, "step": 62980 }, { "epoch": 0.03385, "grad_norm": 10.57930850982666, "learning_rate": 1.8694949494949496e-06, "loss": 6.3080322265625, "step": 62985 }, { "epoch": 0.0339, "grad_norm": 5.590902805328369, "learning_rate": 1.8692424242424246e-06, "loss": 6.243936157226562, "step": 62990 }, { "epoch": 0.03395, "grad_norm": 6.116273880004883, "learning_rate": 1.868989898989899e-06, "loss": 6.260150909423828, "step": 62995 }, { "epoch": 0.034, "grad_norm": 6.011617660522461, "learning_rate": 1.868737373737374e-06, "loss": 6.205790710449219, "step": 63000 }, { "epoch": 0.03405, "grad_norm": 4.232112407684326, "learning_rate": 1.8684848484848487e-06, "loss": 6.211810302734375, "step": 63005 }, { "epoch": 0.0341, "grad_norm": 6.675339221954346, "learning_rate": 1.8682323232323236e-06, "loss": 6.318603515625, "step": 63010 }, { "epoch": 0.03415, "grad_norm": 5.134880542755127, "learning_rate": 1.8679797979797982e-06, "loss": 6.264231109619141, "step": 63015 }, { "epoch": 0.0342, "grad_norm": 11.716522216796875, "learning_rate": 1.867727272727273e-06, "loss": 6.235802841186524, "step": 63020 }, { "epoch": 0.03425, "grad_norm": 6.141246318817139, "learning_rate": 1.8674747474747477e-06, "loss": 6.33147964477539, "step": 63025 }, { "epoch": 0.0343, "grad_norm": 5.0893168449401855, "learning_rate": 1.8672222222222225e-06, "loss": 6.264434814453125, "step": 63030 }, { "epoch": 0.03435, "grad_norm": 5.155243873596191, "learning_rate": 1.8669696969696971e-06, "loss": 6.258856201171875, "step": 63035 }, { "epoch": 0.0344, "grad_norm": 4.447329998016357, "learning_rate": 1.866717171717172e-06, "loss": 6.214199829101562, "step": 63040 }, { "epoch": 0.03445, "grad_norm": 8.268938064575195, "learning_rate": 1.8664646464646466e-06, "loss": 6.273750686645508, "step": 63045 }, { "epoch": 0.0345, "grad_norm": 8.447366714477539, "learning_rate": 1.8662121212121215e-06, "loss": 6.2320610046386715, "step": 63050 }, { "epoch": 0.03455, "grad_norm": 7.9442644119262695, "learning_rate": 1.865959595959596e-06, "loss": 6.224609375, "step": 63055 }, { "epoch": 0.0346, "grad_norm": 8.911898612976074, "learning_rate": 1.865707070707071e-06, "loss": 6.2461097717285154, "step": 63060 }, { "epoch": 0.03465, "grad_norm": 6.631239414215088, "learning_rate": 1.8654545454545456e-06, "loss": 6.247247695922852, "step": 63065 }, { "epoch": 0.0347, "grad_norm": 24.05006980895996, "learning_rate": 1.8652020202020204e-06, "loss": 6.347602462768554, "step": 63070 }, { "epoch": 0.03475, "grad_norm": 7.583700656890869, "learning_rate": 1.864949494949495e-06, "loss": 6.314117813110352, "step": 63075 }, { "epoch": 0.0348, "grad_norm": 9.594945907592773, "learning_rate": 1.8646969696969699e-06, "loss": 6.219723129272461, "step": 63080 }, { "epoch": 0.03485, "grad_norm": 7.3650312423706055, "learning_rate": 1.8644444444444445e-06, "loss": 6.254695892333984, "step": 63085 }, { "epoch": 0.0349, "grad_norm": 11.260332107543945, "learning_rate": 1.8641919191919193e-06, "loss": 6.249645614624024, "step": 63090 }, { "epoch": 0.03495, "grad_norm": 6.17494010925293, "learning_rate": 1.863939393939394e-06, "loss": 6.2059883117675785, "step": 63095 }, { "epoch": 0.035, "grad_norm": 13.92389965057373, "learning_rate": 1.863686868686869e-06, "loss": 6.422450256347656, "step": 63100 }, { "epoch": 0.03505, "grad_norm": 5.565371513366699, "learning_rate": 1.8634343434343434e-06, "loss": 6.235791778564453, "step": 63105 }, { "epoch": 0.0351, "grad_norm": 4.647841453552246, "learning_rate": 1.8631818181818185e-06, "loss": 6.238362884521484, "step": 63110 }, { "epoch": 0.03515, "grad_norm": 40.24412536621094, "learning_rate": 1.8629292929292931e-06, "loss": 6.193016815185547, "step": 63115 }, { "epoch": 0.0352, "grad_norm": 7.892206192016602, "learning_rate": 1.862676767676768e-06, "loss": 6.111701965332031, "step": 63120 }, { "epoch": 0.03525, "grad_norm": 4.01322078704834, "learning_rate": 1.8624242424242426e-06, "loss": 6.228893661499024, "step": 63125 }, { "epoch": 0.0353, "grad_norm": 5.386997699737549, "learning_rate": 1.8621717171717174e-06, "loss": 6.266762161254883, "step": 63130 }, { "epoch": 0.03535, "grad_norm": 5.763922691345215, "learning_rate": 1.861919191919192e-06, "loss": 6.29063606262207, "step": 63135 }, { "epoch": 0.0354, "grad_norm": 7.502102851867676, "learning_rate": 1.861666666666667e-06, "loss": 6.258273696899414, "step": 63140 }, { "epoch": 0.03545, "grad_norm": 6.434737205505371, "learning_rate": 1.8614141414141415e-06, "loss": 6.277191925048828, "step": 63145 }, { "epoch": 0.0355, "grad_norm": 11.850452423095703, "learning_rate": 1.8611616161616164e-06, "loss": 6.322178649902344, "step": 63150 }, { "epoch": 0.03555, "grad_norm": 5.10305118560791, "learning_rate": 1.860909090909091e-06, "loss": 6.259673309326172, "step": 63155 }, { "epoch": 0.0356, "grad_norm": 8.549970626831055, "learning_rate": 1.8606565656565659e-06, "loss": 6.19665412902832, "step": 63160 }, { "epoch": 0.03565, "grad_norm": 9.004919052124023, "learning_rate": 1.8604040404040405e-06, "loss": 6.2287639617919925, "step": 63165 }, { "epoch": 0.0357, "grad_norm": 4.158962726593018, "learning_rate": 1.8601515151515153e-06, "loss": 6.253522872924805, "step": 63170 }, { "epoch": 0.03575, "grad_norm": 27.275623321533203, "learning_rate": 1.85989898989899e-06, "loss": 6.195326232910157, "step": 63175 }, { "epoch": 0.0358, "grad_norm": 13.485187530517578, "learning_rate": 1.8596464646464648e-06, "loss": 5.685382461547851, "step": 63180 }, { "epoch": 0.03585, "grad_norm": 8.101243019104004, "learning_rate": 1.8593939393939394e-06, "loss": 6.2543083190917965, "step": 63185 }, { "epoch": 0.0359, "grad_norm": 8.586892127990723, "learning_rate": 1.8591414141414143e-06, "loss": 6.254184722900391, "step": 63190 }, { "epoch": 0.03595, "grad_norm": 25.462453842163086, "learning_rate": 1.858888888888889e-06, "loss": 6.292184066772461, "step": 63195 }, { "epoch": 0.036, "grad_norm": 4.922920227050781, "learning_rate": 1.8586363636363637e-06, "loss": 6.248078155517578, "step": 63200 }, { "epoch": 0.03605, "grad_norm": 5.647204875946045, "learning_rate": 1.8583838383838384e-06, "loss": 6.29119644165039, "step": 63205 }, { "epoch": 0.0361, "grad_norm": 6.2679123878479, "learning_rate": 1.8581313131313134e-06, "loss": 6.300991058349609, "step": 63210 }, { "epoch": 0.03615, "grad_norm": 8.124344825744629, "learning_rate": 1.8578787878787878e-06, "loss": 6.225589370727539, "step": 63215 }, { "epoch": 0.0362, "grad_norm": 6.75507116317749, "learning_rate": 1.857626262626263e-06, "loss": 6.252435302734375, "step": 63220 }, { "epoch": 0.03625, "grad_norm": 8.823911666870117, "learning_rate": 1.8573737373737375e-06, "loss": 6.25610122680664, "step": 63225 }, { "epoch": 0.0363, "grad_norm": 7.762730598449707, "learning_rate": 1.8571212121212124e-06, "loss": 6.233866119384766, "step": 63230 }, { "epoch": 0.03635, "grad_norm": 5.88719367980957, "learning_rate": 1.856868686868687e-06, "loss": 6.268344116210938, "step": 63235 }, { "epoch": 0.0364, "grad_norm": 6.02085018157959, "learning_rate": 1.8566161616161618e-06, "loss": 6.244473648071289, "step": 63240 }, { "epoch": 0.03645, "grad_norm": 4.281991004943848, "learning_rate": 1.8563636363636365e-06, "loss": 6.239959716796875, "step": 63245 }, { "epoch": 0.0365, "grad_norm": 7.133559226989746, "learning_rate": 1.8561111111111113e-06, "loss": 6.28869743347168, "step": 63250 }, { "epoch": 0.03655, "grad_norm": 4.812272548675537, "learning_rate": 1.855858585858586e-06, "loss": 6.302893829345703, "step": 63255 }, { "epoch": 0.0366, "grad_norm": 7.350673198699951, "learning_rate": 1.8556060606060608e-06, "loss": 6.230399322509766, "step": 63260 }, { "epoch": 0.03665, "grad_norm": 7.8250041007995605, "learning_rate": 1.8553535353535354e-06, "loss": 6.2784278869628904, "step": 63265 }, { "epoch": 0.0367, "grad_norm": 6.51397180557251, "learning_rate": 1.8551010101010103e-06, "loss": 6.272776794433594, "step": 63270 }, { "epoch": 0.03675, "grad_norm": 4.36152982711792, "learning_rate": 1.8548484848484849e-06, "loss": 6.262687683105469, "step": 63275 }, { "epoch": 0.0368, "grad_norm": 7.3797287940979, "learning_rate": 1.8545959595959597e-06, "loss": 6.252901458740235, "step": 63280 }, { "epoch": 0.03685, "grad_norm": 5.69044303894043, "learning_rate": 1.8543434343434344e-06, "loss": 6.213308334350586, "step": 63285 }, { "epoch": 0.0369, "grad_norm": 7.6013078689575195, "learning_rate": 1.8540909090909092e-06, "loss": 6.230357360839844, "step": 63290 }, { "epoch": 0.03695, "grad_norm": 4.344671249389648, "learning_rate": 1.8538383838383838e-06, "loss": 6.209222030639649, "step": 63295 }, { "epoch": 0.037, "grad_norm": 6.399798393249512, "learning_rate": 1.8535858585858587e-06, "loss": 6.282685852050781, "step": 63300 }, { "epoch": 0.03705, "grad_norm": 3.9360508918762207, "learning_rate": 1.8533333333333333e-06, "loss": 6.259840393066407, "step": 63305 }, { "epoch": 0.0371, "grad_norm": 4.420217037200928, "learning_rate": 1.8530808080808084e-06, "loss": 6.271533203125, "step": 63310 }, { "epoch": 0.03715, "grad_norm": 8.077632904052734, "learning_rate": 1.8528282828282828e-06, "loss": 6.265077209472656, "step": 63315 }, { "epoch": 0.0372, "grad_norm": 5.042235851287842, "learning_rate": 1.8525757575757578e-06, "loss": 6.266518783569336, "step": 63320 }, { "epoch": 0.03725, "grad_norm": 6.203110694885254, "learning_rate": 1.8523232323232325e-06, "loss": 6.2779090881347654, "step": 63325 }, { "epoch": 0.0373, "grad_norm": 5.600529670715332, "learning_rate": 1.8520707070707073e-06, "loss": 6.351722335815429, "step": 63330 }, { "epoch": 0.03735, "grad_norm": 9.142404556274414, "learning_rate": 1.851818181818182e-06, "loss": 6.210244750976562, "step": 63335 }, { "epoch": 0.0374, "grad_norm": 4.845542907714844, "learning_rate": 1.8515656565656568e-06, "loss": 6.304673767089843, "step": 63340 }, { "epoch": 0.03745, "grad_norm": 11.03978443145752, "learning_rate": 1.8513131313131314e-06, "loss": 6.2466999053955075, "step": 63345 }, { "epoch": 0.0375, "grad_norm": 5.555411338806152, "learning_rate": 1.8510606060606062e-06, "loss": 6.240317153930664, "step": 63350 }, { "epoch": 0.03755, "grad_norm": 10.264180183410645, "learning_rate": 1.8508080808080809e-06, "loss": 6.212852859497071, "step": 63355 }, { "epoch": 0.0376, "grad_norm": 4.319433212280273, "learning_rate": 1.8505555555555557e-06, "loss": 6.5056404113769535, "step": 63360 }, { "epoch": 0.03765, "grad_norm": 5.881531715393066, "learning_rate": 1.8503030303030303e-06, "loss": 6.594321441650391, "step": 63365 }, { "epoch": 0.0377, "grad_norm": 5.777106285095215, "learning_rate": 1.8500505050505052e-06, "loss": 6.17679443359375, "step": 63370 }, { "epoch": 0.03775, "grad_norm": 5.252884864807129, "learning_rate": 1.8497979797979798e-06, "loss": 6.2729438781738285, "step": 63375 }, { "epoch": 0.0378, "grad_norm": 23.662830352783203, "learning_rate": 1.8495454545454547e-06, "loss": 6.222146224975586, "step": 63380 }, { "epoch": 0.03785, "grad_norm": 5.053994655609131, "learning_rate": 1.8492929292929293e-06, "loss": 6.255959320068359, "step": 63385 }, { "epoch": 0.0379, "grad_norm": 31.04216766357422, "learning_rate": 1.8490404040404041e-06, "loss": 6.409464263916016, "step": 63390 }, { "epoch": 0.03795, "grad_norm": 12.459811210632324, "learning_rate": 1.848787878787879e-06, "loss": 6.46009521484375, "step": 63395 }, { "epoch": 0.038, "grad_norm": 9.44340705871582, "learning_rate": 1.8485353535353536e-06, "loss": 6.285673904418945, "step": 63400 }, { "epoch": 0.03805, "grad_norm": 5.854964256286621, "learning_rate": 1.8482828282828287e-06, "loss": 6.196659088134766, "step": 63405 }, { "epoch": 0.0381, "grad_norm": 9.129405975341797, "learning_rate": 1.848030303030303e-06, "loss": 6.244552993774414, "step": 63410 }, { "epoch": 0.03815, "grad_norm": 5.18573522567749, "learning_rate": 1.8477777777777781e-06, "loss": 6.275113677978515, "step": 63415 }, { "epoch": 0.0382, "grad_norm": 7.211594581604004, "learning_rate": 1.8475252525252528e-06, "loss": 6.051445388793946, "step": 63420 }, { "epoch": 0.03825, "grad_norm": 5.058740615844727, "learning_rate": 1.8472727272727276e-06, "loss": 6.26842155456543, "step": 63425 }, { "epoch": 0.0383, "grad_norm": 4.605823516845703, "learning_rate": 1.8470202020202022e-06, "loss": 6.278911972045899, "step": 63430 }, { "epoch": 0.03835, "grad_norm": 19.718828201293945, "learning_rate": 1.846767676767677e-06, "loss": 6.151982116699219, "step": 63435 }, { "epoch": 0.0384, "grad_norm": 5.91071081161499, "learning_rate": 1.8465151515151517e-06, "loss": 6.257181930541992, "step": 63440 }, { "epoch": 0.03845, "grad_norm": 5.6441144943237305, "learning_rate": 1.8462626262626265e-06, "loss": 6.193684387207031, "step": 63445 }, { "epoch": 0.0385, "grad_norm": 5.396775722503662, "learning_rate": 1.8460101010101012e-06, "loss": 6.242092514038086, "step": 63450 }, { "epoch": 0.03855, "grad_norm": 3.399127960205078, "learning_rate": 1.845757575757576e-06, "loss": 6.200938034057617, "step": 63455 }, { "epoch": 0.0386, "grad_norm": 6.382723331451416, "learning_rate": 1.8455050505050507e-06, "loss": 6.2448982238769535, "step": 63460 }, { "epoch": 0.03865, "grad_norm": 5.355175495147705, "learning_rate": 1.8452525252525255e-06, "loss": 6.423387908935547, "step": 63465 }, { "epoch": 0.0387, "grad_norm": 5.250028133392334, "learning_rate": 1.8450000000000001e-06, "loss": 6.259105682373047, "step": 63470 }, { "epoch": 0.03875, "grad_norm": 3.5137619972229004, "learning_rate": 1.844747474747475e-06, "loss": 6.21685791015625, "step": 63475 }, { "epoch": 0.0388, "grad_norm": 4.162325859069824, "learning_rate": 1.8444949494949496e-06, "loss": 6.228852081298828, "step": 63480 }, { "epoch": 0.03885, "grad_norm": 5.770090103149414, "learning_rate": 1.8442424242424244e-06, "loss": 6.247181701660156, "step": 63485 }, { "epoch": 0.0389, "grad_norm": 6.208181381225586, "learning_rate": 1.843989898989899e-06, "loss": 6.234351348876953, "step": 63490 }, { "epoch": 0.03895, "grad_norm": 9.40956974029541, "learning_rate": 1.843737373737374e-06, "loss": 6.295174789428711, "step": 63495 }, { "epoch": 0.039, "grad_norm": 10.740508079528809, "learning_rate": 1.8434848484848485e-06, "loss": 6.186486434936524, "step": 63500 }, { "epoch": 0.03905, "grad_norm": 6.136538028717041, "learning_rate": 1.8432323232323234e-06, "loss": 6.228067779541016, "step": 63505 }, { "epoch": 0.0391, "grad_norm": 4.480775833129883, "learning_rate": 1.842979797979798e-06, "loss": 6.21702880859375, "step": 63510 }, { "epoch": 0.03915, "grad_norm": 5.7619099617004395, "learning_rate": 1.842727272727273e-06, "loss": 6.257749938964844, "step": 63515 }, { "epoch": 0.0392, "grad_norm": 7.975964069366455, "learning_rate": 1.8424747474747475e-06, "loss": 6.258835983276367, "step": 63520 }, { "epoch": 0.03925, "grad_norm": 6.819736003875732, "learning_rate": 1.8422222222222225e-06, "loss": 6.268622207641601, "step": 63525 }, { "epoch": 0.0393, "grad_norm": 4.34967565536499, "learning_rate": 1.8419696969696972e-06, "loss": 6.2624870300292965, "step": 63530 }, { "epoch": 0.03935, "grad_norm": 4.606088161468506, "learning_rate": 1.841717171717172e-06, "loss": 6.219048309326172, "step": 63535 }, { "epoch": 0.0394, "grad_norm": 4.4782023429870605, "learning_rate": 1.8414646464646466e-06, "loss": 6.266913223266601, "step": 63540 }, { "epoch": 0.03945, "grad_norm": 5.709232807159424, "learning_rate": 1.8412121212121215e-06, "loss": 6.317603302001953, "step": 63545 }, { "epoch": 0.0395, "grad_norm": 7.272075176239014, "learning_rate": 1.8409595959595961e-06, "loss": 6.274281311035156, "step": 63550 }, { "epoch": 0.03955, "grad_norm": 5.234487533569336, "learning_rate": 1.840707070707071e-06, "loss": 6.243783569335937, "step": 63555 }, { "epoch": 0.0396, "grad_norm": 10.803768157958984, "learning_rate": 1.8404545454545456e-06, "loss": 6.030181121826172, "step": 63560 }, { "epoch": 0.03965, "grad_norm": 8.406307220458984, "learning_rate": 1.8402020202020204e-06, "loss": 6.218347549438477, "step": 63565 }, { "epoch": 0.0397, "grad_norm": 4.652546405792236, "learning_rate": 1.839949494949495e-06, "loss": 6.393054962158203, "step": 63570 }, { "epoch": 0.03975, "grad_norm": 6.827479839324951, "learning_rate": 1.83969696969697e-06, "loss": 6.270752716064453, "step": 63575 }, { "epoch": 0.0398, "grad_norm": 3.8859434127807617, "learning_rate": 1.8394444444444445e-06, "loss": 6.286027526855468, "step": 63580 }, { "epoch": 0.03985, "grad_norm": 6.249242305755615, "learning_rate": 1.8391919191919194e-06, "loss": 6.272420883178711, "step": 63585 }, { "epoch": 0.0399, "grad_norm": 12.077126502990723, "learning_rate": 1.838939393939394e-06, "loss": 6.253696441650391, "step": 63590 }, { "epoch": 0.03995, "grad_norm": 4.366604328155518, "learning_rate": 1.8386868686868688e-06, "loss": 6.273829650878906, "step": 63595 }, { "epoch": 0.04, "grad_norm": 9.498092651367188, "learning_rate": 1.8384343434343435e-06, "loss": 6.299504089355469, "step": 63600 }, { "epoch": 0.04005, "grad_norm": 5.877297878265381, "learning_rate": 1.8381818181818183e-06, "loss": 6.194567108154297, "step": 63605 }, { "epoch": 0.0401, "grad_norm": 5.387487411499023, "learning_rate": 1.837929292929293e-06, "loss": 6.269086074829102, "step": 63610 }, { "epoch": 0.04015, "grad_norm": 6.667598724365234, "learning_rate": 1.837676767676768e-06, "loss": 6.226121520996093, "step": 63615 }, { "epoch": 0.0402, "grad_norm": 6.030226707458496, "learning_rate": 1.8374242424242424e-06, "loss": 6.254964447021484, "step": 63620 }, { "epoch": 0.04025, "grad_norm": 8.503873825073242, "learning_rate": 1.8371717171717175e-06, "loss": 6.4447174072265625, "step": 63625 }, { "epoch": 0.0403, "grad_norm": 8.396950721740723, "learning_rate": 1.8369191919191919e-06, "loss": 6.23528938293457, "step": 63630 }, { "epoch": 0.04035, "grad_norm": 5.751489639282227, "learning_rate": 1.836666666666667e-06, "loss": 6.297233581542969, "step": 63635 }, { "epoch": 0.0404, "grad_norm": 11.038162231445312, "learning_rate": 1.8364141414141416e-06, "loss": 6.261130905151367, "step": 63640 }, { "epoch": 0.04045, "grad_norm": 5.666700839996338, "learning_rate": 1.8361616161616164e-06, "loss": 6.242860794067383, "step": 63645 }, { "epoch": 0.0405, "grad_norm": 5.689192771911621, "learning_rate": 1.835909090909091e-06, "loss": 6.276399230957031, "step": 63650 }, { "epoch": 0.04055, "grad_norm": 8.541776657104492, "learning_rate": 1.8356565656565659e-06, "loss": 6.247968292236328, "step": 63655 }, { "epoch": 0.0406, "grad_norm": 6.289704322814941, "learning_rate": 1.8354040404040405e-06, "loss": 6.264659118652344, "step": 63660 }, { "epoch": 0.04065, "grad_norm": 7.209199905395508, "learning_rate": 1.8351515151515154e-06, "loss": 6.267370223999023, "step": 63665 }, { "epoch": 0.0407, "grad_norm": 6.7269287109375, "learning_rate": 1.83489898989899e-06, "loss": 6.357455825805664, "step": 63670 }, { "epoch": 0.04075, "grad_norm": 7.6590399742126465, "learning_rate": 1.8346464646464648e-06, "loss": 6.242164993286133, "step": 63675 }, { "epoch": 0.0408, "grad_norm": 9.692299842834473, "learning_rate": 1.8343939393939395e-06, "loss": 6.131193923950195, "step": 63680 }, { "epoch": 0.04085, "grad_norm": 6.345726490020752, "learning_rate": 1.8341414141414143e-06, "loss": 6.274262237548828, "step": 63685 }, { "epoch": 0.0409, "grad_norm": 45.96640396118164, "learning_rate": 1.833888888888889e-06, "loss": 6.12349853515625, "step": 63690 }, { "epoch": 0.04095, "grad_norm": 6.09083890914917, "learning_rate": 1.8336363636363638e-06, "loss": 6.1735984802246096, "step": 63695 }, { "epoch": 0.041, "grad_norm": 5.969856262207031, "learning_rate": 1.8333838383838384e-06, "loss": 6.227943801879883, "step": 63700 }, { "epoch": 0.04105, "grad_norm": 4.612977981567383, "learning_rate": 1.8331313131313132e-06, "loss": 6.214680099487305, "step": 63705 }, { "epoch": 0.0411, "grad_norm": 8.186897277832031, "learning_rate": 1.8328787878787879e-06, "loss": 6.261192321777344, "step": 63710 }, { "epoch": 0.04115, "grad_norm": 37.41847229003906, "learning_rate": 1.8326262626262627e-06, "loss": 6.22698860168457, "step": 63715 }, { "epoch": 0.0412, "grad_norm": 6.990822792053223, "learning_rate": 1.8323737373737373e-06, "loss": 6.255609130859375, "step": 63720 }, { "epoch": 0.04125, "grad_norm": 5.018374443054199, "learning_rate": 1.8321212121212124e-06, "loss": 6.270475006103515, "step": 63725 }, { "epoch": 0.0413, "grad_norm": 19.334266662597656, "learning_rate": 1.8318686868686868e-06, "loss": 6.246289443969727, "step": 63730 }, { "epoch": 0.04135, "grad_norm": 4.252907752990723, "learning_rate": 1.8316161616161619e-06, "loss": 6.2620796203613285, "step": 63735 }, { "epoch": 0.0414, "grad_norm": 6.304240703582764, "learning_rate": 1.8313636363636365e-06, "loss": 6.291230773925781, "step": 63740 }, { "epoch": 0.04145, "grad_norm": 9.268484115600586, "learning_rate": 1.8311111111111113e-06, "loss": 6.248701095581055, "step": 63745 }, { "epoch": 0.0415, "grad_norm": 10.427882194519043, "learning_rate": 1.830858585858586e-06, "loss": 6.4011474609375, "step": 63750 }, { "epoch": 0.04155, "grad_norm": 6.6828155517578125, "learning_rate": 1.8306060606060608e-06, "loss": 6.2243804931640625, "step": 63755 }, { "epoch": 0.0416, "grad_norm": 5.97808313369751, "learning_rate": 1.8303535353535354e-06, "loss": 6.254096221923828, "step": 63760 }, { "epoch": 0.04165, "grad_norm": 5.977655410766602, "learning_rate": 1.8301010101010103e-06, "loss": 6.243564605712891, "step": 63765 }, { "epoch": 0.0417, "grad_norm": 5.0958356857299805, "learning_rate": 1.829848484848485e-06, "loss": 6.189931106567383, "step": 63770 }, { "epoch": 0.04175, "grad_norm": 4.71998929977417, "learning_rate": 1.8295959595959598e-06, "loss": 6.277275085449219, "step": 63775 }, { "epoch": 0.0418, "grad_norm": 6.286371231079102, "learning_rate": 1.8293434343434344e-06, "loss": 6.258329772949219, "step": 63780 }, { "epoch": 0.04185, "grad_norm": 4.9238457679748535, "learning_rate": 1.8290909090909092e-06, "loss": 6.273722839355469, "step": 63785 }, { "epoch": 0.0419, "grad_norm": 5.280450344085693, "learning_rate": 1.8288383838383839e-06, "loss": 6.239279937744141, "step": 63790 }, { "epoch": 0.04195, "grad_norm": 7.784715175628662, "learning_rate": 1.8285858585858587e-06, "loss": 6.231131744384766, "step": 63795 }, { "epoch": 0.042, "grad_norm": 3.2942988872528076, "learning_rate": 1.8283333333333333e-06, "loss": 6.2681537628173825, "step": 63800 }, { "epoch": 0.04205, "grad_norm": 3.945326805114746, "learning_rate": 1.8280808080808082e-06, "loss": 6.232179260253906, "step": 63805 }, { "epoch": 0.0421, "grad_norm": 8.162704467773438, "learning_rate": 1.8278282828282828e-06, "loss": 6.2393440246582035, "step": 63810 }, { "epoch": 0.04215, "grad_norm": 26.146692276000977, "learning_rate": 1.8275757575757576e-06, "loss": 6.506771850585937, "step": 63815 }, { "epoch": 0.0422, "grad_norm": 4.920234680175781, "learning_rate": 1.8273232323232327e-06, "loss": 6.247260284423828, "step": 63820 }, { "epoch": 0.04225, "grad_norm": 5.24242639541626, "learning_rate": 1.8270707070707071e-06, "loss": 6.265586471557617, "step": 63825 }, { "epoch": 0.0423, "grad_norm": 5.7620368003845215, "learning_rate": 1.8268181818181822e-06, "loss": 6.527190399169922, "step": 63830 }, { "epoch": 0.04235, "grad_norm": 9.031364440917969, "learning_rate": 1.8265656565656568e-06, "loss": 6.3147529602050785, "step": 63835 }, { "epoch": 0.0424, "grad_norm": 12.51961612701416, "learning_rate": 1.8263131313131316e-06, "loss": 6.3748432159423825, "step": 63840 }, { "epoch": 0.04245, "grad_norm": 7.7886834144592285, "learning_rate": 1.8260606060606063e-06, "loss": 6.502082061767578, "step": 63845 }, { "epoch": 0.0425, "grad_norm": 5.371231555938721, "learning_rate": 1.8258080808080811e-06, "loss": 6.253247833251953, "step": 63850 }, { "epoch": 0.04255, "grad_norm": 11.773390769958496, "learning_rate": 1.8255555555555557e-06, "loss": 6.3622886657714846, "step": 63855 }, { "epoch": 0.0426, "grad_norm": 6.088590145111084, "learning_rate": 1.8253030303030306e-06, "loss": 6.231707763671875, "step": 63860 }, { "epoch": 0.04265, "grad_norm": 14.321440696716309, "learning_rate": 1.8250505050505052e-06, "loss": 6.861228179931641, "step": 63865 }, { "epoch": 0.0427, "grad_norm": 8.052658081054688, "learning_rate": 1.82479797979798e-06, "loss": 6.253096008300782, "step": 63870 }, { "epoch": 0.04275, "grad_norm": 4.610217094421387, "learning_rate": 1.8245454545454547e-06, "loss": 6.228322982788086, "step": 63875 }, { "epoch": 0.0428, "grad_norm": 7.374529838562012, "learning_rate": 1.8242929292929295e-06, "loss": 6.220591354370117, "step": 63880 }, { "epoch": 0.04285, "grad_norm": 7.82834529876709, "learning_rate": 1.8240404040404042e-06, "loss": 6.252062606811523, "step": 63885 }, { "epoch": 0.0429, "grad_norm": 8.5219087600708, "learning_rate": 1.823787878787879e-06, "loss": 6.490788269042969, "step": 63890 }, { "epoch": 0.04295, "grad_norm": 6.682433128356934, "learning_rate": 1.8235353535353536e-06, "loss": 6.367348098754883, "step": 63895 }, { "epoch": 0.043, "grad_norm": 3.6663296222686768, "learning_rate": 1.8232828282828285e-06, "loss": 6.2537586212158205, "step": 63900 }, { "epoch": 0.04305, "grad_norm": 7.389737129211426, "learning_rate": 1.8230303030303031e-06, "loss": 6.261312484741211, "step": 63905 }, { "epoch": 0.0431, "grad_norm": 9.73507022857666, "learning_rate": 1.822777777777778e-06, "loss": 6.240570831298828, "step": 63910 }, { "epoch": 0.04315, "grad_norm": 6.065658092498779, "learning_rate": 1.8225252525252526e-06, "loss": 6.279478454589844, "step": 63915 }, { "epoch": 0.0432, "grad_norm": 6.460761547088623, "learning_rate": 1.8222727272727274e-06, "loss": 6.275661849975586, "step": 63920 }, { "epoch": 0.04325, "grad_norm": 6.009379863739014, "learning_rate": 1.822020202020202e-06, "loss": 6.231656646728515, "step": 63925 }, { "epoch": 0.0433, "grad_norm": 5.845779895782471, "learning_rate": 1.821767676767677e-06, "loss": 6.246223449707031, "step": 63930 }, { "epoch": 0.04335, "grad_norm": 15.77898120880127, "learning_rate": 1.8215151515151515e-06, "loss": 6.459068298339844, "step": 63935 }, { "epoch": 0.0434, "grad_norm": 4.352250576019287, "learning_rate": 1.8212626262626266e-06, "loss": 6.305323791503906, "step": 63940 }, { "epoch": 0.04345, "grad_norm": 3.0947747230529785, "learning_rate": 1.8210101010101012e-06, "loss": 6.228583145141601, "step": 63945 }, { "epoch": 0.0435, "grad_norm": 21.288347244262695, "learning_rate": 1.820757575757576e-06, "loss": 6.331330490112305, "step": 63950 }, { "epoch": 0.04355, "grad_norm": 5.53897762298584, "learning_rate": 1.8205050505050507e-06, "loss": 6.266426467895508, "step": 63955 }, { "epoch": 0.0436, "grad_norm": 6.8038554191589355, "learning_rate": 1.8202525252525255e-06, "loss": 6.313303375244141, "step": 63960 }, { "epoch": 0.04365, "grad_norm": 5.390533447265625, "learning_rate": 1.8200000000000002e-06, "loss": 6.179523468017578, "step": 63965 }, { "epoch": 0.0437, "grad_norm": 5.600951194763184, "learning_rate": 1.819747474747475e-06, "loss": 6.240968322753906, "step": 63970 }, { "epoch": 0.04375, "grad_norm": 6.536344528198242, "learning_rate": 1.8194949494949496e-06, "loss": 6.294186401367187, "step": 63975 }, { "epoch": 0.0438, "grad_norm": 11.76267147064209, "learning_rate": 1.8192424242424245e-06, "loss": 6.206294250488281, "step": 63980 }, { "epoch": 0.04385, "grad_norm": 7.255908012390137, "learning_rate": 1.818989898989899e-06, "loss": 6.292045974731446, "step": 63985 }, { "epoch": 0.0439, "grad_norm": 4.3932085037231445, "learning_rate": 1.818737373737374e-06, "loss": 6.284463500976562, "step": 63990 }, { "epoch": 0.04395, "grad_norm": 6.843501567840576, "learning_rate": 1.8184848484848486e-06, "loss": 6.203104400634766, "step": 63995 }, { "epoch": 0.044, "grad_norm": 9.345640182495117, "learning_rate": 1.8182323232323234e-06, "loss": 6.195814514160157, "step": 64000 }, { "epoch": 0.04405, "grad_norm": 9.997307777404785, "learning_rate": 1.817979797979798e-06, "loss": 6.254281997680664, "step": 64005 }, { "epoch": 0.0441, "grad_norm": 5.298280715942383, "learning_rate": 1.8177272727272729e-06, "loss": 6.314870834350586, "step": 64010 }, { "epoch": 0.04415, "grad_norm": 13.183037757873535, "learning_rate": 1.8174747474747475e-06, "loss": 6.277528381347656, "step": 64015 }, { "epoch": 0.0442, "grad_norm": 10.565498352050781, "learning_rate": 1.8172222222222224e-06, "loss": 6.254827880859375, "step": 64020 }, { "epoch": 0.04425, "grad_norm": 5.355894088745117, "learning_rate": 1.816969696969697e-06, "loss": 6.264015960693359, "step": 64025 }, { "epoch": 0.0443, "grad_norm": 8.776520729064941, "learning_rate": 1.816717171717172e-06, "loss": 6.2912445068359375, "step": 64030 }, { "epoch": 0.04435, "grad_norm": 6.43766450881958, "learning_rate": 1.8164646464646465e-06, "loss": 6.267041015625, "step": 64035 }, { "epoch": 0.0444, "grad_norm": 6.233709812164307, "learning_rate": 1.8162121212121215e-06, "loss": 6.387623596191406, "step": 64040 }, { "epoch": 0.04445, "grad_norm": 9.299727439880371, "learning_rate": 1.8159595959595961e-06, "loss": 6.2385498046875, "step": 64045 }, { "epoch": 0.0445, "grad_norm": 8.3668794631958, "learning_rate": 1.815707070707071e-06, "loss": 6.234900665283203, "step": 64050 }, { "epoch": 0.04455, "grad_norm": 6.243688106536865, "learning_rate": 1.8154545454545456e-06, "loss": 6.203908157348633, "step": 64055 }, { "epoch": 0.0446, "grad_norm": 7.142633438110352, "learning_rate": 1.8152020202020205e-06, "loss": 6.294619750976563, "step": 64060 }, { "epoch": 0.04465, "grad_norm": 6.378704071044922, "learning_rate": 1.814949494949495e-06, "loss": 6.212623214721679, "step": 64065 }, { "epoch": 0.0447, "grad_norm": 14.410196304321289, "learning_rate": 1.81469696969697e-06, "loss": 6.2584785461425785, "step": 64070 }, { "epoch": 0.04475, "grad_norm": 4.522699356079102, "learning_rate": 1.8144444444444446e-06, "loss": 6.2280632019042965, "step": 64075 }, { "epoch": 0.0448, "grad_norm": 4.692818641662598, "learning_rate": 1.8141919191919194e-06, "loss": 6.268125915527344, "step": 64080 }, { "epoch": 0.04485, "grad_norm": 8.945298194885254, "learning_rate": 1.813939393939394e-06, "loss": 6.305958557128906, "step": 64085 }, { "epoch": 0.0449, "grad_norm": 5.300865650177002, "learning_rate": 1.8136868686868689e-06, "loss": 6.22606201171875, "step": 64090 }, { "epoch": 0.04495, "grad_norm": 6.501276969909668, "learning_rate": 1.8134343434343435e-06, "loss": 6.244099426269531, "step": 64095 }, { "epoch": 0.045, "grad_norm": 7.3417534828186035, "learning_rate": 1.8131818181818183e-06, "loss": 6.259761810302734, "step": 64100 }, { "epoch": 0.04505, "grad_norm": 4.4701642990112305, "learning_rate": 1.812929292929293e-06, "loss": 6.4082481384277346, "step": 64105 }, { "epoch": 0.0451, "grad_norm": 6.463104724884033, "learning_rate": 1.8126767676767678e-06, "loss": 6.355242919921875, "step": 64110 }, { "epoch": 0.04515, "grad_norm": 3.614912986755371, "learning_rate": 1.8124242424242424e-06, "loss": 6.241604614257812, "step": 64115 }, { "epoch": 0.0452, "grad_norm": 5.286493301391602, "learning_rate": 1.8121717171717173e-06, "loss": 6.243497467041015, "step": 64120 }, { "epoch": 0.04525, "grad_norm": 5.6884589195251465, "learning_rate": 1.811919191919192e-06, "loss": 6.219822692871094, "step": 64125 }, { "epoch": 0.0453, "grad_norm": 6.172665596008301, "learning_rate": 1.8116666666666668e-06, "loss": 6.242216873168945, "step": 64130 }, { "epoch": 0.04535, "grad_norm": 5.135211944580078, "learning_rate": 1.8114141414141414e-06, "loss": 6.2486015319824215, "step": 64135 }, { "epoch": 0.0454, "grad_norm": 8.020879745483398, "learning_rate": 1.8111616161616164e-06, "loss": 6.220149230957031, "step": 64140 }, { "epoch": 0.04545, "grad_norm": 3.277358055114746, "learning_rate": 1.8109090909090909e-06, "loss": 6.2524677276611325, "step": 64145 }, { "epoch": 0.0455, "grad_norm": 5.319203853607178, "learning_rate": 1.810656565656566e-06, "loss": 6.243610382080078, "step": 64150 }, { "epoch": 0.04555, "grad_norm": 9.518383026123047, "learning_rate": 1.8104040404040405e-06, "loss": 6.291757202148437, "step": 64155 }, { "epoch": 0.0456, "grad_norm": 3.94065260887146, "learning_rate": 1.8101515151515154e-06, "loss": 6.229730224609375, "step": 64160 }, { "epoch": 0.04565, "grad_norm": 6.425050258636475, "learning_rate": 1.80989898989899e-06, "loss": 6.257107925415039, "step": 64165 }, { "epoch": 0.0457, "grad_norm": 4.495912551879883, "learning_rate": 1.8096464646464649e-06, "loss": 6.226162719726562, "step": 64170 }, { "epoch": 0.04575, "grad_norm": 7.890367031097412, "learning_rate": 1.8093939393939395e-06, "loss": 6.243947219848633, "step": 64175 }, { "epoch": 0.0458, "grad_norm": 5.029972553253174, "learning_rate": 1.8091414141414143e-06, "loss": 6.24165153503418, "step": 64180 }, { "epoch": 0.04585, "grad_norm": 9.792916297912598, "learning_rate": 1.808888888888889e-06, "loss": 6.271401214599609, "step": 64185 }, { "epoch": 0.0459, "grad_norm": 5.123444080352783, "learning_rate": 1.8086363636363638e-06, "loss": 6.258549499511719, "step": 64190 }, { "epoch": 0.04595, "grad_norm": 7.586676597595215, "learning_rate": 1.8083838383838384e-06, "loss": 6.20000114440918, "step": 64195 }, { "epoch": 0.046, "grad_norm": 6.342522144317627, "learning_rate": 1.8081313131313133e-06, "loss": 6.233256530761719, "step": 64200 }, { "epoch": 0.04605, "grad_norm": 9.468435287475586, "learning_rate": 1.807878787878788e-06, "loss": 6.2350818634033205, "step": 64205 }, { "epoch": 0.0461, "grad_norm": 5.433266639709473, "learning_rate": 1.8076262626262627e-06, "loss": 6.233349609375, "step": 64210 }, { "epoch": 0.04615, "grad_norm": 10.761374473571777, "learning_rate": 1.8073737373737374e-06, "loss": 6.2744590759277346, "step": 64215 }, { "epoch": 0.0462, "grad_norm": 6.138106822967529, "learning_rate": 1.8071212121212122e-06, "loss": 6.197789001464844, "step": 64220 }, { "epoch": 0.04625, "grad_norm": 5.6526312828063965, "learning_rate": 1.8068686868686868e-06, "loss": 6.2651512145996096, "step": 64225 }, { "epoch": 0.0463, "grad_norm": 5.2215471267700195, "learning_rate": 1.8066161616161617e-06, "loss": 6.234880065917968, "step": 64230 }, { "epoch": 0.04635, "grad_norm": 7.518875598907471, "learning_rate": 1.8063636363636363e-06, "loss": 6.230785369873047, "step": 64235 }, { "epoch": 0.0464, "grad_norm": 5.43435525894165, "learning_rate": 1.8061111111111112e-06, "loss": 6.298881149291992, "step": 64240 }, { "epoch": 0.04645, "grad_norm": 26.36265754699707, "learning_rate": 1.8058585858585862e-06, "loss": 6.271054458618164, "step": 64245 }, { "epoch": 0.0465, "grad_norm": 4.390528202056885, "learning_rate": 1.8056060606060608e-06, "loss": 6.243636703491211, "step": 64250 }, { "epoch": 0.04655, "grad_norm": 8.808510780334473, "learning_rate": 1.8053535353535357e-06, "loss": 6.184375762939453, "step": 64255 }, { "epoch": 0.0466, "grad_norm": 6.59709358215332, "learning_rate": 1.8051010101010103e-06, "loss": 6.256897354125977, "step": 64260 }, { "epoch": 0.04665, "grad_norm": 4.84901762008667, "learning_rate": 1.8048484848484852e-06, "loss": 6.238007736206055, "step": 64265 }, { "epoch": 0.0467, "grad_norm": 4.723090171813965, "learning_rate": 1.8045959595959598e-06, "loss": 6.255257797241211, "step": 64270 }, { "epoch": 0.04675, "grad_norm": 4.052208423614502, "learning_rate": 1.8043434343434346e-06, "loss": 6.268461227416992, "step": 64275 }, { "epoch": 0.0468, "grad_norm": 9.277946472167969, "learning_rate": 1.8040909090909093e-06, "loss": 6.264738845825195, "step": 64280 }, { "epoch": 0.04685, "grad_norm": 6.915939807891846, "learning_rate": 1.803838383838384e-06, "loss": 6.256756591796875, "step": 64285 }, { "epoch": 0.0469, "grad_norm": 7.7435712814331055, "learning_rate": 1.8035858585858587e-06, "loss": 6.212151718139649, "step": 64290 }, { "epoch": 0.04695, "grad_norm": 5.358702182769775, "learning_rate": 1.8033333333333336e-06, "loss": 6.360283660888672, "step": 64295 }, { "epoch": 0.047, "grad_norm": 8.968027114868164, "learning_rate": 1.8030808080808082e-06, "loss": 6.258599853515625, "step": 64300 }, { "epoch": 0.04705, "grad_norm": 7.886824607849121, "learning_rate": 1.802828282828283e-06, "loss": 6.249900817871094, "step": 64305 }, { "epoch": 0.0471, "grad_norm": 7.484749794006348, "learning_rate": 1.8025757575757577e-06, "loss": 6.251715469360351, "step": 64310 }, { "epoch": 0.04715, "grad_norm": 7.379101276397705, "learning_rate": 1.8023232323232325e-06, "loss": 6.336040496826172, "step": 64315 }, { "epoch": 0.0472, "grad_norm": 13.912994384765625, "learning_rate": 1.8020707070707071e-06, "loss": 6.260861587524414, "step": 64320 }, { "epoch": 0.04725, "grad_norm": 6.15310001373291, "learning_rate": 1.801818181818182e-06, "loss": 6.194292831420898, "step": 64325 }, { "epoch": 0.0473, "grad_norm": 18.302181243896484, "learning_rate": 1.8015656565656566e-06, "loss": 6.4917457580566404, "step": 64330 }, { "epoch": 0.04735, "grad_norm": 12.668583869934082, "learning_rate": 1.8013131313131317e-06, "loss": 6.7390495300292965, "step": 64335 }, { "epoch": 0.0474, "grad_norm": 7.250110626220703, "learning_rate": 1.801060606060606e-06, "loss": 6.350239181518555, "step": 64340 }, { "epoch": 0.04745, "grad_norm": 7.809356212615967, "learning_rate": 1.8008080808080811e-06, "loss": 6.247915649414063, "step": 64345 }, { "epoch": 0.0475, "grad_norm": 5.9174933433532715, "learning_rate": 1.8005555555555556e-06, "loss": 6.234493255615234, "step": 64350 }, { "epoch": 0.04755, "grad_norm": 4.951122760772705, "learning_rate": 1.8003030303030306e-06, "loss": 6.236125564575195, "step": 64355 }, { "epoch": 0.0476, "grad_norm": 7.015044212341309, "learning_rate": 1.8000505050505052e-06, "loss": 6.2691089630126955, "step": 64360 }, { "epoch": 0.04765, "grad_norm": 13.557948112487793, "learning_rate": 1.79979797979798e-06, "loss": 6.274773406982422, "step": 64365 }, { "epoch": 0.0477, "grad_norm": 7.5015411376953125, "learning_rate": 1.7995454545454547e-06, "loss": 6.234795379638672, "step": 64370 }, { "epoch": 0.04775, "grad_norm": 6.1997456550598145, "learning_rate": 1.7992929292929296e-06, "loss": 6.273141860961914, "step": 64375 }, { "epoch": 0.0478, "grad_norm": 7.205848693847656, "learning_rate": 1.7990404040404042e-06, "loss": 6.2300464630126955, "step": 64380 }, { "epoch": 0.04785, "grad_norm": 9.958065032958984, "learning_rate": 1.798787878787879e-06, "loss": 6.235015487670898, "step": 64385 }, { "epoch": 0.0479, "grad_norm": 6.007519721984863, "learning_rate": 1.7985353535353537e-06, "loss": 6.258592987060547, "step": 64390 }, { "epoch": 0.04795, "grad_norm": 10.017556190490723, "learning_rate": 1.7982828282828285e-06, "loss": 6.286162185668945, "step": 64395 }, { "epoch": 0.048, "grad_norm": 11.313971519470215, "learning_rate": 1.7980303030303031e-06, "loss": 6.287919235229492, "step": 64400 }, { "epoch": 0.04805, "grad_norm": 8.539130210876465, "learning_rate": 1.797777777777778e-06, "loss": 6.370859909057617, "step": 64405 }, { "epoch": 0.0481, "grad_norm": 8.193318367004395, "learning_rate": 1.7975252525252526e-06, "loss": 6.271310806274414, "step": 64410 }, { "epoch": 0.04815, "grad_norm": 9.825521469116211, "learning_rate": 1.7972727272727274e-06, "loss": 6.2269134521484375, "step": 64415 }, { "epoch": 0.0482, "grad_norm": 4.840420722961426, "learning_rate": 1.797020202020202e-06, "loss": 6.299693298339844, "step": 64420 }, { "epoch": 0.04825, "grad_norm": 5.788722991943359, "learning_rate": 1.796767676767677e-06, "loss": 6.251481628417968, "step": 64425 }, { "epoch": 0.0483, "grad_norm": 5.572360992431641, "learning_rate": 1.7965151515151516e-06, "loss": 6.306673812866211, "step": 64430 }, { "epoch": 0.04835, "grad_norm": 8.961752891540527, "learning_rate": 1.7962626262626264e-06, "loss": 6.1793701171875, "step": 64435 }, { "epoch": 0.0484, "grad_norm": 5.433196544647217, "learning_rate": 1.796010101010101e-06, "loss": 6.302726745605469, "step": 64440 }, { "epoch": 0.04845, "grad_norm": 7.031136512756348, "learning_rate": 1.795757575757576e-06, "loss": 6.324901580810547, "step": 64445 }, { "epoch": 0.0485, "grad_norm": 10.489356994628906, "learning_rate": 1.7955050505050505e-06, "loss": 6.272932052612305, "step": 64450 }, { "epoch": 0.04855, "grad_norm": 4.883767604827881, "learning_rate": 1.7952525252525255e-06, "loss": 6.234714126586914, "step": 64455 }, { "epoch": 0.0486, "grad_norm": 4.630354404449463, "learning_rate": 1.7950000000000002e-06, "loss": 6.268260192871094, "step": 64460 }, { "epoch": 0.04865, "grad_norm": 8.103524208068848, "learning_rate": 1.794747474747475e-06, "loss": 6.27485237121582, "step": 64465 }, { "epoch": 0.0487, "grad_norm": 10.122710227966309, "learning_rate": 1.7944949494949497e-06, "loss": 6.24609603881836, "step": 64470 }, { "epoch": 0.04875, "grad_norm": 5.117050647735596, "learning_rate": 1.7942424242424245e-06, "loss": 6.234374237060547, "step": 64475 }, { "epoch": 0.0488, "grad_norm": 4.391269683837891, "learning_rate": 1.7939898989898991e-06, "loss": 6.2415283203125, "step": 64480 }, { "epoch": 0.04885, "grad_norm": 3.372833013534546, "learning_rate": 1.793737373737374e-06, "loss": 6.191271209716797, "step": 64485 }, { "epoch": 0.0489, "grad_norm": 4.341271877288818, "learning_rate": 1.7934848484848486e-06, "loss": 6.318022537231445, "step": 64490 }, { "epoch": 0.04895, "grad_norm": 3.68121600151062, "learning_rate": 1.7932323232323234e-06, "loss": 6.230024337768555, "step": 64495 }, { "epoch": 0.049, "grad_norm": 4.742092609405518, "learning_rate": 1.792979797979798e-06, "loss": 6.307786560058593, "step": 64500 }, { "epoch": 0.04905, "grad_norm": 7.395102024078369, "learning_rate": 1.792727272727273e-06, "loss": 6.228709411621094, "step": 64505 }, { "epoch": 0.0491, "grad_norm": 7.171814918518066, "learning_rate": 1.7924747474747475e-06, "loss": 6.242545700073242, "step": 64510 }, { "epoch": 0.04915, "grad_norm": 3.840628147125244, "learning_rate": 1.7922222222222224e-06, "loss": 6.245962524414063, "step": 64515 }, { "epoch": 0.0492, "grad_norm": 6.538686752319336, "learning_rate": 1.791969696969697e-06, "loss": 6.270323944091797, "step": 64520 }, { "epoch": 0.04925, "grad_norm": 3.508136034011841, "learning_rate": 1.7917171717171719e-06, "loss": 6.195792770385742, "step": 64525 }, { "epoch": 0.0493, "grad_norm": 3.7954728603363037, "learning_rate": 1.7914646464646465e-06, "loss": 6.217453384399414, "step": 64530 }, { "epoch": 0.04935, "grad_norm": 7.118030071258545, "learning_rate": 1.7912121212121213e-06, "loss": 6.234273910522461, "step": 64535 }, { "epoch": 0.0494, "grad_norm": 10.035820007324219, "learning_rate": 1.790959595959596e-06, "loss": 6.249651336669922, "step": 64540 }, { "epoch": 0.04945, "grad_norm": 6.003922939300537, "learning_rate": 1.7907070707070708e-06, "loss": 6.234181976318359, "step": 64545 }, { "epoch": 0.0495, "grad_norm": 5.361344337463379, "learning_rate": 1.7904545454545454e-06, "loss": 6.226123809814453, "step": 64550 }, { "epoch": 0.04955, "grad_norm": 8.818748474121094, "learning_rate": 1.7902020202020205e-06, "loss": 6.309692764282227, "step": 64555 }, { "epoch": 0.0496, "grad_norm": 8.264819145202637, "learning_rate": 1.789949494949495e-06, "loss": 6.294614028930664, "step": 64560 }, { "epoch": 0.04965, "grad_norm": 7.680489540100098, "learning_rate": 1.78969696969697e-06, "loss": 6.2266845703125, "step": 64565 }, { "epoch": 0.0497, "grad_norm": 4.940861701965332, "learning_rate": 1.7894444444444446e-06, "loss": 6.219209671020508, "step": 64570 }, { "epoch": 0.04975, "grad_norm": 11.48792552947998, "learning_rate": 1.7891919191919194e-06, "loss": 6.2937675476074215, "step": 64575 }, { "epoch": 0.0498, "grad_norm": 5.042182922363281, "learning_rate": 1.788939393939394e-06, "loss": 6.223870468139649, "step": 64580 }, { "epoch": 0.04985, "grad_norm": 5.191922664642334, "learning_rate": 1.788686868686869e-06, "loss": 6.2314308166503904, "step": 64585 }, { "epoch": 0.0499, "grad_norm": 7.447678565979004, "learning_rate": 1.7884343434343435e-06, "loss": 6.356531143188477, "step": 64590 }, { "epoch": 0.04995, "grad_norm": 10.372215270996094, "learning_rate": 1.7881818181818184e-06, "loss": 6.223954391479492, "step": 64595 }, { "epoch": 0.05, "grad_norm": 8.507596015930176, "learning_rate": 1.787929292929293e-06, "loss": 6.296051025390625, "step": 64600 }, { "epoch": 0.05005, "grad_norm": 46.37196731567383, "learning_rate": 1.7876767676767678e-06, "loss": 6.250182723999023, "step": 64605 }, { "epoch": 0.0501, "grad_norm": 8.669857025146484, "learning_rate": 1.7874242424242425e-06, "loss": 6.303602981567383, "step": 64610 }, { "epoch": 0.05015, "grad_norm": 7.901124000549316, "learning_rate": 1.7871717171717173e-06, "loss": 6.232662200927734, "step": 64615 }, { "epoch": 0.0502, "grad_norm": 6.462088584899902, "learning_rate": 1.786919191919192e-06, "loss": 6.28046875, "step": 64620 }, { "epoch": 0.05025, "grad_norm": 9.917712211608887, "learning_rate": 1.7866666666666668e-06, "loss": 6.199420547485351, "step": 64625 }, { "epoch": 0.0503, "grad_norm": 5.8288421630859375, "learning_rate": 1.7864141414141414e-06, "loss": 6.223270034790039, "step": 64630 }, { "epoch": 0.05035, "grad_norm": 7.006783962249756, "learning_rate": 1.7861616161616163e-06, "loss": 6.255004501342773, "step": 64635 }, { "epoch": 0.0504, "grad_norm": 3.440261125564575, "learning_rate": 1.7859090909090909e-06, "loss": 6.243783950805664, "step": 64640 }, { "epoch": 0.05045, "grad_norm": 5.6720356941223145, "learning_rate": 1.7856565656565657e-06, "loss": 6.235412216186523, "step": 64645 }, { "epoch": 0.0505, "grad_norm": 6.531790256500244, "learning_rate": 1.7854040404040404e-06, "loss": 6.19392204284668, "step": 64650 }, { "epoch": 0.05055, "grad_norm": 7.856037616729736, "learning_rate": 1.7851515151515152e-06, "loss": 6.303317260742188, "step": 64655 }, { "epoch": 0.0506, "grad_norm": 16.065967559814453, "learning_rate": 1.7848989898989898e-06, "loss": 6.254555511474609, "step": 64660 }, { "epoch": 0.05065, "grad_norm": 6.44582986831665, "learning_rate": 1.7846464646464649e-06, "loss": 6.324856185913086, "step": 64665 }, { "epoch": 0.0507, "grad_norm": 6.935845375061035, "learning_rate": 1.7843939393939397e-06, "loss": 6.249081420898437, "step": 64670 }, { "epoch": 0.05075, "grad_norm": 6.977933883666992, "learning_rate": 1.7841414141414144e-06, "loss": 6.298194885253906, "step": 64675 }, { "epoch": 0.0508, "grad_norm": 6.822606086730957, "learning_rate": 1.7838888888888892e-06, "loss": 6.457970428466797, "step": 64680 }, { "epoch": 0.05085, "grad_norm": 8.780726432800293, "learning_rate": 1.7836363636363638e-06, "loss": 6.256601715087891, "step": 64685 }, { "epoch": 0.0509, "grad_norm": 4.779819965362549, "learning_rate": 1.7833838383838387e-06, "loss": 6.246327972412109, "step": 64690 }, { "epoch": 0.05095, "grad_norm": 4.298603057861328, "learning_rate": 1.7831313131313133e-06, "loss": 6.262970352172852, "step": 64695 }, { "epoch": 0.051, "grad_norm": 12.421082496643066, "learning_rate": 1.7828787878787881e-06, "loss": 6.24635009765625, "step": 64700 }, { "epoch": 0.05105, "grad_norm": 4.790146350860596, "learning_rate": 1.7826262626262628e-06, "loss": 6.202412033081055, "step": 64705 }, { "epoch": 0.0511, "grad_norm": 7.055522918701172, "learning_rate": 1.7823737373737376e-06, "loss": 6.1795799255371096, "step": 64710 }, { "epoch": 0.05115, "grad_norm": 5.128937721252441, "learning_rate": 1.7821212121212122e-06, "loss": 6.235145568847656, "step": 64715 }, { "epoch": 0.0512, "grad_norm": 6.688382148742676, "learning_rate": 1.781868686868687e-06, "loss": 6.40643310546875, "step": 64720 }, { "epoch": 0.05125, "grad_norm": 27.739639282226562, "learning_rate": 1.7816161616161617e-06, "loss": 6.339942932128906, "step": 64725 }, { "epoch": 0.0513, "grad_norm": 8.215283393859863, "learning_rate": 1.7813636363636366e-06, "loss": 6.243391418457032, "step": 64730 }, { "epoch": 0.05135, "grad_norm": 3.289060354232788, "learning_rate": 1.7811111111111112e-06, "loss": 6.235636901855469, "step": 64735 }, { "epoch": 0.0514, "grad_norm": 4.831718444824219, "learning_rate": 1.780858585858586e-06, "loss": 6.283943557739258, "step": 64740 }, { "epoch": 0.05145, "grad_norm": 9.551619529724121, "learning_rate": 1.7806060606060607e-06, "loss": 6.217580413818359, "step": 64745 }, { "epoch": 0.0515, "grad_norm": 19.049104690551758, "learning_rate": 1.7803535353535357e-06, "loss": 6.340091323852539, "step": 64750 }, { "epoch": 0.05155, "grad_norm": 8.852360725402832, "learning_rate": 1.7801010101010101e-06, "loss": 6.2475738525390625, "step": 64755 }, { "epoch": 0.0516, "grad_norm": 5.774298191070557, "learning_rate": 1.7798484848484852e-06, "loss": 6.2245330810546875, "step": 64760 }, { "epoch": 0.05165, "grad_norm": 3.9098498821258545, "learning_rate": 1.7795959595959598e-06, "loss": 6.23168716430664, "step": 64765 }, { "epoch": 0.0517, "grad_norm": 3.2955873012542725, "learning_rate": 1.7793434343434347e-06, "loss": 6.243663787841797, "step": 64770 }, { "epoch": 0.05175, "grad_norm": 7.757964611053467, "learning_rate": 1.7790909090909093e-06, "loss": 6.195045089721679, "step": 64775 }, { "epoch": 0.0518, "grad_norm": 7.411890983581543, "learning_rate": 1.7788383838383841e-06, "loss": 6.229795837402344, "step": 64780 }, { "epoch": 0.05185, "grad_norm": 10.650171279907227, "learning_rate": 1.7785858585858588e-06, "loss": 6.2251228332519535, "step": 64785 }, { "epoch": 0.0519, "grad_norm": 4.916946887969971, "learning_rate": 1.7783333333333336e-06, "loss": 6.3251087188720705, "step": 64790 }, { "epoch": 0.05195, "grad_norm": 5.128292083740234, "learning_rate": 1.7780808080808082e-06, "loss": 6.239251708984375, "step": 64795 }, { "epoch": 0.052, "grad_norm": 5.555354595184326, "learning_rate": 1.777828282828283e-06, "loss": 6.222351837158203, "step": 64800 }, { "epoch": 0.05205, "grad_norm": 7.305614948272705, "learning_rate": 1.7775757575757577e-06, "loss": 6.300971221923828, "step": 64805 }, { "epoch": 0.0521, "grad_norm": 9.236101150512695, "learning_rate": 1.7773232323232325e-06, "loss": 6.219646453857422, "step": 64810 }, { "epoch": 0.05215, "grad_norm": 6.320587635040283, "learning_rate": 1.7770707070707072e-06, "loss": 6.224132919311524, "step": 64815 }, { "epoch": 0.0522, "grad_norm": 7.06455659866333, "learning_rate": 1.776818181818182e-06, "loss": 6.315763854980469, "step": 64820 }, { "epoch": 0.05225, "grad_norm": 5.493971824645996, "learning_rate": 1.7765656565656566e-06, "loss": 6.275391387939453, "step": 64825 }, { "epoch": 0.0523, "grad_norm": 6.960247039794922, "learning_rate": 1.7763131313131315e-06, "loss": 6.245594024658203, "step": 64830 }, { "epoch": 0.05235, "grad_norm": 4.273688316345215, "learning_rate": 1.7760606060606061e-06, "loss": 6.2541862487792965, "step": 64835 }, { "epoch": 0.0524, "grad_norm": 3.0775346755981445, "learning_rate": 1.775808080808081e-06, "loss": 6.251230239868164, "step": 64840 }, { "epoch": 0.05245, "grad_norm": 5.006857395172119, "learning_rate": 1.7755555555555556e-06, "loss": 6.285615158081055, "step": 64845 }, { "epoch": 0.0525, "grad_norm": 6.971516132354736, "learning_rate": 1.7753030303030304e-06, "loss": 6.201743316650391, "step": 64850 }, { "epoch": 0.05255, "grad_norm": 4.527109146118164, "learning_rate": 1.775050505050505e-06, "loss": 6.252779388427735, "step": 64855 }, { "epoch": 0.0526, "grad_norm": 7.515388488769531, "learning_rate": 1.7747979797979801e-06, "loss": 6.404304504394531, "step": 64860 }, { "epoch": 0.05265, "grad_norm": 6.0596466064453125, "learning_rate": 1.7745454545454545e-06, "loss": 6.242582702636719, "step": 64865 }, { "epoch": 0.0527, "grad_norm": 9.957716941833496, "learning_rate": 1.7742929292929296e-06, "loss": 6.265680313110352, "step": 64870 }, { "epoch": 0.05275, "grad_norm": 6.789843559265137, "learning_rate": 1.7740404040404042e-06, "loss": 6.240800476074218, "step": 64875 }, { "epoch": 0.0528, "grad_norm": 6.143019199371338, "learning_rate": 1.773787878787879e-06, "loss": 6.20985107421875, "step": 64880 }, { "epoch": 0.05285, "grad_norm": 6.8597869873046875, "learning_rate": 1.7735353535353537e-06, "loss": 6.245920944213867, "step": 64885 }, { "epoch": 0.0529, "grad_norm": 8.366990089416504, "learning_rate": 1.7732828282828285e-06, "loss": 6.240082168579102, "step": 64890 }, { "epoch": 0.05295, "grad_norm": 5.885533332824707, "learning_rate": 1.7730303030303032e-06, "loss": 6.244680786132813, "step": 64895 }, { "epoch": 0.053, "grad_norm": 4.373715877532959, "learning_rate": 1.772777777777778e-06, "loss": 6.178395843505859, "step": 64900 }, { "epoch": 0.05305, "grad_norm": 8.773508071899414, "learning_rate": 1.7725252525252526e-06, "loss": 6.263991928100586, "step": 64905 }, { "epoch": 0.0531, "grad_norm": 9.34281063079834, "learning_rate": 1.7722727272727275e-06, "loss": 6.187401199340821, "step": 64910 }, { "epoch": 0.05315, "grad_norm": 6.722169876098633, "learning_rate": 1.7720202020202021e-06, "loss": 6.251068115234375, "step": 64915 }, { "epoch": 0.0532, "grad_norm": 7.094295501708984, "learning_rate": 1.771767676767677e-06, "loss": 6.327795791625976, "step": 64920 }, { "epoch": 0.05325, "grad_norm": 4.591699123382568, "learning_rate": 1.7715151515151516e-06, "loss": 6.263811492919922, "step": 64925 }, { "epoch": 0.0533, "grad_norm": 12.243279457092285, "learning_rate": 1.7712626262626264e-06, "loss": 6.455361175537109, "step": 64930 }, { "epoch": 0.05335, "grad_norm": 4.386172771453857, "learning_rate": 1.771010101010101e-06, "loss": 6.241432571411133, "step": 64935 }, { "epoch": 0.0534, "grad_norm": 4.945713520050049, "learning_rate": 1.7707575757575759e-06, "loss": 6.2217052459716795, "step": 64940 }, { "epoch": 0.05345, "grad_norm": 7.2462286949157715, "learning_rate": 1.7705050505050505e-06, "loss": 6.26311149597168, "step": 64945 }, { "epoch": 0.0535, "grad_norm": 5.382934093475342, "learning_rate": 1.7702525252525254e-06, "loss": 6.242133331298828, "step": 64950 }, { "epoch": 0.05355, "grad_norm": 7.027083873748779, "learning_rate": 1.77e-06, "loss": 6.181192016601562, "step": 64955 }, { "epoch": 0.0536, "grad_norm": 5.542324542999268, "learning_rate": 1.7697474747474748e-06, "loss": 6.302117156982422, "step": 64960 }, { "epoch": 0.05365, "grad_norm": 4.798008441925049, "learning_rate": 1.7694949494949495e-06, "loss": 6.309562301635742, "step": 64965 }, { "epoch": 0.0537, "grad_norm": 5.425313472747803, "learning_rate": 1.7692424242424245e-06, "loss": 6.211037445068359, "step": 64970 }, { "epoch": 0.05375, "grad_norm": 7.120856761932373, "learning_rate": 1.768989898989899e-06, "loss": 6.229544830322266, "step": 64975 }, { "epoch": 0.0538, "grad_norm": 5.4461774826049805, "learning_rate": 1.768737373737374e-06, "loss": 6.240694427490235, "step": 64980 }, { "epoch": 0.05385, "grad_norm": 5.010530948638916, "learning_rate": 1.7684848484848486e-06, "loss": 6.267343902587891, "step": 64985 }, { "epoch": 0.0539, "grad_norm": 4.043389797210693, "learning_rate": 1.7682323232323235e-06, "loss": 6.243431091308594, "step": 64990 }, { "epoch": 0.05395, "grad_norm": 3.2319393157958984, "learning_rate": 1.767979797979798e-06, "loss": 6.226934814453125, "step": 64995 }, { "epoch": 0.054, "grad_norm": 7.241064548492432, "learning_rate": 1.767727272727273e-06, "loss": 6.258535385131836, "step": 65000 }, { "epoch": 0.05405, "grad_norm": 26.696273803710938, "learning_rate": 1.7674747474747476e-06, "loss": 6.2677146911621096, "step": 65005 }, { "epoch": 0.0541, "grad_norm": 3.7986257076263428, "learning_rate": 1.7672222222222224e-06, "loss": 6.188557434082031, "step": 65010 }, { "epoch": 0.05415, "grad_norm": 16.835372924804688, "learning_rate": 1.766969696969697e-06, "loss": 6.2698108673095705, "step": 65015 }, { "epoch": 0.0542, "grad_norm": 10.266523361206055, "learning_rate": 1.7667171717171719e-06, "loss": 6.220892333984375, "step": 65020 }, { "epoch": 0.05425, "grad_norm": 13.395267486572266, "learning_rate": 1.7664646464646465e-06, "loss": 6.302743530273437, "step": 65025 }, { "epoch": 0.0543, "grad_norm": 12.57806396484375, "learning_rate": 1.7662121212121214e-06, "loss": 6.195790863037109, "step": 65030 }, { "epoch": 0.05435, "grad_norm": 5.192023277282715, "learning_rate": 1.765959595959596e-06, "loss": 6.270148849487304, "step": 65035 }, { "epoch": 0.0544, "grad_norm": 11.057901382446289, "learning_rate": 1.7657070707070708e-06, "loss": 6.31196174621582, "step": 65040 }, { "epoch": 0.05445, "grad_norm": 7.735151290893555, "learning_rate": 1.7654545454545455e-06, "loss": 6.296978759765625, "step": 65045 }, { "epoch": 0.0545, "grad_norm": 6.178502559661865, "learning_rate": 1.7652020202020203e-06, "loss": 6.253580093383789, "step": 65050 }, { "epoch": 0.05455, "grad_norm": 5.599023342132568, "learning_rate": 1.764949494949495e-06, "loss": 6.229573059082031, "step": 65055 }, { "epoch": 0.0546, "grad_norm": 5.815474987030029, "learning_rate": 1.7646969696969698e-06, "loss": 6.231867980957031, "step": 65060 }, { "epoch": 0.05465, "grad_norm": 6.612547397613525, "learning_rate": 1.7644444444444444e-06, "loss": 6.216616821289063, "step": 65065 }, { "epoch": 0.0547, "grad_norm": 4.367131233215332, "learning_rate": 1.7641919191919192e-06, "loss": 6.2545013427734375, "step": 65070 }, { "epoch": 0.05475, "grad_norm": 17.22922706604004, "learning_rate": 1.7639393939393939e-06, "loss": 6.279743576049805, "step": 65075 }, { "epoch": 0.0548, "grad_norm": 9.515551567077637, "learning_rate": 1.763686868686869e-06, "loss": 6.280702209472656, "step": 65080 }, { "epoch": 0.05485, "grad_norm": 7.440774440765381, "learning_rate": 1.7634343434343433e-06, "loss": 6.259157943725586, "step": 65085 }, { "epoch": 0.0549, "grad_norm": 4.269670009613037, "learning_rate": 1.7631818181818184e-06, "loss": 6.224879455566406, "step": 65090 }, { "epoch": 0.05495, "grad_norm": 8.478686332702637, "learning_rate": 1.7629292929292932e-06, "loss": 6.230297470092774, "step": 65095 }, { "epoch": 0.055, "grad_norm": 8.339715003967285, "learning_rate": 1.7626767676767679e-06, "loss": 6.378755187988281, "step": 65100 }, { "epoch": 0.05505, "grad_norm": 6.377094745635986, "learning_rate": 1.7624242424242427e-06, "loss": 6.2145942687988285, "step": 65105 }, { "epoch": 0.0551, "grad_norm": 15.606830596923828, "learning_rate": 1.7621717171717173e-06, "loss": 6.250661468505859, "step": 65110 }, { "epoch": 0.05515, "grad_norm": 8.128774642944336, "learning_rate": 1.7619191919191922e-06, "loss": 6.256118392944336, "step": 65115 }, { "epoch": 0.0552, "grad_norm": 8.280364036560059, "learning_rate": 1.7616666666666668e-06, "loss": 6.238312149047852, "step": 65120 }, { "epoch": 0.05525, "grad_norm": 4.0668745040893555, "learning_rate": 1.7614141414141417e-06, "loss": 6.263299942016602, "step": 65125 }, { "epoch": 0.0553, "grad_norm": 5.653567314147949, "learning_rate": 1.7611616161616163e-06, "loss": 6.199816131591797, "step": 65130 }, { "epoch": 0.05535, "grad_norm": 5.870588779449463, "learning_rate": 1.7609090909090911e-06, "loss": 6.246640014648437, "step": 65135 }, { "epoch": 0.0554, "grad_norm": 4.311883449554443, "learning_rate": 1.7606565656565658e-06, "loss": 6.349324417114258, "step": 65140 }, { "epoch": 0.05545, "grad_norm": 9.075450897216797, "learning_rate": 1.7604040404040406e-06, "loss": 6.2041065216064455, "step": 65145 }, { "epoch": 0.0555, "grad_norm": 35.29816436767578, "learning_rate": 1.7601515151515152e-06, "loss": 6.394267272949219, "step": 65150 }, { "epoch": 0.05555, "grad_norm": 7.161290168762207, "learning_rate": 1.75989898989899e-06, "loss": 6.285271072387696, "step": 65155 }, { "epoch": 0.0556, "grad_norm": 9.267369270324707, "learning_rate": 1.7596464646464647e-06, "loss": 6.280790328979492, "step": 65160 }, { "epoch": 0.05565, "grad_norm": 4.378617763519287, "learning_rate": 1.7593939393939398e-06, "loss": 6.275582122802734, "step": 65165 }, { "epoch": 0.0557, "grad_norm": 6.996663570404053, "learning_rate": 1.7591414141414142e-06, "loss": 6.250347900390625, "step": 65170 }, { "epoch": 0.05575, "grad_norm": 7.621191024780273, "learning_rate": 1.7588888888888892e-06, "loss": 6.23578109741211, "step": 65175 }, { "epoch": 0.0558, "grad_norm": 6.664196014404297, "learning_rate": 1.7586363636363639e-06, "loss": 6.2061004638671875, "step": 65180 }, { "epoch": 0.05585, "grad_norm": 7.320015907287598, "learning_rate": 1.7583838383838387e-06, "loss": 6.263908767700196, "step": 65185 }, { "epoch": 0.0559, "grad_norm": 7.301239490509033, "learning_rate": 1.7581313131313133e-06, "loss": 6.212914276123047, "step": 65190 }, { "epoch": 0.05595, "grad_norm": 7.317206382751465, "learning_rate": 1.7578787878787882e-06, "loss": 6.229571533203125, "step": 65195 }, { "epoch": 0.056, "grad_norm": 7.691757678985596, "learning_rate": 1.7576262626262628e-06, "loss": 6.2297813415527346, "step": 65200 }, { "epoch": 0.05605, "grad_norm": 10.988706588745117, "learning_rate": 1.7573737373737376e-06, "loss": 6.23254508972168, "step": 65205 }, { "epoch": 0.0561, "grad_norm": 3.1193418502807617, "learning_rate": 1.7571212121212123e-06, "loss": 6.251005554199219, "step": 65210 }, { "epoch": 0.05615, "grad_norm": 7.678251266479492, "learning_rate": 1.7568686868686871e-06, "loss": 6.226293563842773, "step": 65215 }, { "epoch": 0.0562, "grad_norm": 5.0608391761779785, "learning_rate": 1.7566161616161617e-06, "loss": 6.256377410888672, "step": 65220 }, { "epoch": 0.05625, "grad_norm": 5.273802757263184, "learning_rate": 1.7563636363636366e-06, "loss": 6.245328521728515, "step": 65225 }, { "epoch": 0.0563, "grad_norm": 6.416277885437012, "learning_rate": 1.7561111111111112e-06, "loss": 6.160545730590821, "step": 65230 }, { "epoch": 0.05635, "grad_norm": 8.066812515258789, "learning_rate": 1.755858585858586e-06, "loss": 6.289083862304688, "step": 65235 }, { "epoch": 0.0564, "grad_norm": 3.3713459968566895, "learning_rate": 1.7556060606060607e-06, "loss": 6.218203353881836, "step": 65240 }, { "epoch": 0.05645, "grad_norm": 4.418837547302246, "learning_rate": 1.7553535353535355e-06, "loss": 6.217873382568359, "step": 65245 }, { "epoch": 0.0565, "grad_norm": 4.481410980224609, "learning_rate": 1.7551010101010102e-06, "loss": 6.259193420410156, "step": 65250 }, { "epoch": 0.05655, "grad_norm": 26.36863899230957, "learning_rate": 1.754848484848485e-06, "loss": 6.352720260620117, "step": 65255 }, { "epoch": 0.0566, "grad_norm": 5.605569839477539, "learning_rate": 1.7545959595959596e-06, "loss": 6.3834785461425785, "step": 65260 }, { "epoch": 0.05665, "grad_norm": 7.638155937194824, "learning_rate": 1.7543434343434345e-06, "loss": 6.2399452209472654, "step": 65265 }, { "epoch": 0.0567, "grad_norm": 6.658874988555908, "learning_rate": 1.754090909090909e-06, "loss": 6.171012115478516, "step": 65270 }, { "epoch": 0.05675, "grad_norm": 3.781588554382324, "learning_rate": 1.7538383838383842e-06, "loss": 6.243976593017578, "step": 65275 }, { "epoch": 0.0568, "grad_norm": 4.342329025268555, "learning_rate": 1.7535858585858586e-06, "loss": 6.236565399169922, "step": 65280 }, { "epoch": 0.05685, "grad_norm": 9.173303604125977, "learning_rate": 1.7533333333333336e-06, "loss": 6.224419403076172, "step": 65285 }, { "epoch": 0.0569, "grad_norm": 4.541243553161621, "learning_rate": 1.7530808080808083e-06, "loss": 6.2473808288574215, "step": 65290 }, { "epoch": 0.05695, "grad_norm": 5.377750873565674, "learning_rate": 1.752828282828283e-06, "loss": 6.243883514404297, "step": 65295 }, { "epoch": 0.057, "grad_norm": 4.883563995361328, "learning_rate": 1.7525757575757577e-06, "loss": 6.2430267333984375, "step": 65300 }, { "epoch": 0.05705, "grad_norm": 6.090480804443359, "learning_rate": 1.7523232323232326e-06, "loss": 6.260042953491211, "step": 65305 }, { "epoch": 0.0571, "grad_norm": 7.658328056335449, "learning_rate": 1.7520707070707072e-06, "loss": 6.208308410644531, "step": 65310 }, { "epoch": 0.05715, "grad_norm": 5.936210632324219, "learning_rate": 1.751818181818182e-06, "loss": 6.207574844360352, "step": 65315 }, { "epoch": 0.0572, "grad_norm": 8.979191780090332, "learning_rate": 1.7515656565656567e-06, "loss": 6.3198081970214846, "step": 65320 }, { "epoch": 0.05725, "grad_norm": 6.674270153045654, "learning_rate": 1.7513131313131315e-06, "loss": 6.221415328979492, "step": 65325 }, { "epoch": 0.0573, "grad_norm": 6.2063140869140625, "learning_rate": 1.7510606060606061e-06, "loss": 6.206269836425781, "step": 65330 }, { "epoch": 0.05735, "grad_norm": 7.175876617431641, "learning_rate": 1.750808080808081e-06, "loss": 6.304767990112305, "step": 65335 }, { "epoch": 0.0574, "grad_norm": 12.570212364196777, "learning_rate": 1.7505555555555556e-06, "loss": 6.253631973266602, "step": 65340 }, { "epoch": 0.05745, "grad_norm": 4.5413498878479, "learning_rate": 1.7503030303030305e-06, "loss": 6.283907318115235, "step": 65345 }, { "epoch": 0.0575, "grad_norm": 7.086191654205322, "learning_rate": 1.750050505050505e-06, "loss": 6.265770721435547, "step": 65350 }, { "epoch": 0.05755, "grad_norm": 8.42487621307373, "learning_rate": 1.74979797979798e-06, "loss": 6.163114547729492, "step": 65355 }, { "epoch": 0.0576, "grad_norm": 6.911253929138184, "learning_rate": 1.7495454545454546e-06, "loss": 6.250620651245117, "step": 65360 }, { "epoch": 0.05765, "grad_norm": 4.658714294433594, "learning_rate": 1.7492929292929294e-06, "loss": 6.232027816772461, "step": 65365 }, { "epoch": 0.0577, "grad_norm": 14.145217895507812, "learning_rate": 1.749040404040404e-06, "loss": 6.276029205322265, "step": 65370 }, { "epoch": 0.05775, "grad_norm": 7.01018762588501, "learning_rate": 1.7487878787878789e-06, "loss": 6.297161483764649, "step": 65375 }, { "epoch": 0.0578, "grad_norm": 10.679825782775879, "learning_rate": 1.7485353535353535e-06, "loss": 6.214054870605469, "step": 65380 }, { "epoch": 0.05785, "grad_norm": 7.313573837280273, "learning_rate": 1.7482828282828286e-06, "loss": 6.513581085205078, "step": 65385 }, { "epoch": 0.0579, "grad_norm": 6.160538196563721, "learning_rate": 1.748030303030303e-06, "loss": 6.352527236938476, "step": 65390 }, { "epoch": 0.05795, "grad_norm": 7.796149253845215, "learning_rate": 1.747777777777778e-06, "loss": 6.258735656738281, "step": 65395 }, { "epoch": 0.058, "grad_norm": 6.484922885894775, "learning_rate": 1.7475252525252527e-06, "loss": 6.298869705200195, "step": 65400 }, { "epoch": 0.05805, "grad_norm": 7.6498589515686035, "learning_rate": 1.7472727272727275e-06, "loss": 6.240570449829102, "step": 65405 }, { "epoch": 0.0581, "grad_norm": 6.300982475280762, "learning_rate": 1.7470202020202021e-06, "loss": 6.2678680419921875, "step": 65410 }, { "epoch": 0.05815, "grad_norm": 5.046743392944336, "learning_rate": 1.746767676767677e-06, "loss": 6.264728546142578, "step": 65415 }, { "epoch": 0.0582, "grad_norm": 4.226310729980469, "learning_rate": 1.7465151515151516e-06, "loss": 6.252939605712891, "step": 65420 }, { "epoch": 0.05825, "grad_norm": 6.321202278137207, "learning_rate": 1.7462626262626264e-06, "loss": 6.27430419921875, "step": 65425 }, { "epoch": 0.0583, "grad_norm": 7.762344837188721, "learning_rate": 1.746010101010101e-06, "loss": 6.237644958496094, "step": 65430 }, { "epoch": 0.05835, "grad_norm": 4.894748687744141, "learning_rate": 1.745757575757576e-06, "loss": 6.220959854125977, "step": 65435 }, { "epoch": 0.0584, "grad_norm": 10.121949195861816, "learning_rate": 1.7455050505050505e-06, "loss": 6.313338088989258, "step": 65440 }, { "epoch": 0.05845, "grad_norm": 4.319450378417969, "learning_rate": 1.7452525252525254e-06, "loss": 6.285614776611328, "step": 65445 }, { "epoch": 0.0585, "grad_norm": 7.182220458984375, "learning_rate": 1.745e-06, "loss": 6.314968872070312, "step": 65450 }, { "epoch": 0.05855, "grad_norm": 6.100467681884766, "learning_rate": 1.7447474747474749e-06, "loss": 6.242650985717773, "step": 65455 }, { "epoch": 0.0586, "grad_norm": 6.266513347625732, "learning_rate": 1.7444949494949495e-06, "loss": 6.228600311279297, "step": 65460 }, { "epoch": 0.05865, "grad_norm": 6.114872932434082, "learning_rate": 1.7442424242424243e-06, "loss": 6.176288604736328, "step": 65465 }, { "epoch": 0.0587, "grad_norm": 7.8191328048706055, "learning_rate": 1.743989898989899e-06, "loss": 6.326560211181641, "step": 65470 }, { "epoch": 0.05875, "grad_norm": 7.090770244598389, "learning_rate": 1.7437373737373738e-06, "loss": 6.207294464111328, "step": 65475 }, { "epoch": 0.0588, "grad_norm": 6.865314483642578, "learning_rate": 1.7434848484848484e-06, "loss": 6.47171630859375, "step": 65480 }, { "epoch": 0.05885, "grad_norm": 5.706386089324951, "learning_rate": 1.7432323232323235e-06, "loss": 6.232311248779297, "step": 65485 }, { "epoch": 0.0589, "grad_norm": 12.546734809875488, "learning_rate": 1.742979797979798e-06, "loss": 6.256247329711914, "step": 65490 }, { "epoch": 0.05895, "grad_norm": 4.040778160095215, "learning_rate": 1.742727272727273e-06, "loss": 6.242745208740234, "step": 65495 }, { "epoch": 0.059, "grad_norm": 5.546571731567383, "learning_rate": 1.7424747474747474e-06, "loss": 6.260718154907226, "step": 65500 }, { "epoch": 0.05905, "grad_norm": 4.6641764640808105, "learning_rate": 1.7422222222222224e-06, "loss": 6.275225830078125, "step": 65505 }, { "epoch": 0.0591, "grad_norm": 6.5411906242370605, "learning_rate": 1.741969696969697e-06, "loss": 6.4850822448730465, "step": 65510 }, { "epoch": 0.05915, "grad_norm": 6.374220371246338, "learning_rate": 1.741717171717172e-06, "loss": 6.337374877929688, "step": 65515 }, { "epoch": 0.0592, "grad_norm": 11.634953498840332, "learning_rate": 1.7414646464646465e-06, "loss": 6.379816055297852, "step": 65520 }, { "epoch": 0.05925, "grad_norm": 3.8672046661376953, "learning_rate": 1.7412121212121214e-06, "loss": 6.260409545898438, "step": 65525 }, { "epoch": 0.0593, "grad_norm": 6.3221001625061035, "learning_rate": 1.7409595959595962e-06, "loss": 6.301274108886719, "step": 65530 }, { "epoch": 0.05935, "grad_norm": 7.564828872680664, "learning_rate": 1.7407070707070709e-06, "loss": 6.263564682006836, "step": 65535 }, { "epoch": 0.0594, "grad_norm": 5.422926902770996, "learning_rate": 1.7404545454545457e-06, "loss": 6.2330474853515625, "step": 65540 }, { "epoch": 0.05945, "grad_norm": 6.944742202758789, "learning_rate": 1.7402020202020203e-06, "loss": 6.259048843383789, "step": 65545 }, { "epoch": 0.0595, "grad_norm": 3.624966621398926, "learning_rate": 1.7399494949494952e-06, "loss": 6.29151611328125, "step": 65550 }, { "epoch": 0.05955, "grad_norm": 4.643570423126221, "learning_rate": 1.7396969696969698e-06, "loss": 6.208872604370117, "step": 65555 }, { "epoch": 0.0596, "grad_norm": 4.406294345855713, "learning_rate": 1.7394444444444446e-06, "loss": 6.257979202270508, "step": 65560 }, { "epoch": 0.05965, "grad_norm": 4.049442768096924, "learning_rate": 1.7391919191919193e-06, "loss": 6.239189147949219, "step": 65565 }, { "epoch": 0.0597, "grad_norm": 6.80618953704834, "learning_rate": 1.7389393939393941e-06, "loss": 6.26038703918457, "step": 65570 }, { "epoch": 0.05975, "grad_norm": 5.416005611419678, "learning_rate": 1.7386868686868687e-06, "loss": 6.187258529663086, "step": 65575 }, { "epoch": 0.0598, "grad_norm": 5.413612365722656, "learning_rate": 1.7384343434343438e-06, "loss": 6.286339569091797, "step": 65580 }, { "epoch": 0.05985, "grad_norm": 5.797331809997559, "learning_rate": 1.7381818181818182e-06, "loss": 6.322195434570313, "step": 65585 }, { "epoch": 0.0599, "grad_norm": 5.8523712158203125, "learning_rate": 1.7379292929292933e-06, "loss": 6.234060668945313, "step": 65590 }, { "epoch": 0.05995, "grad_norm": 4.415982723236084, "learning_rate": 1.737676767676768e-06, "loss": 6.238933563232422, "step": 65595 }, { "epoch": 0.06, "grad_norm": 9.12866497039795, "learning_rate": 1.7374242424242427e-06, "loss": 6.269498443603515, "step": 65600 }, { "epoch": 0.06005, "grad_norm": 3.699002742767334, "learning_rate": 1.7371717171717174e-06, "loss": 6.229903793334961, "step": 65605 }, { "epoch": 0.0601, "grad_norm": 5.03972053527832, "learning_rate": 1.7369191919191922e-06, "loss": 6.282897186279297, "step": 65610 }, { "epoch": 0.06015, "grad_norm": 36.24827575683594, "learning_rate": 1.7366666666666668e-06, "loss": 6.4779106140136715, "step": 65615 }, { "epoch": 0.0602, "grad_norm": 3.71773624420166, "learning_rate": 1.7364141414141417e-06, "loss": 6.387662124633789, "step": 65620 }, { "epoch": 0.06025, "grad_norm": 5.850876808166504, "learning_rate": 1.7361616161616163e-06, "loss": 6.307433319091797, "step": 65625 }, { "epoch": 0.0603, "grad_norm": 7.093226909637451, "learning_rate": 1.7359090909090912e-06, "loss": 6.208033752441406, "step": 65630 }, { "epoch": 0.06035, "grad_norm": 4.391338348388672, "learning_rate": 1.7356565656565658e-06, "loss": 6.263829040527344, "step": 65635 }, { "epoch": 0.0604, "grad_norm": 4.631786346435547, "learning_rate": 1.7354040404040406e-06, "loss": 6.26683464050293, "step": 65640 }, { "epoch": 0.06045, "grad_norm": 5.970221996307373, "learning_rate": 1.7351515151515153e-06, "loss": 6.226055145263672, "step": 65645 }, { "epoch": 0.0605, "grad_norm": 13.044900894165039, "learning_rate": 1.73489898989899e-06, "loss": 6.3273262023925785, "step": 65650 }, { "epoch": 0.06055, "grad_norm": 6.4588847160339355, "learning_rate": 1.7346464646464647e-06, "loss": 6.234544372558593, "step": 65655 }, { "epoch": 0.0606, "grad_norm": 9.346610069274902, "learning_rate": 1.7343939393939396e-06, "loss": 6.269529342651367, "step": 65660 }, { "epoch": 0.06065, "grad_norm": 7.976192474365234, "learning_rate": 1.7341414141414142e-06, "loss": 6.3059333801269535, "step": 65665 }, { "epoch": 0.0607, "grad_norm": 4.353762626647949, "learning_rate": 1.733888888888889e-06, "loss": 6.2515007019042965, "step": 65670 }, { "epoch": 0.06075, "grad_norm": 4.377441883087158, "learning_rate": 1.7336363636363637e-06, "loss": 6.239535903930664, "step": 65675 }, { "epoch": 0.0608, "grad_norm": 4.790865898132324, "learning_rate": 1.7333838383838385e-06, "loss": 6.2634765625, "step": 65680 }, { "epoch": 0.06085, "grad_norm": 7.251501083374023, "learning_rate": 1.7331313131313131e-06, "loss": 6.2942657470703125, "step": 65685 }, { "epoch": 0.0609, "grad_norm": 11.012249946594238, "learning_rate": 1.7328787878787882e-06, "loss": 6.391874313354492, "step": 65690 }, { "epoch": 0.06095, "grad_norm": 8.633187294006348, "learning_rate": 1.7326262626262626e-06, "loss": 6.234037399291992, "step": 65695 }, { "epoch": 0.061, "grad_norm": 7.09401273727417, "learning_rate": 1.7323737373737377e-06, "loss": 6.276063919067383, "step": 65700 }, { "epoch": 0.06105, "grad_norm": 7.2318243980407715, "learning_rate": 1.7321212121212123e-06, "loss": 6.269033813476563, "step": 65705 }, { "epoch": 0.0611, "grad_norm": 18.171354293823242, "learning_rate": 1.7318686868686871e-06, "loss": 6.276282501220703, "step": 65710 }, { "epoch": 0.06115, "grad_norm": 14.97885799407959, "learning_rate": 1.7316161616161618e-06, "loss": 6.336403656005859, "step": 65715 }, { "epoch": 0.0612, "grad_norm": 8.059917449951172, "learning_rate": 1.7313636363636366e-06, "loss": 6.2889350891113285, "step": 65720 }, { "epoch": 0.06125, "grad_norm": 7.566070079803467, "learning_rate": 1.7311111111111112e-06, "loss": 6.192340469360351, "step": 65725 }, { "epoch": 0.0613, "grad_norm": 7.504062175750732, "learning_rate": 1.730858585858586e-06, "loss": 6.248338317871093, "step": 65730 }, { "epoch": 0.06135, "grad_norm": 5.292293548583984, "learning_rate": 1.7306060606060607e-06, "loss": 6.248657989501953, "step": 65735 }, { "epoch": 0.0614, "grad_norm": 6.271351337432861, "learning_rate": 1.7303535353535356e-06, "loss": 6.277063369750977, "step": 65740 }, { "epoch": 0.06145, "grad_norm": 4.8198347091674805, "learning_rate": 1.7301010101010102e-06, "loss": 6.46468505859375, "step": 65745 }, { "epoch": 0.0615, "grad_norm": 6.489058971405029, "learning_rate": 1.729848484848485e-06, "loss": 6.2189079284667965, "step": 65750 }, { "epoch": 0.06155, "grad_norm": 3.9785895347595215, "learning_rate": 1.7295959595959597e-06, "loss": 6.232281494140625, "step": 65755 }, { "epoch": 0.0616, "grad_norm": 5.785003662109375, "learning_rate": 1.7293434343434345e-06, "loss": 6.297397613525391, "step": 65760 }, { "epoch": 0.06165, "grad_norm": 6.839208602905273, "learning_rate": 1.7290909090909091e-06, "loss": 6.228233718872071, "step": 65765 }, { "epoch": 0.0617, "grad_norm": 7.1684722900390625, "learning_rate": 1.728838383838384e-06, "loss": 6.230297470092774, "step": 65770 }, { "epoch": 0.06175, "grad_norm": 4.118459701538086, "learning_rate": 1.7285858585858586e-06, "loss": 6.259757232666016, "step": 65775 }, { "epoch": 0.0618, "grad_norm": 4.594665050506592, "learning_rate": 1.7283333333333334e-06, "loss": 6.220254516601562, "step": 65780 }, { "epoch": 0.06185, "grad_norm": 5.249074459075928, "learning_rate": 1.728080808080808e-06, "loss": 6.252297592163086, "step": 65785 }, { "epoch": 0.0619, "grad_norm": 5.774895668029785, "learning_rate": 1.727828282828283e-06, "loss": 6.252193450927734, "step": 65790 }, { "epoch": 0.06195, "grad_norm": 5.827839374542236, "learning_rate": 1.7275757575757575e-06, "loss": 6.21380615234375, "step": 65795 }, { "epoch": 0.062, "grad_norm": 7.02239990234375, "learning_rate": 1.7273232323232326e-06, "loss": 6.268426513671875, "step": 65800 }, { "epoch": 0.06205, "grad_norm": 5.1170973777771, "learning_rate": 1.727070707070707e-06, "loss": 6.265458679199218, "step": 65805 }, { "epoch": 0.0621, "grad_norm": 7.604434490203857, "learning_rate": 1.726818181818182e-06, "loss": 6.2361289978027346, "step": 65810 }, { "epoch": 0.06215, "grad_norm": 18.040708541870117, "learning_rate": 1.7265656565656567e-06, "loss": 6.477741241455078, "step": 65815 }, { "epoch": 0.0622, "grad_norm": 7.367584228515625, "learning_rate": 1.7263131313131315e-06, "loss": 6.198371124267578, "step": 65820 }, { "epoch": 0.06225, "grad_norm": 6.7483696937561035, "learning_rate": 1.7260606060606062e-06, "loss": 6.214250946044922, "step": 65825 }, { "epoch": 0.0623, "grad_norm": 6.799319267272949, "learning_rate": 1.725808080808081e-06, "loss": 6.236785507202148, "step": 65830 }, { "epoch": 0.06235, "grad_norm": 7.582779884338379, "learning_rate": 1.7255555555555556e-06, "loss": 6.183013534545898, "step": 65835 }, { "epoch": 0.0624, "grad_norm": 6.442776203155518, "learning_rate": 1.7253030303030305e-06, "loss": 6.247000122070313, "step": 65840 }, { "epoch": 0.06245, "grad_norm": 5.221776962280273, "learning_rate": 1.7250505050505051e-06, "loss": 6.283657836914062, "step": 65845 }, { "epoch": 0.0625, "grad_norm": 5.879793167114258, "learning_rate": 1.72479797979798e-06, "loss": 6.232629013061524, "step": 65850 }, { "epoch": 0.06255, "grad_norm": 7.086514472961426, "learning_rate": 1.7245454545454546e-06, "loss": 6.2331787109375, "step": 65855 }, { "epoch": 0.0626, "grad_norm": 6.874833106994629, "learning_rate": 1.7242929292929294e-06, "loss": 6.246557998657226, "step": 65860 }, { "epoch": 0.06265, "grad_norm": 4.24851131439209, "learning_rate": 1.724040404040404e-06, "loss": 6.261996459960938, "step": 65865 }, { "epoch": 0.0627, "grad_norm": 4.650578498840332, "learning_rate": 1.723787878787879e-06, "loss": 6.2733722686767575, "step": 65870 }, { "epoch": 0.06275, "grad_norm": 5.402723789215088, "learning_rate": 1.7235353535353535e-06, "loss": 6.220689392089843, "step": 65875 }, { "epoch": 0.0628, "grad_norm": 5.231682300567627, "learning_rate": 1.7232828282828284e-06, "loss": 6.240775299072266, "step": 65880 }, { "epoch": 0.06285, "grad_norm": 8.935704231262207, "learning_rate": 1.723030303030303e-06, "loss": 6.225674057006836, "step": 65885 }, { "epoch": 0.0629, "grad_norm": 6.771743297576904, "learning_rate": 1.7227777777777778e-06, "loss": 6.238433074951172, "step": 65890 }, { "epoch": 0.06295, "grad_norm": 5.038820266723633, "learning_rate": 1.7225252525252525e-06, "loss": 6.178882598876953, "step": 65895 }, { "epoch": 0.063, "grad_norm": 6.466598987579346, "learning_rate": 1.7222727272727275e-06, "loss": 6.270684814453125, "step": 65900 }, { "epoch": 0.06305, "grad_norm": 7.088811874389648, "learning_rate": 1.722020202020202e-06, "loss": 6.267372894287109, "step": 65905 }, { "epoch": 0.0631, "grad_norm": 9.013504028320312, "learning_rate": 1.721767676767677e-06, "loss": 6.227058792114258, "step": 65910 }, { "epoch": 0.06315, "grad_norm": 8.17469310760498, "learning_rate": 1.7215151515151516e-06, "loss": 6.275911331176758, "step": 65915 }, { "epoch": 0.0632, "grad_norm": 5.263009071350098, "learning_rate": 1.7212626262626265e-06, "loss": 6.216289520263672, "step": 65920 }, { "epoch": 0.06325, "grad_norm": 13.907835006713867, "learning_rate": 1.721010101010101e-06, "loss": 6.2689697265625, "step": 65925 }, { "epoch": 0.0633, "grad_norm": 4.986100196838379, "learning_rate": 1.720757575757576e-06, "loss": 6.2433006286621096, "step": 65930 }, { "epoch": 0.06335, "grad_norm": 9.7167329788208, "learning_rate": 1.7205050505050506e-06, "loss": 6.226920318603516, "step": 65935 }, { "epoch": 0.0634, "grad_norm": 4.485090732574463, "learning_rate": 1.7202525252525254e-06, "loss": 6.268840789794922, "step": 65940 }, { "epoch": 0.06345, "grad_norm": 3.466092348098755, "learning_rate": 1.72e-06, "loss": 6.192888259887695, "step": 65945 }, { "epoch": 0.0635, "grad_norm": 6.979863166809082, "learning_rate": 1.7197474747474749e-06, "loss": 6.253474426269531, "step": 65950 }, { "epoch": 0.06355, "grad_norm": 8.60177230834961, "learning_rate": 1.7194949494949497e-06, "loss": 6.222371673583984, "step": 65955 }, { "epoch": 0.0636, "grad_norm": 11.588777542114258, "learning_rate": 1.7192424242424244e-06, "loss": 6.342339324951172, "step": 65960 }, { "epoch": 0.06365, "grad_norm": 6.186742305755615, "learning_rate": 1.7189898989898992e-06, "loss": 6.283105087280274, "step": 65965 }, { "epoch": 0.0637, "grad_norm": 4.340980052947998, "learning_rate": 1.7187373737373738e-06, "loss": 6.229424285888672, "step": 65970 }, { "epoch": 0.06375, "grad_norm": 7.2637939453125, "learning_rate": 1.7184848484848487e-06, "loss": 6.251953125, "step": 65975 }, { "epoch": 0.0638, "grad_norm": 8.965249061584473, "learning_rate": 1.7182323232323233e-06, "loss": 6.260762786865234, "step": 65980 }, { "epoch": 0.06385, "grad_norm": 6.000462055206299, "learning_rate": 1.7179797979797981e-06, "loss": 6.260291290283203, "step": 65985 }, { "epoch": 0.0639, "grad_norm": 5.782944202423096, "learning_rate": 1.7177272727272728e-06, "loss": 6.2898406982421875, "step": 65990 }, { "epoch": 0.06395, "grad_norm": 20.487947463989258, "learning_rate": 1.7174747474747478e-06, "loss": 6.473568725585937, "step": 65995 }, { "epoch": 0.064, "grad_norm": 7.696946144104004, "learning_rate": 1.7172222222222223e-06, "loss": 6.259840393066407, "step": 66000 }, { "epoch": 0.06405, "grad_norm": 7.381952285766602, "learning_rate": 1.7169696969696973e-06, "loss": 6.282158660888672, "step": 66005 }, { "epoch": 0.0641, "grad_norm": 9.150415420532227, "learning_rate": 1.716717171717172e-06, "loss": 6.246146011352539, "step": 66010 }, { "epoch": 0.06415, "grad_norm": 5.564603805541992, "learning_rate": 1.7164646464646468e-06, "loss": 6.253484725952148, "step": 66015 }, { "epoch": 0.0642, "grad_norm": 3.568030834197998, "learning_rate": 1.7162121212121214e-06, "loss": 6.350770568847656, "step": 66020 }, { "epoch": 0.06425, "grad_norm": 5.581572532653809, "learning_rate": 1.7159595959595962e-06, "loss": 6.2024494171142575, "step": 66025 }, { "epoch": 0.0643, "grad_norm": 10.666000366210938, "learning_rate": 1.7157070707070709e-06, "loss": 6.3039299011230465, "step": 66030 }, { "epoch": 0.06435, "grad_norm": 4.619931697845459, "learning_rate": 1.7154545454545457e-06, "loss": 6.2730560302734375, "step": 66035 }, { "epoch": 0.0644, "grad_norm": 6.078108787536621, "learning_rate": 1.7152020202020204e-06, "loss": 6.292003631591797, "step": 66040 }, { "epoch": 0.06445, "grad_norm": 5.237896919250488, "learning_rate": 1.7149494949494952e-06, "loss": 6.35555419921875, "step": 66045 }, { "epoch": 0.0645, "grad_norm": 4.978615760803223, "learning_rate": 1.7146969696969698e-06, "loss": 6.234835433959961, "step": 66050 }, { "epoch": 0.06455, "grad_norm": 4.210301876068115, "learning_rate": 1.7144444444444447e-06, "loss": 6.276388931274414, "step": 66055 }, { "epoch": 0.0646, "grad_norm": 5.185657501220703, "learning_rate": 1.7141919191919193e-06, "loss": 6.259960556030274, "step": 66060 }, { "epoch": 0.06465, "grad_norm": 6.630853652954102, "learning_rate": 1.7139393939393941e-06, "loss": 6.246440505981445, "step": 66065 }, { "epoch": 0.0647, "grad_norm": 4.285782814025879, "learning_rate": 1.7136868686868688e-06, "loss": 6.221086120605468, "step": 66070 }, { "epoch": 0.06475, "grad_norm": 28.705129623413086, "learning_rate": 1.7134343434343436e-06, "loss": 6.450157165527344, "step": 66075 }, { "epoch": 0.0648, "grad_norm": 7.323063850402832, "learning_rate": 1.7131818181818182e-06, "loss": 6.220716094970703, "step": 66080 }, { "epoch": 0.06485, "grad_norm": 4.700553894042969, "learning_rate": 1.712929292929293e-06, "loss": 6.247000122070313, "step": 66085 }, { "epoch": 0.0649, "grad_norm": 27.80937957763672, "learning_rate": 1.7126767676767677e-06, "loss": 6.2338615417480465, "step": 66090 }, { "epoch": 0.06495, "grad_norm": 5.437457084655762, "learning_rate": 1.7124242424242426e-06, "loss": 6.178628158569336, "step": 66095 }, { "epoch": 0.065, "grad_norm": 8.313647270202637, "learning_rate": 1.7121717171717172e-06, "loss": 6.226594543457031, "step": 66100 }, { "epoch": 0.06505, "grad_norm": 6.261960506439209, "learning_rate": 1.7119191919191922e-06, "loss": 6.223278427124024, "step": 66105 }, { "epoch": 0.0651, "grad_norm": 4.416557788848877, "learning_rate": 1.7116666666666667e-06, "loss": 6.309904861450195, "step": 66110 }, { "epoch": 0.06515, "grad_norm": 7.816929340362549, "learning_rate": 1.7114141414141417e-06, "loss": 6.214398956298828, "step": 66115 }, { "epoch": 0.0652, "grad_norm": 11.177490234375, "learning_rate": 1.7111616161616163e-06, "loss": 6.467115783691407, "step": 66120 }, { "epoch": 0.06525, "grad_norm": 5.12833309173584, "learning_rate": 1.7109090909090912e-06, "loss": 6.183420562744141, "step": 66125 }, { "epoch": 0.0653, "grad_norm": 8.163956642150879, "learning_rate": 1.7106565656565658e-06, "loss": 6.2627616882324215, "step": 66130 }, { "epoch": 0.06535, "grad_norm": 8.709395408630371, "learning_rate": 1.7104040404040407e-06, "loss": 6.258246612548828, "step": 66135 }, { "epoch": 0.0654, "grad_norm": 7.616428852081299, "learning_rate": 1.7101515151515153e-06, "loss": 6.253038787841797, "step": 66140 }, { "epoch": 0.06545, "grad_norm": 6.422046661376953, "learning_rate": 1.7098989898989901e-06, "loss": 6.259191513061523, "step": 66145 }, { "epoch": 0.0655, "grad_norm": 3.589242935180664, "learning_rate": 1.7096464646464648e-06, "loss": 6.252895355224609, "step": 66150 }, { "epoch": 0.06555, "grad_norm": 12.763039588928223, "learning_rate": 1.7093939393939396e-06, "loss": 6.225995635986328, "step": 66155 }, { "epoch": 0.0656, "grad_norm": 6.4753241539001465, "learning_rate": 1.7091414141414142e-06, "loss": 6.292939758300781, "step": 66160 }, { "epoch": 0.06565, "grad_norm": 9.756047248840332, "learning_rate": 1.708888888888889e-06, "loss": 6.248007965087891, "step": 66165 }, { "epoch": 0.0657, "grad_norm": 4.294727802276611, "learning_rate": 1.7086363636363637e-06, "loss": 6.235165023803711, "step": 66170 }, { "epoch": 0.06575, "grad_norm": 6.0751423835754395, "learning_rate": 1.7083838383838385e-06, "loss": 6.269194030761719, "step": 66175 }, { "epoch": 0.0658, "grad_norm": 6.966557025909424, "learning_rate": 1.7081313131313132e-06, "loss": 6.297936248779297, "step": 66180 }, { "epoch": 0.06585, "grad_norm": 5.369654655456543, "learning_rate": 1.707878787878788e-06, "loss": 6.212129211425781, "step": 66185 }, { "epoch": 0.0659, "grad_norm": 7.206320285797119, "learning_rate": 1.7076262626262626e-06, "loss": 6.219408416748047, "step": 66190 }, { "epoch": 0.06595, "grad_norm": 10.413665771484375, "learning_rate": 1.7073737373737375e-06, "loss": 6.363441467285156, "step": 66195 }, { "epoch": 0.066, "grad_norm": 4.3494791984558105, "learning_rate": 1.7071212121212121e-06, "loss": 6.297170257568359, "step": 66200 }, { "epoch": 0.06605, "grad_norm": 3.417548656463623, "learning_rate": 1.7068686868686872e-06, "loss": 6.247763061523438, "step": 66205 }, { "epoch": 0.0661, "grad_norm": 8.006550788879395, "learning_rate": 1.7066161616161616e-06, "loss": 6.229637908935547, "step": 66210 }, { "epoch": 0.06615, "grad_norm": 7.3339104652404785, "learning_rate": 1.7063636363636366e-06, "loss": 6.25540771484375, "step": 66215 }, { "epoch": 0.0662, "grad_norm": 3.403244972229004, "learning_rate": 1.706111111111111e-06, "loss": 6.257078552246094, "step": 66220 }, { "epoch": 0.06625, "grad_norm": 6.919339656829834, "learning_rate": 1.7058585858585861e-06, "loss": 6.2626396179199215, "step": 66225 }, { "epoch": 0.0663, "grad_norm": 3.637018918991089, "learning_rate": 1.7056060606060607e-06, "loss": 6.322189712524414, "step": 66230 }, { "epoch": 0.06635, "grad_norm": 6.079360008239746, "learning_rate": 1.7053535353535356e-06, "loss": 6.220242309570312, "step": 66235 }, { "epoch": 0.0664, "grad_norm": 4.758529186248779, "learning_rate": 1.7051010101010102e-06, "loss": 6.227021408081055, "step": 66240 }, { "epoch": 0.06645, "grad_norm": 7.0764546394348145, "learning_rate": 1.704848484848485e-06, "loss": 6.246488952636719, "step": 66245 }, { "epoch": 0.0665, "grad_norm": 4.136918544769287, "learning_rate": 1.7045959595959597e-06, "loss": 6.2440849304199215, "step": 66250 }, { "epoch": 0.06655, "grad_norm": 6.644164562225342, "learning_rate": 1.7043434343434345e-06, "loss": 6.182559967041016, "step": 66255 }, { "epoch": 0.0666, "grad_norm": 5.308660984039307, "learning_rate": 1.7040909090909092e-06, "loss": 6.2308403015136715, "step": 66260 }, { "epoch": 0.06665, "grad_norm": 6.7550435066223145, "learning_rate": 1.703838383838384e-06, "loss": 6.268856811523437, "step": 66265 }, { "epoch": 0.0667, "grad_norm": 5.137763977050781, "learning_rate": 1.7035858585858586e-06, "loss": 6.244340515136718, "step": 66270 }, { "epoch": 0.06675, "grad_norm": 6.59822416305542, "learning_rate": 1.7033333333333335e-06, "loss": 6.2509815216064455, "step": 66275 }, { "epoch": 0.0668, "grad_norm": 5.4048590660095215, "learning_rate": 1.703080808080808e-06, "loss": 6.255007934570313, "step": 66280 }, { "epoch": 0.06685, "grad_norm": 5.327803134918213, "learning_rate": 1.702828282828283e-06, "loss": 6.2181640625, "step": 66285 }, { "epoch": 0.0669, "grad_norm": 5.488121032714844, "learning_rate": 1.7025757575757576e-06, "loss": 6.230437469482422, "step": 66290 }, { "epoch": 0.06695, "grad_norm": 9.462759017944336, "learning_rate": 1.7023232323232324e-06, "loss": 6.269149398803711, "step": 66295 }, { "epoch": 0.067, "grad_norm": 4.843019485473633, "learning_rate": 1.702070707070707e-06, "loss": 6.26152229309082, "step": 66300 }, { "epoch": 0.06705, "grad_norm": 10.160429000854492, "learning_rate": 1.7018181818181819e-06, "loss": 6.258780670166016, "step": 66305 }, { "epoch": 0.0671, "grad_norm": 7.003495693206787, "learning_rate": 1.7015656565656565e-06, "loss": 6.235566711425781, "step": 66310 }, { "epoch": 0.06715, "grad_norm": 9.618766784667969, "learning_rate": 1.7013131313131316e-06, "loss": 6.219063949584961, "step": 66315 }, { "epoch": 0.0672, "grad_norm": 3.714146852493286, "learning_rate": 1.701060606060606e-06, "loss": 6.386838912963867, "step": 66320 }, { "epoch": 0.06725, "grad_norm": 9.136879920959473, "learning_rate": 1.700808080808081e-06, "loss": 6.2198020935058596, "step": 66325 }, { "epoch": 0.0673, "grad_norm": 4.214871883392334, "learning_rate": 1.7005555555555557e-06, "loss": 6.258691787719727, "step": 66330 }, { "epoch": 0.06735, "grad_norm": 3.588575839996338, "learning_rate": 1.7003030303030305e-06, "loss": 6.253984069824218, "step": 66335 }, { "epoch": 0.0674, "grad_norm": 4.977000713348389, "learning_rate": 1.7000505050505051e-06, "loss": 6.451488494873047, "step": 66340 }, { "epoch": 0.06745, "grad_norm": 10.638126373291016, "learning_rate": 1.69979797979798e-06, "loss": 6.196933746337891, "step": 66345 }, { "epoch": 0.0675, "grad_norm": 4.7223100662231445, "learning_rate": 1.6995454545454546e-06, "loss": 6.228416442871094, "step": 66350 }, { "epoch": 0.06755, "grad_norm": 5.724508285522461, "learning_rate": 1.6992929292929295e-06, "loss": 6.267296981811524, "step": 66355 }, { "epoch": 0.0676, "grad_norm": 4.569919109344482, "learning_rate": 1.699040404040404e-06, "loss": 6.2398193359375, "step": 66360 }, { "epoch": 0.06765, "grad_norm": 6.164245128631592, "learning_rate": 1.698787878787879e-06, "loss": 6.256383895874023, "step": 66365 }, { "epoch": 0.0677, "grad_norm": 13.399935722351074, "learning_rate": 1.6985353535353536e-06, "loss": 6.2706748962402346, "step": 66370 }, { "epoch": 0.06775, "grad_norm": 10.607918739318848, "learning_rate": 1.6982828282828284e-06, "loss": 6.229550170898437, "step": 66375 }, { "epoch": 0.0678, "grad_norm": 20.17076301574707, "learning_rate": 1.6980303030303032e-06, "loss": 6.406351470947266, "step": 66380 }, { "epoch": 0.06785, "grad_norm": 4.139901638031006, "learning_rate": 1.6977777777777779e-06, "loss": 6.244061279296875, "step": 66385 }, { "epoch": 0.0679, "grad_norm": 10.404656410217285, "learning_rate": 1.6975252525252527e-06, "loss": 6.242138671875, "step": 66390 }, { "epoch": 0.06795, "grad_norm": 5.628965377807617, "learning_rate": 1.6972727272727273e-06, "loss": 6.2847236633300785, "step": 66395 }, { "epoch": 0.068, "grad_norm": 7.462181091308594, "learning_rate": 1.6970202020202022e-06, "loss": 6.219943237304688, "step": 66400 }, { "epoch": 0.06805, "grad_norm": 15.120787620544434, "learning_rate": 1.6967676767676768e-06, "loss": 6.298888397216797, "step": 66405 }, { "epoch": 0.0681, "grad_norm": 4.9216203689575195, "learning_rate": 1.6965151515151519e-06, "loss": 6.288260650634766, "step": 66410 }, { "epoch": 0.06815, "grad_norm": 6.806882858276367, "learning_rate": 1.6962626262626263e-06, "loss": 6.248066329956055, "step": 66415 }, { "epoch": 0.0682, "grad_norm": 7.01479959487915, "learning_rate": 1.6960101010101013e-06, "loss": 6.24238395690918, "step": 66420 }, { "epoch": 0.06825, "grad_norm": 7.88714075088501, "learning_rate": 1.695757575757576e-06, "loss": 6.197238922119141, "step": 66425 }, { "epoch": 0.0683, "grad_norm": 6.433613300323486, "learning_rate": 1.6955050505050508e-06, "loss": 6.174630737304687, "step": 66430 }, { "epoch": 0.06835, "grad_norm": 7.741850852966309, "learning_rate": 1.6952525252525254e-06, "loss": 6.255828857421875, "step": 66435 }, { "epoch": 0.0684, "grad_norm": 6.869352340698242, "learning_rate": 1.6950000000000003e-06, "loss": 6.211191940307617, "step": 66440 }, { "epoch": 0.06845, "grad_norm": 15.96113109588623, "learning_rate": 1.694747474747475e-06, "loss": 6.153985595703125, "step": 66445 }, { "epoch": 0.0685, "grad_norm": 6.816077709197998, "learning_rate": 1.6944949494949498e-06, "loss": 6.230862426757812, "step": 66450 }, { "epoch": 0.06855, "grad_norm": 9.407057762145996, "learning_rate": 1.6942424242424244e-06, "loss": 6.252012634277344, "step": 66455 }, { "epoch": 0.0686, "grad_norm": 6.543179035186768, "learning_rate": 1.6939898989898992e-06, "loss": 6.278553771972656, "step": 66460 }, { "epoch": 0.06865, "grad_norm": 9.13317584991455, "learning_rate": 1.6937373737373739e-06, "loss": 6.257377624511719, "step": 66465 }, { "epoch": 0.0687, "grad_norm": 4.451229095458984, "learning_rate": 1.6934848484848487e-06, "loss": 6.244658279418945, "step": 66470 }, { "epoch": 0.06875, "grad_norm": 5.536858558654785, "learning_rate": 1.6932323232323233e-06, "loss": 6.278306579589843, "step": 66475 }, { "epoch": 0.0688, "grad_norm": 9.214041709899902, "learning_rate": 1.6929797979797982e-06, "loss": 6.241621017456055, "step": 66480 }, { "epoch": 0.06885, "grad_norm": 5.134233474731445, "learning_rate": 1.6927272727272728e-06, "loss": 6.220970535278321, "step": 66485 }, { "epoch": 0.0689, "grad_norm": 13.066136360168457, "learning_rate": 1.6924747474747476e-06, "loss": 6.218753814697266, "step": 66490 }, { "epoch": 0.06895, "grad_norm": 31.907691955566406, "learning_rate": 1.6922222222222223e-06, "loss": 6.224750137329101, "step": 66495 }, { "epoch": 0.069, "grad_norm": 34.301876068115234, "learning_rate": 1.6919696969696971e-06, "loss": 6.295145416259766, "step": 66500 }, { "epoch": 0.06905, "grad_norm": 7.682512283325195, "learning_rate": 1.6917171717171718e-06, "loss": 6.323597717285156, "step": 66505 }, { "epoch": 0.0691, "grad_norm": 7.993558406829834, "learning_rate": 1.6914646464646466e-06, "loss": 6.23961067199707, "step": 66510 }, { "epoch": 0.06915, "grad_norm": 27.40794563293457, "learning_rate": 1.6912121212121212e-06, "loss": 6.403021240234375, "step": 66515 }, { "epoch": 0.0692, "grad_norm": 4.032140731811523, "learning_rate": 1.6909595959595963e-06, "loss": 6.583545684814453, "step": 66520 }, { "epoch": 0.06925, "grad_norm": 12.52536392211914, "learning_rate": 1.6907070707070707e-06, "loss": 6.265705871582031, "step": 66525 }, { "epoch": 0.0693, "grad_norm": 10.975342750549316, "learning_rate": 1.6904545454545457e-06, "loss": 6.256227111816406, "step": 66530 }, { "epoch": 0.06935, "grad_norm": 5.752685546875, "learning_rate": 1.6902020202020204e-06, "loss": 6.281337738037109, "step": 66535 }, { "epoch": 0.0694, "grad_norm": 6.6339521408081055, "learning_rate": 1.6899494949494952e-06, "loss": 6.216249465942383, "step": 66540 }, { "epoch": 0.06945, "grad_norm": 7.913100242614746, "learning_rate": 1.6896969696969699e-06, "loss": 6.2172119140625, "step": 66545 }, { "epoch": 0.0695, "grad_norm": 6.407020568847656, "learning_rate": 1.6894444444444447e-06, "loss": 6.247430419921875, "step": 66550 }, { "epoch": 0.06955, "grad_norm": 6.047022819519043, "learning_rate": 1.6891919191919193e-06, "loss": 6.3256996154785154, "step": 66555 }, { "epoch": 0.0696, "grad_norm": 4.796283721923828, "learning_rate": 1.6889393939393942e-06, "loss": 6.224968338012696, "step": 66560 }, { "epoch": 0.06965, "grad_norm": 6.904263496398926, "learning_rate": 1.6886868686868688e-06, "loss": 6.255163955688476, "step": 66565 }, { "epoch": 0.0697, "grad_norm": 4.9425129890441895, "learning_rate": 1.6884343434343436e-06, "loss": 6.244914245605469, "step": 66570 }, { "epoch": 0.06975, "grad_norm": 5.1878132820129395, "learning_rate": 1.6881818181818183e-06, "loss": 6.311453247070313, "step": 66575 }, { "epoch": 0.0698, "grad_norm": 13.060309410095215, "learning_rate": 1.6879292929292931e-06, "loss": 6.239046478271485, "step": 66580 }, { "epoch": 0.06985, "grad_norm": 5.387986660003662, "learning_rate": 1.6876767676767677e-06, "loss": 6.245869827270508, "step": 66585 }, { "epoch": 0.0699, "grad_norm": 4.043570518493652, "learning_rate": 1.6874242424242426e-06, "loss": 6.242410659790039, "step": 66590 }, { "epoch": 0.06995, "grad_norm": 6.47554349899292, "learning_rate": 1.6871717171717172e-06, "loss": 6.258382415771484, "step": 66595 }, { "epoch": 0.07, "grad_norm": 4.793996334075928, "learning_rate": 1.686919191919192e-06, "loss": 6.283049011230469, "step": 66600 }, { "epoch": 0.07005, "grad_norm": 3.9591619968414307, "learning_rate": 1.6866666666666667e-06, "loss": 6.252975082397461, "step": 66605 }, { "epoch": 0.0701, "grad_norm": 4.501852989196777, "learning_rate": 1.6864141414141415e-06, "loss": 6.249829864501953, "step": 66610 }, { "epoch": 0.07015, "grad_norm": 4.239077091217041, "learning_rate": 1.6861616161616162e-06, "loss": 6.275090026855469, "step": 66615 }, { "epoch": 0.0702, "grad_norm": 4.846503734588623, "learning_rate": 1.6859090909090912e-06, "loss": 6.250979232788086, "step": 66620 }, { "epoch": 0.07025, "grad_norm": 6.763078689575195, "learning_rate": 1.6856565656565656e-06, "loss": 6.213971710205078, "step": 66625 }, { "epoch": 0.0703, "grad_norm": 4.898061752319336, "learning_rate": 1.6854040404040407e-06, "loss": 6.226777648925781, "step": 66630 }, { "epoch": 0.07035, "grad_norm": 7.829683780670166, "learning_rate": 1.6851515151515153e-06, "loss": 6.247608947753906, "step": 66635 }, { "epoch": 0.0704, "grad_norm": 5.44594669342041, "learning_rate": 1.6848989898989902e-06, "loss": 6.258434677124024, "step": 66640 }, { "epoch": 0.07045, "grad_norm": 9.771390914916992, "learning_rate": 1.6846464646464648e-06, "loss": 6.293168258666992, "step": 66645 }, { "epoch": 0.0705, "grad_norm": 7.443203926086426, "learning_rate": 1.6843939393939396e-06, "loss": 6.201123809814453, "step": 66650 }, { "epoch": 0.07055, "grad_norm": 5.766367435455322, "learning_rate": 1.6841414141414143e-06, "loss": 6.2608692169189455, "step": 66655 }, { "epoch": 0.0706, "grad_norm": 7.212408542633057, "learning_rate": 1.683888888888889e-06, "loss": 6.2338512420654295, "step": 66660 }, { "epoch": 0.07065, "grad_norm": 4.518867492675781, "learning_rate": 1.6836363636363637e-06, "loss": 6.270831298828125, "step": 66665 }, { "epoch": 0.0707, "grad_norm": 6.354170799255371, "learning_rate": 1.6833838383838386e-06, "loss": 6.253330993652344, "step": 66670 }, { "epoch": 0.07075, "grad_norm": 6.426168441772461, "learning_rate": 1.6831313131313132e-06, "loss": 6.2804912567138675, "step": 66675 }, { "epoch": 0.0708, "grad_norm": 5.305918216705322, "learning_rate": 1.682878787878788e-06, "loss": 6.237419128417969, "step": 66680 }, { "epoch": 0.07085, "grad_norm": 9.386476516723633, "learning_rate": 1.6826262626262627e-06, "loss": 6.260951995849609, "step": 66685 }, { "epoch": 0.0709, "grad_norm": 6.991048336029053, "learning_rate": 1.6823737373737375e-06, "loss": 6.241711807250977, "step": 66690 }, { "epoch": 0.07095, "grad_norm": 6.540029525756836, "learning_rate": 1.6821212121212121e-06, "loss": 6.2677978515625, "step": 66695 }, { "epoch": 0.071, "grad_norm": 5.9120306968688965, "learning_rate": 1.681868686868687e-06, "loss": 6.240496444702148, "step": 66700 }, { "epoch": 0.07105, "grad_norm": 21.944429397583008, "learning_rate": 1.6816161616161616e-06, "loss": 6.264336776733399, "step": 66705 }, { "epoch": 0.0711, "grad_norm": 14.691611289978027, "learning_rate": 1.6813636363636365e-06, "loss": 6.195718002319336, "step": 66710 }, { "epoch": 0.07115, "grad_norm": 7.642651081085205, "learning_rate": 1.681111111111111e-06, "loss": 6.270307922363282, "step": 66715 }, { "epoch": 0.0712, "grad_norm": 5.809664249420166, "learning_rate": 1.680858585858586e-06, "loss": 6.2938690185546875, "step": 66720 }, { "epoch": 0.07125, "grad_norm": 8.364645957946777, "learning_rate": 1.6806060606060606e-06, "loss": 6.257210540771484, "step": 66725 }, { "epoch": 0.0713, "grad_norm": 3.95160174369812, "learning_rate": 1.6803535353535356e-06, "loss": 6.275844573974609, "step": 66730 }, { "epoch": 0.07135, "grad_norm": 7.474793434143066, "learning_rate": 1.68010101010101e-06, "loss": 6.197529602050781, "step": 66735 }, { "epoch": 0.0714, "grad_norm": 4.211397171020508, "learning_rate": 1.679848484848485e-06, "loss": 6.232733917236328, "step": 66740 }, { "epoch": 0.07145, "grad_norm": 5.065145492553711, "learning_rate": 1.6795959595959597e-06, "loss": 6.227064895629883, "step": 66745 }, { "epoch": 0.0715, "grad_norm": 6.64362096786499, "learning_rate": 1.6793434343434346e-06, "loss": 6.227158737182617, "step": 66750 }, { "epoch": 0.07155, "grad_norm": 9.859370231628418, "learning_rate": 1.6790909090909092e-06, "loss": 6.220893478393554, "step": 66755 }, { "epoch": 0.0716, "grad_norm": 5.385838985443115, "learning_rate": 1.678838383838384e-06, "loss": 6.206459808349609, "step": 66760 }, { "epoch": 0.07165, "grad_norm": 5.009071350097656, "learning_rate": 1.6785858585858587e-06, "loss": 6.173466491699219, "step": 66765 }, { "epoch": 0.0717, "grad_norm": 7.756752014160156, "learning_rate": 1.6783333333333335e-06, "loss": 6.262116241455078, "step": 66770 }, { "epoch": 0.07175, "grad_norm": 6.852177143096924, "learning_rate": 1.6780808080808081e-06, "loss": 6.2273704528808596, "step": 66775 }, { "epoch": 0.0718, "grad_norm": 11.542301177978516, "learning_rate": 1.677828282828283e-06, "loss": 6.205813980102539, "step": 66780 }, { "epoch": 0.07185, "grad_norm": 6.170810222625732, "learning_rate": 1.6775757575757576e-06, "loss": 6.23189697265625, "step": 66785 }, { "epoch": 0.0719, "grad_norm": 8.510496139526367, "learning_rate": 1.6773232323232324e-06, "loss": 6.444342041015625, "step": 66790 }, { "epoch": 0.07195, "grad_norm": 3.7251713275909424, "learning_rate": 1.677070707070707e-06, "loss": 6.220070648193359, "step": 66795 }, { "epoch": 0.072, "grad_norm": 7.442929267883301, "learning_rate": 1.676818181818182e-06, "loss": 6.274032974243164, "step": 66800 }, { "epoch": 0.07205, "grad_norm": 5.154984951019287, "learning_rate": 1.6765656565656568e-06, "loss": 6.213645553588867, "step": 66805 }, { "epoch": 0.0721, "grad_norm": 6.928219795227051, "learning_rate": 1.6763131313131314e-06, "loss": 6.344789123535156, "step": 66810 }, { "epoch": 0.07215, "grad_norm": 11.991657257080078, "learning_rate": 1.6760606060606062e-06, "loss": 6.2474311828613285, "step": 66815 }, { "epoch": 0.0722, "grad_norm": 4.950830936431885, "learning_rate": 1.6758080808080809e-06, "loss": 6.226139068603516, "step": 66820 }, { "epoch": 0.07225, "grad_norm": 7.721171855926514, "learning_rate": 1.675555555555556e-06, "loss": 6.225967025756836, "step": 66825 }, { "epoch": 0.0723, "grad_norm": 4.155689716339111, "learning_rate": 1.6753030303030303e-06, "loss": 6.245489501953125, "step": 66830 }, { "epoch": 0.07235, "grad_norm": 6.7812700271606445, "learning_rate": 1.6750505050505054e-06, "loss": 6.233506774902343, "step": 66835 }, { "epoch": 0.0724, "grad_norm": 5.097720146179199, "learning_rate": 1.67479797979798e-06, "loss": 6.284716415405273, "step": 66840 }, { "epoch": 0.07245, "grad_norm": 4.343050003051758, "learning_rate": 1.6745454545454549e-06, "loss": 6.286292266845703, "step": 66845 }, { "epoch": 0.0725, "grad_norm": 12.36262321472168, "learning_rate": 1.6742929292929295e-06, "loss": 6.268727874755859, "step": 66850 }, { "epoch": 0.07255, "grad_norm": 8.473738670349121, "learning_rate": 1.6740404040404043e-06, "loss": 6.241419982910156, "step": 66855 }, { "epoch": 0.0726, "grad_norm": 5.330711841583252, "learning_rate": 1.673787878787879e-06, "loss": 6.38854751586914, "step": 66860 }, { "epoch": 0.07265, "grad_norm": 6.764120578765869, "learning_rate": 1.6735353535353538e-06, "loss": 6.2959941864013675, "step": 66865 }, { "epoch": 0.0727, "grad_norm": 7.664856910705566, "learning_rate": 1.6732828282828284e-06, "loss": 6.248047256469727, "step": 66870 }, { "epoch": 0.07275, "grad_norm": 8.716785430908203, "learning_rate": 1.6730303030303033e-06, "loss": 6.297762680053711, "step": 66875 }, { "epoch": 0.0728, "grad_norm": 3.71866774559021, "learning_rate": 1.672777777777778e-06, "loss": 6.2310302734375, "step": 66880 }, { "epoch": 0.07285, "grad_norm": 5.274337291717529, "learning_rate": 1.6725252525252527e-06, "loss": 6.286685943603516, "step": 66885 }, { "epoch": 0.0729, "grad_norm": 8.777344703674316, "learning_rate": 1.6722727272727274e-06, "loss": 6.298732757568359, "step": 66890 }, { "epoch": 0.07295, "grad_norm": 5.9917731285095215, "learning_rate": 1.6720202020202022e-06, "loss": 6.237001037597656, "step": 66895 }, { "epoch": 0.073, "grad_norm": 5.673536777496338, "learning_rate": 1.6717676767676768e-06, "loss": 6.277872467041016, "step": 66900 }, { "epoch": 0.07305, "grad_norm": 6.6186299324035645, "learning_rate": 1.6715151515151517e-06, "loss": 6.262302398681641, "step": 66905 }, { "epoch": 0.0731, "grad_norm": 8.656669616699219, "learning_rate": 1.6712626262626263e-06, "loss": 6.228644180297851, "step": 66910 }, { "epoch": 0.07315, "grad_norm": 7.859682083129883, "learning_rate": 1.6710101010101012e-06, "loss": 6.299396514892578, "step": 66915 }, { "epoch": 0.0732, "grad_norm": 3.3357269763946533, "learning_rate": 1.6707575757575758e-06, "loss": 6.2257648468017575, "step": 66920 }, { "epoch": 0.07325, "grad_norm": 13.270245552062988, "learning_rate": 1.6705050505050508e-06, "loss": 6.317365264892578, "step": 66925 }, { "epoch": 0.0733, "grad_norm": 6.733044147491455, "learning_rate": 1.6702525252525253e-06, "loss": 6.261484527587891, "step": 66930 }, { "epoch": 0.07335, "grad_norm": 4.925628185272217, "learning_rate": 1.6700000000000003e-06, "loss": 6.247397232055664, "step": 66935 }, { "epoch": 0.0734, "grad_norm": 9.191264152526855, "learning_rate": 1.6697474747474747e-06, "loss": 6.267113113403321, "step": 66940 }, { "epoch": 0.07345, "grad_norm": 7.5159525871276855, "learning_rate": 1.6694949494949498e-06, "loss": 6.296604156494141, "step": 66945 }, { "epoch": 0.0735, "grad_norm": 6.917036056518555, "learning_rate": 1.6692424242424244e-06, "loss": 6.205605316162109, "step": 66950 }, { "epoch": 0.07355, "grad_norm": 5.941437244415283, "learning_rate": 1.6689898989898993e-06, "loss": 6.298007202148438, "step": 66955 }, { "epoch": 0.0736, "grad_norm": 7.841940879821777, "learning_rate": 1.6687373737373739e-06, "loss": 6.1930397033691404, "step": 66960 }, { "epoch": 0.07365, "grad_norm": 4.034249782562256, "learning_rate": 1.6684848484848487e-06, "loss": 6.230327606201172, "step": 66965 }, { "epoch": 0.0737, "grad_norm": 6.375181198120117, "learning_rate": 1.6682323232323234e-06, "loss": 6.3028308868408205, "step": 66970 }, { "epoch": 0.07375, "grad_norm": 11.67187213897705, "learning_rate": 1.6679797979797982e-06, "loss": 6.3321693420410154, "step": 66975 }, { "epoch": 0.0738, "grad_norm": 22.714027404785156, "learning_rate": 1.6677272727272728e-06, "loss": 6.354092407226562, "step": 66980 }, { "epoch": 0.07385, "grad_norm": 8.630751609802246, "learning_rate": 1.6674747474747477e-06, "loss": 6.375128173828125, "step": 66985 }, { "epoch": 0.0739, "grad_norm": 4.428469181060791, "learning_rate": 1.6672222222222223e-06, "loss": 6.217602920532227, "step": 66990 }, { "epoch": 0.07395, "grad_norm": 7.888657569885254, "learning_rate": 1.6669696969696971e-06, "loss": 6.163916015625, "step": 66995 }, { "epoch": 0.074, "grad_norm": 6.813203811645508, "learning_rate": 1.6667171717171718e-06, "loss": 6.254452896118164, "step": 67000 }, { "epoch": 0.07405, "grad_norm": 7.165389537811279, "learning_rate": 1.6664646464646466e-06, "loss": 6.243295669555664, "step": 67005 }, { "epoch": 0.0741, "grad_norm": 8.310712814331055, "learning_rate": 1.6662121212121213e-06, "loss": 6.208291625976562, "step": 67010 }, { "epoch": 0.07415, "grad_norm": 4.837979793548584, "learning_rate": 1.665959595959596e-06, "loss": 6.2461082458496096, "step": 67015 }, { "epoch": 0.0742, "grad_norm": 5.729977607727051, "learning_rate": 1.6657070707070707e-06, "loss": 6.267649078369141, "step": 67020 }, { "epoch": 0.07425, "grad_norm": 28.752960205078125, "learning_rate": 1.6654545454545456e-06, "loss": 6.231096649169922, "step": 67025 }, { "epoch": 0.0743, "grad_norm": 10.139350891113281, "learning_rate": 1.6652020202020202e-06, "loss": 6.281904602050782, "step": 67030 }, { "epoch": 0.07435, "grad_norm": 13.142845153808594, "learning_rate": 1.6649494949494952e-06, "loss": 6.284098434448242, "step": 67035 }, { "epoch": 0.0744, "grad_norm": 6.03406286239624, "learning_rate": 1.6646969696969697e-06, "loss": 6.246023941040039, "step": 67040 }, { "epoch": 0.07445, "grad_norm": 6.210331439971924, "learning_rate": 1.6644444444444447e-06, "loss": 6.267349624633789, "step": 67045 }, { "epoch": 0.0745, "grad_norm": 25.118425369262695, "learning_rate": 1.6641919191919193e-06, "loss": 6.402649688720703, "step": 67050 }, { "epoch": 0.07455, "grad_norm": 6.314655780792236, "learning_rate": 1.6639393939393942e-06, "loss": 6.197834014892578, "step": 67055 }, { "epoch": 0.0746, "grad_norm": 2.856656551361084, "learning_rate": 1.6636868686868688e-06, "loss": 6.204808044433594, "step": 67060 }, { "epoch": 0.07465, "grad_norm": 4.676720142364502, "learning_rate": 1.6634343434343437e-06, "loss": 6.08953857421875, "step": 67065 }, { "epoch": 0.0747, "grad_norm": 4.430919647216797, "learning_rate": 1.6631818181818183e-06, "loss": 6.305049896240234, "step": 67070 }, { "epoch": 0.07475, "grad_norm": 6.612840175628662, "learning_rate": 1.6629292929292931e-06, "loss": 6.242228317260742, "step": 67075 }, { "epoch": 0.0748, "grad_norm": 23.68415641784668, "learning_rate": 1.6626767676767678e-06, "loss": 6.260010147094727, "step": 67080 }, { "epoch": 0.07485, "grad_norm": 8.681134223937988, "learning_rate": 1.6624242424242426e-06, "loss": 6.318877029418945, "step": 67085 }, { "epoch": 0.0749, "grad_norm": 6.6279096603393555, "learning_rate": 1.6621717171717172e-06, "loss": 6.1865287780761715, "step": 67090 }, { "epoch": 0.07495, "grad_norm": 7.408749580383301, "learning_rate": 1.661919191919192e-06, "loss": 6.218962860107422, "step": 67095 }, { "epoch": 0.075, "grad_norm": 5.5011186599731445, "learning_rate": 1.6616666666666667e-06, "loss": 6.2669731140136715, "step": 67100 }, { "epoch": 0.07505, "grad_norm": 6.862961292266846, "learning_rate": 1.6614141414141416e-06, "loss": 6.219792175292969, "step": 67105 }, { "epoch": 0.0751, "grad_norm": 8.011595726013184, "learning_rate": 1.6611616161616162e-06, "loss": 6.267979431152344, "step": 67110 }, { "epoch": 0.07515, "grad_norm": 5.4261250495910645, "learning_rate": 1.660909090909091e-06, "loss": 6.2546028137207035, "step": 67115 }, { "epoch": 0.0752, "grad_norm": 7.062658786773682, "learning_rate": 1.6606565656565657e-06, "loss": 6.321611022949218, "step": 67120 }, { "epoch": 0.07525, "grad_norm": 4.605218410491943, "learning_rate": 1.6604040404040405e-06, "loss": 6.292785263061523, "step": 67125 }, { "epoch": 0.0753, "grad_norm": 4.593461513519287, "learning_rate": 1.6601515151515151e-06, "loss": 6.243962097167969, "step": 67130 }, { "epoch": 0.07535, "grad_norm": 10.528093338012695, "learning_rate": 1.65989898989899e-06, "loss": 6.255412673950195, "step": 67135 }, { "epoch": 0.0754, "grad_norm": 4.658772945404053, "learning_rate": 1.6596464646464646e-06, "loss": 6.269566345214844, "step": 67140 }, { "epoch": 0.07545, "grad_norm": 10.081796646118164, "learning_rate": 1.6593939393939397e-06, "loss": 6.317112731933594, "step": 67145 }, { "epoch": 0.0755, "grad_norm": 14.443259239196777, "learning_rate": 1.659141414141414e-06, "loss": 6.392361068725586, "step": 67150 }, { "epoch": 0.07555, "grad_norm": 5.5970048904418945, "learning_rate": 1.6588888888888891e-06, "loss": 6.258982849121094, "step": 67155 }, { "epoch": 0.0756, "grad_norm": 8.535996437072754, "learning_rate": 1.6586363636363638e-06, "loss": 6.231755065917969, "step": 67160 }, { "epoch": 0.07565, "grad_norm": 27.695085525512695, "learning_rate": 1.6583838383838386e-06, "loss": 6.322100830078125, "step": 67165 }, { "epoch": 0.0757, "grad_norm": 6.6866912841796875, "learning_rate": 1.6581313131313132e-06, "loss": 6.235567474365235, "step": 67170 }, { "epoch": 0.07575, "grad_norm": 5.936610221862793, "learning_rate": 1.657878787878788e-06, "loss": 6.250206375122071, "step": 67175 }, { "epoch": 0.0758, "grad_norm": 4.696446895599365, "learning_rate": 1.6576262626262627e-06, "loss": 6.255342102050781, "step": 67180 }, { "epoch": 0.07585, "grad_norm": 14.66552734375, "learning_rate": 1.6573737373737375e-06, "loss": 8.99875717163086, "step": 67185 }, { "epoch": 0.0759, "grad_norm": 5.719045639038086, "learning_rate": 1.6571212121212122e-06, "loss": 6.237542724609375, "step": 67190 }, { "epoch": 0.07595, "grad_norm": 10.964690208435059, "learning_rate": 1.656868686868687e-06, "loss": 6.260063934326172, "step": 67195 }, { "epoch": 0.076, "grad_norm": 4.6868181228637695, "learning_rate": 1.6566161616161616e-06, "loss": 6.257347869873047, "step": 67200 }, { "epoch": 0.07605, "grad_norm": 4.5217366218566895, "learning_rate": 1.6563636363636365e-06, "loss": 6.205026245117187, "step": 67205 }, { "epoch": 0.0761, "grad_norm": 6.182888507843018, "learning_rate": 1.6561111111111111e-06, "loss": 6.215607070922852, "step": 67210 }, { "epoch": 0.07615, "grad_norm": 33.83673858642578, "learning_rate": 1.655858585858586e-06, "loss": 6.2854572296142575, "step": 67215 }, { "epoch": 0.0762, "grad_norm": 4.649621486663818, "learning_rate": 1.6556060606060606e-06, "loss": 6.2342884063720705, "step": 67220 }, { "epoch": 0.07625, "grad_norm": 11.680132865905762, "learning_rate": 1.6553535353535354e-06, "loss": 6.48589859008789, "step": 67225 }, { "epoch": 0.0763, "grad_norm": 9.52846908569336, "learning_rate": 1.6551010101010103e-06, "loss": 6.261978530883789, "step": 67230 }, { "epoch": 0.07635, "grad_norm": 18.70293617248535, "learning_rate": 1.654848484848485e-06, "loss": 6.326731872558594, "step": 67235 }, { "epoch": 0.0764, "grad_norm": 4.197718143463135, "learning_rate": 1.65459595959596e-06, "loss": 6.465175628662109, "step": 67240 }, { "epoch": 0.07645, "grad_norm": 4.910754203796387, "learning_rate": 1.6543434343434344e-06, "loss": 6.238689041137695, "step": 67245 }, { "epoch": 0.0765, "grad_norm": 9.078868865966797, "learning_rate": 1.6540909090909094e-06, "loss": 6.269768905639649, "step": 67250 }, { "epoch": 0.07655, "grad_norm": 5.23130464553833, "learning_rate": 1.653838383838384e-06, "loss": 6.211724090576172, "step": 67255 }, { "epoch": 0.0766, "grad_norm": 7.957825660705566, "learning_rate": 1.653585858585859e-06, "loss": 6.274599075317383, "step": 67260 }, { "epoch": 0.07665, "grad_norm": 37.13931655883789, "learning_rate": 1.6533333333333335e-06, "loss": 6.194308471679688, "step": 67265 }, { "epoch": 0.0767, "grad_norm": 5.188248157501221, "learning_rate": 1.6530808080808084e-06, "loss": 6.229327392578125, "step": 67270 }, { "epoch": 0.07675, "grad_norm": 5.43469762802124, "learning_rate": 1.652828282828283e-06, "loss": 6.197406768798828, "step": 67275 }, { "epoch": 0.0768, "grad_norm": 7.046252250671387, "learning_rate": 1.6525757575757578e-06, "loss": 6.240939331054688, "step": 67280 }, { "epoch": 0.07685, "grad_norm": 9.346805572509766, "learning_rate": 1.6523232323232325e-06, "loss": 6.445100402832031, "step": 67285 }, { "epoch": 0.0769, "grad_norm": 8.126006126403809, "learning_rate": 1.6520707070707073e-06, "loss": 6.231577682495117, "step": 67290 }, { "epoch": 0.07695, "grad_norm": 5.0931267738342285, "learning_rate": 1.651818181818182e-06, "loss": 6.2618263244628904, "step": 67295 }, { "epoch": 0.077, "grad_norm": 5.816989898681641, "learning_rate": 1.6515656565656568e-06, "loss": 6.216603088378906, "step": 67300 }, { "epoch": 0.07705, "grad_norm": 12.151991844177246, "learning_rate": 1.6513131313131314e-06, "loss": 6.282543182373047, "step": 67305 }, { "epoch": 0.0771, "grad_norm": 8.20792293548584, "learning_rate": 1.6510606060606063e-06, "loss": 6.303852462768555, "step": 67310 }, { "epoch": 0.07715, "grad_norm": 5.390141010284424, "learning_rate": 1.6508080808080809e-06, "loss": 6.220110702514648, "step": 67315 }, { "epoch": 0.0772, "grad_norm": 4.785421371459961, "learning_rate": 1.6505555555555557e-06, "loss": 6.263191986083984, "step": 67320 }, { "epoch": 0.07725, "grad_norm": 12.818017959594727, "learning_rate": 1.6503030303030304e-06, "loss": 6.275766754150391, "step": 67325 }, { "epoch": 0.0773, "grad_norm": 6.268979549407959, "learning_rate": 1.6500505050505052e-06, "loss": 6.227426528930664, "step": 67330 }, { "epoch": 0.07735, "grad_norm": 6.731168270111084, "learning_rate": 1.6497979797979798e-06, "loss": 6.281745910644531, "step": 67335 }, { "epoch": 0.0774, "grad_norm": 10.57622241973877, "learning_rate": 1.6495454545454549e-06, "loss": 6.228261947631836, "step": 67340 }, { "epoch": 0.07745, "grad_norm": 4.741379261016846, "learning_rate": 1.6492929292929293e-06, "loss": 6.260938262939453, "step": 67345 }, { "epoch": 0.0775, "grad_norm": 4.93731689453125, "learning_rate": 1.6490404040404044e-06, "loss": 6.165606689453125, "step": 67350 }, { "epoch": 0.07755, "grad_norm": 6.149819850921631, "learning_rate": 1.648787878787879e-06, "loss": 6.190977096557617, "step": 67355 }, { "epoch": 0.0776, "grad_norm": 7.334464073181152, "learning_rate": 1.6485353535353538e-06, "loss": 6.259605407714844, "step": 67360 }, { "epoch": 0.07765, "grad_norm": 10.363024711608887, "learning_rate": 1.6482828282828285e-06, "loss": 6.459387969970703, "step": 67365 }, { "epoch": 0.0777, "grad_norm": 7.720767021179199, "learning_rate": 1.6480303030303033e-06, "loss": 6.257353973388672, "step": 67370 }, { "epoch": 0.07775, "grad_norm": 4.660161972045898, "learning_rate": 1.647777777777778e-06, "loss": 6.334947967529297, "step": 67375 }, { "epoch": 0.0778, "grad_norm": 8.98572826385498, "learning_rate": 1.6475252525252528e-06, "loss": 6.258035278320312, "step": 67380 }, { "epoch": 0.07785, "grad_norm": 10.139760971069336, "learning_rate": 1.6472727272727274e-06, "loss": 6.245917129516601, "step": 67385 }, { "epoch": 0.0779, "grad_norm": 8.263921737670898, "learning_rate": 1.6470202020202022e-06, "loss": 6.2987617492675785, "step": 67390 }, { "epoch": 0.07795, "grad_norm": 7.515267372131348, "learning_rate": 1.6467676767676769e-06, "loss": 6.263603973388672, "step": 67395 }, { "epoch": 0.078, "grad_norm": 6.685860633850098, "learning_rate": 1.6465151515151517e-06, "loss": 6.212256240844726, "step": 67400 }, { "epoch": 0.07805, "grad_norm": 8.095151901245117, "learning_rate": 1.6462626262626263e-06, "loss": 6.253326416015625, "step": 67405 }, { "epoch": 0.0781, "grad_norm": 5.389197826385498, "learning_rate": 1.6460101010101012e-06, "loss": 6.264118957519531, "step": 67410 }, { "epoch": 0.07815, "grad_norm": 16.66035270690918, "learning_rate": 1.6457575757575758e-06, "loss": 6.29560661315918, "step": 67415 }, { "epoch": 0.0782, "grad_norm": 4.174916744232178, "learning_rate": 1.6455050505050507e-06, "loss": 6.238037872314453, "step": 67420 }, { "epoch": 0.07825, "grad_norm": 6.3450493812561035, "learning_rate": 1.6452525252525253e-06, "loss": 6.39105224609375, "step": 67425 }, { "epoch": 0.0783, "grad_norm": 5.370755195617676, "learning_rate": 1.6450000000000001e-06, "loss": 6.2310333251953125, "step": 67430 }, { "epoch": 0.07835, "grad_norm": 8.252110481262207, "learning_rate": 1.6447474747474748e-06, "loss": 6.224236297607422, "step": 67435 }, { "epoch": 0.0784, "grad_norm": 44.491153717041016, "learning_rate": 1.6444949494949496e-06, "loss": 6.402571105957032, "step": 67440 }, { "epoch": 0.07845, "grad_norm": 7.144503593444824, "learning_rate": 1.6442424242424242e-06, "loss": 6.46932373046875, "step": 67445 }, { "epoch": 0.0785, "grad_norm": 5.997300148010254, "learning_rate": 1.6439898989898993e-06, "loss": 6.2170360565185545, "step": 67450 }, { "epoch": 0.07855, "grad_norm": 6.4436235427856445, "learning_rate": 1.6437373737373737e-06, "loss": 6.195291137695312, "step": 67455 }, { "epoch": 0.0786, "grad_norm": 4.463476181030273, "learning_rate": 1.6434848484848488e-06, "loss": 6.303199005126953, "step": 67460 }, { "epoch": 0.07865, "grad_norm": 7.484311580657959, "learning_rate": 1.6432323232323234e-06, "loss": 6.261862945556641, "step": 67465 }, { "epoch": 0.0787, "grad_norm": 13.429766654968262, "learning_rate": 1.6429797979797982e-06, "loss": 6.272409439086914, "step": 67470 }, { "epoch": 0.07875, "grad_norm": 6.682587146759033, "learning_rate": 1.6427272727272729e-06, "loss": 6.299173355102539, "step": 67475 }, { "epoch": 0.0788, "grad_norm": 7.4914774894714355, "learning_rate": 1.6424747474747477e-06, "loss": 6.2360893249511715, "step": 67480 }, { "epoch": 0.07885, "grad_norm": 12.158146858215332, "learning_rate": 1.6422222222222223e-06, "loss": 6.504476165771484, "step": 67485 }, { "epoch": 0.0789, "grad_norm": 8.626891136169434, "learning_rate": 1.6419696969696972e-06, "loss": 6.404395294189453, "step": 67490 }, { "epoch": 0.07895, "grad_norm": 5.1805877685546875, "learning_rate": 1.6417171717171718e-06, "loss": 6.2689208984375, "step": 67495 }, { "epoch": 0.079, "grad_norm": 4.457931995391846, "learning_rate": 1.6414646464646466e-06, "loss": 6.252556610107422, "step": 67500 }, { "epoch": 0.07905, "grad_norm": 9.858596801757812, "learning_rate": 1.6412121212121213e-06, "loss": 6.243936920166016, "step": 67505 }, { "epoch": 0.0791, "grad_norm": 6.137274265289307, "learning_rate": 1.6409595959595961e-06, "loss": 6.229312896728516, "step": 67510 }, { "epoch": 0.07915, "grad_norm": 45.86907196044922, "learning_rate": 1.6407070707070707e-06, "loss": 6.087805938720703, "step": 67515 }, { "epoch": 0.0792, "grad_norm": 35.95911407470703, "learning_rate": 1.6404545454545456e-06, "loss": 5.7133544921875, "step": 67520 }, { "epoch": 0.07925, "grad_norm": 43.87601089477539, "learning_rate": 1.6402020202020202e-06, "loss": 5.9341270446777346, "step": 67525 }, { "epoch": 0.0793, "grad_norm": 8.438127517700195, "learning_rate": 1.639949494949495e-06, "loss": 6.205067825317383, "step": 67530 }, { "epoch": 0.07935, "grad_norm": 8.508034706115723, "learning_rate": 1.6396969696969697e-06, "loss": 6.235186004638672, "step": 67535 }, { "epoch": 0.0794, "grad_norm": 23.292116165161133, "learning_rate": 1.6394444444444445e-06, "loss": 6.414584350585938, "step": 67540 }, { "epoch": 0.07945, "grad_norm": 6.286035537719727, "learning_rate": 1.6391919191919192e-06, "loss": 6.278558349609375, "step": 67545 }, { "epoch": 0.0795, "grad_norm": 6.789996147155762, "learning_rate": 1.638939393939394e-06, "loss": 6.256460952758789, "step": 67550 }, { "epoch": 0.07955, "grad_norm": 4.980437278747559, "learning_rate": 1.6386868686868686e-06, "loss": 6.24859619140625, "step": 67555 }, { "epoch": 0.0796, "grad_norm": 8.48459243774414, "learning_rate": 1.6384343434343437e-06, "loss": 6.210050582885742, "step": 67560 }, { "epoch": 0.07965, "grad_norm": 7.922573089599609, "learning_rate": 1.6381818181818181e-06, "loss": 6.266993713378906, "step": 67565 }, { "epoch": 0.0797, "grad_norm": 11.659849166870117, "learning_rate": 1.6379292929292932e-06, "loss": 6.237688446044922, "step": 67570 }, { "epoch": 0.07975, "grad_norm": 14.246847152709961, "learning_rate": 1.6376767676767678e-06, "loss": 6.814814758300781, "step": 67575 }, { "epoch": 0.0798, "grad_norm": 11.328825950622559, "learning_rate": 1.6374242424242426e-06, "loss": 6.440885925292969, "step": 67580 }, { "epoch": 0.07985, "grad_norm": 16.534137725830078, "learning_rate": 1.6371717171717173e-06, "loss": 6.310224914550782, "step": 67585 }, { "epoch": 0.0799, "grad_norm": 6.2736992835998535, "learning_rate": 1.6369191919191921e-06, "loss": 6.302394866943359, "step": 67590 }, { "epoch": 0.07995, "grad_norm": 14.068726539611816, "learning_rate": 1.6366666666666667e-06, "loss": 6.337668991088867, "step": 67595 }, { "epoch": 0.08, "grad_norm": 11.29350757598877, "learning_rate": 1.6364141414141416e-06, "loss": 6.255840301513672, "step": 67600 }, { "epoch": 0.08005, "grad_norm": 6.99368953704834, "learning_rate": 1.6361616161616162e-06, "loss": 6.277493286132812, "step": 67605 }, { "epoch": 0.0801, "grad_norm": 6.596593856811523, "learning_rate": 1.635909090909091e-06, "loss": 6.200252532958984, "step": 67610 }, { "epoch": 0.08015, "grad_norm": 5.134659767150879, "learning_rate": 1.6356565656565657e-06, "loss": 6.270723724365235, "step": 67615 }, { "epoch": 0.0802, "grad_norm": 9.081636428833008, "learning_rate": 1.6354040404040405e-06, "loss": 6.252311325073242, "step": 67620 }, { "epoch": 0.08025, "grad_norm": 28.274370193481445, "learning_rate": 1.6351515151515152e-06, "loss": 6.210406875610351, "step": 67625 }, { "epoch": 0.0803, "grad_norm": 5.705901145935059, "learning_rate": 1.63489898989899e-06, "loss": 6.2922523498535154, "step": 67630 }, { "epoch": 0.08035, "grad_norm": 3.9479448795318604, "learning_rate": 1.6346464646464646e-06, "loss": 6.27076416015625, "step": 67635 }, { "epoch": 0.0804, "grad_norm": 5.289205551147461, "learning_rate": 1.6343939393939395e-06, "loss": 6.220604324340821, "step": 67640 }, { "epoch": 0.08045, "grad_norm": 3.978760004043579, "learning_rate": 1.634141414141414e-06, "loss": 6.177287673950195, "step": 67645 }, { "epoch": 0.0805, "grad_norm": 19.25391387939453, "learning_rate": 1.633888888888889e-06, "loss": 6.2040962219238285, "step": 67650 }, { "epoch": 0.08055, "grad_norm": 5.032575607299805, "learning_rate": 1.6336363636363636e-06, "loss": 6.228739166259766, "step": 67655 }, { "epoch": 0.0806, "grad_norm": 19.557016372680664, "learning_rate": 1.6333838383838384e-06, "loss": 6.3084465026855465, "step": 67660 }, { "epoch": 0.08065, "grad_norm": 3.839205026626587, "learning_rate": 1.6331313131313135e-06, "loss": 6.22178955078125, "step": 67665 }, { "epoch": 0.0807, "grad_norm": 10.260281562805176, "learning_rate": 1.632878787878788e-06, "loss": 6.220943450927734, "step": 67670 }, { "epoch": 0.08075, "grad_norm": 6.086826324462891, "learning_rate": 1.632626262626263e-06, "loss": 6.242675018310547, "step": 67675 }, { "epoch": 0.0808, "grad_norm": 8.19084358215332, "learning_rate": 1.6323737373737376e-06, "loss": 6.176053619384765, "step": 67680 }, { "epoch": 0.08085, "grad_norm": 6.042530536651611, "learning_rate": 1.6321212121212124e-06, "loss": 6.259477615356445, "step": 67685 }, { "epoch": 0.0809, "grad_norm": 12.969033241271973, "learning_rate": 1.631868686868687e-06, "loss": 6.343133544921875, "step": 67690 }, { "epoch": 0.08095, "grad_norm": 5.366542339324951, "learning_rate": 1.6316161616161619e-06, "loss": 6.232950592041016, "step": 67695 }, { "epoch": 0.081, "grad_norm": 4.220341682434082, "learning_rate": 1.6313636363636365e-06, "loss": 6.2344825744628904, "step": 67700 }, { "epoch": 0.08105, "grad_norm": 5.814554691314697, "learning_rate": 1.6311111111111114e-06, "loss": 6.263970184326172, "step": 67705 }, { "epoch": 0.0811, "grad_norm": 4.872402191162109, "learning_rate": 1.630858585858586e-06, "loss": 6.1936088562011715, "step": 67710 }, { "epoch": 0.08115, "grad_norm": 11.441375732421875, "learning_rate": 1.6306060606060608e-06, "loss": 6.247540283203125, "step": 67715 }, { "epoch": 0.0812, "grad_norm": 6.263543128967285, "learning_rate": 1.6303535353535355e-06, "loss": 6.252691650390625, "step": 67720 }, { "epoch": 0.08125, "grad_norm": 6.189016342163086, "learning_rate": 1.6301010101010103e-06, "loss": 6.234823608398438, "step": 67725 }, { "epoch": 0.0813, "grad_norm": 4.7466630935668945, "learning_rate": 1.629848484848485e-06, "loss": 6.248161697387696, "step": 67730 }, { "epoch": 0.08135, "grad_norm": 9.343341827392578, "learning_rate": 1.6295959595959598e-06, "loss": 6.277152252197266, "step": 67735 }, { "epoch": 0.0814, "grad_norm": 5.7494330406188965, "learning_rate": 1.6293434343434344e-06, "loss": 6.22405776977539, "step": 67740 }, { "epoch": 0.08145, "grad_norm": 5.2661237716674805, "learning_rate": 1.6290909090909092e-06, "loss": 6.223161315917968, "step": 67745 }, { "epoch": 0.0815, "grad_norm": 7.264105319976807, "learning_rate": 1.6288383838383839e-06, "loss": 6.235430908203125, "step": 67750 }, { "epoch": 0.08155, "grad_norm": 4.009241104125977, "learning_rate": 1.628585858585859e-06, "loss": 6.277658081054687, "step": 67755 }, { "epoch": 0.0816, "grad_norm": 6.422910690307617, "learning_rate": 1.6283333333333333e-06, "loss": 6.268106842041016, "step": 67760 }, { "epoch": 0.08165, "grad_norm": 6.200218200683594, "learning_rate": 1.6280808080808084e-06, "loss": 6.2800758361816404, "step": 67765 }, { "epoch": 0.0817, "grad_norm": 5.410836219787598, "learning_rate": 1.627828282828283e-06, "loss": 6.216603088378906, "step": 67770 }, { "epoch": 0.08175, "grad_norm": 5.310351848602295, "learning_rate": 1.6275757575757579e-06, "loss": 6.266691207885742, "step": 67775 }, { "epoch": 0.0818, "grad_norm": 6.013910293579102, "learning_rate": 1.6273232323232325e-06, "loss": 6.26541748046875, "step": 67780 }, { "epoch": 0.08185, "grad_norm": 7.738881587982178, "learning_rate": 1.6270707070707073e-06, "loss": 6.274266052246094, "step": 67785 }, { "epoch": 0.0819, "grad_norm": 6.8991498947143555, "learning_rate": 1.626818181818182e-06, "loss": 6.216093826293945, "step": 67790 }, { "epoch": 0.08195, "grad_norm": 6.685734748840332, "learning_rate": 1.6265656565656568e-06, "loss": 6.254594421386718, "step": 67795 }, { "epoch": 0.082, "grad_norm": 8.719515800476074, "learning_rate": 1.6263131313131314e-06, "loss": 6.259923934936523, "step": 67800 }, { "epoch": 0.08205, "grad_norm": 38.41505813598633, "learning_rate": 1.6260606060606063e-06, "loss": 6.183220672607422, "step": 67805 }, { "epoch": 0.0821, "grad_norm": 5.735619068145752, "learning_rate": 1.625808080808081e-06, "loss": 6.151051330566406, "step": 67810 }, { "epoch": 0.08215, "grad_norm": 3.9512507915496826, "learning_rate": 1.6255555555555558e-06, "loss": 6.205488967895508, "step": 67815 }, { "epoch": 0.0822, "grad_norm": 7.2785725593566895, "learning_rate": 1.6253030303030304e-06, "loss": 6.221299362182617, "step": 67820 }, { "epoch": 0.08225, "grad_norm": 7.386477947235107, "learning_rate": 1.6250505050505052e-06, "loss": 6.242499160766601, "step": 67825 }, { "epoch": 0.0823, "grad_norm": 7.744307994842529, "learning_rate": 1.6247979797979799e-06, "loss": 6.227522277832032, "step": 67830 }, { "epoch": 0.08235, "grad_norm": 8.094497680664062, "learning_rate": 1.6245454545454547e-06, "loss": 6.257695007324219, "step": 67835 }, { "epoch": 0.0824, "grad_norm": 13.343533515930176, "learning_rate": 1.6242929292929293e-06, "loss": 6.269753265380859, "step": 67840 }, { "epoch": 0.08245, "grad_norm": 4.233196258544922, "learning_rate": 1.6240404040404042e-06, "loss": 6.280826187133789, "step": 67845 }, { "epoch": 0.0825, "grad_norm": 6.104916572570801, "learning_rate": 1.6237878787878788e-06, "loss": 6.266438293457031, "step": 67850 }, { "epoch": 0.08255, "grad_norm": 6.73159122467041, "learning_rate": 1.6235353535353536e-06, "loss": 6.26457633972168, "step": 67855 }, { "epoch": 0.0826, "grad_norm": 6.955192565917969, "learning_rate": 1.6232828282828283e-06, "loss": 6.222341918945313, "step": 67860 }, { "epoch": 0.08265, "grad_norm": 7.125829696655273, "learning_rate": 1.6230303030303033e-06, "loss": 6.241864013671875, "step": 67865 }, { "epoch": 0.0827, "grad_norm": 7.97471809387207, "learning_rate": 1.6227777777777777e-06, "loss": 6.241533279418945, "step": 67870 }, { "epoch": 0.08275, "grad_norm": 6.865375518798828, "learning_rate": 1.6225252525252528e-06, "loss": 6.271086883544922, "step": 67875 }, { "epoch": 0.0828, "grad_norm": 4.725345611572266, "learning_rate": 1.6222727272727274e-06, "loss": 6.365765380859375, "step": 67880 }, { "epoch": 0.08285, "grad_norm": 5.113654136657715, "learning_rate": 1.6220202020202023e-06, "loss": 6.249844741821289, "step": 67885 }, { "epoch": 0.0829, "grad_norm": 4.696580410003662, "learning_rate": 1.621767676767677e-06, "loss": 6.205767059326172, "step": 67890 }, { "epoch": 0.08295, "grad_norm": 6.746333122253418, "learning_rate": 1.6215151515151517e-06, "loss": 6.199056243896484, "step": 67895 }, { "epoch": 0.083, "grad_norm": 6.460727691650391, "learning_rate": 1.6212626262626264e-06, "loss": 6.2305351257324215, "step": 67900 }, { "epoch": 0.08305, "grad_norm": 8.3560209274292, "learning_rate": 1.6210101010101012e-06, "loss": 6.305005645751953, "step": 67905 }, { "epoch": 0.0831, "grad_norm": 3.7669503688812256, "learning_rate": 1.6207575757575758e-06, "loss": 6.19129638671875, "step": 67910 }, { "epoch": 0.08315, "grad_norm": 4.863406181335449, "learning_rate": 1.6205050505050507e-06, "loss": 6.325352478027344, "step": 67915 }, { "epoch": 0.0832, "grad_norm": 4.625027656555176, "learning_rate": 1.6202525252525253e-06, "loss": 6.234782791137695, "step": 67920 }, { "epoch": 0.08325, "grad_norm": 8.135788917541504, "learning_rate": 1.6200000000000002e-06, "loss": 6.199491500854492, "step": 67925 }, { "epoch": 0.0833, "grad_norm": 7.9138712882995605, "learning_rate": 1.6197474747474748e-06, "loss": 6.204254913330078, "step": 67930 }, { "epoch": 0.08335, "grad_norm": 4.428237438201904, "learning_rate": 1.6194949494949496e-06, "loss": 6.261589813232422, "step": 67935 }, { "epoch": 0.0834, "grad_norm": 6.827216148376465, "learning_rate": 1.6192424242424243e-06, "loss": 6.253746795654297, "step": 67940 }, { "epoch": 0.08345, "grad_norm": 9.646376609802246, "learning_rate": 1.618989898989899e-06, "loss": 6.249897003173828, "step": 67945 }, { "epoch": 0.0835, "grad_norm": 9.245662689208984, "learning_rate": 1.6187373737373737e-06, "loss": 6.230661010742187, "step": 67950 }, { "epoch": 0.08355, "grad_norm": 5.914920330047607, "learning_rate": 1.6184848484848486e-06, "loss": 6.253987884521484, "step": 67955 }, { "epoch": 0.0836, "grad_norm": 5.798128604888916, "learning_rate": 1.6182323232323232e-06, "loss": 6.204825210571289, "step": 67960 }, { "epoch": 0.08365, "grad_norm": 5.583381175994873, "learning_rate": 1.617979797979798e-06, "loss": 6.254814529418946, "step": 67965 }, { "epoch": 0.0837, "grad_norm": 5.474242210388184, "learning_rate": 1.6177272727272727e-06, "loss": 6.243137359619141, "step": 67970 }, { "epoch": 0.08375, "grad_norm": 5.4115071296691895, "learning_rate": 1.6174747474747477e-06, "loss": 6.231884765625, "step": 67975 }, { "epoch": 0.0838, "grad_norm": 4.392685413360596, "learning_rate": 1.6172222222222221e-06, "loss": 6.2547649383544925, "step": 67980 }, { "epoch": 0.08385, "grad_norm": 11.952957153320312, "learning_rate": 1.6169696969696972e-06, "loss": 6.326251220703125, "step": 67985 }, { "epoch": 0.0839, "grad_norm": 30.120391845703125, "learning_rate": 1.6167171717171718e-06, "loss": 6.737517547607422, "step": 67990 }, { "epoch": 0.08395, "grad_norm": 6.2516889572143555, "learning_rate": 1.6164646464646467e-06, "loss": 6.276971054077149, "step": 67995 }, { "epoch": 0.084, "grad_norm": 11.981853485107422, "learning_rate": 1.6162121212121213e-06, "loss": 6.296961975097656, "step": 68000 }, { "epoch": 0.08405, "grad_norm": 4.134936809539795, "learning_rate": 1.6159595959595961e-06, "loss": 6.213412094116211, "step": 68005 }, { "epoch": 0.0841, "grad_norm": 8.800566673278809, "learning_rate": 1.6157070707070708e-06, "loss": 6.263444519042968, "step": 68010 }, { "epoch": 0.08415, "grad_norm": 4.907958984375, "learning_rate": 1.6154545454545456e-06, "loss": 6.201774597167969, "step": 68015 }, { "epoch": 0.0842, "grad_norm": 8.365675926208496, "learning_rate": 1.6152020202020202e-06, "loss": 6.259011840820312, "step": 68020 }, { "epoch": 0.08425, "grad_norm": 5.120132923126221, "learning_rate": 1.614949494949495e-06, "loss": 6.371106719970703, "step": 68025 }, { "epoch": 0.0843, "grad_norm": 7.107611656188965, "learning_rate": 1.6146969696969697e-06, "loss": 6.295449829101562, "step": 68030 }, { "epoch": 0.08435, "grad_norm": 9.592951774597168, "learning_rate": 1.6144444444444446e-06, "loss": 6.232204437255859, "step": 68035 }, { "epoch": 0.0844, "grad_norm": 4.881063461303711, "learning_rate": 1.6141919191919192e-06, "loss": 6.241677856445312, "step": 68040 }, { "epoch": 0.08445, "grad_norm": 4.004124641418457, "learning_rate": 1.613939393939394e-06, "loss": 6.252657699584961, "step": 68045 }, { "epoch": 0.0845, "grad_norm": 13.783756256103516, "learning_rate": 1.6136868686868687e-06, "loss": 6.363603210449218, "step": 68050 }, { "epoch": 0.08455, "grad_norm": 5.782992362976074, "learning_rate": 1.6134343434343435e-06, "loss": 6.261353302001953, "step": 68055 }, { "epoch": 0.0846, "grad_norm": 4.230885028839111, "learning_rate": 1.6131818181818181e-06, "loss": 6.235014343261719, "step": 68060 }, { "epoch": 0.08465, "grad_norm": 8.719497680664062, "learning_rate": 1.612929292929293e-06, "loss": 6.269380950927735, "step": 68065 }, { "epoch": 0.0847, "grad_norm": 8.370138168334961, "learning_rate": 1.6126767676767676e-06, "loss": 6.274061584472657, "step": 68070 }, { "epoch": 0.08475, "grad_norm": 7.458622932434082, "learning_rate": 1.6124242424242427e-06, "loss": 6.273218154907227, "step": 68075 }, { "epoch": 0.0848, "grad_norm": 7.374997138977051, "learning_rate": 1.612171717171717e-06, "loss": 6.178070068359375, "step": 68080 }, { "epoch": 0.08485, "grad_norm": 7.19040060043335, "learning_rate": 1.6119191919191921e-06, "loss": 6.238426971435547, "step": 68085 }, { "epoch": 0.0849, "grad_norm": 8.296574592590332, "learning_rate": 1.611666666666667e-06, "loss": 6.231276321411133, "step": 68090 }, { "epoch": 0.08495, "grad_norm": 5.938658237457275, "learning_rate": 1.6114141414141416e-06, "loss": 6.241914367675781, "step": 68095 }, { "epoch": 0.085, "grad_norm": 5.810720443725586, "learning_rate": 1.6111616161616164e-06, "loss": 6.276242065429687, "step": 68100 }, { "epoch": 0.08505, "grad_norm": 151.32887268066406, "learning_rate": 1.610909090909091e-06, "loss": 11.071710205078125, "step": 68105 }, { "epoch": 0.0851, "grad_norm": 16.99557876586914, "learning_rate": 1.610656565656566e-06, "loss": 7.40106430053711, "step": 68110 }, { "epoch": 0.08515, "grad_norm": 10.015318870544434, "learning_rate": 1.6104040404040406e-06, "loss": 6.2759651184082035, "step": 68115 }, { "epoch": 0.0852, "grad_norm": 8.900857925415039, "learning_rate": 1.6101515151515154e-06, "loss": 6.267348098754883, "step": 68120 }, { "epoch": 0.08525, "grad_norm": 5.3333892822265625, "learning_rate": 1.60989898989899e-06, "loss": 6.2515113830566404, "step": 68125 }, { "epoch": 0.0853, "grad_norm": 5.67011022567749, "learning_rate": 1.6096464646464649e-06, "loss": 6.236592102050781, "step": 68130 }, { "epoch": 0.08535, "grad_norm": 4.393640518188477, "learning_rate": 1.6093939393939395e-06, "loss": 6.330816268920898, "step": 68135 }, { "epoch": 0.0854, "grad_norm": 3.089141845703125, "learning_rate": 1.6091414141414143e-06, "loss": 6.288617324829102, "step": 68140 }, { "epoch": 0.08545, "grad_norm": 4.862539291381836, "learning_rate": 1.608888888888889e-06, "loss": 6.20807113647461, "step": 68145 }, { "epoch": 0.0855, "grad_norm": 6.387592315673828, "learning_rate": 1.6086363636363638e-06, "loss": 6.217916870117188, "step": 68150 }, { "epoch": 0.08555, "grad_norm": 7.905552387237549, "learning_rate": 1.6083838383838384e-06, "loss": 6.247480773925782, "step": 68155 }, { "epoch": 0.0856, "grad_norm": 9.374300956726074, "learning_rate": 1.6081313131313133e-06, "loss": 6.256856918334961, "step": 68160 }, { "epoch": 0.08565, "grad_norm": 7.862767696380615, "learning_rate": 1.607878787878788e-06, "loss": 6.246783447265625, "step": 68165 }, { "epoch": 0.0857, "grad_norm": 4.794373512268066, "learning_rate": 1.607626262626263e-06, "loss": 6.2922004699707035, "step": 68170 }, { "epoch": 0.08575, "grad_norm": 5.987009048461914, "learning_rate": 1.6073737373737374e-06, "loss": 6.3222503662109375, "step": 68175 }, { "epoch": 0.0858, "grad_norm": 3.5684049129486084, "learning_rate": 1.6071212121212124e-06, "loss": 6.473400115966797, "step": 68180 }, { "epoch": 0.08585, "grad_norm": 7.307270526885986, "learning_rate": 1.606868686868687e-06, "loss": 6.212144470214843, "step": 68185 }, { "epoch": 0.0859, "grad_norm": 5.619901180267334, "learning_rate": 1.606616161616162e-06, "loss": 6.220479202270508, "step": 68190 }, { "epoch": 0.08595, "grad_norm": 6.72964334487915, "learning_rate": 1.6063636363636365e-06, "loss": 6.217216873168946, "step": 68195 }, { "epoch": 0.086, "grad_norm": 7.209021091461182, "learning_rate": 1.6061111111111114e-06, "loss": 6.1976478576660154, "step": 68200 }, { "epoch": 0.08605, "grad_norm": 3.866325855255127, "learning_rate": 1.605858585858586e-06, "loss": 6.243569183349609, "step": 68205 }, { "epoch": 0.0861, "grad_norm": 10.057393074035645, "learning_rate": 1.6056060606060609e-06, "loss": 6.238461685180664, "step": 68210 }, { "epoch": 0.08615, "grad_norm": 5.648272514343262, "learning_rate": 1.6053535353535355e-06, "loss": 6.257833480834961, "step": 68215 }, { "epoch": 0.0862, "grad_norm": 6.872942924499512, "learning_rate": 1.6051010101010103e-06, "loss": 6.287327575683594, "step": 68220 }, { "epoch": 0.08625, "grad_norm": 13.237266540527344, "learning_rate": 1.604848484848485e-06, "loss": 6.299374771118164, "step": 68225 }, { "epoch": 0.0863, "grad_norm": 11.033956527709961, "learning_rate": 1.6045959595959598e-06, "loss": 6.280892181396484, "step": 68230 }, { "epoch": 0.08635, "grad_norm": 5.178877353668213, "learning_rate": 1.6043434343434344e-06, "loss": 6.232608795166016, "step": 68235 }, { "epoch": 0.0864, "grad_norm": 5.69475793838501, "learning_rate": 1.6040909090909093e-06, "loss": 6.264961242675781, "step": 68240 }, { "epoch": 0.08645, "grad_norm": 4.788723468780518, "learning_rate": 1.603838383838384e-06, "loss": 6.195264053344727, "step": 68245 }, { "epoch": 0.0865, "grad_norm": 9.114925384521484, "learning_rate": 1.6035858585858587e-06, "loss": 6.239858627319336, "step": 68250 }, { "epoch": 0.08655, "grad_norm": 14.334339141845703, "learning_rate": 1.6033333333333334e-06, "loss": 6.577363586425781, "step": 68255 }, { "epoch": 0.0866, "grad_norm": 6.625946998596191, "learning_rate": 1.6030808080808082e-06, "loss": 6.212545013427734, "step": 68260 }, { "epoch": 0.08665, "grad_norm": 4.336483478546143, "learning_rate": 1.6028282828282828e-06, "loss": 6.269819641113282, "step": 68265 }, { "epoch": 0.0867, "grad_norm": 5.978705883026123, "learning_rate": 1.6025757575757577e-06, "loss": 6.2618976593017575, "step": 68270 }, { "epoch": 0.08675, "grad_norm": 26.40206527709961, "learning_rate": 1.6023232323232323e-06, "loss": 6.491594696044922, "step": 68275 }, { "epoch": 0.0868, "grad_norm": 22.59023666381836, "learning_rate": 1.6020707070707074e-06, "loss": 6.407736968994141, "step": 68280 }, { "epoch": 0.08685, "grad_norm": 26.13314437866211, "learning_rate": 1.6018181818181818e-06, "loss": 6.1435096740722654, "step": 68285 }, { "epoch": 0.0869, "grad_norm": 16.604387283325195, "learning_rate": 1.6015656565656568e-06, "loss": 6.056850433349609, "step": 68290 }, { "epoch": 0.08695, "grad_norm": 26.665193557739258, "learning_rate": 1.6013131313131315e-06, "loss": 6.155484390258789, "step": 68295 }, { "epoch": 0.087, "grad_norm": 14.265118598937988, "learning_rate": 1.6010606060606063e-06, "loss": 6.132965850830078, "step": 68300 }, { "epoch": 0.08705, "grad_norm": 26.1540584564209, "learning_rate": 1.600808080808081e-06, "loss": 6.121370697021485, "step": 68305 }, { "epoch": 0.0871, "grad_norm": 20.219104766845703, "learning_rate": 1.6005555555555558e-06, "loss": 6.069096374511719, "step": 68310 }, { "epoch": 0.08715, "grad_norm": 21.884010314941406, "learning_rate": 1.6003030303030304e-06, "loss": 6.075076293945313, "step": 68315 }, { "epoch": 0.0872, "grad_norm": 15.429051399230957, "learning_rate": 1.6000505050505053e-06, "loss": 6.049326705932617, "step": 68320 }, { "epoch": 0.08725, "grad_norm": 18.19685173034668, "learning_rate": 1.5997979797979799e-06, "loss": 6.085511016845703, "step": 68325 }, { "epoch": 0.0873, "grad_norm": 12.808303833007812, "learning_rate": 1.5995454545454547e-06, "loss": 6.103668975830078, "step": 68330 }, { "epoch": 0.08735, "grad_norm": 13.808426856994629, "learning_rate": 1.5992929292929294e-06, "loss": 6.40851058959961, "step": 68335 }, { "epoch": 0.0874, "grad_norm": 9.506705284118652, "learning_rate": 1.5990404040404042e-06, "loss": 6.273345565795898, "step": 68340 }, { "epoch": 0.08745, "grad_norm": 36.01426315307617, "learning_rate": 1.5987878787878788e-06, "loss": 6.269451904296875, "step": 68345 }, { "epoch": 0.0875, "grad_norm": 14.356122016906738, "learning_rate": 1.5985353535353537e-06, "loss": 6.3302257537841795, "step": 68350 }, { "epoch": 0.08755, "grad_norm": 10.59303092956543, "learning_rate": 1.5982828282828283e-06, "loss": 6.314650344848633, "step": 68355 }, { "epoch": 0.0876, "grad_norm": 9.102739334106445, "learning_rate": 1.5980303030303031e-06, "loss": 6.252481842041016, "step": 68360 }, { "epoch": 0.08765, "grad_norm": 6.962002277374268, "learning_rate": 1.5977777777777778e-06, "loss": 6.3080394744873045, "step": 68365 }, { "epoch": 0.0877, "grad_norm": 8.317675590515137, "learning_rate": 1.5975252525252526e-06, "loss": 6.266812515258789, "step": 68370 }, { "epoch": 0.08775, "grad_norm": 8.927763938903809, "learning_rate": 1.5972727272727272e-06, "loss": 6.259712219238281, "step": 68375 }, { "epoch": 0.0878, "grad_norm": 9.100605964660645, "learning_rate": 1.597020202020202e-06, "loss": 6.276482772827149, "step": 68380 }, { "epoch": 0.08785, "grad_norm": 7.157580852508545, "learning_rate": 1.5967676767676767e-06, "loss": 6.272844314575195, "step": 68385 }, { "epoch": 0.0879, "grad_norm": 6.666101932525635, "learning_rate": 1.5965151515151518e-06, "loss": 6.267860412597656, "step": 68390 }, { "epoch": 0.08795, "grad_norm": 12.995580673217773, "learning_rate": 1.5962626262626262e-06, "loss": 6.283257293701172, "step": 68395 }, { "epoch": 0.088, "grad_norm": 4.958277225494385, "learning_rate": 1.5960101010101012e-06, "loss": 6.256583023071289, "step": 68400 }, { "epoch": 0.08805, "grad_norm": 12.124619483947754, "learning_rate": 1.5957575757575759e-06, "loss": 6.263058471679687, "step": 68405 }, { "epoch": 0.0881, "grad_norm": 11.106489181518555, "learning_rate": 1.5955050505050507e-06, "loss": 6.250572586059571, "step": 68410 }, { "epoch": 0.08815, "grad_norm": 4.97368049621582, "learning_rate": 1.5952525252525253e-06, "loss": 6.242839050292969, "step": 68415 }, { "epoch": 0.0882, "grad_norm": 8.500083923339844, "learning_rate": 1.5950000000000002e-06, "loss": 6.280374526977539, "step": 68420 }, { "epoch": 0.08825, "grad_norm": 5.624425888061523, "learning_rate": 1.5947474747474748e-06, "loss": 6.228078460693359, "step": 68425 }, { "epoch": 0.0883, "grad_norm": 12.68785572052002, "learning_rate": 1.5944949494949497e-06, "loss": 6.244724273681641, "step": 68430 }, { "epoch": 0.08835, "grad_norm": 8.59717082977295, "learning_rate": 1.5942424242424243e-06, "loss": 6.265054321289062, "step": 68435 }, { "epoch": 0.0884, "grad_norm": 6.976657390594482, "learning_rate": 1.5939898989898991e-06, "loss": 6.247876739501953, "step": 68440 }, { "epoch": 0.08845, "grad_norm": 6.365911960601807, "learning_rate": 1.5937373737373738e-06, "loss": 6.3568778991699215, "step": 68445 }, { "epoch": 0.0885, "grad_norm": 5.773349761962891, "learning_rate": 1.5934848484848486e-06, "loss": 6.24682502746582, "step": 68450 }, { "epoch": 0.08855, "grad_norm": 16.435997009277344, "learning_rate": 1.5932323232323232e-06, "loss": 6.250936889648438, "step": 68455 }, { "epoch": 0.0886, "grad_norm": 5.544093608856201, "learning_rate": 1.592979797979798e-06, "loss": 6.239647293090821, "step": 68460 }, { "epoch": 0.08865, "grad_norm": 6.701617240905762, "learning_rate": 1.5927272727272727e-06, "loss": 6.264475631713867, "step": 68465 }, { "epoch": 0.0887, "grad_norm": 6.3103814125061035, "learning_rate": 1.5924747474747475e-06, "loss": 6.210774612426758, "step": 68470 }, { "epoch": 0.08875, "grad_norm": 6.796063423156738, "learning_rate": 1.5922222222222222e-06, "loss": 6.264931488037109, "step": 68475 }, { "epoch": 0.0888, "grad_norm": 7.362947940826416, "learning_rate": 1.591969696969697e-06, "loss": 6.2067218780517575, "step": 68480 }, { "epoch": 0.08885, "grad_norm": 6.112558364868164, "learning_rate": 1.5917171717171716e-06, "loss": 6.233968353271484, "step": 68485 }, { "epoch": 0.0889, "grad_norm": 6.19677734375, "learning_rate": 1.5914646464646467e-06, "loss": 6.2453468322753904, "step": 68490 }, { "epoch": 0.08895, "grad_norm": 4.654987335205078, "learning_rate": 1.5912121212121211e-06, "loss": 6.220082092285156, "step": 68495 }, { "epoch": 0.089, "grad_norm": 8.37671947479248, "learning_rate": 1.5909595959595962e-06, "loss": 6.387126159667969, "step": 68500 }, { "epoch": 0.08905, "grad_norm": 5.295370101928711, "learning_rate": 1.5907070707070708e-06, "loss": 6.215462112426758, "step": 68505 }, { "epoch": 0.0891, "grad_norm": 5.464320659637451, "learning_rate": 1.5904545454545456e-06, "loss": 6.228319549560547, "step": 68510 }, { "epoch": 0.08915, "grad_norm": 7.002796649932861, "learning_rate": 1.5902020202020205e-06, "loss": 6.266805267333984, "step": 68515 }, { "epoch": 0.0892, "grad_norm": 6.574508190155029, "learning_rate": 1.5899494949494951e-06, "loss": 6.256491851806641, "step": 68520 }, { "epoch": 0.08925, "grad_norm": 6.124186992645264, "learning_rate": 1.58969696969697e-06, "loss": 6.240212631225586, "step": 68525 }, { "epoch": 0.0893, "grad_norm": 8.4375638961792, "learning_rate": 1.5894444444444446e-06, "loss": 6.260652160644531, "step": 68530 }, { "epoch": 0.08935, "grad_norm": 8.720924377441406, "learning_rate": 1.5891919191919194e-06, "loss": 6.432191467285156, "step": 68535 }, { "epoch": 0.0894, "grad_norm": 7.317401885986328, "learning_rate": 1.588939393939394e-06, "loss": 6.268520355224609, "step": 68540 }, { "epoch": 0.08945, "grad_norm": 7.07523775100708, "learning_rate": 1.588686868686869e-06, "loss": 6.237063598632813, "step": 68545 }, { "epoch": 0.0895, "grad_norm": 8.23422622680664, "learning_rate": 1.5884343434343435e-06, "loss": 6.217866134643555, "step": 68550 }, { "epoch": 0.08955, "grad_norm": 17.404226303100586, "learning_rate": 1.5881818181818184e-06, "loss": 6.1899574279785154, "step": 68555 }, { "epoch": 0.0896, "grad_norm": 6.881229877471924, "learning_rate": 1.587929292929293e-06, "loss": 6.245463180541992, "step": 68560 }, { "epoch": 0.08965, "grad_norm": 5.454588890075684, "learning_rate": 1.5876767676767678e-06, "loss": 6.248381042480469, "step": 68565 }, { "epoch": 0.0897, "grad_norm": 13.943465232849121, "learning_rate": 1.5874242424242425e-06, "loss": 6.270164489746094, "step": 68570 }, { "epoch": 0.08975, "grad_norm": 6.053116798400879, "learning_rate": 1.5871717171717173e-06, "loss": 6.251766204833984, "step": 68575 }, { "epoch": 0.0898, "grad_norm": 4.380888938903809, "learning_rate": 1.586919191919192e-06, "loss": 6.22996940612793, "step": 68580 }, { "epoch": 0.08985, "grad_norm": 6.1960062980651855, "learning_rate": 1.586666666666667e-06, "loss": 6.296883392333984, "step": 68585 }, { "epoch": 0.0899, "grad_norm": 3.6194231510162354, "learning_rate": 1.5864141414141414e-06, "loss": 6.210314559936523, "step": 68590 }, { "epoch": 0.08995, "grad_norm": 5.913259506225586, "learning_rate": 1.5861616161616165e-06, "loss": 6.273317337036133, "step": 68595 }, { "epoch": 0.09, "grad_norm": 12.532811164855957, "learning_rate": 1.5859090909090911e-06, "loss": 6.221103286743164, "step": 68600 }, { "epoch": 0.09005, "grad_norm": 18.542972564697266, "learning_rate": 1.585656565656566e-06, "loss": 6.2378288269042965, "step": 68605 }, { "epoch": 0.0901, "grad_norm": 7.933225631713867, "learning_rate": 1.5854040404040406e-06, "loss": 6.192073059082031, "step": 68610 }, { "epoch": 0.09015, "grad_norm": 6.513733863830566, "learning_rate": 1.5851515151515154e-06, "loss": 6.260355758666992, "step": 68615 }, { "epoch": 0.0902, "grad_norm": 4.8691511154174805, "learning_rate": 1.58489898989899e-06, "loss": 6.198682022094727, "step": 68620 }, { "epoch": 0.09025, "grad_norm": 5.949094772338867, "learning_rate": 1.5846464646464649e-06, "loss": 6.274376296997071, "step": 68625 }, { "epoch": 0.0903, "grad_norm": 8.66055679321289, "learning_rate": 1.5843939393939395e-06, "loss": 6.227044296264649, "step": 68630 }, { "epoch": 0.09035, "grad_norm": 6.357010841369629, "learning_rate": 1.5841414141414144e-06, "loss": 6.382799911499023, "step": 68635 }, { "epoch": 0.0904, "grad_norm": 8.037514686584473, "learning_rate": 1.583888888888889e-06, "loss": 6.2481330871582035, "step": 68640 }, { "epoch": 0.09045, "grad_norm": 11.522111892700195, "learning_rate": 1.5836363636363638e-06, "loss": 6.26739273071289, "step": 68645 }, { "epoch": 0.0905, "grad_norm": 5.609579563140869, "learning_rate": 1.5833838383838385e-06, "loss": 6.270120620727539, "step": 68650 }, { "epoch": 0.09055, "grad_norm": 7.151195049285889, "learning_rate": 1.5831313131313133e-06, "loss": 6.283375549316406, "step": 68655 }, { "epoch": 0.0906, "grad_norm": 7.717036724090576, "learning_rate": 1.582878787878788e-06, "loss": 6.2354789733886715, "step": 68660 }, { "epoch": 0.09065, "grad_norm": 3.9953665733337402, "learning_rate": 1.5826262626262628e-06, "loss": 6.205415725708008, "step": 68665 }, { "epoch": 0.0907, "grad_norm": 4.387796878814697, "learning_rate": 1.5823737373737374e-06, "loss": 6.31265869140625, "step": 68670 }, { "epoch": 0.09075, "grad_norm": 6.127048492431641, "learning_rate": 1.5821212121212123e-06, "loss": 6.244026184082031, "step": 68675 }, { "epoch": 0.0908, "grad_norm": 4.865087509155273, "learning_rate": 1.5818686868686869e-06, "loss": 6.262338638305664, "step": 68680 }, { "epoch": 0.09085, "grad_norm": 7.786529064178467, "learning_rate": 1.5816161616161617e-06, "loss": 6.33027458190918, "step": 68685 }, { "epoch": 0.0909, "grad_norm": 6.761791706085205, "learning_rate": 1.5813636363636364e-06, "loss": 6.169318389892578, "step": 68690 }, { "epoch": 0.09095, "grad_norm": 5.355432033538818, "learning_rate": 1.5811111111111114e-06, "loss": 6.262015533447266, "step": 68695 }, { "epoch": 0.091, "grad_norm": 5.922513008117676, "learning_rate": 1.5808585858585858e-06, "loss": 6.254376983642578, "step": 68700 }, { "epoch": 0.09105, "grad_norm": 8.729522705078125, "learning_rate": 1.5806060606060609e-06, "loss": 6.223418045043945, "step": 68705 }, { "epoch": 0.0911, "grad_norm": 3.676426649093628, "learning_rate": 1.5803535353535355e-06, "loss": 6.30638427734375, "step": 68710 }, { "epoch": 0.09115, "grad_norm": 9.454465866088867, "learning_rate": 1.5801010101010104e-06, "loss": 6.411592864990235, "step": 68715 }, { "epoch": 0.0912, "grad_norm": 4.840556621551514, "learning_rate": 1.579848484848485e-06, "loss": 6.348005294799805, "step": 68720 }, { "epoch": 0.09125, "grad_norm": 16.931516647338867, "learning_rate": 1.5795959595959598e-06, "loss": 6.183093643188476, "step": 68725 }, { "epoch": 0.0913, "grad_norm": 9.347898483276367, "learning_rate": 1.5793434343434345e-06, "loss": 6.230922698974609, "step": 68730 }, { "epoch": 0.09135, "grad_norm": 6.540523529052734, "learning_rate": 1.5790909090909093e-06, "loss": 6.198273468017578, "step": 68735 }, { "epoch": 0.0914, "grad_norm": 4.645453453063965, "learning_rate": 1.578838383838384e-06, "loss": 6.195700836181641, "step": 68740 }, { "epoch": 0.09145, "grad_norm": 4.387323379516602, "learning_rate": 1.5785858585858588e-06, "loss": 6.2355201721191404, "step": 68745 }, { "epoch": 0.0915, "grad_norm": 6.623274326324463, "learning_rate": 1.5783333333333334e-06, "loss": 6.2223461151123045, "step": 68750 }, { "epoch": 0.09155, "grad_norm": 7.770120143890381, "learning_rate": 1.5780808080808082e-06, "loss": 6.2152351379394535, "step": 68755 }, { "epoch": 0.0916, "grad_norm": 6.8864545822143555, "learning_rate": 1.5778282828282829e-06, "loss": 6.23809928894043, "step": 68760 }, { "epoch": 0.09165, "grad_norm": 6.643836498260498, "learning_rate": 1.5775757575757577e-06, "loss": 6.253174591064453, "step": 68765 }, { "epoch": 0.0917, "grad_norm": 9.366877555847168, "learning_rate": 1.5773232323232323e-06, "loss": 6.289954757690429, "step": 68770 }, { "epoch": 0.09175, "grad_norm": 6.247757911682129, "learning_rate": 1.5770707070707072e-06, "loss": 6.291299438476562, "step": 68775 }, { "epoch": 0.0918, "grad_norm": 7.713843822479248, "learning_rate": 1.5768181818181818e-06, "loss": 6.2397197723388675, "step": 68780 }, { "epoch": 0.09185, "grad_norm": 14.358726501464844, "learning_rate": 1.5765656565656567e-06, "loss": 6.2458351135253904, "step": 68785 }, { "epoch": 0.0919, "grad_norm": 5.7580037117004395, "learning_rate": 1.5763131313131313e-06, "loss": 6.252772521972656, "step": 68790 }, { "epoch": 0.09195, "grad_norm": 21.128204345703125, "learning_rate": 1.5760606060606063e-06, "loss": 6.3734485626220705, "step": 68795 }, { "epoch": 0.092, "grad_norm": 6.48899507522583, "learning_rate": 1.5758080808080808e-06, "loss": 6.303300094604492, "step": 68800 }, { "epoch": 0.09205, "grad_norm": 4.846275806427002, "learning_rate": 1.5755555555555558e-06, "loss": 6.260122680664063, "step": 68805 }, { "epoch": 0.0921, "grad_norm": 11.505905151367188, "learning_rate": 1.5753030303030302e-06, "loss": 6.274589920043946, "step": 68810 }, { "epoch": 0.09215, "grad_norm": 4.502528667449951, "learning_rate": 1.5750505050505053e-06, "loss": 6.2397407531738285, "step": 68815 }, { "epoch": 0.0922, "grad_norm": 6.448880195617676, "learning_rate": 1.57479797979798e-06, "loss": 6.243462371826172, "step": 68820 }, { "epoch": 0.09225, "grad_norm": 7.630986213684082, "learning_rate": 1.5745454545454548e-06, "loss": 6.258782958984375, "step": 68825 }, { "epoch": 0.0923, "grad_norm": 5.937536716461182, "learning_rate": 1.5742929292929294e-06, "loss": 6.215673828125, "step": 68830 }, { "epoch": 0.09235, "grad_norm": 30.121519088745117, "learning_rate": 1.5740404040404042e-06, "loss": 5.91575927734375, "step": 68835 }, { "epoch": 0.0924, "grad_norm": 7.7958221435546875, "learning_rate": 1.5737878787878789e-06, "loss": 6.109455871582031, "step": 68840 }, { "epoch": 0.09245, "grad_norm": 4.327955722808838, "learning_rate": 1.5735353535353537e-06, "loss": 6.232059097290039, "step": 68845 }, { "epoch": 0.0925, "grad_norm": 5.208034038543701, "learning_rate": 1.5732828282828283e-06, "loss": 6.241994476318359, "step": 68850 }, { "epoch": 0.09255, "grad_norm": 5.370757579803467, "learning_rate": 1.5730303030303032e-06, "loss": 6.280300903320312, "step": 68855 }, { "epoch": 0.0926, "grad_norm": 10.526167869567871, "learning_rate": 1.5727777777777778e-06, "loss": 6.341094207763672, "step": 68860 }, { "epoch": 0.09265, "grad_norm": 5.429749488830566, "learning_rate": 1.5725252525252526e-06, "loss": 6.261759948730469, "step": 68865 }, { "epoch": 0.0927, "grad_norm": 7.9871392250061035, "learning_rate": 1.5722727272727273e-06, "loss": 6.189188766479492, "step": 68870 }, { "epoch": 0.09275, "grad_norm": 5.685506343841553, "learning_rate": 1.5720202020202021e-06, "loss": 6.265463256835938, "step": 68875 }, { "epoch": 0.0928, "grad_norm": 10.339866638183594, "learning_rate": 1.5717676767676767e-06, "loss": 6.44752197265625, "step": 68880 }, { "epoch": 0.09285, "grad_norm": 6.541537761688232, "learning_rate": 1.5715151515151516e-06, "loss": 6.215811157226563, "step": 68885 }, { "epoch": 0.0929, "grad_norm": 9.742391586303711, "learning_rate": 1.5712626262626262e-06, "loss": 6.21392936706543, "step": 68890 }, { "epoch": 0.09295, "grad_norm": 5.193240165710449, "learning_rate": 1.571010101010101e-06, "loss": 6.264768981933594, "step": 68895 }, { "epoch": 0.093, "grad_norm": 7.804476261138916, "learning_rate": 1.5707575757575757e-06, "loss": 6.27020263671875, "step": 68900 }, { "epoch": 0.09305, "grad_norm": 6.259206295013428, "learning_rate": 1.5705050505050507e-06, "loss": 6.234945678710938, "step": 68905 }, { "epoch": 0.0931, "grad_norm": 5.775605201721191, "learning_rate": 1.5702525252525252e-06, "loss": 6.318234252929687, "step": 68910 }, { "epoch": 0.09315, "grad_norm": 9.140390396118164, "learning_rate": 1.5700000000000002e-06, "loss": 6.24393310546875, "step": 68915 }, { "epoch": 0.0932, "grad_norm": 6.06035852432251, "learning_rate": 1.5697474747474748e-06, "loss": 6.2165069580078125, "step": 68920 }, { "epoch": 0.09325, "grad_norm": 13.126267433166504, "learning_rate": 1.5694949494949497e-06, "loss": 6.313214111328125, "step": 68925 }, { "epoch": 0.0933, "grad_norm": 5.6190056800842285, "learning_rate": 1.5692424242424243e-06, "loss": 6.277804183959961, "step": 68930 }, { "epoch": 0.09335, "grad_norm": 6.24449348449707, "learning_rate": 1.5689898989898992e-06, "loss": 6.225328826904297, "step": 68935 }, { "epoch": 0.0934, "grad_norm": 21.911251068115234, "learning_rate": 1.568737373737374e-06, "loss": 6.470682525634766, "step": 68940 }, { "epoch": 0.09345, "grad_norm": 8.606542587280273, "learning_rate": 1.5684848484848486e-06, "loss": 6.349967956542969, "step": 68945 }, { "epoch": 0.0935, "grad_norm": 20.68255043029785, "learning_rate": 1.5682323232323235e-06, "loss": 6.416899108886719, "step": 68950 }, { "epoch": 0.09355, "grad_norm": 8.022112846374512, "learning_rate": 1.567979797979798e-06, "loss": 6.262191390991211, "step": 68955 }, { "epoch": 0.0936, "grad_norm": 6.331380367279053, "learning_rate": 1.567727272727273e-06, "loss": 6.210697174072266, "step": 68960 }, { "epoch": 0.09365, "grad_norm": 6.862239360809326, "learning_rate": 1.5674747474747476e-06, "loss": 6.24051513671875, "step": 68965 }, { "epoch": 0.0937, "grad_norm": 8.143112182617188, "learning_rate": 1.5672222222222224e-06, "loss": 6.2192729949951175, "step": 68970 }, { "epoch": 0.09375, "grad_norm": 6.938131332397461, "learning_rate": 1.566969696969697e-06, "loss": 6.278463745117188, "step": 68975 }, { "epoch": 0.0938, "grad_norm": 5.573713302612305, "learning_rate": 1.5667171717171719e-06, "loss": 6.248586654663086, "step": 68980 }, { "epoch": 0.09385, "grad_norm": 23.201200485229492, "learning_rate": 1.5664646464646465e-06, "loss": 6.304859924316406, "step": 68985 }, { "epoch": 0.0939, "grad_norm": 5.411651611328125, "learning_rate": 1.5662121212121214e-06, "loss": 6.28824462890625, "step": 68990 }, { "epoch": 0.09395, "grad_norm": 4.493564605712891, "learning_rate": 1.565959595959596e-06, "loss": 6.228548049926758, "step": 68995 }, { "epoch": 0.094, "grad_norm": 7.408254623413086, "learning_rate": 1.565707070707071e-06, "loss": 6.254468154907227, "step": 69000 }, { "epoch": 0.09405, "grad_norm": 8.597015380859375, "learning_rate": 1.5654545454545455e-06, "loss": 6.22870101928711, "step": 69005 }, { "epoch": 0.0941, "grad_norm": 15.089723587036133, "learning_rate": 1.5652020202020205e-06, "loss": 6.226176452636719, "step": 69010 }, { "epoch": 0.09415, "grad_norm": 4.853477954864502, "learning_rate": 1.5649494949494951e-06, "loss": 6.196156311035156, "step": 69015 }, { "epoch": 0.0942, "grad_norm": 6.0251874923706055, "learning_rate": 1.56469696969697e-06, "loss": 6.281474304199219, "step": 69020 }, { "epoch": 0.09425, "grad_norm": 8.83232593536377, "learning_rate": 1.5644444444444446e-06, "loss": 6.216189575195313, "step": 69025 }, { "epoch": 0.0943, "grad_norm": 7.530549049377441, "learning_rate": 1.5641919191919195e-06, "loss": 6.42522964477539, "step": 69030 }, { "epoch": 0.09435, "grad_norm": 7.5517072677612305, "learning_rate": 1.563939393939394e-06, "loss": 6.221974563598633, "step": 69035 }, { "epoch": 0.0944, "grad_norm": 7.171329021453857, "learning_rate": 1.563686868686869e-06, "loss": 6.265129852294922, "step": 69040 }, { "epoch": 0.09445, "grad_norm": 8.784139633178711, "learning_rate": 1.5634343434343436e-06, "loss": 6.237677764892578, "step": 69045 }, { "epoch": 0.0945, "grad_norm": 5.32128381729126, "learning_rate": 1.5631818181818184e-06, "loss": 6.203376007080078, "step": 69050 }, { "epoch": 0.09455, "grad_norm": 5.802401542663574, "learning_rate": 1.562929292929293e-06, "loss": 6.246008682250976, "step": 69055 }, { "epoch": 0.0946, "grad_norm": 12.16514778137207, "learning_rate": 1.5626767676767679e-06, "loss": 6.267401123046875, "step": 69060 }, { "epoch": 0.09465, "grad_norm": 7.889964580535889, "learning_rate": 1.5624242424242425e-06, "loss": 6.286418533325195, "step": 69065 }, { "epoch": 0.0947, "grad_norm": 4.569300651550293, "learning_rate": 1.5621717171717173e-06, "loss": 6.2231201171875, "step": 69070 }, { "epoch": 0.09475, "grad_norm": 7.496580600738525, "learning_rate": 1.561919191919192e-06, "loss": 6.252500534057617, "step": 69075 }, { "epoch": 0.0948, "grad_norm": 4.501032829284668, "learning_rate": 1.5616666666666668e-06, "loss": 6.236304473876953, "step": 69080 }, { "epoch": 0.09485, "grad_norm": 7.12131929397583, "learning_rate": 1.5614141414141415e-06, "loss": 6.193370056152344, "step": 69085 }, { "epoch": 0.0949, "grad_norm": 6.259496688842773, "learning_rate": 1.5611616161616163e-06, "loss": 6.22106819152832, "step": 69090 }, { "epoch": 0.09495, "grad_norm": 6.167606353759766, "learning_rate": 1.560909090909091e-06, "loss": 6.191121292114258, "step": 69095 }, { "epoch": 0.095, "grad_norm": 6.5063676834106445, "learning_rate": 1.5606565656565658e-06, "loss": 6.37084732055664, "step": 69100 }, { "epoch": 0.09505, "grad_norm": 3.8654844760894775, "learning_rate": 1.5604040404040404e-06, "loss": 6.204423522949218, "step": 69105 }, { "epoch": 0.0951, "grad_norm": 22.27290916442871, "learning_rate": 1.5601515151515154e-06, "loss": 6.134803009033203, "step": 69110 }, { "epoch": 0.09515, "grad_norm": 5.565456867218018, "learning_rate": 1.5598989898989899e-06, "loss": 6.212638854980469, "step": 69115 }, { "epoch": 0.0952, "grad_norm": 5.206862926483154, "learning_rate": 1.559646464646465e-06, "loss": 6.230635452270508, "step": 69120 }, { "epoch": 0.09525, "grad_norm": 4.795496463775635, "learning_rate": 1.5593939393939395e-06, "loss": 6.236833572387695, "step": 69125 }, { "epoch": 0.0953, "grad_norm": 7.388526916503906, "learning_rate": 1.5591414141414144e-06, "loss": 6.265694427490234, "step": 69130 }, { "epoch": 0.09535, "grad_norm": 8.104926109313965, "learning_rate": 1.558888888888889e-06, "loss": 6.241702270507813, "step": 69135 }, { "epoch": 0.0954, "grad_norm": 9.066976547241211, "learning_rate": 1.5586363636363639e-06, "loss": 6.333129119873047, "step": 69140 }, { "epoch": 0.09545, "grad_norm": 5.110625267028809, "learning_rate": 1.5583838383838385e-06, "loss": 6.2337791442871096, "step": 69145 }, { "epoch": 0.0955, "grad_norm": 4.712457180023193, "learning_rate": 1.5581313131313133e-06, "loss": 6.196755218505859, "step": 69150 }, { "epoch": 0.09555, "grad_norm": 6.81787633895874, "learning_rate": 1.557878787878788e-06, "loss": 6.3087211608886715, "step": 69155 }, { "epoch": 0.0956, "grad_norm": 7.548593521118164, "learning_rate": 1.5576262626262628e-06, "loss": 6.194660949707031, "step": 69160 }, { "epoch": 0.09565, "grad_norm": 4.7881178855896, "learning_rate": 1.5573737373737374e-06, "loss": 6.247665023803711, "step": 69165 }, { "epoch": 0.0957, "grad_norm": 8.401731491088867, "learning_rate": 1.5571212121212123e-06, "loss": 6.203215789794922, "step": 69170 }, { "epoch": 0.09575, "grad_norm": 5.976311206817627, "learning_rate": 1.556868686868687e-06, "loss": 6.251618576049805, "step": 69175 }, { "epoch": 0.0958, "grad_norm": 19.481719970703125, "learning_rate": 1.5566161616161618e-06, "loss": 6.323868179321289, "step": 69180 }, { "epoch": 0.09585, "grad_norm": 4.660898685455322, "learning_rate": 1.5563636363636364e-06, "loss": 6.218047332763672, "step": 69185 }, { "epoch": 0.0959, "grad_norm": 8.999588966369629, "learning_rate": 1.5561111111111112e-06, "loss": 6.276861953735351, "step": 69190 }, { "epoch": 0.09595, "grad_norm": 6.649381637573242, "learning_rate": 1.5558585858585859e-06, "loss": 6.21825942993164, "step": 69195 }, { "epoch": 0.096, "grad_norm": 27.80484962463379, "learning_rate": 1.5556060606060607e-06, "loss": 6.443971252441406, "step": 69200 }, { "epoch": 0.09605, "grad_norm": 5.671781063079834, "learning_rate": 1.5553535353535353e-06, "loss": 6.266683959960938, "step": 69205 }, { "epoch": 0.0961, "grad_norm": 5.005762100219727, "learning_rate": 1.5551010101010104e-06, "loss": 5.43610725402832, "step": 69210 }, { "epoch": 0.09615, "grad_norm": 3.879923105239868, "learning_rate": 1.5548484848484848e-06, "loss": 6.271680450439453, "step": 69215 }, { "epoch": 0.0962, "grad_norm": 8.611238479614258, "learning_rate": 1.5545959595959599e-06, "loss": 6.2193351745605465, "step": 69220 }, { "epoch": 0.09625, "grad_norm": 6.423185348510742, "learning_rate": 1.5543434343434345e-06, "loss": 6.246839904785157, "step": 69225 }, { "epoch": 0.0963, "grad_norm": 8.52085018157959, "learning_rate": 1.5540909090909093e-06, "loss": 6.327392959594727, "step": 69230 }, { "epoch": 0.09635, "grad_norm": 19.9050235748291, "learning_rate": 1.553838383838384e-06, "loss": 6.276440811157227, "step": 69235 }, { "epoch": 0.0964, "grad_norm": 12.463805198669434, "learning_rate": 1.5535858585858588e-06, "loss": 6.3173164367675785, "step": 69240 }, { "epoch": 0.09645, "grad_norm": 4.923677921295166, "learning_rate": 1.5533333333333334e-06, "loss": 6.237506866455078, "step": 69245 }, { "epoch": 0.0965, "grad_norm": 6.614983558654785, "learning_rate": 1.5530808080808083e-06, "loss": 6.18414535522461, "step": 69250 }, { "epoch": 0.09655, "grad_norm": 6.551353931427002, "learning_rate": 1.552828282828283e-06, "loss": 6.233306503295898, "step": 69255 }, { "epoch": 0.0966, "grad_norm": 6.107916355133057, "learning_rate": 1.5525757575757577e-06, "loss": 6.256178665161133, "step": 69260 }, { "epoch": 0.09665, "grad_norm": 7.06215763092041, "learning_rate": 1.5523232323232324e-06, "loss": 6.200793838500976, "step": 69265 }, { "epoch": 0.0967, "grad_norm": 4.70161247253418, "learning_rate": 1.5520707070707072e-06, "loss": 6.273701858520508, "step": 69270 }, { "epoch": 0.09675, "grad_norm": 6.288102626800537, "learning_rate": 1.5518181818181818e-06, "loss": 6.302139663696289, "step": 69275 }, { "epoch": 0.0968, "grad_norm": 4.877311706542969, "learning_rate": 1.5515656565656567e-06, "loss": 6.188449859619141, "step": 69280 }, { "epoch": 0.09685, "grad_norm": 10.349947929382324, "learning_rate": 1.5513131313131313e-06, "loss": 6.236978530883789, "step": 69285 }, { "epoch": 0.0969, "grad_norm": 5.052997589111328, "learning_rate": 1.5510606060606062e-06, "loss": 6.257381439208984, "step": 69290 }, { "epoch": 0.09695, "grad_norm": 16.070690155029297, "learning_rate": 1.5508080808080808e-06, "loss": 6.248753356933594, "step": 69295 }, { "epoch": 0.097, "grad_norm": 6.382606506347656, "learning_rate": 1.5505555555555556e-06, "loss": 6.3302864074707035, "step": 69300 }, { "epoch": 0.09705, "grad_norm": 4.3332905769348145, "learning_rate": 1.5503030303030303e-06, "loss": 6.267464447021484, "step": 69305 }, { "epoch": 0.0971, "grad_norm": 4.829068183898926, "learning_rate": 1.550050505050505e-06, "loss": 6.24278564453125, "step": 69310 }, { "epoch": 0.09715, "grad_norm": 5.522424697875977, "learning_rate": 1.5497979797979797e-06, "loss": 6.206704711914062, "step": 69315 }, { "epoch": 0.0972, "grad_norm": 23.22768783569336, "learning_rate": 1.5495454545454548e-06, "loss": 6.517229461669922, "step": 69320 }, { "epoch": 0.09725, "grad_norm": 6.282406806945801, "learning_rate": 1.5492929292929292e-06, "loss": 6.289273452758789, "step": 69325 }, { "epoch": 0.0973, "grad_norm": 7.1454997062683105, "learning_rate": 1.5490404040404043e-06, "loss": 6.20696907043457, "step": 69330 }, { "epoch": 0.09735, "grad_norm": 6.620607852935791, "learning_rate": 1.5487878787878789e-06, "loss": 6.2759040832519535, "step": 69335 }, { "epoch": 0.0974, "grad_norm": 8.895538330078125, "learning_rate": 1.5485353535353537e-06, "loss": 6.258265686035156, "step": 69340 }, { "epoch": 0.09745, "grad_norm": 3.758849620819092, "learning_rate": 1.5482828282828284e-06, "loss": 6.257617568969726, "step": 69345 }, { "epoch": 0.0975, "grad_norm": 4.745532035827637, "learning_rate": 1.5480303030303032e-06, "loss": 6.2366992950439455, "step": 69350 }, { "epoch": 0.09755, "grad_norm": 8.403188705444336, "learning_rate": 1.5477777777777778e-06, "loss": 6.220742416381836, "step": 69355 }, { "epoch": 0.0976, "grad_norm": 4.889970302581787, "learning_rate": 1.5475252525252527e-06, "loss": 6.217706298828125, "step": 69360 }, { "epoch": 0.09765, "grad_norm": 6.993582725524902, "learning_rate": 1.5472727272727275e-06, "loss": 6.199949645996094, "step": 69365 }, { "epoch": 0.0977, "grad_norm": 6.398688316345215, "learning_rate": 1.5470202020202021e-06, "loss": 6.280363464355469, "step": 69370 }, { "epoch": 0.09775, "grad_norm": 5.875310897827148, "learning_rate": 1.546767676767677e-06, "loss": 6.249153900146484, "step": 69375 }, { "epoch": 0.0978, "grad_norm": 9.45569133758545, "learning_rate": 1.5465151515151516e-06, "loss": 6.228491973876953, "step": 69380 }, { "epoch": 0.09785, "grad_norm": 3.9134020805358887, "learning_rate": 1.5462626262626265e-06, "loss": 6.251824951171875, "step": 69385 }, { "epoch": 0.0979, "grad_norm": 5.167562961578369, "learning_rate": 1.546010101010101e-06, "loss": 6.257137298583984, "step": 69390 }, { "epoch": 0.09795, "grad_norm": 4.09098482131958, "learning_rate": 1.545757575757576e-06, "loss": 6.171733856201172, "step": 69395 }, { "epoch": 0.098, "grad_norm": 6.145762920379639, "learning_rate": 1.5455050505050506e-06, "loss": 6.23004264831543, "step": 69400 }, { "epoch": 0.09805, "grad_norm": 9.674710273742676, "learning_rate": 1.5452525252525254e-06, "loss": 6.234002304077149, "step": 69405 }, { "epoch": 0.0981, "grad_norm": 10.149322509765625, "learning_rate": 1.545e-06, "loss": 6.394666290283203, "step": 69410 }, { "epoch": 0.09815, "grad_norm": 6.266510009765625, "learning_rate": 1.544747474747475e-06, "loss": 6.24141845703125, "step": 69415 }, { "epoch": 0.0982, "grad_norm": 71.31550598144531, "learning_rate": 1.5444949494949495e-06, "loss": 6.983908843994141, "step": 69420 }, { "epoch": 0.09825, "grad_norm": 77.48998260498047, "learning_rate": 1.5442424242424246e-06, "loss": 8.883917999267577, "step": 69425 }, { "epoch": 0.0983, "grad_norm": 13.741813659667969, "learning_rate": 1.5439898989898992e-06, "loss": 7.029902648925781, "step": 69430 }, { "epoch": 0.09835, "grad_norm": 13.597463607788086, "learning_rate": 1.543737373737374e-06, "loss": 6.341259002685547, "step": 69435 }, { "epoch": 0.0984, "grad_norm": 4.150789737701416, "learning_rate": 1.5434848484848487e-06, "loss": 6.241429138183594, "step": 69440 }, { "epoch": 0.09845, "grad_norm": 6.996358394622803, "learning_rate": 1.5432323232323235e-06, "loss": 6.283261489868164, "step": 69445 }, { "epoch": 0.0985, "grad_norm": 4.047050952911377, "learning_rate": 1.5429797979797981e-06, "loss": 6.275844955444336, "step": 69450 }, { "epoch": 0.09855, "grad_norm": 7.229933261871338, "learning_rate": 1.542727272727273e-06, "loss": 6.273773956298828, "step": 69455 }, { "epoch": 0.0986, "grad_norm": 5.943408966064453, "learning_rate": 1.5424747474747476e-06, "loss": 6.272219848632813, "step": 69460 }, { "epoch": 0.09865, "grad_norm": 6.835803031921387, "learning_rate": 1.5422222222222224e-06, "loss": 6.349821090698242, "step": 69465 }, { "epoch": 0.0987, "grad_norm": 5.371696949005127, "learning_rate": 1.541969696969697e-06, "loss": 6.240107727050781, "step": 69470 }, { "epoch": 0.09875, "grad_norm": 6.303291320800781, "learning_rate": 1.541717171717172e-06, "loss": 6.322772216796875, "step": 69475 }, { "epoch": 0.0988, "grad_norm": 9.931221961975098, "learning_rate": 1.5414646464646465e-06, "loss": 6.245140075683594, "step": 69480 }, { "epoch": 0.09885, "grad_norm": 3.734388589859009, "learning_rate": 1.5412121212121214e-06, "loss": 6.221798706054687, "step": 69485 }, { "epoch": 0.0989, "grad_norm": 15.286189079284668, "learning_rate": 1.540959595959596e-06, "loss": 6.324143981933593, "step": 69490 }, { "epoch": 0.09895, "grad_norm": 11.956453323364258, "learning_rate": 1.5407070707070709e-06, "loss": 6.620227813720703, "step": 69495 }, { "epoch": 0.099, "grad_norm": 9.132294654846191, "learning_rate": 1.5404545454545455e-06, "loss": 6.261430358886718, "step": 69500 }, { "epoch": 0.09905, "grad_norm": 7.009271144866943, "learning_rate": 1.5402020202020203e-06, "loss": 6.271954345703125, "step": 69505 }, { "epoch": 0.0991, "grad_norm": 8.70576000213623, "learning_rate": 1.539949494949495e-06, "loss": 6.270662689208985, "step": 69510 }, { "epoch": 0.09915, "grad_norm": 5.163009166717529, "learning_rate": 1.53969696969697e-06, "loss": 6.244068145751953, "step": 69515 }, { "epoch": 0.0992, "grad_norm": 4.413712501525879, "learning_rate": 1.5394444444444444e-06, "loss": 6.259499359130859, "step": 69520 }, { "epoch": 0.09925, "grad_norm": 8.057535171508789, "learning_rate": 1.5391919191919195e-06, "loss": 6.22387466430664, "step": 69525 }, { "epoch": 0.0993, "grad_norm": 4.452139377593994, "learning_rate": 1.538939393939394e-06, "loss": 6.242102813720703, "step": 69530 }, { "epoch": 0.09935, "grad_norm": 10.684853553771973, "learning_rate": 1.538686868686869e-06, "loss": 6.229332351684571, "step": 69535 }, { "epoch": 0.0994, "grad_norm": 4.867793083190918, "learning_rate": 1.5384343434343436e-06, "loss": 6.2237060546875, "step": 69540 }, { "epoch": 0.09945, "grad_norm": 4.437867641448975, "learning_rate": 1.5381818181818184e-06, "loss": 6.257070922851563, "step": 69545 }, { "epoch": 0.0995, "grad_norm": 5.311295509338379, "learning_rate": 1.537929292929293e-06, "loss": 6.271135330200195, "step": 69550 }, { "epoch": 0.09955, "grad_norm": 31.813199996948242, "learning_rate": 1.537676767676768e-06, "loss": 6.204911422729492, "step": 69555 }, { "epoch": 0.0996, "grad_norm": 6.300601959228516, "learning_rate": 1.5374242424242425e-06, "loss": 6.249200057983399, "step": 69560 }, { "epoch": 0.09965, "grad_norm": 13.126708984375, "learning_rate": 1.5371717171717174e-06, "loss": 6.368363952636718, "step": 69565 }, { "epoch": 0.0997, "grad_norm": 3.4197113513946533, "learning_rate": 1.536919191919192e-06, "loss": 6.322563934326172, "step": 69570 }, { "epoch": 0.09975, "grad_norm": 5.326809406280518, "learning_rate": 1.5366666666666668e-06, "loss": 6.25750617980957, "step": 69575 }, { "epoch": 0.0998, "grad_norm": 6.891108512878418, "learning_rate": 1.5364141414141415e-06, "loss": 6.266494369506836, "step": 69580 }, { "epoch": 0.09985, "grad_norm": 9.919321060180664, "learning_rate": 1.5361616161616163e-06, "loss": 6.223246765136719, "step": 69585 }, { "epoch": 0.0999, "grad_norm": 5.455216884613037, "learning_rate": 1.535909090909091e-06, "loss": 6.286400985717774, "step": 69590 }, { "epoch": 0.09995, "grad_norm": 14.548954963684082, "learning_rate": 1.5356565656565658e-06, "loss": 6.534386444091797, "step": 69595 }, { "epoch": 0.1, "grad_norm": 6.654651165008545, "learning_rate": 1.5354040404040404e-06, "loss": 6.256783294677734, "step": 69600 }, { "epoch": 0.10005, "grad_norm": 3.6224820613861084, "learning_rate": 1.5351515151515153e-06, "loss": 6.227585983276367, "step": 69605 }, { "epoch": 0.1001, "grad_norm": 6.38872766494751, "learning_rate": 1.5348989898989899e-06, "loss": 6.225975799560547, "step": 69610 }, { "epoch": 0.10015, "grad_norm": 7.598637580871582, "learning_rate": 1.5346464646464647e-06, "loss": 6.2436668395996096, "step": 69615 }, { "epoch": 0.1002, "grad_norm": 13.28370189666748, "learning_rate": 1.5343939393939394e-06, "loss": 6.191681671142578, "step": 69620 }, { "epoch": 0.10025, "grad_norm": 10.03581714630127, "learning_rate": 1.5341414141414144e-06, "loss": 6.2676544189453125, "step": 69625 }, { "epoch": 0.1003, "grad_norm": 4.692815780639648, "learning_rate": 1.5338888888888888e-06, "loss": 6.281380462646484, "step": 69630 }, { "epoch": 0.10035, "grad_norm": 5.545220375061035, "learning_rate": 1.5336363636363639e-06, "loss": 6.230376052856445, "step": 69635 }, { "epoch": 0.1004, "grad_norm": 4.22117805480957, "learning_rate": 1.5333838383838385e-06, "loss": 6.256169128417969, "step": 69640 }, { "epoch": 0.10045, "grad_norm": 8.224137306213379, "learning_rate": 1.5331313131313134e-06, "loss": 6.205984497070313, "step": 69645 }, { "epoch": 0.1005, "grad_norm": 9.192310333251953, "learning_rate": 1.532878787878788e-06, "loss": 6.2722118377685545, "step": 69650 }, { "epoch": 0.10055, "grad_norm": 5.609637260437012, "learning_rate": 1.5326262626262628e-06, "loss": 6.214276885986328, "step": 69655 }, { "epoch": 0.1006, "grad_norm": 3.1007955074310303, "learning_rate": 1.5323737373737375e-06, "loss": 6.21006965637207, "step": 69660 }, { "epoch": 0.10065, "grad_norm": 5.235879898071289, "learning_rate": 1.5321212121212123e-06, "loss": 6.214688873291015, "step": 69665 }, { "epoch": 0.1007, "grad_norm": 12.221334457397461, "learning_rate": 1.531868686868687e-06, "loss": 6.382823944091797, "step": 69670 }, { "epoch": 0.10075, "grad_norm": 4.948221206665039, "learning_rate": 1.5316161616161618e-06, "loss": 6.1910865783691404, "step": 69675 }, { "epoch": 0.1008, "grad_norm": 5.922194957733154, "learning_rate": 1.5313636363636364e-06, "loss": 6.260441589355469, "step": 69680 }, { "epoch": 0.10085, "grad_norm": 5.265747547149658, "learning_rate": 1.5311111111111113e-06, "loss": 6.2380516052246096, "step": 69685 }, { "epoch": 0.1009, "grad_norm": 5.622743606567383, "learning_rate": 1.5308585858585859e-06, "loss": 6.206553268432617, "step": 69690 }, { "epoch": 0.10095, "grad_norm": 8.155200004577637, "learning_rate": 1.5306060606060607e-06, "loss": 6.221061706542969, "step": 69695 }, { "epoch": 0.101, "grad_norm": 7.319873809814453, "learning_rate": 1.5303535353535354e-06, "loss": 6.256492996215821, "step": 69700 }, { "epoch": 0.10105, "grad_norm": 3.62444806098938, "learning_rate": 1.5301010101010102e-06, "loss": 6.274919509887695, "step": 69705 }, { "epoch": 0.1011, "grad_norm": 22.72197151184082, "learning_rate": 1.5298484848484848e-06, "loss": 6.317089080810547, "step": 69710 }, { "epoch": 0.10115, "grad_norm": 8.26749324798584, "learning_rate": 1.5295959595959597e-06, "loss": 6.277611160278321, "step": 69715 }, { "epoch": 0.1012, "grad_norm": 5.37979793548584, "learning_rate": 1.5293434343434343e-06, "loss": 6.262963104248047, "step": 69720 }, { "epoch": 0.10125, "grad_norm": 8.211739540100098, "learning_rate": 1.5290909090909091e-06, "loss": 6.197469711303711, "step": 69725 }, { "epoch": 0.1013, "grad_norm": 6.4922261238098145, "learning_rate": 1.5288383838383838e-06, "loss": 6.273396682739258, "step": 69730 }, { "epoch": 0.10135, "grad_norm": 6.28837776184082, "learning_rate": 1.5285858585858588e-06, "loss": 6.2328941345214846, "step": 69735 }, { "epoch": 0.1014, "grad_norm": 5.55908727645874, "learning_rate": 1.5283333333333332e-06, "loss": 6.200650787353515, "step": 69740 }, { "epoch": 0.10145, "grad_norm": 7.9614739418029785, "learning_rate": 1.5280808080808083e-06, "loss": 6.222938919067383, "step": 69745 }, { "epoch": 0.1015, "grad_norm": 6.5946044921875, "learning_rate": 1.527828282828283e-06, "loss": 6.250992202758789, "step": 69750 }, { "epoch": 0.10155, "grad_norm": 7.185013294219971, "learning_rate": 1.5275757575757578e-06, "loss": 6.223256683349609, "step": 69755 }, { "epoch": 0.1016, "grad_norm": 6.706727027893066, "learning_rate": 1.5273232323232324e-06, "loss": 6.295187377929688, "step": 69760 }, { "epoch": 0.10165, "grad_norm": 11.28437328338623, "learning_rate": 1.5270707070707072e-06, "loss": 6.425630187988281, "step": 69765 }, { "epoch": 0.1017, "grad_norm": 16.563880920410156, "learning_rate": 1.5268181818181819e-06, "loss": 6.262357711791992, "step": 69770 }, { "epoch": 0.10175, "grad_norm": 5.872164726257324, "learning_rate": 1.5265656565656567e-06, "loss": 6.242399597167969, "step": 69775 }, { "epoch": 0.1018, "grad_norm": 12.3408842086792, "learning_rate": 1.5263131313131313e-06, "loss": 6.25868034362793, "step": 69780 }, { "epoch": 0.10185, "grad_norm": 14.229727745056152, "learning_rate": 1.5260606060606062e-06, "loss": 6.237813186645508, "step": 69785 }, { "epoch": 0.1019, "grad_norm": 4.4432692527771, "learning_rate": 1.525808080808081e-06, "loss": 6.237071228027344, "step": 69790 }, { "epoch": 0.10195, "grad_norm": 8.864755630493164, "learning_rate": 1.5255555555555557e-06, "loss": 6.243339157104492, "step": 69795 }, { "epoch": 0.102, "grad_norm": 14.311020851135254, "learning_rate": 1.5253030303030305e-06, "loss": 6.239898681640625, "step": 69800 }, { "epoch": 0.10205, "grad_norm": 4.2846999168396, "learning_rate": 1.5250505050505051e-06, "loss": 6.184924697875976, "step": 69805 }, { "epoch": 0.1021, "grad_norm": 12.436625480651855, "learning_rate": 1.52479797979798e-06, "loss": 6.177059555053711, "step": 69810 }, { "epoch": 0.10215, "grad_norm": 6.003546714782715, "learning_rate": 1.5245454545454546e-06, "loss": 6.2401268005371096, "step": 69815 }, { "epoch": 0.1022, "grad_norm": 6.579974174499512, "learning_rate": 1.5242929292929294e-06, "loss": 6.202879333496094, "step": 69820 }, { "epoch": 0.10225, "grad_norm": 4.236875057220459, "learning_rate": 1.524040404040404e-06, "loss": 6.247172546386719, "step": 69825 }, { "epoch": 0.1023, "grad_norm": 4.880104064941406, "learning_rate": 1.5237878787878791e-06, "loss": 6.290971755981445, "step": 69830 }, { "epoch": 0.10235, "grad_norm": 5.129511833190918, "learning_rate": 1.5235353535353535e-06, "loss": 6.255743408203125, "step": 69835 }, { "epoch": 0.1024, "grad_norm": 10.137799263000488, "learning_rate": 1.5232828282828286e-06, "loss": 6.195932388305664, "step": 69840 }, { "epoch": 0.10245, "grad_norm": 7.059858322143555, "learning_rate": 1.5230303030303032e-06, "loss": 6.194368743896485, "step": 69845 }, { "epoch": 0.1025, "grad_norm": 8.526978492736816, "learning_rate": 1.522777777777778e-06, "loss": 6.185575103759765, "step": 69850 }, { "epoch": 0.10255, "grad_norm": 13.95495891571045, "learning_rate": 1.5225252525252527e-06, "loss": 6.270240402221679, "step": 69855 }, { "epoch": 0.1026, "grad_norm": 20.566669464111328, "learning_rate": 1.5222727272727275e-06, "loss": 6.252448272705078, "step": 69860 }, { "epoch": 0.10265, "grad_norm": 4.166781902313232, "learning_rate": 1.5220202020202022e-06, "loss": 6.259589004516601, "step": 69865 }, { "epoch": 0.1027, "grad_norm": 5.2048659324646, "learning_rate": 1.521767676767677e-06, "loss": 6.215940856933594, "step": 69870 }, { "epoch": 0.10275, "grad_norm": 5.139145374298096, "learning_rate": 1.5215151515151516e-06, "loss": 6.316371154785156, "step": 69875 }, { "epoch": 0.1028, "grad_norm": 11.298568725585938, "learning_rate": 1.5212626262626265e-06, "loss": 6.198626327514648, "step": 69880 }, { "epoch": 0.10285, "grad_norm": 6.460776329040527, "learning_rate": 1.5210101010101011e-06, "loss": 6.267860412597656, "step": 69885 }, { "epoch": 0.1029, "grad_norm": 69.19715881347656, "learning_rate": 1.520757575757576e-06, "loss": 6.182583999633789, "step": 69890 }, { "epoch": 0.10295, "grad_norm": 5.489862442016602, "learning_rate": 1.5205050505050506e-06, "loss": 6.133612442016601, "step": 69895 }, { "epoch": 0.103, "grad_norm": 5.957542419433594, "learning_rate": 1.5202525252525254e-06, "loss": 6.254172897338867, "step": 69900 }, { "epoch": 0.10305, "grad_norm": 3.876161813735962, "learning_rate": 1.52e-06, "loss": 6.251986312866211, "step": 69905 }, { "epoch": 0.1031, "grad_norm": 7.069336891174316, "learning_rate": 1.519747474747475e-06, "loss": 6.240657806396484, "step": 69910 }, { "epoch": 0.10315, "grad_norm": 5.402816295623779, "learning_rate": 1.5194949494949495e-06, "loss": 6.236772918701172, "step": 69915 }, { "epoch": 0.1032, "grad_norm": 9.022480964660645, "learning_rate": 1.5192424242424244e-06, "loss": 6.247328186035157, "step": 69920 }, { "epoch": 0.10325, "grad_norm": 8.963666915893555, "learning_rate": 1.518989898989899e-06, "loss": 6.244709777832031, "step": 69925 }, { "epoch": 0.1033, "grad_norm": 5.964870452880859, "learning_rate": 1.518737373737374e-06, "loss": 6.248283004760742, "step": 69930 }, { "epoch": 0.10335, "grad_norm": 31.1531925201416, "learning_rate": 1.5184848484848485e-06, "loss": 5.874058532714844, "step": 69935 }, { "epoch": 0.1034, "grad_norm": 24.827714920043945, "learning_rate": 1.5182323232323235e-06, "loss": 5.656063842773437, "step": 69940 }, { "epoch": 0.10345, "grad_norm": 11.647584915161133, "learning_rate": 1.5179797979797982e-06, "loss": 6.228829193115234, "step": 69945 }, { "epoch": 0.1035, "grad_norm": 7.142101764678955, "learning_rate": 1.517727272727273e-06, "loss": 6.236428833007812, "step": 69950 }, { "epoch": 0.10355, "grad_norm": 9.438726425170898, "learning_rate": 1.5174747474747476e-06, "loss": 6.201677322387695, "step": 69955 }, { "epoch": 0.1036, "grad_norm": 9.632033348083496, "learning_rate": 1.5172222222222225e-06, "loss": 6.2188568115234375, "step": 69960 }, { "epoch": 0.10365, "grad_norm": 4.930743217468262, "learning_rate": 1.516969696969697e-06, "loss": 6.300019454956055, "step": 69965 }, { "epoch": 0.1037, "grad_norm": 5.744776248931885, "learning_rate": 1.516717171717172e-06, "loss": 6.24951400756836, "step": 69970 }, { "epoch": 0.10375, "grad_norm": 5.7224860191345215, "learning_rate": 1.5164646464646466e-06, "loss": 6.24515380859375, "step": 69975 }, { "epoch": 0.1038, "grad_norm": 8.35798454284668, "learning_rate": 1.5162121212121214e-06, "loss": 6.258012771606445, "step": 69980 }, { "epoch": 0.10385, "grad_norm": 5.971210479736328, "learning_rate": 1.515959595959596e-06, "loss": 6.200835418701172, "step": 69985 }, { "epoch": 0.1039, "grad_norm": 11.250112533569336, "learning_rate": 1.5157070707070709e-06, "loss": 6.311390686035156, "step": 69990 }, { "epoch": 0.10395, "grad_norm": 4.787052154541016, "learning_rate": 1.5154545454545455e-06, "loss": 6.196207427978516, "step": 69995 }, { "epoch": 0.104, "grad_norm": 9.09445858001709, "learning_rate": 1.5152020202020204e-06, "loss": 6.250798034667969, "step": 70000 }, { "epoch": 0.10405, "grad_norm": 5.368104457855225, "learning_rate": 1.514949494949495e-06, "loss": 6.260558319091797, "step": 70005 }, { "epoch": 0.1041, "grad_norm": 4.585999011993408, "learning_rate": 1.5146969696969698e-06, "loss": 6.2481689453125, "step": 70010 }, { "epoch": 0.10415, "grad_norm": 6.250848293304443, "learning_rate": 1.5144444444444445e-06, "loss": 6.2637981414794925, "step": 70015 }, { "epoch": 0.1042, "grad_norm": 7.508022785186768, "learning_rate": 1.5141919191919193e-06, "loss": 6.189240264892578, "step": 70020 }, { "epoch": 0.10425, "grad_norm": 17.536020278930664, "learning_rate": 1.513939393939394e-06, "loss": 6.260718536376953, "step": 70025 }, { "epoch": 0.1043, "grad_norm": 34.615455627441406, "learning_rate": 1.5136868686868688e-06, "loss": 6.429454803466797, "step": 70030 }, { "epoch": 0.10435, "grad_norm": 6.022301197052002, "learning_rate": 1.5134343434343434e-06, "loss": 6.373382186889648, "step": 70035 }, { "epoch": 0.1044, "grad_norm": 11.31711196899414, "learning_rate": 1.5131818181818185e-06, "loss": 6.301255416870117, "step": 70040 }, { "epoch": 0.10445, "grad_norm": 14.024144172668457, "learning_rate": 1.5129292929292929e-06, "loss": 6.362357330322266, "step": 70045 }, { "epoch": 0.1045, "grad_norm": 6.217692852020264, "learning_rate": 1.512676767676768e-06, "loss": 6.294585418701172, "step": 70050 }, { "epoch": 0.10455, "grad_norm": 6.578646659851074, "learning_rate": 1.5124242424242426e-06, "loss": 6.211361694335937, "step": 70055 }, { "epoch": 0.1046, "grad_norm": 6.838810443878174, "learning_rate": 1.5121717171717174e-06, "loss": 6.222134399414062, "step": 70060 }, { "epoch": 0.10465, "grad_norm": 7.00046443939209, "learning_rate": 1.511919191919192e-06, "loss": 6.473004913330078, "step": 70065 }, { "epoch": 0.1047, "grad_norm": 4.078529357910156, "learning_rate": 1.5116666666666669e-06, "loss": 6.304496002197266, "step": 70070 }, { "epoch": 0.10475, "grad_norm": 6.784806728363037, "learning_rate": 1.5114141414141415e-06, "loss": 6.255331420898438, "step": 70075 }, { "epoch": 0.1048, "grad_norm": 7.417270183563232, "learning_rate": 1.5111616161616163e-06, "loss": 6.251804351806641, "step": 70080 }, { "epoch": 0.10485, "grad_norm": 4.264366626739502, "learning_rate": 1.510909090909091e-06, "loss": 6.276866149902344, "step": 70085 }, { "epoch": 0.1049, "grad_norm": 7.064784526824951, "learning_rate": 1.5106565656565658e-06, "loss": 6.274673080444336, "step": 70090 }, { "epoch": 0.10495, "grad_norm": 9.293214797973633, "learning_rate": 1.5104040404040404e-06, "loss": 6.228495025634766, "step": 70095 }, { "epoch": 0.105, "grad_norm": 8.082324028015137, "learning_rate": 1.5101515151515153e-06, "loss": 6.273787307739258, "step": 70100 }, { "epoch": 0.10505, "grad_norm": 5.533097267150879, "learning_rate": 1.50989898989899e-06, "loss": 6.306704330444336, "step": 70105 }, { "epoch": 0.1051, "grad_norm": 7.885391712188721, "learning_rate": 1.5096464646464648e-06, "loss": 6.281864166259766, "step": 70110 }, { "epoch": 0.10515, "grad_norm": 6.018885612487793, "learning_rate": 1.5093939393939394e-06, "loss": 6.255643844604492, "step": 70115 }, { "epoch": 0.1052, "grad_norm": 18.803895950317383, "learning_rate": 1.5091414141414142e-06, "loss": 6.230621719360352, "step": 70120 }, { "epoch": 0.10525, "grad_norm": 8.603285789489746, "learning_rate": 1.5088888888888889e-06, "loss": 6.378117370605469, "step": 70125 }, { "epoch": 0.1053, "grad_norm": 6.0321125984191895, "learning_rate": 1.5086363636363637e-06, "loss": 6.221832656860352, "step": 70130 }, { "epoch": 0.10535, "grad_norm": 7.258693218231201, "learning_rate": 1.5083838383838383e-06, "loss": 6.233095169067383, "step": 70135 }, { "epoch": 0.1054, "grad_norm": 6.24540376663208, "learning_rate": 1.5081313131313132e-06, "loss": 6.213459777832031, "step": 70140 }, { "epoch": 0.10545, "grad_norm": 4.382788181304932, "learning_rate": 1.5078787878787878e-06, "loss": 6.264875793457032, "step": 70145 }, { "epoch": 0.1055, "grad_norm": 11.932379722595215, "learning_rate": 1.5076262626262629e-06, "loss": 6.2314403533935545, "step": 70150 }, { "epoch": 0.10555, "grad_norm": 4.716244220733643, "learning_rate": 1.5073737373737373e-06, "loss": 6.27198486328125, "step": 70155 }, { "epoch": 0.1056, "grad_norm": 4.40574836730957, "learning_rate": 1.5071212121212123e-06, "loss": 6.2579795837402346, "step": 70160 }, { "epoch": 0.10565, "grad_norm": 5.340152263641357, "learning_rate": 1.506868686868687e-06, "loss": 6.2458240509033205, "step": 70165 }, { "epoch": 0.1057, "grad_norm": 4.740099906921387, "learning_rate": 1.5066161616161618e-06, "loss": 6.248078155517578, "step": 70170 }, { "epoch": 0.10575, "grad_norm": 4.984585762023926, "learning_rate": 1.5063636363636364e-06, "loss": 6.259487152099609, "step": 70175 }, { "epoch": 0.1058, "grad_norm": 4.907613277435303, "learning_rate": 1.5061111111111113e-06, "loss": 6.2829231262207035, "step": 70180 }, { "epoch": 0.10585, "grad_norm": 5.620975017547607, "learning_rate": 1.505858585858586e-06, "loss": 6.215739822387695, "step": 70185 }, { "epoch": 0.1059, "grad_norm": 4.859480857849121, "learning_rate": 1.5056060606060608e-06, "loss": 6.25788688659668, "step": 70190 }, { "epoch": 0.10595, "grad_norm": 3.7855327129364014, "learning_rate": 1.5053535353535354e-06, "loss": 6.2956794738769535, "step": 70195 }, { "epoch": 0.106, "grad_norm": 3.811877489089966, "learning_rate": 1.5051010101010102e-06, "loss": 6.246413040161133, "step": 70200 }, { "epoch": 0.10605, "grad_norm": 7.213291168212891, "learning_rate": 1.5048484848484849e-06, "loss": 6.240888214111328, "step": 70205 }, { "epoch": 0.1061, "grad_norm": 9.880549430847168, "learning_rate": 1.5045959595959597e-06, "loss": 6.287513732910156, "step": 70210 }, { "epoch": 0.10615, "grad_norm": 5.620789527893066, "learning_rate": 1.5043434343434343e-06, "loss": 6.481846618652344, "step": 70215 }, { "epoch": 0.1062, "grad_norm": 5.59331750869751, "learning_rate": 1.5040909090909092e-06, "loss": 6.250838088989258, "step": 70220 }, { "epoch": 0.10625, "grad_norm": 6.5511651039123535, "learning_rate": 1.503838383838384e-06, "loss": 6.294349288940429, "step": 70225 }, { "epoch": 0.1063, "grad_norm": 4.873459815979004, "learning_rate": 1.5035858585858586e-06, "loss": 6.269921875, "step": 70230 }, { "epoch": 0.10635, "grad_norm": 4.741708755493164, "learning_rate": 1.5033333333333337e-06, "loss": 6.257433700561523, "step": 70235 }, { "epoch": 0.1064, "grad_norm": 5.410181045532227, "learning_rate": 1.5030808080808081e-06, "loss": 6.243133544921875, "step": 70240 }, { "epoch": 0.10645, "grad_norm": 9.414806365966797, "learning_rate": 1.5028282828282832e-06, "loss": 6.222256851196289, "step": 70245 }, { "epoch": 0.1065, "grad_norm": 6.543602466583252, "learning_rate": 1.5025757575757576e-06, "loss": 6.291792678833008, "step": 70250 }, { "epoch": 0.10655, "grad_norm": 3.5766642093658447, "learning_rate": 1.5023232323232326e-06, "loss": 6.241476440429688, "step": 70255 }, { "epoch": 0.1066, "grad_norm": 8.665578842163086, "learning_rate": 1.5020707070707073e-06, "loss": 6.312579345703125, "step": 70260 }, { "epoch": 0.10665, "grad_norm": 5.953514099121094, "learning_rate": 1.5018181818181821e-06, "loss": 6.224444198608398, "step": 70265 }, { "epoch": 0.1067, "grad_norm": 5.600226402282715, "learning_rate": 1.5015656565656567e-06, "loss": 6.226136016845703, "step": 70270 }, { "epoch": 0.10675, "grad_norm": 10.065235137939453, "learning_rate": 1.5013131313131316e-06, "loss": 6.260222625732422, "step": 70275 }, { "epoch": 0.1068, "grad_norm": 6.075634479522705, "learning_rate": 1.5010606060606062e-06, "loss": 6.26611442565918, "step": 70280 }, { "epoch": 0.10685, "grad_norm": 27.269681930541992, "learning_rate": 1.500808080808081e-06, "loss": 6.2349193572998045, "step": 70285 }, { "epoch": 0.1069, "grad_norm": 4.279699325561523, "learning_rate": 1.5005555555555557e-06, "loss": 6.099803161621094, "step": 70290 }, { "epoch": 0.10695, "grad_norm": 4.364034175872803, "learning_rate": 1.5003030303030305e-06, "loss": 6.223847961425781, "step": 70295 }, { "epoch": 0.107, "grad_norm": 4.29464054107666, "learning_rate": 1.5000505050505052e-06, "loss": 6.224812316894531, "step": 70300 }, { "epoch": 0.10705, "grad_norm": 5.289552211761475, "learning_rate": 1.49979797979798e-06, "loss": 6.48115234375, "step": 70305 }, { "epoch": 0.1071, "grad_norm": 6.428898811340332, "learning_rate": 1.4995454545454546e-06, "loss": 6.244331359863281, "step": 70310 }, { "epoch": 0.10715, "grad_norm": 7.897966384887695, "learning_rate": 1.4992929292929295e-06, "loss": 6.285530090332031, "step": 70315 }, { "epoch": 0.1072, "grad_norm": 6.52786922454834, "learning_rate": 1.499040404040404e-06, "loss": 6.23895263671875, "step": 70320 }, { "epoch": 0.10725, "grad_norm": 6.497035980224609, "learning_rate": 1.498787878787879e-06, "loss": 6.375001525878906, "step": 70325 }, { "epoch": 0.1073, "grad_norm": 6.014400005340576, "learning_rate": 1.4985353535353536e-06, "loss": 6.259087371826172, "step": 70330 }, { "epoch": 0.10735, "grad_norm": 5.667745113372803, "learning_rate": 1.4982828282828284e-06, "loss": 6.236745071411133, "step": 70335 }, { "epoch": 0.1074, "grad_norm": 4.16525411605835, "learning_rate": 1.498030303030303e-06, "loss": 6.264295196533203, "step": 70340 }, { "epoch": 0.10745, "grad_norm": 6.932337284088135, "learning_rate": 1.497777777777778e-06, "loss": 6.223941421508789, "step": 70345 }, { "epoch": 0.1075, "grad_norm": 9.45105266571045, "learning_rate": 1.4975252525252525e-06, "loss": 6.227922058105468, "step": 70350 }, { "epoch": 0.10755, "grad_norm": 21.06622314453125, "learning_rate": 1.4972727272727276e-06, "loss": 6.470458984375, "step": 70355 }, { "epoch": 0.1076, "grad_norm": 5.836840629577637, "learning_rate": 1.4970202020202022e-06, "loss": 6.355976486206055, "step": 70360 }, { "epoch": 0.10765, "grad_norm": 4.516486644744873, "learning_rate": 1.496767676767677e-06, "loss": 6.216770935058594, "step": 70365 }, { "epoch": 0.1077, "grad_norm": 4.557652473449707, "learning_rate": 1.4965151515151517e-06, "loss": 6.274875640869141, "step": 70370 }, { "epoch": 0.10775, "grad_norm": 9.080934524536133, "learning_rate": 1.4962626262626265e-06, "loss": 6.243246459960938, "step": 70375 }, { "epoch": 0.1078, "grad_norm": 7.43522310256958, "learning_rate": 1.4960101010101011e-06, "loss": 6.246138000488282, "step": 70380 }, { "epoch": 0.10785, "grad_norm": 4.300600051879883, "learning_rate": 1.495757575757576e-06, "loss": 6.185092544555664, "step": 70385 }, { "epoch": 0.1079, "grad_norm": 5.872352123260498, "learning_rate": 1.4955050505050506e-06, "loss": 6.276237869262696, "step": 70390 }, { "epoch": 0.10795, "grad_norm": 4.333263874053955, "learning_rate": 1.4952525252525255e-06, "loss": 6.316579818725586, "step": 70395 }, { "epoch": 0.108, "grad_norm": 11.968606948852539, "learning_rate": 1.495e-06, "loss": 6.255261993408203, "step": 70400 }, { "epoch": 0.10805, "grad_norm": 5.893119812011719, "learning_rate": 1.494747474747475e-06, "loss": 6.229133224487304, "step": 70405 }, { "epoch": 0.1081, "grad_norm": 5.888654708862305, "learning_rate": 1.4944949494949496e-06, "loss": 6.242646789550781, "step": 70410 }, { "epoch": 0.10815, "grad_norm": 16.648193359375, "learning_rate": 1.4942424242424244e-06, "loss": 6.233001327514648, "step": 70415 }, { "epoch": 0.1082, "grad_norm": 9.83410930633545, "learning_rate": 1.493989898989899e-06, "loss": 6.295920562744141, "step": 70420 }, { "epoch": 0.10825, "grad_norm": 6.0635247230529785, "learning_rate": 1.4937373737373739e-06, "loss": 6.281613159179687, "step": 70425 }, { "epoch": 0.1083, "grad_norm": 32.05974578857422, "learning_rate": 1.4934848484848485e-06, "loss": 6.253609466552734, "step": 70430 }, { "epoch": 0.10835, "grad_norm": 16.697751998901367, "learning_rate": 1.4932323232323233e-06, "loss": 6.2254383087158205, "step": 70435 }, { "epoch": 0.1084, "grad_norm": 8.260332107543945, "learning_rate": 1.492979797979798e-06, "loss": 6.218616104125976, "step": 70440 }, { "epoch": 0.10845, "grad_norm": 6.301198482513428, "learning_rate": 1.4927272727272728e-06, "loss": 6.277152252197266, "step": 70445 }, { "epoch": 0.1085, "grad_norm": 5.945717811584473, "learning_rate": 1.4924747474747474e-06, "loss": 6.333709716796875, "step": 70450 }, { "epoch": 0.10855, "grad_norm": 6.007282257080078, "learning_rate": 1.4922222222222225e-06, "loss": 6.22431640625, "step": 70455 }, { "epoch": 0.1086, "grad_norm": 5.968386173248291, "learning_rate": 1.491969696969697e-06, "loss": 6.233551788330078, "step": 70460 }, { "epoch": 0.10865, "grad_norm": 6.2929463386535645, "learning_rate": 1.491717171717172e-06, "loss": 6.270417785644531, "step": 70465 }, { "epoch": 0.1087, "grad_norm": 5.647006034851074, "learning_rate": 1.4914646464646466e-06, "loss": 6.379016494750976, "step": 70470 }, { "epoch": 0.10875, "grad_norm": 4.676347255706787, "learning_rate": 1.4912121212121214e-06, "loss": 6.289689636230468, "step": 70475 }, { "epoch": 0.1088, "grad_norm": 3.9319701194763184, "learning_rate": 1.490959595959596e-06, "loss": 6.2204734802246096, "step": 70480 }, { "epoch": 0.10885, "grad_norm": 4.88107442855835, "learning_rate": 1.490707070707071e-06, "loss": 6.258384323120117, "step": 70485 }, { "epoch": 0.1089, "grad_norm": 5.679380416870117, "learning_rate": 1.4904545454545455e-06, "loss": 6.26208267211914, "step": 70490 }, { "epoch": 0.10895, "grad_norm": 6.2144060134887695, "learning_rate": 1.4902020202020204e-06, "loss": 6.224525070190429, "step": 70495 }, { "epoch": 0.109, "grad_norm": 6.1950225830078125, "learning_rate": 1.489949494949495e-06, "loss": 6.236910629272461, "step": 70500 }, { "epoch": 0.10905, "grad_norm": 8.069609642028809, "learning_rate": 1.4896969696969699e-06, "loss": 6.255545806884766, "step": 70505 }, { "epoch": 0.1091, "grad_norm": 7.013514995574951, "learning_rate": 1.4894444444444445e-06, "loss": 6.290047836303711, "step": 70510 }, { "epoch": 0.10915, "grad_norm": 5.9338884353637695, "learning_rate": 1.4891919191919193e-06, "loss": 6.232958984375, "step": 70515 }, { "epoch": 0.1092, "grad_norm": 4.7337870597839355, "learning_rate": 1.488939393939394e-06, "loss": 6.205607604980469, "step": 70520 }, { "epoch": 0.10925, "grad_norm": 8.574862480163574, "learning_rate": 1.4886868686868688e-06, "loss": 6.464659881591797, "step": 70525 }, { "epoch": 0.1093, "grad_norm": 4.819967269897461, "learning_rate": 1.4884343434343434e-06, "loss": 6.26697006225586, "step": 70530 }, { "epoch": 0.10935, "grad_norm": 52.89642333984375, "learning_rate": 1.4881818181818183e-06, "loss": 6.4767303466796875, "step": 70535 }, { "epoch": 0.1094, "grad_norm": 6.329042911529541, "learning_rate": 1.487929292929293e-06, "loss": 6.4414527893066404, "step": 70540 }, { "epoch": 0.10945, "grad_norm": 5.913768291473389, "learning_rate": 1.4876767676767677e-06, "loss": 6.24035873413086, "step": 70545 }, { "epoch": 0.1095, "grad_norm": 4.436302661895752, "learning_rate": 1.4874242424242424e-06, "loss": 6.189162063598633, "step": 70550 }, { "epoch": 0.10955, "grad_norm": 7.437015056610107, "learning_rate": 1.4871717171717172e-06, "loss": 6.264532852172851, "step": 70555 }, { "epoch": 0.1096, "grad_norm": 6.148346424102783, "learning_rate": 1.4869191919191918e-06, "loss": 6.223948669433594, "step": 70560 }, { "epoch": 0.10965, "grad_norm": 6.309460639953613, "learning_rate": 1.486666666666667e-06, "loss": 6.328600311279297, "step": 70565 }, { "epoch": 0.1097, "grad_norm": 12.29411792755127, "learning_rate": 1.4864141414141413e-06, "loss": 6.255525970458985, "step": 70570 }, { "epoch": 0.10975, "grad_norm": 6.789039134979248, "learning_rate": 1.4861616161616164e-06, "loss": 6.246724700927734, "step": 70575 }, { "epoch": 0.1098, "grad_norm": 8.433144569396973, "learning_rate": 1.485909090909091e-06, "loss": 6.219697952270508, "step": 70580 }, { "epoch": 0.10985, "grad_norm": 8.98473072052002, "learning_rate": 1.4856565656565658e-06, "loss": 6.347524642944336, "step": 70585 }, { "epoch": 0.1099, "grad_norm": 8.299501419067383, "learning_rate": 1.4854040404040405e-06, "loss": 6.263444900512695, "step": 70590 }, { "epoch": 0.10995, "grad_norm": 7.714624404907227, "learning_rate": 1.4851515151515153e-06, "loss": 6.297417068481446, "step": 70595 }, { "epoch": 0.11, "grad_norm": 11.265871047973633, "learning_rate": 1.48489898989899e-06, "loss": 6.252170562744141, "step": 70600 }, { "epoch": 0.11005, "grad_norm": 10.227030754089355, "learning_rate": 1.4846464646464648e-06, "loss": 6.235006332397461, "step": 70605 }, { "epoch": 0.1101, "grad_norm": 4.458352565765381, "learning_rate": 1.4843939393939394e-06, "loss": 6.2093955993652346, "step": 70610 }, { "epoch": 0.11015, "grad_norm": 5.629148006439209, "learning_rate": 1.4841414141414143e-06, "loss": 6.168504333496093, "step": 70615 }, { "epoch": 0.1102, "grad_norm": 7.8775200843811035, "learning_rate": 1.4838888888888889e-06, "loss": 6.245716857910156, "step": 70620 }, { "epoch": 0.11025, "grad_norm": 6.965467929840088, "learning_rate": 1.4836363636363637e-06, "loss": 6.25240478515625, "step": 70625 }, { "epoch": 0.1103, "grad_norm": 10.880308151245117, "learning_rate": 1.4833838383838384e-06, "loss": 6.2130992889404295, "step": 70630 }, { "epoch": 0.11035, "grad_norm": 6.548719882965088, "learning_rate": 1.4831313131313132e-06, "loss": 6.271269607543945, "step": 70635 }, { "epoch": 0.1104, "grad_norm": 5.457833290100098, "learning_rate": 1.4828787878787878e-06, "loss": 6.216322326660157, "step": 70640 }, { "epoch": 0.11045, "grad_norm": 6.130603313446045, "learning_rate": 1.4826262626262627e-06, "loss": 6.261547470092774, "step": 70645 }, { "epoch": 0.1105, "grad_norm": 4.87970495223999, "learning_rate": 1.4823737373737377e-06, "loss": 6.277933502197266, "step": 70650 }, { "epoch": 0.11055, "grad_norm": 6.199944972991943, "learning_rate": 1.4821212121212122e-06, "loss": 6.264662170410157, "step": 70655 }, { "epoch": 0.1106, "grad_norm": 4.307426929473877, "learning_rate": 1.4818686868686872e-06, "loss": 6.255156326293945, "step": 70660 }, { "epoch": 0.11065, "grad_norm": 18.038227081298828, "learning_rate": 1.4816161616161618e-06, "loss": 6.241727066040039, "step": 70665 }, { "epoch": 0.1107, "grad_norm": 6.070144176483154, "learning_rate": 1.4813636363636367e-06, "loss": 6.213169097900391, "step": 70670 }, { "epoch": 0.11075, "grad_norm": 6.119583606719971, "learning_rate": 1.4811111111111113e-06, "loss": 6.243610382080078, "step": 70675 }, { "epoch": 0.1108, "grad_norm": 5.817990779876709, "learning_rate": 1.4808585858585861e-06, "loss": 6.266510391235352, "step": 70680 }, { "epoch": 0.11085, "grad_norm": 3.7423250675201416, "learning_rate": 1.4806060606060608e-06, "loss": 6.195258331298828, "step": 70685 }, { "epoch": 0.1109, "grad_norm": 13.172792434692383, "learning_rate": 1.4803535353535356e-06, "loss": 6.250706481933594, "step": 70690 }, { "epoch": 0.11095, "grad_norm": 7.8013916015625, "learning_rate": 1.4801010101010103e-06, "loss": 6.248471450805664, "step": 70695 }, { "epoch": 0.111, "grad_norm": 6.031282424926758, "learning_rate": 1.479848484848485e-06, "loss": 6.208718109130859, "step": 70700 }, { "epoch": 0.11105, "grad_norm": 8.499100685119629, "learning_rate": 1.4795959595959597e-06, "loss": 6.2614189147949215, "step": 70705 }, { "epoch": 0.1111, "grad_norm": 7.5123066902160645, "learning_rate": 1.4793434343434346e-06, "loss": 6.273057556152343, "step": 70710 }, { "epoch": 0.11115, "grad_norm": 9.178064346313477, "learning_rate": 1.4790909090909092e-06, "loss": 6.281016159057617, "step": 70715 }, { "epoch": 0.1112, "grad_norm": 6.169647216796875, "learning_rate": 1.478838383838384e-06, "loss": 6.265370559692383, "step": 70720 }, { "epoch": 0.11125, "grad_norm": 5.66239595413208, "learning_rate": 1.4785858585858587e-06, "loss": 6.289680099487304, "step": 70725 }, { "epoch": 0.1113, "grad_norm": 6.531017303466797, "learning_rate": 1.4783333333333335e-06, "loss": 6.30152587890625, "step": 70730 }, { "epoch": 0.11135, "grad_norm": 11.30678653717041, "learning_rate": 1.4780808080808081e-06, "loss": 6.2258861541748045, "step": 70735 }, { "epoch": 0.1114, "grad_norm": 5.696952819824219, "learning_rate": 1.477828282828283e-06, "loss": 6.205899810791015, "step": 70740 }, { "epoch": 0.11145, "grad_norm": 4.309878349304199, "learning_rate": 1.4775757575757576e-06, "loss": 6.254526901245117, "step": 70745 }, { "epoch": 0.1115, "grad_norm": 6.054539203643799, "learning_rate": 1.4773232323232325e-06, "loss": 6.5326080322265625, "step": 70750 }, { "epoch": 0.11155, "grad_norm": 6.533455848693848, "learning_rate": 1.477070707070707e-06, "loss": 6.2250518798828125, "step": 70755 }, { "epoch": 0.1116, "grad_norm": 10.897772789001465, "learning_rate": 1.4768181818181821e-06, "loss": 6.2970024108886715, "step": 70760 }, { "epoch": 0.11165, "grad_norm": 15.450627326965332, "learning_rate": 1.4765656565656566e-06, "loss": 6.097810745239258, "step": 70765 }, { "epoch": 0.1117, "grad_norm": 4.3769025802612305, "learning_rate": 1.4763131313131316e-06, "loss": 6.230551910400391, "step": 70770 }, { "epoch": 0.11175, "grad_norm": 19.72295570373535, "learning_rate": 1.4760606060606062e-06, "loss": 6.429962921142578, "step": 70775 }, { "epoch": 0.1118, "grad_norm": 29.51087188720703, "learning_rate": 1.475808080808081e-06, "loss": 6.465242004394531, "step": 70780 }, { "epoch": 0.11185, "grad_norm": 5.867867469787598, "learning_rate": 1.4755555555555557e-06, "loss": 6.276731872558594, "step": 70785 }, { "epoch": 0.1119, "grad_norm": 5.2450175285339355, "learning_rate": 1.4753030303030306e-06, "loss": 6.228615951538086, "step": 70790 }, { "epoch": 0.11195, "grad_norm": 4.43710994720459, "learning_rate": 1.4750505050505052e-06, "loss": 6.243564605712891, "step": 70795 }, { "epoch": 0.112, "grad_norm": 6.230121612548828, "learning_rate": 1.47479797979798e-06, "loss": 6.205290222167969, "step": 70800 }, { "epoch": 0.11205, "grad_norm": 3.9443254470825195, "learning_rate": 1.4745454545454547e-06, "loss": 6.311504364013672, "step": 70805 }, { "epoch": 0.1121, "grad_norm": 6.32754373550415, "learning_rate": 1.4742929292929295e-06, "loss": 6.260771942138672, "step": 70810 }, { "epoch": 0.11215, "grad_norm": 4.344845294952393, "learning_rate": 1.4740404040404041e-06, "loss": 6.210549545288086, "step": 70815 }, { "epoch": 0.1122, "grad_norm": 5.034313678741455, "learning_rate": 1.473787878787879e-06, "loss": 6.189772033691407, "step": 70820 }, { "epoch": 0.11225, "grad_norm": 5.114178657531738, "learning_rate": 1.4735353535353536e-06, "loss": 6.271826934814453, "step": 70825 }, { "epoch": 0.1123, "grad_norm": 16.961441040039062, "learning_rate": 1.4732828282828284e-06, "loss": 6.240067672729492, "step": 70830 }, { "epoch": 0.11235, "grad_norm": 8.579511642456055, "learning_rate": 1.473030303030303e-06, "loss": 6.273058319091797, "step": 70835 }, { "epoch": 0.1124, "grad_norm": 5.582240104675293, "learning_rate": 1.472777777777778e-06, "loss": 6.2580726623535154, "step": 70840 }, { "epoch": 0.11245, "grad_norm": 12.315756797790527, "learning_rate": 1.4725252525252525e-06, "loss": 6.173306274414062, "step": 70845 }, { "epoch": 0.1125, "grad_norm": 3.9696338176727295, "learning_rate": 1.4722727272727274e-06, "loss": 6.190219879150391, "step": 70850 }, { "epoch": 0.11255, "grad_norm": 4.551172733306885, "learning_rate": 1.472020202020202e-06, "loss": 6.204149627685547, "step": 70855 }, { "epoch": 0.1126, "grad_norm": 6.222223281860352, "learning_rate": 1.4717676767676769e-06, "loss": 6.212909317016601, "step": 70860 }, { "epoch": 0.11265, "grad_norm": 7.588503360748291, "learning_rate": 1.4715151515151515e-06, "loss": 6.198088073730469, "step": 70865 }, { "epoch": 0.1127, "grad_norm": 4.820215702056885, "learning_rate": 1.4712626262626265e-06, "loss": 6.254140472412109, "step": 70870 }, { "epoch": 0.11275, "grad_norm": 4.541482448577881, "learning_rate": 1.471010101010101e-06, "loss": 6.165190124511719, "step": 70875 }, { "epoch": 0.1128, "grad_norm": 5.472412586212158, "learning_rate": 1.470757575757576e-06, "loss": 6.279804992675781, "step": 70880 }, { "epoch": 0.11285, "grad_norm": 5.102203369140625, "learning_rate": 1.4705050505050506e-06, "loss": 6.303050994873047, "step": 70885 }, { "epoch": 0.1129, "grad_norm": 7.649402618408203, "learning_rate": 1.4702525252525255e-06, "loss": 6.228931427001953, "step": 70890 }, { "epoch": 0.11295, "grad_norm": 4.232516765594482, "learning_rate": 1.4700000000000001e-06, "loss": 6.222789764404297, "step": 70895 }, { "epoch": 0.113, "grad_norm": 4.21560001373291, "learning_rate": 1.469747474747475e-06, "loss": 6.234303665161133, "step": 70900 }, { "epoch": 0.11305, "grad_norm": 7.938162326812744, "learning_rate": 1.4694949494949496e-06, "loss": 6.247964096069336, "step": 70905 }, { "epoch": 0.1131, "grad_norm": 13.286309242248535, "learning_rate": 1.4692424242424244e-06, "loss": 6.366983413696289, "step": 70910 }, { "epoch": 0.11315, "grad_norm": 6.743062973022461, "learning_rate": 1.468989898989899e-06, "loss": 6.444520568847656, "step": 70915 }, { "epoch": 0.1132, "grad_norm": 5.393292427062988, "learning_rate": 1.468737373737374e-06, "loss": 6.247224044799805, "step": 70920 }, { "epoch": 0.11325, "grad_norm": 8.649447441101074, "learning_rate": 1.4684848484848485e-06, "loss": 6.244340896606445, "step": 70925 }, { "epoch": 0.1133, "grad_norm": 4.490907192230225, "learning_rate": 1.4682323232323234e-06, "loss": 6.1954193115234375, "step": 70930 }, { "epoch": 0.11335, "grad_norm": 4.730749607086182, "learning_rate": 1.467979797979798e-06, "loss": 6.199177169799805, "step": 70935 }, { "epoch": 0.1134, "grad_norm": 11.80495834350586, "learning_rate": 1.4677272727272728e-06, "loss": 6.297029876708985, "step": 70940 }, { "epoch": 0.11345, "grad_norm": 7.061036586761475, "learning_rate": 1.4674747474747475e-06, "loss": 6.222843933105469, "step": 70945 }, { "epoch": 0.1135, "grad_norm": 4.385884761810303, "learning_rate": 1.4672222222222223e-06, "loss": 6.249789810180664, "step": 70950 }, { "epoch": 0.11355, "grad_norm": 8.127381324768066, "learning_rate": 1.466969696969697e-06, "loss": 6.27716064453125, "step": 70955 }, { "epoch": 0.1136, "grad_norm": 4.742341041564941, "learning_rate": 1.4667171717171718e-06, "loss": 6.255127716064453, "step": 70960 }, { "epoch": 0.11365, "grad_norm": 5.915189266204834, "learning_rate": 1.4664646464646464e-06, "loss": 6.250969696044922, "step": 70965 }, { "epoch": 0.1137, "grad_norm": 5.168898582458496, "learning_rate": 1.4662121212121213e-06, "loss": 6.227506256103515, "step": 70970 }, { "epoch": 0.11375, "grad_norm": 5.928437232971191, "learning_rate": 1.4659595959595959e-06, "loss": 6.228416442871094, "step": 70975 }, { "epoch": 0.1138, "grad_norm": 4.894220352172852, "learning_rate": 1.465707070707071e-06, "loss": 6.210292816162109, "step": 70980 }, { "epoch": 0.11385, "grad_norm": 16.99139404296875, "learning_rate": 1.4654545454545454e-06, "loss": 6.321816635131836, "step": 70985 }, { "epoch": 0.1139, "grad_norm": 4.162569046020508, "learning_rate": 1.4652020202020204e-06, "loss": 6.242947387695312, "step": 70990 }, { "epoch": 0.11395, "grad_norm": 7.3562211990356445, "learning_rate": 1.464949494949495e-06, "loss": 6.239608383178711, "step": 70995 }, { "epoch": 0.114, "grad_norm": 6.449595928192139, "learning_rate": 1.4646969696969699e-06, "loss": 6.2353355407714846, "step": 71000 }, { "epoch": 0.11405, "grad_norm": 17.703411102294922, "learning_rate": 1.4644444444444445e-06, "loss": 6.301509857177734, "step": 71005 }, { "epoch": 0.1141, "grad_norm": 17.145145416259766, "learning_rate": 1.4641919191919194e-06, "loss": 6.287225723266602, "step": 71010 }, { "epoch": 0.11415, "grad_norm": 17.487895965576172, "learning_rate": 1.463939393939394e-06, "loss": 6.280493927001953, "step": 71015 }, { "epoch": 0.1142, "grad_norm": 6.6910881996154785, "learning_rate": 1.4636868686868688e-06, "loss": 6.229396820068359, "step": 71020 }, { "epoch": 0.11425, "grad_norm": 5.5057373046875, "learning_rate": 1.4634343434343435e-06, "loss": 6.2261192321777346, "step": 71025 }, { "epoch": 0.1143, "grad_norm": 11.558438301086426, "learning_rate": 1.4631818181818183e-06, "loss": 6.278601455688476, "step": 71030 }, { "epoch": 0.11435, "grad_norm": 4.008595943450928, "learning_rate": 1.462929292929293e-06, "loss": 6.220378112792969, "step": 71035 }, { "epoch": 0.1144, "grad_norm": 6.240907192230225, "learning_rate": 1.4626767676767678e-06, "loss": 6.298085784912109, "step": 71040 }, { "epoch": 0.11445, "grad_norm": 7.405116558074951, "learning_rate": 1.4624242424242424e-06, "loss": 6.216365814208984, "step": 71045 }, { "epoch": 0.1145, "grad_norm": 8.802513122558594, "learning_rate": 1.4621717171717172e-06, "loss": 6.241999816894531, "step": 71050 }, { "epoch": 0.11455, "grad_norm": 4.959086894989014, "learning_rate": 1.4619191919191919e-06, "loss": 6.202229309082031, "step": 71055 }, { "epoch": 0.1146, "grad_norm": 8.905279159545898, "learning_rate": 1.4616666666666667e-06, "loss": 6.264311981201172, "step": 71060 }, { "epoch": 0.11465, "grad_norm": 6.940439701080322, "learning_rate": 1.4614141414141413e-06, "loss": 6.252835083007812, "step": 71065 }, { "epoch": 0.1147, "grad_norm": 4.582200527191162, "learning_rate": 1.4611616161616162e-06, "loss": 6.2541454315185545, "step": 71070 }, { "epoch": 0.11475, "grad_norm": 5.846879482269287, "learning_rate": 1.4609090909090912e-06, "loss": 6.299633407592774, "step": 71075 }, { "epoch": 0.1148, "grad_norm": 6.64246129989624, "learning_rate": 1.4606565656565659e-06, "loss": 6.242847442626953, "step": 71080 }, { "epoch": 0.11485, "grad_norm": 6.720762252807617, "learning_rate": 1.4604040404040407e-06, "loss": 6.27467041015625, "step": 71085 }, { "epoch": 0.1149, "grad_norm": 4.488288402557373, "learning_rate": 1.4601515151515153e-06, "loss": 6.2323486328125, "step": 71090 }, { "epoch": 0.11495, "grad_norm": 4.855941295623779, "learning_rate": 1.4598989898989902e-06, "loss": 6.19652214050293, "step": 71095 }, { "epoch": 0.115, "grad_norm": 23.835634231567383, "learning_rate": 1.4596464646464648e-06, "loss": 6.456216430664062, "step": 71100 }, { "epoch": 0.11505, "grad_norm": 5.500720024108887, "learning_rate": 1.4593939393939397e-06, "loss": 6.254848480224609, "step": 71105 }, { "epoch": 0.1151, "grad_norm": 4.77895450592041, "learning_rate": 1.4591414141414143e-06, "loss": 6.28772201538086, "step": 71110 }, { "epoch": 0.11515, "grad_norm": 6.911376476287842, "learning_rate": 1.4588888888888891e-06, "loss": 6.27319221496582, "step": 71115 }, { "epoch": 0.1152, "grad_norm": 5.526822090148926, "learning_rate": 1.4586363636363638e-06, "loss": 6.242013168334961, "step": 71120 }, { "epoch": 0.11525, "grad_norm": 4.670149803161621, "learning_rate": 1.4583838383838386e-06, "loss": 6.197670745849609, "step": 71125 }, { "epoch": 0.1153, "grad_norm": 7.348355770111084, "learning_rate": 1.4581313131313132e-06, "loss": 6.241608047485352, "step": 71130 }, { "epoch": 0.11535, "grad_norm": 5.692392826080322, "learning_rate": 1.457878787878788e-06, "loss": 6.2594757080078125, "step": 71135 }, { "epoch": 0.1154, "grad_norm": 6.052067756652832, "learning_rate": 1.4576262626262627e-06, "loss": 6.228694152832031, "step": 71140 }, { "epoch": 0.11545, "grad_norm": 9.309746742248535, "learning_rate": 1.4573737373737375e-06, "loss": 6.254579162597656, "step": 71145 }, { "epoch": 0.1155, "grad_norm": 9.342081069946289, "learning_rate": 1.4571212121212122e-06, "loss": 6.380626678466797, "step": 71150 }, { "epoch": 0.11555, "grad_norm": 4.293792724609375, "learning_rate": 1.456868686868687e-06, "loss": 6.2431079864501955, "step": 71155 }, { "epoch": 0.1156, "grad_norm": 4.151412010192871, "learning_rate": 1.4566161616161617e-06, "loss": 6.245188140869141, "step": 71160 }, { "epoch": 0.11565, "grad_norm": 4.770872592926025, "learning_rate": 1.4563636363636365e-06, "loss": 6.2585704803466795, "step": 71165 }, { "epoch": 0.1157, "grad_norm": 5.462607383728027, "learning_rate": 1.4561111111111111e-06, "loss": 6.263034439086914, "step": 71170 }, { "epoch": 0.11575, "grad_norm": 5.07378625869751, "learning_rate": 1.4558585858585862e-06, "loss": 6.2819366455078125, "step": 71175 }, { "epoch": 0.1158, "grad_norm": 4.520764350891113, "learning_rate": 1.4556060606060606e-06, "loss": 6.269296646118164, "step": 71180 }, { "epoch": 0.11585, "grad_norm": 8.329793930053711, "learning_rate": 1.4553535353535356e-06, "loss": 6.255178451538086, "step": 71185 }, { "epoch": 0.1159, "grad_norm": 5.227603912353516, "learning_rate": 1.4551010101010103e-06, "loss": 6.267551422119141, "step": 71190 }, { "epoch": 0.11595, "grad_norm": 4.839012622833252, "learning_rate": 1.4548484848484851e-06, "loss": 6.279519271850586, "step": 71195 }, { "epoch": 0.116, "grad_norm": 8.156819343566895, "learning_rate": 1.4545959595959597e-06, "loss": 6.2913970947265625, "step": 71200 }, { "epoch": 0.11605, "grad_norm": 8.790789604187012, "learning_rate": 1.4543434343434346e-06, "loss": 6.233060836791992, "step": 71205 }, { "epoch": 0.1161, "grad_norm": 5.6531901359558105, "learning_rate": 1.4540909090909092e-06, "loss": 6.315775299072266, "step": 71210 }, { "epoch": 0.11615, "grad_norm": 4.299149990081787, "learning_rate": 1.453838383838384e-06, "loss": 6.226066970825196, "step": 71215 }, { "epoch": 0.1162, "grad_norm": 6.813946723937988, "learning_rate": 1.4535858585858587e-06, "loss": 6.263469314575195, "step": 71220 }, { "epoch": 0.11625, "grad_norm": 4.879310131072998, "learning_rate": 1.4533333333333335e-06, "loss": 6.392726898193359, "step": 71225 }, { "epoch": 0.1163, "grad_norm": 11.26491641998291, "learning_rate": 1.4530808080808082e-06, "loss": 6.344464874267578, "step": 71230 }, { "epoch": 0.11635, "grad_norm": 12.988669395446777, "learning_rate": 1.452828282828283e-06, "loss": 6.217463684082031, "step": 71235 }, { "epoch": 0.1164, "grad_norm": 5.648284912109375, "learning_rate": 1.4525757575757576e-06, "loss": 6.240857315063477, "step": 71240 }, { "epoch": 0.11645, "grad_norm": 5.77839994430542, "learning_rate": 1.4523232323232325e-06, "loss": 6.2349601745605465, "step": 71245 }, { "epoch": 0.1165, "grad_norm": 8.21295166015625, "learning_rate": 1.4520707070707071e-06, "loss": 6.2339519500732425, "step": 71250 }, { "epoch": 0.11655, "grad_norm": 6.027764320373535, "learning_rate": 1.451818181818182e-06, "loss": 6.217894744873047, "step": 71255 }, { "epoch": 0.1166, "grad_norm": 6.005073547363281, "learning_rate": 1.4515656565656566e-06, "loss": 6.221229934692383, "step": 71260 }, { "epoch": 0.11665, "grad_norm": 5.556330680847168, "learning_rate": 1.4513131313131314e-06, "loss": 6.35010986328125, "step": 71265 }, { "epoch": 0.1167, "grad_norm": 7.194943904876709, "learning_rate": 1.451060606060606e-06, "loss": 6.242879486083984, "step": 71270 }, { "epoch": 0.11675, "grad_norm": 7.847644805908203, "learning_rate": 1.450808080808081e-06, "loss": 6.217774200439453, "step": 71275 }, { "epoch": 0.1168, "grad_norm": 12.432095527648926, "learning_rate": 1.4505555555555555e-06, "loss": 6.310239028930664, "step": 71280 }, { "epoch": 0.11685, "grad_norm": 8.951979637145996, "learning_rate": 1.4503030303030306e-06, "loss": 6.221516799926758, "step": 71285 }, { "epoch": 0.1169, "grad_norm": 32.71767807006836, "learning_rate": 1.450050505050505e-06, "loss": 6.232900619506836, "step": 71290 }, { "epoch": 0.11695, "grad_norm": 4.789072036743164, "learning_rate": 1.44979797979798e-06, "loss": 6.220142364501953, "step": 71295 }, { "epoch": 0.117, "grad_norm": 13.722853660583496, "learning_rate": 1.4495454545454547e-06, "loss": 6.261393356323242, "step": 71300 }, { "epoch": 0.11705, "grad_norm": 6.538764476776123, "learning_rate": 1.4492929292929295e-06, "loss": 6.193209838867188, "step": 71305 }, { "epoch": 0.1171, "grad_norm": 21.733257293701172, "learning_rate": 1.4490404040404042e-06, "loss": 6.200273895263672, "step": 71310 }, { "epoch": 0.11715, "grad_norm": 7.157898426055908, "learning_rate": 1.448787878787879e-06, "loss": 6.228910827636719, "step": 71315 }, { "epoch": 0.1172, "grad_norm": 5.489356517791748, "learning_rate": 1.4485353535353536e-06, "loss": 6.244960784912109, "step": 71320 }, { "epoch": 0.11725, "grad_norm": 18.78152847290039, "learning_rate": 1.4482828282828285e-06, "loss": 6.2358348846435545, "step": 71325 }, { "epoch": 0.1173, "grad_norm": 5.586100101470947, "learning_rate": 1.448030303030303e-06, "loss": 6.281199645996094, "step": 71330 }, { "epoch": 0.11735, "grad_norm": 7.718406677246094, "learning_rate": 1.447777777777778e-06, "loss": 6.257496643066406, "step": 71335 }, { "epoch": 0.1174, "grad_norm": 5.159696102142334, "learning_rate": 1.4475252525252526e-06, "loss": 6.281306457519531, "step": 71340 }, { "epoch": 0.11745, "grad_norm": 6.560358047485352, "learning_rate": 1.4472727272727274e-06, "loss": 6.254804992675782, "step": 71345 }, { "epoch": 0.1175, "grad_norm": 7.071283340454102, "learning_rate": 1.447020202020202e-06, "loss": 6.470374298095703, "step": 71350 }, { "epoch": 0.11755, "grad_norm": 4.781947612762451, "learning_rate": 1.4467676767676769e-06, "loss": 6.231656646728515, "step": 71355 }, { "epoch": 0.1176, "grad_norm": 28.44536018371582, "learning_rate": 1.4465151515151515e-06, "loss": 6.391431427001953, "step": 71360 }, { "epoch": 0.11765, "grad_norm": 12.774090766906738, "learning_rate": 1.4462626262626264e-06, "loss": 6.249874496459961, "step": 71365 }, { "epoch": 0.1177, "grad_norm": 5.1287360191345215, "learning_rate": 1.446010101010101e-06, "loss": 6.229756927490234, "step": 71370 }, { "epoch": 0.11775, "grad_norm": 7.840236663818359, "learning_rate": 1.4457575757575758e-06, "loss": 6.2183074951171875, "step": 71375 }, { "epoch": 0.1178, "grad_norm": 8.650823593139648, "learning_rate": 1.4455050505050505e-06, "loss": 6.23829460144043, "step": 71380 }, { "epoch": 0.11785, "grad_norm": 6.975605487823486, "learning_rate": 1.4452525252525255e-06, "loss": 6.20633544921875, "step": 71385 }, { "epoch": 0.1179, "grad_norm": 20.962249755859375, "learning_rate": 1.445e-06, "loss": 6.29403076171875, "step": 71390 }, { "epoch": 0.11795, "grad_norm": 8.74927043914795, "learning_rate": 1.444747474747475e-06, "loss": 6.220177459716797, "step": 71395 }, { "epoch": 0.118, "grad_norm": 7.122379302978516, "learning_rate": 1.4444949494949494e-06, "loss": 6.235001754760742, "step": 71400 }, { "epoch": 0.11805, "grad_norm": 5.646695137023926, "learning_rate": 1.4442424242424245e-06, "loss": 6.278347015380859, "step": 71405 }, { "epoch": 0.1181, "grad_norm": 4.741393089294434, "learning_rate": 1.443989898989899e-06, "loss": 6.260182571411133, "step": 71410 }, { "epoch": 0.11815, "grad_norm": 4.368388652801514, "learning_rate": 1.443737373737374e-06, "loss": 6.293814086914063, "step": 71415 }, { "epoch": 0.1182, "grad_norm": 19.20926284790039, "learning_rate": 1.4434848484848486e-06, "loss": 6.278135681152344, "step": 71420 }, { "epoch": 0.11825, "grad_norm": 9.371469497680664, "learning_rate": 1.4432323232323234e-06, "loss": 6.213376998901367, "step": 71425 }, { "epoch": 0.1183, "grad_norm": 10.20363712310791, "learning_rate": 1.442979797979798e-06, "loss": 6.325283050537109, "step": 71430 }, { "epoch": 0.11835, "grad_norm": 4.765652656555176, "learning_rate": 1.4427272727272729e-06, "loss": 6.2411041259765625, "step": 71435 }, { "epoch": 0.1184, "grad_norm": 7.490548133850098, "learning_rate": 1.4424747474747475e-06, "loss": 6.234325408935547, "step": 71440 }, { "epoch": 0.11845, "grad_norm": 3.6883738040924072, "learning_rate": 1.4422222222222223e-06, "loss": 6.259643173217773, "step": 71445 }, { "epoch": 0.1185, "grad_norm": 6.604689598083496, "learning_rate": 1.441969696969697e-06, "loss": 6.2473602294921875, "step": 71450 }, { "epoch": 0.11855, "grad_norm": 6.283584117889404, "learning_rate": 1.4417171717171718e-06, "loss": 6.25169792175293, "step": 71455 }, { "epoch": 0.1186, "grad_norm": 6.663431644439697, "learning_rate": 1.4414646464646464e-06, "loss": 6.244150543212891, "step": 71460 }, { "epoch": 0.11865, "grad_norm": 7.8146538734436035, "learning_rate": 1.4412121212121213e-06, "loss": 6.239147186279297, "step": 71465 }, { "epoch": 0.1187, "grad_norm": 8.228196144104004, "learning_rate": 1.440959595959596e-06, "loss": 6.2452037811279295, "step": 71470 }, { "epoch": 0.11875, "grad_norm": 5.554755687713623, "learning_rate": 1.4407070707070708e-06, "loss": 6.219847869873047, "step": 71475 }, { "epoch": 0.1188, "grad_norm": 4.2726240158081055, "learning_rate": 1.4404545454545454e-06, "loss": 6.294058227539063, "step": 71480 }, { "epoch": 0.11885, "grad_norm": 6.243851184844971, "learning_rate": 1.4402020202020202e-06, "loss": 6.276213073730469, "step": 71485 }, { "epoch": 0.1189, "grad_norm": 13.441274642944336, "learning_rate": 1.4399494949494949e-06, "loss": 6.236750793457031, "step": 71490 }, { "epoch": 0.11895, "grad_norm": 5.370758533477783, "learning_rate": 1.43969696969697e-06, "loss": 6.18463363647461, "step": 71495 }, { "epoch": 0.119, "grad_norm": 6.423685073852539, "learning_rate": 1.4394444444444448e-06, "loss": 6.267454147338867, "step": 71500 }, { "epoch": 0.11905, "grad_norm": 7.8532304763793945, "learning_rate": 1.4391919191919194e-06, "loss": 6.22725830078125, "step": 71505 }, { "epoch": 0.1191, "grad_norm": 4.662537097930908, "learning_rate": 1.4389393939393942e-06, "loss": 6.2709697723388675, "step": 71510 }, { "epoch": 0.11915, "grad_norm": 11.405319213867188, "learning_rate": 1.4386868686868689e-06, "loss": 6.257378005981446, "step": 71515 }, { "epoch": 0.1192, "grad_norm": 4.9941840171813965, "learning_rate": 1.4384343434343437e-06, "loss": 6.242971801757813, "step": 71520 }, { "epoch": 0.11925, "grad_norm": 6.947277545928955, "learning_rate": 1.4381818181818183e-06, "loss": 6.355160522460937, "step": 71525 }, { "epoch": 0.1193, "grad_norm": 8.209263801574707, "learning_rate": 1.4379292929292932e-06, "loss": 6.184441375732422, "step": 71530 }, { "epoch": 0.11935, "grad_norm": 12.91999626159668, "learning_rate": 1.4376767676767678e-06, "loss": 6.240282821655273, "step": 71535 }, { "epoch": 0.1194, "grad_norm": 12.266441345214844, "learning_rate": 1.4374242424242426e-06, "loss": 6.297610473632813, "step": 71540 }, { "epoch": 0.11945, "grad_norm": 4.840099334716797, "learning_rate": 1.4371717171717173e-06, "loss": 6.241071701049805, "step": 71545 }, { "epoch": 0.1195, "grad_norm": 6.3402628898620605, "learning_rate": 1.4369191919191921e-06, "loss": 6.310507965087891, "step": 71550 }, { "epoch": 0.11955, "grad_norm": 4.889016151428223, "learning_rate": 1.4366666666666667e-06, "loss": 6.2960365295410154, "step": 71555 }, { "epoch": 0.1196, "grad_norm": 9.210515022277832, "learning_rate": 1.4364141414141416e-06, "loss": 6.256860733032227, "step": 71560 }, { "epoch": 0.11965, "grad_norm": 8.864142417907715, "learning_rate": 1.4361616161616162e-06, "loss": 6.341326522827148, "step": 71565 }, { "epoch": 0.1197, "grad_norm": 9.634050369262695, "learning_rate": 1.435909090909091e-06, "loss": 6.237086486816406, "step": 71570 }, { "epoch": 0.11975, "grad_norm": 4.480681896209717, "learning_rate": 1.4356565656565657e-06, "loss": 6.218888854980468, "step": 71575 }, { "epoch": 0.1198, "grad_norm": 4.425445556640625, "learning_rate": 1.4354040404040405e-06, "loss": 6.185700225830078, "step": 71580 }, { "epoch": 0.11985, "grad_norm": 7.560326099395752, "learning_rate": 1.4351515151515152e-06, "loss": 6.272945785522461, "step": 71585 }, { "epoch": 0.1199, "grad_norm": 4.585119724273682, "learning_rate": 1.4348989898989902e-06, "loss": 6.287757110595703, "step": 71590 }, { "epoch": 0.11995, "grad_norm": 7.771923065185547, "learning_rate": 1.4346464646464646e-06, "loss": 6.308366394042968, "step": 71595 }, { "epoch": 0.12, "grad_norm": 3.8172333240509033, "learning_rate": 1.4343939393939397e-06, "loss": 6.282509231567383, "step": 71600 }, { "epoch": 0.12005, "grad_norm": 3.626612901687622, "learning_rate": 1.4341414141414143e-06, "loss": 6.318154144287109, "step": 71605 }, { "epoch": 0.1201, "grad_norm": 5.258589267730713, "learning_rate": 1.4338888888888892e-06, "loss": 6.234099197387695, "step": 71610 }, { "epoch": 0.12015, "grad_norm": 6.448572635650635, "learning_rate": 1.4336363636363638e-06, "loss": 6.229687118530274, "step": 71615 }, { "epoch": 0.1202, "grad_norm": 5.872189998626709, "learning_rate": 1.4333838383838386e-06, "loss": 6.234915161132813, "step": 71620 }, { "epoch": 0.12025, "grad_norm": 5.39528226852417, "learning_rate": 1.4331313131313133e-06, "loss": 6.238816452026367, "step": 71625 }, { "epoch": 0.1203, "grad_norm": 7.770577907562256, "learning_rate": 1.432878787878788e-06, "loss": 6.185610580444336, "step": 71630 }, { "epoch": 0.12035, "grad_norm": 6.648695468902588, "learning_rate": 1.4326262626262627e-06, "loss": 6.2485710144042965, "step": 71635 }, { "epoch": 0.1204, "grad_norm": 3.9885382652282715, "learning_rate": 1.4323737373737376e-06, "loss": 6.185882186889648, "step": 71640 }, { "epoch": 0.12045, "grad_norm": 7.999271869659424, "learning_rate": 1.4321212121212122e-06, "loss": 6.212902069091797, "step": 71645 }, { "epoch": 0.1205, "grad_norm": 8.893942832946777, "learning_rate": 1.431868686868687e-06, "loss": 6.251136016845703, "step": 71650 }, { "epoch": 0.12055, "grad_norm": 6.432363510131836, "learning_rate": 1.4316161616161617e-06, "loss": 6.241813278198242, "step": 71655 }, { "epoch": 0.1206, "grad_norm": 10.815011024475098, "learning_rate": 1.4313636363636365e-06, "loss": 6.2720191955566404, "step": 71660 }, { "epoch": 0.12065, "grad_norm": 7.441836833953857, "learning_rate": 1.4311111111111111e-06, "loss": 6.241923141479492, "step": 71665 }, { "epoch": 0.1207, "grad_norm": 6.550032138824463, "learning_rate": 1.430858585858586e-06, "loss": 6.213164901733398, "step": 71670 }, { "epoch": 0.12075, "grad_norm": 6.643069267272949, "learning_rate": 1.4306060606060606e-06, "loss": 6.420645141601563, "step": 71675 }, { "epoch": 0.1208, "grad_norm": 9.30480670928955, "learning_rate": 1.4303535353535355e-06, "loss": 6.288536834716797, "step": 71680 }, { "epoch": 0.12085, "grad_norm": 6.92040491104126, "learning_rate": 1.43010101010101e-06, "loss": 6.243875885009766, "step": 71685 }, { "epoch": 0.1209, "grad_norm": 5.930427074432373, "learning_rate": 1.429848484848485e-06, "loss": 6.455677795410156, "step": 71690 }, { "epoch": 0.12095, "grad_norm": 4.108961582183838, "learning_rate": 1.4295959595959596e-06, "loss": 6.2149810791015625, "step": 71695 }, { "epoch": 0.121, "grad_norm": 8.15344524383545, "learning_rate": 1.4293434343434346e-06, "loss": 6.306789779663086, "step": 71700 }, { "epoch": 0.12105, "grad_norm": 5.950042724609375, "learning_rate": 1.429090909090909e-06, "loss": 6.246378326416016, "step": 71705 }, { "epoch": 0.1211, "grad_norm": 5.294959545135498, "learning_rate": 1.428838383838384e-06, "loss": 6.336148834228515, "step": 71710 }, { "epoch": 0.12115, "grad_norm": 7.722880840301514, "learning_rate": 1.4285858585858587e-06, "loss": 6.262268829345703, "step": 71715 }, { "epoch": 0.1212, "grad_norm": 9.48990249633789, "learning_rate": 1.4283333333333336e-06, "loss": 6.415673828125, "step": 71720 }, { "epoch": 0.12125, "grad_norm": 7.858924388885498, "learning_rate": 1.4280808080808082e-06, "loss": 6.241584396362304, "step": 71725 }, { "epoch": 0.1213, "grad_norm": 18.259157180786133, "learning_rate": 1.427828282828283e-06, "loss": 6.465480041503906, "step": 71730 }, { "epoch": 0.12135, "grad_norm": 5.568859100341797, "learning_rate": 1.4275757575757577e-06, "loss": 6.239472579956055, "step": 71735 }, { "epoch": 0.1214, "grad_norm": 4.406759262084961, "learning_rate": 1.4273232323232325e-06, "loss": 6.19481201171875, "step": 71740 }, { "epoch": 0.12145, "grad_norm": 4.438705921173096, "learning_rate": 1.4270707070707071e-06, "loss": 6.228238677978515, "step": 71745 }, { "epoch": 0.1215, "grad_norm": 4.7827253341674805, "learning_rate": 1.426818181818182e-06, "loss": 6.249240112304688, "step": 71750 }, { "epoch": 0.12155, "grad_norm": 4.186526775360107, "learning_rate": 1.4265656565656566e-06, "loss": 6.224688720703125, "step": 71755 }, { "epoch": 0.1216, "grad_norm": 3.9088683128356934, "learning_rate": 1.4263131313131315e-06, "loss": 6.2279609680175785, "step": 71760 }, { "epoch": 0.12165, "grad_norm": 4.037386894226074, "learning_rate": 1.426060606060606e-06, "loss": 6.276984786987304, "step": 71765 }, { "epoch": 0.1217, "grad_norm": 6.331262588500977, "learning_rate": 1.425808080808081e-06, "loss": 6.231372833251953, "step": 71770 }, { "epoch": 0.12175, "grad_norm": 5.777124404907227, "learning_rate": 1.4255555555555556e-06, "loss": 6.275795364379883, "step": 71775 }, { "epoch": 0.1218, "grad_norm": 7.851014137268066, "learning_rate": 1.4253030303030304e-06, "loss": 6.260978698730469, "step": 71780 }, { "epoch": 0.12185, "grad_norm": 5.365261077880859, "learning_rate": 1.425050505050505e-06, "loss": 6.336936187744141, "step": 71785 }, { "epoch": 0.1219, "grad_norm": 7.13517951965332, "learning_rate": 1.4247979797979799e-06, "loss": 6.244535064697265, "step": 71790 }, { "epoch": 0.12195, "grad_norm": 21.6882381439209, "learning_rate": 1.4245454545454545e-06, "loss": 6.267034912109375, "step": 71795 }, { "epoch": 0.122, "grad_norm": 7.338829040527344, "learning_rate": 1.4242929292929296e-06, "loss": 6.199608612060547, "step": 71800 }, { "epoch": 0.12205, "grad_norm": 5.843225955963135, "learning_rate": 1.424040404040404e-06, "loss": 6.245959854125976, "step": 71805 }, { "epoch": 0.1221, "grad_norm": 8.186197280883789, "learning_rate": 1.423787878787879e-06, "loss": 6.268339538574219, "step": 71810 }, { "epoch": 0.12215, "grad_norm": 5.99219274520874, "learning_rate": 1.4235353535353537e-06, "loss": 6.229582595825195, "step": 71815 }, { "epoch": 0.1222, "grad_norm": 6.359896659851074, "learning_rate": 1.4232828282828285e-06, "loss": 6.274512863159179, "step": 71820 }, { "epoch": 0.12225, "grad_norm": 5.278909206390381, "learning_rate": 1.4230303030303031e-06, "loss": 6.249585723876953, "step": 71825 }, { "epoch": 0.1223, "grad_norm": 6.480034828186035, "learning_rate": 1.422777777777778e-06, "loss": 6.246062850952148, "step": 71830 }, { "epoch": 0.12235, "grad_norm": 5.373158931732178, "learning_rate": 1.4225252525252526e-06, "loss": 6.236580657958984, "step": 71835 }, { "epoch": 0.1224, "grad_norm": 26.370162963867188, "learning_rate": 1.4222727272727274e-06, "loss": 6.921978759765625, "step": 71840 }, { "epoch": 0.12245, "grad_norm": 15.679486274719238, "learning_rate": 1.422020202020202e-06, "loss": 6.238547515869141, "step": 71845 }, { "epoch": 0.1225, "grad_norm": 5.1182942390441895, "learning_rate": 1.421767676767677e-06, "loss": 6.270463562011718, "step": 71850 }, { "epoch": 0.12255, "grad_norm": 6.9262003898620605, "learning_rate": 1.4215151515151515e-06, "loss": 6.198942565917969, "step": 71855 }, { "epoch": 0.1226, "grad_norm": 29.73260498046875, "learning_rate": 1.4212626262626264e-06, "loss": 6.40916748046875, "step": 71860 }, { "epoch": 0.12265, "grad_norm": 4.349701881408691, "learning_rate": 1.421010101010101e-06, "loss": 6.421334838867187, "step": 71865 }, { "epoch": 0.1227, "grad_norm": 10.244588851928711, "learning_rate": 1.4207575757575759e-06, "loss": 6.248141098022461, "step": 71870 }, { "epoch": 0.12275, "grad_norm": 4.586117267608643, "learning_rate": 1.4205050505050505e-06, "loss": 6.294564437866211, "step": 71875 }, { "epoch": 0.1228, "grad_norm": 18.98235321044922, "learning_rate": 1.4202525252525253e-06, "loss": 6.349617004394531, "step": 71880 }, { "epoch": 0.12285, "grad_norm": 11.611150741577148, "learning_rate": 1.42e-06, "loss": 6.464691162109375, "step": 71885 }, { "epoch": 0.1229, "grad_norm": 6.093522548675537, "learning_rate": 1.4197474747474748e-06, "loss": 6.257992935180664, "step": 71890 }, { "epoch": 0.12295, "grad_norm": 3.9871013164520264, "learning_rate": 1.4194949494949494e-06, "loss": 6.249969863891602, "step": 71895 }, { "epoch": 0.123, "grad_norm": 5.888080596923828, "learning_rate": 1.4192424242424243e-06, "loss": 6.215484619140625, "step": 71900 }, { "epoch": 0.12305, "grad_norm": 7.12352180480957, "learning_rate": 1.418989898989899e-06, "loss": 6.226937866210937, "step": 71905 }, { "epoch": 0.1231, "grad_norm": 5.5171732902526855, "learning_rate": 1.418737373737374e-06, "loss": 6.271125793457031, "step": 71910 }, { "epoch": 0.12315, "grad_norm": 6.6646857261657715, "learning_rate": 1.4184848484848484e-06, "loss": 6.31456298828125, "step": 71915 }, { "epoch": 0.1232, "grad_norm": 3.6664698123931885, "learning_rate": 1.4182323232323234e-06, "loss": 6.264922332763672, "step": 71920 }, { "epoch": 0.12325, "grad_norm": 5.391177177429199, "learning_rate": 1.4179797979797983e-06, "loss": 6.2572998046875, "step": 71925 }, { "epoch": 0.1233, "grad_norm": 5.691713333129883, "learning_rate": 1.417727272727273e-06, "loss": 6.282867050170898, "step": 71930 }, { "epoch": 0.12335, "grad_norm": 6.288618564605713, "learning_rate": 1.4174747474747477e-06, "loss": 6.258311462402344, "step": 71935 }, { "epoch": 0.1234, "grad_norm": 8.255109786987305, "learning_rate": 1.4172222222222224e-06, "loss": 6.266722106933594, "step": 71940 }, { "epoch": 0.12345, "grad_norm": 5.730734825134277, "learning_rate": 1.4169696969696972e-06, "loss": 6.2532917022705075, "step": 71945 }, { "epoch": 0.1235, "grad_norm": 5.736021041870117, "learning_rate": 1.4167171717171718e-06, "loss": 6.244854354858399, "step": 71950 }, { "epoch": 0.12355, "grad_norm": 6.226949691772461, "learning_rate": 1.4164646464646467e-06, "loss": 6.192862319946289, "step": 71955 }, { "epoch": 0.1236, "grad_norm": 12.296841621398926, "learning_rate": 1.4162121212121213e-06, "loss": 6.231365585327149, "step": 71960 }, { "epoch": 0.12365, "grad_norm": 4.853231430053711, "learning_rate": 1.4159595959595962e-06, "loss": 6.208079147338867, "step": 71965 }, { "epoch": 0.1237, "grad_norm": 3.827458620071411, "learning_rate": 1.4157070707070708e-06, "loss": 6.17347297668457, "step": 71970 }, { "epoch": 0.12375, "grad_norm": 4.6262407302856445, "learning_rate": 1.4154545454545456e-06, "loss": 6.229492568969727, "step": 71975 }, { "epoch": 0.1238, "grad_norm": 10.92652702331543, "learning_rate": 1.4152020202020203e-06, "loss": 6.339496994018555, "step": 71980 }, { "epoch": 0.12385, "grad_norm": 4.707934379577637, "learning_rate": 1.414949494949495e-06, "loss": 6.2349090576171875, "step": 71985 }, { "epoch": 0.1239, "grad_norm": 9.398394584655762, "learning_rate": 1.4146969696969697e-06, "loss": 6.231025695800781, "step": 71990 }, { "epoch": 0.12395, "grad_norm": 6.140611171722412, "learning_rate": 1.4144444444444446e-06, "loss": 6.368376541137695, "step": 71995 }, { "epoch": 0.124, "grad_norm": 4.217563629150391, "learning_rate": 1.4141919191919192e-06, "loss": 6.248024368286133, "step": 72000 }, { "epoch": 0.12405, "grad_norm": 3.8126609325408936, "learning_rate": 1.4139393939393943e-06, "loss": 6.285043716430664, "step": 72005 }, { "epoch": 0.1241, "grad_norm": 5.866466045379639, "learning_rate": 1.4136868686868687e-06, "loss": 6.2802375793457035, "step": 72010 }, { "epoch": 0.12415, "grad_norm": 5.2998223304748535, "learning_rate": 1.4134343434343437e-06, "loss": 6.43383560180664, "step": 72015 }, { "epoch": 0.1242, "grad_norm": 4.858412265777588, "learning_rate": 1.4131818181818184e-06, "loss": 6.265129089355469, "step": 72020 }, { "epoch": 0.12425, "grad_norm": 9.119839668273926, "learning_rate": 1.4129292929292932e-06, "loss": 6.255317687988281, "step": 72025 }, { "epoch": 0.1243, "grad_norm": 36.799049377441406, "learning_rate": 1.4126767676767678e-06, "loss": 6.411750030517578, "step": 72030 }, { "epoch": 0.12435, "grad_norm": 6.8954548835754395, "learning_rate": 1.4124242424242427e-06, "loss": 6.221443939208984, "step": 72035 }, { "epoch": 0.1244, "grad_norm": 6.533575057983398, "learning_rate": 1.4121717171717173e-06, "loss": 6.19360122680664, "step": 72040 }, { "epoch": 0.12445, "grad_norm": 7.317163944244385, "learning_rate": 1.4119191919191921e-06, "loss": 6.274861907958984, "step": 72045 }, { "epoch": 0.1245, "grad_norm": 11.244245529174805, "learning_rate": 1.4116666666666668e-06, "loss": 6.237692260742188, "step": 72050 }, { "epoch": 0.12455, "grad_norm": 3.197624444961548, "learning_rate": 1.4114141414141416e-06, "loss": 6.270331573486328, "step": 72055 }, { "epoch": 0.1246, "grad_norm": 4.1028852462768555, "learning_rate": 1.4111616161616162e-06, "loss": 6.220381546020508, "step": 72060 }, { "epoch": 0.12465, "grad_norm": 6.338580131530762, "learning_rate": 1.410909090909091e-06, "loss": 6.225308227539062, "step": 72065 }, { "epoch": 0.1247, "grad_norm": 30.337923049926758, "learning_rate": 1.4106565656565657e-06, "loss": 6.311801147460938, "step": 72070 }, { "epoch": 0.12475, "grad_norm": 11.155892372131348, "learning_rate": 1.4104040404040406e-06, "loss": 6.405140686035156, "step": 72075 }, { "epoch": 0.1248, "grad_norm": 7.488796234130859, "learning_rate": 1.4101515151515152e-06, "loss": 6.224909973144531, "step": 72080 }, { "epoch": 0.12485, "grad_norm": 3.2704975605010986, "learning_rate": 1.40989898989899e-06, "loss": 6.21942367553711, "step": 72085 }, { "epoch": 0.1249, "grad_norm": 20.555065155029297, "learning_rate": 1.4096464646464647e-06, "loss": 6.187798309326172, "step": 72090 }, { "epoch": 0.12495, "grad_norm": 6.089480400085449, "learning_rate": 1.4093939393939395e-06, "loss": 6.119944000244141, "step": 72095 }, { "epoch": 0.125, "grad_norm": 5.306990623474121, "learning_rate": 1.4091414141414141e-06, "loss": 6.237059783935547, "step": 72100 }, { "epoch": 0.12505, "grad_norm": 10.853933334350586, "learning_rate": 1.4088888888888892e-06, "loss": 6.420098876953125, "step": 72105 }, { "epoch": 0.1251, "grad_norm": 3.9129140377044678, "learning_rate": 1.4086363636363636e-06, "loss": 6.258989715576172, "step": 72110 }, { "epoch": 0.12515, "grad_norm": 5.526532173156738, "learning_rate": 1.4083838383838387e-06, "loss": 6.178235244750977, "step": 72115 }, { "epoch": 0.1252, "grad_norm": 4.4771623611450195, "learning_rate": 1.408131313131313e-06, "loss": 6.215015029907226, "step": 72120 }, { "epoch": 0.12525, "grad_norm": 3.578934907913208, "learning_rate": 1.4078787878787881e-06, "loss": 6.223296356201172, "step": 72125 }, { "epoch": 0.1253, "grad_norm": 12.12261962890625, "learning_rate": 1.4076262626262628e-06, "loss": 6.2312877655029295, "step": 72130 }, { "epoch": 0.12535, "grad_norm": 5.044617652893066, "learning_rate": 1.4073737373737376e-06, "loss": 6.237131500244141, "step": 72135 }, { "epoch": 0.1254, "grad_norm": 5.204591274261475, "learning_rate": 1.4071212121212122e-06, "loss": 6.219819259643555, "step": 72140 }, { "epoch": 0.12545, "grad_norm": 4.932890892028809, "learning_rate": 1.406868686868687e-06, "loss": 6.310665893554687, "step": 72145 }, { "epoch": 0.1255, "grad_norm": 7.415680408477783, "learning_rate": 1.4066161616161617e-06, "loss": 6.277514266967773, "step": 72150 }, { "epoch": 0.12555, "grad_norm": 24.326496124267578, "learning_rate": 1.4063636363636365e-06, "loss": 6.3669075012207035, "step": 72155 }, { "epoch": 0.1256, "grad_norm": 7.035928249359131, "learning_rate": 1.4061111111111112e-06, "loss": 6.2217552185058596, "step": 72160 }, { "epoch": 0.12565, "grad_norm": 6.705284595489502, "learning_rate": 1.405858585858586e-06, "loss": 6.266395568847656, "step": 72165 }, { "epoch": 0.1257, "grad_norm": 6.855554580688477, "learning_rate": 1.4056060606060606e-06, "loss": 6.257292938232422, "step": 72170 }, { "epoch": 0.12575, "grad_norm": 9.023209571838379, "learning_rate": 1.4053535353535355e-06, "loss": 6.2199546813964846, "step": 72175 }, { "epoch": 0.1258, "grad_norm": 5.44822883605957, "learning_rate": 1.4051010101010101e-06, "loss": 6.250267410278321, "step": 72180 }, { "epoch": 0.12585, "grad_norm": 8.598494529724121, "learning_rate": 1.404848484848485e-06, "loss": 6.234655380249023, "step": 72185 }, { "epoch": 0.1259, "grad_norm": 3.4351987838745117, "learning_rate": 1.4045959595959596e-06, "loss": 6.2310539245605465, "step": 72190 }, { "epoch": 0.12595, "grad_norm": 5.722133636474609, "learning_rate": 1.4043434343434344e-06, "loss": 6.253060913085937, "step": 72195 }, { "epoch": 0.126, "grad_norm": 6.864473819732666, "learning_rate": 1.404090909090909e-06, "loss": 6.248149871826172, "step": 72200 }, { "epoch": 0.12605, "grad_norm": 10.702364921569824, "learning_rate": 1.403838383838384e-06, "loss": 6.255168914794922, "step": 72205 }, { "epoch": 0.1261, "grad_norm": 5.489292144775391, "learning_rate": 1.4035858585858585e-06, "loss": 6.210247039794922, "step": 72210 }, { "epoch": 0.12615, "grad_norm": 10.728789329528809, "learning_rate": 1.4033333333333336e-06, "loss": 6.244375991821289, "step": 72215 }, { "epoch": 0.1262, "grad_norm": 6.850706100463867, "learning_rate": 1.403080808080808e-06, "loss": 6.215954208374024, "step": 72220 }, { "epoch": 0.12625, "grad_norm": 7.330436706542969, "learning_rate": 1.402828282828283e-06, "loss": 6.238434600830078, "step": 72225 }, { "epoch": 0.1263, "grad_norm": 4.053848743438721, "learning_rate": 1.4025757575757577e-06, "loss": 6.261651611328125, "step": 72230 }, { "epoch": 0.12635, "grad_norm": 3.9731969833374023, "learning_rate": 1.4023232323232325e-06, "loss": 6.226219177246094, "step": 72235 }, { "epoch": 0.1264, "grad_norm": 7.083467960357666, "learning_rate": 1.4020707070707072e-06, "loss": 6.2873176574707035, "step": 72240 }, { "epoch": 0.12645, "grad_norm": 10.503536224365234, "learning_rate": 1.401818181818182e-06, "loss": 6.2407981872558596, "step": 72245 }, { "epoch": 0.1265, "grad_norm": 8.793343544006348, "learning_rate": 1.4015656565656566e-06, "loss": 6.259902572631836, "step": 72250 }, { "epoch": 0.12655, "grad_norm": 3.399928569793701, "learning_rate": 1.4013131313131315e-06, "loss": 6.2589881896972654, "step": 72255 }, { "epoch": 0.1266, "grad_norm": 6.475004196166992, "learning_rate": 1.4010606060606061e-06, "loss": 6.258908081054687, "step": 72260 }, { "epoch": 0.12665, "grad_norm": 5.519937515258789, "learning_rate": 1.400808080808081e-06, "loss": 6.2570854187011715, "step": 72265 }, { "epoch": 0.1267, "grad_norm": 5.833618640899658, "learning_rate": 1.4005555555555556e-06, "loss": 6.251493453979492, "step": 72270 }, { "epoch": 0.12675, "grad_norm": 5.362236976623535, "learning_rate": 1.4003030303030304e-06, "loss": 6.221570205688477, "step": 72275 }, { "epoch": 0.1268, "grad_norm": 7.923679351806641, "learning_rate": 1.400050505050505e-06, "loss": 6.251823806762696, "step": 72280 }, { "epoch": 0.12685, "grad_norm": 5.246730804443359, "learning_rate": 1.39979797979798e-06, "loss": 6.314226531982422, "step": 72285 }, { "epoch": 0.1269, "grad_norm": 8.028165817260742, "learning_rate": 1.3995454545454545e-06, "loss": 6.320392608642578, "step": 72290 }, { "epoch": 0.12695, "grad_norm": 5.170856952667236, "learning_rate": 1.3992929292929294e-06, "loss": 6.219726181030273, "step": 72295 }, { "epoch": 0.127, "grad_norm": 22.987180709838867, "learning_rate": 1.399040404040404e-06, "loss": 6.125537490844726, "step": 72300 }, { "epoch": 0.12705, "grad_norm": 5.122769832611084, "learning_rate": 1.3987878787878788e-06, "loss": 6.258256912231445, "step": 72305 }, { "epoch": 0.1271, "grad_norm": 5.855654239654541, "learning_rate": 1.3985353535353535e-06, "loss": 6.2168231964111325, "step": 72310 }, { "epoch": 0.12715, "grad_norm": 8.071866035461426, "learning_rate": 1.3982828282828283e-06, "loss": 6.3894775390625, "step": 72315 }, { "epoch": 0.1272, "grad_norm": 8.481955528259277, "learning_rate": 1.398030303030303e-06, "loss": 6.0714881896972654, "step": 72320 }, { "epoch": 0.12725, "grad_norm": 5.004131317138672, "learning_rate": 1.397777777777778e-06, "loss": 6.246303558349609, "step": 72325 }, { "epoch": 0.1273, "grad_norm": 6.7849955558776855, "learning_rate": 1.3975252525252524e-06, "loss": 6.242625427246094, "step": 72330 }, { "epoch": 0.12735, "grad_norm": 6.456030368804932, "learning_rate": 1.3972727272727275e-06, "loss": 6.2738494873046875, "step": 72335 }, { "epoch": 0.1274, "grad_norm": 6.3647332191467285, "learning_rate": 1.397020202020202e-06, "loss": 6.243589782714844, "step": 72340 }, { "epoch": 0.12745, "grad_norm": 5.3074846267700195, "learning_rate": 1.396767676767677e-06, "loss": 6.381671524047851, "step": 72345 }, { "epoch": 0.1275, "grad_norm": 5.02237606048584, "learning_rate": 1.3965151515151518e-06, "loss": 6.2368114471435545, "step": 72350 }, { "epoch": 0.12755, "grad_norm": 16.84322738647461, "learning_rate": 1.3962626262626264e-06, "loss": 6.266755676269531, "step": 72355 }, { "epoch": 0.1276, "grad_norm": 6.845759868621826, "learning_rate": 1.3960101010101013e-06, "loss": 6.273494720458984, "step": 72360 }, { "epoch": 0.12765, "grad_norm": 4.188724517822266, "learning_rate": 1.3957575757575759e-06, "loss": 6.210995483398437, "step": 72365 }, { "epoch": 0.1277, "grad_norm": 6.261929035186768, "learning_rate": 1.3955050505050507e-06, "loss": 6.278929901123047, "step": 72370 }, { "epoch": 0.12775, "grad_norm": 6.094764709472656, "learning_rate": 1.3952525252525254e-06, "loss": 6.225909805297851, "step": 72375 }, { "epoch": 0.1278, "grad_norm": 6.5658745765686035, "learning_rate": 1.3950000000000002e-06, "loss": 6.232949066162109, "step": 72380 }, { "epoch": 0.12785, "grad_norm": 5.110970497131348, "learning_rate": 1.3947474747474748e-06, "loss": 6.213413238525391, "step": 72385 }, { "epoch": 0.1279, "grad_norm": 4.757972240447998, "learning_rate": 1.3944949494949497e-06, "loss": 6.2453155517578125, "step": 72390 }, { "epoch": 0.12795, "grad_norm": 5.742959499359131, "learning_rate": 1.3942424242424243e-06, "loss": 6.235582733154297, "step": 72395 }, { "epoch": 0.128, "grad_norm": 10.714353561401367, "learning_rate": 1.3939898989898991e-06, "loss": 6.282502746582031, "step": 72400 }, { "epoch": 0.12805, "grad_norm": 6.665566444396973, "learning_rate": 1.3937373737373738e-06, "loss": 6.291911315917969, "step": 72405 }, { "epoch": 0.1281, "grad_norm": 6.1664838790893555, "learning_rate": 1.3934848484848486e-06, "loss": 6.2572486877441404, "step": 72410 }, { "epoch": 0.12815, "grad_norm": 5.6946940422058105, "learning_rate": 1.3932323232323232e-06, "loss": 6.306580352783203, "step": 72415 }, { "epoch": 0.1282, "grad_norm": 14.182703018188477, "learning_rate": 1.3929797979797983e-06, "loss": 6.226580810546875, "step": 72420 }, { "epoch": 0.12825, "grad_norm": 7.50028133392334, "learning_rate": 1.3927272727272727e-06, "loss": 6.236914825439453, "step": 72425 }, { "epoch": 0.1283, "grad_norm": 6.661437034606934, "learning_rate": 1.3924747474747478e-06, "loss": 6.251095962524414, "step": 72430 }, { "epoch": 0.12835, "grad_norm": 6.541402339935303, "learning_rate": 1.3922222222222224e-06, "loss": 6.249695205688477, "step": 72435 }, { "epoch": 0.1284, "grad_norm": 6.236366271972656, "learning_rate": 1.3919696969696972e-06, "loss": 6.27355728149414, "step": 72440 }, { "epoch": 0.12845, "grad_norm": 7.59915828704834, "learning_rate": 1.3917171717171719e-06, "loss": 6.2630363464355465, "step": 72445 }, { "epoch": 0.1285, "grad_norm": 5.5180535316467285, "learning_rate": 1.3914646464646467e-06, "loss": 6.376476287841797, "step": 72450 }, { "epoch": 0.12855, "grad_norm": 9.418651580810547, "learning_rate": 1.3912121212121213e-06, "loss": 6.540553283691406, "step": 72455 }, { "epoch": 0.1286, "grad_norm": 4.526059627532959, "learning_rate": 1.3909595959595962e-06, "loss": 6.23864631652832, "step": 72460 }, { "epoch": 0.12865, "grad_norm": 5.205795764923096, "learning_rate": 1.3907070707070708e-06, "loss": 6.24950065612793, "step": 72465 }, { "epoch": 0.1287, "grad_norm": 8.2130126953125, "learning_rate": 1.3904545454545457e-06, "loss": 6.2514801025390625, "step": 72470 }, { "epoch": 0.12875, "grad_norm": 4.840183734893799, "learning_rate": 1.3902020202020203e-06, "loss": 6.209715270996094, "step": 72475 }, { "epoch": 0.1288, "grad_norm": 9.862961769104004, "learning_rate": 1.3899494949494951e-06, "loss": 6.24706916809082, "step": 72480 }, { "epoch": 0.12885, "grad_norm": 6.986170768737793, "learning_rate": 1.3896969696969698e-06, "loss": 6.25708236694336, "step": 72485 }, { "epoch": 0.1289, "grad_norm": 8.381758689880371, "learning_rate": 1.3894444444444446e-06, "loss": 6.185386657714844, "step": 72490 }, { "epoch": 0.12895, "grad_norm": 15.985708236694336, "learning_rate": 1.3891919191919192e-06, "loss": 6.333415985107422, "step": 72495 }, { "epoch": 0.129, "grad_norm": 10.815374374389648, "learning_rate": 1.388939393939394e-06, "loss": 6.410923767089844, "step": 72500 }, { "epoch": 0.12905, "grad_norm": 8.081852912902832, "learning_rate": 1.3886868686868687e-06, "loss": 6.258322143554688, "step": 72505 }, { "epoch": 0.1291, "grad_norm": 8.83304214477539, "learning_rate": 1.3884343434343435e-06, "loss": 6.296475982666015, "step": 72510 }, { "epoch": 0.12915, "grad_norm": 6.673840045928955, "learning_rate": 1.3881818181818182e-06, "loss": 6.286724090576172, "step": 72515 }, { "epoch": 0.1292, "grad_norm": 7.893888473510742, "learning_rate": 1.3879292929292932e-06, "loss": 6.265949249267578, "step": 72520 }, { "epoch": 0.12925, "grad_norm": 6.000682353973389, "learning_rate": 1.3876767676767676e-06, "loss": 6.235721588134766, "step": 72525 }, { "epoch": 0.1293, "grad_norm": 10.931361198425293, "learning_rate": 1.3874242424242427e-06, "loss": 6.259165191650391, "step": 72530 }, { "epoch": 0.12935, "grad_norm": 6.72264289855957, "learning_rate": 1.3871717171717173e-06, "loss": 6.227452087402344, "step": 72535 }, { "epoch": 0.1294, "grad_norm": 13.313909530639648, "learning_rate": 1.3869191919191922e-06, "loss": 6.259217071533203, "step": 72540 }, { "epoch": 0.12945, "grad_norm": 3.630506753921509, "learning_rate": 1.3866666666666668e-06, "loss": 6.263875579833984, "step": 72545 }, { "epoch": 0.1295, "grad_norm": 5.036264896392822, "learning_rate": 1.3864141414141416e-06, "loss": 6.206753921508789, "step": 72550 }, { "epoch": 0.12955, "grad_norm": 8.125991821289062, "learning_rate": 1.3861616161616163e-06, "loss": 6.246722793579101, "step": 72555 }, { "epoch": 0.1296, "grad_norm": 6.688732624053955, "learning_rate": 1.3859090909090911e-06, "loss": 6.28233528137207, "step": 72560 }, { "epoch": 0.12965, "grad_norm": 51.457664489746094, "learning_rate": 1.3856565656565657e-06, "loss": 6.065206909179688, "step": 72565 }, { "epoch": 0.1297, "grad_norm": 9.461919784545898, "learning_rate": 1.3854040404040406e-06, "loss": 6.1694896697998045, "step": 72570 }, { "epoch": 0.12975, "grad_norm": 5.528608322143555, "learning_rate": 1.3851515151515152e-06, "loss": 6.210044097900391, "step": 72575 }, { "epoch": 0.1298, "grad_norm": 4.704505920410156, "learning_rate": 1.38489898989899e-06, "loss": 6.3333690643310545, "step": 72580 }, { "epoch": 0.12985, "grad_norm": 7.546390533447266, "learning_rate": 1.3846464646464647e-06, "loss": 6.295734024047851, "step": 72585 }, { "epoch": 0.1299, "grad_norm": 7.683620452880859, "learning_rate": 1.3843939393939395e-06, "loss": 6.227569580078125, "step": 72590 }, { "epoch": 0.12995, "grad_norm": 4.8859405517578125, "learning_rate": 1.3841414141414142e-06, "loss": 6.239406967163086, "step": 72595 }, { "epoch": 0.13, "grad_norm": 5.372814655303955, "learning_rate": 1.383888888888889e-06, "loss": 6.2107086181640625, "step": 72600 }, { "epoch": 0.13005, "grad_norm": 15.786569595336914, "learning_rate": 1.3836363636363636e-06, "loss": 6.3422698974609375, "step": 72605 }, { "epoch": 0.1301, "grad_norm": 9.528112411499023, "learning_rate": 1.3833838383838385e-06, "loss": 6.246278381347656, "step": 72610 }, { "epoch": 0.13015, "grad_norm": 7.70271635055542, "learning_rate": 1.383131313131313e-06, "loss": 6.20422248840332, "step": 72615 }, { "epoch": 0.1302, "grad_norm": 5.875172138214111, "learning_rate": 1.382878787878788e-06, "loss": 6.237479400634766, "step": 72620 }, { "epoch": 0.13025, "grad_norm": 5.771132946014404, "learning_rate": 1.3826262626262626e-06, "loss": 6.406166839599609, "step": 72625 }, { "epoch": 0.1303, "grad_norm": 12.704976081848145, "learning_rate": 1.3823737373737376e-06, "loss": 6.307921600341797, "step": 72630 }, { "epoch": 0.13035, "grad_norm": 7.132180213928223, "learning_rate": 1.382121212121212e-06, "loss": 6.251139068603516, "step": 72635 }, { "epoch": 0.1304, "grad_norm": 6.395718097686768, "learning_rate": 1.381868686868687e-06, "loss": 6.2231090545654295, "step": 72640 }, { "epoch": 0.13045, "grad_norm": 8.936473846435547, "learning_rate": 1.3816161616161617e-06, "loss": 6.260891723632812, "step": 72645 }, { "epoch": 0.1305, "grad_norm": 8.47281551361084, "learning_rate": 1.3813636363636366e-06, "loss": 6.226327133178711, "step": 72650 }, { "epoch": 0.13055, "grad_norm": 6.023569107055664, "learning_rate": 1.3811111111111112e-06, "loss": 6.269731521606445, "step": 72655 }, { "epoch": 0.1306, "grad_norm": 3.470802068710327, "learning_rate": 1.380858585858586e-06, "loss": 6.2533203125, "step": 72660 }, { "epoch": 0.13065, "grad_norm": 7.493227005004883, "learning_rate": 1.3806060606060607e-06, "loss": 6.248664474487304, "step": 72665 }, { "epoch": 0.1307, "grad_norm": 5.8954644203186035, "learning_rate": 1.3803535353535355e-06, "loss": 6.234171676635742, "step": 72670 }, { "epoch": 0.13075, "grad_norm": 5.330089569091797, "learning_rate": 1.3801010101010101e-06, "loss": 6.2779075622558596, "step": 72675 }, { "epoch": 0.1308, "grad_norm": 10.058738708496094, "learning_rate": 1.379848484848485e-06, "loss": 6.22771110534668, "step": 72680 }, { "epoch": 0.13085, "grad_norm": 9.101061820983887, "learning_rate": 1.3795959595959596e-06, "loss": 6.254739761352539, "step": 72685 }, { "epoch": 0.1309, "grad_norm": 5.231633186340332, "learning_rate": 1.3793434343434345e-06, "loss": 6.2850791931152346, "step": 72690 }, { "epoch": 0.13095, "grad_norm": 9.290593147277832, "learning_rate": 1.379090909090909e-06, "loss": 6.273253631591797, "step": 72695 }, { "epoch": 0.131, "grad_norm": 4.7962446212768555, "learning_rate": 1.378838383838384e-06, "loss": 6.229969787597656, "step": 72700 }, { "epoch": 0.13105, "grad_norm": 4.139087677001953, "learning_rate": 1.3785858585858586e-06, "loss": 6.22882308959961, "step": 72705 }, { "epoch": 0.1311, "grad_norm": 6.12857723236084, "learning_rate": 1.3783333333333334e-06, "loss": 6.291384506225586, "step": 72710 }, { "epoch": 0.13115, "grad_norm": 4.479878902435303, "learning_rate": 1.378080808080808e-06, "loss": 6.226400375366211, "step": 72715 }, { "epoch": 0.1312, "grad_norm": 6.684912204742432, "learning_rate": 1.3778282828282829e-06, "loss": 6.275428771972656, "step": 72720 }, { "epoch": 0.13125, "grad_norm": 7.383899688720703, "learning_rate": 1.3775757575757575e-06, "loss": 6.2771247863769535, "step": 72725 }, { "epoch": 0.1313, "grad_norm": 8.74825668334961, "learning_rate": 1.3773232323232324e-06, "loss": 6.217302703857422, "step": 72730 }, { "epoch": 0.13135, "grad_norm": 6.341443061828613, "learning_rate": 1.377070707070707e-06, "loss": 6.269826889038086, "step": 72735 }, { "epoch": 0.1314, "grad_norm": 5.203768730163574, "learning_rate": 1.376818181818182e-06, "loss": 6.212256622314453, "step": 72740 }, { "epoch": 0.13145, "grad_norm": 7.050826549530029, "learning_rate": 1.3765656565656565e-06, "loss": 6.256029510498047, "step": 72745 }, { "epoch": 0.1315, "grad_norm": 5.185737609863281, "learning_rate": 1.3763131313131315e-06, "loss": 6.249605560302735, "step": 72750 }, { "epoch": 0.13155, "grad_norm": 4.140324592590332, "learning_rate": 1.3760606060606061e-06, "loss": 6.23271484375, "step": 72755 }, { "epoch": 0.1316, "grad_norm": 7.317927837371826, "learning_rate": 1.375808080808081e-06, "loss": 6.207856369018555, "step": 72760 }, { "epoch": 0.13165, "grad_norm": 7.565467357635498, "learning_rate": 1.3755555555555556e-06, "loss": 6.2340232849121096, "step": 72765 }, { "epoch": 0.1317, "grad_norm": 10.388609886169434, "learning_rate": 1.3753030303030305e-06, "loss": 6.545552825927734, "step": 72770 }, { "epoch": 0.13175, "grad_norm": 5.506117820739746, "learning_rate": 1.375050505050505e-06, "loss": 6.2742431640625, "step": 72775 }, { "epoch": 0.1318, "grad_norm": 3.480760097503662, "learning_rate": 1.37479797979798e-06, "loss": 6.261568450927735, "step": 72780 }, { "epoch": 0.13185, "grad_norm": 4.11122465133667, "learning_rate": 1.3745454545454548e-06, "loss": 6.253939437866211, "step": 72785 }, { "epoch": 0.1319, "grad_norm": 7.283763408660889, "learning_rate": 1.3742929292929294e-06, "loss": 6.298431015014648, "step": 72790 }, { "epoch": 0.13195, "grad_norm": 5.662075519561768, "learning_rate": 1.3740404040404042e-06, "loss": 6.24890022277832, "step": 72795 }, { "epoch": 0.132, "grad_norm": 4.857743740081787, "learning_rate": 1.3737878787878789e-06, "loss": 6.233623886108399, "step": 72800 }, { "epoch": 0.13205, "grad_norm": 4.271862506866455, "learning_rate": 1.3735353535353537e-06, "loss": 6.201395034790039, "step": 72805 }, { "epoch": 0.1321, "grad_norm": 8.710576057434082, "learning_rate": 1.3732828282828283e-06, "loss": 6.271302795410156, "step": 72810 }, { "epoch": 0.13215, "grad_norm": 9.012434005737305, "learning_rate": 1.3730303030303032e-06, "loss": 6.223396682739258, "step": 72815 }, { "epoch": 0.1322, "grad_norm": 5.0282206535339355, "learning_rate": 1.3727777777777778e-06, "loss": 6.236809921264649, "step": 72820 }, { "epoch": 0.13225, "grad_norm": 9.327211380004883, "learning_rate": 1.3725252525252529e-06, "loss": 6.219329452514648, "step": 72825 }, { "epoch": 0.1323, "grad_norm": 6.08135461807251, "learning_rate": 1.3722727272727273e-06, "loss": 6.252198028564453, "step": 72830 }, { "epoch": 0.13235, "grad_norm": 6.384705066680908, "learning_rate": 1.3720202020202023e-06, "loss": 6.2236991882324215, "step": 72835 }, { "epoch": 0.1324, "grad_norm": 6.933772087097168, "learning_rate": 1.3717676767676768e-06, "loss": 6.262277603149414, "step": 72840 }, { "epoch": 0.13245, "grad_norm": 35.953975677490234, "learning_rate": 1.3715151515151518e-06, "loss": 6.4270469665527346, "step": 72845 }, { "epoch": 0.1325, "grad_norm": 4.956327438354492, "learning_rate": 1.3712626262626264e-06, "loss": 6.389732742309571, "step": 72850 }, { "epoch": 0.13255, "grad_norm": 9.515737533569336, "learning_rate": 1.3710101010101013e-06, "loss": 6.217837524414063, "step": 72855 }, { "epoch": 0.1326, "grad_norm": 4.65161657333374, "learning_rate": 1.370757575757576e-06, "loss": 6.294412994384766, "step": 72860 }, { "epoch": 0.13265, "grad_norm": 7.4156646728515625, "learning_rate": 1.3705050505050508e-06, "loss": 6.230988693237305, "step": 72865 }, { "epoch": 0.1327, "grad_norm": 5.166014194488525, "learning_rate": 1.3702525252525254e-06, "loss": 6.204397583007813, "step": 72870 }, { "epoch": 0.13275, "grad_norm": 9.361371040344238, "learning_rate": 1.3700000000000002e-06, "loss": 6.2670635223388675, "step": 72875 }, { "epoch": 0.1328, "grad_norm": 7.538749694824219, "learning_rate": 1.3697474747474749e-06, "loss": 6.29464111328125, "step": 72880 }, { "epoch": 0.13285, "grad_norm": 5.463902950286865, "learning_rate": 1.3694949494949497e-06, "loss": 6.229369354248047, "step": 72885 }, { "epoch": 0.1329, "grad_norm": 6.553383827209473, "learning_rate": 1.3692424242424243e-06, "loss": 6.2796882629394535, "step": 72890 }, { "epoch": 0.13295, "grad_norm": 7.817630767822266, "learning_rate": 1.3689898989898992e-06, "loss": 6.275544738769531, "step": 72895 }, { "epoch": 0.133, "grad_norm": 26.85154151916504, "learning_rate": 1.3687373737373738e-06, "loss": 6.331230163574219, "step": 72900 }, { "epoch": 0.13305, "grad_norm": 8.225223541259766, "learning_rate": 1.3684848484848486e-06, "loss": 6.191698455810547, "step": 72905 }, { "epoch": 0.1331, "grad_norm": 5.194640636444092, "learning_rate": 1.3682323232323233e-06, "loss": 6.240132522583008, "step": 72910 }, { "epoch": 0.13315, "grad_norm": 4.686760902404785, "learning_rate": 1.3679797979797981e-06, "loss": 6.244041442871094, "step": 72915 }, { "epoch": 0.1332, "grad_norm": 5.384839057922363, "learning_rate": 1.3677272727272727e-06, "loss": 6.250349807739258, "step": 72920 }, { "epoch": 0.13325, "grad_norm": 9.061654090881348, "learning_rate": 1.3674747474747476e-06, "loss": 6.209395217895508, "step": 72925 }, { "epoch": 0.1333, "grad_norm": 8.45060920715332, "learning_rate": 1.3672222222222222e-06, "loss": 6.246502685546875, "step": 72930 }, { "epoch": 0.13335, "grad_norm": 5.823925018310547, "learning_rate": 1.3669696969696973e-06, "loss": 6.216043853759766, "step": 72935 }, { "epoch": 0.1334, "grad_norm": 12.005875587463379, "learning_rate": 1.3667171717171717e-06, "loss": 5.941276550292969, "step": 72940 }, { "epoch": 0.13345, "grad_norm": 8.181265830993652, "learning_rate": 1.3664646464646467e-06, "loss": 6.248302459716797, "step": 72945 }, { "epoch": 0.1335, "grad_norm": 6.3638105392456055, "learning_rate": 1.3662121212121214e-06, "loss": 6.268737030029297, "step": 72950 }, { "epoch": 0.13355, "grad_norm": 7.701712608337402, "learning_rate": 1.3659595959595962e-06, "loss": 6.219850540161133, "step": 72955 }, { "epoch": 0.1336, "grad_norm": 4.749985218048096, "learning_rate": 1.3657070707070708e-06, "loss": 6.241619491577149, "step": 72960 }, { "epoch": 0.13365, "grad_norm": 3.8939242362976074, "learning_rate": 1.3654545454545457e-06, "loss": 6.237386703491211, "step": 72965 }, { "epoch": 0.1337, "grad_norm": 9.173443794250488, "learning_rate": 1.3652020202020203e-06, "loss": 6.26617660522461, "step": 72970 }, { "epoch": 0.13375, "grad_norm": 5.842432975769043, "learning_rate": 1.3649494949494952e-06, "loss": 6.283150100708008, "step": 72975 }, { "epoch": 0.1338, "grad_norm": 5.2501935958862305, "learning_rate": 1.3646969696969698e-06, "loss": 6.238656234741211, "step": 72980 }, { "epoch": 0.13385, "grad_norm": 6.967891693115234, "learning_rate": 1.3644444444444446e-06, "loss": 6.179389190673828, "step": 72985 }, { "epoch": 0.1339, "grad_norm": 4.3066277503967285, "learning_rate": 1.3641919191919193e-06, "loss": 6.260377502441406, "step": 72990 }, { "epoch": 0.13395, "grad_norm": 5.237133026123047, "learning_rate": 1.363939393939394e-06, "loss": 6.387936019897461, "step": 72995 }, { "epoch": 0.134, "grad_norm": 8.366695404052734, "learning_rate": 1.3636868686868687e-06, "loss": 6.223817443847656, "step": 73000 }, { "epoch": 0.13405, "grad_norm": 7.382944107055664, "learning_rate": 1.3634343434343436e-06, "loss": 6.256778335571289, "step": 73005 }, { "epoch": 0.1341, "grad_norm": 6.667216777801514, "learning_rate": 1.3631818181818182e-06, "loss": 6.333994674682617, "step": 73010 }, { "epoch": 0.13415, "grad_norm": 6.298126697540283, "learning_rate": 1.362929292929293e-06, "loss": 6.2571868896484375, "step": 73015 }, { "epoch": 0.1342, "grad_norm": 6.680832386016846, "learning_rate": 1.3626767676767677e-06, "loss": 6.358585357666016, "step": 73020 }, { "epoch": 0.13425, "grad_norm": 9.931838035583496, "learning_rate": 1.3624242424242425e-06, "loss": 6.223495864868164, "step": 73025 }, { "epoch": 0.1343, "grad_norm": 7.14206600189209, "learning_rate": 1.3621717171717171e-06, "loss": 6.333016967773437, "step": 73030 }, { "epoch": 0.13435, "grad_norm": 8.270692825317383, "learning_rate": 1.361919191919192e-06, "loss": 6.230085754394532, "step": 73035 }, { "epoch": 0.1344, "grad_norm": 6.4614787101745605, "learning_rate": 1.3616666666666666e-06, "loss": 6.266476058959961, "step": 73040 }, { "epoch": 0.13445, "grad_norm": 9.27800464630127, "learning_rate": 1.3614141414141417e-06, "loss": 6.246361541748047, "step": 73045 }, { "epoch": 0.1345, "grad_norm": 4.5928192138671875, "learning_rate": 1.361161616161616e-06, "loss": 6.20712890625, "step": 73050 }, { "epoch": 0.13455, "grad_norm": 8.179418563842773, "learning_rate": 1.3609090909090911e-06, "loss": 6.270842742919922, "step": 73055 }, { "epoch": 0.1346, "grad_norm": 5.08419132232666, "learning_rate": 1.3606565656565658e-06, "loss": 6.237959671020508, "step": 73060 }, { "epoch": 0.13465, "grad_norm": 7.4774956703186035, "learning_rate": 1.3604040404040406e-06, "loss": 6.375712585449219, "step": 73065 }, { "epoch": 0.1347, "grad_norm": 4.5160322189331055, "learning_rate": 1.3601515151515152e-06, "loss": 6.2828834533691404, "step": 73070 }, { "epoch": 0.13475, "grad_norm": 7.862586498260498, "learning_rate": 1.35989898989899e-06, "loss": 6.253034591674805, "step": 73075 }, { "epoch": 0.1348, "grad_norm": 4.30304479598999, "learning_rate": 1.3596464646464647e-06, "loss": 6.2132720947265625, "step": 73080 }, { "epoch": 0.13485, "grad_norm": 6.612369060516357, "learning_rate": 1.3593939393939396e-06, "loss": 6.248345947265625, "step": 73085 }, { "epoch": 0.1349, "grad_norm": 5.4304022789001465, "learning_rate": 1.3591414141414142e-06, "loss": 6.197782516479492, "step": 73090 }, { "epoch": 0.13495, "grad_norm": 4.830604553222656, "learning_rate": 1.358888888888889e-06, "loss": 6.2087360382080075, "step": 73095 }, { "epoch": 0.135, "grad_norm": 6.688281536102295, "learning_rate": 1.3586363636363637e-06, "loss": 6.266600036621094, "step": 73100 }, { "epoch": 0.13505, "grad_norm": 8.854203224182129, "learning_rate": 1.3583838383838385e-06, "loss": 6.289669799804687, "step": 73105 }, { "epoch": 0.1351, "grad_norm": 10.642318725585938, "learning_rate": 1.3581313131313131e-06, "loss": 6.1978302001953125, "step": 73110 }, { "epoch": 0.13515, "grad_norm": 6.373115539550781, "learning_rate": 1.357878787878788e-06, "loss": 6.295624923706055, "step": 73115 }, { "epoch": 0.1352, "grad_norm": 7.464424133300781, "learning_rate": 1.3576262626262626e-06, "loss": 6.2277679443359375, "step": 73120 }, { "epoch": 0.13525, "grad_norm": 3.4082019329071045, "learning_rate": 1.3573737373737374e-06, "loss": 6.250430679321289, "step": 73125 }, { "epoch": 0.1353, "grad_norm": 5.6862382888793945, "learning_rate": 1.357121212121212e-06, "loss": 6.235698318481445, "step": 73130 }, { "epoch": 0.13535, "grad_norm": 6.83771276473999, "learning_rate": 1.356868686868687e-06, "loss": 6.2441150665283205, "step": 73135 }, { "epoch": 0.1354, "grad_norm": 5.06077241897583, "learning_rate": 1.3566161616161615e-06, "loss": 6.423397064208984, "step": 73140 }, { "epoch": 0.13545, "grad_norm": 3.357405424118042, "learning_rate": 1.3563636363636364e-06, "loss": 6.270118713378906, "step": 73145 }, { "epoch": 0.1355, "grad_norm": 10.386279106140137, "learning_rate": 1.356111111111111e-06, "loss": 6.261636352539062, "step": 73150 }, { "epoch": 0.13555, "grad_norm": 26.646148681640625, "learning_rate": 1.355858585858586e-06, "loss": 6.277619934082031, "step": 73155 }, { "epoch": 0.1356, "grad_norm": 13.87376880645752, "learning_rate": 1.3556060606060605e-06, "loss": 6.344699859619141, "step": 73160 }, { "epoch": 0.13565, "grad_norm": 8.655750274658203, "learning_rate": 1.3553535353535355e-06, "loss": 6.38487548828125, "step": 73165 }, { "epoch": 0.1357, "grad_norm": 7.310064315795898, "learning_rate": 1.3551010101010102e-06, "loss": 6.3625740051269535, "step": 73170 }, { "epoch": 0.13575, "grad_norm": 4.5229411125183105, "learning_rate": 1.354848484848485e-06, "loss": 6.384629821777343, "step": 73175 }, { "epoch": 0.1358, "grad_norm": 22.610515594482422, "learning_rate": 1.3545959595959596e-06, "loss": 6.280409240722657, "step": 73180 }, { "epoch": 0.13585, "grad_norm": 3.5349323749542236, "learning_rate": 1.3543434343434345e-06, "loss": 6.2950386047363285, "step": 73185 }, { "epoch": 0.1359, "grad_norm": 5.151086807250977, "learning_rate": 1.3540909090909091e-06, "loss": 6.242147064208984, "step": 73190 }, { "epoch": 0.13595, "grad_norm": 5.791029930114746, "learning_rate": 1.353838383838384e-06, "loss": 6.245421981811523, "step": 73195 }, { "epoch": 0.136, "grad_norm": 4.607745170593262, "learning_rate": 1.3535858585858586e-06, "loss": 6.227019500732422, "step": 73200 }, { "epoch": 0.13605, "grad_norm": 5.286259174346924, "learning_rate": 1.3533333333333334e-06, "loss": 6.205660247802735, "step": 73205 }, { "epoch": 0.1361, "grad_norm": 21.317546844482422, "learning_rate": 1.3530808080808083e-06, "loss": 6.2813720703125, "step": 73210 }, { "epoch": 0.13615, "grad_norm": 4.3780741691589355, "learning_rate": 1.352828282828283e-06, "loss": 6.248403167724609, "step": 73215 }, { "epoch": 0.1362, "grad_norm": 6.0962724685668945, "learning_rate": 1.3525757575757577e-06, "loss": 6.345193099975586, "step": 73220 }, { "epoch": 0.13625, "grad_norm": 5.974603652954102, "learning_rate": 1.3523232323232324e-06, "loss": 6.268743896484375, "step": 73225 }, { "epoch": 0.1363, "grad_norm": 3.796064615249634, "learning_rate": 1.3520707070707072e-06, "loss": 6.240907287597656, "step": 73230 }, { "epoch": 0.13635, "grad_norm": 7.526639461517334, "learning_rate": 1.3518181818181819e-06, "loss": 6.262704467773437, "step": 73235 }, { "epoch": 0.1364, "grad_norm": 4.527552604675293, "learning_rate": 1.351565656565657e-06, "loss": 6.267513275146484, "step": 73240 }, { "epoch": 0.13645, "grad_norm": 8.73568058013916, "learning_rate": 1.3513131313131313e-06, "loss": 6.266496276855468, "step": 73245 }, { "epoch": 0.1365, "grad_norm": 7.444570541381836, "learning_rate": 1.3510606060606064e-06, "loss": 6.216794967651367, "step": 73250 }, { "epoch": 0.13655, "grad_norm": 4.441771507263184, "learning_rate": 1.350808080808081e-06, "loss": 6.191824340820313, "step": 73255 }, { "epoch": 0.1366, "grad_norm": 8.344087600708008, "learning_rate": 1.3505555555555558e-06, "loss": 6.231110000610352, "step": 73260 }, { "epoch": 0.13665, "grad_norm": 6.372521877288818, "learning_rate": 1.3503030303030305e-06, "loss": 6.23822250366211, "step": 73265 }, { "epoch": 0.1367, "grad_norm": 9.813756942749023, "learning_rate": 1.3500505050505053e-06, "loss": 6.271350860595703, "step": 73270 }, { "epoch": 0.13675, "grad_norm": 6.108236789703369, "learning_rate": 1.34979797979798e-06, "loss": 6.277891159057617, "step": 73275 }, { "epoch": 0.1368, "grad_norm": 35.1893424987793, "learning_rate": 1.3495454545454548e-06, "loss": 6.543667602539062, "step": 73280 }, { "epoch": 0.13685, "grad_norm": 5.630121231079102, "learning_rate": 1.3492929292929294e-06, "loss": 6.28214111328125, "step": 73285 }, { "epoch": 0.1369, "grad_norm": 6.299350738525391, "learning_rate": 1.3490404040404043e-06, "loss": 6.254913330078125, "step": 73290 }, { "epoch": 0.13695, "grad_norm": 7.9402594566345215, "learning_rate": 1.3487878787878789e-06, "loss": 6.294940948486328, "step": 73295 }, { "epoch": 0.137, "grad_norm": 8.553919792175293, "learning_rate": 1.3485353535353537e-06, "loss": 6.220746231079102, "step": 73300 }, { "epoch": 0.13705, "grad_norm": 4.59849214553833, "learning_rate": 1.3482828282828284e-06, "loss": 6.212496185302735, "step": 73305 }, { "epoch": 0.1371, "grad_norm": 4.942038059234619, "learning_rate": 1.3480303030303032e-06, "loss": 6.264511489868164, "step": 73310 }, { "epoch": 0.13715, "grad_norm": 6.425468444824219, "learning_rate": 1.3477777777777778e-06, "loss": 6.246620559692383, "step": 73315 }, { "epoch": 0.1372, "grad_norm": 3.790377140045166, "learning_rate": 1.3475252525252527e-06, "loss": 6.262419891357422, "step": 73320 }, { "epoch": 0.13725, "grad_norm": 7.888794898986816, "learning_rate": 1.3472727272727273e-06, "loss": 6.237712097167969, "step": 73325 }, { "epoch": 0.1373, "grad_norm": 4.971283912658691, "learning_rate": 1.3470202020202022e-06, "loss": 6.2602588653564455, "step": 73330 }, { "epoch": 0.13735, "grad_norm": 5.420119762420654, "learning_rate": 1.3467676767676768e-06, "loss": 6.204121398925781, "step": 73335 }, { "epoch": 0.1374, "grad_norm": 4.486352443695068, "learning_rate": 1.3465151515151516e-06, "loss": 6.201401138305664, "step": 73340 }, { "epoch": 0.13745, "grad_norm": 69.30490112304688, "learning_rate": 1.3462626262626263e-06, "loss": 6.206978607177734, "step": 73345 }, { "epoch": 0.1375, "grad_norm": 6.274879455566406, "learning_rate": 1.3460101010101013e-06, "loss": 6.244609451293945, "step": 73350 }, { "epoch": 0.13755, "grad_norm": 7.753452777862549, "learning_rate": 1.3457575757575757e-06, "loss": 6.302691650390625, "step": 73355 }, { "epoch": 0.1376, "grad_norm": 5.339166164398193, "learning_rate": 1.3455050505050508e-06, "loss": 6.274420166015625, "step": 73360 }, { "epoch": 0.13765, "grad_norm": 5.253957271575928, "learning_rate": 1.3452525252525254e-06, "loss": 6.256266784667969, "step": 73365 }, { "epoch": 0.1377, "grad_norm": 4.833682537078857, "learning_rate": 1.3450000000000003e-06, "loss": 6.266374588012695, "step": 73370 }, { "epoch": 0.13775, "grad_norm": 3.7455716133117676, "learning_rate": 1.3447474747474749e-06, "loss": 6.216480255126953, "step": 73375 }, { "epoch": 0.1378, "grad_norm": 5.7371087074279785, "learning_rate": 1.3444949494949497e-06, "loss": 6.1899982452392575, "step": 73380 }, { "epoch": 0.13785, "grad_norm": 4.410851955413818, "learning_rate": 1.3442424242424244e-06, "loss": 6.249266052246094, "step": 73385 }, { "epoch": 0.1379, "grad_norm": 6.361946105957031, "learning_rate": 1.3439898989898992e-06, "loss": 6.239887237548828, "step": 73390 }, { "epoch": 0.13795, "grad_norm": 4.492264747619629, "learning_rate": 1.3437373737373738e-06, "loss": 6.288278579711914, "step": 73395 }, { "epoch": 0.138, "grad_norm": 7.287191390991211, "learning_rate": 1.3434848484848487e-06, "loss": 6.279750823974609, "step": 73400 }, { "epoch": 0.13805, "grad_norm": 4.9743781089782715, "learning_rate": 1.3432323232323233e-06, "loss": 6.257644271850586, "step": 73405 }, { "epoch": 0.1381, "grad_norm": 7.8515543937683105, "learning_rate": 1.3429797979797981e-06, "loss": 6.199366760253906, "step": 73410 }, { "epoch": 0.13815, "grad_norm": 4.441071033477783, "learning_rate": 1.3427272727272728e-06, "loss": 6.24821662902832, "step": 73415 }, { "epoch": 0.1382, "grad_norm": 4.8543171882629395, "learning_rate": 1.3424747474747476e-06, "loss": 6.172284698486328, "step": 73420 }, { "epoch": 0.13825, "grad_norm": 6.004489421844482, "learning_rate": 1.3422222222222222e-06, "loss": 6.274761962890625, "step": 73425 }, { "epoch": 0.1383, "grad_norm": 5.118751049041748, "learning_rate": 1.341969696969697e-06, "loss": 6.255103302001953, "step": 73430 }, { "epoch": 0.13835, "grad_norm": 5.31057071685791, "learning_rate": 1.3417171717171717e-06, "loss": 6.232338714599609, "step": 73435 }, { "epoch": 0.1384, "grad_norm": 7.726137161254883, "learning_rate": 1.3414646464646466e-06, "loss": 6.227620697021484, "step": 73440 }, { "epoch": 0.13845, "grad_norm": 4.280829429626465, "learning_rate": 1.3412121212121212e-06, "loss": 6.227680206298828, "step": 73445 }, { "epoch": 0.1385, "grad_norm": 8.216300964355469, "learning_rate": 1.340959595959596e-06, "loss": 6.239097213745117, "step": 73450 }, { "epoch": 0.13855, "grad_norm": 10.146086692810059, "learning_rate": 1.3407070707070707e-06, "loss": 6.173473358154297, "step": 73455 }, { "epoch": 0.1386, "grad_norm": 8.489233016967773, "learning_rate": 1.3404545454545457e-06, "loss": 6.296357727050781, "step": 73460 }, { "epoch": 0.13865, "grad_norm": 6.186124324798584, "learning_rate": 1.3402020202020201e-06, "loss": 6.292958068847656, "step": 73465 }, { "epoch": 0.1387, "grad_norm": 23.340131759643555, "learning_rate": 1.3399494949494952e-06, "loss": 6.378070831298828, "step": 73470 }, { "epoch": 0.13875, "grad_norm": 12.585243225097656, "learning_rate": 1.3396969696969698e-06, "loss": 6.450328063964844, "step": 73475 }, { "epoch": 0.1388, "grad_norm": 6.686207294464111, "learning_rate": 1.3394444444444447e-06, "loss": 6.293222045898437, "step": 73480 }, { "epoch": 0.13885, "grad_norm": 8.617496490478516, "learning_rate": 1.3391919191919193e-06, "loss": 6.284915161132813, "step": 73485 }, { "epoch": 0.1389, "grad_norm": 3.9316906929016113, "learning_rate": 1.3389393939393941e-06, "loss": 6.323502731323242, "step": 73490 }, { "epoch": 0.13895, "grad_norm": 8.19398307800293, "learning_rate": 1.3386868686868688e-06, "loss": 6.231022644042969, "step": 73495 }, { "epoch": 0.139, "grad_norm": 11.954316139221191, "learning_rate": 1.3384343434343436e-06, "loss": 6.241648864746094, "step": 73500 }, { "epoch": 0.13905, "grad_norm": 6.8280205726623535, "learning_rate": 1.3381818181818182e-06, "loss": 6.241720962524414, "step": 73505 }, { "epoch": 0.1391, "grad_norm": 3.8131206035614014, "learning_rate": 1.337929292929293e-06, "loss": 6.228635787963867, "step": 73510 }, { "epoch": 0.13915, "grad_norm": 9.422626495361328, "learning_rate": 1.3376767676767677e-06, "loss": 6.235861968994141, "step": 73515 }, { "epoch": 0.1392, "grad_norm": 9.601048469543457, "learning_rate": 1.3374242424242425e-06, "loss": 6.206102752685547, "step": 73520 }, { "epoch": 0.13925, "grad_norm": 5.662851333618164, "learning_rate": 1.3371717171717172e-06, "loss": 6.2627513885498045, "step": 73525 }, { "epoch": 0.1393, "grad_norm": 4.675796985626221, "learning_rate": 1.336919191919192e-06, "loss": 6.243603515625, "step": 73530 }, { "epoch": 0.13935, "grad_norm": 7.792404651641846, "learning_rate": 1.3366666666666666e-06, "loss": 6.231024169921875, "step": 73535 }, { "epoch": 0.1394, "grad_norm": 7.13101863861084, "learning_rate": 1.3364141414141415e-06, "loss": 6.227274322509766, "step": 73540 }, { "epoch": 0.13945, "grad_norm": 8.325708389282227, "learning_rate": 1.3361616161616161e-06, "loss": 6.199253082275391, "step": 73545 }, { "epoch": 0.1395, "grad_norm": 22.67593765258789, "learning_rate": 1.335909090909091e-06, "loss": 6.2513469696044925, "step": 73550 }, { "epoch": 0.13955, "grad_norm": 7.867340564727783, "learning_rate": 1.3356565656565656e-06, "loss": 6.271105194091797, "step": 73555 }, { "epoch": 0.1396, "grad_norm": 6.703165054321289, "learning_rate": 1.3354040404040404e-06, "loss": 6.234305572509766, "step": 73560 }, { "epoch": 0.13965, "grad_norm": 3.7976365089416504, "learning_rate": 1.335151515151515e-06, "loss": 6.260285949707031, "step": 73565 }, { "epoch": 0.1397, "grad_norm": 5.37935733795166, "learning_rate": 1.3348989898989901e-06, "loss": 6.217185974121094, "step": 73570 }, { "epoch": 0.13975, "grad_norm": 3.9553515911102295, "learning_rate": 1.3346464646464645e-06, "loss": 6.232958602905273, "step": 73575 }, { "epoch": 0.1398, "grad_norm": 3.7671024799346924, "learning_rate": 1.3343939393939396e-06, "loss": 6.264215087890625, "step": 73580 }, { "epoch": 0.13985, "grad_norm": 3.578019380569458, "learning_rate": 1.3341414141414142e-06, "loss": 6.218351745605469, "step": 73585 }, { "epoch": 0.1399, "grad_norm": 8.56920337677002, "learning_rate": 1.333888888888889e-06, "loss": 6.291695022583008, "step": 73590 }, { "epoch": 0.13995, "grad_norm": 2.995694398880005, "learning_rate": 1.3336363636363637e-06, "loss": 6.18297119140625, "step": 73595 }, { "epoch": 0.14, "grad_norm": 14.819070816040039, "learning_rate": 1.3333838383838385e-06, "loss": 6.290894317626953, "step": 73600 }, { "epoch": 0.14005, "grad_norm": 12.887091636657715, "learning_rate": 1.3331313131313132e-06, "loss": 6.425175476074219, "step": 73605 }, { "epoch": 0.1401, "grad_norm": 8.207342147827148, "learning_rate": 1.332878787878788e-06, "loss": 6.219371795654297, "step": 73610 }, { "epoch": 0.14015, "grad_norm": 6.039576530456543, "learning_rate": 1.3326262626262626e-06, "loss": 6.207020568847656, "step": 73615 }, { "epoch": 0.1402, "grad_norm": 6.13785982131958, "learning_rate": 1.3323737373737375e-06, "loss": 6.252142333984375, "step": 73620 }, { "epoch": 0.14025, "grad_norm": 34.80143356323242, "learning_rate": 1.332121212121212e-06, "loss": 6.586344909667969, "step": 73625 }, { "epoch": 0.1403, "grad_norm": 23.786672592163086, "learning_rate": 1.331868686868687e-06, "loss": 6.559537506103515, "step": 73630 }, { "epoch": 0.14035, "grad_norm": 16.94866180419922, "learning_rate": 1.3316161616161618e-06, "loss": 6.345255279541016, "step": 73635 }, { "epoch": 0.1404, "grad_norm": 18.91353416442871, "learning_rate": 1.3313636363636364e-06, "loss": 6.342918014526367, "step": 73640 }, { "epoch": 0.14045, "grad_norm": 25.381467819213867, "learning_rate": 1.3311111111111113e-06, "loss": 6.423371887207031, "step": 73645 }, { "epoch": 0.1405, "grad_norm": 12.307013511657715, "learning_rate": 1.3308585858585859e-06, "loss": 6.344001007080078, "step": 73650 }, { "epoch": 0.14055, "grad_norm": 8.266462326049805, "learning_rate": 1.330606060606061e-06, "loss": 6.2265678405761715, "step": 73655 }, { "epoch": 0.1406, "grad_norm": 5.943678379058838, "learning_rate": 1.3303535353535354e-06, "loss": 6.250637817382812, "step": 73660 }, { "epoch": 0.14065, "grad_norm": 7.901583671569824, "learning_rate": 1.3301010101010104e-06, "loss": 6.252055358886719, "step": 73665 }, { "epoch": 0.1407, "grad_norm": 4.218603610992432, "learning_rate": 1.329848484848485e-06, "loss": 6.370100021362305, "step": 73670 }, { "epoch": 0.14075, "grad_norm": 10.783609390258789, "learning_rate": 1.3295959595959599e-06, "loss": 6.224441146850586, "step": 73675 }, { "epoch": 0.1408, "grad_norm": 6.30511474609375, "learning_rate": 1.3293434343434345e-06, "loss": 6.234968566894532, "step": 73680 }, { "epoch": 0.14085, "grad_norm": 8.472932815551758, "learning_rate": 1.3290909090909094e-06, "loss": 6.224680709838867, "step": 73685 }, { "epoch": 0.1409, "grad_norm": 7.523307800292969, "learning_rate": 1.328838383838384e-06, "loss": 6.235917663574218, "step": 73690 }, { "epoch": 0.14095, "grad_norm": 8.326972007751465, "learning_rate": 1.3285858585858588e-06, "loss": 6.249770736694336, "step": 73695 }, { "epoch": 0.141, "grad_norm": 7.83673095703125, "learning_rate": 1.3283333333333335e-06, "loss": 6.252391052246094, "step": 73700 }, { "epoch": 0.14105, "grad_norm": 7.4674272537231445, "learning_rate": 1.3280808080808083e-06, "loss": 6.236809921264649, "step": 73705 }, { "epoch": 0.1411, "grad_norm": 24.767810821533203, "learning_rate": 1.327828282828283e-06, "loss": 6.17261962890625, "step": 73710 }, { "epoch": 0.14115, "grad_norm": 6.128482818603516, "learning_rate": 1.3275757575757578e-06, "loss": 6.22806396484375, "step": 73715 }, { "epoch": 0.1412, "grad_norm": 4.962475776672363, "learning_rate": 1.3273232323232324e-06, "loss": 6.261818313598633, "step": 73720 }, { "epoch": 0.14125, "grad_norm": 5.520827770233154, "learning_rate": 1.3270707070707072e-06, "loss": 6.228826904296875, "step": 73725 }, { "epoch": 0.1413, "grad_norm": 10.732525825500488, "learning_rate": 1.3268181818181819e-06, "loss": 6.232879257202148, "step": 73730 }, { "epoch": 0.14135, "grad_norm": 10.992622375488281, "learning_rate": 1.3265656565656567e-06, "loss": 6.324156951904297, "step": 73735 }, { "epoch": 0.1414, "grad_norm": 7.495684623718262, "learning_rate": 1.3263131313131313e-06, "loss": 6.240663146972656, "step": 73740 }, { "epoch": 0.14145, "grad_norm": 4.494860649108887, "learning_rate": 1.3260606060606062e-06, "loss": 6.303411865234375, "step": 73745 }, { "epoch": 0.1415, "grad_norm": 23.42270851135254, "learning_rate": 1.3258080808080808e-06, "loss": 6.353924179077149, "step": 73750 }, { "epoch": 0.14155, "grad_norm": 5.879036903381348, "learning_rate": 1.3255555555555557e-06, "loss": 6.26049575805664, "step": 73755 }, { "epoch": 0.1416, "grad_norm": 4.420217990875244, "learning_rate": 1.3253030303030303e-06, "loss": 6.267494964599609, "step": 73760 }, { "epoch": 0.14165, "grad_norm": 6.871709823608398, "learning_rate": 1.3250505050505053e-06, "loss": 6.239924621582031, "step": 73765 }, { "epoch": 0.1417, "grad_norm": 4.9247965812683105, "learning_rate": 1.3247979797979798e-06, "loss": 6.213812255859375, "step": 73770 }, { "epoch": 0.14175, "grad_norm": 7.739094257354736, "learning_rate": 1.3245454545454548e-06, "loss": 6.274115371704101, "step": 73775 }, { "epoch": 0.1418, "grad_norm": 7.140951633453369, "learning_rate": 1.3242929292929294e-06, "loss": 6.233832550048828, "step": 73780 }, { "epoch": 0.14185, "grad_norm": 9.079277992248535, "learning_rate": 1.3240404040404043e-06, "loss": 6.221986770629883, "step": 73785 }, { "epoch": 0.1419, "grad_norm": 6.001097202301025, "learning_rate": 1.323787878787879e-06, "loss": 6.267258071899414, "step": 73790 }, { "epoch": 0.14195, "grad_norm": 4.21389102935791, "learning_rate": 1.3235353535353538e-06, "loss": 6.415662384033203, "step": 73795 }, { "epoch": 0.142, "grad_norm": 10.187236785888672, "learning_rate": 1.3232828282828284e-06, "loss": 6.337351989746094, "step": 73800 }, { "epoch": 0.14205, "grad_norm": 31.104305267333984, "learning_rate": 1.3230303030303032e-06, "loss": 6.249983596801758, "step": 73805 }, { "epoch": 0.1421, "grad_norm": 22.743877410888672, "learning_rate": 1.3227777777777779e-06, "loss": 6.3596752166748045, "step": 73810 }, { "epoch": 0.14215, "grad_norm": 14.80178165435791, "learning_rate": 1.3225252525252527e-06, "loss": 6.599275207519531, "step": 73815 }, { "epoch": 0.1422, "grad_norm": 11.075322151184082, "learning_rate": 1.3222727272727273e-06, "loss": 6.335784912109375, "step": 73820 }, { "epoch": 0.14225, "grad_norm": 8.723320960998535, "learning_rate": 1.3220202020202022e-06, "loss": 6.238345336914063, "step": 73825 }, { "epoch": 0.1423, "grad_norm": 7.232884883880615, "learning_rate": 1.3217676767676768e-06, "loss": 6.216961669921875, "step": 73830 }, { "epoch": 0.14235, "grad_norm": 7.156980991363525, "learning_rate": 1.3215151515151517e-06, "loss": 6.26635856628418, "step": 73835 }, { "epoch": 0.1424, "grad_norm": 7.010166645050049, "learning_rate": 1.3212626262626263e-06, "loss": 6.204489135742188, "step": 73840 }, { "epoch": 0.14245, "grad_norm": 7.105006217956543, "learning_rate": 1.3210101010101011e-06, "loss": 6.283420562744141, "step": 73845 }, { "epoch": 0.1425, "grad_norm": 5.642064094543457, "learning_rate": 1.3207575757575758e-06, "loss": 6.254133224487305, "step": 73850 }, { "epoch": 0.14255, "grad_norm": 6.170275688171387, "learning_rate": 1.3205050505050506e-06, "loss": 6.24780044555664, "step": 73855 }, { "epoch": 0.1426, "grad_norm": 6.233284950256348, "learning_rate": 1.3202525252525252e-06, "loss": 6.253822708129883, "step": 73860 }, { "epoch": 0.14265, "grad_norm": 5.106185436248779, "learning_rate": 1.32e-06, "loss": 6.26630744934082, "step": 73865 }, { "epoch": 0.1427, "grad_norm": 4.562084674835205, "learning_rate": 1.3197474747474747e-06, "loss": 6.272969055175781, "step": 73870 }, { "epoch": 0.14275, "grad_norm": 9.973880767822266, "learning_rate": 1.3194949494949498e-06, "loss": 6.2316947937011715, "step": 73875 }, { "epoch": 0.1428, "grad_norm": 6.611664295196533, "learning_rate": 1.3192424242424242e-06, "loss": 6.28462028503418, "step": 73880 }, { "epoch": 0.14285, "grad_norm": 6.152750015258789, "learning_rate": 1.3189898989898992e-06, "loss": 6.2198341369628904, "step": 73885 }, { "epoch": 0.1429, "grad_norm": 7.919829368591309, "learning_rate": 1.3187373737373739e-06, "loss": 6.228375244140625, "step": 73890 }, { "epoch": 0.14295, "grad_norm": 5.2195281982421875, "learning_rate": 1.3184848484848487e-06, "loss": 6.343362045288086, "step": 73895 }, { "epoch": 0.143, "grad_norm": 9.82302474975586, "learning_rate": 1.3182323232323233e-06, "loss": 6.231008148193359, "step": 73900 }, { "epoch": 0.14305, "grad_norm": 12.215896606445312, "learning_rate": 1.3179797979797982e-06, "loss": 6.005696105957031, "step": 73905 }, { "epoch": 0.1431, "grad_norm": 4.350798606872559, "learning_rate": 1.3177272727272728e-06, "loss": 6.26783561706543, "step": 73910 }, { "epoch": 0.14315, "grad_norm": 25.52348518371582, "learning_rate": 1.3174747474747476e-06, "loss": 6.282317733764648, "step": 73915 }, { "epoch": 0.1432, "grad_norm": 7.981392860412598, "learning_rate": 1.3172222222222223e-06, "loss": 6.2885276794433596, "step": 73920 }, { "epoch": 0.14325, "grad_norm": 8.465792655944824, "learning_rate": 1.3169696969696971e-06, "loss": 6.231646347045898, "step": 73925 }, { "epoch": 0.1433, "grad_norm": 5.38322639465332, "learning_rate": 1.3167171717171717e-06, "loss": 6.2197013854980465, "step": 73930 }, { "epoch": 0.14335, "grad_norm": 7.899281024932861, "learning_rate": 1.3164646464646466e-06, "loss": 6.239177703857422, "step": 73935 }, { "epoch": 0.1434, "grad_norm": 8.81231689453125, "learning_rate": 1.3162121212121212e-06, "loss": 6.207391357421875, "step": 73940 }, { "epoch": 0.14345, "grad_norm": 4.281449317932129, "learning_rate": 1.315959595959596e-06, "loss": 6.251656341552734, "step": 73945 }, { "epoch": 0.1435, "grad_norm": 8.219447135925293, "learning_rate": 1.3157070707070707e-06, "loss": 6.211940765380859, "step": 73950 }, { "epoch": 0.14355, "grad_norm": 3.5438432693481445, "learning_rate": 1.3154545454545455e-06, "loss": 6.261522674560547, "step": 73955 }, { "epoch": 0.1436, "grad_norm": 5.224743366241455, "learning_rate": 1.3152020202020202e-06, "loss": 6.23126220703125, "step": 73960 }, { "epoch": 0.14365, "grad_norm": 7.891627788543701, "learning_rate": 1.314949494949495e-06, "loss": 6.308998870849609, "step": 73965 }, { "epoch": 0.1437, "grad_norm": 10.626019477844238, "learning_rate": 1.3146969696969696e-06, "loss": 6.393842697143555, "step": 73970 }, { "epoch": 0.14375, "grad_norm": 6.220195770263672, "learning_rate": 1.3144444444444447e-06, "loss": 6.230384445190429, "step": 73975 }, { "epoch": 0.1438, "grad_norm": 8.449676513671875, "learning_rate": 1.314191919191919e-06, "loss": 6.235114669799804, "step": 73980 }, { "epoch": 0.14385, "grad_norm": 5.803347587585449, "learning_rate": 1.3139393939393942e-06, "loss": 6.26820182800293, "step": 73985 }, { "epoch": 0.1439, "grad_norm": 7.329825401306152, "learning_rate": 1.3136868686868686e-06, "loss": 6.3360542297363285, "step": 73990 }, { "epoch": 0.14395, "grad_norm": 4.179161071777344, "learning_rate": 1.3134343434343436e-06, "loss": 6.223100280761718, "step": 73995 }, { "epoch": 0.144, "grad_norm": 6.306048393249512, "learning_rate": 1.3131818181818183e-06, "loss": 6.249840545654297, "step": 74000 }, { "epoch": 0.14405, "grad_norm": 6.038132667541504, "learning_rate": 1.312929292929293e-06, "loss": 6.22032356262207, "step": 74005 }, { "epoch": 0.1441, "grad_norm": 11.504322052001953, "learning_rate": 1.3126767676767677e-06, "loss": 6.327765655517578, "step": 74010 }, { "epoch": 0.14415, "grad_norm": 6.935719966888428, "learning_rate": 1.3124242424242426e-06, "loss": 6.234482192993164, "step": 74015 }, { "epoch": 0.1442, "grad_norm": 12.291118621826172, "learning_rate": 1.3121717171717172e-06, "loss": 6.229669570922852, "step": 74020 }, { "epoch": 0.14425, "grad_norm": 4.314820289611816, "learning_rate": 1.311919191919192e-06, "loss": 6.196693420410156, "step": 74025 }, { "epoch": 0.1443, "grad_norm": 4.835911273956299, "learning_rate": 1.3116666666666667e-06, "loss": 6.251676177978515, "step": 74030 }, { "epoch": 0.14435, "grad_norm": 5.568008899688721, "learning_rate": 1.3114141414141415e-06, "loss": 6.219872283935547, "step": 74035 }, { "epoch": 0.1444, "grad_norm": 8.831846237182617, "learning_rate": 1.3111616161616161e-06, "loss": 6.2275947570800785, "step": 74040 }, { "epoch": 0.14445, "grad_norm": 5.046711444854736, "learning_rate": 1.310909090909091e-06, "loss": 6.253678894042968, "step": 74045 }, { "epoch": 0.1445, "grad_norm": 6.639548301696777, "learning_rate": 1.3106565656565656e-06, "loss": 6.204153442382813, "step": 74050 }, { "epoch": 0.14455, "grad_norm": 4.518559455871582, "learning_rate": 1.3104040404040405e-06, "loss": 6.212227249145508, "step": 74055 }, { "epoch": 0.1446, "grad_norm": 11.805069923400879, "learning_rate": 1.3101515151515153e-06, "loss": 6.241442108154297, "step": 74060 }, { "epoch": 0.14465, "grad_norm": 5.295022010803223, "learning_rate": 1.30989898989899e-06, "loss": 6.257531356811524, "step": 74065 }, { "epoch": 0.1447, "grad_norm": 12.453039169311523, "learning_rate": 1.309646464646465e-06, "loss": 6.221036529541015, "step": 74070 }, { "epoch": 0.14475, "grad_norm": 5.757079124450684, "learning_rate": 1.3093939393939394e-06, "loss": 6.243160247802734, "step": 74075 }, { "epoch": 0.1448, "grad_norm": 9.539419174194336, "learning_rate": 1.3091414141414145e-06, "loss": 6.349324035644531, "step": 74080 }, { "epoch": 0.14485, "grad_norm": 5.553004264831543, "learning_rate": 1.308888888888889e-06, "loss": 6.362334060668945, "step": 74085 }, { "epoch": 0.1449, "grad_norm": 4.831832408905029, "learning_rate": 1.308636363636364e-06, "loss": 6.270175933837891, "step": 74090 }, { "epoch": 0.14495, "grad_norm": 5.539151191711426, "learning_rate": 1.3083838383838386e-06, "loss": 6.23582649230957, "step": 74095 }, { "epoch": 0.145, "grad_norm": 4.593319892883301, "learning_rate": 1.3081313131313134e-06, "loss": 6.225848770141601, "step": 74100 }, { "epoch": 0.14505, "grad_norm": 4.725855827331543, "learning_rate": 1.307878787878788e-06, "loss": 6.225731277465821, "step": 74105 }, { "epoch": 0.1451, "grad_norm": 17.21245002746582, "learning_rate": 1.3076262626262629e-06, "loss": 6.292733001708984, "step": 74110 }, { "epoch": 0.14515, "grad_norm": 4.472686290740967, "learning_rate": 1.3073737373737375e-06, "loss": 6.3415687561035154, "step": 74115 }, { "epoch": 0.1452, "grad_norm": 5.940283298492432, "learning_rate": 1.3071212121212123e-06, "loss": 6.273468017578125, "step": 74120 }, { "epoch": 0.14525, "grad_norm": 6.287174701690674, "learning_rate": 1.306868686868687e-06, "loss": 6.167076110839844, "step": 74125 }, { "epoch": 0.1453, "grad_norm": 7.678295612335205, "learning_rate": 1.3066161616161618e-06, "loss": 6.231556701660156, "step": 74130 }, { "epoch": 0.14535, "grad_norm": 5.749528884887695, "learning_rate": 1.3063636363636364e-06, "loss": 6.257829666137695, "step": 74135 }, { "epoch": 0.1454, "grad_norm": 3.6789820194244385, "learning_rate": 1.3061111111111113e-06, "loss": 6.206572341918945, "step": 74140 }, { "epoch": 0.14545, "grad_norm": 9.07838249206543, "learning_rate": 1.305858585858586e-06, "loss": 6.249381637573242, "step": 74145 }, { "epoch": 0.1455, "grad_norm": 6.990270614624023, "learning_rate": 1.3056060606060608e-06, "loss": 6.203200912475586, "step": 74150 }, { "epoch": 0.14555, "grad_norm": 11.976143836975098, "learning_rate": 1.3053535353535354e-06, "loss": 6.197963714599609, "step": 74155 }, { "epoch": 0.1456, "grad_norm": 8.093676567077637, "learning_rate": 1.3051010101010102e-06, "loss": 6.2532508850097654, "step": 74160 }, { "epoch": 0.14565, "grad_norm": 7.428822040557861, "learning_rate": 1.3048484848484849e-06, "loss": 6.2108406066894535, "step": 74165 }, { "epoch": 0.1457, "grad_norm": 10.163280487060547, "learning_rate": 1.3045959595959597e-06, "loss": 6.269666290283203, "step": 74170 }, { "epoch": 0.14575, "grad_norm": 4.856110572814941, "learning_rate": 1.3043434343434343e-06, "loss": 6.275127410888672, "step": 74175 }, { "epoch": 0.1458, "grad_norm": 8.046445846557617, "learning_rate": 1.3040909090909094e-06, "loss": 6.348670959472656, "step": 74180 }, { "epoch": 0.14585, "grad_norm": 6.830883979797363, "learning_rate": 1.3038383838383838e-06, "loss": 6.251253890991211, "step": 74185 }, { "epoch": 0.1459, "grad_norm": 4.199123382568359, "learning_rate": 1.3035858585858589e-06, "loss": 6.211019515991211, "step": 74190 }, { "epoch": 0.14595, "grad_norm": 7.766753673553467, "learning_rate": 1.3033333333333335e-06, "loss": 6.207057952880859, "step": 74195 }, { "epoch": 0.146, "grad_norm": 4.514311790466309, "learning_rate": 1.3030808080808083e-06, "loss": 6.226253509521484, "step": 74200 }, { "epoch": 0.14605, "grad_norm": 6.786512851715088, "learning_rate": 1.302828282828283e-06, "loss": 6.232072830200195, "step": 74205 }, { "epoch": 0.1461, "grad_norm": 15.933206558227539, "learning_rate": 1.3025757575757578e-06, "loss": 6.234319686889648, "step": 74210 }, { "epoch": 0.14615, "grad_norm": 6.21903133392334, "learning_rate": 1.3023232323232324e-06, "loss": 6.2156730651855465, "step": 74215 }, { "epoch": 0.1462, "grad_norm": 6.269929885864258, "learning_rate": 1.3020707070707073e-06, "loss": 6.34631462097168, "step": 74220 }, { "epoch": 0.14625, "grad_norm": 7.663226127624512, "learning_rate": 1.301818181818182e-06, "loss": 6.26288070678711, "step": 74225 }, { "epoch": 0.1463, "grad_norm": 7.080150604248047, "learning_rate": 1.3015656565656567e-06, "loss": 6.288679122924805, "step": 74230 }, { "epoch": 0.14635, "grad_norm": 5.012781620025635, "learning_rate": 1.3013131313131314e-06, "loss": 6.212924575805664, "step": 74235 }, { "epoch": 0.1464, "grad_norm": 41.74115753173828, "learning_rate": 1.3010606060606062e-06, "loss": 6.238952255249023, "step": 74240 }, { "epoch": 0.14645, "grad_norm": 9.361504554748535, "learning_rate": 1.3008080808080808e-06, "loss": 6.247830581665039, "step": 74245 }, { "epoch": 0.1465, "grad_norm": 4.5163187980651855, "learning_rate": 1.3005555555555557e-06, "loss": 6.259066772460938, "step": 74250 }, { "epoch": 0.14655, "grad_norm": 11.529528617858887, "learning_rate": 1.3003030303030303e-06, "loss": 6.2200878143310545, "step": 74255 }, { "epoch": 0.1466, "grad_norm": 6.6402668952941895, "learning_rate": 1.3000505050505052e-06, "loss": 6.211861419677734, "step": 74260 }, { "epoch": 0.14665, "grad_norm": 4.822423934936523, "learning_rate": 1.2997979797979798e-06, "loss": 6.2390602111816404, "step": 74265 }, { "epoch": 0.1467, "grad_norm": 7.06641149520874, "learning_rate": 1.2995454545454546e-06, "loss": 6.20814094543457, "step": 74270 }, { "epoch": 0.14675, "grad_norm": 5.341720104217529, "learning_rate": 1.2992929292929293e-06, "loss": 6.223938369750977, "step": 74275 }, { "epoch": 0.1468, "grad_norm": 3.9533658027648926, "learning_rate": 1.2990404040404041e-06, "loss": 6.259819793701172, "step": 74280 }, { "epoch": 0.14685, "grad_norm": 4.059379577636719, "learning_rate": 1.2987878787878787e-06, "loss": 6.158832550048828, "step": 74285 }, { "epoch": 0.1469, "grad_norm": 8.408429145812988, "learning_rate": 1.2985353535353538e-06, "loss": 6.287986373901367, "step": 74290 }, { "epoch": 0.14695, "grad_norm": 5.930466651916504, "learning_rate": 1.2982828282828282e-06, "loss": 6.222241973876953, "step": 74295 }, { "epoch": 0.147, "grad_norm": 6.936233043670654, "learning_rate": 1.2980303030303033e-06, "loss": 6.244640350341797, "step": 74300 }, { "epoch": 0.14705, "grad_norm": 8.908565521240234, "learning_rate": 1.2977777777777779e-06, "loss": 6.321146392822266, "step": 74305 }, { "epoch": 0.1471, "grad_norm": 15.211065292358398, "learning_rate": 1.2975252525252527e-06, "loss": 6.202307891845703, "step": 74310 }, { "epoch": 0.14715, "grad_norm": 11.278511047363281, "learning_rate": 1.2972727272727274e-06, "loss": 6.2487953186035154, "step": 74315 }, { "epoch": 0.1472, "grad_norm": 4.312129020690918, "learning_rate": 1.2970202020202022e-06, "loss": 6.225859832763672, "step": 74320 }, { "epoch": 0.14725, "grad_norm": 21.812089920043945, "learning_rate": 1.2967676767676768e-06, "loss": 6.280328750610352, "step": 74325 }, { "epoch": 0.1473, "grad_norm": 7.79986572265625, "learning_rate": 1.2965151515151517e-06, "loss": 6.2068023681640625, "step": 74330 }, { "epoch": 0.14735, "grad_norm": 7.714255332946777, "learning_rate": 1.2962626262626263e-06, "loss": 6.217417144775391, "step": 74335 }, { "epoch": 0.1474, "grad_norm": 4.751977443695068, "learning_rate": 1.2960101010101012e-06, "loss": 6.255190658569336, "step": 74340 }, { "epoch": 0.14745, "grad_norm": 7.427833080291748, "learning_rate": 1.2957575757575758e-06, "loss": 6.262841796875, "step": 74345 }, { "epoch": 0.1475, "grad_norm": 9.20028305053711, "learning_rate": 1.2955050505050506e-06, "loss": 6.186980819702148, "step": 74350 }, { "epoch": 0.14755, "grad_norm": 5.387282371520996, "learning_rate": 1.2952525252525253e-06, "loss": 6.241214370727539, "step": 74355 }, { "epoch": 0.1476, "grad_norm": 4.229618549346924, "learning_rate": 1.295e-06, "loss": 6.2285308837890625, "step": 74360 }, { "epoch": 0.14765, "grad_norm": 6.258855819702148, "learning_rate": 1.2947474747474747e-06, "loss": 6.202434158325195, "step": 74365 }, { "epoch": 0.1477, "grad_norm": 2.904984951019287, "learning_rate": 1.2944949494949496e-06, "loss": 6.244039916992188, "step": 74370 }, { "epoch": 0.14775, "grad_norm": 5.980227470397949, "learning_rate": 1.2942424242424242e-06, "loss": 6.184892272949218, "step": 74375 }, { "epoch": 0.1478, "grad_norm": 6.676224708557129, "learning_rate": 1.293989898989899e-06, "loss": 6.237839126586914, "step": 74380 }, { "epoch": 0.14785, "grad_norm": 6.899206161499023, "learning_rate": 1.2937373737373737e-06, "loss": 6.2760498046875, "step": 74385 }, { "epoch": 0.1479, "grad_norm": 5.673489093780518, "learning_rate": 1.2934848484848487e-06, "loss": 6.2575233459472654, "step": 74390 }, { "epoch": 0.14795, "grad_norm": 3.8072524070739746, "learning_rate": 1.2932323232323231e-06, "loss": 6.181300354003906, "step": 74395 }, { "epoch": 0.148, "grad_norm": 6.405351638793945, "learning_rate": 1.2929797979797982e-06, "loss": 6.247571182250977, "step": 74400 }, { "epoch": 0.14805, "grad_norm": 4.62186861038208, "learning_rate": 1.2927272727272728e-06, "loss": 6.246406173706054, "step": 74405 }, { "epoch": 0.1481, "grad_norm": 5.682870864868164, "learning_rate": 1.2924747474747477e-06, "loss": 6.309776306152344, "step": 74410 }, { "epoch": 0.14815, "grad_norm": 26.960039138793945, "learning_rate": 1.2922222222222223e-06, "loss": 6.625910949707031, "step": 74415 }, { "epoch": 0.1482, "grad_norm": 8.070480346679688, "learning_rate": 1.2919696969696971e-06, "loss": 6.239234924316406, "step": 74420 }, { "epoch": 0.14825, "grad_norm": 3.950868606567383, "learning_rate": 1.2917171717171718e-06, "loss": 6.244194412231446, "step": 74425 }, { "epoch": 0.1483, "grad_norm": 6.946744918823242, "learning_rate": 1.2914646464646466e-06, "loss": 6.248828506469726, "step": 74430 }, { "epoch": 0.14835, "grad_norm": 7.807821750640869, "learning_rate": 1.2912121212121212e-06, "loss": 6.195366668701172, "step": 74435 }, { "epoch": 0.1484, "grad_norm": 3.9510111808776855, "learning_rate": 1.290959595959596e-06, "loss": 6.226460266113281, "step": 74440 }, { "epoch": 0.14845, "grad_norm": 5.523468494415283, "learning_rate": 1.2907070707070707e-06, "loss": 6.26825180053711, "step": 74445 }, { "epoch": 0.1485, "grad_norm": 5.2713117599487305, "learning_rate": 1.2904545454545456e-06, "loss": 6.210486602783203, "step": 74450 }, { "epoch": 0.14855, "grad_norm": 7.53066873550415, "learning_rate": 1.2902020202020202e-06, "loss": 6.207552337646485, "step": 74455 }, { "epoch": 0.1486, "grad_norm": 4.097397804260254, "learning_rate": 1.289949494949495e-06, "loss": 6.211825561523438, "step": 74460 }, { "epoch": 0.14865, "grad_norm": 9.648951530456543, "learning_rate": 1.2896969696969697e-06, "loss": 6.214399337768555, "step": 74465 }, { "epoch": 0.1487, "grad_norm": 5.74350118637085, "learning_rate": 1.2894444444444445e-06, "loss": 6.246177673339844, "step": 74470 }, { "epoch": 0.14875, "grad_norm": 7.364799976348877, "learning_rate": 1.2891919191919191e-06, "loss": 6.161995697021484, "step": 74475 }, { "epoch": 0.1488, "grad_norm": 5.476169109344482, "learning_rate": 1.288939393939394e-06, "loss": 6.2545623779296875, "step": 74480 }, { "epoch": 0.14885, "grad_norm": 10.190523147583008, "learning_rate": 1.288686868686869e-06, "loss": 6.278118515014649, "step": 74485 }, { "epoch": 0.1489, "grad_norm": 3.4314637184143066, "learning_rate": 1.2884343434343434e-06, "loss": 6.2302490234375, "step": 74490 }, { "epoch": 0.14895, "grad_norm": 5.03079080581665, "learning_rate": 1.2881818181818185e-06, "loss": 6.2854057312011715, "step": 74495 }, { "epoch": 0.149, "grad_norm": 5.025050640106201, "learning_rate": 1.2879292929292931e-06, "loss": 6.273344039916992, "step": 74500 }, { "epoch": 0.14905, "grad_norm": 7.986606121063232, "learning_rate": 1.287676767676768e-06, "loss": 6.249858093261719, "step": 74505 }, { "epoch": 0.1491, "grad_norm": 6.2098236083984375, "learning_rate": 1.2874242424242426e-06, "loss": 6.422475433349609, "step": 74510 }, { "epoch": 0.14915, "grad_norm": 11.08008098602295, "learning_rate": 1.2871717171717174e-06, "loss": 6.289669036865234, "step": 74515 }, { "epoch": 0.1492, "grad_norm": 5.182950019836426, "learning_rate": 1.286919191919192e-06, "loss": 6.2461200714111325, "step": 74520 }, { "epoch": 0.14925, "grad_norm": 8.169476509094238, "learning_rate": 1.286666666666667e-06, "loss": 6.2734527587890625, "step": 74525 }, { "epoch": 0.1493, "grad_norm": 6.361490726470947, "learning_rate": 1.2864141414141415e-06, "loss": 6.264346313476563, "step": 74530 }, { "epoch": 0.14935, "grad_norm": 5.887649059295654, "learning_rate": 1.2861616161616164e-06, "loss": 6.23408088684082, "step": 74535 }, { "epoch": 0.1494, "grad_norm": 6.784523010253906, "learning_rate": 1.285909090909091e-06, "loss": 6.242623901367187, "step": 74540 }, { "epoch": 0.14945, "grad_norm": 6.6938300132751465, "learning_rate": 1.2856565656565659e-06, "loss": 6.173602294921875, "step": 74545 }, { "epoch": 0.1495, "grad_norm": 4.31292200088501, "learning_rate": 1.2854040404040405e-06, "loss": 6.234366607666016, "step": 74550 }, { "epoch": 0.14955, "grad_norm": 6.607089519500732, "learning_rate": 1.2851515151515153e-06, "loss": 6.227011871337891, "step": 74555 }, { "epoch": 0.1496, "grad_norm": 4.630942344665527, "learning_rate": 1.28489898989899e-06, "loss": 6.222422027587891, "step": 74560 }, { "epoch": 0.14965, "grad_norm": 7.1147284507751465, "learning_rate": 1.2846464646464648e-06, "loss": 6.2253883361816404, "step": 74565 }, { "epoch": 0.1497, "grad_norm": 4.46334981918335, "learning_rate": 1.2843939393939394e-06, "loss": 6.207757568359375, "step": 74570 }, { "epoch": 0.14975, "grad_norm": 3.3613367080688477, "learning_rate": 1.2841414141414143e-06, "loss": 6.242367935180664, "step": 74575 }, { "epoch": 0.1498, "grad_norm": 30.29077911376953, "learning_rate": 1.283888888888889e-06, "loss": 5.836056518554687, "step": 74580 }, { "epoch": 0.14985, "grad_norm": 4.033308029174805, "learning_rate": 1.2836363636363637e-06, "loss": 6.113925170898438, "step": 74585 }, { "epoch": 0.1499, "grad_norm": 7.289294242858887, "learning_rate": 1.2833838383838384e-06, "loss": 6.26716423034668, "step": 74590 }, { "epoch": 0.14995, "grad_norm": 4.874955177307129, "learning_rate": 1.2831313131313134e-06, "loss": 6.289202117919922, "step": 74595 }, { "epoch": 0.15, "grad_norm": 7.75226354598999, "learning_rate": 1.2828787878787878e-06, "loss": 6.218993759155273, "step": 74600 }, { "epoch": 0.15005, "grad_norm": 6.137247085571289, "learning_rate": 1.282626262626263e-06, "loss": 6.358692932128906, "step": 74605 }, { "epoch": 0.1501, "grad_norm": 7.086844444274902, "learning_rate": 1.2823737373737375e-06, "loss": 6.284283065795899, "step": 74610 }, { "epoch": 0.15015, "grad_norm": 3.559983015060425, "learning_rate": 1.2821212121212124e-06, "loss": 6.376381683349609, "step": 74615 }, { "epoch": 0.1502, "grad_norm": 6.99554967880249, "learning_rate": 1.281868686868687e-06, "loss": 6.225765991210937, "step": 74620 }, { "epoch": 0.15025, "grad_norm": 5.8353681564331055, "learning_rate": 1.2816161616161618e-06, "loss": 6.219741439819336, "step": 74625 }, { "epoch": 0.1503, "grad_norm": 17.080291748046875, "learning_rate": 1.2813636363636365e-06, "loss": 6.307320022583008, "step": 74630 }, { "epoch": 0.15035, "grad_norm": 5.472774028778076, "learning_rate": 1.2811111111111113e-06, "loss": 6.199253082275391, "step": 74635 }, { "epoch": 0.1504, "grad_norm": 17.022594451904297, "learning_rate": 1.280858585858586e-06, "loss": 6.385768890380859, "step": 74640 }, { "epoch": 0.15045, "grad_norm": 25.243450164794922, "learning_rate": 1.2806060606060608e-06, "loss": 6.245241928100586, "step": 74645 }, { "epoch": 0.1505, "grad_norm": 9.694743156433105, "learning_rate": 1.2803535353535354e-06, "loss": 6.262911987304688, "step": 74650 }, { "epoch": 0.15055, "grad_norm": 6.3824262619018555, "learning_rate": 1.2801010101010103e-06, "loss": 6.297955703735352, "step": 74655 }, { "epoch": 0.1506, "grad_norm": 10.676584243774414, "learning_rate": 1.2798484848484849e-06, "loss": 6.253322601318359, "step": 74660 }, { "epoch": 0.15065, "grad_norm": 8.847744941711426, "learning_rate": 1.2795959595959597e-06, "loss": 6.187360763549805, "step": 74665 }, { "epoch": 0.1507, "grad_norm": 4.388944625854492, "learning_rate": 1.2793434343434344e-06, "loss": 6.3659912109375, "step": 74670 }, { "epoch": 0.15075, "grad_norm": 6.938973426818848, "learning_rate": 1.2790909090909092e-06, "loss": 6.243964385986328, "step": 74675 }, { "epoch": 0.1508, "grad_norm": 4.024846076965332, "learning_rate": 1.2788383838383838e-06, "loss": 6.244544219970703, "step": 74680 }, { "epoch": 0.15085, "grad_norm": 4.780511856079102, "learning_rate": 1.2785858585858587e-06, "loss": 6.243486022949218, "step": 74685 }, { "epoch": 0.1509, "grad_norm": 4.625631332397461, "learning_rate": 1.2783333333333333e-06, "loss": 6.230573272705078, "step": 74690 }, { "epoch": 0.15095, "grad_norm": 10.096856117248535, "learning_rate": 1.2780808080808084e-06, "loss": 6.414005279541016, "step": 74695 }, { "epoch": 0.151, "grad_norm": 11.864567756652832, "learning_rate": 1.2778282828282828e-06, "loss": 6.22706298828125, "step": 74700 }, { "epoch": 0.15105, "grad_norm": 5.36466121673584, "learning_rate": 1.2775757575757578e-06, "loss": 6.189449691772461, "step": 74705 }, { "epoch": 0.1511, "grad_norm": 8.950313568115234, "learning_rate": 1.2773232323232322e-06, "loss": 6.156805419921875, "step": 74710 }, { "epoch": 0.15115, "grad_norm": 9.387650489807129, "learning_rate": 1.2770707070707073e-06, "loss": 6.24068603515625, "step": 74715 }, { "epoch": 0.1512, "grad_norm": 6.565792560577393, "learning_rate": 1.276818181818182e-06, "loss": 6.226278686523438, "step": 74720 }, { "epoch": 0.15125, "grad_norm": 5.9832634925842285, "learning_rate": 1.2765656565656568e-06, "loss": 6.271955871582032, "step": 74725 }, { "epoch": 0.1513, "grad_norm": 5.024960994720459, "learning_rate": 1.2763131313131314e-06, "loss": 6.253292846679687, "step": 74730 }, { "epoch": 0.15135, "grad_norm": 5.508500576019287, "learning_rate": 1.2760606060606062e-06, "loss": 6.219696426391602, "step": 74735 }, { "epoch": 0.1514, "grad_norm": 7.980085849761963, "learning_rate": 1.2758080808080809e-06, "loss": 6.1957275390625, "step": 74740 }, { "epoch": 0.15145, "grad_norm": 3.7758705615997314, "learning_rate": 1.2755555555555557e-06, "loss": 6.26617317199707, "step": 74745 }, { "epoch": 0.1515, "grad_norm": 7.0287699699401855, "learning_rate": 1.2753030303030303e-06, "loss": 6.212626266479492, "step": 74750 }, { "epoch": 0.15155, "grad_norm": 6.7096662521362305, "learning_rate": 1.2750505050505052e-06, "loss": 6.28082046508789, "step": 74755 }, { "epoch": 0.1516, "grad_norm": 5.706987380981445, "learning_rate": 1.2747979797979798e-06, "loss": 6.254047775268555, "step": 74760 }, { "epoch": 0.15165, "grad_norm": 7.194350242614746, "learning_rate": 1.2745454545454547e-06, "loss": 6.345429992675781, "step": 74765 }, { "epoch": 0.1517, "grad_norm": 6.433564186096191, "learning_rate": 1.2742929292929293e-06, "loss": 6.255047607421875, "step": 74770 }, { "epoch": 0.15175, "grad_norm": 8.580489158630371, "learning_rate": 1.2740404040404041e-06, "loss": 6.2576904296875, "step": 74775 }, { "epoch": 0.1518, "grad_norm": 5.910364151000977, "learning_rate": 1.2737878787878788e-06, "loss": 6.226593399047852, "step": 74780 }, { "epoch": 0.15185, "grad_norm": 7.927070617675781, "learning_rate": 1.2735353535353536e-06, "loss": 6.295431518554688, "step": 74785 }, { "epoch": 0.1519, "grad_norm": 4.653357982635498, "learning_rate": 1.2732828282828282e-06, "loss": 6.242361831665039, "step": 74790 }, { "epoch": 0.15195, "grad_norm": 6.908787250518799, "learning_rate": 1.273030303030303e-06, "loss": 6.239910507202149, "step": 74795 }, { "epoch": 0.152, "grad_norm": 10.292915344238281, "learning_rate": 1.2727777777777777e-06, "loss": 6.23939208984375, "step": 74800 }, { "epoch": 0.15205, "grad_norm": 8.111412048339844, "learning_rate": 1.2725252525252528e-06, "loss": 6.225950622558594, "step": 74805 }, { "epoch": 0.1521, "grad_norm": 19.816299438476562, "learning_rate": 1.2722727272727272e-06, "loss": 6.511268615722656, "step": 74810 }, { "epoch": 0.15215, "grad_norm": 14.500628471374512, "learning_rate": 1.2720202020202022e-06, "loss": 6.217486190795898, "step": 74815 }, { "epoch": 0.1522, "grad_norm": 6.350764274597168, "learning_rate": 1.2717676767676769e-06, "loss": 6.250115203857422, "step": 74820 }, { "epoch": 0.15225, "grad_norm": 5.852832794189453, "learning_rate": 1.2715151515151517e-06, "loss": 6.237650680541992, "step": 74825 }, { "epoch": 0.1523, "grad_norm": 7.506080627441406, "learning_rate": 1.2712626262626263e-06, "loss": 6.232112121582031, "step": 74830 }, { "epoch": 0.15235, "grad_norm": 5.394845485687256, "learning_rate": 1.2710101010101012e-06, "loss": 6.256331253051758, "step": 74835 }, { "epoch": 0.1524, "grad_norm": 6.1077656745910645, "learning_rate": 1.2707575757575758e-06, "loss": 6.499471282958984, "step": 74840 }, { "epoch": 0.15245, "grad_norm": 4.592828750610352, "learning_rate": 1.2705050505050507e-06, "loss": 6.232889938354492, "step": 74845 }, { "epoch": 0.1525, "grad_norm": 8.370278358459473, "learning_rate": 1.2702525252525253e-06, "loss": 6.243658065795898, "step": 74850 }, { "epoch": 0.15255, "grad_norm": 4.292181968688965, "learning_rate": 1.2700000000000001e-06, "loss": 6.23565444946289, "step": 74855 }, { "epoch": 0.1526, "grad_norm": 7.669984817504883, "learning_rate": 1.2697474747474748e-06, "loss": 6.241493225097656, "step": 74860 }, { "epoch": 0.15265, "grad_norm": 4.792873859405518, "learning_rate": 1.2694949494949496e-06, "loss": 6.224595260620117, "step": 74865 }, { "epoch": 0.1527, "grad_norm": 4.767852306365967, "learning_rate": 1.2692424242424242e-06, "loss": 6.189799499511719, "step": 74870 }, { "epoch": 0.15275, "grad_norm": 5.572659015655518, "learning_rate": 1.268989898989899e-06, "loss": 6.2597393035888675, "step": 74875 }, { "epoch": 0.1528, "grad_norm": 5.424083709716797, "learning_rate": 1.2687373737373737e-06, "loss": 6.2391925811767575, "step": 74880 }, { "epoch": 0.15285, "grad_norm": 6.843721389770508, "learning_rate": 1.2684848484848485e-06, "loss": 6.241059494018555, "step": 74885 }, { "epoch": 0.1529, "grad_norm": 9.97791576385498, "learning_rate": 1.2682323232323232e-06, "loss": 6.252522659301758, "step": 74890 }, { "epoch": 0.15295, "grad_norm": 6.367930889129639, "learning_rate": 1.267979797979798e-06, "loss": 6.247344970703125, "step": 74895 }, { "epoch": 0.153, "grad_norm": 6.479358673095703, "learning_rate": 1.2677272727272726e-06, "loss": 6.256669998168945, "step": 74900 }, { "epoch": 0.15305, "grad_norm": 5.146621227264404, "learning_rate": 1.2674747474747475e-06, "loss": 6.206320953369141, "step": 74905 }, { "epoch": 0.1531, "grad_norm": 4.834164142608643, "learning_rate": 1.2672222222222225e-06, "loss": 6.250431823730469, "step": 74910 }, { "epoch": 0.15315, "grad_norm": 6.928816795349121, "learning_rate": 1.2669696969696972e-06, "loss": 6.274456405639649, "step": 74915 }, { "epoch": 0.1532, "grad_norm": 3.4220941066741943, "learning_rate": 1.266717171717172e-06, "loss": 6.2398529052734375, "step": 74920 }, { "epoch": 0.15325, "grad_norm": 4.344491958618164, "learning_rate": 1.2664646464646466e-06, "loss": 6.229390716552734, "step": 74925 }, { "epoch": 0.1533, "grad_norm": 4.2248406410217285, "learning_rate": 1.2662121212121215e-06, "loss": 6.234172058105469, "step": 74930 }, { "epoch": 0.15335, "grad_norm": 6.012477874755859, "learning_rate": 1.2659595959595961e-06, "loss": 6.2517860412597654, "step": 74935 }, { "epoch": 0.1534, "grad_norm": 7.300714492797852, "learning_rate": 1.265707070707071e-06, "loss": 6.251566314697266, "step": 74940 }, { "epoch": 0.15345, "grad_norm": 7.4501872062683105, "learning_rate": 1.2654545454545456e-06, "loss": 6.290587615966797, "step": 74945 }, { "epoch": 0.1535, "grad_norm": 4.589632511138916, "learning_rate": 1.2652020202020204e-06, "loss": 6.190826416015625, "step": 74950 }, { "epoch": 0.15355, "grad_norm": 4.946479320526123, "learning_rate": 1.264949494949495e-06, "loss": 6.236514663696289, "step": 74955 }, { "epoch": 0.1536, "grad_norm": 5.142756938934326, "learning_rate": 1.26469696969697e-06, "loss": 6.2325996398925785, "step": 74960 }, { "epoch": 0.15365, "grad_norm": 4.485238552093506, "learning_rate": 1.2644444444444445e-06, "loss": 6.239675903320313, "step": 74965 }, { "epoch": 0.1537, "grad_norm": 4.692289352416992, "learning_rate": 1.2641919191919194e-06, "loss": 6.25774040222168, "step": 74970 }, { "epoch": 0.15375, "grad_norm": 6.025549411773682, "learning_rate": 1.263939393939394e-06, "loss": 6.225696563720703, "step": 74975 }, { "epoch": 0.1538, "grad_norm": 3.6170618534088135, "learning_rate": 1.2636868686868688e-06, "loss": 6.316873168945312, "step": 74980 }, { "epoch": 0.15385, "grad_norm": 3.4421112537384033, "learning_rate": 1.2634343434343435e-06, "loss": 6.224048233032226, "step": 74985 }, { "epoch": 0.1539, "grad_norm": 4.818371772766113, "learning_rate": 1.2631818181818183e-06, "loss": 6.249916839599609, "step": 74990 }, { "epoch": 0.15395, "grad_norm": 7.8996806144714355, "learning_rate": 1.262929292929293e-06, "loss": 6.234967422485352, "step": 74995 }, { "epoch": 0.154, "grad_norm": 12.915905952453613, "learning_rate": 1.2626767676767678e-06, "loss": 6.253430557250977, "step": 75000 }, { "epoch": 0.15405, "grad_norm": 3.7189345359802246, "learning_rate": 1.2624242424242424e-06, "loss": 6.248134613037109, "step": 75005 }, { "epoch": 0.1541, "grad_norm": 7.279516220092773, "learning_rate": 1.2621717171717175e-06, "loss": 6.223016357421875, "step": 75010 }, { "epoch": 0.15415, "grad_norm": 3.941697835922241, "learning_rate": 1.2619191919191919e-06, "loss": 6.216687393188477, "step": 75015 }, { "epoch": 0.1542, "grad_norm": 6.274088382720947, "learning_rate": 1.261666666666667e-06, "loss": 6.2390399932861325, "step": 75020 }, { "epoch": 0.15425, "grad_norm": 26.748149871826172, "learning_rate": 1.2614141414141416e-06, "loss": 6.416621398925781, "step": 75025 }, { "epoch": 0.1543, "grad_norm": 6.6277289390563965, "learning_rate": 1.2611616161616164e-06, "loss": 6.308944320678711, "step": 75030 }, { "epoch": 0.15435, "grad_norm": 9.954639434814453, "learning_rate": 1.260909090909091e-06, "loss": 6.2475227355957035, "step": 75035 }, { "epoch": 0.1544, "grad_norm": 8.239768028259277, "learning_rate": 1.2606565656565659e-06, "loss": 6.241039657592774, "step": 75040 }, { "epoch": 0.15445, "grad_norm": 7.1581220626831055, "learning_rate": 1.2604040404040405e-06, "loss": 6.144541931152344, "step": 75045 }, { "epoch": 0.1545, "grad_norm": 4.033621788024902, "learning_rate": 1.2601515151515154e-06, "loss": 6.237398529052735, "step": 75050 }, { "epoch": 0.15455, "grad_norm": 5.034806728363037, "learning_rate": 1.25989898989899e-06, "loss": 6.21448974609375, "step": 75055 }, { "epoch": 0.1546, "grad_norm": 4.100320339202881, "learning_rate": 1.2596464646464648e-06, "loss": 6.4805137634277346, "step": 75060 }, { "epoch": 0.15465, "grad_norm": 6.154018402099609, "learning_rate": 1.2593939393939395e-06, "loss": 6.233946990966797, "step": 75065 }, { "epoch": 0.1547, "grad_norm": 5.0249223709106445, "learning_rate": 1.2591414141414143e-06, "loss": 6.200268173217774, "step": 75070 }, { "epoch": 0.15475, "grad_norm": 6.929437160491943, "learning_rate": 1.258888888888889e-06, "loss": 6.344448852539062, "step": 75075 }, { "epoch": 0.1548, "grad_norm": 4.010641098022461, "learning_rate": 1.2586363636363638e-06, "loss": 6.2711738586425785, "step": 75080 }, { "epoch": 0.15485, "grad_norm": 5.071040153503418, "learning_rate": 1.2583838383838384e-06, "loss": 6.312607955932617, "step": 75085 }, { "epoch": 0.1549, "grad_norm": 5.151876449584961, "learning_rate": 1.2581313131313132e-06, "loss": 6.20647087097168, "step": 75090 }, { "epoch": 0.15495, "grad_norm": 7.823906898498535, "learning_rate": 1.2578787878787879e-06, "loss": 6.183932495117188, "step": 75095 }, { "epoch": 0.155, "grad_norm": 14.30170726776123, "learning_rate": 1.2576262626262627e-06, "loss": 6.331748199462891, "step": 75100 }, { "epoch": 0.15505, "grad_norm": 8.86341667175293, "learning_rate": 1.2573737373737373e-06, "loss": 6.24580192565918, "step": 75105 }, { "epoch": 0.1551, "grad_norm": 5.205596923828125, "learning_rate": 1.2571212121212124e-06, "loss": 6.241753005981446, "step": 75110 }, { "epoch": 0.15515, "grad_norm": 6.331380367279053, "learning_rate": 1.2568686868686868e-06, "loss": 6.252550506591797, "step": 75115 }, { "epoch": 0.1552, "grad_norm": 3.7262752056121826, "learning_rate": 1.2566161616161619e-06, "loss": 6.266852569580078, "step": 75120 }, { "epoch": 0.15525, "grad_norm": 4.395182132720947, "learning_rate": 1.2563636363636365e-06, "loss": 6.216812515258789, "step": 75125 }, { "epoch": 0.1553, "grad_norm": 13.03410530090332, "learning_rate": 1.2561111111111113e-06, "loss": 6.362335968017578, "step": 75130 }, { "epoch": 0.15535, "grad_norm": 5.9682745933532715, "learning_rate": 1.255858585858586e-06, "loss": 6.251123046875, "step": 75135 }, { "epoch": 0.1554, "grad_norm": 4.440389156341553, "learning_rate": 1.2556060606060608e-06, "loss": 6.259073638916016, "step": 75140 }, { "epoch": 0.15545, "grad_norm": 7.720395565032959, "learning_rate": 1.2553535353535354e-06, "loss": 6.225209426879883, "step": 75145 }, { "epoch": 0.1555, "grad_norm": 5.1239237785339355, "learning_rate": 1.2551010101010103e-06, "loss": 6.322224426269531, "step": 75150 }, { "epoch": 0.15555, "grad_norm": 5.256804466247559, "learning_rate": 1.254848484848485e-06, "loss": 6.242914199829102, "step": 75155 }, { "epoch": 0.1556, "grad_norm": 3.724416732788086, "learning_rate": 1.2545959595959598e-06, "loss": 6.20997314453125, "step": 75160 }, { "epoch": 0.15565, "grad_norm": 4.241456031799316, "learning_rate": 1.2543434343434344e-06, "loss": 6.258034515380859, "step": 75165 }, { "epoch": 0.1557, "grad_norm": 5.17391300201416, "learning_rate": 1.2540909090909092e-06, "loss": 6.255418014526367, "step": 75170 }, { "epoch": 0.15575, "grad_norm": 7.823019981384277, "learning_rate": 1.2538383838383839e-06, "loss": 6.248666000366211, "step": 75175 }, { "epoch": 0.1558, "grad_norm": 7.084842205047607, "learning_rate": 1.2535858585858587e-06, "loss": 6.230002212524414, "step": 75180 }, { "epoch": 0.15585, "grad_norm": 4.352348804473877, "learning_rate": 1.2533333333333333e-06, "loss": 6.217587661743164, "step": 75185 }, { "epoch": 0.1559, "grad_norm": 11.321301460266113, "learning_rate": 1.2530808080808082e-06, "loss": 6.258500671386718, "step": 75190 }, { "epoch": 0.15595, "grad_norm": 5.917731761932373, "learning_rate": 1.2528282828282828e-06, "loss": 6.182617950439453, "step": 75195 }, { "epoch": 0.156, "grad_norm": 5.34178352355957, "learning_rate": 1.2525757575757576e-06, "loss": 6.193733215332031, "step": 75200 }, { "epoch": 0.15605, "grad_norm": 3.9569003582000732, "learning_rate": 1.2523232323232323e-06, "loss": 6.219124603271484, "step": 75205 }, { "epoch": 0.1561, "grad_norm": 6.134114742279053, "learning_rate": 1.2520707070707071e-06, "loss": 6.235329437255859, "step": 75210 }, { "epoch": 0.15615, "grad_norm": 3.5082311630249023, "learning_rate": 1.2518181818181817e-06, "loss": 6.25323257446289, "step": 75215 }, { "epoch": 0.1562, "grad_norm": 5.687015533447266, "learning_rate": 1.2515656565656568e-06, "loss": 6.238995361328125, "step": 75220 }, { "epoch": 0.15625, "grad_norm": 4.618686199188232, "learning_rate": 1.2513131313131312e-06, "loss": 6.228168487548828, "step": 75225 }, { "epoch": 0.1563, "grad_norm": 5.97711181640625, "learning_rate": 1.2510606060606063e-06, "loss": 6.242482757568359, "step": 75230 }, { "epoch": 0.15635, "grad_norm": 5.377412796020508, "learning_rate": 1.250808080808081e-06, "loss": 6.239083099365234, "step": 75235 }, { "epoch": 0.1564, "grad_norm": 6.805724620819092, "learning_rate": 1.2505555555555557e-06, "loss": 6.207192993164062, "step": 75240 }, { "epoch": 0.15645, "grad_norm": 3.9508800506591797, "learning_rate": 1.2503030303030304e-06, "loss": 6.23319091796875, "step": 75245 }, { "epoch": 0.1565, "grad_norm": 4.93553352355957, "learning_rate": 1.2500505050505052e-06, "loss": 6.215253067016602, "step": 75250 }, { "epoch": 0.15655, "grad_norm": 7.821374893188477, "learning_rate": 1.2497979797979798e-06, "loss": 6.2839813232421875, "step": 75255 }, { "epoch": 0.1566, "grad_norm": 5.350115776062012, "learning_rate": 1.2495454545454547e-06, "loss": 6.235452270507812, "step": 75260 }, { "epoch": 0.15665, "grad_norm": 5.858108043670654, "learning_rate": 1.2492929292929293e-06, "loss": 6.271259307861328, "step": 75265 }, { "epoch": 0.1567, "grad_norm": 7.262343406677246, "learning_rate": 1.2490404040404042e-06, "loss": 6.220214080810547, "step": 75270 }, { "epoch": 0.15675, "grad_norm": 4.674764156341553, "learning_rate": 1.248787878787879e-06, "loss": 6.229149627685547, "step": 75275 }, { "epoch": 0.1568, "grad_norm": 4.135441780090332, "learning_rate": 1.2485353535353536e-06, "loss": 6.249447250366211, "step": 75280 }, { "epoch": 0.15685, "grad_norm": 6.471755504608154, "learning_rate": 1.2482828282828285e-06, "loss": 6.234306716918946, "step": 75285 }, { "epoch": 0.1569, "grad_norm": 5.9411139488220215, "learning_rate": 1.2480303030303031e-06, "loss": 6.235723876953125, "step": 75290 }, { "epoch": 0.15695, "grad_norm": 4.427250385284424, "learning_rate": 1.247777777777778e-06, "loss": 6.2395984649658205, "step": 75295 }, { "epoch": 0.157, "grad_norm": 8.796195030212402, "learning_rate": 1.2475252525252526e-06, "loss": 6.330513000488281, "step": 75300 }, { "epoch": 0.15705, "grad_norm": 3.294217348098755, "learning_rate": 1.2472727272727274e-06, "loss": 6.230996322631836, "step": 75305 }, { "epoch": 0.1571, "grad_norm": 6.297418117523193, "learning_rate": 1.247020202020202e-06, "loss": 6.24590835571289, "step": 75310 }, { "epoch": 0.15715, "grad_norm": 7.116874694824219, "learning_rate": 1.2467676767676769e-06, "loss": 6.273774719238281, "step": 75315 }, { "epoch": 0.1572, "grad_norm": 6.6390604972839355, "learning_rate": 1.2465151515151515e-06, "loss": 6.283566665649414, "step": 75320 }, { "epoch": 0.15725, "grad_norm": 6.156033515930176, "learning_rate": 1.2462626262626264e-06, "loss": 6.202934265136719, "step": 75325 }, { "epoch": 0.1573, "grad_norm": 7.8700032234191895, "learning_rate": 1.2460101010101012e-06, "loss": 6.2407279968261715, "step": 75330 }, { "epoch": 0.15735, "grad_norm": 5.160825729370117, "learning_rate": 1.2457575757575758e-06, "loss": 6.370338821411133, "step": 75335 }, { "epoch": 0.1574, "grad_norm": 4.943718433380127, "learning_rate": 1.2455050505050507e-06, "loss": 6.240385055541992, "step": 75340 }, { "epoch": 0.15745, "grad_norm": 5.646724224090576, "learning_rate": 1.2452525252525253e-06, "loss": 6.261104583740234, "step": 75345 }, { "epoch": 0.1575, "grad_norm": 4.028902053833008, "learning_rate": 1.2450000000000002e-06, "loss": 6.274745941162109, "step": 75350 }, { "epoch": 0.15755, "grad_norm": 4.139111518859863, "learning_rate": 1.2447474747474748e-06, "loss": 6.199967956542968, "step": 75355 }, { "epoch": 0.1576, "grad_norm": 11.311842918395996, "learning_rate": 1.2444949494949496e-06, "loss": 6.29931640625, "step": 75360 }, { "epoch": 0.15765, "grad_norm": 4.872209072113037, "learning_rate": 1.2442424242424243e-06, "loss": 6.20692024230957, "step": 75365 }, { "epoch": 0.1577, "grad_norm": 4.883072853088379, "learning_rate": 1.243989898989899e-06, "loss": 6.2300464630126955, "step": 75370 }, { "epoch": 0.15775, "grad_norm": 5.619246006011963, "learning_rate": 1.2437373737373737e-06, "loss": 6.26843376159668, "step": 75375 }, { "epoch": 0.1578, "grad_norm": 5.810131072998047, "learning_rate": 1.2434848484848486e-06, "loss": 6.241746520996093, "step": 75380 }, { "epoch": 0.15785, "grad_norm": 3.557060718536377, "learning_rate": 1.2432323232323234e-06, "loss": 6.259650039672851, "step": 75385 }, { "epoch": 0.1579, "grad_norm": 4.624415874481201, "learning_rate": 1.242979797979798e-06, "loss": 6.2498424530029295, "step": 75390 }, { "epoch": 0.15795, "grad_norm": 3.6395368576049805, "learning_rate": 1.2427272727272729e-06, "loss": 6.293640899658203, "step": 75395 }, { "epoch": 0.158, "grad_norm": 5.772538661956787, "learning_rate": 1.2424747474747475e-06, "loss": 6.2561485290527346, "step": 75400 }, { "epoch": 0.15805, "grad_norm": 5.7110595703125, "learning_rate": 1.2422222222222224e-06, "loss": 6.252012634277344, "step": 75405 }, { "epoch": 0.1581, "grad_norm": 6.649394989013672, "learning_rate": 1.241969696969697e-06, "loss": 6.20811653137207, "step": 75410 }, { "epoch": 0.15815, "grad_norm": 4.594396591186523, "learning_rate": 1.2417171717171718e-06, "loss": 6.228930282592773, "step": 75415 }, { "epoch": 0.1582, "grad_norm": 6.914183616638184, "learning_rate": 1.2414646464646465e-06, "loss": 6.260160064697265, "step": 75420 }, { "epoch": 0.15825, "grad_norm": 3.7778615951538086, "learning_rate": 1.2412121212121213e-06, "loss": 6.213476181030273, "step": 75425 }, { "epoch": 0.1583, "grad_norm": 10.568780899047852, "learning_rate": 1.240959595959596e-06, "loss": 6.3797870635986325, "step": 75430 }, { "epoch": 0.15835, "grad_norm": 11.425265312194824, "learning_rate": 1.2407070707070708e-06, "loss": 6.238298797607422, "step": 75435 }, { "epoch": 0.1584, "grad_norm": 8.472023010253906, "learning_rate": 1.2404545454545456e-06, "loss": 6.24048080444336, "step": 75440 }, { "epoch": 0.15845, "grad_norm": 4.381643295288086, "learning_rate": 1.2402020202020202e-06, "loss": 6.25023193359375, "step": 75445 }, { "epoch": 0.1585, "grad_norm": 16.300722122192383, "learning_rate": 1.239949494949495e-06, "loss": 6.254934692382813, "step": 75450 }, { "epoch": 0.15855, "grad_norm": 9.558006286621094, "learning_rate": 1.2396969696969697e-06, "loss": 6.260816192626953, "step": 75455 }, { "epoch": 0.1586, "grad_norm": 12.093559265136719, "learning_rate": 1.2394444444444446e-06, "loss": 6.308939743041992, "step": 75460 }, { "epoch": 0.15865, "grad_norm": 4.8466339111328125, "learning_rate": 1.2391919191919192e-06, "loss": 6.202334594726563, "step": 75465 }, { "epoch": 0.1587, "grad_norm": 5.133594512939453, "learning_rate": 1.238939393939394e-06, "loss": 6.237519454956055, "step": 75470 }, { "epoch": 0.15875, "grad_norm": 16.94546890258789, "learning_rate": 1.2386868686868687e-06, "loss": 6.280222702026367, "step": 75475 }, { "epoch": 0.1588, "grad_norm": 5.964803695678711, "learning_rate": 1.2384343434343435e-06, "loss": 6.259177398681641, "step": 75480 }, { "epoch": 0.15885, "grad_norm": 6.220442771911621, "learning_rate": 1.2381818181818183e-06, "loss": 6.221588897705078, "step": 75485 }, { "epoch": 0.1589, "grad_norm": 8.469977378845215, "learning_rate": 1.237929292929293e-06, "loss": 6.223221969604492, "step": 75490 }, { "epoch": 0.15895, "grad_norm": 4.975347995758057, "learning_rate": 1.2376767676767678e-06, "loss": 6.267742538452149, "step": 75495 }, { "epoch": 0.159, "grad_norm": 5.534213066101074, "learning_rate": 1.2374242424242424e-06, "loss": 6.253791809082031, "step": 75500 }, { "epoch": 0.15905, "grad_norm": 4.86947774887085, "learning_rate": 1.2371717171717173e-06, "loss": 6.224187850952148, "step": 75505 }, { "epoch": 0.1591, "grad_norm": 4.8647613525390625, "learning_rate": 1.236919191919192e-06, "loss": 6.248656463623047, "step": 75510 }, { "epoch": 0.15915, "grad_norm": 4.665765285491943, "learning_rate": 1.2366666666666668e-06, "loss": 6.249026489257813, "step": 75515 }, { "epoch": 0.1592, "grad_norm": 6.106312274932861, "learning_rate": 1.2364141414141414e-06, "loss": 6.208988952636719, "step": 75520 }, { "epoch": 0.15925, "grad_norm": 4.368300437927246, "learning_rate": 1.2361616161616162e-06, "loss": 6.200542449951172, "step": 75525 }, { "epoch": 0.1593, "grad_norm": 6.332970142364502, "learning_rate": 1.2359090909090909e-06, "loss": 6.1809131622314455, "step": 75530 }, { "epoch": 0.15935, "grad_norm": 5.8734130859375, "learning_rate": 1.2356565656565657e-06, "loss": 6.215363693237305, "step": 75535 }, { "epoch": 0.1594, "grad_norm": 9.698541641235352, "learning_rate": 1.2354040404040405e-06, "loss": 6.241461944580078, "step": 75540 }, { "epoch": 0.15945, "grad_norm": 4.7526655197143555, "learning_rate": 1.2351515151515152e-06, "loss": 6.16010856628418, "step": 75545 }, { "epoch": 0.1595, "grad_norm": 9.28890609741211, "learning_rate": 1.23489898989899e-06, "loss": 6.265209197998047, "step": 75550 }, { "epoch": 0.15955, "grad_norm": 5.223910808563232, "learning_rate": 1.2346464646464649e-06, "loss": 6.209152603149414, "step": 75555 }, { "epoch": 0.1596, "grad_norm": 6.150954246520996, "learning_rate": 1.2343939393939395e-06, "loss": 6.231588745117188, "step": 75560 }, { "epoch": 0.15965, "grad_norm": 7.107594013214111, "learning_rate": 1.2341414141414143e-06, "loss": 6.218454742431641, "step": 75565 }, { "epoch": 0.1597, "grad_norm": 4.206393241882324, "learning_rate": 1.233888888888889e-06, "loss": 6.276154327392578, "step": 75570 }, { "epoch": 0.15975, "grad_norm": 7.003544807434082, "learning_rate": 1.2336363636363638e-06, "loss": 6.240028381347656, "step": 75575 }, { "epoch": 0.1598, "grad_norm": 5.300146102905273, "learning_rate": 1.2333838383838386e-06, "loss": 6.2784576416015625, "step": 75580 }, { "epoch": 0.15985, "grad_norm": 9.84379768371582, "learning_rate": 1.2331313131313133e-06, "loss": 6.3011116027832035, "step": 75585 }, { "epoch": 0.1599, "grad_norm": 4.619462013244629, "learning_rate": 1.2328787878787881e-06, "loss": 6.242176818847656, "step": 75590 }, { "epoch": 0.15995, "grad_norm": 21.64238929748535, "learning_rate": 1.2326262626262627e-06, "loss": 6.534483337402344, "step": 75595 }, { "epoch": 0.16, "grad_norm": 34.567569732666016, "learning_rate": 1.2323737373737376e-06, "loss": 6.214157485961914, "step": 75600 }, { "epoch": 0.16005, "grad_norm": 4.574034690856934, "learning_rate": 1.2321212121212122e-06, "loss": 6.2265571594238285, "step": 75605 }, { "epoch": 0.1601, "grad_norm": 5.816732406616211, "learning_rate": 1.231868686868687e-06, "loss": 6.252611923217773, "step": 75610 }, { "epoch": 0.16015, "grad_norm": 5.197467803955078, "learning_rate": 1.2316161616161617e-06, "loss": 6.204755783081055, "step": 75615 }, { "epoch": 0.1602, "grad_norm": 8.0764741897583, "learning_rate": 1.2313636363636365e-06, "loss": 6.151495361328125, "step": 75620 }, { "epoch": 0.16025, "grad_norm": 4.342720031738281, "learning_rate": 1.2311111111111112e-06, "loss": 6.2311866760253904, "step": 75625 }, { "epoch": 0.1603, "grad_norm": 10.79515266418457, "learning_rate": 1.230858585858586e-06, "loss": 6.209868240356445, "step": 75630 }, { "epoch": 0.16035, "grad_norm": 3.531212329864502, "learning_rate": 1.2306060606060608e-06, "loss": 6.184176254272461, "step": 75635 }, { "epoch": 0.1604, "grad_norm": 5.105765342712402, "learning_rate": 1.2303535353535355e-06, "loss": 6.2170257568359375, "step": 75640 }, { "epoch": 0.16045, "grad_norm": 3.314887523651123, "learning_rate": 1.2301010101010103e-06, "loss": 6.207878494262696, "step": 75645 }, { "epoch": 0.1605, "grad_norm": 4.504886627197266, "learning_rate": 1.229848484848485e-06, "loss": 6.236207580566406, "step": 75650 }, { "epoch": 0.16055, "grad_norm": 7.077663898468018, "learning_rate": 1.2295959595959598e-06, "loss": 6.218576049804687, "step": 75655 }, { "epoch": 0.1606, "grad_norm": 5.024385929107666, "learning_rate": 1.2293434343434344e-06, "loss": 6.226555252075196, "step": 75660 }, { "epoch": 0.16065, "grad_norm": 2.9647343158721924, "learning_rate": 1.2290909090909093e-06, "loss": 6.278777313232422, "step": 75665 }, { "epoch": 0.1607, "grad_norm": 8.324501991271973, "learning_rate": 1.2288383838383839e-06, "loss": 6.353342437744141, "step": 75670 }, { "epoch": 0.16075, "grad_norm": 3.7377865314483643, "learning_rate": 1.2285858585858587e-06, "loss": 6.188668060302734, "step": 75675 }, { "epoch": 0.1608, "grad_norm": 5.959318161010742, "learning_rate": 1.2283333333333334e-06, "loss": 6.213443756103516, "step": 75680 }, { "epoch": 0.16085, "grad_norm": 6.945539474487305, "learning_rate": 1.2280808080808082e-06, "loss": 6.34049072265625, "step": 75685 }, { "epoch": 0.1609, "grad_norm": 5.659703254699707, "learning_rate": 1.227828282828283e-06, "loss": 6.219380950927734, "step": 75690 }, { "epoch": 0.16095, "grad_norm": 5.713063716888428, "learning_rate": 1.2275757575757577e-06, "loss": 6.241762161254883, "step": 75695 }, { "epoch": 0.161, "grad_norm": 6.939210891723633, "learning_rate": 1.2273232323232325e-06, "loss": 6.228340911865234, "step": 75700 }, { "epoch": 0.16105, "grad_norm": 8.688322067260742, "learning_rate": 1.2270707070707071e-06, "loss": 6.220810317993164, "step": 75705 }, { "epoch": 0.1611, "grad_norm": 6.199443340301514, "learning_rate": 1.226818181818182e-06, "loss": 6.224296569824219, "step": 75710 }, { "epoch": 0.16115, "grad_norm": 6.372860431671143, "learning_rate": 1.2265656565656566e-06, "loss": 6.326523971557617, "step": 75715 }, { "epoch": 0.1612, "grad_norm": 6.2038421630859375, "learning_rate": 1.2263131313131315e-06, "loss": 6.237415313720703, "step": 75720 }, { "epoch": 0.16125, "grad_norm": 3.666438102722168, "learning_rate": 1.226060606060606e-06, "loss": 6.2288330078125, "step": 75725 }, { "epoch": 0.1613, "grad_norm": 4.810835361480713, "learning_rate": 1.225808080808081e-06, "loss": 6.2471153259277346, "step": 75730 }, { "epoch": 0.16135, "grad_norm": 4.745327472686768, "learning_rate": 1.2255555555555556e-06, "loss": 6.207925796508789, "step": 75735 }, { "epoch": 0.1614, "grad_norm": 6.035501480102539, "learning_rate": 1.2253030303030304e-06, "loss": 6.243111801147461, "step": 75740 }, { "epoch": 0.16145, "grad_norm": 5.04922342300415, "learning_rate": 1.2250505050505052e-06, "loss": 6.239986801147461, "step": 75745 }, { "epoch": 0.1615, "grad_norm": 6.267117500305176, "learning_rate": 1.2247979797979799e-06, "loss": 6.227577209472656, "step": 75750 }, { "epoch": 0.16155, "grad_norm": 5.060290813446045, "learning_rate": 1.2245454545454547e-06, "loss": 6.2534538269042965, "step": 75755 }, { "epoch": 0.1616, "grad_norm": 18.71457290649414, "learning_rate": 1.2242929292929293e-06, "loss": 6.332928466796875, "step": 75760 }, { "epoch": 0.16165, "grad_norm": 5.184547424316406, "learning_rate": 1.2240404040404042e-06, "loss": 6.307101058959961, "step": 75765 }, { "epoch": 0.1617, "grad_norm": 6.999083995819092, "learning_rate": 1.2237878787878788e-06, "loss": 6.183119201660157, "step": 75770 }, { "epoch": 0.16175, "grad_norm": 6.046182155609131, "learning_rate": 1.2235353535353537e-06, "loss": 6.2597404479980465, "step": 75775 }, { "epoch": 0.1618, "grad_norm": 5.406234264373779, "learning_rate": 1.2232828282828283e-06, "loss": 6.27038345336914, "step": 75780 }, { "epoch": 0.16185, "grad_norm": 5.902841567993164, "learning_rate": 1.2230303030303031e-06, "loss": 6.188706970214843, "step": 75785 }, { "epoch": 0.1619, "grad_norm": 11.259592056274414, "learning_rate": 1.2227777777777778e-06, "loss": 6.3180595397949215, "step": 75790 }, { "epoch": 0.16195, "grad_norm": 6.817213535308838, "learning_rate": 1.2225252525252526e-06, "loss": 6.230636215209961, "step": 75795 }, { "epoch": 0.162, "grad_norm": 4.135263919830322, "learning_rate": 1.2222727272727274e-06, "loss": 6.284294891357422, "step": 75800 }, { "epoch": 0.16205, "grad_norm": 3.5220115184783936, "learning_rate": 1.222020202020202e-06, "loss": 6.226583480834961, "step": 75805 }, { "epoch": 0.1621, "grad_norm": 5.2956862449646, "learning_rate": 1.221767676767677e-06, "loss": 6.245886611938476, "step": 75810 }, { "epoch": 0.16215, "grad_norm": 6.754693984985352, "learning_rate": 1.2215151515151516e-06, "loss": 6.224676132202148, "step": 75815 }, { "epoch": 0.1622, "grad_norm": 4.568908214569092, "learning_rate": 1.2212626262626264e-06, "loss": 6.194143676757813, "step": 75820 }, { "epoch": 0.16225, "grad_norm": 5.778822422027588, "learning_rate": 1.221010101010101e-06, "loss": 6.176654434204101, "step": 75825 }, { "epoch": 0.1623, "grad_norm": 5.099380016326904, "learning_rate": 1.2207575757575759e-06, "loss": 6.236127090454102, "step": 75830 }, { "epoch": 0.16235, "grad_norm": 4.714526653289795, "learning_rate": 1.2205050505050505e-06, "loss": 6.243751525878906, "step": 75835 }, { "epoch": 0.1624, "grad_norm": 5.741721153259277, "learning_rate": 1.2202525252525253e-06, "loss": 6.1920616149902346, "step": 75840 }, { "epoch": 0.16245, "grad_norm": 16.085254669189453, "learning_rate": 1.2200000000000002e-06, "loss": 6.427898406982422, "step": 75845 }, { "epoch": 0.1625, "grad_norm": 5.73773717880249, "learning_rate": 1.2197474747474748e-06, "loss": 6.2380859375, "step": 75850 }, { "epoch": 0.16255, "grad_norm": 7.83579683303833, "learning_rate": 1.2194949494949496e-06, "loss": 6.244665145874023, "step": 75855 }, { "epoch": 0.1626, "grad_norm": 21.109460830688477, "learning_rate": 1.2192424242424243e-06, "loss": 6.249995422363281, "step": 75860 }, { "epoch": 0.16265, "grad_norm": 6.65559196472168, "learning_rate": 1.2189898989898991e-06, "loss": 6.2860759735107425, "step": 75865 }, { "epoch": 0.1627, "grad_norm": 4.089727878570557, "learning_rate": 1.2187373737373738e-06, "loss": 6.2269126892089846, "step": 75870 }, { "epoch": 0.16275, "grad_norm": 6.1708550453186035, "learning_rate": 1.2184848484848486e-06, "loss": 6.2152565002441404, "step": 75875 }, { "epoch": 0.1628, "grad_norm": 4.102262496948242, "learning_rate": 1.2182323232323232e-06, "loss": 6.2602794647216795, "step": 75880 }, { "epoch": 0.16285, "grad_norm": 8.303638458251953, "learning_rate": 1.217979797979798e-06, "loss": 6.354499435424804, "step": 75885 }, { "epoch": 0.1629, "grad_norm": 6.004108428955078, "learning_rate": 1.2177272727272727e-06, "loss": 6.288310623168945, "step": 75890 }, { "epoch": 0.16295, "grad_norm": 5.171757221221924, "learning_rate": 1.2174747474747475e-06, "loss": 6.1981040954589846, "step": 75895 }, { "epoch": 0.163, "grad_norm": 17.573617935180664, "learning_rate": 1.2172222222222224e-06, "loss": 6.3806205749511715, "step": 75900 }, { "epoch": 0.16305, "grad_norm": 5.227954387664795, "learning_rate": 1.216969696969697e-06, "loss": 6.214947509765625, "step": 75905 }, { "epoch": 0.1631, "grad_norm": 4.339335918426514, "learning_rate": 1.2167171717171719e-06, "loss": 6.292198181152344, "step": 75910 }, { "epoch": 0.16315, "grad_norm": 7.022764682769775, "learning_rate": 1.2164646464646465e-06, "loss": 6.246692657470703, "step": 75915 }, { "epoch": 0.1632, "grad_norm": 7.414557933807373, "learning_rate": 1.2162121212121213e-06, "loss": 6.249967193603515, "step": 75920 }, { "epoch": 0.16325, "grad_norm": 5.16168212890625, "learning_rate": 1.215959595959596e-06, "loss": 6.212811660766602, "step": 75925 }, { "epoch": 0.1633, "grad_norm": 7.420509338378906, "learning_rate": 1.2157070707070708e-06, "loss": 6.153585815429688, "step": 75930 }, { "epoch": 0.16335, "grad_norm": 9.485564231872559, "learning_rate": 1.2154545454545454e-06, "loss": 6.284300231933594, "step": 75935 }, { "epoch": 0.1634, "grad_norm": 5.3045244216918945, "learning_rate": 1.2152020202020203e-06, "loss": 6.2874092102050785, "step": 75940 }, { "epoch": 0.16345, "grad_norm": 4.440707683563232, "learning_rate": 1.214949494949495e-06, "loss": 6.234646987915039, "step": 75945 }, { "epoch": 0.1635, "grad_norm": 5.709773063659668, "learning_rate": 1.2146969696969697e-06, "loss": 6.208148574829101, "step": 75950 }, { "epoch": 0.16355, "grad_norm": 14.392989158630371, "learning_rate": 1.2144444444444446e-06, "loss": 6.255464172363281, "step": 75955 }, { "epoch": 0.1636, "grad_norm": 3.3113880157470703, "learning_rate": 1.2141919191919192e-06, "loss": 6.257605743408203, "step": 75960 }, { "epoch": 0.16365, "grad_norm": 4.209521770477295, "learning_rate": 1.213939393939394e-06, "loss": 6.233726501464844, "step": 75965 }, { "epoch": 0.1637, "grad_norm": 7.040538787841797, "learning_rate": 1.2136868686868687e-06, "loss": 6.497767639160156, "step": 75970 }, { "epoch": 0.16375, "grad_norm": 7.275787830352783, "learning_rate": 1.2134343434343435e-06, "loss": 6.241238403320312, "step": 75975 }, { "epoch": 0.1638, "grad_norm": 5.314579486846924, "learning_rate": 1.2131818181818184e-06, "loss": 6.237210845947265, "step": 75980 }, { "epoch": 0.16385, "grad_norm": 5.956376552581787, "learning_rate": 1.212929292929293e-06, "loss": 6.2295997619628904, "step": 75985 }, { "epoch": 0.1639, "grad_norm": 5.258864879608154, "learning_rate": 1.2126767676767678e-06, "loss": 6.2080738067626955, "step": 75990 }, { "epoch": 0.16395, "grad_norm": 5.162792682647705, "learning_rate": 1.2124242424242427e-06, "loss": 6.243452072143555, "step": 75995 }, { "epoch": 0.164, "grad_norm": 7.038429260253906, "learning_rate": 1.2121717171717173e-06, "loss": 6.297326278686524, "step": 76000 }, { "epoch": 0.16405, "grad_norm": 6.952173233032227, "learning_rate": 1.2119191919191922e-06, "loss": 6.199296569824218, "step": 76005 }, { "epoch": 0.1641, "grad_norm": 6.2600417137146, "learning_rate": 1.2116666666666668e-06, "loss": 6.231483840942383, "step": 76010 }, { "epoch": 0.16415, "grad_norm": 25.13971710205078, "learning_rate": 1.2114141414141416e-06, "loss": 6.3986766815185545, "step": 76015 }, { "epoch": 0.1642, "grad_norm": 5.963688850402832, "learning_rate": 1.2111616161616163e-06, "loss": 6.2738292694091795, "step": 76020 }, { "epoch": 0.16425, "grad_norm": 5.791244029998779, "learning_rate": 1.210909090909091e-06, "loss": 6.245751953125, "step": 76025 }, { "epoch": 0.1643, "grad_norm": 5.052264213562012, "learning_rate": 1.2106565656565657e-06, "loss": 6.186773681640625, "step": 76030 }, { "epoch": 0.16435, "grad_norm": 6.530547142028809, "learning_rate": 1.2104040404040406e-06, "loss": 6.1919807434082035, "step": 76035 }, { "epoch": 0.1644, "grad_norm": 21.545804977416992, "learning_rate": 1.2101515151515152e-06, "loss": 6.571453094482422, "step": 76040 }, { "epoch": 0.16445, "grad_norm": 6.939615249633789, "learning_rate": 1.20989898989899e-06, "loss": 6.2610626220703125, "step": 76045 }, { "epoch": 0.1645, "grad_norm": 10.541024208068848, "learning_rate": 1.2096464646464649e-06, "loss": 6.377774810791015, "step": 76050 }, { "epoch": 0.16455, "grad_norm": 18.062576293945312, "learning_rate": 1.2093939393939395e-06, "loss": 6.223469161987305, "step": 76055 }, { "epoch": 0.1646, "grad_norm": 4.13922119140625, "learning_rate": 1.2091414141414144e-06, "loss": 6.208523559570312, "step": 76060 }, { "epoch": 0.16465, "grad_norm": 11.57026195526123, "learning_rate": 1.208888888888889e-06, "loss": 6.371451568603516, "step": 76065 }, { "epoch": 0.1647, "grad_norm": 6.082940101623535, "learning_rate": 1.2086363636363638e-06, "loss": 6.247600555419922, "step": 76070 }, { "epoch": 0.16475, "grad_norm": 6.433895111083984, "learning_rate": 1.2083838383838385e-06, "loss": 6.3032268524169925, "step": 76075 }, { "epoch": 0.1648, "grad_norm": 6.996480464935303, "learning_rate": 1.2081313131313133e-06, "loss": 6.218159103393555, "step": 76080 }, { "epoch": 0.16485, "grad_norm": 9.57541275024414, "learning_rate": 1.207878787878788e-06, "loss": 6.219036102294922, "step": 76085 }, { "epoch": 0.1649, "grad_norm": 5.245524883270264, "learning_rate": 1.2076262626262628e-06, "loss": 6.216851043701172, "step": 76090 }, { "epoch": 0.16495, "grad_norm": 5.830862045288086, "learning_rate": 1.2073737373737374e-06, "loss": 6.202058410644531, "step": 76095 }, { "epoch": 0.165, "grad_norm": 6.034179210662842, "learning_rate": 1.2071212121212122e-06, "loss": 6.207172775268555, "step": 76100 }, { "epoch": 0.16505, "grad_norm": 5.014467239379883, "learning_rate": 1.206868686868687e-06, "loss": 6.268409347534179, "step": 76105 }, { "epoch": 0.1651, "grad_norm": 6.297733306884766, "learning_rate": 1.2066161616161617e-06, "loss": 6.203469085693359, "step": 76110 }, { "epoch": 0.16515, "grad_norm": 15.821343421936035, "learning_rate": 1.2063636363636366e-06, "loss": 6.2624259948730465, "step": 76115 }, { "epoch": 0.1652, "grad_norm": 7.744136333465576, "learning_rate": 1.2061111111111112e-06, "loss": 6.236600112915039, "step": 76120 }, { "epoch": 0.16525, "grad_norm": 6.48860502243042, "learning_rate": 1.205858585858586e-06, "loss": 6.262626266479492, "step": 76125 }, { "epoch": 0.1653, "grad_norm": 8.276586532592773, "learning_rate": 1.2056060606060607e-06, "loss": 6.246722412109375, "step": 76130 }, { "epoch": 0.16535, "grad_norm": 9.970539093017578, "learning_rate": 1.2053535353535355e-06, "loss": 6.221017456054687, "step": 76135 }, { "epoch": 0.1654, "grad_norm": 6.051907539367676, "learning_rate": 1.2051010101010101e-06, "loss": 6.203687286376953, "step": 76140 }, { "epoch": 0.16545, "grad_norm": 5.3925652503967285, "learning_rate": 1.204848484848485e-06, "loss": 6.244287109375, "step": 76145 }, { "epoch": 0.1655, "grad_norm": 7.337137222290039, "learning_rate": 1.2045959595959596e-06, "loss": 6.175638198852539, "step": 76150 }, { "epoch": 0.16555, "grad_norm": 5.947591781616211, "learning_rate": 1.2043434343434344e-06, "loss": 6.234101867675781, "step": 76155 }, { "epoch": 0.1656, "grad_norm": 12.142390251159668, "learning_rate": 1.2040909090909093e-06, "loss": 6.264815139770508, "step": 76160 }, { "epoch": 0.16565, "grad_norm": 5.5345139503479, "learning_rate": 1.203838383838384e-06, "loss": 6.248451232910156, "step": 76165 }, { "epoch": 0.1657, "grad_norm": 5.947760581970215, "learning_rate": 1.2035858585858588e-06, "loss": 6.379930877685547, "step": 76170 }, { "epoch": 0.16575, "grad_norm": 8.020411491394043, "learning_rate": 1.2033333333333334e-06, "loss": 6.211217880249023, "step": 76175 }, { "epoch": 0.1658, "grad_norm": 5.970745086669922, "learning_rate": 1.2030808080808082e-06, "loss": 6.253725051879883, "step": 76180 }, { "epoch": 0.16585, "grad_norm": 9.724008560180664, "learning_rate": 1.2028282828282829e-06, "loss": 6.2963825225830075, "step": 76185 }, { "epoch": 0.1659, "grad_norm": 6.96956729888916, "learning_rate": 1.2025757575757577e-06, "loss": 6.22994384765625, "step": 76190 }, { "epoch": 0.16595, "grad_norm": 10.932524681091309, "learning_rate": 1.2023232323232323e-06, "loss": 6.239582824707031, "step": 76195 }, { "epoch": 0.166, "grad_norm": 5.800020217895508, "learning_rate": 1.2020707070707072e-06, "loss": 6.260361862182617, "step": 76200 }, { "epoch": 0.16605, "grad_norm": 14.612014770507812, "learning_rate": 1.201818181818182e-06, "loss": 6.257643508911133, "step": 76205 }, { "epoch": 0.1661, "grad_norm": 4.958982467651367, "learning_rate": 1.2015656565656566e-06, "loss": 6.233529663085937, "step": 76210 }, { "epoch": 0.16615, "grad_norm": 13.169255256652832, "learning_rate": 1.2013131313131315e-06, "loss": 6.308760070800782, "step": 76215 }, { "epoch": 0.1662, "grad_norm": 5.949330806732178, "learning_rate": 1.2010606060606061e-06, "loss": 6.219233703613281, "step": 76220 }, { "epoch": 0.16625, "grad_norm": 4.841281890869141, "learning_rate": 1.200808080808081e-06, "loss": 6.220286178588867, "step": 76225 }, { "epoch": 0.1663, "grad_norm": 5.23755407333374, "learning_rate": 1.2005555555555556e-06, "loss": 6.18877067565918, "step": 76230 }, { "epoch": 0.16635, "grad_norm": 8.817583084106445, "learning_rate": 1.2003030303030304e-06, "loss": 6.231939315795898, "step": 76235 }, { "epoch": 0.1664, "grad_norm": 5.3878631591796875, "learning_rate": 1.200050505050505e-06, "loss": 6.252736282348633, "step": 76240 }, { "epoch": 0.16645, "grad_norm": 4.349515438079834, "learning_rate": 1.19979797979798e-06, "loss": 6.2482139587402346, "step": 76245 }, { "epoch": 0.1665, "grad_norm": 8.36505126953125, "learning_rate": 1.1995454545454545e-06, "loss": 6.291033554077148, "step": 76250 }, { "epoch": 0.16655, "grad_norm": 6.003873348236084, "learning_rate": 1.1992929292929294e-06, "loss": 6.246479415893555, "step": 76255 }, { "epoch": 0.1666, "grad_norm": 8.506495475769043, "learning_rate": 1.1990404040404042e-06, "loss": 6.259516906738281, "step": 76260 }, { "epoch": 0.16665, "grad_norm": 12.457499504089355, "learning_rate": 1.1987878787878788e-06, "loss": 6.272107315063477, "step": 76265 }, { "epoch": 0.1667, "grad_norm": 5.8012871742248535, "learning_rate": 1.1985353535353537e-06, "loss": 6.201502990722656, "step": 76270 }, { "epoch": 0.16675, "grad_norm": 5.135997772216797, "learning_rate": 1.1982828282828283e-06, "loss": 6.238021469116211, "step": 76275 }, { "epoch": 0.1668, "grad_norm": 25.800588607788086, "learning_rate": 1.1980303030303032e-06, "loss": 6.51892318725586, "step": 76280 }, { "epoch": 0.16685, "grad_norm": 6.832269191741943, "learning_rate": 1.1977777777777778e-06, "loss": 6.230094909667969, "step": 76285 }, { "epoch": 0.1669, "grad_norm": 7.044804573059082, "learning_rate": 1.1975252525252526e-06, "loss": 6.2927001953125, "step": 76290 }, { "epoch": 0.16695, "grad_norm": 5.469159126281738, "learning_rate": 1.1972727272727273e-06, "loss": 6.198987579345703, "step": 76295 }, { "epoch": 0.167, "grad_norm": 5.4324564933776855, "learning_rate": 1.197020202020202e-06, "loss": 6.2489501953125, "step": 76300 }, { "epoch": 0.16705, "grad_norm": 28.08213233947754, "learning_rate": 1.1967676767676767e-06, "loss": 6.2841541290283205, "step": 76305 }, { "epoch": 0.1671, "grad_norm": 4.397571086883545, "learning_rate": 1.1965151515151516e-06, "loss": 6.224627685546875, "step": 76310 }, { "epoch": 0.16715, "grad_norm": 4.2860026359558105, "learning_rate": 1.1962626262626264e-06, "loss": 6.250120544433594, "step": 76315 }, { "epoch": 0.1672, "grad_norm": 5.710602283477783, "learning_rate": 1.196010101010101e-06, "loss": 6.195205307006836, "step": 76320 }, { "epoch": 0.16725, "grad_norm": 14.549528121948242, "learning_rate": 1.1957575757575759e-06, "loss": 6.277894973754883, "step": 76325 }, { "epoch": 0.1673, "grad_norm": 6.0468645095825195, "learning_rate": 1.1955050505050505e-06, "loss": 6.366886138916016, "step": 76330 }, { "epoch": 0.16735, "grad_norm": 10.532063484191895, "learning_rate": 1.1952525252525254e-06, "loss": 6.2227325439453125, "step": 76335 }, { "epoch": 0.1674, "grad_norm": 22.290809631347656, "learning_rate": 1.195e-06, "loss": 6.279832077026367, "step": 76340 }, { "epoch": 0.16745, "grad_norm": 5.825928211212158, "learning_rate": 1.1947474747474748e-06, "loss": 6.246927642822266, "step": 76345 }, { "epoch": 0.1675, "grad_norm": 4.07521915435791, "learning_rate": 1.1944949494949495e-06, "loss": 6.216015243530274, "step": 76350 }, { "epoch": 0.16755, "grad_norm": 9.26401138305664, "learning_rate": 1.1942424242424243e-06, "loss": 6.287821197509766, "step": 76355 }, { "epoch": 0.1676, "grad_norm": 11.581019401550293, "learning_rate": 1.193989898989899e-06, "loss": 6.231879043579101, "step": 76360 }, { "epoch": 0.16765, "grad_norm": 12.815722465515137, "learning_rate": 1.1937373737373738e-06, "loss": 6.084149932861328, "step": 76365 }, { "epoch": 0.1677, "grad_norm": 4.553281307220459, "learning_rate": 1.1934848484848486e-06, "loss": 6.297398376464844, "step": 76370 }, { "epoch": 0.16775, "grad_norm": 8.311574935913086, "learning_rate": 1.1932323232323233e-06, "loss": 6.216790771484375, "step": 76375 }, { "epoch": 0.1678, "grad_norm": 8.547215461730957, "learning_rate": 1.192979797979798e-06, "loss": 6.226951980590821, "step": 76380 }, { "epoch": 0.16785, "grad_norm": 5.446493625640869, "learning_rate": 1.1927272727272727e-06, "loss": 6.248435974121094, "step": 76385 }, { "epoch": 0.1679, "grad_norm": 5.994565010070801, "learning_rate": 1.1924747474747476e-06, "loss": 6.284016799926758, "step": 76390 }, { "epoch": 0.16795, "grad_norm": 8.918450355529785, "learning_rate": 1.1922222222222222e-06, "loss": 6.243867111206055, "step": 76395 }, { "epoch": 0.168, "grad_norm": 7.715800762176514, "learning_rate": 1.191969696969697e-06, "loss": 6.2330268859863285, "step": 76400 }, { "epoch": 0.16805, "grad_norm": 7.681880950927734, "learning_rate": 1.1917171717171719e-06, "loss": 6.245293426513672, "step": 76405 }, { "epoch": 0.1681, "grad_norm": 6.321917533874512, "learning_rate": 1.1914646464646467e-06, "loss": 6.348214340209961, "step": 76410 }, { "epoch": 0.16815, "grad_norm": 4.439499855041504, "learning_rate": 1.1912121212121214e-06, "loss": 6.272935485839843, "step": 76415 }, { "epoch": 0.1682, "grad_norm": 7.301117420196533, "learning_rate": 1.1909595959595962e-06, "loss": 6.231130981445313, "step": 76420 }, { "epoch": 0.16825, "grad_norm": 5.0750508308410645, "learning_rate": 1.1907070707070708e-06, "loss": 6.222492218017578, "step": 76425 }, { "epoch": 0.1683, "grad_norm": 6.881435871124268, "learning_rate": 1.1904545454545457e-06, "loss": 6.303968811035157, "step": 76430 }, { "epoch": 0.16835, "grad_norm": 10.987617492675781, "learning_rate": 1.1902020202020203e-06, "loss": 6.2170154571533205, "step": 76435 }, { "epoch": 0.1684, "grad_norm": 3.8712854385375977, "learning_rate": 1.1899494949494951e-06, "loss": 6.250242614746094, "step": 76440 }, { "epoch": 0.16845, "grad_norm": 4.825461387634277, "learning_rate": 1.1896969696969698e-06, "loss": 6.272250747680664, "step": 76445 }, { "epoch": 0.1685, "grad_norm": 6.567546367645264, "learning_rate": 1.1894444444444446e-06, "loss": 6.228774642944336, "step": 76450 }, { "epoch": 0.16855, "grad_norm": 4.955061912536621, "learning_rate": 1.1891919191919192e-06, "loss": 6.215934371948242, "step": 76455 }, { "epoch": 0.1686, "grad_norm": 8.251968383789062, "learning_rate": 1.188939393939394e-06, "loss": 6.231398391723633, "step": 76460 }, { "epoch": 0.16865, "grad_norm": 6.296323299407959, "learning_rate": 1.188686868686869e-06, "loss": 6.2507881164550785, "step": 76465 }, { "epoch": 0.1687, "grad_norm": 6.05072021484375, "learning_rate": 1.1884343434343436e-06, "loss": 6.185202789306641, "step": 76470 }, { "epoch": 0.16875, "grad_norm": 40.84956359863281, "learning_rate": 1.1881818181818184e-06, "loss": 6.212310791015625, "step": 76475 }, { "epoch": 0.1688, "grad_norm": 4.7977094650268555, "learning_rate": 1.187929292929293e-06, "loss": 6.2153068542480465, "step": 76480 }, { "epoch": 0.16885, "grad_norm": 4.49822998046875, "learning_rate": 1.1876767676767679e-06, "loss": 6.450070190429687, "step": 76485 }, { "epoch": 0.1689, "grad_norm": 5.724848747253418, "learning_rate": 1.1874242424242425e-06, "loss": 6.277370452880859, "step": 76490 }, { "epoch": 0.16895, "grad_norm": 5.26506233215332, "learning_rate": 1.1871717171717173e-06, "loss": 6.2194068908691404, "step": 76495 }, { "epoch": 0.169, "grad_norm": 6.052298545837402, "learning_rate": 1.186919191919192e-06, "loss": 6.345171356201172, "step": 76500 }, { "epoch": 0.16905, "grad_norm": 9.596378326416016, "learning_rate": 1.1866666666666668e-06, "loss": 6.236267471313477, "step": 76505 }, { "epoch": 0.1691, "grad_norm": 3.99934720993042, "learning_rate": 1.1864141414141414e-06, "loss": 6.231280517578125, "step": 76510 }, { "epoch": 0.16915, "grad_norm": 11.7384614944458, "learning_rate": 1.1861616161616163e-06, "loss": 6.232776641845703, "step": 76515 }, { "epoch": 0.1692, "grad_norm": 5.669408798217773, "learning_rate": 1.1859090909090911e-06, "loss": 6.120367813110351, "step": 76520 }, { "epoch": 0.16925, "grad_norm": 3.2827038764953613, "learning_rate": 1.1856565656565658e-06, "loss": 6.194318389892578, "step": 76525 }, { "epoch": 0.1693, "grad_norm": 8.520268440246582, "learning_rate": 1.1854040404040406e-06, "loss": 6.285604476928711, "step": 76530 }, { "epoch": 0.16935, "grad_norm": 5.127617359161377, "learning_rate": 1.1851515151515152e-06, "loss": 6.23932113647461, "step": 76535 }, { "epoch": 0.1694, "grad_norm": 12.674568176269531, "learning_rate": 1.18489898989899e-06, "loss": 6.279071426391601, "step": 76540 }, { "epoch": 0.16945, "grad_norm": 7.22168493270874, "learning_rate": 1.1846464646464647e-06, "loss": 6.272966766357422, "step": 76545 }, { "epoch": 0.1695, "grad_norm": 3.203972578048706, "learning_rate": 1.1843939393939395e-06, "loss": 6.19160270690918, "step": 76550 }, { "epoch": 0.16955, "grad_norm": 4.238882541656494, "learning_rate": 1.1841414141414142e-06, "loss": 6.225872802734375, "step": 76555 }, { "epoch": 0.1696, "grad_norm": 7.414513111114502, "learning_rate": 1.183888888888889e-06, "loss": 6.298076629638672, "step": 76560 }, { "epoch": 0.16965, "grad_norm": 4.938249111175537, "learning_rate": 1.1836363636363639e-06, "loss": 6.240488433837891, "step": 76565 }, { "epoch": 0.1697, "grad_norm": 3.616589307785034, "learning_rate": 1.1833838383838385e-06, "loss": 6.232953643798828, "step": 76570 }, { "epoch": 0.16975, "grad_norm": 4.65211820602417, "learning_rate": 1.1831313131313133e-06, "loss": 6.24490966796875, "step": 76575 }, { "epoch": 0.1698, "grad_norm": 6.107354640960693, "learning_rate": 1.182878787878788e-06, "loss": 6.231362533569336, "step": 76580 }, { "epoch": 0.16985, "grad_norm": 7.121214866638184, "learning_rate": 1.1826262626262628e-06, "loss": 6.230127334594727, "step": 76585 }, { "epoch": 0.1699, "grad_norm": 8.680512428283691, "learning_rate": 1.1823737373737374e-06, "loss": 6.2316131591796875, "step": 76590 }, { "epoch": 0.16995, "grad_norm": 4.794097900390625, "learning_rate": 1.1821212121212123e-06, "loss": 6.240266799926758, "step": 76595 }, { "epoch": 0.17, "grad_norm": 7.495255470275879, "learning_rate": 1.181868686868687e-06, "loss": 6.243625640869141, "step": 76600 }, { "epoch": 0.17005, "grad_norm": 6.412196159362793, "learning_rate": 1.1816161616161617e-06, "loss": 6.2037109375, "step": 76605 }, { "epoch": 0.1701, "grad_norm": 5.605950832366943, "learning_rate": 1.1813636363636364e-06, "loss": 6.25317153930664, "step": 76610 }, { "epoch": 0.17015, "grad_norm": 4.36215877532959, "learning_rate": 1.1811111111111112e-06, "loss": 6.252906799316406, "step": 76615 }, { "epoch": 0.1702, "grad_norm": 3.4501256942749023, "learning_rate": 1.180858585858586e-06, "loss": 6.206050872802734, "step": 76620 }, { "epoch": 0.17025, "grad_norm": 12.606247901916504, "learning_rate": 1.1806060606060607e-06, "loss": 6.738040924072266, "step": 76625 }, { "epoch": 0.1703, "grad_norm": 7.6188788414001465, "learning_rate": 1.1803535353535355e-06, "loss": 6.255937576293945, "step": 76630 }, { "epoch": 0.17035, "grad_norm": 8.408783912658691, "learning_rate": 1.1801010101010102e-06, "loss": 6.244535446166992, "step": 76635 }, { "epoch": 0.1704, "grad_norm": 8.019062042236328, "learning_rate": 1.179848484848485e-06, "loss": 6.229003143310547, "step": 76640 }, { "epoch": 0.17045, "grad_norm": 3.8503150939941406, "learning_rate": 1.1795959595959596e-06, "loss": 6.319132995605469, "step": 76645 }, { "epoch": 0.1705, "grad_norm": 5.162674903869629, "learning_rate": 1.1793434343434345e-06, "loss": 6.2445625305175785, "step": 76650 }, { "epoch": 0.17055, "grad_norm": 5.049015522003174, "learning_rate": 1.179090909090909e-06, "loss": 6.198622512817383, "step": 76655 }, { "epoch": 0.1706, "grad_norm": 31.913400650024414, "learning_rate": 1.178838383838384e-06, "loss": 6.275890731811524, "step": 76660 }, { "epoch": 0.17065, "grad_norm": 9.81215763092041, "learning_rate": 1.1785858585858586e-06, "loss": 6.231233215332031, "step": 76665 }, { "epoch": 0.1707, "grad_norm": 4.243008136749268, "learning_rate": 1.1783333333333334e-06, "loss": 6.280393981933594, "step": 76670 }, { "epoch": 0.17075, "grad_norm": 4.132779121398926, "learning_rate": 1.1780808080808083e-06, "loss": 6.244886016845703, "step": 76675 }, { "epoch": 0.1708, "grad_norm": 5.672957897186279, "learning_rate": 1.1778282828282829e-06, "loss": 6.232676696777344, "step": 76680 }, { "epoch": 0.17085, "grad_norm": 5.93626070022583, "learning_rate": 1.1775757575757577e-06, "loss": 6.20739517211914, "step": 76685 }, { "epoch": 0.1709, "grad_norm": 12.082473754882812, "learning_rate": 1.1773232323232324e-06, "loss": 6.502204895019531, "step": 76690 }, { "epoch": 0.17095, "grad_norm": 6.949464797973633, "learning_rate": 1.1770707070707072e-06, "loss": 6.3496040344238285, "step": 76695 }, { "epoch": 0.171, "grad_norm": 12.701355934143066, "learning_rate": 1.1768181818181818e-06, "loss": 6.215681457519532, "step": 76700 }, { "epoch": 0.17105, "grad_norm": 6.45787239074707, "learning_rate": 1.1765656565656567e-06, "loss": 6.254242706298828, "step": 76705 }, { "epoch": 0.1711, "grad_norm": 8.286921501159668, "learning_rate": 1.1763131313131313e-06, "loss": 6.220455932617187, "step": 76710 }, { "epoch": 0.17115, "grad_norm": 10.692482948303223, "learning_rate": 1.1760606060606061e-06, "loss": 6.195799255371094, "step": 76715 }, { "epoch": 0.1712, "grad_norm": 4.472867965698242, "learning_rate": 1.1758080808080808e-06, "loss": 6.270844650268555, "step": 76720 }, { "epoch": 0.17125, "grad_norm": 4.633749485015869, "learning_rate": 1.1755555555555556e-06, "loss": 6.478636932373047, "step": 76725 }, { "epoch": 0.1713, "grad_norm": 7.7024922370910645, "learning_rate": 1.1753030303030305e-06, "loss": 6.277113723754883, "step": 76730 }, { "epoch": 0.17135, "grad_norm": 11.417915344238281, "learning_rate": 1.175050505050505e-06, "loss": 6.338034057617188, "step": 76735 }, { "epoch": 0.1714, "grad_norm": 7.251706600189209, "learning_rate": 1.17479797979798e-06, "loss": 6.2490089416503904, "step": 76740 }, { "epoch": 0.17145, "grad_norm": 4.929230690002441, "learning_rate": 1.1745454545454546e-06, "loss": 6.24151840209961, "step": 76745 }, { "epoch": 0.1715, "grad_norm": 5.1977081298828125, "learning_rate": 1.1742929292929294e-06, "loss": 6.244397354125977, "step": 76750 }, { "epoch": 0.17155, "grad_norm": 5.573616981506348, "learning_rate": 1.174040404040404e-06, "loss": 6.222430801391601, "step": 76755 }, { "epoch": 0.1716, "grad_norm": 7.442543983459473, "learning_rate": 1.1737878787878789e-06, "loss": 6.229808807373047, "step": 76760 }, { "epoch": 0.17165, "grad_norm": 28.578575134277344, "learning_rate": 1.1735353535353535e-06, "loss": 6.238621520996094, "step": 76765 }, { "epoch": 0.1717, "grad_norm": 26.518896102905273, "learning_rate": 1.1732828282828283e-06, "loss": 6.32861213684082, "step": 76770 }, { "epoch": 0.17175, "grad_norm": 19.20452117919922, "learning_rate": 1.173030303030303e-06, "loss": 6.205448913574219, "step": 76775 }, { "epoch": 0.1718, "grad_norm": 49.408836364746094, "learning_rate": 1.1727777777777778e-06, "loss": 6.169350814819336, "step": 76780 }, { "epoch": 0.17185, "grad_norm": 7.77352237701416, "learning_rate": 1.1725252525252527e-06, "loss": 6.2123558044433596, "step": 76785 }, { "epoch": 0.1719, "grad_norm": 4.605600357055664, "learning_rate": 1.1722727272727273e-06, "loss": 6.241158294677734, "step": 76790 }, { "epoch": 0.17195, "grad_norm": 7.824213981628418, "learning_rate": 1.1720202020202021e-06, "loss": 6.258676910400391, "step": 76795 }, { "epoch": 0.172, "grad_norm": 7.159731864929199, "learning_rate": 1.1717676767676768e-06, "loss": 6.232947540283203, "step": 76800 }, { "epoch": 0.001, "grad_norm": 5.325935363769531, "learning_rate": 1.166717171717172e-06, "loss": 6.277135009765625, "step": 76900 }, { "epoch": 0.002, "grad_norm": 5.067817687988281, "learning_rate": 1.1616666666666668e-06, "loss": 6.306599731445313, "step": 77000 }, { "epoch": 0.003, "grad_norm": 7.431005477905273, "learning_rate": 1.1566161616161618e-06, "loss": 6.321148681640625, "step": 77100 }, { "epoch": 0.004, "grad_norm": 5.885525703430176, "learning_rate": 1.1515656565656567e-06, "loss": 6.304764404296875, "step": 77200 }, { "epoch": 0.005, "grad_norm": 6.6994123458862305, "learning_rate": 1.1465151515151516e-06, "loss": 6.290416259765625, "step": 77300 }, { "epoch": 0.006, "grad_norm": 3.146827220916748, "learning_rate": 1.1414646464646466e-06, "loss": 6.262575073242187, "step": 77400 }, { "epoch": 0.007, "grad_norm": 7.762009143829346, "learning_rate": 1.1364141414141415e-06, "loss": 6.272303466796875, "step": 77500 }, { "epoch": 0.008, "grad_norm": 5.467002868652344, "learning_rate": 1.1313636363636364e-06, "loss": 6.28575439453125, "step": 77600 }, { "epoch": 0.009, "grad_norm": 4.156159400939941, "learning_rate": 1.1263131313131316e-06, "loss": 6.2598809814453125, "step": 77700 }, { "epoch": 0.01, "grad_norm": 6.027379035949707, "learning_rate": 1.1212626262626263e-06, "loss": 6.280023193359375, "step": 77800 }, { "epoch": 0.011, "grad_norm": 4.681583404541016, "learning_rate": 1.1162121212121212e-06, "loss": 6.289010620117187, "step": 77900 }, { "epoch": 0.012, "grad_norm": 4.746392726898193, "learning_rate": 1.1111616161616162e-06, "loss": 6.28618896484375, "step": 78000 }, { "epoch": 0.0001, "grad_norm": 13.831293106079102, "learning_rate": 1.8e-07, "loss": 6.25738754272461, "step": 78010 }, { "epoch": 0.0002, "grad_norm": 5.434852600097656, "learning_rate": 3.8e-07, "loss": 6.229875564575195, "step": 78020 }, { "epoch": 0.0003, "grad_norm": 5.03108549118042, "learning_rate": 5.8e-07, "loss": 6.280242919921875, "step": 78030 }, { "epoch": 0.0004, "grad_norm": 3.0562851428985596, "learning_rate": 7.8e-07, "loss": 6.255479049682617, "step": 78040 }, { "epoch": 0.0005, "grad_norm": 9.858650207519531, "learning_rate": 9.8e-07, "loss": 6.231929016113281, "step": 78050 }, { "epoch": 0.0006, "grad_norm": 8.13047981262207, "learning_rate": 1.18e-06, "loss": 6.249365234375, "step": 78060 }, { "epoch": 0.0007, "grad_norm": 10.772915840148926, "learning_rate": 1.3800000000000001e-06, "loss": 6.239860153198242, "step": 78070 }, { "epoch": 0.0008, "grad_norm": 6.823290824890137, "learning_rate": 1.5800000000000003e-06, "loss": 6.212490844726562, "step": 78080 }, { "epoch": 0.0009, "grad_norm": 6.4095916748046875, "learning_rate": 1.7800000000000001e-06, "loss": 6.222204971313476, "step": 78090 }, { "epoch": 0.001, "grad_norm": 8.364444732666016, "learning_rate": 1.98e-06, "loss": 6.278558349609375, "step": 78100 }, { "epoch": 0.0011, "grad_norm": 8.836404800415039, "learning_rate": 2.1800000000000003e-06, "loss": 6.294901275634766, "step": 78110 }, { "epoch": 0.0012, "grad_norm": 9.267115592956543, "learning_rate": 2.38e-06, "loss": 6.21533203125, "step": 78120 }, { "epoch": 0.0013, "grad_norm": 16.759416580200195, "learning_rate": 2.5800000000000003e-06, "loss": 6.2719074249267575, "step": 78130 }, { "epoch": 0.0014, "grad_norm": 15.809864044189453, "learning_rate": 2.78e-06, "loss": 6.266093444824219, "step": 78140 }, { "epoch": 0.0015, "grad_norm": 7.359900951385498, "learning_rate": 2.9800000000000003e-06, "loss": 6.182052230834961, "step": 78150 }, { "epoch": 0.0016, "grad_norm": 13.689974784851074, "learning_rate": 3.1800000000000005e-06, "loss": 6.214759826660156, "step": 78160 }, { "epoch": 0.0017, "grad_norm": 9.414170265197754, "learning_rate": 3.38e-06, "loss": 6.211700439453125, "step": 78170 }, { "epoch": 0.0018, "grad_norm": 17.625696182250977, "learning_rate": 3.58e-06, "loss": 6.208622741699219, "step": 78180 }, { "epoch": 0.0019, "grad_norm": 32.35793685913086, "learning_rate": 3.7800000000000002e-06, "loss": 6.312331771850586, "step": 78190 }, { "epoch": 0.002, "grad_norm": 20.288366317749023, "learning_rate": 3.98e-06, "loss": 6.270192718505859, "step": 78200 }, { "epoch": 0.0021, "grad_norm": 41.83295440673828, "learning_rate": 4.18e-06, "loss": 6.264197921752929, "step": 78210 }, { "epoch": 0.0022, "grad_norm": 34.18258285522461, "learning_rate": 4.38e-06, "loss": 6.217342376708984, "step": 78220 }, { "epoch": 0.0023, "grad_norm": 22.293800354003906, "learning_rate": 4.58e-06, "loss": 6.176754379272461, "step": 78230 }, { "epoch": 0.0024, "grad_norm": 18.186948776245117, "learning_rate": 4.780000000000001e-06, "loss": 6.1969348907470705, "step": 78240 }, { "epoch": 0.0025, "grad_norm": 36.656288146972656, "learning_rate": 4.98e-06, "loss": 6.2761798858642575, "step": 78250 }, { "epoch": 0.0026, "grad_norm": 23.73198127746582, "learning_rate": 5.18e-06, "loss": 6.220414352416992, "step": 78260 }, { "epoch": 0.0027, "grad_norm": 32.153228759765625, "learning_rate": 5.38e-06, "loss": 6.238127136230469, "step": 78270 }, { "epoch": 0.0028, "grad_norm": 19.463003158569336, "learning_rate": 5.580000000000001e-06, "loss": 6.192630004882813, "step": 78280 }, { "epoch": 0.0029, "grad_norm": 22.468446731567383, "learning_rate": 5.78e-06, "loss": 6.315469741821289, "step": 78290 }, { "epoch": 0.003, "grad_norm": 17.94868278503418, "learning_rate": 5.98e-06, "loss": 6.3200019836425785, "step": 78300 }, { "epoch": 0.0031, "grad_norm": 13.334260940551758, "learning_rate": 6.18e-06, "loss": 6.465155029296875, "step": 78310 }, { "epoch": 0.0032, "grad_norm": 11.829285621643066, "learning_rate": 6.38e-06, "loss": 6.187746810913086, "step": 78320 }, { "epoch": 0.0033, "grad_norm": 19.58742332458496, "learning_rate": 6.58e-06, "loss": 6.174218368530274, "step": 78330 }, { "epoch": 0.0034, "grad_norm": 14.360065460205078, "learning_rate": 6.78e-06, "loss": 6.165006256103515, "step": 78340 }, { "epoch": 0.0035, "grad_norm": 19.357784271240234, "learning_rate": 6.98e-06, "loss": 6.176294326782227, "step": 78350 }, { "epoch": 0.0036, "grad_norm": 23.641145706176758, "learning_rate": 7.180000000000001e-06, "loss": 6.137590408325195, "step": 78360 }, { "epoch": 0.0037, "grad_norm": 17.24037742614746, "learning_rate": 7.3800000000000005e-06, "loss": 6.153422546386719, "step": 78370 }, { "epoch": 0.0038, "grad_norm": 18.96773338317871, "learning_rate": 7.580000000000001e-06, "loss": 6.136513137817383, "step": 78380 }, { "epoch": 0.0039, "grad_norm": 14.598422050476074, "learning_rate": 7.78e-06, "loss": 6.105740737915039, "step": 78390 }, { "epoch": 0.004, "grad_norm": 14.752056121826172, "learning_rate": 7.98e-06, "loss": 6.095166778564453, "step": 78400 }, { "epoch": 0.0041, "grad_norm": 20.05882453918457, "learning_rate": 8.18e-06, "loss": 6.087274551391602, "step": 78410 }, { "epoch": 0.0042, "grad_norm": 13.778268814086914, "learning_rate": 8.380000000000001e-06, "loss": 6.104154968261719, "step": 78420 }, { "epoch": 0.0043, "grad_norm": 16.199426651000977, "learning_rate": 8.580000000000001e-06, "loss": 6.146556854248047, "step": 78430 }, { "epoch": 0.0044, "grad_norm": 19.096878051757812, "learning_rate": 8.78e-06, "loss": 6.066572952270508, "step": 78440 }, { "epoch": 0.0045, "grad_norm": 18.036277770996094, "learning_rate": 8.98e-06, "loss": 6.07025146484375, "step": 78450 }, { "epoch": 0.0046, "grad_norm": 10.683009147644043, "learning_rate": 9.180000000000002e-06, "loss": 6.0315605163574215, "step": 78460 }, { "epoch": 0.0047, "grad_norm": 14.032135963439941, "learning_rate": 9.38e-06, "loss": 6.014869308471679, "step": 78470 }, { "epoch": 0.0048, "grad_norm": 23.035322189331055, "learning_rate": 9.58e-06, "loss": 6.018888092041015, "step": 78480 }, { "epoch": 0.0049, "grad_norm": 13.094988822937012, "learning_rate": 9.78e-06, "loss": 6.195376205444336, "step": 78490 }, { "epoch": 0.005, "grad_norm": 29.933177947998047, "learning_rate": 9.980000000000001e-06, "loss": 5.993812179565429, "step": 78500 }, { "epoch": 0.0051, "grad_norm": 18.486473083496094, "learning_rate": 1.018e-05, "loss": 5.963929748535156, "step": 78510 }, { "epoch": 0.0052, "grad_norm": 14.718317031860352, "learning_rate": 1.038e-05, "loss": 6.0145305633544925, "step": 78520 }, { "epoch": 0.0053, "grad_norm": 24.83466339111328, "learning_rate": 1.058e-05, "loss": 5.9802391052246096, "step": 78530 }, { "epoch": 0.0054, "grad_norm": 16.267728805541992, "learning_rate": 1.0780000000000002e-05, "loss": 5.929384231567383, "step": 78540 }, { "epoch": 0.0055, "grad_norm": 11.732959747314453, "learning_rate": 1.098e-05, "loss": 5.956810379028321, "step": 78550 }, { "epoch": 0.0056, "grad_norm": 18.43738555908203, "learning_rate": 1.118e-05, "loss": 5.968051910400391, "step": 78560 }, { "epoch": 0.0057, "grad_norm": 12.559771537780762, "learning_rate": 1.1380000000000001e-05, "loss": 5.905082321166992, "step": 78570 }, { "epoch": 0.0058, "grad_norm": 10.343229293823242, "learning_rate": 1.1580000000000001e-05, "loss": 5.8938743591308596, "step": 78580 }, { "epoch": 0.0059, "grad_norm": 17.656749725341797, "learning_rate": 1.178e-05, "loss": 5.901334762573242, "step": 78590 }, { "epoch": 0.006, "grad_norm": 13.118258476257324, "learning_rate": 1.198e-05, "loss": 5.881685256958008, "step": 78600 }, { "epoch": 0.0061, "grad_norm": 15.699453353881836, "learning_rate": 1.2180000000000002e-05, "loss": 5.842811203002929, "step": 78610 }, { "epoch": 0.0062, "grad_norm": 21.073495864868164, "learning_rate": 1.238e-05, "loss": 5.902508926391602, "step": 78620 }, { "epoch": 0.0063, "grad_norm": 10.76048755645752, "learning_rate": 1.258e-05, "loss": 5.860686111450195, "step": 78630 }, { "epoch": 0.0064, "grad_norm": 14.35180377960205, "learning_rate": 1.278e-05, "loss": 5.944111251831055, "step": 78640 }, { "epoch": 0.0065, "grad_norm": 11.1992826461792, "learning_rate": 1.2980000000000001e-05, "loss": 5.802405166625976, "step": 78650 }, { "epoch": 0.0066, "grad_norm": 9.87505054473877, "learning_rate": 1.3180000000000001e-05, "loss": 5.7846027374267575, "step": 78660 }, { "epoch": 0.0067, "grad_norm": 12.658112525939941, "learning_rate": 1.338e-05, "loss": 5.7684375762939455, "step": 78670 }, { "epoch": 0.0068, "grad_norm": 8.843621253967285, "learning_rate": 1.358e-05, "loss": 5.746451187133789, "step": 78680 }, { "epoch": 0.0069, "grad_norm": 12.071161270141602, "learning_rate": 1.3780000000000002e-05, "loss": 5.739550018310547, "step": 78690 }, { "epoch": 0.007, "grad_norm": 9.72649097442627, "learning_rate": 1.3980000000000002e-05, "loss": 5.704967498779297, "step": 78700 }, { "epoch": 0.0071, "grad_norm": 15.297441482543945, "learning_rate": 1.4180000000000001e-05, "loss": 5.662594223022461, "step": 78710 }, { "epoch": 0.0072, "grad_norm": 11.910374641418457, "learning_rate": 1.4380000000000001e-05, "loss": 5.727904510498047, "step": 78720 }, { "epoch": 0.0073, "grad_norm": 20.780237197875977, "learning_rate": 1.4580000000000003e-05, "loss": 5.76818962097168, "step": 78730 }, { "epoch": 0.0074, "grad_norm": 9.636930465698242, "learning_rate": 1.4779999999999999e-05, "loss": 5.669594573974609, "step": 78740 }, { "epoch": 0.0075, "grad_norm": 8.19430160522461, "learning_rate": 1.4979999999999999e-05, "loss": 5.655058288574219, "step": 78750 }, { "epoch": 0.0076, "grad_norm": 6.069611072540283, "learning_rate": 1.518e-05, "loss": 5.619614791870117, "step": 78760 }, { "epoch": 0.0077, "grad_norm": 7.102042198181152, "learning_rate": 1.538e-05, "loss": 5.658396911621094, "step": 78770 }, { "epoch": 0.0078, "grad_norm": 6.038002014160156, "learning_rate": 1.558e-05, "loss": 5.757386016845703, "step": 78780 }, { "epoch": 0.0079, "grad_norm": 9.522271156311035, "learning_rate": 1.578e-05, "loss": 5.695059204101563, "step": 78790 }, { "epoch": 0.008, "grad_norm": 7.115524768829346, "learning_rate": 1.598e-05, "loss": 5.577466583251953, "step": 78800 }, { "epoch": 0.0081, "grad_norm": 6.765868663787842, "learning_rate": 1.618e-05, "loss": 5.541515350341797, "step": 78810 }, { "epoch": 0.0082, "grad_norm": 10.673751831054688, "learning_rate": 1.6380000000000002e-05, "loss": 5.608538436889648, "step": 78820 }, { "epoch": 0.0083, "grad_norm": 10.56566047668457, "learning_rate": 1.658e-05, "loss": 5.5947223663330075, "step": 78830 }, { "epoch": 0.0084, "grad_norm": 7.5465264320373535, "learning_rate": 1.6780000000000002e-05, "loss": 5.577526473999024, "step": 78840 }, { "epoch": 0.0085, "grad_norm": 7.476527214050293, "learning_rate": 1.698e-05, "loss": 5.548984146118164, "step": 78850 }, { "epoch": 0.0086, "grad_norm": 7.920779228210449, "learning_rate": 1.718e-05, "loss": 5.509591293334961, "step": 78860 }, { "epoch": 0.0087, "grad_norm": 8.084773063659668, "learning_rate": 1.7380000000000003e-05, "loss": 5.495907592773437, "step": 78870 }, { "epoch": 0.0088, "grad_norm": 4.4232988357543945, "learning_rate": 1.758e-05, "loss": 5.467477798461914, "step": 78880 }, { "epoch": 0.0089, "grad_norm": 6.381575107574463, "learning_rate": 1.7780000000000003e-05, "loss": 5.4616954803466795, "step": 78890 }, { "epoch": 0.009, "grad_norm": 5.088339805603027, "learning_rate": 1.798e-05, "loss": 5.458164215087891, "step": 78900 }, { "epoch": 0.0091, "grad_norm": 7.627932548522949, "learning_rate": 1.818e-05, "loss": 5.469063949584961, "step": 78910 }, { "epoch": 0.0092, "grad_norm": 5.625033855438232, "learning_rate": 1.838e-05, "loss": 5.4588066101074215, "step": 78920 }, { "epoch": 0.0093, "grad_norm": 5.196907043457031, "learning_rate": 1.858e-05, "loss": 5.425547027587891, "step": 78930 }, { "epoch": 0.0094, "grad_norm": 7.5594587326049805, "learning_rate": 1.878e-05, "loss": 5.421227645874024, "step": 78940 }, { "epoch": 0.0095, "grad_norm": 4.04423713684082, "learning_rate": 1.898e-05, "loss": 5.523548126220703, "step": 78950 }, { "epoch": 0.0096, "grad_norm": 4.898789405822754, "learning_rate": 1.918e-05, "loss": 5.413890075683594, "step": 78960 }, { "epoch": 0.0097, "grad_norm": 5.981982707977295, "learning_rate": 1.938e-05, "loss": 5.390898895263672, "step": 78970 }, { "epoch": 0.0098, "grad_norm": 4.433288097381592, "learning_rate": 1.9580000000000002e-05, "loss": 5.394657897949219, "step": 78980 }, { "epoch": 0.0099, "grad_norm": 5.7409257888793945, "learning_rate": 1.978e-05, "loss": 5.352289199829102, "step": 78990 }, { "epoch": 0.01, "grad_norm": 4.7127485275268555, "learning_rate": 1.9980000000000002e-05, "loss": 5.387149047851563, "step": 79000 }, { "epoch": 0.0101, "grad_norm": 5.740267276763916, "learning_rate": 2.0180000000000003e-05, "loss": 5.416493225097656, "step": 79010 }, { "epoch": 0.0102, "grad_norm": 4.055177211761475, "learning_rate": 2.038e-05, "loss": 5.553731155395508, "step": 79020 }, { "epoch": 0.0103, "grad_norm": 4.2837042808532715, "learning_rate": 2.0580000000000003e-05, "loss": 5.319791793823242, "step": 79030 }, { "epoch": 0.0104, "grad_norm": 4.059365749359131, "learning_rate": 2.078e-05, "loss": 5.374584197998047, "step": 79040 }, { "epoch": 0.0105, "grad_norm": 3.626437187194824, "learning_rate": 2.098e-05, "loss": 5.287915802001953, "step": 79050 }, { "epoch": 0.0106, "grad_norm": 4.021815299987793, "learning_rate": 2.118e-05, "loss": 5.300275421142578, "step": 79060 }, { "epoch": 0.0107, "grad_norm": 3.0267510414123535, "learning_rate": 2.138e-05, "loss": 5.306292724609375, "step": 79070 }, { "epoch": 0.0108, "grad_norm": 3.8944857120513916, "learning_rate": 2.158e-05, "loss": 5.257094192504883, "step": 79080 }, { "epoch": 0.0109, "grad_norm": 3.8396737575531006, "learning_rate": 2.178e-05, "loss": 5.258983993530274, "step": 79090 }, { "epoch": 0.011, "grad_norm": 3.4545974731445312, "learning_rate": 2.198e-05, "loss": 5.271467971801758, "step": 79100 }, { "epoch": 0.0111, "grad_norm": 3.3376893997192383, "learning_rate": 2.218e-05, "loss": 5.253792953491211, "step": 79110 }, { "epoch": 0.0112, "grad_norm": 3.1432063579559326, "learning_rate": 2.2380000000000003e-05, "loss": 5.2273712158203125, "step": 79120 }, { "epoch": 0.0113, "grad_norm": 2.61570405960083, "learning_rate": 2.258e-05, "loss": 5.188290023803711, "step": 79130 }, { "epoch": 0.0114, "grad_norm": 2.953009605407715, "learning_rate": 2.2780000000000002e-05, "loss": 5.230261993408203, "step": 79140 }, { "epoch": 0.0115, "grad_norm": 2.9995856285095215, "learning_rate": 2.298e-05, "loss": 5.192085266113281, "step": 79150 }, { "epoch": 0.0116, "grad_norm": 2.939575672149658, "learning_rate": 2.318e-05, "loss": 5.265590667724609, "step": 79160 }, { "epoch": 0.0117, "grad_norm": 2.654491901397705, "learning_rate": 2.3380000000000003e-05, "loss": 5.221822738647461, "step": 79170 }, { "epoch": 0.0118, "grad_norm": 2.7023849487304688, "learning_rate": 2.358e-05, "loss": 5.260878372192383, "step": 79180 }, { "epoch": 0.0119, "grad_norm": 2.9579830169677734, "learning_rate": 2.3780000000000003e-05, "loss": 5.188859176635742, "step": 79190 }, { "epoch": 0.012, "grad_norm": 3.2710068225860596, "learning_rate": 2.398e-05, "loss": 5.186108779907227, "step": 79200 }, { "epoch": 0.0121, "grad_norm": 2.8521430492401123, "learning_rate": 2.418e-05, "loss": 5.196657180786133, "step": 79210 }, { "epoch": 0.0122, "grad_norm": 2.42059063911438, "learning_rate": 2.438e-05, "loss": 5.1253715515136715, "step": 79220 }, { "epoch": 0.0123, "grad_norm": 2.6970036029815674, "learning_rate": 2.4580000000000002e-05, "loss": 5.09541130065918, "step": 79230 }, { "epoch": 0.0124, "grad_norm": 2.1887121200561523, "learning_rate": 2.478e-05, "loss": 5.107755279541015, "step": 79240 }, { "epoch": 0.0125, "grad_norm": 2.876920700073242, "learning_rate": 2.498e-05, "loss": 5.083759689331055, "step": 79250 }, { "epoch": 0.0126, "grad_norm": 2.6807138919830322, "learning_rate": 2.5180000000000003e-05, "loss": 5.092090225219726, "step": 79260 }, { "epoch": 0.0127, "grad_norm": 2.785169839859009, "learning_rate": 2.5380000000000004e-05, "loss": 5.085588455200195, "step": 79270 }, { "epoch": 0.0128, "grad_norm": 2.68437123298645, "learning_rate": 2.5580000000000002e-05, "loss": 5.0093029022216795, "step": 79280 }, { "epoch": 0.0129, "grad_norm": 2.302216053009033, "learning_rate": 2.5779999999999997e-05, "loss": 5.091044235229492, "step": 79290 }, { "epoch": 0.013, "grad_norm": 2.686721086502075, "learning_rate": 2.598e-05, "loss": 5.021673965454101, "step": 79300 }, { "epoch": 0.0131, "grad_norm": 2.0559897422790527, "learning_rate": 2.618e-05, "loss": 5.0074462890625, "step": 79310 }, { "epoch": 0.0132, "grad_norm": 2.309727191925049, "learning_rate": 2.6379999999999998e-05, "loss": 5.061210632324219, "step": 79320 }, { "epoch": 0.0133, "grad_norm": 2.9470717906951904, "learning_rate": 2.658e-05, "loss": 5.051780319213867, "step": 79330 }, { "epoch": 0.0134, "grad_norm": 2.3843889236450195, "learning_rate": 2.678e-05, "loss": 4.9680931091308596, "step": 79340 }, { "epoch": 0.0135, "grad_norm": 2.084409236907959, "learning_rate": 2.698e-05, "loss": 5.002963638305664, "step": 79350 }, { "epoch": 0.0136, "grad_norm": 2.152895212173462, "learning_rate": 2.718e-05, "loss": 4.969459152221679, "step": 79360 }, { "epoch": 0.0137, "grad_norm": 1.047629952430725, "learning_rate": 2.738e-05, "loss": 4.947771835327148, "step": 79370 }, { "epoch": 0.0138, "grad_norm": 2.1405746936798096, "learning_rate": 2.758e-05, "loss": 4.985189819335938, "step": 79380 }, { "epoch": 0.0139, "grad_norm": 2.1960415840148926, "learning_rate": 2.778e-05, "loss": 4.9476886749267575, "step": 79390 }, { "epoch": 0.014, "grad_norm": 2.6127655506134033, "learning_rate": 2.798e-05, "loss": 4.921721649169922, "step": 79400 }, { "epoch": 0.0141, "grad_norm": 1.6091228723526, "learning_rate": 2.818e-05, "loss": 5.143603515625, "step": 79410 }, { "epoch": 0.0142, "grad_norm": 2.266418695449829, "learning_rate": 2.8380000000000003e-05, "loss": 5.016165161132813, "step": 79420 }, { "epoch": 0.0143, "grad_norm": 1.9079009294509888, "learning_rate": 2.858e-05, "loss": 4.955899810791015, "step": 79430 }, { "epoch": 0.0144, "grad_norm": 2.2210869789123535, "learning_rate": 2.8780000000000002e-05, "loss": 4.938759231567383, "step": 79440 }, { "epoch": 0.0145, "grad_norm": 1.86758553981781, "learning_rate": 2.898e-05, "loss": 5.025225830078125, "step": 79450 }, { "epoch": 0.0146, "grad_norm": 2.2474615573883057, "learning_rate": 2.9180000000000002e-05, "loss": 4.910765838623047, "step": 79460 }, { "epoch": 0.0147, "grad_norm": 2.309957265853882, "learning_rate": 2.9380000000000003e-05, "loss": 4.8738445281982425, "step": 79470 }, { "epoch": 0.0148, "grad_norm": 2.262937545776367, "learning_rate": 2.958e-05, "loss": 4.943482971191406, "step": 79480 }, { "epoch": 0.0149, "grad_norm": 2.256162643432617, "learning_rate": 2.9780000000000003e-05, "loss": 5.068452835083008, "step": 79490 }, { "epoch": 0.015, "grad_norm": 2.54862380027771, "learning_rate": 2.998e-05, "loss": 4.863506317138672, "step": 79500 }, { "epoch": 0.0151, "grad_norm": 2.26631236076355, "learning_rate": 3.0180000000000002e-05, "loss": 4.840765762329101, "step": 79510 }, { "epoch": 0.0152, "grad_norm": 1.5525401830673218, "learning_rate": 3.0380000000000004e-05, "loss": 4.899321365356445, "step": 79520 }, { "epoch": 0.0153, "grad_norm": 2.2222964763641357, "learning_rate": 3.058e-05, "loss": 4.904487228393554, "step": 79530 }, { "epoch": 0.0154, "grad_norm": 1.6886237859725952, "learning_rate": 3.078e-05, "loss": 4.864575958251953, "step": 79540 }, { "epoch": 0.0155, "grad_norm": 1.7053600549697876, "learning_rate": 3.0980000000000005e-05, "loss": 4.789885711669922, "step": 79550 }, { "epoch": 0.0156, "grad_norm": 1.458892822265625, "learning_rate": 3.118e-05, "loss": 4.851237106323242, "step": 79560 }, { "epoch": 0.0157, "grad_norm": 1.9349397420883179, "learning_rate": 3.138e-05, "loss": 4.818837738037109, "step": 79570 }, { "epoch": 0.0158, "grad_norm": 1.6734633445739746, "learning_rate": 3.1580000000000006e-05, "loss": 4.823519515991211, "step": 79580 }, { "epoch": 0.0159, "grad_norm": 1.900466799736023, "learning_rate": 3.1780000000000004e-05, "loss": 4.830980682373047, "step": 79590 }, { "epoch": 0.016, "grad_norm": 1.594581127166748, "learning_rate": 3.198e-05, "loss": 4.932279968261719, "step": 79600 }, { "epoch": 0.0161, "grad_norm": 1.965193748474121, "learning_rate": 3.218e-05, "loss": 4.807279586791992, "step": 79610 }, { "epoch": 0.0162, "grad_norm": 1.4386441707611084, "learning_rate": 3.238e-05, "loss": 4.813835144042969, "step": 79620 }, { "epoch": 0.0163, "grad_norm": 1.7240893840789795, "learning_rate": 3.2579999999999996e-05, "loss": 4.7933910369873045, "step": 79630 }, { "epoch": 0.0164, "grad_norm": 1.8781183958053589, "learning_rate": 3.278e-05, "loss": 4.788759994506836, "step": 79640 }, { "epoch": 0.0165, "grad_norm": 1.3734517097473145, "learning_rate": 3.298e-05, "loss": 4.784089660644531, "step": 79650 }, { "epoch": 0.0166, "grad_norm": 1.4845818281173706, "learning_rate": 3.318e-05, "loss": 4.775547790527344, "step": 79660 }, { "epoch": 0.0167, "grad_norm": 1.5989508628845215, "learning_rate": 3.338e-05, "loss": 4.774301528930664, "step": 79670 }, { "epoch": 0.0168, "grad_norm": 1.451206088066101, "learning_rate": 3.358e-05, "loss": 4.7411552429199215, "step": 79680 }, { "epoch": 0.0169, "grad_norm": 1.2605071067810059, "learning_rate": 3.378e-05, "loss": 4.754336547851563, "step": 79690 }, { "epoch": 0.017, "grad_norm": 1.235495924949646, "learning_rate": 3.398e-05, "loss": 4.744180297851562, "step": 79700 }, { "epoch": 0.0171, "grad_norm": 1.4865126609802246, "learning_rate": 3.418e-05, "loss": 4.724196243286133, "step": 79710 }, { "epoch": 0.0172, "grad_norm": 1.600272297859192, "learning_rate": 3.438e-05, "loss": 4.686580657958984, "step": 79720 }, { "epoch": 0.0173, "grad_norm": 1.8734424114227295, "learning_rate": 3.4580000000000004e-05, "loss": 4.817763137817383, "step": 79730 }, { "epoch": 0.0174, "grad_norm": 1.618775725364685, "learning_rate": 3.478e-05, "loss": 4.701865005493164, "step": 79740 }, { "epoch": 0.0175, "grad_norm": 1.9350478649139404, "learning_rate": 3.498e-05, "loss": 4.791823196411133, "step": 79750 }, { "epoch": 0.0176, "grad_norm": 1.5198490619659424, "learning_rate": 3.518e-05, "loss": 4.727080917358398, "step": 79760 }, { "epoch": 0.0177, "grad_norm": 1.2485250234603882, "learning_rate": 3.5380000000000003e-05, "loss": 4.729285430908203, "step": 79770 }, { "epoch": 0.0178, "grad_norm": 1.1380257606506348, "learning_rate": 3.558e-05, "loss": 4.681850433349609, "step": 79780 }, { "epoch": 0.0179, "grad_norm": 1.163130521774292, "learning_rate": 3.578e-05, "loss": 4.684491729736328, "step": 79790 }, { "epoch": 0.018, "grad_norm": 1.2136390209197998, "learning_rate": 3.5980000000000004e-05, "loss": 4.697042083740234, "step": 79800 }, { "epoch": 0.0181, "grad_norm": 1.0268884897232056, "learning_rate": 3.618e-05, "loss": 4.675926208496094, "step": 79810 }, { "epoch": 0.0182, "grad_norm": 1.3107571601867676, "learning_rate": 3.638e-05, "loss": 4.70428581237793, "step": 79820 }, { "epoch": 0.0183, "grad_norm": 1.5012598037719727, "learning_rate": 3.6580000000000006e-05, "loss": 4.683891296386719, "step": 79830 }, { "epoch": 0.0184, "grad_norm": 1.068357229232788, "learning_rate": 3.6780000000000004e-05, "loss": 4.661652755737305, "step": 79840 }, { "epoch": 0.0185, "grad_norm": 1.4554474353790283, "learning_rate": 3.698e-05, "loss": 4.630062484741211, "step": 79850 }, { "epoch": 0.0186, "grad_norm": 1.7133673429489136, "learning_rate": 3.7180000000000007e-05, "loss": 4.651305770874023, "step": 79860 }, { "epoch": 0.0187, "grad_norm": 1.2922152280807495, "learning_rate": 3.7380000000000005e-05, "loss": 4.711663436889649, "step": 79870 }, { "epoch": 0.0188, "grad_norm": 1.199976921081543, "learning_rate": 3.758e-05, "loss": 4.695935440063477, "step": 79880 }, { "epoch": 0.0189, "grad_norm": 2.0232207775115967, "learning_rate": 3.778000000000001e-05, "loss": 4.5120391845703125, "step": 79890 }, { "epoch": 0.019, "grad_norm": 1.2053391933441162, "learning_rate": 3.7980000000000006e-05, "loss": 4.6756591796875, "step": 79900 }, { "epoch": 0.0191, "grad_norm": 1.392480731010437, "learning_rate": 3.818e-05, "loss": 4.598100280761718, "step": 79910 }, { "epoch": 0.0192, "grad_norm": 1.1502047777175903, "learning_rate": 3.838e-05, "loss": 4.620033645629883, "step": 79920 }, { "epoch": 0.0193, "grad_norm": 1.3657559156417847, "learning_rate": 3.858e-05, "loss": 4.627785110473633, "step": 79930 }, { "epoch": 0.0194, "grad_norm": 1.410475730895996, "learning_rate": 3.878e-05, "loss": 4.702437210083008, "step": 79940 }, { "epoch": 0.0195, "grad_norm": 1.2301191091537476, "learning_rate": 3.898e-05, "loss": 4.578351974487305, "step": 79950 }, { "epoch": 0.0196, "grad_norm": 1.2482020854949951, "learning_rate": 3.918e-05, "loss": 4.633525466918945, "step": 79960 }, { "epoch": 0.0197, "grad_norm": 0.9870690107345581, "learning_rate": 3.938e-05, "loss": 4.618231964111328, "step": 79970 }, { "epoch": 0.0198, "grad_norm": 1.1585794687271118, "learning_rate": 3.958e-05, "loss": 4.550857925415039, "step": 79980 }, { "epoch": 0.0199, "grad_norm": 0.9988259077072144, "learning_rate": 3.978e-05, "loss": 4.6564281463623045, "step": 79990 }, { "epoch": 0.02, "grad_norm": 1.2768689393997192, "learning_rate": 3.998e-05, "loss": 4.589435195922851, "step": 80000 }, { "epoch": 0.0201, "grad_norm": 1.2740956544876099, "learning_rate": 4.018e-05, "loss": 4.628123092651367, "step": 80010 }, { "epoch": 0.0202, "grad_norm": 1.1608917713165283, "learning_rate": 4.038e-05, "loss": 4.567636871337891, "step": 80020 }, { "epoch": 0.0203, "grad_norm": 1.2257567644119263, "learning_rate": 4.058e-05, "loss": 4.533962249755859, "step": 80030 }, { "epoch": 0.0204, "grad_norm": 1.0351650714874268, "learning_rate": 4.078e-05, "loss": 4.524802398681641, "step": 80040 }, { "epoch": 0.0205, "grad_norm": 1.0720527172088623, "learning_rate": 4.0980000000000004e-05, "loss": 4.528472900390625, "step": 80050 }, { "epoch": 0.0206, "grad_norm": 2.390462636947632, "learning_rate": 4.118e-05, "loss": 4.653088760375977, "step": 80060 }, { "epoch": 0.0207, "grad_norm": 1.2235627174377441, "learning_rate": 4.138e-05, "loss": 4.677774047851562, "step": 80070 }, { "epoch": 0.0208, "grad_norm": 1.0535306930541992, "learning_rate": 4.1580000000000005e-05, "loss": 4.528578567504883, "step": 80080 }, { "epoch": 0.0209, "grad_norm": 1.0980952978134155, "learning_rate": 4.178e-05, "loss": 4.540675354003906, "step": 80090 }, { "epoch": 0.021, "grad_norm": 1.059409499168396, "learning_rate": 4.198e-05, "loss": 4.4876453399658205, "step": 80100 }, { "epoch": 0.0211, "grad_norm": 1.1138412952423096, "learning_rate": 4.2180000000000006e-05, "loss": 4.4658466339111325, "step": 80110 }, { "epoch": 0.0212, "grad_norm": 0.6996028423309326, "learning_rate": 4.2380000000000004e-05, "loss": 4.5030670166015625, "step": 80120 }, { "epoch": 0.0213, "grad_norm": 2.4578969478607178, "learning_rate": 4.258e-05, "loss": 4.695986557006836, "step": 80130 }, { "epoch": 0.0214, "grad_norm": 1.130741834640503, "learning_rate": 4.278e-05, "loss": 4.564279937744141, "step": 80140 }, { "epoch": 0.0215, "grad_norm": 0.8312556147575378, "learning_rate": 4.2980000000000005e-05, "loss": 4.525263977050781, "step": 80150 }, { "epoch": 0.0216, "grad_norm": 1.0220922231674194, "learning_rate": 4.318e-05, "loss": 4.473982238769532, "step": 80160 }, { "epoch": 0.0217, "grad_norm": 1.0151786804199219, "learning_rate": 4.338e-05, "loss": 4.471885681152344, "step": 80170 }, { "epoch": 0.0218, "grad_norm": 1.0880709886550903, "learning_rate": 4.3580000000000006e-05, "loss": 4.504767990112304, "step": 80180 }, { "epoch": 0.0219, "grad_norm": 1.179359793663025, "learning_rate": 4.3780000000000004e-05, "loss": 4.528125, "step": 80190 }, { "epoch": 0.022, "grad_norm": 0.9086615443229675, "learning_rate": 4.398e-05, "loss": 4.479424285888672, "step": 80200 }, { "epoch": 0.0221, "grad_norm": 1.033979892730713, "learning_rate": 4.418000000000001e-05, "loss": 4.429480743408203, "step": 80210 }, { "epoch": 0.0222, "grad_norm": 0.9205300807952881, "learning_rate": 4.438e-05, "loss": 4.485737609863281, "step": 80220 }, { "epoch": 0.0223, "grad_norm": 0.9911152124404907, "learning_rate": 4.458e-05, "loss": 4.425294494628906, "step": 80230 }, { "epoch": 0.0224, "grad_norm": 1.069217562675476, "learning_rate": 4.478e-05, "loss": 4.466315841674804, "step": 80240 }, { "epoch": 0.0225, "grad_norm": 1.0082817077636719, "learning_rate": 4.498e-05, "loss": 4.445558166503906, "step": 80250 }, { "epoch": 0.0226, "grad_norm": 1.190942406654358, "learning_rate": 4.518e-05, "loss": 4.48295783996582, "step": 80260 }, { "epoch": 0.0227, "grad_norm": 0.8903585076332092, "learning_rate": 4.538e-05, "loss": 4.452038955688477, "step": 80270 }, { "epoch": 0.0228, "grad_norm": 0.8489766120910645, "learning_rate": 4.558e-05, "loss": 4.419961547851562, "step": 80280 }, { "epoch": 0.0229, "grad_norm": 0.9620883464813232, "learning_rate": 4.578e-05, "loss": 4.362897109985352, "step": 80290 }, { "epoch": 0.023, "grad_norm": 0.8962896466255188, "learning_rate": 4.5980000000000004e-05, "loss": 4.362208557128906, "step": 80300 }, { "epoch": 0.0231, "grad_norm": 1.035569667816162, "learning_rate": 4.618e-05, "loss": 4.370485305786133, "step": 80310 }, { "epoch": 0.0232, "grad_norm": 0.9185439348220825, "learning_rate": 4.638e-05, "loss": 4.3609672546386715, "step": 80320 }, { "epoch": 0.0233, "grad_norm": 0.9876886010169983, "learning_rate": 4.6580000000000005e-05, "loss": 4.39827880859375, "step": 80330 }, { "epoch": 0.0234, "grad_norm": 0.9136462211608887, "learning_rate": 4.678e-05, "loss": 4.345404815673828, "step": 80340 }, { "epoch": 0.0235, "grad_norm": 0.8449466824531555, "learning_rate": 4.698e-05, "loss": 4.4219707489013675, "step": 80350 }, { "epoch": 0.0236, "grad_norm": 0.7947878837585449, "learning_rate": 4.718e-05, "loss": 4.331117248535156, "step": 80360 }, { "epoch": 0.0237, "grad_norm": 0.992650032043457, "learning_rate": 4.7380000000000004e-05, "loss": 4.31458625793457, "step": 80370 }, { "epoch": 0.0238, "grad_norm": 0.8988213539123535, "learning_rate": 4.758e-05, "loss": 4.340858078002929, "step": 80380 }, { "epoch": 0.0239, "grad_norm": 0.8121524453163147, "learning_rate": 4.778e-05, "loss": 4.315187454223633, "step": 80390 }, { "epoch": 0.024, "grad_norm": 0.8794443011283875, "learning_rate": 4.7980000000000005e-05, "loss": 4.3045696258544925, "step": 80400 }, { "epoch": 0.0241, "grad_norm": 0.9218288660049438, "learning_rate": 4.818e-05, "loss": 4.339737319946289, "step": 80410 }, { "epoch": 0.0242, "grad_norm": 0.8059577345848083, "learning_rate": 4.838e-05, "loss": 4.305084228515625, "step": 80420 }, { "epoch": 0.0243, "grad_norm": 0.7854894995689392, "learning_rate": 4.8580000000000006e-05, "loss": 4.430810165405274, "step": 80430 }, { "epoch": 0.0244, "grad_norm": 1.1295255422592163, "learning_rate": 4.8780000000000004e-05, "loss": 4.328108596801758, "step": 80440 }, { "epoch": 0.0245, "grad_norm": 0.9596011638641357, "learning_rate": 4.898e-05, "loss": 4.30256462097168, "step": 80450 }, { "epoch": 0.0246, "grad_norm": 0.8583745360374451, "learning_rate": 4.918000000000001e-05, "loss": 4.290711975097656, "step": 80460 }, { "epoch": 0.0247, "grad_norm": 0.8017030954360962, "learning_rate": 4.9380000000000005e-05, "loss": 4.3108875274658205, "step": 80470 }, { "epoch": 0.0248, "grad_norm": 0.7690845727920532, "learning_rate": 4.958e-05, "loss": 4.357697677612305, "step": 80480 }, { "epoch": 0.0249, "grad_norm": 0.7710899114608765, "learning_rate": 4.978e-05, "loss": 4.2502796173095705, "step": 80490 }, { "epoch": 0.025, "grad_norm": 1.1364080905914307, "learning_rate": 4.9980000000000006e-05, "loss": 4.317300796508789, "step": 80500 }, { "epoch": 0.0251, "grad_norm": 0.880591869354248, "learning_rate": 5.0180000000000004e-05, "loss": 4.450421142578125, "step": 80510 }, { "epoch": 0.0252, "grad_norm": 0.8420453071594238, "learning_rate": 5.038e-05, "loss": 4.29888687133789, "step": 80520 }, { "epoch": 0.0253, "grad_norm": 0.7620976567268372, "learning_rate": 5.058000000000001e-05, "loss": 4.342173004150391, "step": 80530 }, { "epoch": 0.0254, "grad_norm": 0.5715026259422302, "learning_rate": 5.0780000000000005e-05, "loss": 4.267873001098633, "step": 80540 }, { "epoch": 0.0255, "grad_norm": 0.7940792441368103, "learning_rate": 5.098e-05, "loss": 4.267649841308594, "step": 80550 }, { "epoch": 0.0256, "grad_norm": 0.8055289387702942, "learning_rate": 5.118000000000001e-05, "loss": 4.2085529327392575, "step": 80560 }, { "epoch": 0.0257, "grad_norm": 0.7041337490081787, "learning_rate": 5.1380000000000006e-05, "loss": 4.229782485961914, "step": 80570 }, { "epoch": 0.0258, "grad_norm": 0.7647186517715454, "learning_rate": 5.1580000000000004e-05, "loss": 4.323853302001953, "step": 80580 }, { "epoch": 0.0259, "grad_norm": 0.7459456920623779, "learning_rate": 5.178000000000001e-05, "loss": 4.243249893188477, "step": 80590 }, { "epoch": 0.026, "grad_norm": 0.7966719269752502, "learning_rate": 5.198000000000001e-05, "loss": 4.219022369384765, "step": 80600 }, { "epoch": 0.0261, "grad_norm": 0.7073616981506348, "learning_rate": 5.2180000000000005e-05, "loss": 4.22697868347168, "step": 80610 }, { "epoch": 0.0262, "grad_norm": 0.6936001777648926, "learning_rate": 5.238000000000001e-05, "loss": 4.236738586425782, "step": 80620 }, { "epoch": 0.0263, "grad_norm": 0.6609569191932678, "learning_rate": 5.258000000000001e-05, "loss": 4.200273513793945, "step": 80630 }, { "epoch": 0.0264, "grad_norm": 0.7582675218582153, "learning_rate": 5.2780000000000006e-05, "loss": 4.173809432983399, "step": 80640 }, { "epoch": 0.0265, "grad_norm": 0.7489715218544006, "learning_rate": 5.2980000000000004e-05, "loss": 4.227204895019531, "step": 80650 }, { "epoch": 0.0266, "grad_norm": 0.7459937334060669, "learning_rate": 5.318000000000001e-05, "loss": 4.191170120239258, "step": 80660 }, { "epoch": 0.0267, "grad_norm": 0.6550440788269043, "learning_rate": 5.338000000000001e-05, "loss": 4.188927078247071, "step": 80670 }, { "epoch": 0.0268, "grad_norm": 0.7738357186317444, "learning_rate": 5.3580000000000005e-05, "loss": 4.23632926940918, "step": 80680 }, { "epoch": 0.0269, "grad_norm": 1.2643016576766968, "learning_rate": 5.378e-05, "loss": 4.218930435180664, "step": 80690 }, { "epoch": 0.027, "grad_norm": 0.6486037969589233, "learning_rate": 5.3979999999999995e-05, "loss": 4.199253845214844, "step": 80700 }, { "epoch": 0.0271, "grad_norm": 0.7528240084648132, "learning_rate": 5.418e-05, "loss": 4.211904144287109, "step": 80710 }, { "epoch": 0.0272, "grad_norm": 0.8509882688522339, "learning_rate": 5.438e-05, "loss": 4.229808044433594, "step": 80720 }, { "epoch": 0.0273, "grad_norm": 0.7237697839736938, "learning_rate": 5.4579999999999996e-05, "loss": 4.2085823059082035, "step": 80730 }, { "epoch": 0.0274, "grad_norm": 0.7488872408866882, "learning_rate": 5.478e-05, "loss": 4.204967498779297, "step": 80740 }, { "epoch": 0.0275, "grad_norm": 0.6420933604240417, "learning_rate": 5.498e-05, "loss": 4.146570205688477, "step": 80750 }, { "epoch": 0.0276, "grad_norm": 0.6691610813140869, "learning_rate": 5.518e-05, "loss": 4.1503242492675785, "step": 80760 }, { "epoch": 0.0277, "grad_norm": 0.6334725022315979, "learning_rate": 5.538e-05, "loss": 4.162997436523438, "step": 80770 }, { "epoch": 0.0278, "grad_norm": 0.6345768570899963, "learning_rate": 5.558e-05, "loss": 4.1868431091308596, "step": 80780 }, { "epoch": 0.0279, "grad_norm": 0.6303321123123169, "learning_rate": 5.578e-05, "loss": 4.147438430786133, "step": 80790 }, { "epoch": 0.028, "grad_norm": 0.7453981637954712, "learning_rate": 5.5979999999999996e-05, "loss": 4.15161361694336, "step": 80800 }, { "epoch": 0.0281, "grad_norm": 0.6000621318817139, "learning_rate": 5.618e-05, "loss": 4.302061080932617, "step": 80810 }, { "epoch": 0.0282, "grad_norm": 0.6002620458602905, "learning_rate": 5.638e-05, "loss": 4.141644287109375, "step": 80820 }, { "epoch": 0.0283, "grad_norm": 0.6249469518661499, "learning_rate": 5.658e-05, "loss": 4.155448150634766, "step": 80830 }, { "epoch": 0.0284, "grad_norm": 0.609454333782196, "learning_rate": 5.678e-05, "loss": 4.173176956176758, "step": 80840 }, { "epoch": 0.0285, "grad_norm": 0.5503066778182983, "learning_rate": 5.698e-05, "loss": 4.133391571044922, "step": 80850 }, { "epoch": 0.0286, "grad_norm": 0.6895385980606079, "learning_rate": 5.718e-05, "loss": 4.163289260864258, "step": 80860 }, { "epoch": 0.0287, "grad_norm": 0.5655317902565002, "learning_rate": 5.738e-05, "loss": 4.108301162719727, "step": 80870 }, { "epoch": 0.0288, "grad_norm": 0.521626353263855, "learning_rate": 5.758e-05, "loss": 4.163860702514649, "step": 80880 }, { "epoch": 0.0289, "grad_norm": 0.5978989601135254, "learning_rate": 5.778e-05, "loss": 4.084605407714844, "step": 80890 }, { "epoch": 0.029, "grad_norm": 0.947877049446106, "learning_rate": 5.7980000000000004e-05, "loss": 4.201071929931641, "step": 80900 }, { "epoch": 0.0291, "grad_norm": 0.6038442850112915, "learning_rate": 5.818e-05, "loss": 4.14214859008789, "step": 80910 }, { "epoch": 0.0292, "grad_norm": 0.7941854596138, "learning_rate": 5.838e-05, "loss": 4.263940811157227, "step": 80920 }, { "epoch": 0.0293, "grad_norm": 0.9320460557937622, "learning_rate": 5.858e-05, "loss": 4.203144836425781, "step": 80930 }, { "epoch": 0.0294, "grad_norm": 0.657222330570221, "learning_rate": 5.878e-05, "loss": 4.157551574707031, "step": 80940 }, { "epoch": 0.0295, "grad_norm": 0.5000120997428894, "learning_rate": 5.898e-05, "loss": 4.146945190429688, "step": 80950 }, { "epoch": 0.0296, "grad_norm": 0.5449489951133728, "learning_rate": 5.918e-05, "loss": 4.212858963012695, "step": 80960 }, { "epoch": 0.0297, "grad_norm": 0.5274266004562378, "learning_rate": 5.9380000000000004e-05, "loss": 4.057695770263672, "step": 80970 }, { "epoch": 0.0298, "grad_norm": 0.5422614812850952, "learning_rate": 5.958e-05, "loss": 4.125976943969727, "step": 80980 }, { "epoch": 0.0299, "grad_norm": 0.5549547672271729, "learning_rate": 5.978e-05, "loss": 4.0906017303466795, "step": 80990 }, { "epoch": 0.03, "grad_norm": 0.6117221117019653, "learning_rate": 5.9980000000000005e-05, "loss": 4.112210845947265, "step": 81000 }, { "epoch": 0.0301, "grad_norm": 0.5994063019752502, "learning_rate": 6.018e-05, "loss": 4.135273361206055, "step": 81010 }, { "epoch": 0.0302, "grad_norm": 0.6089306473731995, "learning_rate": 6.038e-05, "loss": 4.110384368896485, "step": 81020 }, { "epoch": 0.0303, "grad_norm": 0.5926423668861389, "learning_rate": 6.0580000000000006e-05, "loss": 4.0928184509277346, "step": 81030 }, { "epoch": 0.0304, "grad_norm": 0.5345453023910522, "learning_rate": 6.0780000000000004e-05, "loss": 4.1174976348876955, "step": 81040 }, { "epoch": 0.0305, "grad_norm": 0.5773014426231384, "learning_rate": 6.098e-05, "loss": 4.119912719726562, "step": 81050 }, { "epoch": 0.0306, "grad_norm": 0.4978563189506531, "learning_rate": 6.118000000000001e-05, "loss": 4.133348846435547, "step": 81060 }, { "epoch": 0.0307, "grad_norm": 0.8470102548599243, "learning_rate": 6.138e-05, "loss": 4.13293228149414, "step": 81070 }, { "epoch": 0.0308, "grad_norm": 0.4249403774738312, "learning_rate": 6.158e-05, "loss": 4.089142990112305, "step": 81080 }, { "epoch": 0.0309, "grad_norm": 0.5307182669639587, "learning_rate": 6.178000000000001e-05, "loss": 4.092873001098633, "step": 81090 }, { "epoch": 0.031, "grad_norm": 0.5584889054298401, "learning_rate": 6.198e-05, "loss": 4.068823623657226, "step": 81100 }, { "epoch": 0.0311, "grad_norm": 0.4920558035373688, "learning_rate": 6.218e-05, "loss": 4.0644794464111325, "step": 81110 }, { "epoch": 0.0312, "grad_norm": 0.5747891664505005, "learning_rate": 6.238000000000001e-05, "loss": 4.061798858642578, "step": 81120 }, { "epoch": 0.0313, "grad_norm": 0.5392170548439026, "learning_rate": 6.258e-05, "loss": 4.061770248413086, "step": 81130 }, { "epoch": 0.0314, "grad_norm": 0.6029335260391235, "learning_rate": 6.278e-05, "loss": 4.085165405273438, "step": 81140 }, { "epoch": 0.0315, "grad_norm": 1.3268107175827026, "learning_rate": 6.298000000000001e-05, "loss": 4.160947799682617, "step": 81150 }, { "epoch": 0.0316, "grad_norm": 0.5485433340072632, "learning_rate": 6.318e-05, "loss": 4.145301437377929, "step": 81160 }, { "epoch": 0.0317, "grad_norm": 0.5348362326622009, "learning_rate": 6.338e-05, "loss": 4.184476470947265, "step": 81170 }, { "epoch": 0.0318, "grad_norm": 0.5437711477279663, "learning_rate": 6.358000000000001e-05, "loss": 4.0518444061279295, "step": 81180 }, { "epoch": 0.0319, "grad_norm": 0.49510905146598816, "learning_rate": 6.378e-05, "loss": 4.085231018066406, "step": 81190 }, { "epoch": 0.032, "grad_norm": 0.8949711918830872, "learning_rate": 6.398000000000001e-05, "loss": 4.088314819335937, "step": 81200 }, { "epoch": 0.0321, "grad_norm": 0.5574071407318115, "learning_rate": 6.418000000000001e-05, "loss": 4.094241714477539, "step": 81210 }, { "epoch": 0.0322, "grad_norm": 0.5345049500465393, "learning_rate": 6.438e-05, "loss": 4.084363174438477, "step": 81220 }, { "epoch": 0.0323, "grad_norm": 0.46691396832466125, "learning_rate": 6.458000000000001e-05, "loss": 4.304480743408203, "step": 81230 }, { "epoch": 0.0324, "grad_norm": 0.46952077746391296, "learning_rate": 6.478000000000001e-05, "loss": 4.041556930541992, "step": 81240 }, { "epoch": 0.0325, "grad_norm": 0.5418161153793335, "learning_rate": 6.498e-05, "loss": 4.048797988891602, "step": 81250 }, { "epoch": 0.0326, "grad_norm": 0.5095402002334595, "learning_rate": 6.518000000000001e-05, "loss": 3.977817916870117, "step": 81260 }, { "epoch": 0.0327, "grad_norm": 1.460627555847168, "learning_rate": 6.538000000000001e-05, "loss": 4.230980682373047, "step": 81270 }, { "epoch": 0.0328, "grad_norm": 0.5157870650291443, "learning_rate": 6.558e-05, "loss": 4.002996826171875, "step": 81280 }, { "epoch": 0.0329, "grad_norm": 0.4919586181640625, "learning_rate": 6.578000000000001e-05, "loss": 4.109578704833984, "step": 81290 }, { "epoch": 0.033, "grad_norm": 0.509157657623291, "learning_rate": 6.598e-05, "loss": 4.053997421264649, "step": 81300 }, { "epoch": 0.0331, "grad_norm": 0.5989805459976196, "learning_rate": 6.618e-05, "loss": 4.0417320251464846, "step": 81310 }, { "epoch": 0.0332, "grad_norm": 1.7693849802017212, "learning_rate": 6.638e-05, "loss": 4.178015518188476, "step": 81320 }, { "epoch": 0.0333, "grad_norm": 0.6040340662002563, "learning_rate": 6.658e-05, "loss": 4.081357192993164, "step": 81330 }, { "epoch": 0.0334, "grad_norm": 0.4969209134578705, "learning_rate": 6.678e-05, "loss": 4.078896713256836, "step": 81340 }, { "epoch": 0.0335, "grad_norm": 0.4945206046104431, "learning_rate": 6.698e-05, "loss": 4.1484428405761715, "step": 81350 }, { "epoch": 0.0336, "grad_norm": 0.6357508301734924, "learning_rate": 6.718e-05, "loss": 4.072774124145508, "step": 81360 }, { "epoch": 0.0337, "grad_norm": 0.46380069851875305, "learning_rate": 6.738e-05, "loss": 4.012380981445313, "step": 81370 }, { "epoch": 0.0338, "grad_norm": 0.4587346911430359, "learning_rate": 6.758e-05, "loss": 4.029714584350586, "step": 81380 }, { "epoch": 0.0339, "grad_norm": 0.5019038319587708, "learning_rate": 6.778e-05, "loss": 3.995611572265625, "step": 81390 }, { "epoch": 0.034, "grad_norm": 0.5282730460166931, "learning_rate": 6.798e-05, "loss": 4.0609893798828125, "step": 81400 }, { "epoch": 0.0341, "grad_norm": 0.5124421119689941, "learning_rate": 6.818e-05, "loss": 4.021018600463867, "step": 81410 }, { "epoch": 0.0342, "grad_norm": 0.4271531403064728, "learning_rate": 6.838e-05, "loss": 4.050865173339844, "step": 81420 }, { "epoch": 0.0343, "grad_norm": 1.2954230308532715, "learning_rate": 6.858e-05, "loss": 4.144805526733398, "step": 81430 }, { "epoch": 0.0344, "grad_norm": 0.49326351284980774, "learning_rate": 6.878e-05, "loss": 4.0962272644042965, "step": 81440 }, { "epoch": 0.0345, "grad_norm": 0.49173876643180847, "learning_rate": 6.898e-05, "loss": 4.036347198486328, "step": 81450 }, { "epoch": 0.0346, "grad_norm": 0.4872412085533142, "learning_rate": 6.918e-05, "loss": 4.052567672729492, "step": 81460 }, { "epoch": 0.0347, "grad_norm": 0.4957573115825653, "learning_rate": 6.938e-05, "loss": 4.03974609375, "step": 81470 }, { "epoch": 0.0348, "grad_norm": 0.43150800466537476, "learning_rate": 6.958e-05, "loss": 4.010758590698242, "step": 81480 }, { "epoch": 0.0349, "grad_norm": 0.5539997220039368, "learning_rate": 6.978e-05, "loss": 4.126621246337891, "step": 81490 }, { "epoch": 0.035, "grad_norm": 0.5765202641487122, "learning_rate": 6.998e-05, "loss": 4.027452850341797, "step": 81500 }, { "epoch": 0.0351, "grad_norm": 0.4617144763469696, "learning_rate": 7.018e-05, "loss": 4.038814926147461, "step": 81510 }, { "epoch": 0.0352, "grad_norm": 0.4162423014640808, "learning_rate": 7.038e-05, "loss": 3.9821941375732424, "step": 81520 }, { "epoch": 0.0353, "grad_norm": 0.4452921450138092, "learning_rate": 7.058e-05, "loss": 3.968626022338867, "step": 81530 }, { "epoch": 0.0354, "grad_norm": 0.4633454382419586, "learning_rate": 7.078e-05, "loss": 4.009642791748047, "step": 81540 }, { "epoch": 0.0355, "grad_norm": 0.7989187836647034, "learning_rate": 7.098e-05, "loss": 4.045684051513672, "step": 81550 }, { "epoch": 0.0356, "grad_norm": 0.47016459703445435, "learning_rate": 7.118e-05, "loss": 4.043623733520508, "step": 81560 }, { "epoch": 0.0357, "grad_norm": 0.47574982047080994, "learning_rate": 7.138e-05, "loss": 3.994187927246094, "step": 81570 }, { "epoch": 0.0358, "grad_norm": 0.5035045742988586, "learning_rate": 7.158e-05, "loss": 3.9976734161376952, "step": 81580 }, { "epoch": 0.0359, "grad_norm": 0.6078383922576904, "learning_rate": 7.178000000000001e-05, "loss": 3.960103225708008, "step": 81590 }, { "epoch": 0.036, "grad_norm": 0.48050999641418457, "learning_rate": 7.198e-05, "loss": 4.0539703369140625, "step": 81600 }, { "epoch": 0.0361, "grad_norm": 0.5049411058425903, "learning_rate": 7.218e-05, "loss": 3.973519134521484, "step": 81610 }, { "epoch": 0.0362, "grad_norm": 0.9177631735801697, "learning_rate": 7.238000000000001e-05, "loss": 3.903715133666992, "step": 81620 }, { "epoch": 0.0363, "grad_norm": 0.46053746342658997, "learning_rate": 7.258e-05, "loss": 4.01729736328125, "step": 81630 }, { "epoch": 0.0364, "grad_norm": 0.4118608832359314, "learning_rate": 7.278e-05, "loss": 4.006989288330078, "step": 81640 }, { "epoch": 0.0365, "grad_norm": 0.4604043960571289, "learning_rate": 7.298000000000001e-05, "loss": 4.026253509521484, "step": 81650 }, { "epoch": 0.0366, "grad_norm": 0.45775970816612244, "learning_rate": 7.318e-05, "loss": 3.9580989837646485, "step": 81660 }, { "epoch": 0.0367, "grad_norm": 0.4291290044784546, "learning_rate": 7.338e-05, "loss": 3.9802600860595705, "step": 81670 }, { "epoch": 0.0368, "grad_norm": 0.4874412417411804, "learning_rate": 7.358000000000001e-05, "loss": 3.9674762725830077, "step": 81680 }, { "epoch": 0.0369, "grad_norm": 0.4603731334209442, "learning_rate": 7.378e-05, "loss": 3.9625270843505858, "step": 81690 }, { "epoch": 0.037, "grad_norm": 0.5156533122062683, "learning_rate": 7.398e-05, "loss": 4.097967147827148, "step": 81700 }, { "epoch": 0.0371, "grad_norm": 0.4164431393146515, "learning_rate": 7.418000000000001e-05, "loss": 3.9871395111083983, "step": 81710 }, { "epoch": 0.0372, "grad_norm": 0.4676928222179413, "learning_rate": 7.438e-05, "loss": 4.0619956970214846, "step": 81720 }, { "epoch": 0.0373, "grad_norm": 0.826423704624176, "learning_rate": 7.458000000000001e-05, "loss": 4.190798568725586, "step": 81730 }, { "epoch": 0.0374, "grad_norm": 0.43880799412727356, "learning_rate": 7.478e-05, "loss": 4.040071868896485, "step": 81740 }, { "epoch": 0.0375, "grad_norm": 0.5205581188201904, "learning_rate": 7.498e-05, "loss": 4.016611480712891, "step": 81750 }, { "epoch": 0.0376, "grad_norm": 0.40337318181991577, "learning_rate": 7.518000000000001e-05, "loss": 3.9768943786621094, "step": 81760 }, { "epoch": 0.0377, "grad_norm": 0.4057261645793915, "learning_rate": 7.538e-05, "loss": 3.982502746582031, "step": 81770 }, { "epoch": 0.0378, "grad_norm": 0.44077205657958984, "learning_rate": 7.558e-05, "loss": 3.9662776947021485, "step": 81780 }, { "epoch": 0.0379, "grad_norm": 0.5806800723075867, "learning_rate": 7.578000000000001e-05, "loss": 3.9394832611083985, "step": 81790 }, { "epoch": 0.038, "grad_norm": 0.5396292209625244, "learning_rate": 7.598e-05, "loss": 3.9664424896240233, "step": 81800 }, { "epoch": 0.0381, "grad_norm": 0.507144570350647, "learning_rate": 7.618e-05, "loss": 3.957942581176758, "step": 81810 }, { "epoch": 0.0382, "grad_norm": 0.42543837428092957, "learning_rate": 7.638000000000001e-05, "loss": 3.9445827484130858, "step": 81820 }, { "epoch": 0.0383, "grad_norm": 0.4694370627403259, "learning_rate": 7.658e-05, "loss": 3.954279327392578, "step": 81830 }, { "epoch": 0.0384, "grad_norm": 0.4709033966064453, "learning_rate": 7.678000000000001e-05, "loss": 4.024324798583985, "step": 81840 }, { "epoch": 0.0385, "grad_norm": 0.5160669684410095, "learning_rate": 7.698000000000001e-05, "loss": 3.923431396484375, "step": 81850 }, { "epoch": 0.0386, "grad_norm": 0.4147569239139557, "learning_rate": 7.718e-05, "loss": 3.978494644165039, "step": 81860 }, { "epoch": 0.0387, "grad_norm": 0.46357303857803345, "learning_rate": 7.738000000000001e-05, "loss": 4.160784149169922, "step": 81870 }, { "epoch": 0.0388, "grad_norm": 0.4416487514972687, "learning_rate": 7.758000000000001e-05, "loss": 3.9569442749023436, "step": 81880 }, { "epoch": 0.0389, "grad_norm": 0.45184236764907837, "learning_rate": 7.778e-05, "loss": 3.963978958129883, "step": 81890 }, { "epoch": 0.039, "grad_norm": 0.40221789479255676, "learning_rate": 7.798000000000001e-05, "loss": 3.951943588256836, "step": 81900 }, { "epoch": 0.0391, "grad_norm": 0.5833235383033752, "learning_rate": 7.818000000000001e-05, "loss": 3.9903884887695313, "step": 81910 }, { "epoch": 0.0392, "grad_norm": 0.46128684282302856, "learning_rate": 7.838e-05, "loss": 4.169551849365234, "step": 81920 }, { "epoch": 0.0393, "grad_norm": 0.40567469596862793, "learning_rate": 7.858000000000001e-05, "loss": 3.9475753784179686, "step": 81930 }, { "epoch": 0.0394, "grad_norm": 0.41714417934417725, "learning_rate": 7.878e-05, "loss": 4.037763977050782, "step": 81940 }, { "epoch": 0.0395, "grad_norm": 0.4033423364162445, "learning_rate": 7.897999999999999e-05, "loss": 3.949198913574219, "step": 81950 }, { "epoch": 0.0396, "grad_norm": 0.8075945973396301, "learning_rate": 7.918e-05, "loss": 3.959520721435547, "step": 81960 }, { "epoch": 0.0397, "grad_norm": 0.4296705424785614, "learning_rate": 7.938e-05, "loss": 3.9887752532958984, "step": 81970 }, { "epoch": 0.0398, "grad_norm": 0.4820793867111206, "learning_rate": 7.958e-05, "loss": 3.9300987243652346, "step": 81980 }, { "epoch": 0.0399, "grad_norm": 0.4734731912612915, "learning_rate": 7.978e-05, "loss": 3.9688289642333983, "step": 81990 }, { "epoch": 0.04, "grad_norm": 0.376831978559494, "learning_rate": 7.998e-05, "loss": 4.0510505676269535, "step": 82000 }, { "epoch": 0.0401, "grad_norm": 0.44370701909065247, "learning_rate": 8.018e-05, "loss": 3.900652313232422, "step": 82010 }, { "epoch": 0.0402, "grad_norm": 0.7983126640319824, "learning_rate": 8.038e-05, "loss": 4.005221939086914, "step": 82020 }, { "epoch": 0.0403, "grad_norm": 0.4299812912940979, "learning_rate": 8.058e-05, "loss": 4.020475006103515, "step": 82030 }, { "epoch": 0.0404, "grad_norm": 0.43184319138526917, "learning_rate": 8.078e-05, "loss": 3.9040550231933593, "step": 82040 }, { "epoch": 0.0405, "grad_norm": 0.40197867155075073, "learning_rate": 8.098e-05, "loss": 3.943069076538086, "step": 82050 }, { "epoch": 0.0406, "grad_norm": 0.44560980796813965, "learning_rate": 8.118e-05, "loss": 3.8903118133544923, "step": 82060 }, { "epoch": 0.0407, "grad_norm": 0.4123702049255371, "learning_rate": 8.138e-05, "loss": 3.931021881103516, "step": 82070 }, { "epoch": 0.0408, "grad_norm": 0.5029538869857788, "learning_rate": 8.158e-05, "loss": 4.023634719848633, "step": 82080 }, { "epoch": 0.0409, "grad_norm": 0.4140770137310028, "learning_rate": 8.178e-05, "loss": 3.93249626159668, "step": 82090 }, { "epoch": 0.041, "grad_norm": 0.4393298923969269, "learning_rate": 8.198e-05, "loss": 3.9117626190185546, "step": 82100 }, { "epoch": 0.0411, "grad_norm": 0.40576064586639404, "learning_rate": 8.218e-05, "loss": 3.92786865234375, "step": 82110 }, { "epoch": 0.0412, "grad_norm": 0.40594494342803955, "learning_rate": 8.238000000000001e-05, "loss": 4.013692474365234, "step": 82120 }, { "epoch": 0.0413, "grad_norm": 0.40767937898635864, "learning_rate": 8.258e-05, "loss": 3.9411151885986326, "step": 82130 }, { "epoch": 0.0414, "grad_norm": 0.6238659024238586, "learning_rate": 8.278e-05, "loss": 3.9228561401367186, "step": 82140 }, { "epoch": 0.0415, "grad_norm": 0.5880590081214905, "learning_rate": 8.298000000000001e-05, "loss": 3.9516670227050783, "step": 82150 }, { "epoch": 0.0416, "grad_norm": 0.41273123025894165, "learning_rate": 8.318e-05, "loss": 4.215232849121094, "step": 82160 }, { "epoch": 0.0417, "grad_norm": 0.4204077422618866, "learning_rate": 8.338e-05, "loss": 3.947635269165039, "step": 82170 }, { "epoch": 0.0418, "grad_norm": 0.40033769607543945, "learning_rate": 8.358e-05, "loss": 3.884442901611328, "step": 82180 }, { "epoch": 0.0419, "grad_norm": 0.4656597673892975, "learning_rate": 8.378e-05, "loss": 3.9488754272460938, "step": 82190 }, { "epoch": 0.042, "grad_norm": 0.4394247829914093, "learning_rate": 8.398e-05, "loss": 3.9214859008789062, "step": 82200 }, { "epoch": 0.0421, "grad_norm": 0.6469634175300598, "learning_rate": 8.418e-05, "loss": 3.9825435638427735, "step": 82210 }, { "epoch": 0.0422, "grad_norm": 0.6021825075149536, "learning_rate": 8.438e-05, "loss": 3.95635871887207, "step": 82220 }, { "epoch": 0.0423, "grad_norm": 0.410918653011322, "learning_rate": 8.458e-05, "loss": 3.955471420288086, "step": 82230 }, { "epoch": 0.0424, "grad_norm": 0.45376157760620117, "learning_rate": 8.478e-05, "loss": 4.01763916015625, "step": 82240 }, { "epoch": 0.0425, "grad_norm": 0.44720911979675293, "learning_rate": 8.498e-05, "loss": 3.8624122619628904, "step": 82250 }, { "epoch": 0.0426, "grad_norm": 0.36790111660957336, "learning_rate": 8.518000000000001e-05, "loss": 4.033926010131836, "step": 82260 }, { "epoch": 0.0427, "grad_norm": 0.4077184796333313, "learning_rate": 8.538e-05, "loss": 3.863504409790039, "step": 82270 }, { "epoch": 0.0428, "grad_norm": 0.34015464782714844, "learning_rate": 8.558e-05, "loss": 3.885137176513672, "step": 82280 }, { "epoch": 0.0429, "grad_norm": 0.39179831743240356, "learning_rate": 8.578000000000001e-05, "loss": 3.9212169647216797, "step": 82290 }, { "epoch": 0.043, "grad_norm": 0.4270918071269989, "learning_rate": 8.598e-05, "loss": 3.957785415649414, "step": 82300 }, { "epoch": 0.0431, "grad_norm": 0.4346376657485962, "learning_rate": 8.618e-05, "loss": 3.8960060119628905, "step": 82310 }, { "epoch": 0.0432, "grad_norm": 0.4100809097290039, "learning_rate": 8.638000000000001e-05, "loss": 3.9174163818359373, "step": 82320 }, { "epoch": 0.0433, "grad_norm": 0.3941136598587036, "learning_rate": 8.658e-05, "loss": 3.893391418457031, "step": 82330 }, { "epoch": 0.0434, "grad_norm": 0.3876273036003113, "learning_rate": 8.678e-05, "loss": 3.882611083984375, "step": 82340 }, { "epoch": 0.0435, "grad_norm": 0.38060447573661804, "learning_rate": 8.698000000000001e-05, "loss": 3.881389617919922, "step": 82350 }, { "epoch": 0.0436, "grad_norm": 0.38053205609321594, "learning_rate": 8.718e-05, "loss": 3.8683727264404295, "step": 82360 }, { "epoch": 0.0437, "grad_norm": 0.3874657154083252, "learning_rate": 8.738000000000001e-05, "loss": 3.8612483978271483, "step": 82370 }, { "epoch": 0.0438, "grad_norm": 0.3973087966442108, "learning_rate": 8.758000000000001e-05, "loss": 3.896382141113281, "step": 82380 }, { "epoch": 0.0439, "grad_norm": 0.41932356357574463, "learning_rate": 8.778e-05, "loss": 3.9018890380859377, "step": 82390 }, { "epoch": 0.044, "grad_norm": 0.3679087460041046, "learning_rate": 8.798000000000001e-05, "loss": 3.8719455718994142, "step": 82400 }, { "epoch": 0.0441, "grad_norm": 0.5245869755744934, "learning_rate": 8.818000000000001e-05, "loss": 3.877519989013672, "step": 82410 }, { "epoch": 0.0442, "grad_norm": 0.36421987414360046, "learning_rate": 8.838e-05, "loss": 3.901009750366211, "step": 82420 }, { "epoch": 0.0443, "grad_norm": 0.38611918687820435, "learning_rate": 8.858000000000001e-05, "loss": 3.9322650909423826, "step": 82430 }, { "epoch": 0.0444, "grad_norm": 0.3976812958717346, "learning_rate": 8.878000000000001e-05, "loss": 3.9007942199707033, "step": 82440 }, { "epoch": 0.0445, "grad_norm": 0.3621211349964142, "learning_rate": 8.898e-05, "loss": 3.8961814880371093, "step": 82450 }, { "epoch": 0.0446, "grad_norm": 0.3616512715816498, "learning_rate": 8.918000000000001e-05, "loss": 3.878189468383789, "step": 82460 }, { "epoch": 0.0447, "grad_norm": 0.3793681859970093, "learning_rate": 8.938e-05, "loss": 3.8516616821289062, "step": 82470 }, { "epoch": 0.0448, "grad_norm": 0.958914041519165, "learning_rate": 8.958e-05, "loss": 3.9125953674316407, "step": 82480 }, { "epoch": 0.0449, "grad_norm": 0.41370779275894165, "learning_rate": 8.978000000000001e-05, "loss": 3.8809314727783204, "step": 82490 }, { "epoch": 0.045, "grad_norm": 0.36686602234840393, "learning_rate": 8.998e-05, "loss": 3.833428955078125, "step": 82500 }, { "epoch": 0.0451, "grad_norm": 0.3825756907463074, "learning_rate": 9.018000000000001e-05, "loss": 3.847615051269531, "step": 82510 }, { "epoch": 0.0452, "grad_norm": 0.4120357930660248, "learning_rate": 9.038000000000001e-05, "loss": 3.8996334075927734, "step": 82520 }, { "epoch": 0.0453, "grad_norm": 0.343305766582489, "learning_rate": 9.058e-05, "loss": 3.8599334716796876, "step": 82530 }, { "epoch": 0.0454, "grad_norm": 0.41744810342788696, "learning_rate": 9.078000000000001e-05, "loss": 3.866754913330078, "step": 82540 }, { "epoch": 0.0455, "grad_norm": 0.39350342750549316, "learning_rate": 9.098000000000001e-05, "loss": 3.8460845947265625, "step": 82550 }, { "epoch": 0.0456, "grad_norm": 0.3759637176990509, "learning_rate": 9.118e-05, "loss": 3.8823848724365235, "step": 82560 }, { "epoch": 0.0457, "grad_norm": 0.35090872645378113, "learning_rate": 9.138e-05, "loss": 3.854581832885742, "step": 82570 }, { "epoch": 0.0458, "grad_norm": 0.35352253913879395, "learning_rate": 9.158e-05, "loss": 3.8543167114257812, "step": 82580 }, { "epoch": 0.0459, "grad_norm": 0.3800620436668396, "learning_rate": 9.178e-05, "loss": 3.878927993774414, "step": 82590 }, { "epoch": 0.046, "grad_norm": 0.3791212737560272, "learning_rate": 9.198e-05, "loss": 3.8281883239746093, "step": 82600 }, { "epoch": 0.0461, "grad_norm": 1.0095129013061523, "learning_rate": 9.218e-05, "loss": 4.113484191894531, "step": 82610 }, { "epoch": 0.0462, "grad_norm": 0.3651430904865265, "learning_rate": 9.238e-05, "loss": 3.845316696166992, "step": 82620 }, { "epoch": 0.0463, "grad_norm": 0.3488781452178955, "learning_rate": 9.258e-05, "loss": 3.8417816162109375, "step": 82630 }, { "epoch": 0.0464, "grad_norm": 0.4021349251270294, "learning_rate": 9.278e-05, "loss": 3.951266860961914, "step": 82640 }, { "epoch": 0.0465, "grad_norm": 0.419025182723999, "learning_rate": 9.298e-05, "loss": 3.8597225189208983, "step": 82650 }, { "epoch": 0.0466, "grad_norm": 0.3755001127719879, "learning_rate": 9.318e-05, "loss": 3.8688735961914062, "step": 82660 }, { "epoch": 0.0467, "grad_norm": 0.3632712662220001, "learning_rate": 9.338e-05, "loss": 3.9300357818603517, "step": 82670 }, { "epoch": 0.0468, "grad_norm": 0.3945075273513794, "learning_rate": 9.358e-05, "loss": 4.023001861572266, "step": 82680 }, { "epoch": 0.0469, "grad_norm": 0.347078412771225, "learning_rate": 9.378e-05, "loss": 3.81383056640625, "step": 82690 }, { "epoch": 0.047, "grad_norm": 0.5212072134017944, "learning_rate": 9.398e-05, "loss": 3.8905261993408202, "step": 82700 }, { "epoch": 0.0471, "grad_norm": 0.47846928238868713, "learning_rate": 9.418e-05, "loss": 4.062021255493164, "step": 82710 }, { "epoch": 0.0472, "grad_norm": 0.4495738446712494, "learning_rate": 9.438e-05, "loss": 4.018693923950195, "step": 82720 }, { "epoch": 0.0473, "grad_norm": 0.41616198420524597, "learning_rate": 9.458e-05, "loss": 3.880020523071289, "step": 82730 }, { "epoch": 0.0474, "grad_norm": 0.3985685408115387, "learning_rate": 9.478e-05, "loss": 3.8657894134521484, "step": 82740 }, { "epoch": 0.0475, "grad_norm": 0.3556722402572632, "learning_rate": 9.498e-05, "loss": 3.835987854003906, "step": 82750 }, { "epoch": 0.0476, "grad_norm": 0.3475450277328491, "learning_rate": 9.518000000000001e-05, "loss": 3.8452220916748048, "step": 82760 }, { "epoch": 0.0477, "grad_norm": 0.3749247193336487, "learning_rate": 9.538e-05, "loss": 3.932608413696289, "step": 82770 }, { "epoch": 0.0478, "grad_norm": 0.32564833760261536, "learning_rate": 9.558e-05, "loss": 3.8859230041503907, "step": 82780 }, { "epoch": 0.0479, "grad_norm": 0.377262145280838, "learning_rate": 9.578000000000001e-05, "loss": 4.021604537963867, "step": 82790 }, { "epoch": 0.048, "grad_norm": 0.37888863682746887, "learning_rate": 9.598e-05, "loss": 3.866642379760742, "step": 82800 }, { "epoch": 0.0481, "grad_norm": 0.38174378871917725, "learning_rate": 9.618e-05, "loss": 3.9002532958984375, "step": 82810 }, { "epoch": 0.0482, "grad_norm": 0.37359318137168884, "learning_rate": 9.638000000000001e-05, "loss": 3.8445838928222655, "step": 82820 }, { "epoch": 0.0483, "grad_norm": 0.4153497517108917, "learning_rate": 9.658e-05, "loss": 3.9124683380126952, "step": 82830 }, { "epoch": 0.0484, "grad_norm": 0.3534516394138336, "learning_rate": 9.678e-05, "loss": 3.9404430389404297, "step": 82840 }, { "epoch": 0.0485, "grad_norm": 0.40927091240882874, "learning_rate": 9.698000000000001e-05, "loss": 4.034648132324219, "step": 82850 }, { "epoch": 0.0486, "grad_norm": 0.350938081741333, "learning_rate": 9.718e-05, "loss": 3.812015914916992, "step": 82860 }, { "epoch": 0.0487, "grad_norm": 0.39179927110671997, "learning_rate": 9.738e-05, "loss": 3.871914291381836, "step": 82870 }, { "epoch": 0.0488, "grad_norm": 0.3421719968318939, "learning_rate": 9.758000000000001e-05, "loss": 3.804059600830078, "step": 82880 }, { "epoch": 0.0489, "grad_norm": 0.3892348110675812, "learning_rate": 9.778e-05, "loss": 3.846315383911133, "step": 82890 }, { "epoch": 0.049, "grad_norm": 0.8134501576423645, "learning_rate": 9.798000000000001e-05, "loss": 3.850412368774414, "step": 82900 }, { "epoch": 0.0491, "grad_norm": 0.3270565867424011, "learning_rate": 9.818000000000001e-05, "loss": 3.897083282470703, "step": 82910 }, { "epoch": 0.0492, "grad_norm": 0.35217463970184326, "learning_rate": 9.838e-05, "loss": 3.834426498413086, "step": 82920 }, { "epoch": 0.0493, "grad_norm": 0.3205115795135498, "learning_rate": 9.858000000000001e-05, "loss": 3.8739059448242186, "step": 82930 }, { "epoch": 0.0494, "grad_norm": 0.32530784606933594, "learning_rate": 9.878e-05, "loss": 3.8487995147705076, "step": 82940 }, { "epoch": 0.0495, "grad_norm": 0.3924853205680847, "learning_rate": 9.898e-05, "loss": 3.7641490936279296, "step": 82950 }, { "epoch": 0.0496, "grad_norm": 0.3617713749408722, "learning_rate": 9.918000000000001e-05, "loss": 3.8098236083984376, "step": 82960 }, { "epoch": 0.0497, "grad_norm": 0.3259013891220093, "learning_rate": 9.938e-05, "loss": 3.9314403533935547, "step": 82970 }, { "epoch": 0.0498, "grad_norm": 0.4144577085971832, "learning_rate": 9.958e-05, "loss": 3.8905261993408202, "step": 82980 }, { "epoch": 0.0499, "grad_norm": 0.3937422037124634, "learning_rate": 9.978000000000001e-05, "loss": 3.8521678924560545, "step": 82990 }, { "epoch": 0.05, "grad_norm": 0.3902900218963623, "learning_rate": 9.998e-05, "loss": 3.8728492736816404, "step": 83000 }, { "epoch": 0.0501, "grad_norm": 0.39842963218688965, "learning_rate": 9.999999778549045e-05, "loss": 3.8664478302001952, "step": 83010 }, { "epoch": 0.0502, "grad_norm": 0.31621137261390686, "learning_rate": 9.999999013039593e-05, "loss": 3.925592041015625, "step": 83020 }, { "epoch": 0.0503, "grad_norm": 0.3774428963661194, "learning_rate": 9.999997700737766e-05, "loss": 3.6988426208496095, "step": 83030 }, { "epoch": 0.0504, "grad_norm": 0.3514353334903717, "learning_rate": 9.999995841643709e-05, "loss": 3.8097213745117187, "step": 83040 }, { "epoch": 0.0505, "grad_norm": 0.3591354787349701, "learning_rate": 9.999993435757623e-05, "loss": 3.8139583587646486, "step": 83050 }, { "epoch": 0.0506, "grad_norm": 0.3420144319534302, "learning_rate": 9.999990483079773e-05, "loss": 3.8276302337646486, "step": 83060 }, { "epoch": 0.0507, "grad_norm": 0.38361331820487976, "learning_rate": 9.999986983610481e-05, "loss": 3.811387634277344, "step": 83070 }, { "epoch": 0.0508, "grad_norm": 0.34372514486312866, "learning_rate": 9.99998293735013e-05, "loss": 3.814420700073242, "step": 83080 }, { "epoch": 0.0509, "grad_norm": 0.3869839310646057, "learning_rate": 9.999978344299161e-05, "loss": 3.822507858276367, "step": 83090 }, { "epoch": 0.051, "grad_norm": 0.6987540125846863, "learning_rate": 9.99997320445808e-05, "loss": 3.7858028411865234, "step": 83100 }, { "epoch": 0.0511, "grad_norm": 0.3535923361778259, "learning_rate": 9.999967517827444e-05, "loss": 3.8200477600097655, "step": 83110 }, { "epoch": 0.0512, "grad_norm": 0.36568525433540344, "learning_rate": 9.999961284407879e-05, "loss": 3.765528106689453, "step": 83120 }, { "epoch": 0.0513, "grad_norm": 0.327122300863266, "learning_rate": 9.999954504200067e-05, "loss": 3.8417179107666017, "step": 83130 }, { "epoch": 0.0514, "grad_norm": 0.38184818625450134, "learning_rate": 9.999947177204744e-05, "loss": 3.8148113250732423, "step": 83140 }, { "epoch": 0.0515, "grad_norm": 0.35110288858413696, "learning_rate": 9.999939303422718e-05, "loss": 3.859242248535156, "step": 83150 }, { "epoch": 0.0516, "grad_norm": 0.4850228726863861, "learning_rate": 9.999930882854847e-05, "loss": 3.819898223876953, "step": 83160 }, { "epoch": 0.0517, "grad_norm": 0.3513251841068268, "learning_rate": 9.999921915502051e-05, "loss": 3.798270416259766, "step": 83170 }, { "epoch": 0.0518, "grad_norm": 0.3239874243736267, "learning_rate": 9.99991240136531e-05, "loss": 3.8538772583007814, "step": 83180 }, { "epoch": 0.0519, "grad_norm": 0.30293792486190796, "learning_rate": 9.999902340445668e-05, "loss": 3.8319358825683594, "step": 83190 }, { "epoch": 0.052, "grad_norm": 0.3121660351753235, "learning_rate": 9.999891732744224e-05, "loss": 3.810728073120117, "step": 83200 }, { "epoch": 0.0001, "grad_norm": 0.46172550320625305, "learning_rate": 9.999880578262135e-05, "loss": 3.8486099243164062, "step": 83210 }, { "epoch": 0.0002, "grad_norm": 0.35218456387519836, "learning_rate": 9.999868877000624e-05, "loss": 3.7945110321044924, "step": 83220 }, { "epoch": 0.0003, "grad_norm": 0.3150595426559448, "learning_rate": 9.99985662896097e-05, "loss": 3.7959964752197264, "step": 83230 }, { "epoch": 0.0004, "grad_norm": 0.2930697500705719, "learning_rate": 9.999843834144513e-05, "loss": 3.8008277893066404, "step": 83240 }, { "epoch": 0.0005, "grad_norm": 0.3913096785545349, "learning_rate": 9.99983049255265e-05, "loss": 3.816920852661133, "step": 83250 }, { "epoch": 0.0006, "grad_norm": 0.43855613470077515, "learning_rate": 9.999816604186843e-05, "loss": 3.8061267852783205, "step": 83260 }, { "epoch": 0.0007, "grad_norm": 0.3215405344963074, "learning_rate": 9.999802169048609e-05, "loss": 3.762989807128906, "step": 83270 }, { "epoch": 0.0008, "grad_norm": 0.3363588750362396, "learning_rate": 9.999787187139527e-05, "loss": 3.768034744262695, "step": 83280 }, { "epoch": 0.0009, "grad_norm": 0.3151419460773468, "learning_rate": 9.999771658461234e-05, "loss": 3.7999534606933594, "step": 83290 }, { "epoch": 0.001, "grad_norm": 0.3272475302219391, "learning_rate": 9.999755583015431e-05, "loss": 3.7911468505859376, "step": 83300 }, { "epoch": 0.0011, "grad_norm": 0.31459152698516846, "learning_rate": 9.999738960803874e-05, "loss": 3.8385593414306642, "step": 83310 }, { "epoch": 0.0012, "grad_norm": 0.30471497774124146, "learning_rate": 9.99972179182838e-05, "loss": 3.7527996063232423, "step": 83320 }, { "epoch": 0.0013, "grad_norm": 0.8856930732727051, "learning_rate": 9.99970407609083e-05, "loss": 3.772340774536133, "step": 83330 }, { "epoch": 0.0014, "grad_norm": 0.37569352984428406, "learning_rate": 9.999685813593159e-05, "loss": 3.840856170654297, "step": 83340 }, { "epoch": 0.0015, "grad_norm": 0.3128896653652191, "learning_rate": 9.999667004337362e-05, "loss": 3.820966339111328, "step": 83350 }, { "epoch": 0.0016, "grad_norm": 0.2925649583339691, "learning_rate": 9.9996476483255e-05, "loss": 3.806324768066406, "step": 83360 }, { "epoch": 0.0017, "grad_norm": 0.3298606872558594, "learning_rate": 9.999627745559688e-05, "loss": 3.8656845092773438, "step": 83370 }, { "epoch": 0.0018, "grad_norm": 0.3136289119720459, "learning_rate": 9.999607296042101e-05, "loss": 3.8158058166503905, "step": 83380 }, { "epoch": 0.0019, "grad_norm": 0.9002659916877747, "learning_rate": 9.99958629977498e-05, "loss": 3.8666664123535157, "step": 83390 }, { "epoch": 0.002, "grad_norm": 0.2898136377334595, "learning_rate": 9.999564756760615e-05, "loss": 3.8743629455566406, "step": 83400 }, { "epoch": 0.0021, "grad_norm": 0.3114927411079407, "learning_rate": 9.999542667001366e-05, "loss": 3.882855987548828, "step": 83410 }, { "epoch": 0.0022, "grad_norm": 0.30287209153175354, "learning_rate": 9.999520030499647e-05, "loss": 3.7960723876953124, "step": 83420 }, { "epoch": 0.0023, "grad_norm": 0.2935043275356293, "learning_rate": 9.999496847257936e-05, "loss": 3.737273025512695, "step": 83430 }, { "epoch": 0.0024, "grad_norm": 0.3218196630477905, "learning_rate": 9.999473117278764e-05, "loss": 3.784800720214844, "step": 83440 }, { "epoch": 0.0025, "grad_norm": 0.3353395462036133, "learning_rate": 9.999448840564731e-05, "loss": 3.8490245819091795, "step": 83450 }, { "epoch": 0.0026, "grad_norm": 0.33304092288017273, "learning_rate": 9.999424017118488e-05, "loss": 3.796077346801758, "step": 83460 }, { "epoch": 0.0027, "grad_norm": 0.34534889459609985, "learning_rate": 9.999398646942751e-05, "loss": 3.7440380096435546, "step": 83470 }, { "epoch": 0.0028, "grad_norm": 0.30867987871170044, "learning_rate": 9.999372730040296e-05, "loss": 3.7291751861572267, "step": 83480 }, { "epoch": 0.0029, "grad_norm": 0.8935909271240234, "learning_rate": 9.999346266413953e-05, "loss": 3.7540428161621096, "step": 83490 }, { "epoch": 0.003, "grad_norm": 0.3775537610054016, "learning_rate": 9.99931925606662e-05, "loss": 3.9341556549072267, "step": 83500 }, { "epoch": 0.0031, "grad_norm": 0.3054729402065277, "learning_rate": 9.99929169900125e-05, "loss": 4.22179946899414, "step": 83510 }, { "epoch": 0.0032, "grad_norm": 0.3387799859046936, "learning_rate": 9.999263595220855e-05, "loss": 3.8072837829589843, "step": 83520 }, { "epoch": 0.0033, "grad_norm": 0.32923126220703125, "learning_rate": 9.99923494472851e-05, "loss": 3.7745723724365234, "step": 83530 }, { "epoch": 0.0034, "grad_norm": 0.29864582419395447, "learning_rate": 9.999205747527348e-05, "loss": 3.7472164154052736, "step": 83540 }, { "epoch": 0.0035, "grad_norm": 0.31445086002349854, "learning_rate": 9.999176003620561e-05, "loss": 3.7745201110839846, "step": 83550 }, { "epoch": 0.0036, "grad_norm": 0.330960750579834, "learning_rate": 9.999145713011405e-05, "loss": 3.766396713256836, "step": 83560 }, { "epoch": 0.0037, "grad_norm": 0.32975828647613525, "learning_rate": 9.999114875703186e-05, "loss": 3.7721385955810547, "step": 83570 }, { "epoch": 0.0038, "grad_norm": 0.31975796818733215, "learning_rate": 9.999083491699281e-05, "loss": 3.7845413208007814, "step": 83580 }, { "epoch": 0.0039, "grad_norm": 0.2991958260536194, "learning_rate": 9.999051561003123e-05, "loss": 3.7527462005615235, "step": 83590 }, { "epoch": 0.004, "grad_norm": 0.2936054766178131, "learning_rate": 9.999019083618202e-05, "loss": 3.7372978210449217, "step": 83600 }, { "epoch": 0.0041, "grad_norm": 0.30992016196250916, "learning_rate": 9.99898605954807e-05, "loss": 3.7773033142089845, "step": 83610 }, { "epoch": 0.0042, "grad_norm": 0.31956759095191956, "learning_rate": 9.998952488796338e-05, "loss": 3.7358421325683593, "step": 83620 }, { "epoch": 0.0043, "grad_norm": 0.2954537868499756, "learning_rate": 9.998918371366676e-05, "loss": 3.8497447967529297, "step": 83630 }, { "epoch": 0.0044, "grad_norm": 0.33330878615379333, "learning_rate": 9.99888370726282e-05, "loss": 3.753360366821289, "step": 83640 }, { "epoch": 0.0045, "grad_norm": 0.3561699092388153, "learning_rate": 9.998848496488556e-05, "loss": 3.7192745208740234, "step": 83650 }, { "epoch": 0.0046, "grad_norm": 0.3320558965206146, "learning_rate": 9.998812739047736e-05, "loss": 3.7962955474853515, "step": 83660 }, { "epoch": 0.0047, "grad_norm": 0.3007565140724182, "learning_rate": 9.99877643494427e-05, "loss": 3.728927230834961, "step": 83670 }, { "epoch": 0.0048, "grad_norm": 0.3343304395675659, "learning_rate": 9.998739584182128e-05, "loss": 3.776163864135742, "step": 83680 }, { "epoch": 0.0049, "grad_norm": 0.29734089970588684, "learning_rate": 9.998702186765342e-05, "loss": 3.9177711486816404, "step": 83690 }, { "epoch": 0.005, "grad_norm": 0.31020456552505493, "learning_rate": 9.998664242698e-05, "loss": 3.7417606353759765, "step": 83700 }, { "epoch": 0.0051, "grad_norm": 0.29213470220565796, "learning_rate": 9.998625751984251e-05, "loss": 3.729521560668945, "step": 83710 }, { "epoch": 0.0052, "grad_norm": 0.2952626347541809, "learning_rate": 9.998586714628307e-05, "loss": 3.7422054290771483, "step": 83720 }, { "epoch": 0.0053, "grad_norm": 0.6679181456565857, "learning_rate": 9.998547130634432e-05, "loss": 3.7184337615966796, "step": 83730 }, { "epoch": 0.0054, "grad_norm": 0.29027095437049866, "learning_rate": 9.99850700000696e-05, "loss": 3.6971817016601562, "step": 83740 }, { "epoch": 0.0055, "grad_norm": 0.30800989270210266, "learning_rate": 9.998466322750278e-05, "loss": 3.7443538665771485, "step": 83750 }, { "epoch": 0.0056, "grad_norm": 0.3017495274543762, "learning_rate": 9.998425098868834e-05, "loss": 3.7538970947265624, "step": 83760 }, { "epoch": 0.0057, "grad_norm": 0.31099507212638855, "learning_rate": 9.998383328367136e-05, "loss": 3.6968109130859377, "step": 83770 }, { "epoch": 0.0058, "grad_norm": 0.30337584018707275, "learning_rate": 9.99834101124975e-05, "loss": 3.7189136505126954, "step": 83780 }, { "epoch": 0.0059, "grad_norm": 0.29351165890693665, "learning_rate": 9.998298147521309e-05, "loss": 3.7206035614013673, "step": 83790 }, { "epoch": 0.006, "grad_norm": 0.2851601839065552, "learning_rate": 9.998254737186496e-05, "loss": 3.7244026184082033, "step": 83800 }, { "epoch": 0.0061, "grad_norm": 0.2841717302799225, "learning_rate": 9.99821078025006e-05, "loss": 3.696475601196289, "step": 83810 }, { "epoch": 0.0062, "grad_norm": 0.31163260340690613, "learning_rate": 9.998166276716807e-05, "loss": 3.830350875854492, "step": 83820 }, { "epoch": 0.0063, "grad_norm": 0.31767481565475464, "learning_rate": 9.998121226591606e-05, "loss": 3.758163833618164, "step": 83830 }, { "epoch": 0.0064, "grad_norm": 0.3415278196334839, "learning_rate": 9.998075629879382e-05, "loss": 3.830966567993164, "step": 83840 }, { "epoch": 0.0065, "grad_norm": 0.41317543387413025, "learning_rate": 9.99802948658512e-05, "loss": 3.706676483154297, "step": 83850 }, { "epoch": 0.0066, "grad_norm": 0.28475648164749146, "learning_rate": 9.99798279671387e-05, "loss": 3.707737350463867, "step": 83860 }, { "epoch": 0.0067, "grad_norm": 0.30963894724845886, "learning_rate": 9.997935560270734e-05, "loss": 3.709725570678711, "step": 83870 }, { "epoch": 0.0068, "grad_norm": 0.27574118971824646, "learning_rate": 9.997887777260879e-05, "loss": 3.7120189666748047, "step": 83880 }, { "epoch": 0.0069, "grad_norm": 0.2835499048233032, "learning_rate": 9.997839447689532e-05, "loss": 3.7362274169921874, "step": 83890 }, { "epoch": 0.007, "grad_norm": 0.3679351806640625, "learning_rate": 9.997790571561978e-05, "loss": 3.7187286376953126, "step": 83900 }, { "epoch": 0.0071, "grad_norm": 0.29465362429618835, "learning_rate": 9.99774114888356e-05, "loss": 3.6762458801269533, "step": 83910 }, { "epoch": 0.0072, "grad_norm": 0.30970558524131775, "learning_rate": 9.997691179659684e-05, "loss": 3.745528793334961, "step": 83920 }, { "epoch": 0.0073, "grad_norm": 0.3203109800815582, "learning_rate": 9.997640663895815e-05, "loss": 3.799913024902344, "step": 83930 }, { "epoch": 0.0074, "grad_norm": 0.3300760090351105, "learning_rate": 9.997589601597477e-05, "loss": 3.7185848236083983, "step": 83940 }, { "epoch": 0.0075, "grad_norm": 0.28454458713531494, "learning_rate": 9.997537992770252e-05, "loss": 3.7427799224853517, "step": 83950 }, { "epoch": 0.0076, "grad_norm": 0.2869153320789337, "learning_rate": 9.997485837419788e-05, "loss": 3.6875835418701173, "step": 83960 }, { "epoch": 0.0077, "grad_norm": 0.2666051983833313, "learning_rate": 9.997433135551786e-05, "loss": 3.710926055908203, "step": 83970 }, { "epoch": 0.0078, "grad_norm": 0.7562700510025024, "learning_rate": 9.997379887172009e-05, "loss": 3.8051422119140623, "step": 83980 }, { "epoch": 0.0079, "grad_norm": 0.3042979836463928, "learning_rate": 9.997326092286281e-05, "loss": 3.786056137084961, "step": 83990 }, { "epoch": 0.008, "grad_norm": 0.32059091329574585, "learning_rate": 9.997271750900486e-05, "loss": 3.7323928833007813, "step": 84000 }, { "epoch": 0.0081, "grad_norm": 0.43642181158065796, "learning_rate": 9.997216863020565e-05, "loss": 3.683271026611328, "step": 84010 }, { "epoch": 0.0082, "grad_norm": 0.4332754611968994, "learning_rate": 9.99716142865252e-05, "loss": 3.710392379760742, "step": 84020 }, { "epoch": 0.0083, "grad_norm": 0.3230162262916565, "learning_rate": 9.997105447802415e-05, "loss": 3.790265655517578, "step": 84030 }, { "epoch": 0.0084, "grad_norm": 0.2865377962589264, "learning_rate": 9.997048920476373e-05, "loss": 3.7654045104980467, "step": 84040 }, { "epoch": 0.0085, "grad_norm": 0.2923087477684021, "learning_rate": 9.996991846680572e-05, "loss": 3.7558765411376953, "step": 84050 }, { "epoch": 0.0086, "grad_norm": 0.27676334977149963, "learning_rate": 9.996934226421257e-05, "loss": 3.7240447998046875, "step": 84060 }, { "epoch": 0.0087, "grad_norm": 0.29819369316101074, "learning_rate": 9.996876059704726e-05, "loss": 3.696995162963867, "step": 84070 }, { "epoch": 0.0088, "grad_norm": 0.43039318919181824, "learning_rate": 9.996817346537343e-05, "loss": 3.6725696563720702, "step": 84080 }, { "epoch": 0.0089, "grad_norm": 0.3873271346092224, "learning_rate": 9.996758086925526e-05, "loss": 3.6867134094238283, "step": 84090 }, { "epoch": 0.009, "grad_norm": 0.291673481464386, "learning_rate": 9.996698280875759e-05, "loss": 3.7313922882080077, "step": 84100 }, { "epoch": 0.0091, "grad_norm": 0.4599123001098633, "learning_rate": 9.99663792839458e-05, "loss": 3.7317237854003906, "step": 84110 }, { "epoch": 0.0092, "grad_norm": 0.36091652512550354, "learning_rate": 9.99657702948859e-05, "loss": 3.692718505859375, "step": 84120 }, { "epoch": 0.0093, "grad_norm": 0.2885626256465912, "learning_rate": 9.996515584164448e-05, "loss": 3.709298324584961, "step": 84130 }, { "epoch": 0.0094, "grad_norm": 0.2739655673503876, "learning_rate": 9.996453592428873e-05, "loss": 3.667826461791992, "step": 84140 }, { "epoch": 0.0095, "grad_norm": 0.2908301055431366, "learning_rate": 9.996391054288646e-05, "loss": 3.874862289428711, "step": 84150 }, { "epoch": 0.0096, "grad_norm": 0.2993263900279999, "learning_rate": 9.996327969750605e-05, "loss": 3.706760787963867, "step": 84160 }, { "epoch": 0.0097, "grad_norm": 0.29502788186073303, "learning_rate": 9.996264338821649e-05, "loss": 3.6855545043945312, "step": 84170 }, { "epoch": 0.0098, "grad_norm": 0.27505671977996826, "learning_rate": 9.996200161508735e-05, "loss": 3.6944915771484377, "step": 84180 }, { "epoch": 0.0099, "grad_norm": 0.2615220844745636, "learning_rate": 9.996135437818885e-05, "loss": 3.7221439361572264, "step": 84190 }, { "epoch": 0.01, "grad_norm": 0.29790231585502625, "learning_rate": 9.996070167759175e-05, "loss": 3.7267841339111327, "step": 84200 }, { "epoch": 0.0101, "grad_norm": 0.3563947081565857, "learning_rate": 9.996004351336743e-05, "loss": 3.8453121185302734, "step": 84210 }, { "epoch": 0.0102, "grad_norm": 1.035010576248169, "learning_rate": 9.995937988558785e-05, "loss": 3.9560173034667967, "step": 84220 }, { "epoch": 0.0103, "grad_norm": 0.26476454734802246, "learning_rate": 9.995871079432561e-05, "loss": 3.6924465179443358, "step": 84230 }, { "epoch": 0.0104, "grad_norm": 0.28808432817459106, "learning_rate": 9.995803623965389e-05, "loss": 3.7619529724121095, "step": 84240 }, { "epoch": 0.0105, "grad_norm": 0.27497121691703796, "learning_rate": 9.995735622164641e-05, "loss": 3.6973804473876952, "step": 84250 }, { "epoch": 0.0106, "grad_norm": 0.2922205924987793, "learning_rate": 9.995667074037758e-05, "loss": 3.7086257934570312, "step": 84260 }, { "epoch": 0.0107, "grad_norm": 0.28231361508369446, "learning_rate": 9.995597979592232e-05, "loss": 3.738058090209961, "step": 84270 }, { "epoch": 0.0108, "grad_norm": 0.2753177583217621, "learning_rate": 9.995528338835625e-05, "loss": 3.6708652496337892, "step": 84280 }, { "epoch": 0.0109, "grad_norm": 0.3010140359401703, "learning_rate": 9.995458151775547e-05, "loss": 3.7018722534179687, "step": 84290 }, { "epoch": 0.011, "grad_norm": 0.2673659324645996, "learning_rate": 9.995387418419677e-05, "loss": 3.6982131958007813, "step": 84300 }, { "epoch": 0.0111, "grad_norm": 0.3174901008605957, "learning_rate": 9.99531613877575e-05, "loss": 3.708171081542969, "step": 84310 }, { "epoch": 0.0112, "grad_norm": 0.37919145822525024, "learning_rate": 9.995244312851559e-05, "loss": 3.7034980773925783, "step": 84320 }, { "epoch": 0.0113, "grad_norm": 0.33328044414520264, "learning_rate": 9.995171940654961e-05, "loss": 3.6426422119140627, "step": 84330 }, { "epoch": 0.0114, "grad_norm": 0.2768639326095581, "learning_rate": 9.995099022193871e-05, "loss": 3.688552474975586, "step": 84340 }, { "epoch": 0.0115, "grad_norm": 0.29498782753944397, "learning_rate": 9.995025557476261e-05, "loss": 3.659234619140625, "step": 84350 }, { "epoch": 0.0116, "grad_norm": 0.29603710770606995, "learning_rate": 9.994951546510165e-05, "loss": 3.7908111572265626, "step": 84360 }, { "epoch": 0.0117, "grad_norm": 0.3008792996406555, "learning_rate": 9.994876989303679e-05, "loss": 3.7032970428466796, "step": 84370 }, { "epoch": 0.0118, "grad_norm": 0.36400842666625977, "learning_rate": 9.994801885864955e-05, "loss": 3.799905014038086, "step": 84380 }, { "epoch": 0.0119, "grad_norm": 0.3740883767604828, "learning_rate": 9.994726236202205e-05, "loss": 3.721433639526367, "step": 84390 }, { "epoch": 0.012, "grad_norm": 0.26394209265708923, "learning_rate": 9.994650040323704e-05, "loss": 3.6681087493896483, "step": 84400 }, { "epoch": 0.0121, "grad_norm": 0.28201553225517273, "learning_rate": 9.994573298237784e-05, "loss": 3.6220928192138673, "step": 84410 }, { "epoch": 0.0122, "grad_norm": 0.29811936616897583, "learning_rate": 9.994496009952837e-05, "loss": 3.670298767089844, "step": 84420 }, { "epoch": 0.0123, "grad_norm": 0.29378166794776917, "learning_rate": 9.994418175477316e-05, "loss": 3.6430587768554688, "step": 84430 }, { "epoch": 0.0124, "grad_norm": 0.2813373804092407, "learning_rate": 9.994339794819733e-05, "loss": 3.6950450897216798, "step": 84440 }, { "epoch": 0.0125, "grad_norm": 0.26766225695610046, "learning_rate": 9.994260867988658e-05, "loss": 3.687444305419922, "step": 84450 }, { "epoch": 0.0126, "grad_norm": 0.2912333905696869, "learning_rate": 9.994181394992723e-05, "loss": 3.6485137939453125, "step": 84460 }, { "epoch": 0.0127, "grad_norm": 0.29468369483947754, "learning_rate": 9.994101375840618e-05, "loss": 3.6847896575927734, "step": 84470 }, { "epoch": 0.0128, "grad_norm": 0.30274367332458496, "learning_rate": 9.994020810541098e-05, "loss": 3.6330875396728515, "step": 84480 }, { "epoch": 0.0129, "grad_norm": 0.3261053264141083, "learning_rate": 9.99393969910297e-05, "loss": 3.7124881744384766, "step": 84490 }, { "epoch": 0.013, "grad_norm": 0.2950016260147095, "learning_rate": 9.993858041535104e-05, "loss": 3.651884841918945, "step": 84500 }, { "epoch": 0.0131, "grad_norm": 0.3526657819747925, "learning_rate": 9.99377583784643e-05, "loss": 3.6653316497802733, "step": 84510 }, { "epoch": 0.0132, "grad_norm": 0.28628677129745483, "learning_rate": 9.993693088045939e-05, "loss": 3.7038524627685545, "step": 84520 }, { "epoch": 0.0133, "grad_norm": 0.2856554090976715, "learning_rate": 9.99360979214268e-05, "loss": 3.6831943511962892, "step": 84530 }, { "epoch": 0.0134, "grad_norm": 0.2904079854488373, "learning_rate": 9.99352595014576e-05, "loss": 3.63916130065918, "step": 84540 }, { "epoch": 0.0135, "grad_norm": 0.3895551562309265, "learning_rate": 9.993441562064354e-05, "loss": 3.6735130310058595, "step": 84550 }, { "epoch": 0.0136, "grad_norm": 0.2685251235961914, "learning_rate": 9.993356627907685e-05, "loss": 3.6408214569091797, "step": 84560 }, { "epoch": 0.0137, "grad_norm": 0.26372188329696655, "learning_rate": 9.99327114768504e-05, "loss": 3.6400081634521486, "step": 84570 }, { "epoch": 0.0138, "grad_norm": 0.2654428780078888, "learning_rate": 9.99318512140577e-05, "loss": 3.648508071899414, "step": 84580 }, { "epoch": 0.0139, "grad_norm": 0.2707081735134125, "learning_rate": 9.993098549079284e-05, "loss": 3.610399627685547, "step": 84590 }, { "epoch": 0.014, "grad_norm": 0.25929567217826843, "learning_rate": 9.993011430715047e-05, "loss": 3.6582550048828124, "step": 84600 }, { "epoch": 0.0141, "grad_norm": 0.28501084446907043, "learning_rate": 9.992923766322586e-05, "loss": 3.8640674591064452, "step": 84610 }, { "epoch": 0.0142, "grad_norm": 0.27245280146598816, "learning_rate": 9.99283555591149e-05, "loss": 3.7064876556396484, "step": 84620 }, { "epoch": 0.0143, "grad_norm": 0.26817578077316284, "learning_rate": 9.992746799491404e-05, "loss": 3.6717796325683594, "step": 84630 }, { "epoch": 0.0144, "grad_norm": 0.28869614005088806, "learning_rate": 9.992657497072033e-05, "loss": 3.661438751220703, "step": 84640 }, { "epoch": 0.0145, "grad_norm": 0.25756171345710754, "learning_rate": 9.992567648663147e-05, "loss": 3.7627632141113283, "step": 84650 }, { "epoch": 0.0146, "grad_norm": 0.2554946541786194, "learning_rate": 9.992477254274568e-05, "loss": 3.6389511108398436, "step": 84660 }, { "epoch": 0.0147, "grad_norm": 0.27790117263793945, "learning_rate": 9.992386313916183e-05, "loss": 3.614088439941406, "step": 84670 }, { "epoch": 0.0148, "grad_norm": 0.6109986305236816, "learning_rate": 9.992294827597934e-05, "loss": 3.703929138183594, "step": 84680 }, { "epoch": 0.0149, "grad_norm": 0.2710428833961487, "learning_rate": 9.992202795329831e-05, "loss": 3.7939647674560546, "step": 84690 }, { "epoch": 0.015, "grad_norm": 0.27565139532089233, "learning_rate": 9.992110217121936e-05, "loss": 3.629970169067383, "step": 84700 }, { "epoch": 0.0151, "grad_norm": 0.27539974451065063, "learning_rate": 9.992017092984372e-05, "loss": 3.619001770019531, "step": 84710 }, { "epoch": 0.0152, "grad_norm": 0.2647361755371094, "learning_rate": 9.991923422927326e-05, "loss": 3.6917633056640624, "step": 84720 }, { "epoch": 0.0153, "grad_norm": 0.6230190992355347, "learning_rate": 9.991829206961037e-05, "loss": 3.7478443145751954, "step": 84730 }, { "epoch": 0.0154, "grad_norm": 0.2780240774154663, "learning_rate": 9.991734445095813e-05, "loss": 3.6476226806640626, "step": 84740 }, { "epoch": 0.0155, "grad_norm": 0.35595566034317017, "learning_rate": 9.991639137342015e-05, "loss": 3.573978042602539, "step": 84750 }, { "epoch": 0.0156, "grad_norm": 0.2836577594280243, "learning_rate": 9.991543283710064e-05, "loss": 3.617045593261719, "step": 84760 }, { "epoch": 0.0157, "grad_norm": 0.2492760419845581, "learning_rate": 9.991446884210445e-05, "loss": 3.6304271697998045, "step": 84770 }, { "epoch": 0.0158, "grad_norm": 0.2547227740287781, "learning_rate": 9.9913499388537e-05, "loss": 3.6432247161865234, "step": 84780 }, { "epoch": 0.0159, "grad_norm": 0.27947524189949036, "learning_rate": 9.99125244765043e-05, "loss": 3.6694393157958984, "step": 84790 }, { "epoch": 0.016, "grad_norm": 0.2819245755672455, "learning_rate": 9.991154410611296e-05, "loss": 3.7544319152832033, "step": 84800 }, { "epoch": 0.0161, "grad_norm": 0.26809099316596985, "learning_rate": 9.99105582774702e-05, "loss": 3.6334564208984377, "step": 84810 }, { "epoch": 0.0162, "grad_norm": 0.25060704350471497, "learning_rate": 9.990956699068384e-05, "loss": 3.63934440612793, "step": 84820 }, { "epoch": 0.0163, "grad_norm": 0.26129117608070374, "learning_rate": 9.990857024586224e-05, "loss": 3.6837406158447266, "step": 84830 }, { "epoch": 0.0164, "grad_norm": 0.4481086730957031, "learning_rate": 9.990756804311446e-05, "loss": 3.6479557037353514, "step": 84840 }, { "epoch": 0.0165, "grad_norm": 0.2535252273082733, "learning_rate": 9.990656038255006e-05, "loss": 3.6568538665771486, "step": 84850 }, { "epoch": 0.0166, "grad_norm": 0.2449161857366562, "learning_rate": 9.990554726427926e-05, "loss": 3.6333213806152345, "step": 84860 }, { "epoch": 0.0167, "grad_norm": 0.2439567595720291, "learning_rate": 9.990452868841284e-05, "loss": 3.656516265869141, "step": 84870 }, { "epoch": 0.0168, "grad_norm": 0.26403185725212097, "learning_rate": 9.99035046550622e-05, "loss": 3.6358219146728517, "step": 84880 }, { "epoch": 0.0169, "grad_norm": 0.8884915113449097, "learning_rate": 9.99024751643393e-05, "loss": 3.634407806396484, "step": 84890 }, { "epoch": 0.017, "grad_norm": 0.25160226225852966, "learning_rate": 9.990144021635677e-05, "loss": 3.6189918518066406, "step": 84900 }, { "epoch": 0.0171, "grad_norm": 0.287718802690506, "learning_rate": 9.990039981122775e-05, "loss": 3.6347091674804686, "step": 84910 }, { "epoch": 0.0172, "grad_norm": 0.26645761728286743, "learning_rate": 9.989935394906602e-05, "loss": 3.5592464447021483, "step": 84920 }, { "epoch": 0.0173, "grad_norm": 0.26977407932281494, "learning_rate": 9.989830262998598e-05, "loss": 3.738728713989258, "step": 84930 }, { "epoch": 0.0174, "grad_norm": 0.36819988489151, "learning_rate": 9.989724585410259e-05, "loss": 3.626963806152344, "step": 84940 }, { "epoch": 0.0175, "grad_norm": 0.49660632014274597, "learning_rate": 9.989618362153139e-05, "loss": 3.7604835510253904, "step": 84950 }, { "epoch": 0.0176, "grad_norm": 0.28116080164909363, "learning_rate": 9.989511593238859e-05, "loss": 3.634185791015625, "step": 84960 }, { "epoch": 0.0177, "grad_norm": 0.26776161789894104, "learning_rate": 9.98940427867909e-05, "loss": 3.652395248413086, "step": 84970 }, { "epoch": 0.0178, "grad_norm": 0.2534007132053375, "learning_rate": 9.989296418485573e-05, "loss": 3.5961162567138674, "step": 84980 }, { "epoch": 0.0179, "grad_norm": 0.2513788044452667, "learning_rate": 9.989188012670101e-05, "loss": 3.6286182403564453, "step": 84990 }, { "epoch": 0.018, "grad_norm": 0.26208066940307617, "learning_rate": 9.989079061244528e-05, "loss": 3.5902149200439455, "step": 85000 }, { "epoch": 0.0181, "grad_norm": 0.2678416073322296, "learning_rate": 9.988969564220769e-05, "loss": 3.611722946166992, "step": 85010 }, { "epoch": 0.0182, "grad_norm": 0.258638471364975, "learning_rate": 9.988859521610801e-05, "loss": 3.706229019165039, "step": 85020 }, { "epoch": 0.0183, "grad_norm": 0.2703403830528259, "learning_rate": 9.988748933426656e-05, "loss": 3.632783126831055, "step": 85030 }, { "epoch": 0.0184, "grad_norm": 0.2524788975715637, "learning_rate": 9.988637799680428e-05, "loss": 3.630057144165039, "step": 85040 }, { "epoch": 0.0185, "grad_norm": 0.2823765277862549, "learning_rate": 9.98852612038427e-05, "loss": 3.5970684051513673, "step": 85050 }, { "epoch": 0.0186, "grad_norm": 0.40357911586761475, "learning_rate": 9.988413895550397e-05, "loss": 3.627879333496094, "step": 85060 }, { "epoch": 0.0187, "grad_norm": 0.254033625125885, "learning_rate": 9.98830112519108e-05, "loss": 3.6379791259765626, "step": 85070 }, { "epoch": 0.0188, "grad_norm": 0.2334320992231369, "learning_rate": 9.98818780931865e-05, "loss": 3.6307395935058593, "step": 85080 }, { "epoch": 0.0189, "grad_norm": 0.559700071811676, "learning_rate": 9.988073947945502e-05, "loss": 3.5570709228515627, "step": 85090 }, { "epoch": 0.019, "grad_norm": 0.24874722957611084, "learning_rate": 9.987959541084087e-05, "loss": 3.6313430786132814, "step": 85100 }, { "epoch": 0.0191, "grad_norm": 0.27919065952301025, "learning_rate": 9.987844588746915e-05, "loss": 3.5931961059570314, "step": 85110 }, { "epoch": 0.0192, "grad_norm": 0.26230403780937195, "learning_rate": 9.987729090946558e-05, "loss": 3.6434871673583986, "step": 85120 }, { "epoch": 0.0193, "grad_norm": 0.3103668689727783, "learning_rate": 9.987613047695647e-05, "loss": 3.6431617736816406, "step": 85130 }, { "epoch": 0.0194, "grad_norm": 0.2713069021701813, "learning_rate": 9.987496459006871e-05, "loss": 3.7358371734619142, "step": 85140 }, { "epoch": 0.0195, "grad_norm": 0.25538644194602966, "learning_rate": 9.987379324892982e-05, "loss": 3.6248268127441405, "step": 85150 }, { "epoch": 0.0196, "grad_norm": 0.33541756868362427, "learning_rate": 9.987261645366788e-05, "loss": 3.661859130859375, "step": 85160 }, { "epoch": 0.0197, "grad_norm": 0.2498832643032074, "learning_rate": 9.987143420441158e-05, "loss": 3.6815727233886717, "step": 85170 }, { "epoch": 0.0198, "grad_norm": 0.2820783257484436, "learning_rate": 9.987024650129022e-05, "loss": 3.5976947784423827, "step": 85180 }, { "epoch": 0.0199, "grad_norm": 0.29067307710647583, "learning_rate": 9.986905334443368e-05, "loss": 3.6827850341796875, "step": 85190 }, { "epoch": 0.02, "grad_norm": 0.24741661548614502, "learning_rate": 9.986785473397245e-05, "loss": 3.6277320861816404, "step": 85200 }, { "epoch": 0.0201, "grad_norm": 0.27294591069221497, "learning_rate": 9.98666506700376e-05, "loss": 3.6579833984375, "step": 85210 }, { "epoch": 0.0202, "grad_norm": 0.2656424045562744, "learning_rate": 9.986544115276081e-05, "loss": 3.627397918701172, "step": 85220 }, { "epoch": 0.0203, "grad_norm": 0.2507568597793579, "learning_rate": 9.986422618227433e-05, "loss": 3.6089473724365235, "step": 85230 }, { "epoch": 0.0204, "grad_norm": 0.25992894172668457, "learning_rate": 9.986300575871106e-05, "loss": 3.6093997955322266, "step": 85240 }, { "epoch": 0.0205, "grad_norm": 0.2580110728740692, "learning_rate": 9.986177988220444e-05, "loss": 3.602360153198242, "step": 85250 }, { "epoch": 0.0206, "grad_norm": 0.44160109758377075, "learning_rate": 9.986054855288856e-05, "loss": 3.6124210357666016, "step": 85260 }, { "epoch": 0.0207, "grad_norm": 0.28597763180732727, "learning_rate": 9.985931177089802e-05, "loss": 3.369567108154297, "step": 85270 }, { "epoch": 0.0208, "grad_norm": 0.2390611469745636, "learning_rate": 9.985806953636814e-05, "loss": 3.574873352050781, "step": 85280 }, { "epoch": 0.0209, "grad_norm": 0.2689356803894043, "learning_rate": 9.985682184943471e-05, "loss": 3.6195514678955076, "step": 85290 }, { "epoch": 0.021, "grad_norm": 0.3069749176502228, "learning_rate": 9.98555687102342e-05, "loss": 3.5628311157226564, "step": 85300 }, { "epoch": 0.0211, "grad_norm": 0.25516900420188904, "learning_rate": 9.985431011890367e-05, "loss": 3.5898296356201174, "step": 85310 }, { "epoch": 0.0212, "grad_norm": 0.2481095790863037, "learning_rate": 9.985304607558075e-05, "loss": 3.6141582489013673, "step": 85320 }, { "epoch": 0.0213, "grad_norm": 0.819419801235199, "learning_rate": 9.985177658040364e-05, "loss": 3.8181838989257812, "step": 85330 }, { "epoch": 0.0214, "grad_norm": 0.2710549831390381, "learning_rate": 9.985050163351119e-05, "loss": 3.7206100463867187, "step": 85340 }, { "epoch": 0.0215, "grad_norm": 0.24863991141319275, "learning_rate": 9.984922123504286e-05, "loss": 3.6582103729248048, "step": 85350 }, { "epoch": 0.0216, "grad_norm": 0.2624618411064148, "learning_rate": 9.984793538513862e-05, "loss": 3.6053421020507814, "step": 85360 }, { "epoch": 0.0217, "grad_norm": 0.28069108724594116, "learning_rate": 9.984664408393912e-05, "loss": 3.6144382476806642, "step": 85370 }, { "epoch": 0.0218, "grad_norm": 0.2532116174697876, "learning_rate": 9.984534733158556e-05, "loss": 3.6202327728271486, "step": 85380 }, { "epoch": 0.0219, "grad_norm": 0.29506364464759827, "learning_rate": 9.984404512821977e-05, "loss": 3.702600860595703, "step": 85390 }, { "epoch": 0.022, "grad_norm": 0.25815752148628235, "learning_rate": 9.984273747398411e-05, "loss": 3.656900405883789, "step": 85400 }, { "epoch": 0.0221, "grad_norm": 0.2533004581928253, "learning_rate": 9.984142436902165e-05, "loss": 3.607746124267578, "step": 85410 }, { "epoch": 0.0222, "grad_norm": 0.2563793361186981, "learning_rate": 9.984010581347596e-05, "loss": 3.665967559814453, "step": 85420 }, { "epoch": 0.0223, "grad_norm": 0.2633318603038788, "learning_rate": 9.983878180749121e-05, "loss": 3.5977550506591798, "step": 85430 }, { "epoch": 0.0224, "grad_norm": 0.2543111741542816, "learning_rate": 9.983745235121222e-05, "loss": 3.620417022705078, "step": 85440 }, { "epoch": 0.0225, "grad_norm": 0.31046542525291443, "learning_rate": 9.983611744478438e-05, "loss": 3.611802673339844, "step": 85450 }, { "epoch": 0.0226, "grad_norm": 0.32984763383865356, "learning_rate": 9.983477708835365e-05, "loss": 3.659274673461914, "step": 85460 }, { "epoch": 0.0227, "grad_norm": 0.2382606565952301, "learning_rate": 9.983343128206664e-05, "loss": 3.6407955169677733, "step": 85470 }, { "epoch": 0.0228, "grad_norm": 0.2767845690250397, "learning_rate": 9.983208002607049e-05, "loss": 3.59202880859375, "step": 85480 }, { "epoch": 0.0229, "grad_norm": 0.26981160044670105, "learning_rate": 9.9830723320513e-05, "loss": 3.5599910736083986, "step": 85490 }, { "epoch": 0.023, "grad_norm": 0.281143456697464, "learning_rate": 9.982936116554254e-05, "loss": 3.5864818572998045, "step": 85500 }, { "epoch": 0.0231, "grad_norm": 0.2885102927684784, "learning_rate": 9.982799356130803e-05, "loss": 3.6041526794433594, "step": 85510 }, { "epoch": 0.0232, "grad_norm": 0.30228304862976074, "learning_rate": 9.982662050795908e-05, "loss": 3.591915512084961, "step": 85520 }, { "epoch": 0.0233, "grad_norm": 0.25424811244010925, "learning_rate": 9.982524200564583e-05, "loss": 3.6255348205566404, "step": 85530 }, { "epoch": 0.0234, "grad_norm": 0.2461751252412796, "learning_rate": 9.982385805451901e-05, "loss": 3.600880813598633, "step": 85540 }, { "epoch": 0.0235, "grad_norm": 0.2418268918991089, "learning_rate": 9.982246865472998e-05, "loss": 3.6520408630371093, "step": 85550 }, { "epoch": 0.0236, "grad_norm": 0.24845528602600098, "learning_rate": 9.982107380643069e-05, "loss": 3.5574569702148438, "step": 85560 }, { "epoch": 0.0237, "grad_norm": 0.25059717893600464, "learning_rate": 9.981967350977368e-05, "loss": 3.5402393341064453, "step": 85570 }, { "epoch": 0.0238, "grad_norm": 0.28324198722839355, "learning_rate": 9.981826776491208e-05, "loss": 3.595504379272461, "step": 85580 }, { "epoch": 0.0239, "grad_norm": 0.24451854825019836, "learning_rate": 9.98168565719996e-05, "loss": 3.5850383758544924, "step": 85590 }, { "epoch": 0.024, "grad_norm": 0.267659068107605, "learning_rate": 9.98154399311906e-05, "loss": 3.578251266479492, "step": 85600 }, { "epoch": 0.0241, "grad_norm": 0.35833561420440674, "learning_rate": 9.981401784263997e-05, "loss": 3.620841217041016, "step": 85610 }, { "epoch": 0.0242, "grad_norm": 0.2442755103111267, "learning_rate": 9.981259030650326e-05, "loss": 3.587302017211914, "step": 85620 }, { "epoch": 0.0243, "grad_norm": 0.24890415370464325, "learning_rate": 9.981115732293655e-05, "loss": 3.6900665283203127, "step": 85630 }, { "epoch": 0.0244, "grad_norm": 0.419906884431839, "learning_rate": 9.980971889209659e-05, "loss": 3.609457015991211, "step": 85640 }, { "epoch": 0.0245, "grad_norm": 0.24704444408416748, "learning_rate": 9.980827501414064e-05, "loss": 3.5770389556884767, "step": 85650 }, { "epoch": 0.0246, "grad_norm": 0.27113986015319824, "learning_rate": 9.980682568922663e-05, "loss": 3.579428863525391, "step": 85660 }, { "epoch": 0.0247, "grad_norm": 0.2433699667453766, "learning_rate": 9.980537091751304e-05, "loss": 3.6096595764160155, "step": 85670 }, { "epoch": 0.0248, "grad_norm": 0.25338035821914673, "learning_rate": 9.980391069915897e-05, "loss": 3.6539871215820314, "step": 85680 }, { "epoch": 0.0249, "grad_norm": 0.26990896463394165, "learning_rate": 9.98024450343241e-05, "loss": 3.551648712158203, "step": 85690 }, { "epoch": 0.025, "grad_norm": 0.6481966972351074, "learning_rate": 9.980097392316872e-05, "loss": 3.6198158264160156, "step": 85700 }, { "epoch": 0.0251, "grad_norm": 0.26027655601501465, "learning_rate": 9.97994973658537e-05, "loss": 3.7159801483154298, "step": 85710 }, { "epoch": 0.0252, "grad_norm": 0.23775680363178253, "learning_rate": 9.979801536254054e-05, "loss": 3.5569076538085938, "step": 85720 }, { "epoch": 0.0253, "grad_norm": 0.2288324236869812, "learning_rate": 9.979652791339127e-05, "loss": 3.6577377319335938, "step": 85730 }, { "epoch": 0.0254, "grad_norm": 0.23314149677753448, "learning_rate": 9.97950350185686e-05, "loss": 3.582255554199219, "step": 85740 }, { "epoch": 0.0255, "grad_norm": 0.2392662763595581, "learning_rate": 9.979353667823574e-05, "loss": 3.577046203613281, "step": 85750 }, { "epoch": 0.0256, "grad_norm": 0.3036603629589081, "learning_rate": 9.979203289255658e-05, "loss": 3.534611129760742, "step": 85760 }, { "epoch": 0.0257, "grad_norm": 0.2220892608165741, "learning_rate": 9.979052366169557e-05, "loss": 3.5335983276367187, "step": 85770 }, { "epoch": 0.0258, "grad_norm": 0.25794705748558044, "learning_rate": 9.978900898581775e-05, "loss": 3.6444496154785155, "step": 85780 }, { "epoch": 0.0259, "grad_norm": 0.2427547127008438, "learning_rate": 9.978748886508875e-05, "loss": 3.5649723052978515, "step": 85790 }, { "epoch": 0.026, "grad_norm": 0.24454070627689362, "learning_rate": 9.978596329967484e-05, "loss": 3.541699981689453, "step": 85800 }, { "epoch": 0.0261, "grad_norm": 0.2416403740644455, "learning_rate": 9.978443228974284e-05, "loss": 3.5596855163574217, "step": 85810 }, { "epoch": 0.0262, "grad_norm": 0.23428606986999512, "learning_rate": 9.978289583546015e-05, "loss": 3.577880859375, "step": 85820 }, { "epoch": 0.0263, "grad_norm": 0.2251688688993454, "learning_rate": 9.978135393699484e-05, "loss": 3.5454475402832033, "step": 85830 }, { "epoch": 0.0264, "grad_norm": 0.2377568632364273, "learning_rate": 9.977980659451548e-05, "loss": 3.524850845336914, "step": 85840 }, { "epoch": 0.0265, "grad_norm": 0.2254606932401657, "learning_rate": 9.977825380819135e-05, "loss": 3.572825622558594, "step": 85850 }, { "epoch": 0.0266, "grad_norm": 0.2635759115219116, "learning_rate": 9.97766955781922e-05, "loss": 3.543880081176758, "step": 85860 }, { "epoch": 0.0267, "grad_norm": 0.23378437757492065, "learning_rate": 9.977513190468848e-05, "loss": 3.5434852600097657, "step": 85870 }, { "epoch": 0.0268, "grad_norm": 0.2603974938392639, "learning_rate": 9.977356278785116e-05, "loss": 3.587119293212891, "step": 85880 }, { "epoch": 0.0269, "grad_norm": 0.3638036847114563, "learning_rate": 9.977198822785184e-05, "loss": 3.5810420989990233, "step": 85890 }, { "epoch": 0.027, "grad_norm": 0.2505592107772827, "learning_rate": 9.977040822486273e-05, "loss": 3.519091033935547, "step": 85900 }, { "epoch": 0.0271, "grad_norm": 0.27783986926078796, "learning_rate": 9.97688227790566e-05, "loss": 3.562078857421875, "step": 85910 }, { "epoch": 0.0272, "grad_norm": 0.27582547068595886, "learning_rate": 9.976723189060684e-05, "loss": 3.5725261688232424, "step": 85920 }, { "epoch": 0.0273, "grad_norm": 0.2812955677509308, "learning_rate": 9.976563555968742e-05, "loss": 3.586712646484375, "step": 85930 }, { "epoch": 0.0274, "grad_norm": 0.2693924605846405, "learning_rate": 9.976403378647292e-05, "loss": 3.5260692596435548, "step": 85940 }, { "epoch": 0.0275, "grad_norm": 0.23856335878372192, "learning_rate": 9.97624265711385e-05, "loss": 3.5269500732421877, "step": 85950 }, { "epoch": 0.0276, "grad_norm": 0.25507020950317383, "learning_rate": 9.976081391385993e-05, "loss": 3.538066101074219, "step": 85960 }, { "epoch": 0.0277, "grad_norm": 0.26610663533210754, "learning_rate": 9.975919581481356e-05, "loss": 3.543822479248047, "step": 85970 }, { "epoch": 0.0278, "grad_norm": 0.2360590398311615, "learning_rate": 9.975757227417634e-05, "loss": 3.5951602935791014, "step": 85980 }, { "epoch": 0.0279, "grad_norm": 0.23227424919605255, "learning_rate": 9.975594329212586e-05, "loss": 3.53214111328125, "step": 85990 }, { "epoch": 0.028, "grad_norm": 0.2758643925189972, "learning_rate": 9.97543088688402e-05, "loss": 3.5416709899902346, "step": 86000 }, { "epoch": 0.0281, "grad_norm": 0.2786068916320801, "learning_rate": 9.975266900449814e-05, "loss": 3.6856075286865235, "step": 86010 }, { "epoch": 0.0282, "grad_norm": 0.24467229843139648, "learning_rate": 9.975102369927898e-05, "loss": 3.542380142211914, "step": 86020 }, { "epoch": 0.0283, "grad_norm": 0.23049572110176086, "learning_rate": 9.974937295336269e-05, "loss": 3.5651481628417967, "step": 86030 }, { "epoch": 0.0284, "grad_norm": 0.2629784345626831, "learning_rate": 9.974771676692975e-05, "loss": 3.5652305603027346, "step": 86040 }, { "epoch": 0.0285, "grad_norm": 0.23925773799419403, "learning_rate": 9.974605514016131e-05, "loss": 3.544325256347656, "step": 86050 }, { "epoch": 0.0286, "grad_norm": 0.2569584548473358, "learning_rate": 9.974438807323907e-05, "loss": 3.5688503265380858, "step": 86060 }, { "epoch": 0.0287, "grad_norm": 0.23051214218139648, "learning_rate": 9.974271556634535e-05, "loss": 3.523692321777344, "step": 86070 }, { "epoch": 0.0288, "grad_norm": 0.2549886107444763, "learning_rate": 9.974103761966302e-05, "loss": 3.589580535888672, "step": 86080 }, { "epoch": 0.0289, "grad_norm": 0.22411562502384186, "learning_rate": 9.973935423337563e-05, "loss": 3.4966278076171875, "step": 86090 }, { "epoch": 0.029, "grad_norm": 0.4035455584526062, "learning_rate": 9.973766540766722e-05, "loss": 3.628371810913086, "step": 86100 }, { "epoch": 0.0291, "grad_norm": 0.2180011123418808, "learning_rate": 9.97359711427225e-05, "loss": 3.559170150756836, "step": 86110 }, { "epoch": 0.0292, "grad_norm": 0.26745229959487915, "learning_rate": 9.973427143872677e-05, "loss": 3.652248764038086, "step": 86120 }, { "epoch": 0.0293, "grad_norm": 0.3266768157482147, "learning_rate": 9.973256629586589e-05, "loss": 3.612881088256836, "step": 86130 }, { "epoch": 0.0294, "grad_norm": 0.3141908347606659, "learning_rate": 9.973085571432632e-05, "loss": 3.577619171142578, "step": 86140 }, { "epoch": 0.0295, "grad_norm": 0.23842748999595642, "learning_rate": 9.972913969429513e-05, "loss": 3.545775604248047, "step": 86150 }, { "epoch": 0.0296, "grad_norm": 0.22997666895389557, "learning_rate": 9.972741823596e-05, "loss": 3.6512195587158205, "step": 86160 }, { "epoch": 0.0297, "grad_norm": 0.2464122325181961, "learning_rate": 9.972569133950917e-05, "loss": 3.4714424133300783, "step": 86170 }, { "epoch": 0.0298, "grad_norm": 0.23178745806217194, "learning_rate": 9.972395900513151e-05, "loss": 3.5572746276855467, "step": 86180 }, { "epoch": 0.0299, "grad_norm": 0.23654168844223022, "learning_rate": 9.972222123301645e-05, "loss": 3.515637969970703, "step": 86190 }, { "epoch": 0.03, "grad_norm": 0.276481568813324, "learning_rate": 9.972047802335403e-05, "loss": 3.5499847412109373, "step": 86200 }, { "epoch": 0.0301, "grad_norm": 0.24733297526836395, "learning_rate": 9.971872937633488e-05, "loss": 3.5223976135253907, "step": 86210 }, { "epoch": 0.0302, "grad_norm": 0.24473869800567627, "learning_rate": 9.971697529215024e-05, "loss": 3.552435302734375, "step": 86220 }, { "epoch": 0.0303, "grad_norm": 0.2503977417945862, "learning_rate": 9.971521577099192e-05, "loss": 3.5326679229736326, "step": 86230 }, { "epoch": 0.0304, "grad_norm": 0.28221395611763, "learning_rate": 9.971345081305236e-05, "loss": 3.571624755859375, "step": 86240 }, { "epoch": 0.0305, "grad_norm": 0.27032190561294556, "learning_rate": 9.971168041852456e-05, "loss": 3.559336471557617, "step": 86250 }, { "epoch": 0.0306, "grad_norm": 0.23588159680366516, "learning_rate": 9.970990458760215e-05, "loss": 3.5758243560791017, "step": 86260 }, { "epoch": 0.0307, "grad_norm": 0.3419370651245117, "learning_rate": 9.970812332047929e-05, "loss": 3.5643283843994142, "step": 86270 }, { "epoch": 0.0308, "grad_norm": 0.24446982145309448, "learning_rate": 9.97063366173508e-05, "loss": 3.530550003051758, "step": 86280 }, { "epoch": 0.0309, "grad_norm": 0.22315742075443268, "learning_rate": 9.970454447841207e-05, "loss": 3.540903854370117, "step": 86290 }, { "epoch": 0.031, "grad_norm": 0.2406078726053238, "learning_rate": 9.970274690385909e-05, "loss": 3.519917297363281, "step": 86300 }, { "epoch": 0.0311, "grad_norm": 0.2340644747018814, "learning_rate": 9.970094389388844e-05, "loss": 3.495412063598633, "step": 86310 }, { "epoch": 0.0312, "grad_norm": 0.26140597462654114, "learning_rate": 9.969913544869728e-05, "loss": 3.516475296020508, "step": 86320 }, { "epoch": 0.0313, "grad_norm": 0.23784679174423218, "learning_rate": 9.96973215684834e-05, "loss": 3.5184513092041017, "step": 86330 }, { "epoch": 0.0314, "grad_norm": 0.26336389780044556, "learning_rate": 9.969550225344513e-05, "loss": 3.5311885833740235, "step": 86340 }, { "epoch": 0.0315, "grad_norm": 0.7982680201530457, "learning_rate": 9.969367750378147e-05, "loss": 3.6185916900634765, "step": 86350 }, { "epoch": 0.0316, "grad_norm": 0.24621133506298065, "learning_rate": 9.969184731969194e-05, "loss": 3.5918624877929686, "step": 86360 }, { "epoch": 0.0317, "grad_norm": 0.2201436460018158, "learning_rate": 9.96900117013767e-05, "loss": 3.6284595489501954, "step": 86370 }, { "epoch": 0.0318, "grad_norm": 0.26922062039375305, "learning_rate": 9.96881706490365e-05, "loss": 3.5191715240478514, "step": 86380 }, { "epoch": 0.0319, "grad_norm": 0.23400968313217163, "learning_rate": 9.968632416287265e-05, "loss": 3.5420841217041015, "step": 86390 }, { "epoch": 0.032, "grad_norm": 0.3328185975551605, "learning_rate": 9.96844722430871e-05, "loss": 3.5460227966308593, "step": 86400 }, { "epoch": 0.0321, "grad_norm": 0.24629555642604828, "learning_rate": 9.968261488988235e-05, "loss": 3.566570281982422, "step": 86410 }, { "epoch": 0.0322, "grad_norm": 0.22251945734024048, "learning_rate": 9.968075210346155e-05, "loss": 3.551888275146484, "step": 86420 }, { "epoch": 0.0323, "grad_norm": 0.22824624180793762, "learning_rate": 9.967888388402839e-05, "loss": 3.7954971313476564, "step": 86430 }, { "epoch": 0.0324, "grad_norm": 0.23136557638645172, "learning_rate": 9.967701023178717e-05, "loss": 3.493439483642578, "step": 86440 }, { "epoch": 0.0325, "grad_norm": 0.2427946925163269, "learning_rate": 9.967513114694282e-05, "loss": 3.519550323486328, "step": 86450 }, { "epoch": 0.0326, "grad_norm": 0.22986049950122833, "learning_rate": 9.967324662970079e-05, "loss": 3.4548366546630858, "step": 86460 }, { "epoch": 0.0327, "grad_norm": 0.44554460048675537, "learning_rate": 9.96713566802672e-05, "loss": 3.587781524658203, "step": 86470 }, { "epoch": 0.0328, "grad_norm": 0.24687790870666504, "learning_rate": 9.966946129884873e-05, "loss": 3.4787353515625, "step": 86480 }, { "epoch": 0.0329, "grad_norm": 0.2708691656589508, "learning_rate": 9.966756048565265e-05, "loss": 3.6029510498046875, "step": 86490 }, { "epoch": 0.033, "grad_norm": 0.22812286019325256, "learning_rate": 9.966565424088681e-05, "loss": 3.5425640106201173, "step": 86500 }, { "epoch": 0.0331, "grad_norm": 0.257814884185791, "learning_rate": 9.96637425647597e-05, "loss": 3.5223087310791015, "step": 86510 }, { "epoch": 0.0332, "grad_norm": 0.7189636826515198, "learning_rate": 9.966182545748038e-05, "loss": 3.658791732788086, "step": 86520 }, { "epoch": 0.0333, "grad_norm": 0.2598377466201782, "learning_rate": 9.96599029192585e-05, "loss": 3.5704761505126954, "step": 86530 }, { "epoch": 0.0334, "grad_norm": 0.21975110471248627, "learning_rate": 9.965797495030428e-05, "loss": 3.563149642944336, "step": 86540 }, { "epoch": 0.0335, "grad_norm": 0.23634259402751923, "learning_rate": 9.96560415508286e-05, "loss": 3.6047389984130858, "step": 86550 }, { "epoch": 0.0336, "grad_norm": 0.30092036724090576, "learning_rate": 9.965410272104286e-05, "loss": 3.55379638671875, "step": 86560 }, { "epoch": 0.0337, "grad_norm": 0.2408142387866974, "learning_rate": 9.96521584611591e-05, "loss": 3.5009265899658204, "step": 86570 }, { "epoch": 0.0338, "grad_norm": 0.24401724338531494, "learning_rate": 9.965020877138994e-05, "loss": 3.532696533203125, "step": 86580 }, { "epoch": 0.0339, "grad_norm": 0.236811101436615, "learning_rate": 9.964825365194861e-05, "loss": 3.4810935974121096, "step": 86590 }, { "epoch": 0.034, "grad_norm": 0.2461588978767395, "learning_rate": 9.96462931030489e-05, "loss": 3.5590286254882812, "step": 86600 }, { "epoch": 0.0341, "grad_norm": 0.24463453888893127, "learning_rate": 9.96443271249052e-05, "loss": 3.5136062622070314, "step": 86610 }, { "epoch": 0.0342, "grad_norm": 0.22821012139320374, "learning_rate": 9.964235571773255e-05, "loss": 3.549204635620117, "step": 86620 }, { "epoch": 0.0343, "grad_norm": 0.7006849050521851, "learning_rate": 9.96403788817465e-05, "loss": 3.646626663208008, "step": 86630 }, { "epoch": 0.0344, "grad_norm": 0.2429393231868744, "learning_rate": 9.963839661716325e-05, "loss": 3.589604949951172, "step": 86640 }, { "epoch": 0.0345, "grad_norm": 0.23724322021007538, "learning_rate": 9.963640892419958e-05, "loss": 3.5419536590576173, "step": 86650 }, { "epoch": 0.0346, "grad_norm": 0.2366449385881424, "learning_rate": 9.963441580307286e-05, "loss": 3.5712677001953126, "step": 86660 }, { "epoch": 0.0347, "grad_norm": 0.22230412065982819, "learning_rate": 9.963241725400104e-05, "loss": 3.550762939453125, "step": 86670 }, { "epoch": 0.0348, "grad_norm": 0.22035905718803406, "learning_rate": 9.963041327720271e-05, "loss": 3.5128196716308593, "step": 86680 }, { "epoch": 0.0349, "grad_norm": 0.2889842987060547, "learning_rate": 9.962840387289697e-05, "loss": 3.6475540161132813, "step": 86690 }, { "epoch": 0.035, "grad_norm": 0.22476686537265778, "learning_rate": 9.962638904130363e-05, "loss": 3.539365768432617, "step": 86700 }, { "epoch": 0.0351, "grad_norm": 0.22089125216007233, "learning_rate": 9.962436878264298e-05, "loss": 3.558451461791992, "step": 86710 }, { "epoch": 0.0352, "grad_norm": 0.22724896669387817, "learning_rate": 9.962234309713598e-05, "loss": 3.5054359436035156, "step": 86720 }, { "epoch": 0.0353, "grad_norm": 0.22698794305324554, "learning_rate": 9.962031198500414e-05, "loss": 3.488138198852539, "step": 86730 }, { "epoch": 0.0354, "grad_norm": 0.22052790224552155, "learning_rate": 9.961827544646958e-05, "loss": 3.5264102935791017, "step": 86740 }, { "epoch": 0.0355, "grad_norm": 0.5088785290718079, "learning_rate": 9.961623348175501e-05, "loss": 3.5621829986572267, "step": 86750 }, { "epoch": 0.0356, "grad_norm": 0.24433596432209015, "learning_rate": 9.961418609108377e-05, "loss": 3.5696880340576174, "step": 86760 }, { "epoch": 0.0357, "grad_norm": 0.23283378779888153, "learning_rate": 9.961213327467971e-05, "loss": 3.5053569793701174, "step": 86770 }, { "epoch": 0.0358, "grad_norm": 0.2503364086151123, "learning_rate": 9.961007503276736e-05, "loss": 3.5167388916015625, "step": 86780 }, { "epoch": 0.0359, "grad_norm": 0.3051312267780304, "learning_rate": 9.960801136557179e-05, "loss": 3.473704147338867, "step": 86790 }, { "epoch": 0.036, "grad_norm": 0.22353413701057434, "learning_rate": 9.960594227331866e-05, "loss": 3.5780208587646483, "step": 86800 }, { "epoch": 0.0361, "grad_norm": 0.24655623733997345, "learning_rate": 9.960386775623429e-05, "loss": 3.4981124877929686, "step": 86810 }, { "epoch": 0.0362, "grad_norm": 0.37959155440330505, "learning_rate": 9.96017878145455e-05, "loss": 3.454939270019531, "step": 86820 }, { "epoch": 0.0363, "grad_norm": 0.2530515789985657, "learning_rate": 9.959970244847977e-05, "loss": 3.5425491333007812, "step": 86830 }, { "epoch": 0.0364, "grad_norm": 0.235835000872612, "learning_rate": 9.959761165826518e-05, "loss": 3.5380516052246094, "step": 86840 }, { "epoch": 0.0365, "grad_norm": 0.2573254406452179, "learning_rate": 9.959551544413033e-05, "loss": 3.565522003173828, "step": 86850 }, { "epoch": 0.0366, "grad_norm": 0.24530521035194397, "learning_rate": 9.959341380630448e-05, "loss": 3.4831092834472654, "step": 86860 }, { "epoch": 0.0367, "grad_norm": 0.2265908420085907, "learning_rate": 9.959130674501746e-05, "loss": 3.512775421142578, "step": 86870 }, { "epoch": 0.0368, "grad_norm": 0.24352923035621643, "learning_rate": 9.958919426049968e-05, "loss": 3.5084068298339846, "step": 86880 }, { "epoch": 0.0369, "grad_norm": 0.2197597771883011, "learning_rate": 9.958707635298219e-05, "loss": 3.475159454345703, "step": 86890 }, { "epoch": 0.037, "grad_norm": 0.27145445346832275, "learning_rate": 9.958495302269657e-05, "loss": 3.629647445678711, "step": 86900 }, { "epoch": 0.0371, "grad_norm": 0.23333457112312317, "learning_rate": 9.958282426987503e-05, "loss": 3.5136219024658204, "step": 86910 }, { "epoch": 0.0372, "grad_norm": 0.26160719990730286, "learning_rate": 9.95806900947504e-05, "loss": 3.589425277709961, "step": 86920 }, { "epoch": 0.0373, "grad_norm": 0.4414494037628174, "learning_rate": 9.957855049755604e-05, "loss": 3.7182918548583985, "step": 86930 }, { "epoch": 0.0374, "grad_norm": 0.2377568632364273, "learning_rate": 9.957640547852593e-05, "loss": 3.574993896484375, "step": 86940 }, { "epoch": 0.0375, "grad_norm": 0.2676723599433899, "learning_rate": 9.957425503789466e-05, "loss": 3.5500152587890623, "step": 86950 }, { "epoch": 0.0376, "grad_norm": 0.22574037313461304, "learning_rate": 9.957209917589738e-05, "loss": 3.502276611328125, "step": 86960 }, { "epoch": 0.0377, "grad_norm": 0.2214985191822052, "learning_rate": 9.956993789276987e-05, "loss": 3.5094379425048827, "step": 86970 }, { "epoch": 0.0378, "grad_norm": 0.2247961014509201, "learning_rate": 9.956777118874847e-05, "loss": 3.494683837890625, "step": 86980 }, { "epoch": 0.0379, "grad_norm": 0.25474363565444946, "learning_rate": 9.956559906407016e-05, "loss": 3.475168991088867, "step": 86990 }, { "epoch": 0.038, "grad_norm": 0.2989141345024109, "learning_rate": 9.956342151897245e-05, "loss": 3.501174545288086, "step": 87000 }, { "epoch": 0.0381, "grad_norm": 0.25852909684181213, "learning_rate": 9.956123855369346e-05, "loss": 3.489072799682617, "step": 87010 }, { "epoch": 0.0382, "grad_norm": 0.21059463918209076, "learning_rate": 9.955905016847196e-05, "loss": 3.486780548095703, "step": 87020 }, { "epoch": 0.0383, "grad_norm": 0.22944959998130798, "learning_rate": 9.955685636354723e-05, "loss": 3.4915592193603517, "step": 87030 }, { "epoch": 0.0384, "grad_norm": 0.24829331040382385, "learning_rate": 9.95546571391592e-05, "loss": 3.537678909301758, "step": 87040 }, { "epoch": 0.0385, "grad_norm": 0.253560334444046, "learning_rate": 9.955245249554837e-05, "loss": 3.4627197265625, "step": 87050 }, { "epoch": 0.0386, "grad_norm": 0.22625349462032318, "learning_rate": 9.955024243295582e-05, "loss": 3.526144790649414, "step": 87060 }, { "epoch": 0.0387, "grad_norm": 0.23981168866157532, "learning_rate": 9.954802695162328e-05, "loss": 3.651128387451172, "step": 87070 }, { "epoch": 0.0388, "grad_norm": 0.21188385784626007, "learning_rate": 9.954580605179302e-05, "loss": 3.4992820739746096, "step": 87080 }, { "epoch": 0.0389, "grad_norm": 0.20997042953968048, "learning_rate": 9.954357973370788e-05, "loss": 3.5104774475097655, "step": 87090 }, { "epoch": 0.039, "grad_norm": 0.21936026215553284, "learning_rate": 9.954134799761135e-05, "loss": 3.483306884765625, "step": 87100 }, { "epoch": 0.0391, "grad_norm": 0.6412197351455688, "learning_rate": 9.953911084374748e-05, "loss": 3.5444759368896483, "step": 87110 }, { "epoch": 0.0392, "grad_norm": 0.22256293892860413, "learning_rate": 9.953686827236093e-05, "loss": 3.674502944946289, "step": 87120 }, { "epoch": 0.0393, "grad_norm": 0.2200121432542801, "learning_rate": 9.953462028369695e-05, "loss": 3.4882282257080077, "step": 87130 }, { "epoch": 0.0394, "grad_norm": 0.2311324030160904, "learning_rate": 9.953236687800136e-05, "loss": 3.572439956665039, "step": 87140 }, { "epoch": 0.0395, "grad_norm": 0.21785826981067657, "learning_rate": 9.95301080555206e-05, "loss": 3.502328872680664, "step": 87150 }, { "epoch": 0.0396, "grad_norm": 0.38882070779800415, "learning_rate": 9.952784381650171e-05, "loss": 3.5151954650878907, "step": 87160 }, { "epoch": 0.0397, "grad_norm": 0.2375527024269104, "learning_rate": 9.952557416119226e-05, "loss": 3.5366649627685547, "step": 87170 }, { "epoch": 0.0398, "grad_norm": 0.22945114970207214, "learning_rate": 9.95232990898405e-05, "loss": 3.479144287109375, "step": 87180 }, { "epoch": 0.0399, "grad_norm": 0.2675630450248718, "learning_rate": 9.95210186026952e-05, "loss": 3.524378204345703, "step": 87190 }, { "epoch": 0.04, "grad_norm": 0.22728347778320312, "learning_rate": 9.951873270000576e-05, "loss": 3.618131637573242, "step": 87200 }, { "epoch": 0.0401, "grad_norm": 0.22647036612033844, "learning_rate": 9.951644138202216e-05, "loss": 3.4421878814697267, "step": 87210 }, { "epoch": 0.0402, "grad_norm": 0.3725104033946991, "learning_rate": 9.951414464899498e-05, "loss": 3.5570526123046875, "step": 87220 }, { "epoch": 0.0403, "grad_norm": 0.2510637640953064, "learning_rate": 9.951184250117538e-05, "loss": 3.573494720458984, "step": 87230 }, { "epoch": 0.0404, "grad_norm": 0.23112894594669342, "learning_rate": 9.950953493881513e-05, "loss": 3.4580966949462892, "step": 87240 }, { "epoch": 0.0405, "grad_norm": 0.22407910227775574, "learning_rate": 9.950722196216658e-05, "loss": 3.4951072692871095, "step": 87250 }, { "epoch": 0.0406, "grad_norm": 0.24626241624355316, "learning_rate": 9.950490357148265e-05, "loss": 3.441891098022461, "step": 87260 }, { "epoch": 0.0407, "grad_norm": 0.21892742812633514, "learning_rate": 9.950257976701692e-05, "loss": 3.4867233276367187, "step": 87270 }, { "epoch": 0.0408, "grad_norm": 0.23128166794776917, "learning_rate": 9.950025054902348e-05, "loss": 3.5898468017578127, "step": 87280 }, { "epoch": 0.0409, "grad_norm": 0.22768433392047882, "learning_rate": 9.949791591775706e-05, "loss": 3.4650142669677733, "step": 87290 }, { "epoch": 0.041, "grad_norm": 0.242142453789711, "learning_rate": 9.949557587347298e-05, "loss": 3.4743362426757813, "step": 87300 }, { "epoch": 0.0411, "grad_norm": 0.21424978971481323, "learning_rate": 9.949323041642713e-05, "loss": 3.492284393310547, "step": 87310 }, { "epoch": 0.0412, "grad_norm": 0.22114643454551697, "learning_rate": 9.949087954687602e-05, "loss": 3.566326141357422, "step": 87320 }, { "epoch": 0.0413, "grad_norm": 0.23026612401008606, "learning_rate": 9.948852326507672e-05, "loss": 3.509130096435547, "step": 87330 }, { "epoch": 0.0414, "grad_norm": 0.2993336319923401, "learning_rate": 9.948616157128694e-05, "loss": 3.496174621582031, "step": 87340 }, { "epoch": 0.0415, "grad_norm": 0.3403513431549072, "learning_rate": 9.948379446576493e-05, "loss": 3.512033462524414, "step": 87350 }, { "epoch": 0.0416, "grad_norm": 0.23015788197517395, "learning_rate": 9.948142194876952e-05, "loss": 3.763461685180664, "step": 87360 }, { "epoch": 0.0417, "grad_norm": 0.22674298286437988, "learning_rate": 9.947904402056024e-05, "loss": 3.518315887451172, "step": 87370 }, { "epoch": 0.0418, "grad_norm": 0.20918603241443634, "learning_rate": 9.947666068139708e-05, "loss": 3.457122802734375, "step": 87380 }, { "epoch": 0.0419, "grad_norm": 0.26207005977630615, "learning_rate": 9.947427193154071e-05, "loss": 3.5162277221679688, "step": 87390 }, { "epoch": 0.042, "grad_norm": 0.21584054827690125, "learning_rate": 9.947187777125233e-05, "loss": 3.466419219970703, "step": 87400 }, { "epoch": 0.0421, "grad_norm": 0.27594462037086487, "learning_rate": 9.946947820079377e-05, "loss": 3.5102783203125, "step": 87410 }, { "epoch": 0.0422, "grad_norm": 0.3398813307285309, "learning_rate": 9.946707322042747e-05, "loss": 3.5125961303710938, "step": 87420 }, { "epoch": 0.0423, "grad_norm": 0.2285033017396927, "learning_rate": 9.94646628304164e-05, "loss": 3.528242492675781, "step": 87430 }, { "epoch": 0.0424, "grad_norm": 0.24180272221565247, "learning_rate": 9.946224703102418e-05, "loss": 3.5903446197509767, "step": 87440 }, { "epoch": 0.0425, "grad_norm": 0.22291679680347443, "learning_rate": 9.945982582251498e-05, "loss": 3.431362533569336, "step": 87450 }, { "epoch": 0.0426, "grad_norm": 0.2241559773683548, "learning_rate": 9.94573992051536e-05, "loss": 3.5814159393310545, "step": 87460 }, { "epoch": 0.0427, "grad_norm": 0.22420759499073029, "learning_rate": 9.94549671792054e-05, "loss": 3.4340133666992188, "step": 87470 }, { "epoch": 0.0428, "grad_norm": 0.20814333856105804, "learning_rate": 9.945252974493635e-05, "loss": 3.461866760253906, "step": 87480 }, { "epoch": 0.0429, "grad_norm": 0.2102765589952469, "learning_rate": 9.9450086902613e-05, "loss": 3.4968128204345703, "step": 87490 }, { "epoch": 0.043, "grad_norm": 0.23011274635791779, "learning_rate": 9.944763865250248e-05, "loss": 3.539672088623047, "step": 87500 }, { "epoch": 0.0431, "grad_norm": 0.2450559288263321, "learning_rate": 9.944518499487254e-05, "loss": 3.4776374816894533, "step": 87510 }, { "epoch": 0.0432, "grad_norm": 0.23029664158821106, "learning_rate": 9.944272592999151e-05, "loss": 3.5025753021240233, "step": 87520 }, { "epoch": 0.0433, "grad_norm": 0.20505572855472565, "learning_rate": 9.94402614581283e-05, "loss": 3.465119552612305, "step": 87530 }, { "epoch": 0.0434, "grad_norm": 0.2184097021818161, "learning_rate": 9.943779157955244e-05, "loss": 3.46063346862793, "step": 87540 }, { "epoch": 0.0435, "grad_norm": 0.2131258249282837, "learning_rate": 9.943531629453403e-05, "loss": 3.4603450775146483, "step": 87550 }, { "epoch": 0.0436, "grad_norm": 0.2167542576789856, "learning_rate": 9.943283560334375e-05, "loss": 3.452219772338867, "step": 87560 }, { "epoch": 0.0437, "grad_norm": 0.20982196927070618, "learning_rate": 9.943034950625288e-05, "loss": 3.446381378173828, "step": 87570 }, { "epoch": 0.0438, "grad_norm": 0.2265927493572235, "learning_rate": 9.942785800353332e-05, "loss": 3.483041763305664, "step": 87580 }, { "epoch": 0.0439, "grad_norm": 0.22275805473327637, "learning_rate": 9.942536109545751e-05, "loss": 3.4848876953125, "step": 87590 }, { "epoch": 0.044, "grad_norm": 0.2150101214647293, "learning_rate": 9.942285878229853e-05, "loss": 3.449690246582031, "step": 87600 }, { "epoch": 0.0441, "grad_norm": 0.31510916352272034, "learning_rate": 9.942035106433001e-05, "loss": 3.4738346099853517, "step": 87610 }, { "epoch": 0.0442, "grad_norm": 0.20170903205871582, "learning_rate": 9.94178379418262e-05, "loss": 3.484848403930664, "step": 87620 }, { "epoch": 0.0443, "grad_norm": 0.202318474650383, "learning_rate": 9.941531941506194e-05, "loss": 3.5212066650390623, "step": 87630 }, { "epoch": 0.0444, "grad_norm": 0.20829206705093384, "learning_rate": 9.941279548431263e-05, "loss": 3.492949676513672, "step": 87640 }, { "epoch": 0.0445, "grad_norm": 0.20682312548160553, "learning_rate": 9.941026614985431e-05, "loss": 3.4830909729003907, "step": 87650 }, { "epoch": 0.0446, "grad_norm": 0.20790083706378937, "learning_rate": 9.940773141196357e-05, "loss": 3.4723243713378906, "step": 87660 }, { "epoch": 0.0447, "grad_norm": 0.24451112747192383, "learning_rate": 9.94051912709176e-05, "loss": 3.44329948425293, "step": 87670 }, { "epoch": 0.0448, "grad_norm": 0.6788707971572876, "learning_rate": 9.940264572699421e-05, "loss": 3.4924007415771485, "step": 87680 }, { "epoch": 0.0449, "grad_norm": 0.22288651764392853, "learning_rate": 9.940009478047174e-05, "loss": 3.4692554473876953, "step": 87690 }, { "epoch": 0.045, "grad_norm": 0.20806677639484406, "learning_rate": 9.939753843162918e-05, "loss": 3.4302738189697264, "step": 87700 }, { "epoch": 0.0451, "grad_norm": 0.1970839500427246, "learning_rate": 9.939497668074609e-05, "loss": 3.4425384521484377, "step": 87710 }, { "epoch": 0.0452, "grad_norm": 0.22695882618427277, "learning_rate": 9.93924095281026e-05, "loss": 3.4967575073242188, "step": 87720 }, { "epoch": 0.0453, "grad_norm": 0.2068215310573578, "learning_rate": 9.938983697397948e-05, "loss": 3.457621383666992, "step": 87730 }, { "epoch": 0.0454, "grad_norm": 0.20516575872898102, "learning_rate": 9.938725901865805e-05, "loss": 3.4575443267822266, "step": 87740 }, { "epoch": 0.0455, "grad_norm": 0.20642898976802826, "learning_rate": 9.93846756624202e-05, "loss": 3.443154525756836, "step": 87750 }, { "epoch": 0.0456, "grad_norm": 0.2216077744960785, "learning_rate": 9.938208690554849e-05, "loss": 3.475957489013672, "step": 87760 }, { "epoch": 0.0457, "grad_norm": 0.2133660465478897, "learning_rate": 9.9379492748326e-05, "loss": 3.4443634033203123, "step": 87770 }, { "epoch": 0.0458, "grad_norm": 0.21034245193004608, "learning_rate": 9.937689319103641e-05, "loss": 3.4486026763916016, "step": 87780 }, { "epoch": 0.0459, "grad_norm": 0.23072350025177002, "learning_rate": 9.937428823396404e-05, "loss": 3.4670520782470704, "step": 87790 }, { "epoch": 0.046, "grad_norm": 0.20785468816757202, "learning_rate": 9.937167787739372e-05, "loss": 3.422760772705078, "step": 87800 }, { "epoch": 0.0461, "grad_norm": 0.42528074979782104, "learning_rate": 9.936906212161095e-05, "loss": 3.6702789306640624, "step": 87810 }, { "epoch": 0.0462, "grad_norm": 0.21889372169971466, "learning_rate": 9.936644096690176e-05, "loss": 3.437454605102539, "step": 87820 }, { "epoch": 0.0463, "grad_norm": 0.21360132098197937, "learning_rate": 9.936381441355282e-05, "loss": 3.435292434692383, "step": 87830 }, { "epoch": 0.0464, "grad_norm": 0.22827357053756714, "learning_rate": 9.936118246185136e-05, "loss": 3.555126953125, "step": 87840 }, { "epoch": 0.0465, "grad_norm": 0.2259804755449295, "learning_rate": 9.935854511208518e-05, "loss": 3.4523799896240233, "step": 87850 }, { "epoch": 0.0466, "grad_norm": 0.21691113710403442, "learning_rate": 9.935590236454272e-05, "loss": 3.46997184753418, "step": 87860 }, { "epoch": 0.0467, "grad_norm": 0.2235109806060791, "learning_rate": 9.935325421951298e-05, "loss": 3.5263381958007813, "step": 87870 }, { "epoch": 0.0468, "grad_norm": 0.22141925990581512, "learning_rate": 9.935060067728557e-05, "loss": 3.5978321075439452, "step": 87880 }, { "epoch": 0.0469, "grad_norm": 0.20492106676101685, "learning_rate": 9.934794173815067e-05, "loss": 3.4131378173828124, "step": 87890 }, { "epoch": 0.047, "grad_norm": 0.274261474609375, "learning_rate": 9.934527740239906e-05, "loss": 3.4911231994628906, "step": 87900 }, { "epoch": 0.0471, "grad_norm": 0.3009234666824341, "learning_rate": 9.934260767032209e-05, "loss": 3.6233985900878904, "step": 87910 }, { "epoch": 0.0472, "grad_norm": 0.2513240575790405, "learning_rate": 9.933993254221172e-05, "loss": 3.591939926147461, "step": 87920 }, { "epoch": 0.0473, "grad_norm": 0.23563043773174286, "learning_rate": 9.933725201836053e-05, "loss": 3.4712120056152345, "step": 87930 }, { "epoch": 0.0474, "grad_norm": 0.22870992124080658, "learning_rate": 9.933456609906162e-05, "loss": 3.462660217285156, "step": 87940 }, { "epoch": 0.0475, "grad_norm": 0.21629232168197632, "learning_rate": 9.933187478460875e-05, "loss": 3.425070953369141, "step": 87950 }, { "epoch": 0.0476, "grad_norm": 0.2081223428249359, "learning_rate": 9.93291780752962e-05, "loss": 3.4479732513427734, "step": 87960 }, { "epoch": 0.0477, "grad_norm": 0.22758755087852478, "learning_rate": 9.932647597141893e-05, "loss": 3.5144393920898436, "step": 87970 }, { "epoch": 0.0478, "grad_norm": 0.2109268307685852, "learning_rate": 9.932376847327239e-05, "loss": 3.4865749359130858, "step": 87980 }, { "epoch": 0.0479, "grad_norm": 0.2091103196144104, "learning_rate": 9.932105558115268e-05, "loss": 3.605680465698242, "step": 87990 }, { "epoch": 0.048, "grad_norm": 0.2202194482088089, "learning_rate": 9.931833729535651e-05, "loss": 3.4654556274414063, "step": 88000 }, { "epoch": 0.0481, "grad_norm": 0.22847074270248413, "learning_rate": 9.931561361618111e-05, "loss": 3.497611236572266, "step": 88010 }, { "epoch": 0.0482, "grad_norm": 0.22244277596473694, "learning_rate": 9.931288454392435e-05, "loss": 3.4481185913085937, "step": 88020 }, { "epoch": 0.0483, "grad_norm": 0.25382542610168457, "learning_rate": 9.931015007888467e-05, "loss": 3.521778869628906, "step": 88030 }, { "epoch": 0.0484, "grad_norm": 0.21427449584007263, "learning_rate": 9.930741022136112e-05, "loss": 3.547662353515625, "step": 88040 }, { "epoch": 0.0485, "grad_norm": 0.27043306827545166, "learning_rate": 9.930466497165333e-05, "loss": 3.637647247314453, "step": 88050 }, { "epoch": 0.0486, "grad_norm": 0.20680159330368042, "learning_rate": 9.93019143300615e-05, "loss": 3.4027156829833984, "step": 88060 }, { "epoch": 0.0487, "grad_norm": 0.22643111646175385, "learning_rate": 9.929915829688644e-05, "loss": 3.474656677246094, "step": 88070 }, { "epoch": 0.0488, "grad_norm": 0.20598763227462769, "learning_rate": 9.929639687242955e-05, "loss": 3.3988388061523436, "step": 88080 }, { "epoch": 0.0489, "grad_norm": 0.22102606296539307, "learning_rate": 9.929363005699281e-05, "loss": 3.441686248779297, "step": 88090 }, { "epoch": 0.049, "grad_norm": 0.4055846631526947, "learning_rate": 9.92908578508788e-05, "loss": 3.42834358215332, "step": 88100 }, { "epoch": 0.0491, "grad_norm": 0.20581355690956116, "learning_rate": 9.928808025439069e-05, "loss": 3.498126983642578, "step": 88110 }, { "epoch": 0.0492, "grad_norm": 0.21253280341625214, "learning_rate": 9.928529726783223e-05, "loss": 3.4334564208984375, "step": 88120 }, { "epoch": 0.0493, "grad_norm": 0.21126846969127655, "learning_rate": 9.928250889150774e-05, "loss": 3.4784435272216796, "step": 88130 }, { "epoch": 0.0494, "grad_norm": 0.2041010558605194, "learning_rate": 9.92797151257222e-05, "loss": 3.4565383911132814, "step": 88140 }, { "epoch": 0.0495, "grad_norm": 0.22381795942783356, "learning_rate": 9.927691597078108e-05, "loss": 3.3681774139404297, "step": 88150 }, { "epoch": 0.0496, "grad_norm": 0.2129427045583725, "learning_rate": 9.927411142699053e-05, "loss": 3.4187793731689453, "step": 88160 }, { "epoch": 0.0497, "grad_norm": 0.1992781162261963, "learning_rate": 9.927130149465725e-05, "loss": 3.5202369689941406, "step": 88170 }, { "epoch": 0.0498, "grad_norm": 0.22106146812438965, "learning_rate": 9.92684861740885e-05, "loss": 3.4938732147216798, "step": 88180 }, { "epoch": 0.0499, "grad_norm": 0.22755417227745056, "learning_rate": 9.926566546559217e-05, "loss": 3.4571441650390624, "step": 88190 }, { "epoch": 0.05, "grad_norm": 0.21116143465042114, "learning_rate": 9.926283936947673e-05, "loss": 3.47540397644043, "step": 88200 }, { "epoch": 0.0501, "grad_norm": 0.25135543942451477, "learning_rate": 9.926000788605126e-05, "loss": 3.474293518066406, "step": 88210 }, { "epoch": 0.0502, "grad_norm": 0.20247307419776917, "learning_rate": 9.92571710156254e-05, "loss": 3.5368934631347657, "step": 88220 }, { "epoch": 0.0503, "grad_norm": 0.2370595782995224, "learning_rate": 9.925432875850936e-05, "loss": 3.299351119995117, "step": 88230 }, { "epoch": 0.0504, "grad_norm": 0.20980019867420197, "learning_rate": 9.925148111501396e-05, "loss": 3.419571304321289, "step": 88240 }, { "epoch": 0.0505, "grad_norm": 0.20460277795791626, "learning_rate": 9.924862808545066e-05, "loss": 3.421742630004883, "step": 88250 }, { "epoch": 0.0506, "grad_norm": 0.20013706386089325, "learning_rate": 9.924576967013141e-05, "loss": 3.433887481689453, "step": 88260 }, { "epoch": 0.0507, "grad_norm": 0.20746369659900665, "learning_rate": 9.924290586936887e-05, "loss": 3.420815277099609, "step": 88270 }, { "epoch": 0.0508, "grad_norm": 0.20271070301532745, "learning_rate": 9.924003668347614e-05, "loss": 3.431641387939453, "step": 88280 }, { "epoch": 0.0509, "grad_norm": 0.24390657246112823, "learning_rate": 9.923716211276704e-05, "loss": 3.4343593597412108, "step": 88290 }, { "epoch": 0.051, "grad_norm": 0.3958902955055237, "learning_rate": 9.923428215755594e-05, "loss": 3.3996536254882814, "step": 88300 }, { "epoch": 0.0511, "grad_norm": 0.21826477348804474, "learning_rate": 9.923139681815775e-05, "loss": 3.4292205810546874, "step": 88310 }, { "epoch": 0.0512, "grad_norm": 0.23290973901748657, "learning_rate": 9.922850609488801e-05, "loss": 3.3791458129882814, "step": 88320 }, { "epoch": 0.0513, "grad_norm": 0.20755083858966827, "learning_rate": 9.922560998806287e-05, "loss": 3.458829879760742, "step": 88330 }, { "epoch": 0.0514, "grad_norm": 0.2211775928735733, "learning_rate": 9.922270849799905e-05, "loss": 3.4258502960205077, "step": 88340 }, { "epoch": 0.0515, "grad_norm": 0.2246459573507309, "learning_rate": 9.92198016250138e-05, "loss": 3.46429443359375, "step": 88350 }, { "epoch": 0.0516, "grad_norm": 0.4560513496398926, "learning_rate": 9.921688936942506e-05, "loss": 3.4339221954345702, "step": 88360 }, { "epoch": 0.0517, "grad_norm": 0.25246477127075195, "learning_rate": 9.921397173155129e-05, "loss": 3.412601089477539, "step": 88370 }, { "epoch": 0.0518, "grad_norm": 0.20820581912994385, "learning_rate": 9.921104871171157e-05, "loss": 3.484527587890625, "step": 88380 }, { "epoch": 0.0519, "grad_norm": 0.19367246329784393, "learning_rate": 9.920812031022554e-05, "loss": 3.4511947631835938, "step": 88390 }, { "epoch": 0.052, "grad_norm": 0.20343661308288574, "learning_rate": 9.920518652741348e-05, "loss": 3.4330039978027345, "step": 88400 }, { "epoch": 0.0521, "grad_norm": 0.2045159488916397, "learning_rate": 9.920224736359618e-05, "loss": 3.475534439086914, "step": 88410 }, { "epoch": 0.0522, "grad_norm": 0.1953175961971283, "learning_rate": 9.91993028190951e-05, "loss": 3.4895450592041017, "step": 88420 }, { "epoch": 0.0523, "grad_norm": 0.2111949473619461, "learning_rate": 9.919635289423222e-05, "loss": 3.459346389770508, "step": 88430 }, { "epoch": 0.0524, "grad_norm": 0.2062598615884781, "learning_rate": 9.919339758933015e-05, "loss": 3.4337753295898437, "step": 88440 }, { "epoch": 0.0525, "grad_norm": 1.005130648612976, "learning_rate": 9.919043690471209e-05, "loss": 3.572843551635742, "step": 88450 }, { "epoch": 0.0526, "grad_norm": 132.48765563964844, "learning_rate": 9.91874708407018e-05, "loss": 5.462657165527344, "step": 88460 }, { "epoch": 0.0527, "grad_norm": 0.21032750606536865, "learning_rate": 9.918449939762367e-05, "loss": 4.512516784667969, "step": 88470 }, { "epoch": 0.0528, "grad_norm": 0.25138649344444275, "learning_rate": 9.91815225758026e-05, "loss": 3.5258323669433596, "step": 88480 }, { "epoch": 0.0529, "grad_norm": 0.23238541185855865, "learning_rate": 9.917854037556419e-05, "loss": 3.4861698150634766, "step": 88490 }, { "epoch": 0.053, "grad_norm": 0.20848235487937927, "learning_rate": 9.917555279723454e-05, "loss": 3.5034629821777346, "step": 88500 }, { "epoch": 0.0531, "grad_norm": 0.20651061832904816, "learning_rate": 9.917255984114036e-05, "loss": 3.4337642669677733, "step": 88510 }, { "epoch": 0.0532, "grad_norm": 0.2084576040506363, "learning_rate": 9.916956150760896e-05, "loss": 3.466422271728516, "step": 88520 }, { "epoch": 0.0533, "grad_norm": 0.22980253398418427, "learning_rate": 9.916655779696826e-05, "loss": 3.5865554809570312, "step": 88530 }, { "epoch": 0.0534, "grad_norm": 0.20767848193645477, "learning_rate": 9.916354870954671e-05, "loss": 3.4570602416992187, "step": 88540 }, { "epoch": 0.0535, "grad_norm": 0.639073371887207, "learning_rate": 9.91605342456734e-05, "loss": 3.6030319213867186, "step": 88550 }, { "epoch": 0.0536, "grad_norm": 0.21950949728488922, "learning_rate": 9.915751440567795e-05, "loss": 3.6912841796875, "step": 88560 }, { "epoch": 0.0537, "grad_norm": 0.20814180374145508, "learning_rate": 9.915448918989066e-05, "loss": 3.5825736999511717, "step": 88570 }, { "epoch": 0.0538, "grad_norm": 0.2894248366355896, "learning_rate": 9.915145859864232e-05, "loss": 3.4874401092529297, "step": 88580 }, { "epoch": 0.0539, "grad_norm": 0.24874542653560638, "learning_rate": 9.914842263226437e-05, "loss": 3.465639114379883, "step": 88590 }, { "epoch": 0.054, "grad_norm": 0.21626344323158264, "learning_rate": 9.914538129108882e-05, "loss": 3.5141986846923827, "step": 88600 }, { "epoch": 0.0541, "grad_norm": 0.23507332801818848, "learning_rate": 9.914233457544825e-05, "loss": 3.4884784698486326, "step": 88610 }, { "epoch": 0.0542, "grad_norm": 0.2227034717798233, "learning_rate": 9.913928248567586e-05, "loss": 3.517100143432617, "step": 88620 }, { "epoch": 0.0543, "grad_norm": 0.2536813020706177, "learning_rate": 9.913622502210542e-05, "loss": 3.5024185180664062, "step": 88630 }, { "epoch": 0.0544, "grad_norm": 0.23898369073867798, "learning_rate": 9.913316218507128e-05, "loss": 3.558736038208008, "step": 88640 }, { "epoch": 0.0545, "grad_norm": 0.20069561898708344, "learning_rate": 9.91300939749084e-05, "loss": 3.495846176147461, "step": 88650 }, { "epoch": 0.0546, "grad_norm": 0.2289477437734604, "learning_rate": 9.91270203919523e-05, "loss": 3.4999679565429687, "step": 88660 }, { "epoch": 0.0547, "grad_norm": 0.2105262726545334, "learning_rate": 9.912394143653912e-05, "loss": 3.6118770599365235, "step": 88670 }, { "epoch": 0.0548, "grad_norm": 0.21185249090194702, "learning_rate": 9.912085710900555e-05, "loss": 3.4327251434326174, "step": 88680 }, { "epoch": 0.0549, "grad_norm": 0.2270280420780182, "learning_rate": 9.911776740968892e-05, "loss": 3.4859664916992186, "step": 88690 }, { "epoch": 0.055, "grad_norm": 0.7453796863555908, "learning_rate": 9.911467233892709e-05, "loss": 3.525126266479492, "step": 88700 }, { "epoch": 0.0551, "grad_norm": 0.19970841705799103, "learning_rate": 9.911157189705853e-05, "loss": 3.518052673339844, "step": 88710 }, { "epoch": 0.0552, "grad_norm": 0.5951544046401978, "learning_rate": 9.910846608442229e-05, "loss": 3.5432334899902345, "step": 88720 }, { "epoch": 0.0553, "grad_norm": 0.205495685338974, "learning_rate": 9.910535490135805e-05, "loss": 3.4707004547119142, "step": 88730 }, { "epoch": 0.0554, "grad_norm": 0.19855575263500214, "learning_rate": 9.910223834820603e-05, "loss": 3.466890335083008, "step": 88740 }, { "epoch": 0.0555, "grad_norm": 0.20748327672481537, "learning_rate": 9.909911642530703e-05, "loss": 3.473844528198242, "step": 88750 }, { "epoch": 0.0556, "grad_norm": 0.22381797432899475, "learning_rate": 9.909598913300249e-05, "loss": 3.482311248779297, "step": 88760 }, { "epoch": 0.0557, "grad_norm": 0.2077081948518753, "learning_rate": 9.909285647163438e-05, "loss": 3.442440414428711, "step": 88770 }, { "epoch": 0.0558, "grad_norm": 0.20445998013019562, "learning_rate": 9.908971844154531e-05, "loss": 3.4935592651367187, "step": 88780 }, { "epoch": 0.0559, "grad_norm": 0.24528709053993225, "learning_rate": 9.908657504307843e-05, "loss": 3.4737823486328123, "step": 88790 }, { "epoch": 0.056, "grad_norm": 0.22450390458106995, "learning_rate": 9.908342627657751e-05, "loss": 3.480276870727539, "step": 88800 }, { "epoch": 0.0561, "grad_norm": 0.2046712189912796, "learning_rate": 9.908027214238689e-05, "loss": 3.5056232452392577, "step": 88810 }, { "epoch": 0.0562, "grad_norm": 0.20232227444648743, "learning_rate": 9.90771126408515e-05, "loss": 3.4712188720703123, "step": 88820 }, { "epoch": 0.0563, "grad_norm": 0.19787070155143738, "learning_rate": 9.907394777231685e-05, "loss": 3.4717727661132813, "step": 88830 }, { "epoch": 0.0564, "grad_norm": 0.4233787953853607, "learning_rate": 9.907077753712905e-05, "loss": 3.4647403717041017, "step": 88840 }, { "epoch": 0.0565, "grad_norm": 0.21045099198818207, "learning_rate": 9.906760193563482e-05, "loss": 3.4679080963134767, "step": 88850 }, { "epoch": 0.0566, "grad_norm": 0.2026880532503128, "learning_rate": 9.906442096818139e-05, "loss": 3.452913284301758, "step": 88860 }, { "epoch": 0.0567, "grad_norm": 0.2141169011592865, "learning_rate": 9.906123463511665e-05, "loss": 2.6859405517578123, "step": 88870 }, { "epoch": 0.0568, "grad_norm": 0.21389293670654297, "learning_rate": 9.905804293678907e-05, "loss": 3.496937942504883, "step": 88880 }, { "epoch": 0.0569, "grad_norm": 0.6384546160697937, "learning_rate": 9.905484587354766e-05, "loss": 3.5529857635498048, "step": 88890 }, { "epoch": 0.057, "grad_norm": 0.20130573213100433, "learning_rate": 9.905164344574205e-05, "loss": 3.4789676666259766, "step": 88900 }, { "epoch": 0.0571, "grad_norm": 0.20364037156105042, "learning_rate": 9.904843565372248e-05, "loss": 3.5134410858154297, "step": 88910 }, { "epoch": 0.0572, "grad_norm": 0.21930386126041412, "learning_rate": 9.904522249783972e-05, "loss": 3.4699546813964846, "step": 88920 }, { "epoch": 0.0573, "grad_norm": 0.21080081164836884, "learning_rate": 9.904200397844517e-05, "loss": 3.4079925537109377, "step": 88930 }, { "epoch": 0.0574, "grad_norm": 0.19659695029258728, "learning_rate": 9.903878009589078e-05, "loss": 3.514387512207031, "step": 88940 }, { "epoch": 0.0575, "grad_norm": 0.2739630341529846, "learning_rate": 9.903555085052915e-05, "loss": 3.4711139678955076, "step": 88950 }, { "epoch": 0.0576, "grad_norm": 0.18696947395801544, "learning_rate": 9.903231624271338e-05, "loss": 3.4844993591308593, "step": 88960 }, { "epoch": 0.0577, "grad_norm": 0.1995149701833725, "learning_rate": 9.902907627279724e-05, "loss": 3.4629627227783204, "step": 88970 }, { "epoch": 0.0578, "grad_norm": 0.196527898311615, "learning_rate": 9.902583094113504e-05, "loss": 3.4632965087890626, "step": 88980 }, { "epoch": 0.0579, "grad_norm": 0.18903584778308868, "learning_rate": 9.902258024808168e-05, "loss": 3.4840362548828123, "step": 88990 }, { "epoch": 0.058, "grad_norm": 0.20126968622207642, "learning_rate": 9.901932419399264e-05, "loss": 3.4569473266601562, "step": 89000 }, { "epoch": 0.0581, "grad_norm": 0.25105592608451843, "learning_rate": 9.9016062779224e-05, "loss": 3.4798568725585937, "step": 89010 }, { "epoch": 0.0582, "grad_norm": 0.21018150448799133, "learning_rate": 9.901279600413242e-05, "loss": 3.4552234649658202, "step": 89020 }, { "epoch": 0.0583, "grad_norm": 0.20367887616157532, "learning_rate": 9.900952386907518e-05, "loss": 3.47275390625, "step": 89030 }, { "epoch": 0.0584, "grad_norm": 0.25491175055503845, "learning_rate": 9.90062463744101e-05, "loss": 3.491483688354492, "step": 89040 }, { "epoch": 0.0585, "grad_norm": 0.26060357689857483, "learning_rate": 9.900296352049558e-05, "loss": 3.4821800231933593, "step": 89050 }, { "epoch": 0.0586, "grad_norm": 0.20182396471500397, "learning_rate": 9.899967530769065e-05, "loss": 3.4740638732910156, "step": 89060 }, { "epoch": 0.0587, "grad_norm": 0.19438311457633972, "learning_rate": 9.899638173635489e-05, "loss": 3.4584377288818358, "step": 89070 }, { "epoch": 0.0588, "grad_norm": 0.20572009682655334, "learning_rate": 9.899308280684849e-05, "loss": 3.7154422760009767, "step": 89080 }, { "epoch": 0.0589, "grad_norm": 0.21264798939228058, "learning_rate": 9.898977851953222e-05, "loss": 3.4521839141845705, "step": 89090 }, { "epoch": 0.059, "grad_norm": 0.48729193210601807, "learning_rate": 9.898646887476741e-05, "loss": 3.3992000579833985, "step": 89100 }, { "epoch": 0.0591, "grad_norm": 0.22591347992420197, "learning_rate": 9.898315387291603e-05, "loss": 3.573691558837891, "step": 89110 }, { "epoch": 0.0592, "grad_norm": 0.2098514288663864, "learning_rate": 9.89798335143406e-05, "loss": 3.4540946960449217, "step": 89120 }, { "epoch": 0.0593, "grad_norm": 0.20766131579875946, "learning_rate": 9.897650779940419e-05, "loss": 3.499449920654297, "step": 89130 }, { "epoch": 0.0594, "grad_norm": 0.31043943762779236, "learning_rate": 9.897317672847054e-05, "loss": 3.5835922241210936, "step": 89140 }, { "epoch": 0.0595, "grad_norm": 0.21313293278217316, "learning_rate": 9.89698403019039e-05, "loss": 3.4728450775146484, "step": 89150 }, { "epoch": 0.0596, "grad_norm": 0.19958889484405518, "learning_rate": 9.896649852006917e-05, "loss": 3.477406692504883, "step": 89160 }, { "epoch": 0.0597, "grad_norm": 0.22063732147216797, "learning_rate": 9.896315138333177e-05, "loss": 3.5126262664794923, "step": 89170 }, { "epoch": 0.0598, "grad_norm": 0.32085853815078735, "learning_rate": 9.895979889205774e-05, "loss": 3.4631683349609377, "step": 89180 }, { "epoch": 0.0599, "grad_norm": 0.21332105994224548, "learning_rate": 9.895644104661372e-05, "loss": 3.4485774993896485, "step": 89190 }, { "epoch": 0.06, "grad_norm": 0.21000173687934875, "learning_rate": 9.895307784736691e-05, "loss": 3.4593437194824217, "step": 89200 }, { "epoch": 0.0601, "grad_norm": 0.24599437415599823, "learning_rate": 9.894970929468512e-05, "loss": 3.542616271972656, "step": 89210 }, { "epoch": 0.0602, "grad_norm": 0.22962863743305206, "learning_rate": 9.89463353889367e-05, "loss": 3.553028869628906, "step": 89220 }, { "epoch": 0.0603, "grad_norm": 0.20635321736335754, "learning_rate": 9.894295613049065e-05, "loss": 3.4743122100830077, "step": 89230 }, { "epoch": 0.0604, "grad_norm": 0.2056771069765091, "learning_rate": 9.893957151971649e-05, "loss": 3.4551544189453125, "step": 89240 }, { "epoch": 0.0605, "grad_norm": 1.0362017154693604, "learning_rate": 9.893618155698436e-05, "loss": 3.7936565399169924, "step": 89250 }, { "epoch": 0.0606, "grad_norm": 0.20664022862911224, "learning_rate": 9.8932786242665e-05, "loss": 3.432172393798828, "step": 89260 }, { "epoch": 0.0607, "grad_norm": 0.1923878937959671, "learning_rate": 9.89293855771297e-05, "loss": 3.435908889770508, "step": 89270 }, { "epoch": 0.0608, "grad_norm": 0.2042771279811859, "learning_rate": 9.892597956075036e-05, "loss": 3.499740982055664, "step": 89280 }, { "epoch": 0.0609, "grad_norm": 0.26855891942977905, "learning_rate": 9.892256819389947e-05, "loss": 3.441214370727539, "step": 89290 }, { "epoch": 0.061, "grad_norm": 0.19968263804912567, "learning_rate": 9.891915147695006e-05, "loss": 3.495225524902344, "step": 89300 }, { "epoch": 0.0611, "grad_norm": 0.19805912673473358, "learning_rate": 9.891572941027577e-05, "loss": 3.5245033264160157, "step": 89310 }, { "epoch": 0.0612, "grad_norm": 0.2031104415655136, "learning_rate": 9.89123019942509e-05, "loss": 3.5029041290283205, "step": 89320 }, { "epoch": 0.0613, "grad_norm": 0.20210763812065125, "learning_rate": 9.89088692292502e-05, "loss": 3.5004878997802735, "step": 89330 }, { "epoch": 0.0614, "grad_norm": 0.19427543878555298, "learning_rate": 9.89054311156491e-05, "loss": 3.4038616180419923, "step": 89340 }, { "epoch": 0.0615, "grad_norm": 0.19454139471054077, "learning_rate": 9.890198765382357e-05, "loss": 3.4228302001953126, "step": 89350 }, { "epoch": 0.0616, "grad_norm": 0.25822871923446655, "learning_rate": 9.889853884415021e-05, "loss": 3.397611618041992, "step": 89360 }, { "epoch": 0.0617, "grad_norm": 0.18997600674629211, "learning_rate": 9.889508468700614e-05, "loss": 3.3981906890869142, "step": 89370 }, { "epoch": 0.0618, "grad_norm": 0.5811297297477722, "learning_rate": 9.889162518276915e-05, "loss": 3.376690673828125, "step": 89380 }, { "epoch": 0.0619, "grad_norm": 0.19297108054161072, "learning_rate": 9.888816033181752e-05, "loss": 3.426951217651367, "step": 89390 }, { "epoch": 0.062, "grad_norm": 0.20577505230903625, "learning_rate": 9.888469013453018e-05, "loss": 3.4298816680908204, "step": 89400 }, { "epoch": 0.0621, "grad_norm": 0.2044234722852707, "learning_rate": 9.888121459128663e-05, "loss": 3.451419448852539, "step": 89410 }, { "epoch": 0.0622, "grad_norm": 0.1926313042640686, "learning_rate": 9.887773370246693e-05, "loss": 3.441177749633789, "step": 89420 }, { "epoch": 0.0623, "grad_norm": 0.21021787822246552, "learning_rate": 9.887424746845177e-05, "loss": 3.4494297027587892, "step": 89430 }, { "epoch": 0.0624, "grad_norm": 0.19469882547855377, "learning_rate": 9.887075588962239e-05, "loss": 3.4548404693603514, "step": 89440 }, { "epoch": 0.0625, "grad_norm": 0.20585650205612183, "learning_rate": 9.88672589663606e-05, "loss": 3.4229782104492186, "step": 89450 }, { "epoch": 0.0626, "grad_norm": 0.21273331344127655, "learning_rate": 9.886375669904886e-05, "loss": 3.459269332885742, "step": 89460 }, { "epoch": 0.0627, "grad_norm": 0.19937758147716522, "learning_rate": 9.886024908807014e-05, "loss": 3.3936458587646485, "step": 89470 }, { "epoch": 0.0628, "grad_norm": 0.3472966253757477, "learning_rate": 9.885673613380806e-05, "loss": 3.417217254638672, "step": 89480 }, { "epoch": 0.0629, "grad_norm": 0.2839251160621643, "learning_rate": 9.885321783664676e-05, "loss": 3.575532150268555, "step": 89490 }, { "epoch": 0.063, "grad_norm": 0.2279869168996811, "learning_rate": 9.884969419697101e-05, "loss": 3.464560699462891, "step": 89500 }, { "epoch": 0.0631, "grad_norm": 0.1959521472454071, "learning_rate": 9.884616521516614e-05, "loss": 3.4132770538330077, "step": 89510 }, { "epoch": 0.0632, "grad_norm": 0.21481133997440338, "learning_rate": 9.88426308916181e-05, "loss": 3.4634910583496095, "step": 89520 }, { "epoch": 0.0633, "grad_norm": 0.18974684178829193, "learning_rate": 9.883909122671335e-05, "loss": 3.427521514892578, "step": 89530 }, { "epoch": 0.0634, "grad_norm": 0.5635640025138855, "learning_rate": 9.883554622083904e-05, "loss": 3.3842185974121093, "step": 89540 }, { "epoch": 0.0635, "grad_norm": 0.20399244129657745, "learning_rate": 9.88319958743828e-05, "loss": 3.539202117919922, "step": 89550 }, { "epoch": 0.0636, "grad_norm": 0.21917104721069336, "learning_rate": 9.882844018773291e-05, "loss": 3.453915023803711, "step": 89560 }, { "epoch": 0.0637, "grad_norm": 0.1964418590068817, "learning_rate": 9.882487916127823e-05, "loss": 3.4556228637695314, "step": 89570 }, { "epoch": 0.0638, "grad_norm": 1.2727577686309814, "learning_rate": 9.882131279540815e-05, "loss": 3.4827877044677735, "step": 89580 }, { "epoch": 0.0639, "grad_norm": 0.19889576733112335, "learning_rate": 9.881774109051271e-05, "loss": 3.4498165130615233, "step": 89590 }, { "epoch": 0.064, "grad_norm": 0.21468499302864075, "learning_rate": 9.881416404698252e-05, "loss": 3.4394134521484374, "step": 89600 }, { "epoch": 0.0641, "grad_norm": 0.2047942727804184, "learning_rate": 9.881058166520873e-05, "loss": 3.4363807678222655, "step": 89610 }, { "epoch": 0.0642, "grad_norm": 0.18500663340091705, "learning_rate": 9.880699394558311e-05, "loss": 3.457814407348633, "step": 89620 }, { "epoch": 0.0643, "grad_norm": 0.36493927240371704, "learning_rate": 9.880340088849801e-05, "loss": 3.471601104736328, "step": 89630 }, { "epoch": 0.0644, "grad_norm": 0.200027734041214, "learning_rate": 9.879980249434637e-05, "loss": 3.425244903564453, "step": 89640 }, { "epoch": 0.0645, "grad_norm": 0.19922411441802979, "learning_rate": 9.879619876352168e-05, "loss": 3.4450130462646484, "step": 89650 }, { "epoch": 0.0646, "grad_norm": 0.2062893509864807, "learning_rate": 9.879258969641809e-05, "loss": 3.455621337890625, "step": 89660 }, { "epoch": 0.0647, "grad_norm": 0.1936301589012146, "learning_rate": 9.878897529343023e-05, "loss": 3.4043205261230467, "step": 89670 }, { "epoch": 0.0648, "grad_norm": 0.19777792692184448, "learning_rate": 9.878535555495338e-05, "loss": 3.4060714721679686, "step": 89680 }, { "epoch": 0.0649, "grad_norm": 0.1945926994085312, "learning_rate": 9.87817304813834e-05, "loss": 3.421062469482422, "step": 89690 }, { "epoch": 0.065, "grad_norm": 0.21797285974025726, "learning_rate": 9.877810007311671e-05, "loss": 3.4285659790039062, "step": 89700 }, { "epoch": 0.0651, "grad_norm": 0.18840435147285461, "learning_rate": 9.877446433055035e-05, "loss": 3.4509166717529296, "step": 89710 }, { "epoch": 0.0652, "grad_norm": 0.18497011065483093, "learning_rate": 9.877082325408191e-05, "loss": 3.4519290924072266, "step": 89720 }, { "epoch": 0.0653, "grad_norm": 0.22163893282413483, "learning_rate": 9.876717684410954e-05, "loss": 3.5065166473388674, "step": 89730 }, { "epoch": 0.0654, "grad_norm": 0.23105868697166443, "learning_rate": 9.876352510103204e-05, "loss": 3.613412094116211, "step": 89740 }, { "epoch": 0.0655, "grad_norm": 0.20272648334503174, "learning_rate": 9.875986802524875e-05, "loss": 3.527901458740234, "step": 89750 }, { "epoch": 0.0656, "grad_norm": 0.20436367392539978, "learning_rate": 9.87562056171596e-05, "loss": 3.466903305053711, "step": 89760 }, { "epoch": 0.0657, "grad_norm": 0.19602878391742706, "learning_rate": 9.875253787716511e-05, "loss": 3.4349361419677735, "step": 89770 }, { "epoch": 0.0658, "grad_norm": 0.19002887606620789, "learning_rate": 9.874886480566637e-05, "loss": 3.4590431213378907, "step": 89780 }, { "epoch": 0.0659, "grad_norm": 0.23066462576389313, "learning_rate": 9.874518640306507e-05, "loss": 3.474501037597656, "step": 89790 }, { "epoch": 0.066, "grad_norm": 0.19918841123580933, "learning_rate": 9.874150266976347e-05, "loss": 3.413235092163086, "step": 89800 }, { "epoch": 0.0661, "grad_norm": 0.20216597616672516, "learning_rate": 9.873781360616443e-05, "loss": 3.451231002807617, "step": 89810 }, { "epoch": 0.0662, "grad_norm": 0.19771383702754974, "learning_rate": 9.873411921267137e-05, "loss": 3.4356189727783204, "step": 89820 }, { "epoch": 0.0663, "grad_norm": 0.2315620481967926, "learning_rate": 9.873041948968829e-05, "loss": 3.392162322998047, "step": 89830 }, { "epoch": 0.0664, "grad_norm": 0.22000086307525635, "learning_rate": 9.872671443761981e-05, "loss": 3.425168991088867, "step": 89840 }, { "epoch": 0.0665, "grad_norm": 0.20106548070907593, "learning_rate": 9.872300405687109e-05, "loss": 3.432635498046875, "step": 89850 }, { "epoch": 0.0666, "grad_norm": 0.22975106537342072, "learning_rate": 9.871928834784792e-05, "loss": 3.477445602416992, "step": 89860 }, { "epoch": 0.0667, "grad_norm": 0.20586711168289185, "learning_rate": 9.871556731095661e-05, "loss": 3.4187599182128907, "step": 89870 }, { "epoch": 0.0668, "grad_norm": 0.22815658152103424, "learning_rate": 9.871184094660411e-05, "loss": 3.3873096466064454, "step": 89880 }, { "epoch": 0.0669, "grad_norm": 0.2067597210407257, "learning_rate": 9.870810925519791e-05, "loss": 3.645707702636719, "step": 89890 }, { "epoch": 0.067, "grad_norm": 0.23676487803459167, "learning_rate": 9.870437223714612e-05, "loss": 3.4409839630126955, "step": 89900 }, { "epoch": 0.0671, "grad_norm": 0.2584167718887329, "learning_rate": 9.87006298928574e-05, "loss": 3.398544692993164, "step": 89910 }, { "epoch": 0.0672, "grad_norm": 0.19814462959766388, "learning_rate": 9.869688222274103e-05, "loss": 3.439470672607422, "step": 89920 }, { "epoch": 0.0673, "grad_norm": 0.21089524030685425, "learning_rate": 9.869312922720681e-05, "loss": 3.395713043212891, "step": 89930 }, { "epoch": 0.0674, "grad_norm": 0.24018466472625732, "learning_rate": 9.868937090666521e-05, "loss": 3.4173484802246095, "step": 89940 }, { "epoch": 0.0675, "grad_norm": 0.18835599720478058, "learning_rate": 9.86856072615272e-05, "loss": 3.4421661376953123, "step": 89950 }, { "epoch": 0.0676, "grad_norm": 0.19058310985565186, "learning_rate": 9.868183829220438e-05, "loss": 3.4404422760009767, "step": 89960 }, { "epoch": 0.0677, "grad_norm": 0.20740528404712677, "learning_rate": 9.867806399910893e-05, "loss": 3.4856842041015623, "step": 89970 }, { "epoch": 0.0678, "grad_norm": 0.39437851309776306, "learning_rate": 9.867428438265356e-05, "loss": 3.558710479736328, "step": 89980 }, { "epoch": 0.0679, "grad_norm": 0.18865805864334106, "learning_rate": 9.867049944325165e-05, "loss": 3.468341827392578, "step": 89990 }, { "epoch": 0.068, "grad_norm": 0.1895010620355606, "learning_rate": 9.86667091813171e-05, "loss": 3.4364364624023436, "step": 90000 }, { "epoch": 0.0681, "grad_norm": 0.1845843642950058, "learning_rate": 9.866291359726438e-05, "loss": 3.4180324554443358, "step": 90010 }, { "epoch": 0.0682, "grad_norm": 0.2081768661737442, "learning_rate": 9.865911269150861e-05, "loss": 3.278183364868164, "step": 90020 }, { "epoch": 0.0683, "grad_norm": 0.18075361847877502, "learning_rate": 9.865530646446544e-05, "loss": 3.4177787780761717, "step": 90030 }, { "epoch": 0.0684, "grad_norm": 0.1949615180492401, "learning_rate": 9.86514949165511e-05, "loss": 3.418138122558594, "step": 90040 }, { "epoch": 0.0685, "grad_norm": 0.1982610523700714, "learning_rate": 9.864767804818243e-05, "loss": 3.377317428588867, "step": 90050 }, { "epoch": 0.0686, "grad_norm": 0.1965378075838089, "learning_rate": 9.86438558597768e-05, "loss": 3.4394294738769533, "step": 90060 }, { "epoch": 0.0687, "grad_norm": 0.21472881734371185, "learning_rate": 9.864002835175225e-05, "loss": 3.4632644653320312, "step": 90070 }, { "epoch": 0.0688, "grad_norm": 0.20160451531410217, "learning_rate": 9.863619552452734e-05, "loss": 3.417496109008789, "step": 90080 }, { "epoch": 0.0689, "grad_norm": 0.18697671592235565, "learning_rate": 9.863235737852119e-05, "loss": 3.421751022338867, "step": 90090 }, { "epoch": 0.069, "grad_norm": 0.21384471654891968, "learning_rate": 9.862851391415356e-05, "loss": 3.3702251434326174, "step": 90100 }, { "epoch": 0.0691, "grad_norm": 0.36784687638282776, "learning_rate": 9.862466513184477e-05, "loss": 3.455644989013672, "step": 90110 }, { "epoch": 0.0692, "grad_norm": 0.19213269650936127, "learning_rate": 9.86208110320157e-05, "loss": 3.4436874389648438, "step": 90120 }, { "epoch": 0.0693, "grad_norm": 0.191935196518898, "learning_rate": 9.861695161508784e-05, "loss": 3.370574951171875, "step": 90130 }, { "epoch": 0.0694, "grad_norm": 0.20935216546058655, "learning_rate": 9.861308688148324e-05, "loss": 3.6008441925048826, "step": 90140 }, { "epoch": 0.0695, "grad_norm": 0.23210465908050537, "learning_rate": 9.860921683162455e-05, "loss": 3.421033477783203, "step": 90150 }, { "epoch": 0.0696, "grad_norm": 0.1882653534412384, "learning_rate": 9.860534146593499e-05, "loss": 3.5439300537109375, "step": 90160 }, { "epoch": 0.0697, "grad_norm": 0.18368054926395416, "learning_rate": 9.860146078483836e-05, "loss": 3.3756973266601564, "step": 90170 }, { "epoch": 0.0698, "grad_norm": 0.22780048847198486, "learning_rate": 9.859757478875905e-05, "loss": 3.4124114990234373, "step": 90180 }, { "epoch": 0.0699, "grad_norm": 0.1933293342590332, "learning_rate": 9.859368347812204e-05, "loss": 3.4204635620117188, "step": 90190 }, { "epoch": 0.07, "grad_norm": 0.18966281414031982, "learning_rate": 9.858978685335285e-05, "loss": 3.493378448486328, "step": 90200 }, { "epoch": 0.0701, "grad_norm": 0.22090333700180054, "learning_rate": 9.858588491487763e-05, "loss": 3.4145050048828125, "step": 90210 }, { "epoch": 0.0702, "grad_norm": 0.19326356053352356, "learning_rate": 9.858197766312308e-05, "loss": 4.895836257934571, "step": 90220 }, { "epoch": 0.0703, "grad_norm": 0.419099360704422, "learning_rate": 9.857806509851649e-05, "loss": 3.441783905029297, "step": 90230 }, { "epoch": 0.0704, "grad_norm": 0.29411375522613525, "learning_rate": 9.857414722148574e-05, "loss": 3.450041961669922, "step": 90240 }, { "epoch": 0.0705, "grad_norm": 0.2423461526632309, "learning_rate": 9.857022403245928e-05, "loss": 3.418796157836914, "step": 90250 }, { "epoch": 0.0706, "grad_norm": 0.2009509950876236, "learning_rate": 9.856629553186615e-05, "loss": 3.424629974365234, "step": 90260 }, { "epoch": 0.0707, "grad_norm": 0.21068136394023895, "learning_rate": 9.856236172013595e-05, "loss": 3.4472522735595703, "step": 90270 }, { "epoch": 0.0708, "grad_norm": 0.1944015771150589, "learning_rate": 9.85584225976989e-05, "loss": 3.4631675720214843, "step": 90280 }, { "epoch": 0.0709, "grad_norm": 0.19520260393619537, "learning_rate": 9.855447816498575e-05, "loss": 3.5742561340332033, "step": 90290 }, { "epoch": 0.071, "grad_norm": 0.20172785222530365, "learning_rate": 9.855052842242787e-05, "loss": 3.441641998291016, "step": 90300 }, { "epoch": 0.0711, "grad_norm": 0.5770493149757385, "learning_rate": 9.85465733704572e-05, "loss": 3.434587860107422, "step": 90310 }, { "epoch": 0.0712, "grad_norm": 0.1912987232208252, "learning_rate": 9.854261300950624e-05, "loss": 3.4274738311767576, "step": 90320 }, { "epoch": 0.0713, "grad_norm": 0.2012673318386078, "learning_rate": 9.853864734000813e-05, "loss": 3.358982467651367, "step": 90330 }, { "epoch": 0.0714, "grad_norm": 0.1996285766363144, "learning_rate": 9.85346763623965e-05, "loss": 3.4198928833007813, "step": 90340 }, { "epoch": 0.0715, "grad_norm": 0.2435326874256134, "learning_rate": 9.853070007710564e-05, "loss": 3.4828147888183594, "step": 90350 }, { "epoch": 0.0716, "grad_norm": 0.20343773066997528, "learning_rate": 9.85267184845704e-05, "loss": 3.4680549621582033, "step": 90360 }, { "epoch": 0.0717, "grad_norm": 0.20360995829105377, "learning_rate": 9.852273158522616e-05, "loss": 3.404336166381836, "step": 90370 }, { "epoch": 0.0718, "grad_norm": 0.18316473066806793, "learning_rate": 9.851873937950896e-05, "loss": 3.427193450927734, "step": 90380 }, { "epoch": 0.0719, "grad_norm": 0.18323542177677155, "learning_rate": 9.851474186785537e-05, "loss": 3.4293724060058595, "step": 90390 }, { "epoch": 0.072, "grad_norm": 0.24065449833869934, "learning_rate": 9.851073905070254e-05, "loss": 3.412984085083008, "step": 90400 }, { "epoch": 0.0721, "grad_norm": 0.2167995721101761, "learning_rate": 9.850673092848824e-05, "loss": 3.4718666076660156, "step": 90410 }, { "epoch": 0.0722, "grad_norm": 0.22016695141792297, "learning_rate": 9.850271750165077e-05, "loss": 3.462350845336914, "step": 90420 }, { "epoch": 0.0723, "grad_norm": 0.20022708177566528, "learning_rate": 9.849869877062902e-05, "loss": 3.3929302215576174, "step": 90430 }, { "epoch": 0.0724, "grad_norm": 0.20021797716617584, "learning_rate": 9.849467473586252e-05, "loss": 3.4043853759765623, "step": 90440 }, { "epoch": 0.0725, "grad_norm": 0.2045396864414215, "learning_rate": 9.849064539779127e-05, "loss": 3.4112117767333983, "step": 90450 }, { "epoch": 0.0726, "grad_norm": 0.18425671756267548, "learning_rate": 9.848661075685594e-05, "loss": 3.3883522033691404, "step": 90460 }, { "epoch": 0.0727, "grad_norm": 0.2002735584974289, "learning_rate": 9.848257081349778e-05, "loss": 3.425012969970703, "step": 90470 }, { "epoch": 0.0728, "grad_norm": 0.21939072012901306, "learning_rate": 9.847852556815856e-05, "loss": 3.3874881744384764, "step": 90480 }, { "epoch": 0.0729, "grad_norm": 0.19874972105026245, "learning_rate": 9.847447502128067e-05, "loss": 3.4318378448486326, "step": 90490 }, { "epoch": 0.073, "grad_norm": 0.17558015882968903, "learning_rate": 9.847041917330708e-05, "loss": 3.414885711669922, "step": 90500 }, { "epoch": 0.0731, "grad_norm": 0.20712341368198395, "learning_rate": 9.846635802468132e-05, "loss": 3.4001956939697267, "step": 90510 }, { "epoch": 0.0732, "grad_norm": 0.18562646210193634, "learning_rate": 9.84622915758475e-05, "loss": 3.454481506347656, "step": 90520 }, { "epoch": 0.0733, "grad_norm": 0.1891176700592041, "learning_rate": 9.845821982725034e-05, "loss": 3.424183654785156, "step": 90530 }, { "epoch": 0.0734, "grad_norm": 0.20412865281105042, "learning_rate": 9.845414277933514e-05, "loss": 3.4339012145996093, "step": 90540 }, { "epoch": 0.0735, "grad_norm": 0.20076261460781097, "learning_rate": 9.845006043254771e-05, "loss": 3.4416248321533205, "step": 90550 }, { "epoch": 0.0736, "grad_norm": 0.18903033435344696, "learning_rate": 9.844597278733451e-05, "loss": 3.4262290954589845, "step": 90560 }, { "epoch": 0.0737, "grad_norm": 0.18829245865345, "learning_rate": 9.844187984414259e-05, "loss": 3.420502471923828, "step": 90570 }, { "epoch": 0.0738, "grad_norm": 0.22221332788467407, "learning_rate": 9.84377816034195e-05, "loss": 3.5758430480957033, "step": 90580 }, { "epoch": 0.0739, "grad_norm": 0.1879892498254776, "learning_rate": 9.843367806561345e-05, "loss": 3.4141063690185547, "step": 90590 }, { "epoch": 0.074, "grad_norm": 0.20494823157787323, "learning_rate": 9.842956923117317e-05, "loss": 3.3604499816894533, "step": 90600 }, { "epoch": 0.0741, "grad_norm": 0.1852206140756607, "learning_rate": 9.842545510054802e-05, "loss": 3.445057678222656, "step": 90610 }, { "epoch": 0.0742, "grad_norm": 0.19173751771450043, "learning_rate": 9.842133567418792e-05, "loss": 3.4444602966308593, "step": 90620 }, { "epoch": 0.0743, "grad_norm": 0.1993899941444397, "learning_rate": 9.841721095254333e-05, "loss": 3.4150882720947267, "step": 90630 }, { "epoch": 0.0744, "grad_norm": 0.2048618048429489, "learning_rate": 9.841308093606537e-05, "loss": 3.4006935119628907, "step": 90640 }, { "epoch": 0.0745, "grad_norm": 0.18062923848628998, "learning_rate": 9.840894562520565e-05, "loss": 3.4288841247558595, "step": 90650 }, { "epoch": 0.0746, "grad_norm": 0.24513165652751923, "learning_rate": 9.840480502041642e-05, "loss": 3.5283134460449217, "step": 90660 }, { "epoch": 0.0747, "grad_norm": 0.1878131628036499, "learning_rate": 9.840065912215049e-05, "loss": 3.3753379821777343, "step": 90670 }, { "epoch": 0.0748, "grad_norm": 0.1954878568649292, "learning_rate": 9.839650793086124e-05, "loss": 3.397724914550781, "step": 90680 }, { "epoch": 0.0749, "grad_norm": 0.21396121382713318, "learning_rate": 9.839235144700265e-05, "loss": 3.4670635223388673, "step": 90690 }, { "epoch": 0.075, "grad_norm": 0.21549062430858612, "learning_rate": 9.838818967102926e-05, "loss": 3.418809509277344, "step": 90700 }, { "epoch": 0.0751, "grad_norm": 0.2641301155090332, "learning_rate": 9.83840226033962e-05, "loss": 3.415201187133789, "step": 90710 }, { "epoch": 0.0752, "grad_norm": 0.22600533068180084, "learning_rate": 9.837985024455918e-05, "loss": 3.3877792358398438, "step": 90720 }, { "epoch": 0.0753, "grad_norm": 0.20542959868907928, "learning_rate": 9.837567259497447e-05, "loss": 3.4186309814453124, "step": 90730 }, { "epoch": 0.0754, "grad_norm": 0.28535255789756775, "learning_rate": 9.837148965509894e-05, "loss": 3.335541534423828, "step": 90740 }, { "epoch": 0.0755, "grad_norm": 0.19924986362457275, "learning_rate": 9.836730142539001e-05, "loss": 3.414875030517578, "step": 90750 }, { "epoch": 0.0756, "grad_norm": 0.1855117231607437, "learning_rate": 9.836310790630574e-05, "loss": 3.437949371337891, "step": 90760 }, { "epoch": 0.0757, "grad_norm": 0.19032162427902222, "learning_rate": 9.83589090983047e-05, "loss": 3.4283119201660157, "step": 90770 }, { "epoch": 0.0758, "grad_norm": 0.19951468706130981, "learning_rate": 9.835470500184605e-05, "loss": 3.373621368408203, "step": 90780 }, { "epoch": 0.0759, "grad_norm": 0.19003558158874512, "learning_rate": 9.835049561738957e-05, "loss": 3.379435729980469, "step": 90790 }, { "epoch": 0.076, "grad_norm": 0.18255220353603363, "learning_rate": 9.834628094539558e-05, "loss": 3.436986541748047, "step": 90800 }, { "epoch": 0.0761, "grad_norm": 0.234188973903656, "learning_rate": 9.834206098632499e-05, "loss": 3.3922462463378906, "step": 90810 }, { "epoch": 0.0762, "grad_norm": 0.187027707695961, "learning_rate": 9.833783574063931e-05, "loss": 3.442804718017578, "step": 90820 }, { "epoch": 0.0763, "grad_norm": 0.19435922801494598, "learning_rate": 9.833360520880058e-05, "loss": 3.391123962402344, "step": 90830 }, { "epoch": 0.0764, "grad_norm": 0.1894882619380951, "learning_rate": 9.832936939127144e-05, "loss": 3.4382129669189454, "step": 90840 }, { "epoch": 0.0765, "grad_norm": 0.19001539051532745, "learning_rate": 9.832512828851515e-05, "loss": 3.4043079376220704, "step": 90850 }, { "epoch": 0.0766, "grad_norm": 0.1797163188457489, "learning_rate": 9.832088190099546e-05, "loss": 3.437387466430664, "step": 90860 }, { "epoch": 0.0767, "grad_norm": 0.2064702957868576, "learning_rate": 9.831663022917679e-05, "loss": 3.425634765625, "step": 90870 }, { "epoch": 0.0768, "grad_norm": 0.18764592707157135, "learning_rate": 9.831237327352407e-05, "loss": 3.383989715576172, "step": 90880 }, { "epoch": 0.0769, "grad_norm": 0.18248462677001953, "learning_rate": 9.830811103450286e-05, "loss": 3.404180145263672, "step": 90890 }, { "epoch": 0.077, "grad_norm": 0.19625085592269897, "learning_rate": 9.830384351257924e-05, "loss": 3.4502777099609374, "step": 90900 }, { "epoch": 0.0771, "grad_norm": 0.19390007853507996, "learning_rate": 9.829957070821993e-05, "loss": 3.4019561767578126, "step": 90910 }, { "epoch": 0.0772, "grad_norm": 0.18367496132850647, "learning_rate": 9.829529262189218e-05, "loss": 3.3525634765625, "step": 90920 }, { "epoch": 0.0773, "grad_norm": 0.18571564555168152, "learning_rate": 9.829100925406385e-05, "loss": 3.4157615661621095, "step": 90930 }, { "epoch": 0.0774, "grad_norm": 0.1928042620420456, "learning_rate": 9.828672060520333e-05, "loss": 3.3647953033447267, "step": 90940 }, { "epoch": 0.0775, "grad_norm": 0.18328942358493805, "learning_rate": 9.828242667577966e-05, "loss": 3.413312530517578, "step": 90950 }, { "epoch": 0.0776, "grad_norm": 0.45585140585899353, "learning_rate": 9.82781274662624e-05, "loss": 3.4098072052001953, "step": 90960 }, { "epoch": 0.0777, "grad_norm": 0.34254154562950134, "learning_rate": 9.82738229771217e-05, "loss": 3.339917755126953, "step": 90970 }, { "epoch": 0.0778, "grad_norm": 0.18308515846729279, "learning_rate": 9.826951320882829e-05, "loss": 3.412471389770508, "step": 90980 }, { "epoch": 0.0779, "grad_norm": 0.23055846989154816, "learning_rate": 9.826519816185351e-05, "loss": 3.430379104614258, "step": 90990 }, { "epoch": 0.078, "grad_norm": 0.19235859811306, "learning_rate": 9.826087783666921e-05, "loss": 3.391558837890625, "step": 91000 }, { "epoch": 0.0781, "grad_norm": 0.19561542570590973, "learning_rate": 9.825655223374787e-05, "loss": 3.3945308685302735, "step": 91010 }, { "epoch": 0.0782, "grad_norm": 0.20887531340122223, "learning_rate": 9.825222135356253e-05, "loss": 3.507613754272461, "step": 91020 }, { "epoch": 0.0783, "grad_norm": 0.1942916065454483, "learning_rate": 9.82478851965868e-05, "loss": 3.4066715240478516, "step": 91030 }, { "epoch": 0.0784, "grad_norm": 0.1907833367586136, "learning_rate": 9.82435437632949e-05, "loss": 3.414319610595703, "step": 91040 }, { "epoch": 0.0785, "grad_norm": 0.18842460215091705, "learning_rate": 9.823919705416158e-05, "loss": 3.3979476928710937, "step": 91050 }, { "epoch": 0.0786, "grad_norm": 0.47371453046798706, "learning_rate": 9.82348450696622e-05, "loss": 3.4853992462158203, "step": 91060 }, { "epoch": 0.0787, "grad_norm": 0.18634769320487976, "learning_rate": 9.823048781027268e-05, "loss": 3.444872283935547, "step": 91070 }, { "epoch": 0.0788, "grad_norm": 0.1946692317724228, "learning_rate": 9.822612527646953e-05, "loss": 3.4461708068847656, "step": 91080 }, { "epoch": 0.0789, "grad_norm": 0.20968157052993774, "learning_rate": 9.822175746872984e-05, "loss": 3.373087692260742, "step": 91090 }, { "epoch": 0.079, "grad_norm": 0.20163120329380035, "learning_rate": 9.821738438753123e-05, "loss": 3.436847686767578, "step": 91100 }, { "epoch": 0.0791, "grad_norm": 0.18406334519386292, "learning_rate": 9.821300603335196e-05, "loss": 3.3830215454101564, "step": 91110 }, { "epoch": 0.0792, "grad_norm": 0.2000827193260193, "learning_rate": 9.820862240667085e-05, "loss": 3.3901039123535157, "step": 91120 }, { "epoch": 0.0793, "grad_norm": 0.18200770020484924, "learning_rate": 9.820423350796726e-05, "loss": 3.4010009765625, "step": 91130 }, { "epoch": 0.0794, "grad_norm": 0.25358057022094727, "learning_rate": 9.819983933772118e-05, "loss": 3.396565246582031, "step": 91140 }, { "epoch": 0.0795, "grad_norm": 0.193947896361351, "learning_rate": 9.819543989641314e-05, "loss": 3.3812702178955076, "step": 91150 }, { "epoch": 0.0796, "grad_norm": 0.2066369205713272, "learning_rate": 9.819103518452423e-05, "loss": 3.363048553466797, "step": 91160 }, { "epoch": 0.0797, "grad_norm": 0.1875719130039215, "learning_rate": 9.818662520253618e-05, "loss": 3.391843795776367, "step": 91170 }, { "epoch": 0.0798, "grad_norm": 7.719047546386719, "learning_rate": 9.818220995093126e-05, "loss": 4.655821228027344, "step": 91180 }, { "epoch": 0.0799, "grad_norm": 0.19423134624958038, "learning_rate": 9.817778943019228e-05, "loss": 3.5906097412109377, "step": 91190 }, { "epoch": 0.08, "grad_norm": 0.21337127685546875, "learning_rate": 9.81733636408027e-05, "loss": 3.4511482238769533, "step": 91200 }, { "epoch": 0.0801, "grad_norm": 0.2380789965391159, "learning_rate": 9.816893258324649e-05, "loss": 3.387167739868164, "step": 91210 }, { "epoch": 0.0802, "grad_norm": 0.20199312269687653, "learning_rate": 9.816449625800823e-05, "loss": 3.427529144287109, "step": 91220 }, { "epoch": 0.0803, "grad_norm": 0.4359970688819885, "learning_rate": 9.816005466557308e-05, "loss": 3.3455387115478517, "step": 91230 }, { "epoch": 0.0804, "grad_norm": 0.1914917677640915, "learning_rate": 9.815560780642674e-05, "loss": 3.3834487915039064, "step": 91240 }, { "epoch": 0.0805, "grad_norm": 0.23227998614311218, "learning_rate": 9.815115568105555e-05, "loss": 3.476776885986328, "step": 91250 }, { "epoch": 0.0806, "grad_norm": 0.19104789197444916, "learning_rate": 9.814669828994638e-05, "loss": 3.4379226684570314, "step": 91260 }, { "epoch": 0.0807, "grad_norm": 0.2065754234790802, "learning_rate": 9.814223563358665e-05, "loss": 3.4074939727783202, "step": 91270 }, { "epoch": 0.0808, "grad_norm": 0.22207500040531158, "learning_rate": 9.813776771246443e-05, "loss": 3.4396095275878906, "step": 91280 }, { "epoch": 0.0809, "grad_norm": 0.20681488513946533, "learning_rate": 9.813329452706829e-05, "loss": 3.3754486083984374, "step": 91290 }, { "epoch": 0.081, "grad_norm": 0.19995920360088348, "learning_rate": 9.812881607788744e-05, "loss": 3.4267017364501955, "step": 91300 }, { "epoch": 0.0811, "grad_norm": 0.1884300857782364, "learning_rate": 9.812433236541163e-05, "loss": 3.4264190673828123, "step": 91310 }, { "epoch": 0.0812, "grad_norm": 0.2227810025215149, "learning_rate": 9.811984339013116e-05, "loss": 3.4063602447509767, "step": 91320 }, { "epoch": 0.0813, "grad_norm": 0.19623860716819763, "learning_rate": 9.811534915253698e-05, "loss": 3.4120952606201174, "step": 91330 }, { "epoch": 0.0814, "grad_norm": 0.20103465020656586, "learning_rate": 9.811084965312056e-05, "loss": 3.3859752655029296, "step": 91340 }, { "epoch": 0.0815, "grad_norm": 0.20172545313835144, "learning_rate": 9.810634489237396e-05, "loss": 3.345873260498047, "step": 91350 }, { "epoch": 0.0816, "grad_norm": 0.21904966235160828, "learning_rate": 9.81018348707898e-05, "loss": 3.457351303100586, "step": 91360 }, { "epoch": 0.0817, "grad_norm": 0.1877533346414566, "learning_rate": 9.809731958886131e-05, "loss": 3.430813217163086, "step": 91370 }, { "epoch": 0.0818, "grad_norm": 0.1886226236820221, "learning_rate": 9.809279904708224e-05, "loss": 3.4863864898681642, "step": 91380 }, { "epoch": 0.0819, "grad_norm": 0.19434700906276703, "learning_rate": 9.808827324594699e-05, "loss": 3.388488006591797, "step": 91390 }, { "epoch": 0.082, "grad_norm": 0.19085317850112915, "learning_rate": 9.808374218595046e-05, "loss": 3.5107711791992187, "step": 91400 }, { "epoch": 0.0821, "grad_norm": 0.23095102608203888, "learning_rate": 9.80792058675882e-05, "loss": 3.4256153106689453, "step": 91410 }, { "epoch": 0.0822, "grad_norm": 0.19960041344165802, "learning_rate": 9.807466429135627e-05, "loss": 3.402443695068359, "step": 91420 }, { "epoch": 0.0823, "grad_norm": 0.2082933634519577, "learning_rate": 9.807011745775132e-05, "loss": 3.4014442443847654, "step": 91430 }, { "epoch": 0.0824, "grad_norm": 0.21152393519878387, "learning_rate": 9.806556536727061e-05, "loss": 3.3851043701171877, "step": 91440 }, { "epoch": 0.0825, "grad_norm": 0.18621787428855896, "learning_rate": 9.806100802041193e-05, "loss": 3.3793067932128906, "step": 91450 }, { "epoch": 0.0826, "grad_norm": 0.19067880511283875, "learning_rate": 9.805644541767368e-05, "loss": 3.4126476287841796, "step": 91460 }, { "epoch": 0.0827, "grad_norm": 0.20056810975074768, "learning_rate": 9.805187755955478e-05, "loss": 3.4147029876708985, "step": 91470 }, { "epoch": 0.0828, "grad_norm": 0.19175231456756592, "learning_rate": 9.804730444655483e-05, "loss": 3.35157470703125, "step": 91480 }, { "epoch": 0.0829, "grad_norm": 0.195384681224823, "learning_rate": 9.804272607917388e-05, "loss": 3.4343708038330076, "step": 91490 }, { "epoch": 0.083, "grad_norm": 0.19017109274864197, "learning_rate": 9.803814245791265e-05, "loss": 3.3935638427734376, "step": 91500 }, { "epoch": 0.0831, "grad_norm": 0.18711380660533905, "learning_rate": 9.803355358327239e-05, "loss": 3.4113079071044923, "step": 91510 }, { "epoch": 0.0832, "grad_norm": 0.21106573939323425, "learning_rate": 9.802895945575492e-05, "loss": 3.428888702392578, "step": 91520 }, { "epoch": 0.0833, "grad_norm": 0.21212129294872284, "learning_rate": 9.802436007586266e-05, "loss": 3.4689876556396486, "step": 91530 }, { "epoch": 0.0834, "grad_norm": 0.19798506796360016, "learning_rate": 9.801975544409858e-05, "loss": 3.4709835052490234, "step": 91540 }, { "epoch": 0.0835, "grad_norm": 0.2003561407327652, "learning_rate": 9.801514556096625e-05, "loss": 3.387255096435547, "step": 91550 }, { "epoch": 0.0836, "grad_norm": 0.2079688161611557, "learning_rate": 9.801053042696977e-05, "loss": 3.5063541412353514, "step": 91560 }, { "epoch": 0.0837, "grad_norm": 0.19824376702308655, "learning_rate": 9.800591004261388e-05, "loss": 3.3946697235107424, "step": 91570 }, { "epoch": 0.0838, "grad_norm": 0.1988406479358673, "learning_rate": 9.800128440840385e-05, "loss": 3.401607894897461, "step": 91580 }, { "epoch": 0.0839, "grad_norm": 0.2218780666589737, "learning_rate": 9.799665352484552e-05, "loss": 3.6584468841552735, "step": 91590 }, { "epoch": 0.084, "grad_norm": 0.21064074337482452, "learning_rate": 9.799201739244532e-05, "loss": 3.4322872161865234, "step": 91600 }, { "epoch": 0.0841, "grad_norm": 0.2188245803117752, "learning_rate": 9.798737601171025e-05, "loss": 3.650586700439453, "step": 91610 }, { "epoch": 0.0842, "grad_norm": 0.19057540595531464, "learning_rate": 9.79827293831479e-05, "loss": 3.4100948333740235, "step": 91620 }, { "epoch": 0.0843, "grad_norm": 0.2076670378446579, "learning_rate": 9.797807750726638e-05, "loss": 3.4359912872314453, "step": 91630 }, { "epoch": 0.0844, "grad_norm": 0.18836377561092377, "learning_rate": 9.797342038457446e-05, "loss": 3.4130393981933596, "step": 91640 }, { "epoch": 0.0845, "grad_norm": 0.1922067105770111, "learning_rate": 9.796875801558141e-05, "loss": 3.4421764373779298, "step": 91650 }, { "epoch": 0.0846, "grad_norm": 0.17354799807071686, "learning_rate": 9.79640904007971e-05, "loss": 3.433587646484375, "step": 91660 }, { "epoch": 0.0847, "grad_norm": 0.19191983342170715, "learning_rate": 9.795941754073199e-05, "loss": 3.4083648681640626, "step": 91670 }, { "epoch": 0.0848, "grad_norm": 0.1727205365896225, "learning_rate": 9.795473943589705e-05, "loss": 3.433949279785156, "step": 91680 }, { "epoch": 0.0849, "grad_norm": 0.1995992213487625, "learning_rate": 9.795005608680394e-05, "loss": 3.4083507537841795, "step": 91690 }, { "epoch": 0.085, "grad_norm": 0.33368152379989624, "learning_rate": 9.794536749396477e-05, "loss": 3.451213836669922, "step": 91700 }, { "epoch": 0.0851, "grad_norm": 0.1926344335079193, "learning_rate": 9.79406736578923e-05, "loss": 3.4025295257568358, "step": 91710 }, { "epoch": 0.0852, "grad_norm": 0.18497197329998016, "learning_rate": 9.793597457909984e-05, "loss": 3.3778240203857424, "step": 91720 }, { "epoch": 0.0853, "grad_norm": 0.2649877071380615, "learning_rate": 9.793127025810127e-05, "loss": 3.505254364013672, "step": 91730 }, { "epoch": 0.0854, "grad_norm": 0.20104698836803436, "learning_rate": 9.792656069541104e-05, "loss": 3.401994323730469, "step": 91740 }, { "epoch": 0.0855, "grad_norm": 0.1970488578081131, "learning_rate": 9.79218458915442e-05, "loss": 3.4050521850585938, "step": 91750 }, { "epoch": 0.0856, "grad_norm": 1.1522005796432495, "learning_rate": 9.791712584701634e-05, "loss": 3.4115131378173826, "step": 91760 }, { "epoch": 0.0857, "grad_norm": 2.9208922386169434, "learning_rate": 9.791240056234364e-05, "loss": 3.506179428100586, "step": 91770 }, { "epoch": 0.0858, "grad_norm": 4.161956787109375, "learning_rate": 9.790767003804283e-05, "loss": 3.7007461547851563, "step": 91780 }, { "epoch": 0.0859, "grad_norm": 0.2534559667110443, "learning_rate": 9.790293427463126e-05, "loss": 3.6764793395996094, "step": 91790 }, { "epoch": 0.086, "grad_norm": 0.2456006109714508, "learning_rate": 9.789819327262684e-05, "loss": 3.574969482421875, "step": 91800 }, { "epoch": 0.0861, "grad_norm": 0.2428002804517746, "learning_rate": 9.7893447032548e-05, "loss": 3.4407573699951173, "step": 91810 }, { "epoch": 0.0862, "grad_norm": 0.20644572377204895, "learning_rate": 9.78886955549138e-05, "loss": 3.3847957611083985, "step": 91820 }, { "epoch": 0.0863, "grad_norm": 0.20137767493724823, "learning_rate": 9.788393884024387e-05, "loss": 3.4309871673583983, "step": 91830 }, { "epoch": 0.0864, "grad_norm": 0.4157596230506897, "learning_rate": 9.787917688905836e-05, "loss": 3.5784957885742186, "step": 91840 }, { "epoch": 0.0865, "grad_norm": 0.19170531630516052, "learning_rate": 9.787440970187807e-05, "loss": 3.429454803466797, "step": 91850 }, { "epoch": 0.0866, "grad_norm": 0.18199092149734497, "learning_rate": 9.786963727922429e-05, "loss": 3.458285903930664, "step": 91860 }, { "epoch": 0.0867, "grad_norm": 0.17982378602027893, "learning_rate": 9.786485962161897e-05, "loss": 3.581888961791992, "step": 91870 }, { "epoch": 0.0868, "grad_norm": 0.37853315472602844, "learning_rate": 9.786007672958455e-05, "loss": 3.5339317321777344, "step": 91880 }, { "epoch": 0.0869, "grad_norm": 0.18710732460021973, "learning_rate": 9.78552886036441e-05, "loss": 3.3405525207519533, "step": 91890 }, { "epoch": 0.087, "grad_norm": 0.1895432472229004, "learning_rate": 9.785049524432124e-05, "loss": 3.5724555969238283, "step": 91900 }, { "epoch": 0.0871, "grad_norm": 0.23891156911849976, "learning_rate": 9.784569665214016e-05, "loss": 3.4014656066894533, "step": 91910 }, { "epoch": 0.0872, "grad_norm": 0.49472784996032715, "learning_rate": 9.784089282762563e-05, "loss": 3.476654052734375, "step": 91920 }, { "epoch": 0.0873, "grad_norm": 0.19059666991233826, "learning_rate": 9.7836083771303e-05, "loss": 3.4351242065429686, "step": 91930 }, { "epoch": 0.0874, "grad_norm": 0.20039673149585724, "learning_rate": 9.783126948369817e-05, "loss": 3.4712753295898438, "step": 91940 }, { "epoch": 0.0875, "grad_norm": 0.19425639510154724, "learning_rate": 9.78264499653376e-05, "loss": 3.3690380096435546, "step": 91950 }, { "epoch": 0.0876, "grad_norm": 0.18458005785942078, "learning_rate": 9.782162521674838e-05, "loss": 3.3809600830078126, "step": 91960 }, { "epoch": 0.0877, "grad_norm": 0.19268442690372467, "learning_rate": 9.781679523845812e-05, "loss": 3.4067996978759765, "step": 91970 }, { "epoch": 0.0878, "grad_norm": 0.20264291763305664, "learning_rate": 9.781196003099502e-05, "loss": 3.404836654663086, "step": 91980 }, { "epoch": 0.0879, "grad_norm": 0.1768999546766281, "learning_rate": 9.780711959488786e-05, "loss": 3.3836597442626952, "step": 91990 }, { "epoch": 0.088, "grad_norm": 0.18540772795677185, "learning_rate": 9.780227393066599e-05, "loss": 3.409617233276367, "step": 92000 }, { "epoch": 0.0881, "grad_norm": 0.21679256856441498, "learning_rate": 9.77974230388593e-05, "loss": 3.4451221466064452, "step": 92010 }, { "epoch": 0.0882, "grad_norm": 0.5126044750213623, "learning_rate": 9.779256691999829e-05, "loss": 2.9811859130859375, "step": 92020 }, { "epoch": 0.0883, "grad_norm": 0.20593369007110596, "learning_rate": 9.778770557461403e-05, "loss": 3.456888198852539, "step": 92030 }, { "epoch": 0.0884, "grad_norm": 0.19467945396900177, "learning_rate": 9.778283900323812e-05, "loss": 3.4368019104003906, "step": 92040 }, { "epoch": 0.0885, "grad_norm": 0.18081435561180115, "learning_rate": 9.777796720640277e-05, "loss": 3.419713592529297, "step": 92050 }, { "epoch": 0.0886, "grad_norm": 0.17966784536838531, "learning_rate": 9.777309018464078e-05, "loss": 3.3850425720214843, "step": 92060 }, { "epoch": 0.0887, "grad_norm": 0.22639235854148865, "learning_rate": 9.776820793848547e-05, "loss": 3.4650577545166015, "step": 92070 }, { "epoch": 0.0888, "grad_norm": 0.29963475465774536, "learning_rate": 9.776332046847075e-05, "loss": 3.481368637084961, "step": 92080 }, { "epoch": 0.0889, "grad_norm": 0.18343707919120789, "learning_rate": 9.775842777513111e-05, "loss": 3.3530738830566404, "step": 92090 }, { "epoch": 0.089, "grad_norm": 0.1991959661245346, "learning_rate": 9.775352985900163e-05, "loss": 3.404874801635742, "step": 92100 }, { "epoch": 0.0891, "grad_norm": 0.1778908520936966, "learning_rate": 9.774862672061791e-05, "loss": 3.404584503173828, "step": 92110 }, { "epoch": 0.0892, "grad_norm": 0.19873136281967163, "learning_rate": 9.774371836051616e-05, "loss": 3.3428462982177733, "step": 92120 }, { "epoch": 0.0893, "grad_norm": 0.1868468075990677, "learning_rate": 9.773880477923315e-05, "loss": 3.36285400390625, "step": 92130 }, { "epoch": 0.0894, "grad_norm": 0.18620771169662476, "learning_rate": 9.773388597730623e-05, "loss": 3.368159866333008, "step": 92140 }, { "epoch": 0.0895, "grad_norm": 0.19430547952651978, "learning_rate": 9.77289619552733e-05, "loss": 3.508283996582031, "step": 92150 }, { "epoch": 0.0896, "grad_norm": 0.18186146020889282, "learning_rate": 9.772403271367285e-05, "loss": 3.3873424530029297, "step": 92160 }, { "epoch": 0.0897, "grad_norm": 0.1978245973587036, "learning_rate": 9.771909825304396e-05, "loss": 3.442005920410156, "step": 92170 }, { "epoch": 0.0898, "grad_norm": 0.1792907863855362, "learning_rate": 9.771415857392619e-05, "loss": 3.4279964447021483, "step": 92180 }, { "epoch": 0.0899, "grad_norm": 0.18308980762958527, "learning_rate": 9.770921367685978e-05, "loss": 3.383203125, "step": 92190 }, { "epoch": 0.09, "grad_norm": 0.18515586853027344, "learning_rate": 9.770426356238551e-05, "loss": 3.3613250732421873, "step": 92200 }, { "epoch": 0.0901, "grad_norm": 0.18211492896080017, "learning_rate": 9.769930823104469e-05, "loss": 3.39209098815918, "step": 92210 }, { "epoch": 0.0902, "grad_norm": 0.19500291347503662, "learning_rate": 9.769434768337926e-05, "loss": 3.404861831665039, "step": 92220 }, { "epoch": 0.0903, "grad_norm": 0.40389484167099, "learning_rate": 9.768938191993164e-05, "loss": 3.416990280151367, "step": 92230 }, { "epoch": 0.0904, "grad_norm": 0.1787511706352234, "learning_rate": 9.768441094124494e-05, "loss": 3.4527652740478514, "step": 92240 }, { "epoch": 0.0905, "grad_norm": 0.18241560459136963, "learning_rate": 9.767943474786275e-05, "loss": 3.4092899322509767, "step": 92250 }, { "epoch": 0.0906, "grad_norm": 0.20309799909591675, "learning_rate": 9.767445334032923e-05, "loss": 3.3670639038085937, "step": 92260 }, { "epoch": 0.0907, "grad_norm": 0.20976105332374573, "learning_rate": 9.766946671918919e-05, "loss": 3.4671154022216797, "step": 92270 }, { "epoch": 0.0908, "grad_norm": 0.37315070629119873, "learning_rate": 9.766447488498796e-05, "loss": 3.3924598693847656, "step": 92280 }, { "epoch": 0.0909, "grad_norm": 0.18292881548404694, "learning_rate": 9.765947783827139e-05, "loss": 3.367724609375, "step": 92290 }, { "epoch": 0.091, "grad_norm": 0.20175138115882874, "learning_rate": 9.765447557958599e-05, "loss": 3.439516067504883, "step": 92300 }, { "epoch": 0.0911, "grad_norm": 0.18270353972911835, "learning_rate": 9.764946810947879e-05, "loss": 3.3930484771728517, "step": 92310 }, { "epoch": 0.0912, "grad_norm": 0.21533581614494324, "learning_rate": 9.764445542849738e-05, "loss": 3.4076217651367187, "step": 92320 }, { "epoch": 0.0913, "grad_norm": 0.16901718080043793, "learning_rate": 9.763943753718998e-05, "loss": 3.363730621337891, "step": 92330 }, { "epoch": 0.0914, "grad_norm": 0.17504799365997314, "learning_rate": 9.76344144361053e-05, "loss": 3.4189586639404297, "step": 92340 }, { "epoch": 0.0915, "grad_norm": 0.17356206476688385, "learning_rate": 9.762938612579269e-05, "loss": 3.3613044738769533, "step": 92350 }, { "epoch": 0.0916, "grad_norm": 0.20273666083812714, "learning_rate": 9.762435260680202e-05, "loss": 3.393584060668945, "step": 92360 }, { "epoch": 0.0917, "grad_norm": 0.17738497257232666, "learning_rate": 9.761931387968373e-05, "loss": 3.366605758666992, "step": 92370 }, { "epoch": 0.0918, "grad_norm": 0.18805056810379028, "learning_rate": 9.76142699449889e-05, "loss": 3.3506305694580076, "step": 92380 }, { "epoch": 0.0919, "grad_norm": 0.1840905100107193, "learning_rate": 9.760922080326908e-05, "loss": 3.377168655395508, "step": 92390 }, { "epoch": 0.092, "grad_norm": 0.1923864334821701, "learning_rate": 9.760416645507644e-05, "loss": 3.3742027282714844, "step": 92400 }, { "epoch": 0.0921, "grad_norm": 0.18857623636722565, "learning_rate": 9.759910690096375e-05, "loss": 3.3752307891845703, "step": 92410 }, { "epoch": 0.0922, "grad_norm": 0.18799597024917603, "learning_rate": 9.759404214148429e-05, "loss": 3.400809478759766, "step": 92420 }, { "epoch": 0.0923, "grad_norm": 0.18187573552131653, "learning_rate": 9.758897217719191e-05, "loss": 3.378226089477539, "step": 92430 }, { "epoch": 0.0924, "grad_norm": 0.17371349036693573, "learning_rate": 9.758389700864113e-05, "loss": 3.366255187988281, "step": 92440 }, { "epoch": 0.0925, "grad_norm": 0.18528704345226288, "learning_rate": 9.757881663638688e-05, "loss": 3.350115203857422, "step": 92450 }, { "epoch": 0.0926, "grad_norm": 0.18509893119335175, "learning_rate": 9.757373106098478e-05, "loss": 3.369429016113281, "step": 92460 }, { "epoch": 0.0927, "grad_norm": 0.17606227099895477, "learning_rate": 9.756864028299097e-05, "loss": 3.365740203857422, "step": 92470 }, { "epoch": 0.0928, "grad_norm": 0.19974029064178467, "learning_rate": 9.75635443029622e-05, "loss": 3.3666618347167967, "step": 92480 }, { "epoch": 0.0929, "grad_norm": 0.35590195655822754, "learning_rate": 9.755844312145572e-05, "loss": 3.3563751220703124, "step": 92490 }, { "epoch": 0.093, "grad_norm": 0.1815875768661499, "learning_rate": 9.755333673902941e-05, "loss": 3.366680908203125, "step": 92500 }, { "epoch": 0.0931, "grad_norm": 0.17167527973651886, "learning_rate": 9.75482251562417e-05, "loss": 3.3464385986328127, "step": 92510 }, { "epoch": 0.0932, "grad_norm": 0.1908465325832367, "learning_rate": 9.754310837365155e-05, "loss": 3.351943588256836, "step": 92520 }, { "epoch": 0.0933, "grad_norm": 0.17142222821712494, "learning_rate": 9.753798639181856e-05, "loss": 3.3770423889160157, "step": 92530 }, { "epoch": 0.0934, "grad_norm": 0.19346436858177185, "learning_rate": 9.753285921130286e-05, "loss": 3.3747467041015624, "step": 92540 }, { "epoch": 0.0935, "grad_norm": 0.1873231679201126, "learning_rate": 9.752772683266512e-05, "loss": 3.374437713623047, "step": 92550 }, { "epoch": 0.0936, "grad_norm": 0.18630506098270416, "learning_rate": 9.752258925646665e-05, "loss": 3.3352733612060548, "step": 92560 }, { "epoch": 0.0937, "grad_norm": 0.19445307552814484, "learning_rate": 9.751744648326926e-05, "loss": 3.2858631134033205, "step": 92570 }, { "epoch": 0.0938, "grad_norm": 0.1763097196817398, "learning_rate": 9.751229851363536e-05, "loss": 3.3674556732177736, "step": 92580 }, { "epoch": 0.0939, "grad_norm": 0.18313275277614594, "learning_rate": 9.750714534812793e-05, "loss": 3.3698997497558594, "step": 92590 }, { "epoch": 0.094, "grad_norm": 0.18662159144878387, "learning_rate": 9.750198698731053e-05, "loss": 3.352142333984375, "step": 92600 }, { "epoch": 0.0941, "grad_norm": 0.21142643690109253, "learning_rate": 9.749682343174722e-05, "loss": 3.5436946868896486, "step": 92610 }, { "epoch": 0.0942, "grad_norm": 0.39172062277793884, "learning_rate": 9.749165468200272e-05, "loss": 3.412789154052734, "step": 92620 }, { "epoch": 0.0943, "grad_norm": 0.17983540892601013, "learning_rate": 9.748648073864229e-05, "loss": 3.3842884063720704, "step": 92630 }, { "epoch": 0.0944, "grad_norm": 0.1777448058128357, "learning_rate": 9.748130160223168e-05, "loss": 3.426255798339844, "step": 92640 }, { "epoch": 0.0945, "grad_norm": 0.1973676085472107, "learning_rate": 9.747611727333734e-05, "loss": 3.295685577392578, "step": 92650 }, { "epoch": 0.0946, "grad_norm": 1.7789175510406494, "learning_rate": 9.74709277525262e-05, "loss": 3.3180915832519533, "step": 92660 }, { "epoch": 0.0947, "grad_norm": 0.2238033413887024, "learning_rate": 9.746573304036576e-05, "loss": 3.3654510498046877, "step": 92670 }, { "epoch": 0.0948, "grad_norm": 0.2121528834104538, "learning_rate": 9.746053313742412e-05, "loss": 3.39679069519043, "step": 92680 }, { "epoch": 0.0949, "grad_norm": 0.1789523810148239, "learning_rate": 9.745532804426994e-05, "loss": 3.330720901489258, "step": 92690 }, { "epoch": 0.095, "grad_norm": 0.4916650056838989, "learning_rate": 9.745011776147242e-05, "loss": 3.4212265014648438, "step": 92700 }, { "epoch": 0.0951, "grad_norm": 0.18469475209712982, "learning_rate": 9.744490228960138e-05, "loss": 3.4230377197265627, "step": 92710 }, { "epoch": 0.0952, "grad_norm": 0.1820952594280243, "learning_rate": 9.743968162922713e-05, "loss": 3.357303237915039, "step": 92720 }, { "epoch": 0.0953, "grad_norm": 0.17372813820838928, "learning_rate": 9.743445578092064e-05, "loss": 3.4083492279052736, "step": 92730 }, { "epoch": 0.0954, "grad_norm": 0.19119414687156677, "learning_rate": 9.742922474525338e-05, "loss": 3.3719551086425783, "step": 92740 }, { "epoch": 0.0955, "grad_norm": 0.1976037174463272, "learning_rate": 9.742398852279741e-05, "loss": 3.4511878967285154, "step": 92750 }, { "epoch": 0.0956, "grad_norm": 0.17340926826000214, "learning_rate": 9.741874711412535e-05, "loss": 3.4062644958496096, "step": 92760 }, { "epoch": 0.0957, "grad_norm": 0.17307507991790771, "learning_rate": 9.741350051981042e-05, "loss": 3.325326156616211, "step": 92770 }, { "epoch": 0.0958, "grad_norm": 0.21087615191936493, "learning_rate": 9.740824874042633e-05, "loss": 3.3633487701416014, "step": 92780 }, { "epoch": 0.0959, "grad_norm": 0.18066388368606567, "learning_rate": 9.740299177654746e-05, "loss": 3.3623706817626955, "step": 92790 }, { "epoch": 0.096, "grad_norm": 0.18083637952804565, "learning_rate": 9.739772962874867e-05, "loss": 3.4319232940673827, "step": 92800 }, { "epoch": 0.0961, "grad_norm": 0.179342120885849, "learning_rate": 9.739246229760541e-05, "loss": 3.3491634368896483, "step": 92810 }, { "epoch": 0.0962, "grad_norm": 0.18278174102306366, "learning_rate": 9.738718978369376e-05, "loss": 3.3342872619628907, "step": 92820 }, { "epoch": 0.0963, "grad_norm": 0.18882253766059875, "learning_rate": 9.738191208759025e-05, "loss": 3.408319091796875, "step": 92830 }, { "epoch": 0.0964, "grad_norm": 0.16867266595363617, "learning_rate": 9.73766292098721e-05, "loss": 3.3429012298583984, "step": 92840 }, { "epoch": 0.0965, "grad_norm": 0.1763424277305603, "learning_rate": 9.737134115111699e-05, "loss": 3.3400306701660156, "step": 92850 }, { "epoch": 0.0966, "grad_norm": 0.19429665803909302, "learning_rate": 9.736604791190323e-05, "loss": 3.3200363159179687, "step": 92860 }, { "epoch": 0.0967, "grad_norm": 0.19003814458847046, "learning_rate": 9.73607494928097e-05, "loss": 3.396249008178711, "step": 92870 }, { "epoch": 0.0968, "grad_norm": 0.19364269077777863, "learning_rate": 9.735544589441581e-05, "loss": 3.41319580078125, "step": 92880 }, { "epoch": 0.0969, "grad_norm": 0.18898577988147736, "learning_rate": 9.735013711730154e-05, "loss": 3.3353824615478516, "step": 92890 }, { "epoch": 0.097, "grad_norm": 0.2528945207595825, "learning_rate": 9.734482316204747e-05, "loss": 3.408232498168945, "step": 92900 }, { "epoch": 0.0971, "grad_norm": 0.1808372437953949, "learning_rate": 9.733950402923473e-05, "loss": 3.3661766052246094, "step": 92910 }, { "epoch": 0.0972, "grad_norm": 0.18153300881385803, "learning_rate": 9.7334179719445e-05, "loss": 3.341472625732422, "step": 92920 }, { "epoch": 0.0973, "grad_norm": 0.18229012191295624, "learning_rate": 9.732885023326053e-05, "loss": 3.342691421508789, "step": 92930 }, { "epoch": 0.0974, "grad_norm": 0.186013862490654, "learning_rate": 9.732351557126418e-05, "loss": 3.3142879486083983, "step": 92940 }, { "epoch": 0.0975, "grad_norm": 0.1875489503145218, "learning_rate": 9.731817573403929e-05, "loss": 3.42895393371582, "step": 92950 }, { "epoch": 0.0976, "grad_norm": 0.1888951063156128, "learning_rate": 9.731283072216985e-05, "loss": 3.3856414794921874, "step": 92960 }, { "epoch": 0.0977, "grad_norm": 0.1919674128293991, "learning_rate": 9.730748053624039e-05, "loss": 4.216233444213867, "step": 92970 }, { "epoch": 0.0978, "grad_norm": 0.1826237291097641, "learning_rate": 9.730212517683598e-05, "loss": 3.3683616638183596, "step": 92980 }, { "epoch": 0.0979, "grad_norm": 0.18626046180725098, "learning_rate": 9.729676464454228e-05, "loss": 3.5407230377197267, "step": 92990 }, { "epoch": 0.098, "grad_norm": 0.17354334890842438, "learning_rate": 9.72913989399455e-05, "loss": 3.362946319580078, "step": 93000 }, { "epoch": 0.0981, "grad_norm": 0.2029949277639389, "learning_rate": 9.728602806363242e-05, "loss": 3.3592864990234377, "step": 93010 }, { "epoch": 0.0982, "grad_norm": 0.18062716722488403, "learning_rate": 9.728065201619043e-05, "loss": 3.334847640991211, "step": 93020 }, { "epoch": 0.0983, "grad_norm": 0.1901557296514511, "learning_rate": 9.727527079820742e-05, "loss": 3.3312763214111327, "step": 93030 }, { "epoch": 0.0984, "grad_norm": 0.17883121967315674, "learning_rate": 9.726988441027186e-05, "loss": 3.331074523925781, "step": 93040 }, { "epoch": 0.0985, "grad_norm": 0.17224442958831787, "learning_rate": 9.726449285297281e-05, "loss": 3.3487995147705076, "step": 93050 }, { "epoch": 0.0986, "grad_norm": 0.1863522231578827, "learning_rate": 9.72590961268999e-05, "loss": 3.313360595703125, "step": 93060 }, { "epoch": 0.0987, "grad_norm": 0.18949440121650696, "learning_rate": 9.725369423264328e-05, "loss": 3.3923789978027346, "step": 93070 }, { "epoch": 0.0988, "grad_norm": 0.187246173620224, "learning_rate": 9.72482871707937e-05, "loss": 3.3525440216064455, "step": 93080 }, { "epoch": 0.0989, "grad_norm": 0.18266661465168, "learning_rate": 9.724287494194247e-05, "loss": 3.3351070404052736, "step": 93090 }, { "epoch": 0.099, "grad_norm": 0.30355510115623474, "learning_rate": 9.723745754668147e-05, "loss": 3.394586944580078, "step": 93100 }, { "epoch": 0.0991, "grad_norm": 0.1980743408203125, "learning_rate": 9.723203498560313e-05, "loss": 3.390087890625, "step": 93110 }, { "epoch": 0.0992, "grad_norm": 0.18260569870471954, "learning_rate": 9.722660725930046e-05, "loss": 3.4523963928222656, "step": 93120 }, { "epoch": 0.0993, "grad_norm": 0.20429344475269318, "learning_rate": 9.722117436836702e-05, "loss": 3.314269256591797, "step": 93130 }, { "epoch": 0.0994, "grad_norm": 0.1995691955089569, "learning_rate": 9.721573631339696e-05, "loss": 3.387729263305664, "step": 93140 }, { "epoch": 0.0995, "grad_norm": 0.18795126676559448, "learning_rate": 9.721029309498494e-05, "loss": 3.3713035583496094, "step": 93150 }, { "epoch": 0.0996, "grad_norm": 0.19957448542118073, "learning_rate": 9.720484471372627e-05, "loss": 3.3358295440673826, "step": 93160 }, { "epoch": 0.0997, "grad_norm": 0.18129251897335052, "learning_rate": 9.719939117021673e-05, "loss": 3.3180885314941406, "step": 93170 }, { "epoch": 0.0998, "grad_norm": 0.18430814146995544, "learning_rate": 9.719393246505275e-05, "loss": 3.360474395751953, "step": 93180 }, { "epoch": 0.0999, "grad_norm": 0.17876079678535461, "learning_rate": 9.718846859883128e-05, "loss": 3.507733917236328, "step": 93190 }, { "epoch": 0.1, "grad_norm": 0.18751008808612823, "learning_rate": 9.718299957214982e-05, "loss": 3.346788787841797, "step": 93200 }, { "epoch": 0.1001, "grad_norm": 0.19816718995571136, "learning_rate": 9.717752538560646e-05, "loss": 3.3703990936279298, "step": 93210 }, { "epoch": 0.1002, "grad_norm": 0.18306848406791687, "learning_rate": 9.717204603979986e-05, "loss": 3.3657554626464843, "step": 93220 }, { "epoch": 0.1003, "grad_norm": 0.18021681904792786, "learning_rate": 9.716656153532922e-05, "loss": 3.4062759399414064, "step": 93230 }, { "epoch": 0.1004, "grad_norm": 0.17957623302936554, "learning_rate": 9.716107187279434e-05, "loss": 3.330896759033203, "step": 93240 }, { "epoch": 0.1005, "grad_norm": 0.17668591439723969, "learning_rate": 9.715557705279555e-05, "loss": 3.3385238647460938, "step": 93250 }, { "epoch": 0.1006, "grad_norm": 0.18595214188098907, "learning_rate": 9.715007707593372e-05, "loss": 3.3697303771972655, "step": 93260 }, { "epoch": 0.1007, "grad_norm": 0.196992889046669, "learning_rate": 9.714457194281036e-05, "loss": 3.4267856597900392, "step": 93270 }, { "epoch": 0.1008, "grad_norm": 0.18735450506210327, "learning_rate": 9.713906165402751e-05, "loss": 3.3501232147216795, "step": 93280 }, { "epoch": 0.1009, "grad_norm": 0.18659374117851257, "learning_rate": 9.713354621018774e-05, "loss": 3.3423126220703123, "step": 93290 }, { "epoch": 0.101, "grad_norm": 0.18845805525779724, "learning_rate": 9.712802561189422e-05, "loss": 3.3063350677490235, "step": 93300 }, { "epoch": 0.1011, "grad_norm": 0.18705794215202332, "learning_rate": 9.712249985975069e-05, "loss": 3.2908042907714843, "step": 93310 }, { "epoch": 0.1012, "grad_norm": 0.1941978633403778, "learning_rate": 9.71169689543614e-05, "loss": 3.337903594970703, "step": 93320 }, { "epoch": 0.1013, "grad_norm": 0.1861061453819275, "learning_rate": 9.711143289633123e-05, "loss": 3.4102420806884766, "step": 93330 }, { "epoch": 0.1014, "grad_norm": 0.17489460110664368, "learning_rate": 9.710589168626561e-05, "loss": 3.286606216430664, "step": 93340 }, { "epoch": 0.1015, "grad_norm": 0.18955348432064056, "learning_rate": 9.710034532477048e-05, "loss": 3.3659664154052735, "step": 93350 }, { "epoch": 0.1016, "grad_norm": 0.17220398783683777, "learning_rate": 9.709479381245239e-05, "loss": 3.3339153289794923, "step": 93360 }, { "epoch": 0.1017, "grad_norm": 0.17763367295265198, "learning_rate": 9.708923714991847e-05, "loss": 3.3459075927734374, "step": 93370 }, { "epoch": 0.1018, "grad_norm": 0.19728371500968933, "learning_rate": 9.708367533777638e-05, "loss": 3.3036819458007813, "step": 93380 }, { "epoch": 0.1019, "grad_norm": 0.1780911237001419, "learning_rate": 9.707810837663431e-05, "loss": 3.3681625366210937, "step": 93390 }, { "epoch": 0.102, "grad_norm": 0.18082532286643982, "learning_rate": 9.707253626710113e-05, "loss": 3.460093688964844, "step": 93400 }, { "epoch": 6.666666666666667e-05, "grad_norm": 0.23884116113185883, "learning_rate": 9.924185445979839e-05, "loss": 3.3800148010253905, "step": 93410 }, { "epoch": 0.00013333333333333334, "grad_norm": 0.1810702085494995, "learning_rate": 9.923994095120614e-05, "loss": 3.329336166381836, "step": 93420 }, { "epoch": 0.0002, "grad_norm": 0.1797095537185669, "learning_rate": 9.923802504936681e-05, "loss": 3.326512908935547, "step": 93430 }, { "epoch": 0.0002666666666666667, "grad_norm": 0.18862873315811157, "learning_rate": 9.92361067543735e-05, "loss": 3.342056655883789, "step": 93440 }, { "epoch": 0.0003333333333333333, "grad_norm": 0.6451561450958252, "learning_rate": 9.923418606631948e-05, "loss": 3.361593246459961, "step": 93450 }, { "epoch": 0.0004, "grad_norm": 0.289739727973938, "learning_rate": 9.923226298529809e-05, "loss": 3.3539966583251952, "step": 93460 }, { "epoch": 0.00046666666666666666, "grad_norm": 0.18954837322235107, "learning_rate": 9.923033751140277e-05, "loss": 3.3025569915771484, "step": 93470 }, { "epoch": 0.0005333333333333334, "grad_norm": 0.1752583235502243, "learning_rate": 9.922840964472715e-05, "loss": 3.312456512451172, "step": 93480 }, { "epoch": 0.0006, "grad_norm": 0.17407698929309845, "learning_rate": 9.92264793853649e-05, "loss": 3.3547943115234373, "step": 93490 }, { "epoch": 0.0006666666666666666, "grad_norm": 0.1975732445716858, "learning_rate": 9.922454673340986e-05, "loss": 3.3439411163330077, "step": 93500 }, { "epoch": 0.0007333333333333333, "grad_norm": 0.1941983848810196, "learning_rate": 9.922261168895595e-05, "loss": 3.371200942993164, "step": 93510 }, { "epoch": 0.0008, "grad_norm": 0.16999144852161407, "learning_rate": 9.922067425209722e-05, "loss": 3.308665084838867, "step": 93520 }, { "epoch": 0.0008666666666666666, "grad_norm": 0.2301211655139923, "learning_rate": 9.921873442292784e-05, "loss": 3.3197856903076173, "step": 93530 }, { "epoch": 0.0009333333333333333, "grad_norm": 0.23400846123695374, "learning_rate": 9.92167922015421e-05, "loss": 3.373531723022461, "step": 93540 }, { "epoch": 0.001, "grad_norm": 0.17699263989925385, "learning_rate": 9.921484758803439e-05, "loss": 3.3788005828857424, "step": 93550 }, { "epoch": 0.0010666666666666667, "grad_norm": 0.1765851378440857, "learning_rate": 9.921290058249921e-05, "loss": 3.3627555847167967, "step": 93560 }, { "epoch": 0.0011333333333333334, "grad_norm": 0.17661263048648834, "learning_rate": 9.921095118503123e-05, "loss": 3.425292205810547, "step": 93570 }, { "epoch": 0.0012, "grad_norm": 0.17981427907943726, "learning_rate": 9.920899939572518e-05, "loss": 3.3673065185546873, "step": 93580 }, { "epoch": 0.0012666666666666666, "grad_norm": 0.6728806495666504, "learning_rate": 9.920704521467592e-05, "loss": 3.4234394073486327, "step": 93590 }, { "epoch": 0.0013333333333333333, "grad_norm": 0.1776372343301773, "learning_rate": 9.920508864197842e-05, "loss": 3.4393409729003905, "step": 93600 }, { "epoch": 0.0014, "grad_norm": 0.19894006848335266, "learning_rate": 9.92031296777278e-05, "loss": 3.4410015106201173, "step": 93610 }, { "epoch": 0.0014666666666666667, "grad_norm": 0.16888518631458282, "learning_rate": 9.920116832201926e-05, "loss": 3.349628448486328, "step": 93620 }, { "epoch": 0.0015333333333333334, "grad_norm": 0.17417480051517487, "learning_rate": 9.919920457494815e-05, "loss": 3.3013282775878907, "step": 93630 }, { "epoch": 0.0016, "grad_norm": 0.18335992097854614, "learning_rate": 9.919723843660989e-05, "loss": 3.3443565368652344, "step": 93640 }, { "epoch": 0.0016666666666666668, "grad_norm": 0.2004677951335907, "learning_rate": 9.919526990710004e-05, "loss": 3.3677940368652344, "step": 93650 }, { "epoch": 0.0017333333333333333, "grad_norm": 0.19556932151317596, "learning_rate": 9.91932989865143e-05, "loss": 3.3638206481933595, "step": 93660 }, { "epoch": 0.0018, "grad_norm": 0.18939654529094696, "learning_rate": 9.919132567494844e-05, "loss": 3.3001548767089846, "step": 93670 }, { "epoch": 0.0018666666666666666, "grad_norm": 0.1811266988515854, "learning_rate": 9.918934997249838e-05, "loss": 3.2876422882080076, "step": 93680 }, { "epoch": 0.0019333333333333333, "grad_norm": 0.4066402316093445, "learning_rate": 9.918737187926014e-05, "loss": 3.3092254638671874, "step": 93690 }, { "epoch": 0.002, "grad_norm": 0.21160003542900085, "learning_rate": 9.918539139532989e-05, "loss": 3.462291717529297, "step": 93700 }, { "epoch": 0.0020666666666666667, "grad_norm": 0.1905018538236618, "learning_rate": 9.918340852080388e-05, "loss": 3.77709846496582, "step": 93710 }, { "epoch": 0.0021333333333333334, "grad_norm": 0.19303730130195618, "learning_rate": 9.918142325577846e-05, "loss": 3.3699062347412108, "step": 93720 }, { "epoch": 0.0022, "grad_norm": 0.18092261254787445, "learning_rate": 9.917943560035015e-05, "loss": 3.334912109375, "step": 93730 }, { "epoch": 0.002266666666666667, "grad_norm": 0.1827220916748047, "learning_rate": 9.917744555461552e-05, "loss": 3.313478469848633, "step": 93740 }, { "epoch": 0.0023333333333333335, "grad_norm": 0.1845168024301529, "learning_rate": 9.917545311867134e-05, "loss": 3.33974609375, "step": 93750 }, { "epoch": 0.0024, "grad_norm": 0.17826545238494873, "learning_rate": 9.917345829261442e-05, "loss": 3.3377723693847656, "step": 93760 }, { "epoch": 0.0024666666666666665, "grad_norm": 0.19547930359840393, "learning_rate": 9.917146107654172e-05, "loss": 3.340500259399414, "step": 93770 }, { "epoch": 0.002533333333333333, "grad_norm": 0.18329651653766632, "learning_rate": 9.916946147055033e-05, "loss": 3.359639358520508, "step": 93780 }, { "epoch": 0.0026, "grad_norm": 0.16862143576145172, "learning_rate": 9.916745947473743e-05, "loss": 3.327963638305664, "step": 93790 }, { "epoch": 0.0026666666666666666, "grad_norm": 0.175947368144989, "learning_rate": 9.91654550892003e-05, "loss": 3.313471221923828, "step": 93800 }, { "epoch": 0.0027333333333333333, "grad_norm": 0.1774461269378662, "learning_rate": 9.916344831403639e-05, "loss": 3.3586143493652343, "step": 93810 }, { "epoch": 0.0028, "grad_norm": 0.1899004727602005, "learning_rate": 9.916143914934321e-05, "loss": 3.311175537109375, "step": 93820 }, { "epoch": 0.0028666666666666667, "grad_norm": 0.173585444688797, "learning_rate": 9.915942759521846e-05, "loss": 3.4240989685058594, "step": 93830 }, { "epoch": 0.0029333333333333334, "grad_norm": 0.20002110302448273, "learning_rate": 9.915741365175986e-05, "loss": 3.3285961151123047, "step": 93840 }, { "epoch": 0.003, "grad_norm": 0.18498854339122772, "learning_rate": 9.915539731906531e-05, "loss": 3.295103073120117, "step": 93850 }, { "epoch": 0.0030666666666666668, "grad_norm": 0.1948019415140152, "learning_rate": 9.915337859723283e-05, "loss": 3.380606842041016, "step": 93860 }, { "epoch": 0.0031333333333333335, "grad_norm": 0.1792999804019928, "learning_rate": 9.915135748636053e-05, "loss": 3.3080848693847655, "step": 93870 }, { "epoch": 0.0032, "grad_norm": 0.31143638491630554, "learning_rate": 9.914933398654663e-05, "loss": 3.3654293060302733, "step": 93880 }, { "epoch": 0.003266666666666667, "grad_norm": 0.17813043296337128, "learning_rate": 9.914730809788948e-05, "loss": 3.4882228851318358, "step": 93890 }, { "epoch": 0.0033333333333333335, "grad_norm": 0.18024154007434845, "learning_rate": 9.914527982048755e-05, "loss": 3.3255115509033204, "step": 93900 }, { "epoch": 0.0034, "grad_norm": 0.17511488497257233, "learning_rate": 9.914324915443943e-05, "loss": 3.31469841003418, "step": 93910 }, { "epoch": 0.0034666666666666665, "grad_norm": 0.18209953606128693, "learning_rate": 9.914121609984381e-05, "loss": 3.3325576782226562, "step": 93920 }, { "epoch": 0.003533333333333333, "grad_norm": 0.3169401288032532, "learning_rate": 9.913918065679951e-05, "loss": 3.3089973449707033, "step": 93930 }, { "epoch": 0.0036, "grad_norm": 0.1825665384531021, "learning_rate": 9.913714282540546e-05, "loss": 3.278433609008789, "step": 93940 }, { "epoch": 0.0036666666666666666, "grad_norm": 0.1918283849954605, "learning_rate": 9.91351026057607e-05, "loss": 3.331926727294922, "step": 93950 }, { "epoch": 0.0037333333333333333, "grad_norm": 0.18794241547584534, "learning_rate": 9.91330599979644e-05, "loss": 3.3395641326904295, "step": 93960 }, { "epoch": 0.0038, "grad_norm": 0.19009187817573547, "learning_rate": 9.913101500211584e-05, "loss": 3.288629913330078, "step": 93970 }, { "epoch": 0.0038666666666666667, "grad_norm": 0.1893099546432495, "learning_rate": 9.912896761831439e-05, "loss": 3.3114650726318358, "step": 93980 }, { "epoch": 0.003933333333333333, "grad_norm": 0.18053309619426727, "learning_rate": 9.912691784665958e-05, "loss": 3.3053428649902346, "step": 93990 }, { "epoch": 0.004, "grad_norm": 0.1659596711397171, "learning_rate": 9.912486568725106e-05, "loss": 3.3179306030273437, "step": 94000 }, { "epoch": 0.004066666666666666, "grad_norm": 0.17590172588825226, "learning_rate": 9.912281114018852e-05, "loss": 3.2968406677246094, "step": 94010 }, { "epoch": 0.0041333333333333335, "grad_norm": 0.18330837786197662, "learning_rate": 9.912075420557187e-05, "loss": 3.432546615600586, "step": 94020 }, { "epoch": 0.0042, "grad_norm": 0.22175633907318115, "learning_rate": 9.911869488350105e-05, "loss": 3.3505619049072264, "step": 94030 }, { "epoch": 0.004266666666666667, "grad_norm": 0.21470962464809418, "learning_rate": 9.911663317407616e-05, "loss": 3.4385066986083985, "step": 94040 }, { "epoch": 0.004333333333333333, "grad_norm": 0.20280104875564575, "learning_rate": 9.911456907739742e-05, "loss": 3.3007743835449217, "step": 94050 }, { "epoch": 0.0044, "grad_norm": 0.19475992023944855, "learning_rate": 9.911250259356512e-05, "loss": 3.3073230743408204, "step": 94060 }, { "epoch": 0.0044666666666666665, "grad_norm": 0.1866377592086792, "learning_rate": 9.911043372267975e-05, "loss": 3.31383056640625, "step": 94070 }, { "epoch": 0.004533333333333334, "grad_norm": 0.17593887448310852, "learning_rate": 9.910836246484183e-05, "loss": 3.3178169250488283, "step": 94080 }, { "epoch": 0.0046, "grad_norm": 0.17992326617240906, "learning_rate": 9.910628882015202e-05, "loss": 3.340241622924805, "step": 94090 }, { "epoch": 0.004666666666666667, "grad_norm": 0.2113184630870819, "learning_rate": 9.910421278871115e-05, "loss": 3.327886962890625, "step": 94100 }, { "epoch": 0.004733333333333333, "grad_norm": 0.17677998542785645, "learning_rate": 9.910213437062009e-05, "loss": 3.2789817810058595, "step": 94110 }, { "epoch": 0.0048, "grad_norm": 0.19653072953224182, "learning_rate": 9.910005356597985e-05, "loss": 3.349150466918945, "step": 94120 }, { "epoch": 0.004866666666666667, "grad_norm": 0.20082801580429077, "learning_rate": 9.909797037489161e-05, "loss": 3.411514663696289, "step": 94130 }, { "epoch": 0.004933333333333333, "grad_norm": 0.19444863498210907, "learning_rate": 9.909588479745657e-05, "loss": 3.324434280395508, "step": 94140 }, { "epoch": 0.005, "grad_norm": 0.17367985844612122, "learning_rate": 9.909379683377612e-05, "loss": 3.355208969116211, "step": 94150 }, { "epoch": 0.005066666666666666, "grad_norm": 0.17224730551242828, "learning_rate": 9.909170648395173e-05, "loss": 3.298339080810547, "step": 94160 }, { "epoch": 0.0051333333333333335, "grad_norm": 0.16703574359416962, "learning_rate": 9.908961374808506e-05, "loss": 3.318088912963867, "step": 94170 }, { "epoch": 0.0052, "grad_norm": 0.18954472243785858, "learning_rate": 9.908751862627773e-05, "loss": 3.3607192993164063, "step": 94180 }, { "epoch": 0.005266666666666667, "grad_norm": 0.19603408873081207, "learning_rate": 9.908542111863164e-05, "loss": 3.362942123413086, "step": 94190 }, { "epoch": 0.005333333333333333, "grad_norm": 0.18754155933856964, "learning_rate": 9.90833212252487e-05, "loss": 3.3472526550292967, "step": 94200 }, { "epoch": 0.0054, "grad_norm": 0.25193914771080017, "learning_rate": 9.908121894623099e-05, "loss": 3.3018863677978514, "step": 94210 }, { "epoch": 0.0054666666666666665, "grad_norm": 0.3001921474933624, "learning_rate": 9.907911428168068e-05, "loss": 3.3268943786621095, "step": 94220 }, { "epoch": 0.005533333333333334, "grad_norm": 0.18549001216888428, "learning_rate": 9.907700723170008e-05, "loss": 3.4141212463378907, "step": 94230 }, { "epoch": 0.0056, "grad_norm": 0.18449144065380096, "learning_rate": 9.907489779639159e-05, "loss": 3.369432067871094, "step": 94240 }, { "epoch": 0.005666666666666667, "grad_norm": 0.18486620485782623, "learning_rate": 9.907278597585772e-05, "loss": 3.375998306274414, "step": 94250 }, { "epoch": 0.005733333333333333, "grad_norm": 0.19077368080615997, "learning_rate": 9.907067177020115e-05, "loss": 3.3388916015625, "step": 94260 }, { "epoch": 0.0058, "grad_norm": 0.18349787592887878, "learning_rate": 9.906855517952461e-05, "loss": 3.3072734832763673, "step": 94270 }, { "epoch": 0.005866666666666667, "grad_norm": 0.20034363865852356, "learning_rate": 9.906643620393098e-05, "loss": 3.293721389770508, "step": 94280 }, { "epoch": 0.005933333333333333, "grad_norm": 0.2265128344297409, "learning_rate": 9.906431484352325e-05, "loss": 3.3093029022216798, "step": 94290 }, { "epoch": 0.006, "grad_norm": 0.18591931462287903, "learning_rate": 9.906219109840454e-05, "loss": 3.356502151489258, "step": 94300 }, { "epoch": 0.006066666666666666, "grad_norm": 0.28109297156333923, "learning_rate": 9.906006496867805e-05, "loss": 3.3576793670654297, "step": 94310 }, { "epoch": 0.0061333333333333335, "grad_norm": 0.20066532492637634, "learning_rate": 9.905793645444713e-05, "loss": 3.318279266357422, "step": 94320 }, { "epoch": 0.0062, "grad_norm": 0.18576478958129883, "learning_rate": 9.905580555581524e-05, "loss": 3.33787841796875, "step": 94330 }, { "epoch": 0.006266666666666667, "grad_norm": 0.16647537052631378, "learning_rate": 9.905367227288593e-05, "loss": 3.285923385620117, "step": 94340 }, { "epoch": 0.006333333333333333, "grad_norm": 0.22623594105243683, "learning_rate": 9.90515366057629e-05, "loss": 3.4958683013916017, "step": 94350 }, { "epoch": 0.0064, "grad_norm": 0.18707327544689178, "learning_rate": 9.904939855454995e-05, "loss": 3.3304195404052734, "step": 94360 }, { "epoch": 0.006466666666666667, "grad_norm": 0.17339293658733368, "learning_rate": 9.904725811935101e-05, "loss": 3.315867233276367, "step": 94370 }, { "epoch": 0.006533333333333334, "grad_norm": 0.17200003564357758, "learning_rate": 9.904511530027008e-05, "loss": 3.318442153930664, "step": 94380 }, { "epoch": 0.0066, "grad_norm": 0.17675364017486572, "learning_rate": 9.904297009741134e-05, "loss": 3.35558967590332, "step": 94390 }, { "epoch": 0.006666666666666667, "grad_norm": 0.18250051140785217, "learning_rate": 9.904082251087903e-05, "loss": 3.3598953247070313, "step": 94400 }, { "epoch": 0.006733333333333333, "grad_norm": 0.19653673470020294, "learning_rate": 9.903867254077755e-05, "loss": 3.4689495086669924, "step": 94410 }, { "epoch": 0.0068, "grad_norm": 0.2662406861782074, "learning_rate": 9.90365201872114e-05, "loss": 3.595398712158203, "step": 94420 }, { "epoch": 0.006866666666666667, "grad_norm": 0.16970592737197876, "learning_rate": 9.903436545028517e-05, "loss": 3.3170955657958983, "step": 94430 }, { "epoch": 0.006933333333333333, "grad_norm": 0.17550423741340637, "learning_rate": 9.903220833010363e-05, "loss": 3.378022384643555, "step": 94440 }, { "epoch": 0.007, "grad_norm": 0.20462466776371002, "learning_rate": 9.903004882677156e-05, "loss": 3.333335113525391, "step": 94450 }, { "epoch": 0.007066666666666666, "grad_norm": 0.1875610500574112, "learning_rate": 9.902788694039397e-05, "loss": 3.336885452270508, "step": 94460 }, { "epoch": 0.0071333333333333335, "grad_norm": 0.19485868513584137, "learning_rate": 9.902572267107593e-05, "loss": 3.3707595825195313, "step": 94470 }, { "epoch": 0.0072, "grad_norm": 0.18921299278736115, "learning_rate": 9.902355601892261e-05, "loss": 3.3032760620117188, "step": 94480 }, { "epoch": 0.007266666666666667, "grad_norm": 0.17286232113838196, "learning_rate": 9.902138698403935e-05, "loss": 3.337356948852539, "step": 94490 }, { "epoch": 0.007333333333333333, "grad_norm": 0.17969980835914612, "learning_rate": 9.901921556653154e-05, "loss": 3.326616668701172, "step": 94500 }, { "epoch": 0.0074, "grad_norm": 0.1836596429347992, "learning_rate": 9.901704176650474e-05, "loss": 3.345718765258789, "step": 94510 }, { "epoch": 0.007466666666666667, "grad_norm": 0.28890863060951233, "learning_rate": 9.901486558406459e-05, "loss": 3.3318859100341798, "step": 94520 }, { "epoch": 0.007533333333333334, "grad_norm": 0.22125136852264404, "learning_rate": 9.901268701931688e-05, "loss": 3.2769920349121096, "step": 94530 }, { "epoch": 0.0076, "grad_norm": 0.17763900756835938, "learning_rate": 9.901050607236749e-05, "loss": 3.3155086517333983, "step": 94540 }, { "epoch": 0.007666666666666666, "grad_norm": 0.17545229196548462, "learning_rate": 9.90083227433224e-05, "loss": 3.291212463378906, "step": 94550 }, { "epoch": 0.007733333333333333, "grad_norm": 0.19482333958148956, "learning_rate": 9.900613703228773e-05, "loss": 3.4341697692871094, "step": 94560 }, { "epoch": 0.0078, "grad_norm": 0.1826552450656891, "learning_rate": 9.900394893936978e-05, "loss": 3.3326797485351562, "step": 94570 }, { "epoch": 0.007866666666666666, "grad_norm": 0.20640699565410614, "learning_rate": 9.900175846467481e-05, "loss": 3.432382583618164, "step": 94580 }, { "epoch": 0.007933333333333334, "grad_norm": 0.2610240578651428, "learning_rate": 9.899956560830934e-05, "loss": 3.3577571868896485, "step": 94590 }, { "epoch": 0.008, "grad_norm": 0.16333413124084473, "learning_rate": 9.899737037037993e-05, "loss": 3.300018310546875, "step": 94600 }, { "epoch": 0.008066666666666666, "grad_norm": 0.20294003188610077, "learning_rate": 9.899517275099328e-05, "loss": 3.2580745697021483, "step": 94610 }, { "epoch": 0.008133333333333333, "grad_norm": 0.18176016211509705, "learning_rate": 9.89929727502562e-05, "loss": 3.307281494140625, "step": 94620 }, { "epoch": 0.0082, "grad_norm": 0.17436489462852478, "learning_rate": 9.899077036827564e-05, "loss": 3.2848716735839845, "step": 94630 }, { "epoch": 0.008266666666666667, "grad_norm": 0.17697912454605103, "learning_rate": 9.898856560515864e-05, "loss": 3.339537811279297, "step": 94640 }, { "epoch": 0.008333333333333333, "grad_norm": 0.19742830097675323, "learning_rate": 9.898635846101231e-05, "loss": 3.327779006958008, "step": 94650 }, { "epoch": 0.0084, "grad_norm": 0.19468797743320465, "learning_rate": 9.8984148935944e-05, "loss": 3.291094970703125, "step": 94660 }, { "epoch": 0.008466666666666667, "grad_norm": 0.1805829405784607, "learning_rate": 9.898193703006103e-05, "loss": 3.3276084899902343, "step": 94670 }, { "epoch": 0.008533333333333334, "grad_norm": 0.19013074040412903, "learning_rate": 9.897972274347096e-05, "loss": 3.283792495727539, "step": 94680 }, { "epoch": 0.0086, "grad_norm": 0.20892280340194702, "learning_rate": 9.897750607628138e-05, "loss": 3.3556156158447266, "step": 94690 }, { "epoch": 0.008666666666666666, "grad_norm": 0.19025927782058716, "learning_rate": 9.897528702860006e-05, "loss": 3.296834182739258, "step": 94700 }, { "epoch": 0.008733333333333333, "grad_norm": 0.22958727180957794, "learning_rate": 9.897306560053483e-05, "loss": 3.3185523986816405, "step": 94710 }, { "epoch": 0.0088, "grad_norm": 0.17499449849128723, "learning_rate": 9.897084179219367e-05, "loss": 3.348261260986328, "step": 94720 }, { "epoch": 0.008866666666666667, "grad_norm": 0.17943352460861206, "learning_rate": 9.896861560368465e-05, "loss": 3.331198883056641, "step": 94730 }, { "epoch": 0.008933333333333333, "grad_norm": 0.17205707728862762, "learning_rate": 9.896638703511598e-05, "loss": 3.2859996795654296, "step": 94740 }, { "epoch": 0.009, "grad_norm": 0.2334224134683609, "learning_rate": 9.8964156086596e-05, "loss": 3.322832489013672, "step": 94750 }, { "epoch": 0.009066666666666667, "grad_norm": 0.17514783143997192, "learning_rate": 9.896192275823311e-05, "loss": 3.2868194580078125, "step": 94760 }, { "epoch": 0.009133333333333334, "grad_norm": 0.1885979026556015, "learning_rate": 9.895968705013586e-05, "loss": 3.2959720611572267, "step": 94770 }, { "epoch": 0.0092, "grad_norm": 0.1811433583498001, "learning_rate": 9.895744896241295e-05, "loss": 3.303547668457031, "step": 94780 }, { "epoch": 0.009266666666666666, "grad_norm": 0.1689879596233368, "learning_rate": 9.89552084951731e-05, "loss": 3.261520004272461, "step": 94790 }, { "epoch": 0.009333333333333334, "grad_norm": 0.18238626420497894, "learning_rate": 9.895296564852528e-05, "loss": 3.320184326171875, "step": 94800 }, { "epoch": 0.0094, "grad_norm": 0.1867680549621582, "learning_rate": 9.895072042257842e-05, "loss": 3.52550048828125, "step": 94810 }, { "epoch": 0.009466666666666667, "grad_norm": 0.33512115478515625, "learning_rate": 9.894847281744171e-05, "loss": 3.3579990386962892, "step": 94820 }, { "epoch": 0.009533333333333333, "grad_norm": 0.17731863260269165, "learning_rate": 9.894622283322436e-05, "loss": 3.3288284301757813, "step": 94830 }, { "epoch": 0.0096, "grad_norm": 0.18413913249969482, "learning_rate": 9.894397047003574e-05, "loss": 3.3153564453125, "step": 94840 }, { "epoch": 0.009666666666666667, "grad_norm": 0.1749851256608963, "learning_rate": 9.894171572798532e-05, "loss": 3.431722640991211, "step": 94850 }, { "epoch": 0.009733333333333333, "grad_norm": 0.1735619306564331, "learning_rate": 9.893945860718268e-05, "loss": 3.296839141845703, "step": 94860 }, { "epoch": 0.0098, "grad_norm": 0.2793469727039337, "learning_rate": 9.893719910773753e-05, "loss": 3.2742992401123048, "step": 94870 }, { "epoch": 0.009866666666666666, "grad_norm": 0.3187127411365509, "learning_rate": 9.89349372297597e-05, "loss": 3.3660449981689453, "step": 94880 }, { "epoch": 0.009933333333333334, "grad_norm": 0.18183879554271698, "learning_rate": 9.893267297335914e-05, "loss": 3.43750114440918, "step": 94890 }, { "epoch": 0.01, "grad_norm": 0.2009332776069641, "learning_rate": 9.893040633864585e-05, "loss": 3.2829734802246096, "step": 94900 }, { "epoch": 0.010066666666666666, "grad_norm": 0.1801930069923401, "learning_rate": 9.892813732573004e-05, "loss": 3.284302520751953, "step": 94910 }, { "epoch": 0.010133333333333333, "grad_norm": 0.18241249024868011, "learning_rate": 9.892586593472198e-05, "loss": 3.3415699005126953, "step": 94920 }, { "epoch": 0.0102, "grad_norm": 0.3710581958293915, "learning_rate": 9.892359216573206e-05, "loss": 3.4112239837646485, "step": 94930 }, { "epoch": 0.010266666666666667, "grad_norm": 0.18051566183567047, "learning_rate": 9.89213160188708e-05, "loss": 3.3054786682128907, "step": 94940 }, { "epoch": 0.010333333333333333, "grad_norm": 0.2034343034029007, "learning_rate": 9.891903749424884e-05, "loss": 3.242872619628906, "step": 94950 }, { "epoch": 0.0104, "grad_norm": 0.17827722430229187, "learning_rate": 9.891675659197692e-05, "loss": 3.274610900878906, "step": 94960 }, { "epoch": 0.010466666666666668, "grad_norm": 0.17230671644210815, "learning_rate": 9.89144733121659e-05, "loss": 3.288874053955078, "step": 94970 }, { "epoch": 0.010533333333333334, "grad_norm": 0.18677431344985962, "learning_rate": 9.891218765492673e-05, "loss": 3.2957088470458986, "step": 94980 }, { "epoch": 0.0106, "grad_norm": 0.18135879933834076, "learning_rate": 9.890989962037053e-05, "loss": 3.335614776611328, "step": 94990 }, { "epoch": 0.010666666666666666, "grad_norm": 0.20933423936367035, "learning_rate": 9.890760920860853e-05, "loss": 3.4230594635009766, "step": 95000 }, { "epoch": 0.010733333333333333, "grad_norm": 0.17838552594184875, "learning_rate": 9.8905316419752e-05, "loss": 3.292716217041016, "step": 95010 }, { "epoch": 0.0108, "grad_norm": 0.17149464786052704, "learning_rate": 9.890302125391239e-05, "loss": 3.3067623138427735, "step": 95020 }, { "epoch": 0.010866666666666667, "grad_norm": 0.1759299635887146, "learning_rate": 9.89007237112013e-05, "loss": 3.3522308349609373, "step": 95030 }, { "epoch": 0.010933333333333333, "grad_norm": 0.22994306683540344, "learning_rate": 9.889842379173035e-05, "loss": 3.314341735839844, "step": 95040 }, { "epoch": 0.011, "grad_norm": 0.17042219638824463, "learning_rate": 9.889612149561132e-05, "loss": 3.3260879516601562, "step": 95050 }, { "epoch": 0.011066666666666667, "grad_norm": 0.16258983314037323, "learning_rate": 9.889381682295616e-05, "loss": 3.3029441833496094, "step": 95060 }, { "epoch": 0.011133333333333334, "grad_norm": 0.1779240071773529, "learning_rate": 9.889150977387685e-05, "loss": 3.324973297119141, "step": 95070 }, { "epoch": 0.0112, "grad_norm": 0.17037154734134674, "learning_rate": 9.88892003484855e-05, "loss": 3.30736083984375, "step": 95080 }, { "epoch": 0.011266666666666666, "grad_norm": 0.16942942142486572, "learning_rate": 9.88868885468944e-05, "loss": 3.3094932556152346, "step": 95090 }, { "epoch": 0.011333333333333334, "grad_norm": 0.1768324077129364, "learning_rate": 9.888457436921591e-05, "loss": 3.2872940063476563, "step": 95100 }, { "epoch": 0.0114, "grad_norm": 0.1966913938522339, "learning_rate": 9.888225781556248e-05, "loss": 3.3080883026123047, "step": 95110 }, { "epoch": 0.011466666666666667, "grad_norm": 0.18909914791584015, "learning_rate": 9.887993888604672e-05, "loss": 3.2323551177978516, "step": 95120 }, { "epoch": 0.011533333333333333, "grad_norm": 0.18365263938903809, "learning_rate": 9.887761758078135e-05, "loss": 3.4146305084228517, "step": 95130 }, { "epoch": 0.0116, "grad_norm": 0.1877048909664154, "learning_rate": 9.887529389987916e-05, "loss": 3.305406951904297, "step": 95140 }, { "epoch": 0.011666666666666667, "grad_norm": 0.3084896504878998, "learning_rate": 9.887296784345313e-05, "loss": 3.4423686981201174, "step": 95150 }, { "epoch": 0.011733333333333333, "grad_norm": 0.18165603280067444, "learning_rate": 9.887063941161627e-05, "loss": 3.3031070709228514, "step": 95160 }, { "epoch": 0.0118, "grad_norm": 0.18081210553646088, "learning_rate": 9.88683086044818e-05, "loss": 3.3319812774658204, "step": 95170 }, { "epoch": 0.011866666666666666, "grad_norm": 0.17833514511585236, "learning_rate": 9.886597542216296e-05, "loss": 3.2685691833496096, "step": 95180 }, { "epoch": 0.011933333333333334, "grad_norm": 0.16370858252048492, "learning_rate": 9.886363986477318e-05, "loss": 3.3038703918457033, "step": 95190 }, { "epoch": 0.012, "grad_norm": 0.17662851512432098, "learning_rate": 9.886130193242598e-05, "loss": 3.2671272277832033, "step": 95200 }, { "epoch": 0.012066666666666667, "grad_norm": 0.18252894282341003, "learning_rate": 9.885896162523497e-05, "loss": 3.2824729919433593, "step": 95210 }, { "epoch": 0.012133333333333333, "grad_norm": 0.18103516101837158, "learning_rate": 9.885661894331392e-05, "loss": 3.3852798461914064, "step": 95220 }, { "epoch": 0.0122, "grad_norm": 0.19042804837226868, "learning_rate": 9.885427388677669e-05, "loss": 3.31328125, "step": 95230 }, { "epoch": 0.012266666666666667, "grad_norm": 0.17944425344467163, "learning_rate": 9.885192645573725e-05, "loss": 3.311440277099609, "step": 95240 }, { "epoch": 0.012333333333333333, "grad_norm": 0.201541930437088, "learning_rate": 9.884957665030967e-05, "loss": 3.2813186645507812, "step": 95250 }, { "epoch": 0.0124, "grad_norm": 0.2572249472141266, "learning_rate": 9.884722447060821e-05, "loss": 3.3072628021240233, "step": 95260 }, { "epoch": 0.012466666666666666, "grad_norm": 0.1739029437303543, "learning_rate": 9.884486991674716e-05, "loss": 3.3076007843017576, "step": 95270 }, { "epoch": 0.012533333333333334, "grad_norm": 0.16840247809886932, "learning_rate": 9.884251298884097e-05, "loss": 3.289551544189453, "step": 95280 }, { "epoch": 0.0126, "grad_norm": 0.46884384751319885, "learning_rate": 9.884015368700421e-05, "loss": 3.246815490722656, "step": 95290 }, { "epoch": 0.012666666666666666, "grad_norm": 0.18215042352676392, "learning_rate": 9.883779201135152e-05, "loss": 3.2991146087646483, "step": 95300 }, { "epoch": 0.012733333333333333, "grad_norm": 0.17488481104373932, "learning_rate": 9.883542796199772e-05, "loss": 3.2775726318359375, "step": 95310 }, { "epoch": 0.0128, "grad_norm": 0.19219952821731567, "learning_rate": 9.883306153905768e-05, "loss": 3.325973892211914, "step": 95320 }, { "epoch": 0.012866666666666667, "grad_norm": 0.1894197165966034, "learning_rate": 9.883069274264644e-05, "loss": 3.3251853942871095, "step": 95330 }, { "epoch": 0.012933333333333333, "grad_norm": 0.1857243925333023, "learning_rate": 9.882832157287912e-05, "loss": 3.4228897094726562, "step": 95340 }, { "epoch": 0.013, "grad_norm": 0.17456172406673431, "learning_rate": 9.882594802987097e-05, "loss": 3.3135948181152344, "step": 95350 }, { "epoch": 0.013066666666666667, "grad_norm": 0.2624697685241699, "learning_rate": 9.882357211373737e-05, "loss": 3.3483440399169924, "step": 95360 }, { "epoch": 0.013133333333333334, "grad_norm": 0.17772123217582703, "learning_rate": 9.882119382459377e-05, "loss": 3.3698734283447265, "step": 95370 }, { "epoch": 0.0132, "grad_norm": 0.19826465845108032, "learning_rate": 9.881881316255579e-05, "loss": 3.2755046844482423, "step": 95380 }, { "epoch": 0.013266666666666666, "grad_norm": 0.22985410690307617, "learning_rate": 9.881643012773911e-05, "loss": 3.370331573486328, "step": 95390 }, { "epoch": 0.013333333333333334, "grad_norm": 0.17841237783432007, "learning_rate": 9.881404472025959e-05, "loss": 3.3155677795410154, "step": 95400 }, { "epoch": 0.0134, "grad_norm": 0.1878955215215683, "learning_rate": 9.881165694023313e-05, "loss": 3.3456363677978516, "step": 95410 }, { "epoch": 0.013466666666666667, "grad_norm": 0.19409941136837006, "learning_rate": 9.880926678777583e-05, "loss": 3.317278289794922, "step": 95420 }, { "epoch": 0.013533333333333333, "grad_norm": 0.17708846926689148, "learning_rate": 9.880687426300382e-05, "loss": 3.303171920776367, "step": 95430 }, { "epoch": 0.0136, "grad_norm": 0.1773548275232315, "learning_rate": 9.880447936603341e-05, "loss": 3.3032371520996096, "step": 95440 }, { "epoch": 0.013666666666666667, "grad_norm": 0.1756131947040558, "learning_rate": 9.8802082096981e-05, "loss": 3.287733459472656, "step": 95450 }, { "epoch": 0.013733333333333334, "grad_norm": 0.30191096663475037, "learning_rate": 9.87996824559631e-05, "loss": 3.2999664306640626, "step": 95460 }, { "epoch": 0.0138, "grad_norm": 0.18450628221035004, "learning_rate": 9.879728044309633e-05, "loss": 3.071738624572754, "step": 95470 }, { "epoch": 0.013866666666666666, "grad_norm": 0.1781858205795288, "learning_rate": 9.879487605849744e-05, "loss": 3.263467025756836, "step": 95480 }, { "epoch": 0.013933333333333334, "grad_norm": 0.18029209971427917, "learning_rate": 9.879246930228331e-05, "loss": 3.30255126953125, "step": 95490 }, { "epoch": 0.014, "grad_norm": 0.21638089418411255, "learning_rate": 9.879006017457093e-05, "loss": 3.2586524963378904, "step": 95500 }, { "epoch": 0.014066666666666667, "grad_norm": 0.1768297553062439, "learning_rate": 9.878764867547733e-05, "loss": 3.2848979949951174, "step": 95510 }, { "epoch": 0.014133333333333333, "grad_norm": 0.17254656553268433, "learning_rate": 9.87852348051198e-05, "loss": 3.302748107910156, "step": 95520 }, { "epoch": 0.0142, "grad_norm": 0.6188946962356567, "learning_rate": 9.87828185636156e-05, "loss": 3.504010772705078, "step": 95530 }, { "epoch": 0.014266666666666667, "grad_norm": 0.19448859989643097, "learning_rate": 9.87803999510822e-05, "loss": 3.4139453887939455, "step": 95540 }, { "epoch": 0.014333333333333333, "grad_norm": 0.16873855888843536, "learning_rate": 9.877797896763715e-05, "loss": 3.35058708190918, "step": 95550 }, { "epoch": 0.0144, "grad_norm": 0.18354268372058868, "learning_rate": 9.877555561339808e-05, "loss": 3.2953872680664062, "step": 95560 }, { "epoch": 0.014466666666666666, "grad_norm": 0.1896948516368866, "learning_rate": 9.877312988848283e-05, "loss": 3.30643310546875, "step": 95570 }, { "epoch": 0.014533333333333334, "grad_norm": 0.16989552974700928, "learning_rate": 9.877070179300929e-05, "loss": 3.312907409667969, "step": 95580 }, { "epoch": 0.0146, "grad_norm": 0.20215757191181183, "learning_rate": 9.876827132709544e-05, "loss": 3.397999572753906, "step": 95590 }, { "epoch": 0.014666666666666666, "grad_norm": 0.1792881041765213, "learning_rate": 9.876583849085945e-05, "loss": 3.35510139465332, "step": 95600 }, { "epoch": 0.014733333333333333, "grad_norm": 0.17055252194404602, "learning_rate": 9.876340328441953e-05, "loss": 3.309208297729492, "step": 95610 }, { "epoch": 0.0148, "grad_norm": 0.18660378456115723, "learning_rate": 9.876096570789406e-05, "loss": 3.36682014465332, "step": 95620 }, { "epoch": 0.014866666666666667, "grad_norm": 0.17412392795085907, "learning_rate": 9.875852576140152e-05, "loss": 3.2986637115478517, "step": 95630 }, { "epoch": 0.014933333333333333, "grad_norm": 0.17900195717811584, "learning_rate": 9.875608344506047e-05, "loss": 3.3119670867919924, "step": 95640 }, { "epoch": 0.015, "grad_norm": 0.21265865862369537, "learning_rate": 9.875363875898967e-05, "loss": 3.315450668334961, "step": 95650 }, { "epoch": 0.015066666666666667, "grad_norm": 0.22314360737800598, "learning_rate": 9.87511917033079e-05, "loss": 3.3607814788818358, "step": 95660 }, { "epoch": 0.015133333333333334, "grad_norm": 0.16587868332862854, "learning_rate": 9.874874227813408e-05, "loss": 3.3494476318359374, "step": 95670 }, { "epoch": 0.0152, "grad_norm": 0.1699143946170807, "learning_rate": 9.874629048358732e-05, "loss": 3.292432403564453, "step": 95680 }, { "epoch": 0.015266666666666666, "grad_norm": 0.18086805939674377, "learning_rate": 9.874383631978672e-05, "loss": 3.264809799194336, "step": 95690 }, { "epoch": 0.015333333333333332, "grad_norm": 0.1822335124015808, "learning_rate": 9.874137978685163e-05, "loss": 3.290924835205078, "step": 95700 }, { "epoch": 0.0154, "grad_norm": 0.1908615529537201, "learning_rate": 9.873892088490139e-05, "loss": 3.30347900390625, "step": 95710 }, { "epoch": 0.015466666666666667, "grad_norm": 0.2221079021692276, "learning_rate": 9.873645961405553e-05, "loss": 3.2931243896484377, "step": 95720 }, { "epoch": 0.015533333333333333, "grad_norm": 0.1830497831106186, "learning_rate": 9.87339959744337e-05, "loss": 3.335132598876953, "step": 95730 }, { "epoch": 0.0156, "grad_norm": 0.17524121701717377, "learning_rate": 9.87315299661556e-05, "loss": 3.3073925018310546, "step": 95740 }, { "epoch": 0.015666666666666666, "grad_norm": 0.23278890550136566, "learning_rate": 9.872906158934113e-05, "loss": 3.365728759765625, "step": 95750 }, { "epoch": 0.015733333333333332, "grad_norm": 0.16816304624080658, "learning_rate": 9.872659084411024e-05, "loss": 3.274354934692383, "step": 95760 }, { "epoch": 0.0158, "grad_norm": 0.18601669371128082, "learning_rate": 9.872411773058301e-05, "loss": 3.257576751708984, "step": 95770 }, { "epoch": 0.015866666666666668, "grad_norm": 0.19619005918502808, "learning_rate": 9.872164224887965e-05, "loss": 3.307495880126953, "step": 95780 }, { "epoch": 0.015933333333333334, "grad_norm": 0.17162080109119415, "learning_rate": 9.871916439912048e-05, "loss": 3.300556945800781, "step": 95790 }, { "epoch": 0.016, "grad_norm": 0.18524855375289917, "learning_rate": 9.871668418142596e-05, "loss": 3.2894412994384767, "step": 95800 }, { "epoch": 0.016066666666666667, "grad_norm": 0.17640995979309082, "learning_rate": 9.871420159591657e-05, "loss": 3.337086868286133, "step": 95810 }, { "epoch": 0.016133333333333333, "grad_norm": 0.17858226597309113, "learning_rate": 9.871171664271307e-05, "loss": 3.300934982299805, "step": 95820 }, { "epoch": 0.0162, "grad_norm": 0.1793578416109085, "learning_rate": 9.870922932193613e-05, "loss": 3.3977169036865233, "step": 95830 }, { "epoch": 0.016266666666666665, "grad_norm": 0.2984354794025421, "learning_rate": 9.870673963370671e-05, "loss": 3.323026657104492, "step": 95840 }, { "epoch": 0.01633333333333333, "grad_norm": 0.17054139077663422, "learning_rate": 9.870424757814582e-05, "loss": 3.2917125701904295, "step": 95850 }, { "epoch": 0.0164, "grad_norm": 0.19262677431106567, "learning_rate": 9.870175315537455e-05, "loss": 3.2933650970458985, "step": 95860 }, { "epoch": 0.016466666666666668, "grad_norm": 0.1748715490102768, "learning_rate": 9.869925636551416e-05, "loss": 3.3293682098388673, "step": 95870 }, { "epoch": 0.016533333333333334, "grad_norm": 0.18712183833122253, "learning_rate": 9.8696757208686e-05, "loss": 3.3780658721923826, "step": 95880 }, { "epoch": 0.0166, "grad_norm": 0.18202783167362213, "learning_rate": 9.869425568501153e-05, "loss": 3.271680450439453, "step": 95890 }, { "epoch": 0.016666666666666666, "grad_norm": 1.044325590133667, "learning_rate": 9.869175179461237e-05, "loss": 3.340764617919922, "step": 95900 }, { "epoch": 0.016733333333333333, "grad_norm": 0.20773835480213165, "learning_rate": 9.868924553761016e-05, "loss": 3.4200855255126954, "step": 95910 }, { "epoch": 0.0168, "grad_norm": 0.16914063692092896, "learning_rate": 9.868673691412675e-05, "loss": 3.2760951995849608, "step": 95920 }, { "epoch": 0.016866666666666665, "grad_norm": 0.16904033720493317, "learning_rate": 9.868422592428406e-05, "loss": 3.3798789978027344, "step": 95930 }, { "epoch": 0.016933333333333335, "grad_norm": 0.18988659977912903, "learning_rate": 9.868171256820413e-05, "loss": 3.305398178100586, "step": 95940 }, { "epoch": 0.017, "grad_norm": 0.1735127717256546, "learning_rate": 9.867919684600915e-05, "loss": 3.2971805572509765, "step": 95950 }, { "epoch": 0.017066666666666667, "grad_norm": 0.2005293071269989, "learning_rate": 9.867667875782134e-05, "loss": 3.255040740966797, "step": 95960 }, { "epoch": 0.017133333333333334, "grad_norm": 0.16650255024433136, "learning_rate": 9.867415830376312e-05, "loss": 3.2578804016113283, "step": 95970 }, { "epoch": 0.0172, "grad_norm": 0.17943383753299713, "learning_rate": 9.867163548395699e-05, "loss": 3.371163177490234, "step": 95980 }, { "epoch": 0.017266666666666666, "grad_norm": 0.19685940444469452, "learning_rate": 9.866911029852558e-05, "loss": 3.2909446716308595, "step": 95990 }, { "epoch": 0.017333333333333333, "grad_norm": 0.18330642580986023, "learning_rate": 9.86665827475916e-05, "loss": 3.2641216278076173, "step": 96000 }, { "epoch": 0.0174, "grad_norm": 0.17347083985805511, "learning_rate": 9.866405283127792e-05, "loss": 3.281438446044922, "step": 96010 }, { "epoch": 0.017466666666666665, "grad_norm": 0.1751173883676529, "learning_rate": 9.866152054970749e-05, "loss": 3.301289367675781, "step": 96020 }, { "epoch": 0.017533333333333335, "grad_norm": 0.17057064175605774, "learning_rate": 9.865898590300339e-05, "loss": 3.273762512207031, "step": 96030 }, { "epoch": 0.0176, "grad_norm": 0.17978739738464355, "learning_rate": 9.865644889128882e-05, "loss": 3.2453006744384765, "step": 96040 }, { "epoch": 0.017666666666666667, "grad_norm": 0.1722906231880188, "learning_rate": 9.865390951468707e-05, "loss": 3.3022560119628905, "step": 96050 }, { "epoch": 0.017733333333333334, "grad_norm": 0.19035929441452026, "learning_rate": 9.865136777332159e-05, "loss": 3.2703315734863283, "step": 96060 }, { "epoch": 0.0178, "grad_norm": 0.16702836751937866, "learning_rate": 9.86488236673159e-05, "loss": 3.2737606048583983, "step": 96070 }, { "epoch": 0.017866666666666666, "grad_norm": 0.19415424764156342, "learning_rate": 9.864627719679367e-05, "loss": 3.307981491088867, "step": 96080 }, { "epoch": 0.017933333333333332, "grad_norm": 0.228297621011734, "learning_rate": 9.864372836187863e-05, "loss": 3.3129226684570314, "step": 96090 }, { "epoch": 0.018, "grad_norm": 0.16902372241020203, "learning_rate": 9.864117716269472e-05, "loss": 3.2420616149902344, "step": 96100 }, { "epoch": 0.01806666666666667, "grad_norm": 0.6224579811096191, "learning_rate": 9.863862359936588e-05, "loss": 3.294084930419922, "step": 96110 }, { "epoch": 0.018133333333333335, "grad_norm": 0.1981768161058426, "learning_rate": 9.863606767201627e-05, "loss": 3.297636795043945, "step": 96120 }, { "epoch": 0.0182, "grad_norm": 0.2109135091304779, "learning_rate": 9.86335093807701e-05, "loss": 3.315656280517578, "step": 96130 }, { "epoch": 0.018266666666666667, "grad_norm": 0.1875918209552765, "learning_rate": 9.863094872575171e-05, "loss": 3.252819061279297, "step": 96140 }, { "epoch": 0.018333333333333333, "grad_norm": 0.1724959909915924, "learning_rate": 9.862838570708555e-05, "loss": 3.256865692138672, "step": 96150 }, { "epoch": 0.0184, "grad_norm": 0.16943472623825073, "learning_rate": 9.86258203248962e-05, "loss": 3.26270751953125, "step": 96160 }, { "epoch": 0.018466666666666666, "grad_norm": 0.19248346984386444, "learning_rate": 9.862325257930835e-05, "loss": 3.2748409271240235, "step": 96170 }, { "epoch": 0.018533333333333332, "grad_norm": 0.16681864857673645, "learning_rate": 9.86206824704468e-05, "loss": 3.3326370239257814, "step": 96180 }, { "epoch": 0.0186, "grad_norm": 0.1863364279270172, "learning_rate": 9.861810999843646e-05, "loss": 3.261034393310547, "step": 96190 }, { "epoch": 0.018666666666666668, "grad_norm": 0.20690210163593292, "learning_rate": 9.861553516340238e-05, "loss": 3.271436309814453, "step": 96200 }, { "epoch": 0.018733333333333334, "grad_norm": 0.2135525941848755, "learning_rate": 9.86129579654697e-05, "loss": 3.410662078857422, "step": 96210 }, { "epoch": 0.0188, "grad_norm": 0.1737658530473709, "learning_rate": 9.861037840476367e-05, "loss": 3.2775169372558595, "step": 96220 }, { "epoch": 0.018866666666666667, "grad_norm": 0.17441917955875397, "learning_rate": 9.860779648140966e-05, "loss": 3.2998497009277346, "step": 96230 }, { "epoch": 0.018933333333333333, "grad_norm": 0.18172554671764374, "learning_rate": 9.86052121955332e-05, "loss": 3.297883987426758, "step": 96240 }, { "epoch": 0.019, "grad_norm": 0.17077256739139557, "learning_rate": 9.860262554725985e-05, "loss": 3.2799350738525392, "step": 96250 }, { "epoch": 0.019066666666666666, "grad_norm": 0.19180095195770264, "learning_rate": 9.860003653671536e-05, "loss": 3.301884078979492, "step": 96260 }, { "epoch": 0.019133333333333332, "grad_norm": 0.1603042632341385, "learning_rate": 9.859744516402556e-05, "loss": 3.2587127685546875, "step": 96270 }, { "epoch": 0.0192, "grad_norm": 0.17812122404575348, "learning_rate": 9.85948514293164e-05, "loss": 3.3267353057861326, "step": 96280 }, { "epoch": 0.019266666666666668, "grad_norm": 0.17311446368694305, "learning_rate": 9.859225533271392e-05, "loss": 3.2371036529541017, "step": 96290 }, { "epoch": 0.019333333333333334, "grad_norm": 0.28521353006362915, "learning_rate": 9.858965687434433e-05, "loss": 3.3698219299316405, "step": 96300 }, { "epoch": 0.0194, "grad_norm": 0.16383810341358185, "learning_rate": 9.858705605433394e-05, "loss": 3.297876739501953, "step": 96310 }, { "epoch": 0.019466666666666667, "grad_norm": 0.1993236094713211, "learning_rate": 9.858445287280914e-05, "loss": 3.3776966094970704, "step": 96320 }, { "epoch": 0.019533333333333333, "grad_norm": 0.2206273227930069, "learning_rate": 9.858184732989644e-05, "loss": 3.3449222564697267, "step": 96330 }, { "epoch": 0.0196, "grad_norm": 0.2204829752445221, "learning_rate": 9.857923942572249e-05, "loss": 3.316659164428711, "step": 96340 }, { "epoch": 0.019666666666666666, "grad_norm": 0.17319221794605255, "learning_rate": 9.857662916041403e-05, "loss": 3.2661727905273437, "step": 96350 }, { "epoch": 0.019733333333333332, "grad_norm": 0.17619538307189941, "learning_rate": 9.857401653409797e-05, "loss": 3.3971385955810547, "step": 96360 }, { "epoch": 0.0198, "grad_norm": 0.18193510174751282, "learning_rate": 9.857140154690125e-05, "loss": 3.214316558837891, "step": 96370 }, { "epoch": 0.019866666666666668, "grad_norm": 0.17997746169567108, "learning_rate": 9.8568784198951e-05, "loss": 3.2955223083496095, "step": 96380 }, { "epoch": 0.019933333333333334, "grad_norm": 0.17562949657440186, "learning_rate": 9.856616449037441e-05, "loss": 3.256550979614258, "step": 96390 }, { "epoch": 0.02, "grad_norm": 0.20674411952495575, "learning_rate": 9.856354242129882e-05, "loss": 3.295366668701172, "step": 96400 }, { "epoch": 0.020066666666666667, "grad_norm": 0.18968091905117035, "learning_rate": 9.856091799185167e-05, "loss": 3.2614601135253904, "step": 96410 }, { "epoch": 0.020133333333333333, "grad_norm": 0.1776019036769867, "learning_rate": 9.855829120216052e-05, "loss": 3.287786865234375, "step": 96420 }, { "epoch": 0.0202, "grad_norm": 0.178305521607399, "learning_rate": 9.855566205235302e-05, "loss": 3.273548126220703, "step": 96430 }, { "epoch": 0.020266666666666665, "grad_norm": 0.21143251657485962, "learning_rate": 9.8553030542557e-05, "loss": 3.3206150054931642, "step": 96440 }, { "epoch": 0.02033333333333333, "grad_norm": 0.23750467598438263, "learning_rate": 9.855039667290031e-05, "loss": 3.2994831085205076, "step": 96450 }, { "epoch": 0.0204, "grad_norm": 0.17703913152217865, "learning_rate": 9.854776044351102e-05, "loss": 3.3224281311035155, "step": 96460 }, { "epoch": 0.020466666666666668, "grad_norm": 0.19667363166809082, "learning_rate": 9.854512185451723e-05, "loss": 3.2969070434570313, "step": 96470 }, { "epoch": 0.020533333333333334, "grad_norm": 0.18414267897605896, "learning_rate": 9.854248090604717e-05, "loss": 3.274586486816406, "step": 96480 }, { "epoch": 0.0206, "grad_norm": 0.16035401821136475, "learning_rate": 9.853983759822922e-05, "loss": 3.2824153900146484, "step": 96490 }, { "epoch": 0.020666666666666667, "grad_norm": 0.1844106912612915, "learning_rate": 9.853719193119187e-05, "loss": 3.2654266357421875, "step": 96500 }, { "epoch": 0.020733333333333333, "grad_norm": 0.17156948149204254, "learning_rate": 9.853454390506366e-05, "loss": 3.2423839569091797, "step": 96510 }, { "epoch": 0.0208, "grad_norm": 0.18604516983032227, "learning_rate": 9.853189351997337e-05, "loss": 3.261323928833008, "step": 96520 }, { "epoch": 0.020866666666666665, "grad_norm": 0.1923554688692093, "learning_rate": 9.852924077604975e-05, "loss": 3.2637489318847654, "step": 96530 }, { "epoch": 0.020933333333333335, "grad_norm": 0.20938719809055328, "learning_rate": 9.852658567342176e-05, "loss": 3.2766326904296874, "step": 96540 }, { "epoch": 0.021, "grad_norm": 0.4662324786186218, "learning_rate": 9.852392821221845e-05, "loss": 3.360521697998047, "step": 96550 }, { "epoch": 0.021066666666666668, "grad_norm": 0.19333145022392273, "learning_rate": 9.852126839256897e-05, "loss": 3.3452281951904297, "step": 96560 }, { "epoch": 0.021133333333333334, "grad_norm": 0.1705482453107834, "learning_rate": 9.851860621460261e-05, "loss": 3.3705890655517576, "step": 96570 }, { "epoch": 0.0212, "grad_norm": 0.18221737444400787, "learning_rate": 9.851594167844877e-05, "loss": 3.2715606689453125, "step": 96580 }, { "epoch": 0.021266666666666666, "grad_norm": 0.17935097217559814, "learning_rate": 9.851327478423695e-05, "loss": 3.282718276977539, "step": 96590 }, { "epoch": 0.021333333333333333, "grad_norm": 0.23905660212039948, "learning_rate": 9.851060553209674e-05, "loss": 3.2937461853027346, "step": 96600 }, { "epoch": 0.0214, "grad_norm": 0.19128486514091492, "learning_rate": 9.850793392215791e-05, "loss": 3.3169071197509767, "step": 96610 }, { "epoch": 0.021466666666666665, "grad_norm": 0.18178348243236542, "learning_rate": 9.850525995455031e-05, "loss": 3.2965282440185546, "step": 96620 }, { "epoch": 0.021533333333333335, "grad_norm": 0.19232600927352905, "learning_rate": 9.85025836294039e-05, "loss": 3.539946746826172, "step": 96630 }, { "epoch": 0.0216, "grad_norm": 0.16844213008880615, "learning_rate": 9.849990494684876e-05, "loss": 3.2449432373046876, "step": 96640 }, { "epoch": 0.021666666666666667, "grad_norm": 0.18924476206302643, "learning_rate": 9.849722390701507e-05, "loss": 3.26831169128418, "step": 96650 }, { "epoch": 0.021733333333333334, "grad_norm": 0.18746110796928406, "learning_rate": 9.849454051003316e-05, "loss": 3.2014331817626953, "step": 96660 }, { "epoch": 0.0218, "grad_norm": 0.35532718896865845, "learning_rate": 9.849185475603343e-05, "loss": 3.301473617553711, "step": 96670 }, { "epoch": 0.021866666666666666, "grad_norm": 0.18043048679828644, "learning_rate": 9.848916664514642e-05, "loss": 3.2310462951660157, "step": 96680 }, { "epoch": 0.021933333333333332, "grad_norm": 0.20617325603961945, "learning_rate": 9.848647617750282e-05, "loss": 3.3535171508789063, "step": 96690 }, { "epoch": 0.022, "grad_norm": 0.1715431809425354, "learning_rate": 9.848378335323337e-05, "loss": 3.294017791748047, "step": 96700 }, { "epoch": 0.022066666666666665, "grad_norm": 0.19726496934890747, "learning_rate": 9.848108817246893e-05, "loss": 3.2674190521240236, "step": 96710 }, { "epoch": 0.022133333333333335, "grad_norm": 0.6339850425720215, "learning_rate": 9.847839063534052e-05, "loss": 3.409013366699219, "step": 96720 }, { "epoch": 0.0222, "grad_norm": 0.19208569824695587, "learning_rate": 9.847569074197926e-05, "loss": 3.3215023040771485, "step": 96730 }, { "epoch": 0.022266666666666667, "grad_norm": 0.17342953383922577, "learning_rate": 9.847298849251636e-05, "loss": 3.3145851135253905, "step": 96740 }, { "epoch": 0.022333333333333334, "grad_norm": 0.20888015627861023, "learning_rate": 9.847028388708315e-05, "loss": 3.3417320251464844, "step": 96750 }, { "epoch": 0.0224, "grad_norm": 0.2142053097486496, "learning_rate": 9.84675769258111e-05, "loss": 3.3021236419677735, "step": 96760 }, { "epoch": 0.022466666666666666, "grad_norm": 0.18173189461231232, "learning_rate": 9.846486760883178e-05, "loss": 3.253681182861328, "step": 96770 }, { "epoch": 0.022533333333333332, "grad_norm": 0.17558440566062927, "learning_rate": 9.846215593627688e-05, "loss": 3.289437484741211, "step": 96780 }, { "epoch": 0.0226, "grad_norm": 0.2008294314146042, "learning_rate": 9.845944190827816e-05, "loss": 3.2319561004638673, "step": 96790 }, { "epoch": 0.02266666666666667, "grad_norm": 0.17281505465507507, "learning_rate": 9.845672552496757e-05, "loss": 3.3158058166503905, "step": 96800 }, { "epoch": 0.022733333333333335, "grad_norm": 0.18016014993190765, "learning_rate": 9.845400678647713e-05, "loss": 3.2637481689453125, "step": 96810 }, { "epoch": 0.0228, "grad_norm": 0.18374957144260406, "learning_rate": 9.845128569293896e-05, "loss": 3.301416778564453, "step": 96820 }, { "epoch": 0.022866666666666667, "grad_norm": 0.46992143988609314, "learning_rate": 9.844856224448535e-05, "loss": 3.3992855072021486, "step": 96830 }, { "epoch": 0.022933333333333333, "grad_norm": 0.18637172877788544, "learning_rate": 9.844583644124863e-05, "loss": 3.3441650390625, "step": 96840 }, { "epoch": 0.023, "grad_norm": 0.1776258498430252, "learning_rate": 9.844310828336131e-05, "loss": 3.2976318359375, "step": 96850 }, { "epoch": 0.023066666666666666, "grad_norm": 0.1745828539133072, "learning_rate": 9.844037777095598e-05, "loss": 3.330161285400391, "step": 96860 }, { "epoch": 0.023133333333333332, "grad_norm": 0.17255713045597076, "learning_rate": 9.843764490416535e-05, "loss": 3.309195709228516, "step": 96870 }, { "epoch": 0.0232, "grad_norm": 0.16927070915699005, "learning_rate": 9.843490968312227e-05, "loss": 3.266865539550781, "step": 96880 }, { "epoch": 0.023266666666666668, "grad_norm": 0.2754387855529785, "learning_rate": 9.843217210795967e-05, "loss": 3.3902004241943358, "step": 96890 }, { "epoch": 0.023333333333333334, "grad_norm": 0.17090770602226257, "learning_rate": 9.84294321788106e-05, "loss": 3.2992183685302736, "step": 96900 }, { "epoch": 0.0234, "grad_norm": 0.17596754431724548, "learning_rate": 9.842668989580823e-05, "loss": 3.3228870391845704, "step": 96910 }, { "epoch": 0.023466666666666667, "grad_norm": 0.1785922646522522, "learning_rate": 9.842394525908585e-05, "loss": 3.267321014404297, "step": 96920 }, { "epoch": 0.023533333333333333, "grad_norm": 0.17085394263267517, "learning_rate": 9.842119826877684e-05, "loss": 3.2503940582275392, "step": 96930 }, { "epoch": 0.0236, "grad_norm": 0.17844471335411072, "learning_rate": 9.841844892501475e-05, "loss": 3.288075637817383, "step": 96940 }, { "epoch": 0.023666666666666666, "grad_norm": 0.39548274874687195, "learning_rate": 9.841569722793318e-05, "loss": 3.3248531341552736, "step": 96950 }, { "epoch": 0.023733333333333332, "grad_norm": 0.19228176772594452, "learning_rate": 9.84129431776659e-05, "loss": 3.334400177001953, "step": 96960 }, { "epoch": 0.0238, "grad_norm": 0.18282705545425415, "learning_rate": 9.841018677434675e-05, "loss": 3.2637924194335937, "step": 96970 }, { "epoch": 0.023866666666666668, "grad_norm": 0.21609315276145935, "learning_rate": 9.840742801810969e-05, "loss": 3.280503845214844, "step": 96980 }, { "epoch": 0.023933333333333334, "grad_norm": 0.21218739449977875, "learning_rate": 9.840466690908883e-05, "loss": 3.2296401977539064, "step": 96990 }, { "epoch": 0.024, "grad_norm": 0.1719816029071808, "learning_rate": 9.840190344741835e-05, "loss": 3.3426589965820312, "step": 97000 }, { "epoch": 0.024066666666666667, "grad_norm": 0.2127007097005844, "learning_rate": 9.839913763323257e-05, "loss": 3.264970397949219, "step": 97010 }, { "epoch": 0.024133333333333333, "grad_norm": 0.30556029081344604, "learning_rate": 9.839636946666595e-05, "loss": 3.250416564941406, "step": 97020 }, { "epoch": 0.0242, "grad_norm": 0.18546099960803986, "learning_rate": 9.839359894785298e-05, "loss": 3.3084335327148438, "step": 97030 }, { "epoch": 0.024266666666666666, "grad_norm": 0.17222745716571808, "learning_rate": 9.839082607692835e-05, "loss": 3.307392120361328, "step": 97040 }, { "epoch": 0.024333333333333332, "grad_norm": 0.17940282821655273, "learning_rate": 9.838805085402682e-05, "loss": 3.334792709350586, "step": 97050 }, { "epoch": 0.0244, "grad_norm": 0.19173620641231537, "learning_rate": 9.838527327928329e-05, "loss": 3.249428558349609, "step": 97060 }, { "epoch": 0.024466666666666668, "grad_norm": 0.17394386231899261, "learning_rate": 9.838249335283274e-05, "loss": 3.281302642822266, "step": 97070 }, { "epoch": 0.024533333333333334, "grad_norm": 0.26132726669311523, "learning_rate": 9.837971107481032e-05, "loss": 3.278681182861328, "step": 97080 }, { "epoch": 0.0246, "grad_norm": 0.16971342265605927, "learning_rate": 9.837692644535122e-05, "loss": 3.243194580078125, "step": 97090 }, { "epoch": 0.024666666666666667, "grad_norm": 0.2282569855451584, "learning_rate": 9.83741394645908e-05, "loss": 3.400950241088867, "step": 97100 }, { "epoch": 0.024733333333333333, "grad_norm": 0.17214009165763855, "learning_rate": 9.837135013266452e-05, "loss": 3.279013442993164, "step": 97110 }, { "epoch": 0.0248, "grad_norm": 0.21752393245697021, "learning_rate": 9.836855844970796e-05, "loss": 3.3558197021484375, "step": 97120 }, { "epoch": 0.024866666666666665, "grad_norm": 0.280020147562027, "learning_rate": 9.836576441585678e-05, "loss": 3.4853542327880858, "step": 97130 }, { "epoch": 0.02493333333333333, "grad_norm": 0.19022777676582336, "learning_rate": 9.836296803124681e-05, "loss": 3.349714660644531, "step": 97140 }, { "epoch": 0.025, "grad_norm": 0.2115618735551834, "learning_rate": 9.836016929601395e-05, "loss": 3.3191879272460936, "step": 97150 }, { "epoch": 0.025066666666666668, "grad_norm": 0.1767677515745163, "learning_rate": 9.835736821029424e-05, "loss": 3.280506134033203, "step": 97160 }, { "epoch": 0.025133333333333334, "grad_norm": 0.17728517949581146, "learning_rate": 9.83545647742238e-05, "loss": 3.2797454833984374, "step": 97170 }, { "epoch": 0.0252, "grad_norm": 0.1814766824245453, "learning_rate": 9.835175898793891e-05, "loss": 3.2685367584228517, "step": 97180 }, { "epoch": 0.025266666666666666, "grad_norm": 0.18908093869686127, "learning_rate": 9.834895085157593e-05, "loss": 3.2472068786621096, "step": 97190 }, { "epoch": 0.025333333333333333, "grad_norm": 0.2210550308227539, "learning_rate": 9.834614036527135e-05, "loss": 3.2720165252685547, "step": 97200 }, { "epoch": 0.0254, "grad_norm": 0.19722270965576172, "learning_rate": 9.834332752916177e-05, "loss": 3.259355163574219, "step": 97210 }, { "epoch": 0.025466666666666665, "grad_norm": 0.16931837797164917, "learning_rate": 9.83405123433839e-05, "loss": 3.2631553649902343, "step": 97220 }, { "epoch": 0.025533333333333335, "grad_norm": 0.18998655676841736, "learning_rate": 9.833769480807458e-05, "loss": 3.263739013671875, "step": 97230 }, { "epoch": 0.0256, "grad_norm": 0.19349588453769684, "learning_rate": 9.833487492337074e-05, "loss": 3.3015823364257812, "step": 97240 }, { "epoch": 0.025666666666666667, "grad_norm": 0.18287824094295502, "learning_rate": 9.833205268940946e-05, "loss": 3.237411880493164, "step": 97250 }, { "epoch": 0.025733333333333334, "grad_norm": 0.178482323884964, "learning_rate": 9.832922810632788e-05, "loss": 3.3037979125976564, "step": 97260 }, { "epoch": 0.0258, "grad_norm": 0.18428294360637665, "learning_rate": 9.832640117426329e-05, "loss": 3.387994384765625, "step": 97270 }, { "epoch": 0.025866666666666666, "grad_norm": 0.17614108324050903, "learning_rate": 9.832357189335312e-05, "loss": 3.272378158569336, "step": 97280 }, { "epoch": 0.025933333333333333, "grad_norm": 0.17259962856769562, "learning_rate": 9.832074026373485e-05, "loss": 3.2859027862548826, "step": 97290 }, { "epoch": 0.026, "grad_norm": 0.17983047664165497, "learning_rate": 9.831790628554612e-05, "loss": 3.2565536499023438, "step": 97300 }, { "epoch": 0.026066666666666665, "grad_norm": 0.4274856746196747, "learning_rate": 9.831506995892468e-05, "loss": 3.314905548095703, "step": 97310 }, { "epoch": 0.026133333333333335, "grad_norm": 0.18535709381103516, "learning_rate": 9.831223128400839e-05, "loss": 3.448250961303711, "step": 97320 }, { "epoch": 0.0262, "grad_norm": 0.18264222145080566, "learning_rate": 9.830939026093519e-05, "loss": 3.2620162963867188, "step": 97330 }, { "epoch": 0.026266666666666667, "grad_norm": 0.1973845511674881, "learning_rate": 9.830654688984319e-05, "loss": 3.342450714111328, "step": 97340 }, { "epoch": 0.026333333333333334, "grad_norm": 0.1834082305431366, "learning_rate": 9.830370117087058e-05, "loss": 3.280224609375, "step": 97350 }, { "epoch": 0.0264, "grad_norm": 0.3118092715740204, "learning_rate": 9.830085310415568e-05, "loss": 3.2928386688232423, "step": 97360 }, { "epoch": 0.026466666666666666, "grad_norm": 0.17951811850070953, "learning_rate": 9.82980026898369e-05, "loss": 3.31390495300293, "step": 97370 }, { "epoch": 0.026533333333333332, "grad_norm": 0.1795995682477951, "learning_rate": 9.829514992805282e-05, "loss": 3.2633735656738283, "step": 97380 }, { "epoch": 0.0266, "grad_norm": 0.21821215748786926, "learning_rate": 9.829229481894205e-05, "loss": 3.306035614013672, "step": 97390 }, { "epoch": 0.02666666666666667, "grad_norm": 0.16634097695350647, "learning_rate": 9.828943736264338e-05, "loss": 3.401674270629883, "step": 97400 }, { "epoch": 0.026733333333333335, "grad_norm": 0.180075541138649, "learning_rate": 9.828657755929568e-05, "loss": 3.2222694396972655, "step": 97410 }, { "epoch": 0.0268, "grad_norm": 0.2949139475822449, "learning_rate": 9.828371540903795e-05, "loss": 3.3412994384765624, "step": 97420 }, { "epoch": 0.026866666666666667, "grad_norm": 0.1877976804971695, "learning_rate": 9.828085091200934e-05, "loss": 3.348612976074219, "step": 97430 }, { "epoch": 0.026933333333333333, "grad_norm": 0.18689630925655365, "learning_rate": 9.827798406834903e-05, "loss": 3.2390911102294924, "step": 97440 }, { "epoch": 0.027, "grad_norm": 0.17235687375068665, "learning_rate": 9.827511487819637e-05, "loss": 3.2734306335449217, "step": 97450 }, { "epoch": 0.027066666666666666, "grad_norm": 0.1967061161994934, "learning_rate": 9.827224334169082e-05, "loss": 3.222152328491211, "step": 97460 }, { "epoch": 0.027133333333333332, "grad_norm": 0.17321935296058655, "learning_rate": 9.826936945897194e-05, "loss": 3.2674915313720705, "step": 97470 }, { "epoch": 0.0272, "grad_norm": 0.20779746770858765, "learning_rate": 9.826649323017942e-05, "loss": 3.376055145263672, "step": 97480 }, { "epoch": 0.027266666666666668, "grad_norm": 0.16816017031669617, "learning_rate": 9.826361465545304e-05, "loss": 3.237366485595703, "step": 97490 }, { "epoch": 0.027333333333333334, "grad_norm": 0.19941453635692596, "learning_rate": 9.826073373493274e-05, "loss": 3.257229232788086, "step": 97500 }, { "epoch": 0.0274, "grad_norm": 0.1703978329896927, "learning_rate": 9.825785046875853e-05, "loss": 3.277573013305664, "step": 97510 }, { "epoch": 0.027466666666666667, "grad_norm": 0.19833381474018097, "learning_rate": 9.825496485707053e-05, "loss": 3.347207260131836, "step": 97520 }, { "epoch": 0.027533333333333333, "grad_norm": 0.17550747096538544, "learning_rate": 9.825207690000899e-05, "loss": 3.296492004394531, "step": 97530 }, { "epoch": 0.0276, "grad_norm": 0.27345529198646545, "learning_rate": 9.824918659771432e-05, "loss": 3.2871162414550783, "step": 97540 }, { "epoch": 0.027666666666666666, "grad_norm": 0.29795271158218384, "learning_rate": 9.824629395032694e-05, "loss": 3.295195388793945, "step": 97550 }, { "epoch": 0.027733333333333332, "grad_norm": 0.183814138174057, "learning_rate": 9.824339895798748e-05, "loss": 3.553427886962891, "step": 97560 }, { "epoch": 0.0278, "grad_norm": 0.17548970878124237, "learning_rate": 9.824050162083666e-05, "loss": 3.3051216125488283, "step": 97570 }, { "epoch": 0.027866666666666668, "grad_norm": 0.16515232622623444, "learning_rate": 9.823760193901526e-05, "loss": 3.2436309814453126, "step": 97580 }, { "epoch": 0.027933333333333334, "grad_norm": 0.2029179185628891, "learning_rate": 9.823469991266425e-05, "loss": 3.302735137939453, "step": 97590 }, { "epoch": 0.028, "grad_norm": 0.17405098676681519, "learning_rate": 9.823179554192467e-05, "loss": 3.262088394165039, "step": 97600 }, { "epoch": 0.028066666666666667, "grad_norm": 0.19372440874576569, "learning_rate": 9.822888882693766e-05, "loss": 3.2843780517578125, "step": 97610 }, { "epoch": 0.028133333333333333, "grad_norm": 0.23452669382095337, "learning_rate": 9.822597976784454e-05, "loss": 3.2969493865966797, "step": 97620 }, { "epoch": 0.0282, "grad_norm": 0.18343618512153625, "learning_rate": 9.822306836478668e-05, "loss": 3.319362258911133, "step": 97630 }, { "epoch": 0.028266666666666666, "grad_norm": 0.17934341728687286, "learning_rate": 9.822015461790557e-05, "loss": 3.384661102294922, "step": 97640 }, { "epoch": 0.028333333333333332, "grad_norm": 0.18613353371620178, "learning_rate": 9.821723852734284e-05, "loss": 3.2224586486816404, "step": 97650 }, { "epoch": 0.0284, "grad_norm": 0.18379195034503937, "learning_rate": 9.821432009324024e-05, "loss": 3.3694995880126952, "step": 97660 }, { "epoch": 0.028466666666666668, "grad_norm": 0.17349113523960114, "learning_rate": 9.82113993157396e-05, "loss": 3.2225875854492188, "step": 97670 }, { "epoch": 0.028533333333333334, "grad_norm": 0.17406898736953735, "learning_rate": 9.820847619498287e-05, "loss": 3.254006576538086, "step": 97680 }, { "epoch": 0.0286, "grad_norm": 0.17342984676361084, "learning_rate": 9.820555073111215e-05, "loss": 3.29078369140625, "step": 97690 }, { "epoch": 0.028666666666666667, "grad_norm": 0.1848634034395218, "learning_rate": 9.820262292426961e-05, "loss": 3.331317138671875, "step": 97700 }, { "epoch": 0.028733333333333333, "grad_norm": 0.20564039051532745, "learning_rate": 9.819969277459758e-05, "loss": 3.2730697631835937, "step": 97710 }, { "epoch": 0.0288, "grad_norm": 0.1929074078798294, "learning_rate": 9.819676028223843e-05, "loss": 3.2990367889404295, "step": 97720 }, { "epoch": 0.028866666666666665, "grad_norm": 0.16757896542549133, "learning_rate": 9.819382544733473e-05, "loss": 3.2539649963378907, "step": 97730 }, { "epoch": 0.028933333333333332, "grad_norm": 0.1770271509885788, "learning_rate": 9.819088827002911e-05, "loss": 3.257424163818359, "step": 97740 }, { "epoch": 0.029, "grad_norm": 0.171797513961792, "learning_rate": 9.818794875046433e-05, "loss": 3.252732849121094, "step": 97750 }, { "epoch": 0.029066666666666668, "grad_norm": 0.18020294606685638, "learning_rate": 9.818500688878325e-05, "loss": 3.249001312255859, "step": 97760 }, { "epoch": 0.029133333333333334, "grad_norm": 0.1717831939458847, "learning_rate": 9.818206268512889e-05, "loss": 3.240615463256836, "step": 97770 }, { "epoch": 0.0292, "grad_norm": 0.1852504014968872, "learning_rate": 9.81791161396443e-05, "loss": 3.275537872314453, "step": 97780 }, { "epoch": 0.029266666666666667, "grad_norm": 0.18798433244228363, "learning_rate": 9.817616725247271e-05, "loss": 3.279724884033203, "step": 97790 }, { "epoch": 0.029333333333333333, "grad_norm": 0.170902818441391, "learning_rate": 9.817321602375749e-05, "loss": 3.2416305541992188, "step": 97800 }, { "epoch": 0.0294, "grad_norm": 0.29535529017448425, "learning_rate": 9.817026245364202e-05, "loss": 3.2759784698486327, "step": 97810 }, { "epoch": 0.029466666666666665, "grad_norm": 0.1640680432319641, "learning_rate": 9.816730654226989e-05, "loss": 3.2819137573242188, "step": 97820 }, { "epoch": 0.029533333333333335, "grad_norm": 0.167726069688797, "learning_rate": 9.816434828978475e-05, "loss": 3.318017578125, "step": 97830 }, { "epoch": 0.0296, "grad_norm": 0.17128388583660126, "learning_rate": 9.816138769633041e-05, "loss": 3.2910152435302735, "step": 97840 }, { "epoch": 0.029666666666666668, "grad_norm": 0.16860997676849365, "learning_rate": 9.815842476205073e-05, "loss": 3.280717468261719, "step": 97850 }, { "epoch": 0.029733333333333334, "grad_norm": 0.1708105504512787, "learning_rate": 9.815545948708975e-05, "loss": 3.270613098144531, "step": 97860 }, { "epoch": 0.0298, "grad_norm": 0.206083744764328, "learning_rate": 9.815249187159157e-05, "loss": 3.245461273193359, "step": 97870 }, { "epoch": 0.029866666666666666, "grad_norm": 0.5008456110954285, "learning_rate": 9.814952191570043e-05, "loss": 3.281878662109375, "step": 97880 }, { "epoch": 0.029933333333333333, "grad_norm": 0.17353175580501556, "learning_rate": 9.814654961956071e-05, "loss": 3.2671180725097657, "step": 97890 }, { "epoch": 0.03, "grad_norm": 0.17023390531539917, "learning_rate": 9.814357498331686e-05, "loss": 3.234853744506836, "step": 97900 }, { "epoch": 0.030066666666666665, "grad_norm": 0.169632226228714, "learning_rate": 9.814059800711342e-05, "loss": 3.2465129852294923, "step": 97910 }, { "epoch": 0.030133333333333335, "grad_norm": 0.1844622939825058, "learning_rate": 9.813761869109514e-05, "loss": 3.2994529724121096, "step": 97920 }, { "epoch": 0.0302, "grad_norm": 0.1755056083202362, "learning_rate": 9.81346370354068e-05, "loss": 3.2644458770751954, "step": 97930 }, { "epoch": 0.030266666666666667, "grad_norm": 0.1691884994506836, "learning_rate": 9.813165304019332e-05, "loss": 3.25904541015625, "step": 97940 }, { "epoch": 0.030333333333333334, "grad_norm": 0.16703984141349792, "learning_rate": 9.812866670559972e-05, "loss": 3.2493362426757812, "step": 97950 }, { "epoch": 0.0304, "grad_norm": 0.1741284281015396, "learning_rate": 9.812567803177118e-05, "loss": 3.2779766082763673, "step": 97960 }, { "epoch": 0.030466666666666666, "grad_norm": 0.17830073833465576, "learning_rate": 9.812268701885292e-05, "loss": 3.2409645080566407, "step": 97970 }, { "epoch": 0.030533333333333332, "grad_norm": 0.17422054708003998, "learning_rate": 9.811969366699036e-05, "loss": 3.2536251068115236, "step": 97980 }, { "epoch": 0.0306, "grad_norm": 0.19274964928627014, "learning_rate": 9.811669797632894e-05, "loss": 3.2655517578125, "step": 97990 }, { "epoch": 0.030666666666666665, "grad_norm": 0.17302416265010834, "learning_rate": 9.811369994701432e-05, "loss": 3.2253952026367188, "step": 98000 }, { "epoch": 0.030733333333333335, "grad_norm": 0.4719087481498718, "learning_rate": 9.811069957919215e-05, "loss": 3.461981201171875, "step": 98010 }, { "epoch": 0.0308, "grad_norm": 0.1768861711025238, "learning_rate": 9.81076968730083e-05, "loss": 3.2404296875, "step": 98020 }, { "epoch": 0.030866666666666667, "grad_norm": 0.16990917921066284, "learning_rate": 9.81046918286087e-05, "loss": 3.241153335571289, "step": 98030 }, { "epoch": 0.030933333333333334, "grad_norm": 0.19152289628982544, "learning_rate": 9.810168444613943e-05, "loss": 3.3589752197265623, "step": 98040 }, { "epoch": 0.031, "grad_norm": 0.18609067797660828, "learning_rate": 9.809867472574664e-05, "loss": 3.257870101928711, "step": 98050 }, { "epoch": 0.031066666666666666, "grad_norm": 0.17444655299186707, "learning_rate": 9.809566266757659e-05, "loss": 3.2758457183837892, "step": 98060 }, { "epoch": 0.031133333333333332, "grad_norm": 0.17285019159317017, "learning_rate": 9.809264827177573e-05, "loss": 3.3304428100585937, "step": 98070 }, { "epoch": 0.0312, "grad_norm": 0.18882529437541962, "learning_rate": 9.808963153849053e-05, "loss": 3.394305419921875, "step": 98080 }, { "epoch": 0.031266666666666665, "grad_norm": 0.17193898558616638, "learning_rate": 9.808661246786764e-05, "loss": 3.2217021942138673, "step": 98090 }, { "epoch": 0.03133333333333333, "grad_norm": 0.3053176701068878, "learning_rate": 9.808359106005375e-05, "loss": 3.2994102478027343, "step": 98100 }, { "epoch": 0.0314, "grad_norm": 0.2545721232891083, "learning_rate": 9.808056731519578e-05, "loss": 3.4273590087890624, "step": 98110 }, { "epoch": 0.031466666666666664, "grad_norm": 0.21139168739318848, "learning_rate": 9.807754123344066e-05, "loss": 3.3934867858886717, "step": 98120 }, { "epoch": 0.03153333333333333, "grad_norm": 0.18390384316444397, "learning_rate": 9.807451281493546e-05, "loss": 3.2765480041503907, "step": 98130 }, { "epoch": 0.0316, "grad_norm": 0.19271546602249146, "learning_rate": 9.807148205982739e-05, "loss": 3.2702873229980467, "step": 98140 }, { "epoch": 0.03166666666666667, "grad_norm": 0.17550913989543915, "learning_rate": 9.806844896826375e-05, "loss": 3.2331939697265626, "step": 98150 }, { "epoch": 0.031733333333333336, "grad_norm": 0.16754864156246185, "learning_rate": 9.806541354039198e-05, "loss": 3.255461883544922, "step": 98160 }, { "epoch": 0.0318, "grad_norm": 0.18904612958431244, "learning_rate": 9.806237577635957e-05, "loss": 3.3160018920898438, "step": 98170 }, { "epoch": 0.03186666666666667, "grad_norm": 0.1907319277524948, "learning_rate": 9.80593356763142e-05, "loss": 3.298981475830078, "step": 98180 }, { "epoch": 0.031933333333333334, "grad_norm": 0.18088862299919128, "learning_rate": 9.805629324040362e-05, "loss": 3.391067123413086, "step": 98190 }, { "epoch": 0.032, "grad_norm": 0.17617207765579224, "learning_rate": 9.805324846877571e-05, "loss": 3.275006103515625, "step": 98200 }, { "epoch": 0.03206666666666667, "grad_norm": 0.18246592581272125, "learning_rate": 9.805020136157845e-05, "loss": 3.3093185424804688, "step": 98210 }, { "epoch": 0.03213333333333333, "grad_norm": 0.18703772127628326, "learning_rate": 9.804715191895993e-05, "loss": 3.25781135559082, "step": 98220 }, { "epoch": 0.0322, "grad_norm": 0.20908094942569733, "learning_rate": 9.80441001410684e-05, "loss": 3.3305374145507813, "step": 98230 }, { "epoch": 0.032266666666666666, "grad_norm": 0.17210617661476135, "learning_rate": 9.804104602805216e-05, "loss": 3.357967758178711, "step": 98240 }, { "epoch": 0.03233333333333333, "grad_norm": 0.21255500614643097, "learning_rate": 9.803798958005964e-05, "loss": 3.448545455932617, "step": 98250 }, { "epoch": 0.0324, "grad_norm": 0.1681114137172699, "learning_rate": 9.803493079723942e-05, "loss": 3.2114273071289063, "step": 98260 }, { "epoch": 0.032466666666666665, "grad_norm": 0.18503600358963013, "learning_rate": 9.803186967974017e-05, "loss": 3.2827198028564455, "step": 98270 }, { "epoch": 0.03253333333333333, "grad_norm": 0.1767207384109497, "learning_rate": 9.802880622771064e-05, "loss": 3.2071170806884766, "step": 98280 }, { "epoch": 0.0326, "grad_norm": 0.18326839804649353, "learning_rate": 9.802574044129977e-05, "loss": 3.250045394897461, "step": 98290 }, { "epoch": 0.03266666666666666, "grad_norm": 0.2833355963230133, "learning_rate": 9.802267232065654e-05, "loss": 3.2193408966064454, "step": 98300 }, { "epoch": 0.032733333333333337, "grad_norm": 0.16877010464668274, "learning_rate": 9.801960186593008e-05, "loss": 3.314044189453125, "step": 98310 }, { "epoch": 0.0328, "grad_norm": 0.17521549761295319, "learning_rate": 9.801652907726963e-05, "loss": 3.2444175720214843, "step": 98320 }, { "epoch": 0.03286666666666667, "grad_norm": 0.16487357020378113, "learning_rate": 9.801345395482453e-05, "loss": 3.2919239044189452, "step": 98330 }, { "epoch": 0.032933333333333335, "grad_norm": 0.1780952364206314, "learning_rate": 9.801037649874423e-05, "loss": 3.2732372283935547, "step": 98340 }, { "epoch": 0.033, "grad_norm": 0.17559459805488586, "learning_rate": 9.800729670917834e-05, "loss": 3.1832195281982423, "step": 98350 }, { "epoch": 0.03306666666666667, "grad_norm": 0.17208252847194672, "learning_rate": 9.800421458627652e-05, "loss": 3.235249710083008, "step": 98360 }, { "epoch": 0.033133333333333334, "grad_norm": 0.17462903261184692, "learning_rate": 9.800113013018859e-05, "loss": 3.3320262908935545, "step": 98370 }, { "epoch": 0.0332, "grad_norm": 0.18300378322601318, "learning_rate": 9.799804334106445e-05, "loss": 3.308759307861328, "step": 98380 }, { "epoch": 0.03326666666666667, "grad_norm": 0.19771522283554077, "learning_rate": 9.799495421905415e-05, "loss": 3.2756847381591796, "step": 98390 }, { "epoch": 0.03333333333333333, "grad_norm": 0.17585442960262299, "learning_rate": 9.799186276430784e-05, "loss": 3.2883045196533205, "step": 98400 }, { "epoch": 0.0334, "grad_norm": 0.206253781914711, "learning_rate": 9.798876897697575e-05, "loss": 3.290831756591797, "step": 98410 }, { "epoch": 0.033466666666666665, "grad_norm": 0.16387739777565002, "learning_rate": 9.798567285720824e-05, "loss": 3.354664993286133, "step": 98420 }, { "epoch": 0.03353333333333333, "grad_norm": 0.19100114703178406, "learning_rate": 9.798257440515583e-05, "loss": 3.122268486022949, "step": 98430 }, { "epoch": 0.0336, "grad_norm": 0.17117644846439362, "learning_rate": 9.797947362096908e-05, "loss": 3.236927032470703, "step": 98440 }, { "epoch": 0.033666666666666664, "grad_norm": 0.17490828037261963, "learning_rate": 9.797637050479874e-05, "loss": 3.240927886962891, "step": 98450 }, { "epoch": 0.03373333333333333, "grad_norm": 0.16964900493621826, "learning_rate": 9.797326505679561e-05, "loss": 3.251250076293945, "step": 98460 }, { "epoch": 0.0338, "grad_norm": 0.16967755556106567, "learning_rate": 9.797015727711062e-05, "loss": 3.238653564453125, "step": 98470 }, { "epoch": 0.03386666666666667, "grad_norm": 0.1623520851135254, "learning_rate": 9.796704716589483e-05, "loss": 3.2489269256591795, "step": 98480 }, { "epoch": 0.033933333333333336, "grad_norm": 0.20352095365524292, "learning_rate": 9.79639347232994e-05, "loss": 3.250835418701172, "step": 98490 }, { "epoch": 0.034, "grad_norm": 0.3543427586555481, "learning_rate": 9.796081994947562e-05, "loss": 3.218608093261719, "step": 98500 }, { "epoch": 0.03406666666666667, "grad_norm": 0.18736295402050018, "learning_rate": 9.795770284457484e-05, "loss": 3.247442626953125, "step": 98510 }, { "epoch": 0.034133333333333335, "grad_norm": 0.19531603157520294, "learning_rate": 9.795458340874862e-05, "loss": 3.199906921386719, "step": 98520 }, { "epoch": 0.0342, "grad_norm": 0.17216753959655762, "learning_rate": 9.795146164214852e-05, "loss": 3.2789901733398437, "step": 98530 }, { "epoch": 0.03426666666666667, "grad_norm": 0.194754496216774, "learning_rate": 9.794833754492631e-05, "loss": 3.245064926147461, "step": 98540 }, { "epoch": 0.034333333333333334, "grad_norm": 0.18367083370685577, "learning_rate": 9.794521111723383e-05, "loss": 3.2830333709716797, "step": 98550 }, { "epoch": 0.0344, "grad_norm": 0.18726469576358795, "learning_rate": 9.7942082359223e-05, "loss": 3.25042724609375, "step": 98560 }, { "epoch": 0.034466666666666666, "grad_norm": 0.2196962982416153, "learning_rate": 9.793895127104593e-05, "loss": 3.230865478515625, "step": 98570 }, { "epoch": 0.03453333333333333, "grad_norm": 0.17977425456047058, "learning_rate": 9.793581785285479e-05, "loss": 3.307497024536133, "step": 98580 }, { "epoch": 0.0346, "grad_norm": 0.16143395006656647, "learning_rate": 9.793268210480188e-05, "loss": 3.2704841613769533, "step": 98590 }, { "epoch": 0.034666666666666665, "grad_norm": 0.16726738214492798, "learning_rate": 9.79295440270396e-05, "loss": 3.2557395935058593, "step": 98600 }, { "epoch": 0.03473333333333333, "grad_norm": 0.16997326910495758, "learning_rate": 9.792640361972046e-05, "loss": 3.2679821014404298, "step": 98610 }, { "epoch": 0.0348, "grad_norm": 0.16356004774570465, "learning_rate": 9.792326088299712e-05, "loss": 3.284455490112305, "step": 98620 }, { "epoch": 0.034866666666666664, "grad_norm": 0.170103058218956, "learning_rate": 9.792011581702232e-05, "loss": 3.252399444580078, "step": 98630 }, { "epoch": 0.03493333333333333, "grad_norm": 0.1742527335882187, "learning_rate": 9.791696842194893e-05, "loss": 3.2271186828613283, "step": 98640 }, { "epoch": 0.035, "grad_norm": 0.6446268558502197, "learning_rate": 9.791381869792991e-05, "loss": 3.3293128967285157, "step": 98650 }, { "epoch": 0.03506666666666667, "grad_norm": 39.17057418823242, "learning_rate": 9.791066664511836e-05, "loss": 3.686031723022461, "step": 98660 }, { "epoch": 0.035133333333333336, "grad_norm": 0.17636185884475708, "learning_rate": 9.790751226366748e-05, "loss": 3.563568115234375, "step": 98670 }, { "epoch": 0.0352, "grad_norm": 0.18186932802200317, "learning_rate": 9.790435555373056e-05, "loss": 3.3218921661376952, "step": 98680 }, { "epoch": 0.03526666666666667, "grad_norm": 0.21274332702159882, "learning_rate": 9.790119651546109e-05, "loss": 3.2742443084716797, "step": 98690 }, { "epoch": 0.035333333333333335, "grad_norm": 0.174941286444664, "learning_rate": 9.789803514901253e-05, "loss": 3.296372985839844, "step": 98700 }, { "epoch": 0.0354, "grad_norm": 0.1739869862794876, "learning_rate": 9.78948714545386e-05, "loss": 3.2192203521728517, "step": 98710 }, { "epoch": 0.03546666666666667, "grad_norm": 0.17834685742855072, "learning_rate": 9.789170543219304e-05, "loss": 3.2558483123779296, "step": 98720 }, { "epoch": 0.03553333333333333, "grad_norm": 0.206222802400589, "learning_rate": 9.788853708212973e-05, "loss": 3.3721622467041015, "step": 98730 }, { "epoch": 0.0356, "grad_norm": 0.17301476001739502, "learning_rate": 9.788536640450266e-05, "loss": 3.2472774505615236, "step": 98740 }, { "epoch": 0.035666666666666666, "grad_norm": 0.45429491996765137, "learning_rate": 9.788219339946597e-05, "loss": 3.3903213500976563, "step": 98750 }, { "epoch": 0.03573333333333333, "grad_norm": 0.1824956238269806, "learning_rate": 9.787901806717382e-05, "loss": 3.4000503540039064, "step": 98760 }, { "epoch": 0.0358, "grad_norm": 0.18622639775276184, "learning_rate": 9.787584040778059e-05, "loss": 3.3709068298339844, "step": 98770 }, { "epoch": 0.035866666666666665, "grad_norm": 0.23192453384399414, "learning_rate": 9.787266042144073e-05, "loss": 3.282719039916992, "step": 98780 }, { "epoch": 0.03593333333333333, "grad_norm": 0.21189405024051666, "learning_rate": 9.786947810830877e-05, "loss": 3.2575965881347657, "step": 98790 }, { "epoch": 0.036, "grad_norm": 0.16998529434204102, "learning_rate": 9.78662934685394e-05, "loss": 3.3001567840576174, "step": 98800 }, { "epoch": 0.036066666666666664, "grad_norm": 0.1905965656042099, "learning_rate": 9.78631065022874e-05, "loss": 3.2835887908935546, "step": 98810 }, { "epoch": 0.03613333333333334, "grad_norm": 0.1866782009601593, "learning_rate": 9.785991720970768e-05, "loss": 3.258963775634766, "step": 98820 }, { "epoch": 0.0362, "grad_norm": 0.20994064211845398, "learning_rate": 9.785672559095522e-05, "loss": 3.290869140625, "step": 98830 }, { "epoch": 0.03626666666666667, "grad_norm": 0.19615857303142548, "learning_rate": 9.785353164618518e-05, "loss": 3.3500701904296877, "step": 98840 }, { "epoch": 0.036333333333333336, "grad_norm": 0.18557967245578766, "learning_rate": 9.785033537555279e-05, "loss": 3.2891918182373048, "step": 98850 }, { "epoch": 0.0364, "grad_norm": 0.1871853917837143, "learning_rate": 9.784713677921339e-05, "loss": 3.2973468780517576, "step": 98860 }, { "epoch": 0.03646666666666667, "grad_norm": 0.17302048206329346, "learning_rate": 9.784393585732246e-05, "loss": 3.4115058898925783, "step": 98870 }, { "epoch": 0.036533333333333334, "grad_norm": 0.17155492305755615, "learning_rate": 9.784073261003554e-05, "loss": 3.232875442504883, "step": 98880 }, { "epoch": 0.0366, "grad_norm": 0.17680023610591888, "learning_rate": 9.783752703750835e-05, "loss": 3.2858150482177733, "step": 98890 }, { "epoch": 0.03666666666666667, "grad_norm": 0.38103726506233215, "learning_rate": 9.783431913989672e-05, "loss": 3.3151512145996094, "step": 98900 }, { "epoch": 0.03673333333333333, "grad_norm": 0.17033500969409943, "learning_rate": 9.783110891735649e-05, "loss": 3.3134212493896484, "step": 98910 }, { "epoch": 0.0368, "grad_norm": 0.32085010409355164, "learning_rate": 9.782789637004377e-05, "loss": 3.3363590240478516, "step": 98920 }, { "epoch": 0.036866666666666666, "grad_norm": 0.1768287569284439, "learning_rate": 9.782468149811465e-05, "loss": 3.262644958496094, "step": 98930 }, { "epoch": 0.03693333333333333, "grad_norm": 0.16690543293952942, "learning_rate": 9.78214643017254e-05, "loss": 3.265006256103516, "step": 98940 }, { "epoch": 0.037, "grad_norm": 0.192119300365448, "learning_rate": 9.781824478103239e-05, "loss": 3.266382598876953, "step": 98950 }, { "epoch": 0.037066666666666664, "grad_norm": 0.2594771683216095, "learning_rate": 9.781502293619209e-05, "loss": 3.2803314208984373, "step": 98960 }, { "epoch": 0.03713333333333333, "grad_norm": 0.1708441972732544, "learning_rate": 9.781179876736112e-05, "loss": 3.240060043334961, "step": 98970 }, { "epoch": 0.0372, "grad_norm": 0.17498089373111725, "learning_rate": 9.780857227469615e-05, "loss": 3.2860763549804686, "step": 98980 }, { "epoch": 0.03726666666666666, "grad_norm": 0.2036733627319336, "learning_rate": 9.780534345835403e-05, "loss": 3.26831169128418, "step": 98990 }, { "epoch": 0.037333333333333336, "grad_norm": 0.1790372133255005, "learning_rate": 9.780211231849169e-05, "loss": 3.275978851318359, "step": 99000 }, { "epoch": 0.0374, "grad_norm": 0.17050567269325256, "learning_rate": 9.779887885526615e-05, "loss": 3.3000232696533205, "step": 99010 }, { "epoch": 0.03746666666666667, "grad_norm": 0.16958197951316833, "learning_rate": 9.77956430688346e-05, "loss": 3.2716331481933594, "step": 99020 }, { "epoch": 0.037533333333333335, "grad_norm": 0.16724438965320587, "learning_rate": 9.77924049593543e-05, "loss": 3.2675609588623047, "step": 99030 }, { "epoch": 0.0376, "grad_norm": 0.40795180201530457, "learning_rate": 9.778916452698262e-05, "loss": 3.263105773925781, "step": 99040 }, { "epoch": 0.03766666666666667, "grad_norm": 0.18080182373523712, "learning_rate": 9.778592177187709e-05, "loss": 3.2700611114501954, "step": 99050 }, { "epoch": 0.037733333333333334, "grad_norm": 0.17046846449375153, "learning_rate": 9.778267669419527e-05, "loss": 3.2558292388916015, "step": 99060 }, { "epoch": 0.0378, "grad_norm": 0.18747955560684204, "learning_rate": 9.777942929409494e-05, "loss": 2.492174530029297, "step": 99070 }, { "epoch": 0.037866666666666667, "grad_norm": 0.1856488585472107, "learning_rate": 9.777617957173389e-05, "loss": 3.2958297729492188, "step": 99080 }, { "epoch": 0.03793333333333333, "grad_norm": 0.37177687883377075, "learning_rate": 9.77729275272701e-05, "loss": 3.34619140625, "step": 99090 }, { "epoch": 0.038, "grad_norm": 0.17065183818340302, "learning_rate": 9.776967316086161e-05, "loss": 3.271536636352539, "step": 99100 }, { "epoch": 0.038066666666666665, "grad_norm": 0.17875687777996063, "learning_rate": 9.77664164726666e-05, "loss": 3.3070842742919924, "step": 99110 }, { "epoch": 0.03813333333333333, "grad_norm": 0.18231187760829926, "learning_rate": 9.776315746284337e-05, "loss": 3.2656009674072264, "step": 99120 }, { "epoch": 0.0382, "grad_norm": 0.17906862497329712, "learning_rate": 9.77598961315503e-05, "loss": 3.202284240722656, "step": 99130 }, { "epoch": 0.038266666666666664, "grad_norm": 0.18302953243255615, "learning_rate": 9.77566324789459e-05, "loss": 3.3089393615722655, "step": 99140 }, { "epoch": 0.03833333333333333, "grad_norm": 0.19927139580249786, "learning_rate": 9.775336650518885e-05, "loss": 3.269145965576172, "step": 99150 }, { "epoch": 0.0384, "grad_norm": 0.16326425969600677, "learning_rate": 9.775009821043782e-05, "loss": 3.2830223083496093, "step": 99160 }, { "epoch": 0.03846666666666667, "grad_norm": 0.16461174190044403, "learning_rate": 9.77468275948517e-05, "loss": 3.264435958862305, "step": 99170 }, { "epoch": 0.038533333333333336, "grad_norm": 0.1599767953157425, "learning_rate": 9.774355465858945e-05, "loss": 3.2696392059326174, "step": 99180 }, { "epoch": 0.0386, "grad_norm": 0.16991545259952545, "learning_rate": 9.774027940181012e-05, "loss": 3.2862030029296876, "step": 99190 }, { "epoch": 0.03866666666666667, "grad_norm": 0.18080934882164001, "learning_rate": 9.773700182467293e-05, "loss": 3.2592262268066405, "step": 99200 }, { "epoch": 0.038733333333333335, "grad_norm": 0.2046908438205719, "learning_rate": 9.773372192733719e-05, "loss": 3.2872055053710936, "step": 99210 }, { "epoch": 0.0388, "grad_norm": 0.17246995866298676, "learning_rate": 9.773043970996228e-05, "loss": 3.258968734741211, "step": 99220 }, { "epoch": 0.03886666666666667, "grad_norm": 0.19133548438549042, "learning_rate": 9.772715517270777e-05, "loss": 3.27637939453125, "step": 99230 }, { "epoch": 0.038933333333333334, "grad_norm": 0.2158520221710205, "learning_rate": 9.772386831573325e-05, "loss": 3.2942764282226564, "step": 99240 }, { "epoch": 0.039, "grad_norm": 0.21807768940925598, "learning_rate": 9.77205791391985e-05, "loss": 3.2854068756103514, "step": 99250 }, { "epoch": 0.039066666666666666, "grad_norm": 0.16833236813545227, "learning_rate": 9.771728764326342e-05, "loss": 3.276194763183594, "step": 99260 }, { "epoch": 0.03913333333333333, "grad_norm": 0.17520995438098907, "learning_rate": 9.771399382808792e-05, "loss": 3.261427307128906, "step": 99270 }, { "epoch": 0.0392, "grad_norm": 0.17581094801425934, "learning_rate": 9.771069769383215e-05, "loss": 3.5046459197998048, "step": 99280 }, { "epoch": 0.039266666666666665, "grad_norm": 0.18122568726539612, "learning_rate": 9.77073992406563e-05, "loss": 3.2565387725830077, "step": 99290 }, { "epoch": 0.03933333333333333, "grad_norm": 0.37729185819625854, "learning_rate": 9.770409846872066e-05, "loss": 3.1957118988037108, "step": 99300 }, { "epoch": 0.0394, "grad_norm": 0.17428770661354065, "learning_rate": 9.770079537818571e-05, "loss": 3.3543926239013673, "step": 99310 }, { "epoch": 0.039466666666666664, "grad_norm": 0.17761975526809692, "learning_rate": 9.769748996921193e-05, "loss": 3.257668304443359, "step": 99320 }, { "epoch": 0.03953333333333333, "grad_norm": 0.1800096035003662, "learning_rate": 9.769418224196002e-05, "loss": 3.300911712646484, "step": 99330 }, { "epoch": 0.0396, "grad_norm": 0.2689484655857086, "learning_rate": 9.769087219659076e-05, "loss": 3.3651599884033203, "step": 99340 }, { "epoch": 0.03966666666666667, "grad_norm": 0.17387427389621735, "learning_rate": 9.768755983326498e-05, "loss": 3.2739315032958984, "step": 99350 }, { "epoch": 0.039733333333333336, "grad_norm": 0.17243529856204987, "learning_rate": 9.768424515214371e-05, "loss": 3.2814456939697267, "step": 99360 }, { "epoch": 0.0398, "grad_norm": 0.20176899433135986, "learning_rate": 9.768092815338805e-05, "loss": 3.322185516357422, "step": 99370 }, { "epoch": 0.03986666666666667, "grad_norm": 0.23582318425178528, "learning_rate": 9.767760883715922e-05, "loss": 3.261928176879883, "step": 99380 }, { "epoch": 0.039933333333333335, "grad_norm": 0.19562694430351257, "learning_rate": 9.767428720361854e-05, "loss": 3.2505977630615233, "step": 99390 }, { "epoch": 0.04, "grad_norm": 0.17756372690200806, "learning_rate": 9.767096325292746e-05, "loss": 3.2659202575683595, "step": 99400 }, { "epoch": 0.04006666666666667, "grad_norm": 0.18170756101608276, "learning_rate": 9.766763698524755e-05, "loss": 3.3498966217041017, "step": 99410 }, { "epoch": 0.04013333333333333, "grad_norm": 0.20219586789608002, "learning_rate": 9.766430840074046e-05, "loss": 3.3617340087890626, "step": 99420 }, { "epoch": 0.0402, "grad_norm": 0.173574760556221, "learning_rate": 9.766097749956797e-05, "loss": 3.281106948852539, "step": 99430 }, { "epoch": 0.040266666666666666, "grad_norm": 0.18152421712875366, "learning_rate": 9.765764428189197e-05, "loss": 3.259428787231445, "step": 99440 }, { "epoch": 0.04033333333333333, "grad_norm": 0.46602529287338257, "learning_rate": 9.765430874787451e-05, "loss": 3.582122802734375, "step": 99450 }, { "epoch": 0.0404, "grad_norm": 0.18315796554088593, "learning_rate": 9.765097089767766e-05, "loss": 3.236460876464844, "step": 99460 }, { "epoch": 0.040466666666666665, "grad_norm": 0.17144827544689178, "learning_rate": 9.764763073146368e-05, "loss": 3.23839111328125, "step": 99470 }, { "epoch": 0.04053333333333333, "grad_norm": 0.1747443825006485, "learning_rate": 9.764428824939491e-05, "loss": 3.3034347534179687, "step": 99480 }, { "epoch": 0.0406, "grad_norm": 0.23577247560024261, "learning_rate": 9.764094345163379e-05, "loss": 3.244404602050781, "step": 99490 }, { "epoch": 0.04066666666666666, "grad_norm": 0.17645429074764252, "learning_rate": 9.763759633834292e-05, "loss": 3.2939071655273438, "step": 99500 }, { "epoch": 0.04073333333333334, "grad_norm": 0.17181840538978577, "learning_rate": 9.763424690968495e-05, "loss": 3.331418991088867, "step": 99510 }, { "epoch": 0.0408, "grad_norm": 0.1802951693534851, "learning_rate": 9.76308951658227e-05, "loss": 3.2890907287597657, "step": 99520 }, { "epoch": 0.04086666666666667, "grad_norm": 0.16721050441265106, "learning_rate": 9.762754110691907e-05, "loss": 3.3039627075195312, "step": 99530 }, { "epoch": 0.040933333333333335, "grad_norm": 0.16665437817573547, "learning_rate": 9.762418473313707e-05, "loss": 3.2102699279785156, "step": 99540 }, { "epoch": 0.041, "grad_norm": 0.1678561121225357, "learning_rate": 9.762082604463985e-05, "loss": 3.2288372039794924, "step": 99550 }, { "epoch": 0.04106666666666667, "grad_norm": 0.1796877533197403, "learning_rate": 9.761746504159064e-05, "loss": 3.2023204803466796, "step": 99560 }, { "epoch": 0.041133333333333334, "grad_norm": 0.1648438274860382, "learning_rate": 9.76141017241528e-05, "loss": 3.204525375366211, "step": 99570 }, { "epoch": 0.0412, "grad_norm": 0.4592151939868927, "learning_rate": 9.761073609248981e-05, "loss": 3.1854263305664063, "step": 99580 }, { "epoch": 0.04126666666666667, "grad_norm": 0.16881054639816284, "learning_rate": 9.760736814676523e-05, "loss": 3.236592483520508, "step": 99590 }, { "epoch": 0.04133333333333333, "grad_norm": 0.1697424054145813, "learning_rate": 9.760399788714278e-05, "loss": 3.233546829223633, "step": 99600 }, { "epoch": 0.0414, "grad_norm": 0.18367229402065277, "learning_rate": 9.760062531378625e-05, "loss": 3.25952262878418, "step": 99610 }, { "epoch": 0.041466666666666666, "grad_norm": 0.2113817036151886, "learning_rate": 9.759725042685958e-05, "loss": 3.2461711883544924, "step": 99620 }, { "epoch": 0.04153333333333333, "grad_norm": 0.1876205950975418, "learning_rate": 9.759387322652677e-05, "loss": 3.260803985595703, "step": 99630 }, { "epoch": 0.0416, "grad_norm": 0.16451717913150787, "learning_rate": 9.7590493712952e-05, "loss": 3.2632675170898438, "step": 99640 }, { "epoch": 0.041666666666666664, "grad_norm": 0.17195391654968262, "learning_rate": 9.758711188629951e-05, "loss": 3.2317779541015623, "step": 99650 }, { "epoch": 0.04173333333333333, "grad_norm": 0.19363416731357574, "learning_rate": 9.758372774673367e-05, "loss": 3.266101837158203, "step": 99660 }, { "epoch": 0.0418, "grad_norm": 0.18696393072605133, "learning_rate": 9.758034129441895e-05, "loss": 3.2055355072021485, "step": 99670 }, { "epoch": 0.04186666666666667, "grad_norm": 0.2829519808292389, "learning_rate": 9.757695252951997e-05, "loss": 3.2289234161376954, "step": 99680 }, { "epoch": 0.041933333333333336, "grad_norm": 0.23116949200630188, "learning_rate": 9.757356145220142e-05, "loss": 3.340327835083008, "step": 99690 }, { "epoch": 0.042, "grad_norm": 0.20912596583366394, "learning_rate": 9.757016806262813e-05, "loss": 3.270182418823242, "step": 99700 }, { "epoch": 0.04206666666666667, "grad_norm": 0.16806575655937195, "learning_rate": 9.756677236096503e-05, "loss": 3.2224605560302733, "step": 99710 }, { "epoch": 0.042133333333333335, "grad_norm": 0.1748146116733551, "learning_rate": 9.756337434737714e-05, "loss": 3.277997589111328, "step": 99720 }, { "epoch": 0.0422, "grad_norm": 0.1781592220067978, "learning_rate": 9.755997402202965e-05, "loss": 3.2377883911132814, "step": 99730 }, { "epoch": 0.04226666666666667, "grad_norm": 0.43603914976119995, "learning_rate": 9.75565713850878e-05, "loss": 3.193558120727539, "step": 99740 }, { "epoch": 0.042333333333333334, "grad_norm": 0.16676469147205353, "learning_rate": 9.7553166436717e-05, "loss": 3.348210906982422, "step": 99750 }, { "epoch": 0.0424, "grad_norm": 0.1849627047777176, "learning_rate": 9.754975917708271e-05, "loss": 3.2667266845703127, "step": 99760 }, { "epoch": 0.042466666666666666, "grad_norm": 0.1689651608467102, "learning_rate": 9.754634960635056e-05, "loss": 3.2673431396484376, "step": 99770 }, { "epoch": 0.04253333333333333, "grad_norm": 0.34374114871025085, "learning_rate": 9.754293772468625e-05, "loss": 3.2869503021240236, "step": 99780 }, { "epoch": 0.0426, "grad_norm": 0.16213937103748322, "learning_rate": 9.753952353225564e-05, "loss": 3.261628341674805, "step": 99790 }, { "epoch": 0.042666666666666665, "grad_norm": 0.17647138237953186, "learning_rate": 9.753610702922464e-05, "loss": 3.253116226196289, "step": 99800 }, { "epoch": 0.04273333333333333, "grad_norm": 0.17458292841911316, "learning_rate": 9.753268821575934e-05, "loss": 3.2459312438964845, "step": 99810 }, { "epoch": 0.0428, "grad_norm": 0.16234970092773438, "learning_rate": 9.752926709202587e-05, "loss": 3.2676132202148436, "step": 99820 }, { "epoch": 0.042866666666666664, "grad_norm": 0.2799971401691437, "learning_rate": 9.752584365819053e-05, "loss": 3.2842002868652345, "step": 99830 }, { "epoch": 0.04293333333333333, "grad_norm": 0.16802313923835754, "learning_rate": 9.752241791441971e-05, "loss": 3.2360206604003907, "step": 99840 }, { "epoch": 0.043, "grad_norm": 0.16575074195861816, "learning_rate": 9.75189898608799e-05, "loss": 3.2572872161865236, "step": 99850 }, { "epoch": 0.04306666666666667, "grad_norm": 0.17482468485832214, "learning_rate": 9.751555949773774e-05, "loss": 3.2629169464111327, "step": 99860 }, { "epoch": 0.043133333333333336, "grad_norm": 0.1744375228881836, "learning_rate": 9.751212682515995e-05, "loss": 3.2167633056640623, "step": 99870 }, { "epoch": 0.0432, "grad_norm": 0.17468464374542236, "learning_rate": 9.750869184331335e-05, "loss": 3.21236572265625, "step": 99880 }, { "epoch": 0.04326666666666667, "grad_norm": 0.17059925198554993, "learning_rate": 9.750525455236494e-05, "loss": 3.2321098327636717, "step": 99890 }, { "epoch": 0.043333333333333335, "grad_norm": 0.1903623640537262, "learning_rate": 9.750181495248175e-05, "loss": 3.2370677947998048, "step": 99900 }, { "epoch": 0.0434, "grad_norm": 0.16766853630542755, "learning_rate": 9.749837304383094e-05, "loss": 3.2650516510009764, "step": 99910 }, { "epoch": 0.04346666666666667, "grad_norm": 0.16507568955421448, "learning_rate": 9.749492882657983e-05, "loss": 3.264362335205078, "step": 99920 }, { "epoch": 0.043533333333333334, "grad_norm": 0.19429533183574677, "learning_rate": 9.749148230089583e-05, "loss": 3.317825698852539, "step": 99930 }, { "epoch": 0.0436, "grad_norm": 0.19032670557498932, "learning_rate": 9.748803346694644e-05, "loss": 3.4290695190429688, "step": 99940 }, { "epoch": 0.043666666666666666, "grad_norm": 0.18829722702503204, "learning_rate": 9.748458232489928e-05, "loss": 3.343060302734375, "step": 99950 }, { "epoch": 0.04373333333333333, "grad_norm": 0.18566939234733582, "learning_rate": 9.748112887492208e-05, "loss": 3.270854187011719, "step": 99960 }, { "epoch": 0.0438, "grad_norm": 0.16044573485851288, "learning_rate": 9.747767311718272e-05, "loss": 3.249951171875, "step": 99970 }, { "epoch": 0.043866666666666665, "grad_norm": 0.1625891923904419, "learning_rate": 9.747421505184915e-05, "loss": 3.2683483123779298, "step": 99980 }, { "epoch": 0.04393333333333333, "grad_norm": 0.210659459233284, "learning_rate": 9.747075467908944e-05, "loss": 3.286579132080078, "step": 99990 }, { "epoch": 0.044, "grad_norm": 0.17784704267978668, "learning_rate": 9.746729199907178e-05, "loss": 3.227275085449219, "step": 100000 }, { "epoch": 0.044066666666666664, "grad_norm": 0.17326194047927856, "learning_rate": 9.746382701196446e-05, "loss": 3.2510120391845705, "step": 100010 }, { "epoch": 0.04413333333333333, "grad_norm": 0.173588827252388, "learning_rate": 9.746035971793592e-05, "loss": 3.252346420288086, "step": 100020 }, { "epoch": 0.0442, "grad_norm": 0.2209218591451645, "learning_rate": 9.745689011715464e-05, "loss": 3.2083999633789064, "step": 100030 }, { "epoch": 0.04426666666666667, "grad_norm": 0.19557464122772217, "learning_rate": 9.74534182097893e-05, "loss": 3.236980438232422, "step": 100040 }, { "epoch": 0.044333333333333336, "grad_norm": 0.19175727665424347, "learning_rate": 9.744994399600864e-05, "loss": 3.2428119659423826, "step": 100050 }, { "epoch": 0.0444, "grad_norm": 0.18612034618854523, "learning_rate": 9.744646747598147e-05, "loss": 3.28837890625, "step": 100060 }, { "epoch": 0.04446666666666667, "grad_norm": 0.18100059032440186, "learning_rate": 9.744298864987683e-05, "loss": 3.2364360809326174, "step": 100070 }, { "epoch": 0.044533333333333334, "grad_norm": 0.17977707087993622, "learning_rate": 9.743950751786377e-05, "loss": 3.2039112091064452, "step": 100080 }, { "epoch": 0.0446, "grad_norm": 0.17866435647010803, "learning_rate": 9.743602408011147e-05, "loss": 3.444222640991211, "step": 100090 }, { "epoch": 0.04466666666666667, "grad_norm": 0.2022806704044342, "learning_rate": 9.743253833678929e-05, "loss": 3.259210968017578, "step": 100100 }, { "epoch": 0.04473333333333333, "grad_norm": 0.23468388617038727, "learning_rate": 9.74290502880666e-05, "loss": 3.214277648925781, "step": 100110 }, { "epoch": 0.0448, "grad_norm": 0.17649275064468384, "learning_rate": 9.742555993411295e-05, "loss": 3.2550308227539064, "step": 100120 }, { "epoch": 0.044866666666666666, "grad_norm": 0.18012292683124542, "learning_rate": 9.7422067275098e-05, "loss": 3.214178466796875, "step": 100130 }, { "epoch": 0.04493333333333333, "grad_norm": 0.2089584618806839, "learning_rate": 9.741857231119147e-05, "loss": 3.2358047485351564, "step": 100140 }, { "epoch": 0.045, "grad_norm": 0.1649649739265442, "learning_rate": 9.741507504256327e-05, "loss": 3.2616172790527345, "step": 100150 }, { "epoch": 0.045066666666666665, "grad_norm": 0.1696656495332718, "learning_rate": 9.741157546938335e-05, "loss": 3.2541908264160155, "step": 100160 }, { "epoch": 0.04513333333333333, "grad_norm": 0.18755105137825012, "learning_rate": 9.740807359182182e-05, "loss": 3.301200103759766, "step": 100170 }, { "epoch": 0.0452, "grad_norm": 0.2777382731437683, "learning_rate": 9.740456941004887e-05, "loss": 3.366476821899414, "step": 100180 }, { "epoch": 0.04526666666666666, "grad_norm": 0.1732138842344284, "learning_rate": 9.740106292423483e-05, "loss": 3.288824462890625, "step": 100190 }, { "epoch": 0.04533333333333334, "grad_norm": 0.16762831807136536, "learning_rate": 9.739755413455014e-05, "loss": 3.2529232025146486, "step": 100200 }, { "epoch": 0.0454, "grad_norm": 0.16537219285964966, "learning_rate": 9.73940430411653e-05, "loss": 3.2340934753417967, "step": 100210 }, { "epoch": 0.04546666666666667, "grad_norm": 0.18646173179149628, "learning_rate": 9.7390529644251e-05, "loss": 3.095489501953125, "step": 100220 }, { "epoch": 0.045533333333333335, "grad_norm": 0.17346185445785522, "learning_rate": 9.738701394397798e-05, "loss": 3.239269256591797, "step": 100230 }, { "epoch": 0.0456, "grad_norm": 0.16320770978927612, "learning_rate": 9.738349594051713e-05, "loss": 3.2312763214111326, "step": 100240 }, { "epoch": 0.04566666666666667, "grad_norm": 0.17098039388656616, "learning_rate": 9.737997563403942e-05, "loss": 3.1936264038085938, "step": 100250 }, { "epoch": 0.045733333333333334, "grad_norm": 0.1744081825017929, "learning_rate": 9.737645302471598e-05, "loss": 3.253727340698242, "step": 100260 }, { "epoch": 0.0458, "grad_norm": 0.18343950808048248, "learning_rate": 9.7372928112718e-05, "loss": 3.2888790130615235, "step": 100270 }, { "epoch": 0.04586666666666667, "grad_norm": 0.17684629559516907, "learning_rate": 9.736940089821683e-05, "loss": 3.2346927642822267, "step": 100280 }, { "epoch": 0.04593333333333333, "grad_norm": 0.1694410890340805, "learning_rate": 9.736587138138385e-05, "loss": 3.2437889099121096, "step": 100290 }, { "epoch": 0.046, "grad_norm": 0.18479949235916138, "learning_rate": 9.736233956239066e-05, "loss": 3.1903690338134765, "step": 100300 }, { "epoch": 0.046066666666666665, "grad_norm": 0.36388933658599854, "learning_rate": 9.735880544140892e-05, "loss": 3.2773040771484374, "step": 100310 }, { "epoch": 0.04613333333333333, "grad_norm": 0.16732707619667053, "learning_rate": 9.735526901861039e-05, "loss": 3.268058013916016, "step": 100320 }, { "epoch": 0.0462, "grad_norm": 0.18101342022418976, "learning_rate": 9.735173029416693e-05, "loss": 3.191784477233887, "step": 100330 }, { "epoch": 0.046266666666666664, "grad_norm": 0.1844644844532013, "learning_rate": 9.734818926825056e-05, "loss": 3.3311500549316406, "step": 100340 }, { "epoch": 0.04633333333333333, "grad_norm": 0.18926607072353363, "learning_rate": 9.734464594103339e-05, "loss": 3.243130111694336, "step": 100350 }, { "epoch": 0.0464, "grad_norm": 0.16663214564323425, "learning_rate": 9.734110031268762e-05, "loss": 3.35665283203125, "step": 100360 }, { "epoch": 0.04646666666666667, "grad_norm": 0.1635369062423706, "learning_rate": 9.73375523833856e-05, "loss": 3.1971694946289064, "step": 100370 }, { "epoch": 0.046533333333333336, "grad_norm": 0.21710477769374847, "learning_rate": 9.733400215329976e-05, "loss": 3.2353153228759766, "step": 100380 }, { "epoch": 0.0466, "grad_norm": 0.17518316209316254, "learning_rate": 9.733044962260265e-05, "loss": 3.2419132232666015, "step": 100390 }, { "epoch": 0.04666666666666667, "grad_norm": 0.1625450849533081, "learning_rate": 9.732689479146696e-05, "loss": 3.3175453186035155, "step": 100400 }, { "epoch": 0.046733333333333335, "grad_norm": 0.19838422536849976, "learning_rate": 9.732333766006545e-05, "loss": 3.2418182373046873, "step": 100410 }, { "epoch": 0.0468, "grad_norm": 0.1820768564939499, "learning_rate": 9.731977822857102e-05, "loss": 5.045563507080078, "step": 100420 }, { "epoch": 0.04686666666666667, "grad_norm": 0.3655942380428314, "learning_rate": 9.731621649715666e-05, "loss": 3.2673007965087892, "step": 100430 }, { "epoch": 0.046933333333333334, "grad_norm": 0.2277475893497467, "learning_rate": 9.731265246599549e-05, "loss": 3.2639930725097654, "step": 100440 }, { "epoch": 0.047, "grad_norm": 0.2001921534538269, "learning_rate": 9.730908613526073e-05, "loss": 3.2425689697265625, "step": 100450 }, { "epoch": 0.047066666666666666, "grad_norm": 0.18659310042858124, "learning_rate": 9.730551750512574e-05, "loss": 3.2491523742675783, "step": 100460 }, { "epoch": 0.04713333333333333, "grad_norm": 0.18120050430297852, "learning_rate": 9.730194657576393e-05, "loss": 3.2712722778320313, "step": 100470 }, { "epoch": 0.0472, "grad_norm": 0.18712274730205536, "learning_rate": 9.729837334734889e-05, "loss": 3.28845329284668, "step": 100480 }, { "epoch": 0.047266666666666665, "grad_norm": 0.16832387447357178, "learning_rate": 9.729479782005428e-05, "loss": 3.3874107360839845, "step": 100490 }, { "epoch": 0.04733333333333333, "grad_norm": 0.17728784680366516, "learning_rate": 9.729121999405391e-05, "loss": 3.2644824981689453, "step": 100500 }, { "epoch": 0.0474, "grad_norm": 0.49637213349342346, "learning_rate": 9.728763986952163e-05, "loss": 3.255586624145508, "step": 100510 }, { "epoch": 0.047466666666666664, "grad_norm": 0.17846249043941498, "learning_rate": 9.728405744663148e-05, "loss": 3.2512466430664064, "step": 100520 }, { "epoch": 0.04753333333333333, "grad_norm": 0.17990051209926605, "learning_rate": 9.728047272555756e-05, "loss": 3.1777597427368165, "step": 100530 }, { "epoch": 0.0476, "grad_norm": 0.16719374060630798, "learning_rate": 9.727688570647413e-05, "loss": 3.2359241485595702, "step": 100540 }, { "epoch": 0.04766666666666667, "grad_norm": 0.21326115727424622, "learning_rate": 9.72732963895555e-05, "loss": 3.3037132263183593, "step": 100550 }, { "epoch": 0.047733333333333336, "grad_norm": 0.1717277318239212, "learning_rate": 9.726970477497614e-05, "loss": 3.2936733245849608, "step": 100560 }, { "epoch": 0.0478, "grad_norm": 0.18216215074062347, "learning_rate": 9.726611086291062e-05, "loss": 3.230145263671875, "step": 100570 }, { "epoch": 0.04786666666666667, "grad_norm": 0.17716021835803986, "learning_rate": 9.726251465353362e-05, "loss": 3.2520473480224608, "step": 100580 }, { "epoch": 0.047933333333333335, "grad_norm": 0.1702684462070465, "learning_rate": 9.725891614701992e-05, "loss": 3.2541370391845703, "step": 100590 }, { "epoch": 0.048, "grad_norm": 0.25317829847335815, "learning_rate": 9.725531534354441e-05, "loss": 3.2427837371826174, "step": 100600 }, { "epoch": 0.04806666666666667, "grad_norm": 0.1869828701019287, "learning_rate": 9.725171224328213e-05, "loss": 3.2990947723388673, "step": 100610 }, { "epoch": 0.048133333333333334, "grad_norm": 0.18753278255462646, "learning_rate": 9.724810684640817e-05, "loss": 3.2910697937011717, "step": 100620 }, { "epoch": 0.0482, "grad_norm": 0.1796412318944931, "learning_rate": 9.72444991530978e-05, "loss": 3.2238922119140625, "step": 100630 }, { "epoch": 0.048266666666666666, "grad_norm": 0.18071790039539337, "learning_rate": 9.724088916352636e-05, "loss": 3.231690216064453, "step": 100640 }, { "epoch": 0.04833333333333333, "grad_norm": 0.17613571882247925, "learning_rate": 9.723727687786928e-05, "loss": 3.2403644561767577, "step": 100650 }, { "epoch": 0.0484, "grad_norm": 0.16886188089847565, "learning_rate": 9.723366229630217e-05, "loss": 3.2123882293701174, "step": 100660 }, { "epoch": 0.048466666666666665, "grad_norm": 0.18618978559970856, "learning_rate": 9.723004541900069e-05, "loss": 3.2538532257080077, "step": 100670 }, { "epoch": 0.04853333333333333, "grad_norm": 0.17959484457969666, "learning_rate": 9.722642624614064e-05, "loss": 3.2157005310058593, "step": 100680 }, { "epoch": 0.0486, "grad_norm": 0.1800541877746582, "learning_rate": 9.722280477789794e-05, "loss": 3.260449981689453, "step": 100690 }, { "epoch": 0.048666666666666664, "grad_norm": 3.0451643466949463, "learning_rate": 9.721918101444858e-05, "loss": 3.246641159057617, "step": 100700 }, { "epoch": 0.04873333333333333, "grad_norm": 0.16949760913848877, "learning_rate": 9.721555495596871e-05, "loss": 3.2330780029296875, "step": 100710 }, { "epoch": 0.0488, "grad_norm": 0.1696138083934784, "learning_rate": 9.721192660263453e-05, "loss": 3.282796859741211, "step": 100720 }, { "epoch": 0.04886666666666667, "grad_norm": 0.16433709859848022, "learning_rate": 9.720829595462246e-05, "loss": 3.2538421630859373, "step": 100730 }, { "epoch": 0.048933333333333336, "grad_norm": 0.1929733157157898, "learning_rate": 9.72046630121089e-05, "loss": 3.266448974609375, "step": 100740 }, { "epoch": 0.049, "grad_norm": 0.18170368671417236, "learning_rate": 9.720102777527045e-05, "loss": 3.269723892211914, "step": 100750 }, { "epoch": 0.04906666666666667, "grad_norm": 0.17166274785995483, "learning_rate": 9.719739024428381e-05, "loss": 3.255804443359375, "step": 100760 }, { "epoch": 0.049133333333333334, "grad_norm": 0.17147284746170044, "learning_rate": 9.719375041932576e-05, "loss": 3.2507091522216798, "step": 100770 }, { "epoch": 0.0492, "grad_norm": 0.20554719865322113, "learning_rate": 9.719010830057322e-05, "loss": 3.4022476196289064, "step": 100780 }, { "epoch": 0.04926666666666667, "grad_norm": 0.17488698661327362, "learning_rate": 9.718646388820319e-05, "loss": 3.2463447570800783, "step": 100790 }, { "epoch": 0.04933333333333333, "grad_norm": 0.1841973513364792, "learning_rate": 9.718281718239282e-05, "loss": 3.1913856506347655, "step": 100800 }, { "epoch": 0.0494, "grad_norm": 0.17168879508972168, "learning_rate": 9.717916818331936e-05, "loss": 3.2740707397460938, "step": 100810 }, { "epoch": 0.049466666666666666, "grad_norm": 0.18385016918182373, "learning_rate": 9.717551689116014e-05, "loss": 3.268369674682617, "step": 100820 }, { "epoch": 0.04953333333333333, "grad_norm": 0.19007685780525208, "learning_rate": 9.717186330609265e-05, "loss": 3.249618911743164, "step": 100830 }, { "epoch": 0.0496, "grad_norm": 0.18558527529239655, "learning_rate": 9.716820742829447e-05, "loss": 3.2350353240966796, "step": 100840 }, { "epoch": 0.049666666666666665, "grad_norm": 0.17044320702552795, "learning_rate": 9.716454925794327e-05, "loss": 3.2572227478027345, "step": 100850 }, { "epoch": 0.04973333333333333, "grad_norm": 0.22121264040470123, "learning_rate": 9.716088879521685e-05, "loss": 3.352275848388672, "step": 100860 }, { "epoch": 0.0498, "grad_norm": 0.1783478856086731, "learning_rate": 9.715722604029314e-05, "loss": 3.2061817169189455, "step": 100870 }, { "epoch": 0.04986666666666666, "grad_norm": 0.18043653666973114, "learning_rate": 9.715356099335016e-05, "loss": 3.230840301513672, "step": 100880 }, { "epoch": 0.049933333333333337, "grad_norm": 0.19200439751148224, "learning_rate": 9.714989365456604e-05, "loss": 3.2903774261474608, "step": 100890 }, { "epoch": 0.05, "grad_norm": 0.19525620341300964, "learning_rate": 9.714622402411903e-05, "loss": 3.2501644134521483, "step": 100900 }, { "epoch": 0.05006666666666667, "grad_norm": 0.2670818567276001, "learning_rate": 9.714255210218748e-05, "loss": 3.2445327758789064, "step": 100910 }, { "epoch": 0.050133333333333335, "grad_norm": 0.19018088281154633, "learning_rate": 9.713887788894987e-05, "loss": 3.2185317993164064, "step": 100920 }, { "epoch": 0.0502, "grad_norm": 0.18189364671707153, "learning_rate": 9.713520138458477e-05, "loss": 3.251995086669922, "step": 100930 }, { "epoch": 0.05026666666666667, "grad_norm": 0.26275065541267395, "learning_rate": 9.713152258927089e-05, "loss": 3.1684595108032227, "step": 100940 }, { "epoch": 0.050333333333333334, "grad_norm": 0.19487230479717255, "learning_rate": 9.712784150318702e-05, "loss": 3.245907211303711, "step": 100950 }, { "epoch": 0.0504, "grad_norm": 0.16297248005867004, "learning_rate": 9.712415812651206e-05, "loss": 3.2712738037109377, "step": 100960 }, { "epoch": 0.05046666666666667, "grad_norm": 0.17554925382137299, "learning_rate": 9.712047245942506e-05, "loss": 3.2644153594970704, "step": 100970 }, { "epoch": 0.05053333333333333, "grad_norm": 0.18037468194961548, "learning_rate": 9.711678450210515e-05, "loss": 3.2062942504882814, "step": 100980 }, { "epoch": 0.0506, "grad_norm": 0.17063309252262115, "learning_rate": 9.711309425473156e-05, "loss": 3.216103744506836, "step": 100990 }, { "epoch": 0.050666666666666665, "grad_norm": 0.16736678779125214, "learning_rate": 9.710940171748368e-05, "loss": 3.2728240966796873, "step": 101000 }, { "epoch": 0.05073333333333333, "grad_norm": 0.21101097762584686, "learning_rate": 9.710570689054099e-05, "loss": 3.2293277740478517, "step": 101010 }, { "epoch": 0.0508, "grad_norm": 0.17549656331539154, "learning_rate": 9.710200977408304e-05, "loss": 3.2780311584472654, "step": 101020 }, { "epoch": 0.050866666666666664, "grad_norm": 0.17631810903549194, "learning_rate": 9.709831036828951e-05, "loss": 3.2259361267089846, "step": 101030 }, { "epoch": 0.05093333333333333, "grad_norm": 0.16892056167125702, "learning_rate": 9.709460867334026e-05, "loss": 3.2720687866210936, "step": 101040 }, { "epoch": 0.051, "grad_norm": 0.1811072826385498, "learning_rate": 9.709090468941516e-05, "loss": 3.238707733154297, "step": 101050 }, { "epoch": 0.05106666666666667, "grad_norm": 0.1684051901102066, "learning_rate": 9.708719841669428e-05, "loss": 3.273700714111328, "step": 101060 }, { "epoch": 0.051133333333333336, "grad_norm": 0.20173829793930054, "learning_rate": 9.70834898553577e-05, "loss": 3.2613204956054687, "step": 101070 }, { "epoch": 0.0512, "grad_norm": 0.17629051208496094, "learning_rate": 9.707977900558572e-05, "loss": 3.2209102630615236, "step": 101080 }, { "epoch": 0.05126666666666667, "grad_norm": 0.16612675786018372, "learning_rate": 9.707606586755868e-05, "loss": 3.240869140625, "step": 101090 }, { "epoch": 0.051333333333333335, "grad_norm": 0.18010741472244263, "learning_rate": 9.707235044145706e-05, "loss": 3.284724807739258, "step": 101100 }, { "epoch": 0.0514, "grad_norm": 0.17511089146137238, "learning_rate": 9.706863272746143e-05, "loss": 3.238564682006836, "step": 101110 }, { "epoch": 0.05146666666666667, "grad_norm": 0.17419621348381042, "learning_rate": 9.706491272575251e-05, "loss": 3.191982650756836, "step": 101120 }, { "epoch": 0.051533333333333334, "grad_norm": 0.16757091879844666, "learning_rate": 9.70611904365111e-05, "loss": 3.2484622955322267, "step": 101130 }, { "epoch": 0.0516, "grad_norm": 0.17307831346988678, "learning_rate": 9.705746585991808e-05, "loss": 3.200994110107422, "step": 101140 }, { "epoch": 0.051666666666666666, "grad_norm": 0.16790181398391724, "learning_rate": 9.705373899615453e-05, "loss": 3.2534889221191405, "step": 101150 }, { "epoch": 0.05173333333333333, "grad_norm": 0.38856270909309387, "learning_rate": 9.705000984540155e-05, "loss": 3.242402267456055, "step": 101160 }, { "epoch": 0.0518, "grad_norm": 0.30296415090560913, "learning_rate": 9.704627840784043e-05, "loss": 3.1696613311767576, "step": 101170 }, { "epoch": 0.051866666666666665, "grad_norm": 0.16460853815078735, "learning_rate": 9.704254468365251e-05, "loss": 3.231174850463867, "step": 101180 }, { "epoch": 0.05193333333333333, "grad_norm": 0.19540761411190033, "learning_rate": 9.703880867301924e-05, "loss": 3.2724483489990233, "step": 101190 }, { "epoch": 0.052, "grad_norm": 0.16968531906604767, "learning_rate": 9.703507037612226e-05, "loss": 3.2309329986572264, "step": 101200 }, { "epoch": 0.052066666666666664, "grad_norm": 0.1787322461605072, "learning_rate": 9.703132979314319e-05, "loss": 3.23399658203125, "step": 101210 }, { "epoch": 0.05213333333333333, "grad_norm": 0.18265368044376373, "learning_rate": 9.702758692426391e-05, "loss": 3.343561553955078, "step": 101220 }, { "epoch": 0.0522, "grad_norm": 0.17126932740211487, "learning_rate": 9.70238417696663e-05, "loss": 3.2436687469482424, "step": 101230 }, { "epoch": 0.05226666666666667, "grad_norm": 0.21638259291648865, "learning_rate": 9.70200943295324e-05, "loss": 3.2497711181640625, "step": 101240 }, { "epoch": 0.052333333333333336, "grad_norm": 0.17366915941238403, "learning_rate": 9.701634460404434e-05, "loss": 3.232427215576172, "step": 101250 }, { "epoch": 0.0524, "grad_norm": 0.19422955811023712, "learning_rate": 9.701259259338438e-05, "loss": 3.2828445434570312, "step": 101260 }, { "epoch": 0.05246666666666667, "grad_norm": 0.17265689373016357, "learning_rate": 9.700883829773486e-05, "loss": 3.2766036987304688, "step": 101270 }, { "epoch": 0.052533333333333335, "grad_norm": 0.18149232864379883, "learning_rate": 9.700508171727829e-05, "loss": 3.278426742553711, "step": 101280 }, { "epoch": 0.0526, "grad_norm": 0.193331778049469, "learning_rate": 9.700132285219724e-05, "loss": 3.2107475280761717, "step": 101290 }, { "epoch": 0.05266666666666667, "grad_norm": 0.18248437345027924, "learning_rate": 9.699756170267438e-05, "loss": 3.2615020751953123, "step": 101300 }, { "epoch": 0.05273333333333333, "grad_norm": 0.16292667388916016, "learning_rate": 9.699379826889254e-05, "loss": 3.2200439453125, "step": 101310 }, { "epoch": 0.0528, "grad_norm": 0.1702517569065094, "learning_rate": 9.699003255103465e-05, "loss": 3.2262908935546877, "step": 101320 }, { "epoch": 0.052866666666666666, "grad_norm": 0.1610783338546753, "learning_rate": 9.698626454928371e-05, "loss": 3.238004684448242, "step": 101330 }, { "epoch": 0.05293333333333333, "grad_norm": 0.2037077099084854, "learning_rate": 9.698249426382286e-05, "loss": 3.224721145629883, "step": 101340 }, { "epoch": 0.053, "grad_norm": 0.17333984375, "learning_rate": 9.697872169483537e-05, "loss": 3.2185035705566407, "step": 101350 }, { "epoch": 0.053066666666666665, "grad_norm": 0.1778590828180313, "learning_rate": 9.69749468425046e-05, "loss": 3.201325225830078, "step": 101360 }, { "epoch": 0.05313333333333333, "grad_norm": 0.16682395339012146, "learning_rate": 9.6971169707014e-05, "loss": 3.233554458618164, "step": 101370 }, { "epoch": 0.0532, "grad_norm": 8.420914649963379, "learning_rate": 9.696739028854718e-05, "loss": 5.447623062133789, "step": 101380 }, { "epoch": 0.053266666666666664, "grad_norm": 0.194560706615448, "learning_rate": 9.696360858728781e-05, "loss": 3.3891006469726563, "step": 101390 }, { "epoch": 0.05333333333333334, "grad_norm": 0.19478978216648102, "learning_rate": 9.695982460341972e-05, "loss": 3.2827754974365235, "step": 101400 }, { "epoch": 0.0534, "grad_norm": 0.21056057512760162, "learning_rate": 9.695603833712679e-05, "loss": 3.2338733673095703, "step": 101410 }, { "epoch": 0.05346666666666667, "grad_norm": 0.18672768771648407, "learning_rate": 9.695224978859308e-05, "loss": 3.2665260314941404, "step": 101420 }, { "epoch": 0.053533333333333336, "grad_norm": 0.516720175743103, "learning_rate": 9.694845895800271e-05, "loss": 3.169695281982422, "step": 101430 }, { "epoch": 0.0536, "grad_norm": 0.17715443670749664, "learning_rate": 9.694466584553994e-05, "loss": 3.2240352630615234, "step": 101440 }, { "epoch": 0.05366666666666667, "grad_norm": 0.22813253104686737, "learning_rate": 9.694087045138913e-05, "loss": 3.3186767578125, "step": 101450 }, { "epoch": 0.053733333333333334, "grad_norm": 0.18563196063041687, "learning_rate": 9.693707277573474e-05, "loss": 3.2791839599609376, "step": 101460 }, { "epoch": 0.0538, "grad_norm": 0.20038893818855286, "learning_rate": 9.693327281876136e-05, "loss": 3.247113037109375, "step": 101470 }, { "epoch": 0.05386666666666667, "grad_norm": 0.19668225944042206, "learning_rate": 9.692947058065367e-05, "loss": 3.2788280487060546, "step": 101480 }, { "epoch": 0.05393333333333333, "grad_norm": 0.1912636160850525, "learning_rate": 9.692566606159647e-05, "loss": 3.216488265991211, "step": 101490 }, { "epoch": 0.054, "grad_norm": 0.1817275881767273, "learning_rate": 9.692185926177472e-05, "loss": 3.2660800933837892, "step": 101500 }, { "epoch": 0.054066666666666666, "grad_norm": 0.17423808574676514, "learning_rate": 9.691805018137339e-05, "loss": 3.2645206451416016, "step": 101510 }, { "epoch": 0.05413333333333333, "grad_norm": 0.20310638844966888, "learning_rate": 9.691423882057764e-05, "loss": 3.244771194458008, "step": 101520 }, { "epoch": 0.0542, "grad_norm": 0.1810188740491867, "learning_rate": 9.691042517957271e-05, "loss": 3.241138458251953, "step": 101530 }, { "epoch": 0.054266666666666664, "grad_norm": 0.18963909149169922, "learning_rate": 9.690660925854396e-05, "loss": 3.221059799194336, "step": 101540 }, { "epoch": 0.05433333333333333, "grad_norm": 0.18518368899822235, "learning_rate": 9.690279105767686e-05, "loss": 3.1892942428588866, "step": 101550 }, { "epoch": 0.0544, "grad_norm": 0.1893593817949295, "learning_rate": 9.689897057715698e-05, "loss": 3.297193908691406, "step": 101560 }, { "epoch": 0.05446666666666666, "grad_norm": 0.1715136617422104, "learning_rate": 9.689514781717002e-05, "loss": 3.2728065490722655, "step": 101570 }, { "epoch": 0.054533333333333336, "grad_norm": 0.17725209891796112, "learning_rate": 9.689132277790178e-05, "loss": 3.3250545501708983, "step": 101580 }, { "epoch": 0.0546, "grad_norm": 0.17486020922660828, "learning_rate": 9.688749545953817e-05, "loss": 3.233343505859375, "step": 101590 }, { "epoch": 0.05466666666666667, "grad_norm": 0.17551061511039734, "learning_rate": 9.688366586226521e-05, "loss": 3.348014068603516, "step": 101600 }, { "epoch": 0.054733333333333335, "grad_norm": 0.20072872936725616, "learning_rate": 9.687983398626903e-05, "loss": 3.2703277587890627, "step": 101610 }, { "epoch": 0.0548, "grad_norm": 0.17799316346645355, "learning_rate": 9.68759998317359e-05, "loss": 3.2461658477783204, "step": 101620 }, { "epoch": 0.05486666666666667, "grad_norm": 0.18237712979316711, "learning_rate": 9.687216339885212e-05, "loss": 3.243488311767578, "step": 101630 }, { "epoch": 0.054933333333333334, "grad_norm": 0.19533909857273102, "learning_rate": 9.68683246878042e-05, "loss": 3.2271495819091798, "step": 101640 }, { "epoch": 0.055, "grad_norm": 0.16655543446540833, "learning_rate": 9.68644836987787e-05, "loss": 3.222452926635742, "step": 101650 }, { "epoch": 0.05506666666666667, "grad_norm": 0.1675342321395874, "learning_rate": 9.686064043196232e-05, "loss": 3.2539878845214845, "step": 101660 }, { "epoch": 0.05513333333333333, "grad_norm": 0.1785064935684204, "learning_rate": 9.685679488754185e-05, "loss": 3.26280517578125, "step": 101670 }, { "epoch": 0.0552, "grad_norm": 0.18926212191581726, "learning_rate": 9.685294706570418e-05, "loss": 3.2010955810546875, "step": 101680 }, { "epoch": 0.055266666666666665, "grad_norm": 0.17796622216701508, "learning_rate": 9.684909696663636e-05, "loss": 3.2749355316162108, "step": 101690 }, { "epoch": 0.05533333333333333, "grad_norm": 0.16901199519634247, "learning_rate": 9.684524459052548e-05, "loss": 3.239583969116211, "step": 101700 }, { "epoch": 0.0554, "grad_norm": 0.16689211130142212, "learning_rate": 9.684138993755881e-05, "loss": 3.249583053588867, "step": 101710 }, { "epoch": 0.055466666666666664, "grad_norm": 0.1996900588274002, "learning_rate": 9.683753300792371e-05, "loss": 3.276592254638672, "step": 101720 }, { "epoch": 0.05553333333333333, "grad_norm": 0.19855724275112152, "learning_rate": 9.683367380180762e-05, "loss": 3.3115089416503904, "step": 101730 }, { "epoch": 0.0556, "grad_norm": 0.1763954907655716, "learning_rate": 9.68298123193981e-05, "loss": 3.308899688720703, "step": 101740 }, { "epoch": 0.05566666666666667, "grad_norm": 0.19270539283752441, "learning_rate": 9.682594856088284e-05, "loss": 3.23028450012207, "step": 101750 }, { "epoch": 0.055733333333333336, "grad_norm": 0.18941283226013184, "learning_rate": 9.682208252644968e-05, "loss": 3.3053783416748046, "step": 101760 }, { "epoch": 0.0558, "grad_norm": 0.1840270459651947, "learning_rate": 9.681821421628647e-05, "loss": 3.2360015869140626, "step": 101770 }, { "epoch": 0.05586666666666667, "grad_norm": 0.17586421966552734, "learning_rate": 9.681434363058124e-05, "loss": 3.2425506591796873, "step": 101780 }, { "epoch": 0.055933333333333335, "grad_norm": 0.1881554126739502, "learning_rate": 9.681047076952211e-05, "loss": 3.4715843200683594, "step": 101790 }, { "epoch": 0.056, "grad_norm": 0.2051030695438385, "learning_rate": 9.680659563329733e-05, "loss": 3.272822952270508, "step": 101800 }, { "epoch": 0.05606666666666667, "grad_norm": 0.2032434195280075, "learning_rate": 9.680271822209524e-05, "loss": 3.4751140594482424, "step": 101810 }, { "epoch": 0.056133333333333334, "grad_norm": 0.1708739846944809, "learning_rate": 9.679883853610429e-05, "loss": 3.2507678985595705, "step": 101820 }, { "epoch": 0.0562, "grad_norm": 0.184620201587677, "learning_rate": 9.679495657551305e-05, "loss": 3.275705337524414, "step": 101830 }, { "epoch": 0.056266666666666666, "grad_norm": 0.17611363530158997, "learning_rate": 9.67910723405102e-05, "loss": 3.2546764373779298, "step": 101840 }, { "epoch": 0.05633333333333333, "grad_norm": 0.17565517127513885, "learning_rate": 9.678718583128453e-05, "loss": 3.287525177001953, "step": 101850 }, { "epoch": 0.0564, "grad_norm": 0.16793176531791687, "learning_rate": 9.678329704802494e-05, "loss": 3.2760459899902346, "step": 101860 }, { "epoch": 0.056466666666666665, "grad_norm": 0.19298690557479858, "learning_rate": 9.677940599092044e-05, "loss": 3.242157745361328, "step": 101870 }, { "epoch": 0.05653333333333333, "grad_norm": 0.16466550529003143, "learning_rate": 9.677551266016015e-05, "loss": 3.2764041900634764, "step": 101880 }, { "epoch": 0.0566, "grad_norm": 0.17238152027130127, "learning_rate": 9.67716170559333e-05, "loss": 3.251211929321289, "step": 101890 }, { "epoch": 0.056666666666666664, "grad_norm": 0.31393641233444214, "learning_rate": 9.676771917842922e-05, "loss": 3.2947174072265626, "step": 101900 }, { "epoch": 0.05673333333333333, "grad_norm": 0.1735195815563202, "learning_rate": 9.676381902783737e-05, "loss": 3.242110824584961, "step": 101910 }, { "epoch": 0.0568, "grad_norm": 0.17422497272491455, "learning_rate": 9.675991660434731e-05, "loss": 3.223488616943359, "step": 101920 }, { "epoch": 0.05686666666666667, "grad_norm": 0.22812826931476593, "learning_rate": 9.675601190814872e-05, "loss": 3.347795104980469, "step": 101930 }, { "epoch": 0.056933333333333336, "grad_norm": 0.17694388329982758, "learning_rate": 9.675210493943138e-05, "loss": 3.241551971435547, "step": 101940 }, { "epoch": 0.057, "grad_norm": 0.1637054830789566, "learning_rate": 9.674819569838518e-05, "loss": 3.247371292114258, "step": 101950 }, { "epoch": 0.05706666666666667, "grad_norm": 0.18377159535884857, "learning_rate": 9.674428418520013e-05, "loss": 3.2416736602783205, "step": 101960 }, { "epoch": 0.057133333333333335, "grad_norm": 0.17377017438411713, "learning_rate": 9.674037040006634e-05, "loss": 3.274846649169922, "step": 101970 }, { "epoch": 0.0572, "grad_norm": 0.19211915135383606, "learning_rate": 9.673645434317402e-05, "loss": 3.2822120666503904, "step": 101980 }, { "epoch": 0.05726666666666667, "grad_norm": 0.17115533351898193, "learning_rate": 9.673253601471353e-05, "loss": 3.2815006256103514, "step": 101990 }, { "epoch": 0.05733333333333333, "grad_norm": 0.4693126976490021, "learning_rate": 9.672861541487531e-05, "loss": 3.352202224731445, "step": 102000 }, { "epoch": 0.0574, "grad_norm": 0.18700964748859406, "learning_rate": 9.672469254384991e-05, "loss": 3.244475173950195, "step": 102010 }, { "epoch": 0.057466666666666666, "grad_norm": 0.17503437399864197, "learning_rate": 9.672076740182798e-05, "loss": 3.1890892028808593, "step": 102020 }, { "epoch": 0.05753333333333333, "grad_norm": 0.17044156789779663, "learning_rate": 9.671683998900034e-05, "loss": 3.250334548950195, "step": 102030 }, { "epoch": 0.0576, "grad_norm": 0.33731141686439514, "learning_rate": 9.671291030555785e-05, "loss": 3.375835418701172, "step": 102040 }, { "epoch": 0.057666666666666665, "grad_norm": 0.17976467311382294, "learning_rate": 9.670897835169149e-05, "loss": 3.2441787719726562, "step": 102050 }, { "epoch": 0.05773333333333333, "grad_norm": 0.1700875163078308, "learning_rate": 9.67050441275924e-05, "loss": 3.2881790161132813, "step": 102060 }, { "epoch": 0.0578, "grad_norm": 0.1675977259874344, "learning_rate": 9.670110763345177e-05, "loss": 3.409374237060547, "step": 102070 }, { "epoch": 0.057866666666666663, "grad_norm": 0.26086917519569397, "learning_rate": 9.669716886946096e-05, "loss": 3.35064582824707, "step": 102080 }, { "epoch": 0.05793333333333334, "grad_norm": 0.17014926671981812, "learning_rate": 9.669322783581138e-05, "loss": 3.170986366271973, "step": 102090 }, { "epoch": 0.058, "grad_norm": 0.1714961975812912, "learning_rate": 9.66892845326946e-05, "loss": 3.3997310638427733, "step": 102100 }, { "epoch": 0.05806666666666667, "grad_norm": 0.19900543987751007, "learning_rate": 9.668533896030228e-05, "loss": 3.237195587158203, "step": 102110 }, { "epoch": 0.058133333333333335, "grad_norm": 0.19089137017726898, "learning_rate": 9.668139111882616e-05, "loss": 3.3142631530761717, "step": 102120 }, { "epoch": 0.0582, "grad_norm": 0.17193177342414856, "learning_rate": 9.667744100845814e-05, "loss": 3.2647659301757814, "step": 102130 }, { "epoch": 0.05826666666666667, "grad_norm": 0.18861548602581024, "learning_rate": 9.667348862939023e-05, "loss": 3.3083343505859375, "step": 102140 }, { "epoch": 0.058333333333333334, "grad_norm": 0.16935598850250244, "learning_rate": 9.666953398181449e-05, "loss": 3.206747055053711, "step": 102150 }, { "epoch": 0.0584, "grad_norm": 0.17081299424171448, "learning_rate": 9.666557706592316e-05, "loss": 3.2200408935546876, "step": 102160 }, { "epoch": 0.05846666666666667, "grad_norm": 0.17465414106845856, "learning_rate": 9.666161788190856e-05, "loss": 3.2435192108154296, "step": 102170 }, { "epoch": 0.05853333333333333, "grad_norm": 0.18242427706718445, "learning_rate": 9.665765642996311e-05, "loss": 3.2453636169433593, "step": 102180 }, { "epoch": 0.0586, "grad_norm": 0.16851355135440826, "learning_rate": 9.665369271027935e-05, "loss": 3.2277973175048826, "step": 102190 }, { "epoch": 0.058666666666666666, "grad_norm": 0.17931623756885529, "learning_rate": 9.664972672304994e-05, "loss": 3.2518863677978516, "step": 102200 }, { "epoch": 0.05873333333333333, "grad_norm": 0.19007647037506104, "learning_rate": 9.664575846846765e-05, "loss": 3.2871940612792967, "step": 102210 }, { "epoch": 0.0588, "grad_norm": 0.3726983368396759, "learning_rate": 9.664178794672534e-05, "loss": 2.8335874557495115, "step": 102220 }, { "epoch": 0.058866666666666664, "grad_norm": 0.18575897812843323, "learning_rate": 9.663781515801599e-05, "loss": 3.2994850158691404, "step": 102230 }, { "epoch": 0.05893333333333333, "grad_norm": 0.1634751707315445, "learning_rate": 9.663384010253269e-05, "loss": 3.282004547119141, "step": 102240 }, { "epoch": 0.059, "grad_norm": 0.16846024990081787, "learning_rate": 9.662986278046866e-05, "loss": 3.265142822265625, "step": 102250 }, { "epoch": 0.05906666666666667, "grad_norm": 0.1696866750717163, "learning_rate": 9.662588319201719e-05, "loss": 3.2274654388427733, "step": 102260 }, { "epoch": 0.059133333333333336, "grad_norm": 0.19931066036224365, "learning_rate": 9.662190133737172e-05, "loss": 3.3059295654296874, "step": 102270 }, { "epoch": 0.0592, "grad_norm": 0.2107618749141693, "learning_rate": 9.661791721672579e-05, "loss": 3.3196136474609377, "step": 102280 }, { "epoch": 0.05926666666666667, "grad_norm": 0.17746548354625702, "learning_rate": 9.661393083027301e-05, "loss": 3.1952877044677734, "step": 102290 }, { "epoch": 0.059333333333333335, "grad_norm": 0.1766158640384674, "learning_rate": 9.660994217820718e-05, "loss": 3.251448059082031, "step": 102300 }, { "epoch": 0.0594, "grad_norm": 0.16492782533168793, "learning_rate": 9.660595126072212e-05, "loss": 3.2529354095458984, "step": 102310 }, { "epoch": 0.05946666666666667, "grad_norm": 0.17297126352787018, "learning_rate": 9.660195807801183e-05, "loss": 3.1907974243164063, "step": 102320 }, { "epoch": 0.059533333333333334, "grad_norm": 0.17892014980316162, "learning_rate": 9.659796263027039e-05, "loss": 3.2080482482910155, "step": 102330 }, { "epoch": 0.0596, "grad_norm": 0.1645844578742981, "learning_rate": 9.659396491769197e-05, "loss": 3.209551239013672, "step": 102340 }, { "epoch": 0.059666666666666666, "grad_norm": 0.16493107378482819, "learning_rate": 9.658996494047092e-05, "loss": 3.3512760162353517, "step": 102350 }, { "epoch": 0.05973333333333333, "grad_norm": 0.17190885543823242, "learning_rate": 9.65859626988016e-05, "loss": 3.233256530761719, "step": 102360 }, { "epoch": 0.0598, "grad_norm": 0.18004560470581055, "learning_rate": 9.658195819287859e-05, "loss": 3.291270446777344, "step": 102370 }, { "epoch": 0.059866666666666665, "grad_norm": 0.1654268503189087, "learning_rate": 9.657795142289649e-05, "loss": 3.2677845001220702, "step": 102380 }, { "epoch": 0.05993333333333333, "grad_norm": 0.16884738206863403, "learning_rate": 9.657394238905004e-05, "loss": 3.2346206665039063, "step": 102390 }, { "epoch": 0.06, "grad_norm": 0.17107166349887848, "learning_rate": 9.65699310915341e-05, "loss": 3.2093597412109376, "step": 102400 }, { "epoch": 0.060066666666666664, "grad_norm": 0.17022129893302917, "learning_rate": 9.656591753054364e-05, "loss": 3.2324363708496096, "step": 102410 }, { "epoch": 0.06013333333333333, "grad_norm": 0.1891043335199356, "learning_rate": 9.656190170627375e-05, "loss": 3.256863021850586, "step": 102420 }, { "epoch": 0.0602, "grad_norm": 0.2285168170928955, "learning_rate": 9.65578836189196e-05, "loss": 3.2682540893554686, "step": 102430 }, { "epoch": 0.06026666666666667, "grad_norm": 0.16951295733451843, "learning_rate": 9.655386326867645e-05, "loss": 3.304439926147461, "step": 102440 }, { "epoch": 0.060333333333333336, "grad_norm": 0.17264984548091888, "learning_rate": 9.654984065573976e-05, "loss": 3.2573596954345705, "step": 102450 }, { "epoch": 0.0604, "grad_norm": 0.18307428061962128, "learning_rate": 9.654581578030502e-05, "loss": 3.215596008300781, "step": 102460 }, { "epoch": 0.06046666666666667, "grad_norm": 0.18795756995677948, "learning_rate": 9.654178864256785e-05, "loss": 3.300708770751953, "step": 102470 }, { "epoch": 0.060533333333333335, "grad_norm": 0.3768214285373688, "learning_rate": 9.653775924272398e-05, "loss": 3.2409896850585938, "step": 102480 }, { "epoch": 0.0606, "grad_norm": 0.171961709856987, "learning_rate": 9.653372758096927e-05, "loss": 3.2178348541259765, "step": 102490 }, { "epoch": 0.06066666666666667, "grad_norm": 0.19956065714359283, "learning_rate": 9.652969365749967e-05, "loss": 3.290476608276367, "step": 102500 }, { "epoch": 0.060733333333333334, "grad_norm": 0.5668976902961731, "learning_rate": 9.652565747251124e-05, "loss": 3.2460079193115234, "step": 102510 }, { "epoch": 0.0608, "grad_norm": 0.18348328769207, "learning_rate": 9.652161902620014e-05, "loss": 3.256175994873047, "step": 102520 }, { "epoch": 0.060866666666666666, "grad_norm": 0.16052871942520142, "learning_rate": 9.651757831876267e-05, "loss": 3.2159934997558595, "step": 102530 }, { "epoch": 0.06093333333333333, "grad_norm": 0.17267413437366486, "learning_rate": 9.651353535039523e-05, "loss": 3.2693077087402345, "step": 102540 }, { "epoch": 0.061, "grad_norm": 0.16979169845581055, "learning_rate": 9.65094901212943e-05, "loss": 3.2097183227539063, "step": 102550 }, { "epoch": 0.061066666666666665, "grad_norm": 0.18894879519939423, "learning_rate": 9.650544263165653e-05, "loss": 3.247409439086914, "step": 102560 }, { "epoch": 0.06113333333333333, "grad_norm": 0.168821319937706, "learning_rate": 9.65013928816786e-05, "loss": 3.218159484863281, "step": 102570 }, { "epoch": 0.0612, "grad_norm": 0.1800558865070343, "learning_rate": 9.649734087155737e-05, "loss": 3.2056690216064454, "step": 102580 }, { "epoch": 0.061266666666666664, "grad_norm": 0.17197029292583466, "learning_rate": 9.649328660148979e-05, "loss": 3.228200149536133, "step": 102590 }, { "epoch": 0.06133333333333333, "grad_norm": 0.19030135869979858, "learning_rate": 9.648923007167289e-05, "loss": 3.2291793823242188, "step": 102600 }, { "epoch": 0.0614, "grad_norm": 0.18016111850738525, "learning_rate": 9.648517128230385e-05, "loss": 3.2297176361083983, "step": 102610 }, { "epoch": 0.06146666666666667, "grad_norm": 0.18820853531360626, "learning_rate": 9.648111023357993e-05, "loss": 3.2541362762451174, "step": 102620 }, { "epoch": 0.061533333333333336, "grad_norm": 0.17829415202140808, "learning_rate": 9.647704692569851e-05, "loss": 3.231158447265625, "step": 102630 }, { "epoch": 0.0616, "grad_norm": 0.16615620255470276, "learning_rate": 9.64729813588571e-05, "loss": 3.218492126464844, "step": 102640 }, { "epoch": 0.06166666666666667, "grad_norm": 0.1743045449256897, "learning_rate": 9.646891353325329e-05, "loss": 3.2024169921875, "step": 102650 }, { "epoch": 0.061733333333333335, "grad_norm": 0.17007774114608765, "learning_rate": 9.64648434490848e-05, "loss": 3.224782180786133, "step": 102660 }, { "epoch": 0.0618, "grad_norm": 0.16742108762264252, "learning_rate": 9.646077110654943e-05, "loss": 3.2206817626953126, "step": 102670 }, { "epoch": 0.06186666666666667, "grad_norm": 0.18369054794311523, "learning_rate": 9.645669650584513e-05, "loss": 3.2179229736328123, "step": 102680 }, { "epoch": 0.06193333333333333, "grad_norm": 0.28197789192199707, "learning_rate": 9.645261964716994e-05, "loss": 3.2095245361328124, "step": 102690 }, { "epoch": 0.062, "grad_norm": 0.17137786746025085, "learning_rate": 9.644854053072201e-05, "loss": 3.2161277770996093, "step": 102700 }, { "epoch": 0.062066666666666666, "grad_norm": 0.17102877795696259, "learning_rate": 9.64444591566996e-05, "loss": 3.2026763916015626, "step": 102710 }, { "epoch": 0.06213333333333333, "grad_norm": 0.1849532276391983, "learning_rate": 9.644037552530107e-05, "loss": 3.20628662109375, "step": 102720 }, { "epoch": 0.0622, "grad_norm": 0.18443427979946136, "learning_rate": 9.643628963672492e-05, "loss": 3.2340599060058595, "step": 102730 }, { "epoch": 0.062266666666666665, "grad_norm": 0.18334630131721497, "learning_rate": 9.643220149116971e-05, "loss": 3.229897308349609, "step": 102740 }, { "epoch": 0.06233333333333333, "grad_norm": 0.1819526106119156, "learning_rate": 9.642811108883418e-05, "loss": 3.2325565338134767, "step": 102750 }, { "epoch": 0.0624, "grad_norm": 0.17183096706867218, "learning_rate": 9.642401842991711e-05, "loss": 3.1929698944091798, "step": 102760 }, { "epoch": 0.06246666666666666, "grad_norm": 0.1977253407239914, "learning_rate": 9.641992351461741e-05, "loss": 3.1427764892578125, "step": 102770 }, { "epoch": 0.06253333333333333, "grad_norm": 0.17581138014793396, "learning_rate": 9.641582634313414e-05, "loss": 3.2242813110351562, "step": 102780 }, { "epoch": 0.0626, "grad_norm": 0.1732015311717987, "learning_rate": 9.641172691566642e-05, "loss": 3.226265716552734, "step": 102790 }, { "epoch": 0.06266666666666666, "grad_norm": 0.17644354701042175, "learning_rate": 9.640762523241349e-05, "loss": 3.209980773925781, "step": 102800 }, { "epoch": 0.06273333333333334, "grad_norm": 0.20975664258003235, "learning_rate": 9.640352129357473e-05, "loss": 3.3789161682128905, "step": 102810 }, { "epoch": 0.0628, "grad_norm": 0.39406120777130127, "learning_rate": 9.639941509934958e-05, "loss": 3.2696212768554687, "step": 102820 }, { "epoch": 0.06286666666666667, "grad_norm": 0.17519865930080414, "learning_rate": 9.639530664993764e-05, "loss": 3.243638610839844, "step": 102830 }, { "epoch": 0.06293333333333333, "grad_norm": 0.17815159261226654, "learning_rate": 9.639119594553857e-05, "loss": 3.283892059326172, "step": 102840 }, { "epoch": 0.063, "grad_norm": 0.18523970246315002, "learning_rate": 9.638708298635219e-05, "loss": 3.1505245208740233, "step": 102850 }, { "epoch": 0.06306666666666666, "grad_norm": 0.7236111760139465, "learning_rate": 9.63829677725784e-05, "loss": 3.151352882385254, "step": 102860 }, { "epoch": 0.06313333333333333, "grad_norm": 0.20818550884723663, "learning_rate": 9.637885030441721e-05, "loss": 3.1938806533813477, "step": 102870 }, { "epoch": 0.0632, "grad_norm": 0.1938062608242035, "learning_rate": 9.637473058206874e-05, "loss": 3.2582836151123047, "step": 102880 }, { "epoch": 0.06326666666666667, "grad_norm": 0.1789773851633072, "learning_rate": 9.637060860573323e-05, "loss": 3.188027191162109, "step": 102890 }, { "epoch": 0.06333333333333334, "grad_norm": 0.41150569915771484, "learning_rate": 9.636648437561104e-05, "loss": 3.269758605957031, "step": 102900 }, { "epoch": 0.0634, "grad_norm": 0.18441098928451538, "learning_rate": 9.636235789190259e-05, "loss": 3.2807796478271483, "step": 102910 }, { "epoch": 0.06346666666666667, "grad_norm": 0.16094599664211273, "learning_rate": 9.635822915480848e-05, "loss": 3.2148406982421873, "step": 102920 }, { "epoch": 0.06353333333333333, "grad_norm": 0.163807213306427, "learning_rate": 9.635409816452935e-05, "loss": 3.266091537475586, "step": 102930 }, { "epoch": 0.0636, "grad_norm": 0.17370927333831787, "learning_rate": 9.6349964921266e-05, "loss": 3.2295406341552733, "step": 102940 }, { "epoch": 0.06366666666666666, "grad_norm": 0.18613697588443756, "learning_rate": 9.634582942521932e-05, "loss": 3.3133476257324217, "step": 102950 }, { "epoch": 0.06373333333333334, "grad_norm": 0.16380615532398224, "learning_rate": 9.634169167659029e-05, "loss": 3.262929153442383, "step": 102960 }, { "epoch": 0.0638, "grad_norm": 0.18773820996284485, "learning_rate": 9.633755167558004e-05, "loss": 3.1870975494384766, "step": 102970 }, { "epoch": 0.06386666666666667, "grad_norm": 0.18464328348636627, "learning_rate": 9.633340942238979e-05, "loss": 3.2227516174316406, "step": 102980 }, { "epoch": 0.06393333333333333, "grad_norm": 0.17796464264392853, "learning_rate": 9.632926491722086e-05, "loss": 3.2221282958984374, "step": 102990 }, { "epoch": 0.064, "grad_norm": 0.16514946520328522, "learning_rate": 9.632511816027469e-05, "loss": 3.2880130767822267, "step": 103000 }, { "epoch": 0.06406666666666666, "grad_norm": 0.17037451267242432, "learning_rate": 9.632096915175285e-05, "loss": 3.2097122192382814, "step": 103010 }, { "epoch": 0.06413333333333333, "grad_norm": 0.17305003106594086, "learning_rate": 9.631681789185696e-05, "loss": 3.1946903228759767, "step": 103020 }, { "epoch": 0.0642, "grad_norm": 0.1753523051738739, "learning_rate": 9.631266438078882e-05, "loss": 3.27197380065918, "step": 103030 }, { "epoch": 0.06426666666666667, "grad_norm": 0.16023029386997223, "learning_rate": 9.63085086187503e-05, "loss": 3.2021812438964843, "step": 103040 }, { "epoch": 0.06433333333333334, "grad_norm": 0.1612251251935959, "learning_rate": 9.630435060594334e-05, "loss": 3.2042209625244142, "step": 103050 }, { "epoch": 0.0644, "grad_norm": 0.1932184249162674, "learning_rate": 9.630019034257011e-05, "loss": 3.1796695709228517, "step": 103060 }, { "epoch": 0.06446666666666667, "grad_norm": 0.1877821981906891, "learning_rate": 9.629602782883276e-05, "loss": 3.2604965209960937, "step": 103070 }, { "epoch": 0.06453333333333333, "grad_norm": 0.18710412085056305, "learning_rate": 9.629186306493362e-05, "loss": 3.275337982177734, "step": 103080 }, { "epoch": 0.0646, "grad_norm": 0.17486907541751862, "learning_rate": 9.628769605107513e-05, "loss": 3.1965627670288086, "step": 103090 }, { "epoch": 0.06466666666666666, "grad_norm": 0.2251145839691162, "learning_rate": 9.62835267874598e-05, "loss": 3.2646045684814453, "step": 103100 }, { "epoch": 0.06473333333333334, "grad_norm": 0.1758144199848175, "learning_rate": 9.627935527429027e-05, "loss": 3.2227859497070312, "step": 103110 }, { "epoch": 0.0648, "grad_norm": 0.1710442155599594, "learning_rate": 9.62751815117693e-05, "loss": 3.201504135131836, "step": 103120 }, { "epoch": 0.06486666666666667, "grad_norm": 0.1731809675693512, "learning_rate": 9.627100550009978e-05, "loss": 3.2066982269287108, "step": 103130 }, { "epoch": 0.06493333333333333, "grad_norm": 0.17387251555919647, "learning_rate": 9.626682723948463e-05, "loss": 3.1791702270507813, "step": 103140 }, { "epoch": 0.065, "grad_norm": 0.18918782472610474, "learning_rate": 9.626264673012695e-05, "loss": 3.289244842529297, "step": 103150 }, { "epoch": 0.06506666666666666, "grad_norm": 0.1839744746685028, "learning_rate": 9.625846397222992e-05, "loss": 3.2481307983398438, "step": 103160 }, { "epoch": 0.06513333333333333, "grad_norm": 0.19222880899906158, "learning_rate": 9.625427896599686e-05, "loss": 4.246529006958008, "step": 103170 }, { "epoch": 0.0652, "grad_norm": 0.1702549010515213, "learning_rate": 9.625009171163118e-05, "loss": 3.2324451446533202, "step": 103180 }, { "epoch": 0.06526666666666667, "grad_norm": 0.18297582864761353, "learning_rate": 9.624590220933634e-05, "loss": 3.394380569458008, "step": 103190 }, { "epoch": 0.06533333333333333, "grad_norm": 0.16720061004161835, "learning_rate": 9.624171045931604e-05, "loss": 3.227935791015625, "step": 103200 }, { "epoch": 0.0654, "grad_norm": 0.19226297736167908, "learning_rate": 9.623751646177398e-05, "loss": 3.219175338745117, "step": 103210 }, { "epoch": 0.06546666666666667, "grad_norm": 0.17904411256313324, "learning_rate": 9.623332021691398e-05, "loss": 3.198894500732422, "step": 103220 }, { "epoch": 0.06553333333333333, "grad_norm": 0.18289881944656372, "learning_rate": 9.622912172494003e-05, "loss": 3.198000907897949, "step": 103230 }, { "epoch": 0.0656, "grad_norm": 0.18517141044139862, "learning_rate": 9.622492098605618e-05, "loss": 3.1975852966308596, "step": 103240 }, { "epoch": 0.06566666666666666, "grad_norm": 0.17026813328266144, "learning_rate": 9.622071800046661e-05, "loss": 3.2142051696777343, "step": 103250 }, { "epoch": 0.06573333333333334, "grad_norm": 0.1768302023410797, "learning_rate": 9.621651276837559e-05, "loss": 3.1795841217041017, "step": 103260 }, { "epoch": 0.0658, "grad_norm": 0.18778114020824432, "learning_rate": 9.621230528998751e-05, "loss": 3.2487674713134767, "step": 103270 }, { "epoch": 0.06586666666666667, "grad_norm": 0.17371942102909088, "learning_rate": 9.620809556550687e-05, "loss": 3.220105743408203, "step": 103280 }, { "epoch": 0.06593333333333333, "grad_norm": 0.16577200591564178, "learning_rate": 9.62038835951383e-05, "loss": 3.200623321533203, "step": 103290 }, { "epoch": 0.066, "grad_norm": 0.3165707290172577, "learning_rate": 9.61996693790865e-05, "loss": 3.2635250091552734, "step": 103300 }, { "epoch": 0.06606666666666666, "grad_norm": 0.1846824288368225, "learning_rate": 9.619545291755627e-05, "loss": 3.255442428588867, "step": 103310 }, { "epoch": 0.06613333333333334, "grad_norm": 0.17448779940605164, "learning_rate": 9.619123421075257e-05, "loss": 3.313571548461914, "step": 103320 }, { "epoch": 0.0662, "grad_norm": 0.1882205307483673, "learning_rate": 9.618701325888048e-05, "loss": 3.182765579223633, "step": 103330 }, { "epoch": 0.06626666666666667, "grad_norm": 0.20372430980205536, "learning_rate": 9.61827900621451e-05, "loss": 3.2510875701904296, "step": 103340 }, { "epoch": 0.06633333333333333, "grad_norm": 0.1806679666042328, "learning_rate": 9.617856462075172e-05, "loss": 3.2388126373291017, "step": 103350 }, { "epoch": 0.0664, "grad_norm": 0.19509285688400269, "learning_rate": 9.617433693490571e-05, "loss": 3.2034233093261717, "step": 103360 }, { "epoch": 0.06646666666666666, "grad_norm": 0.176870658993721, "learning_rate": 9.617010700481253e-05, "loss": 3.168494987487793, "step": 103370 }, { "epoch": 0.06653333333333333, "grad_norm": 0.17545194923877716, "learning_rate": 9.616587483067781e-05, "loss": 3.2269134521484375, "step": 103380 }, { "epoch": 0.0666, "grad_norm": 0.1763061285018921, "learning_rate": 9.616164041270723e-05, "loss": 3.410857009887695, "step": 103390 }, { "epoch": 0.06666666666666667, "grad_norm": 0.18675415217876434, "learning_rate": 9.615740375110658e-05, "loss": 3.219410705566406, "step": 103400 }, { "epoch": 0.06673333333333334, "grad_norm": 0.2138786017894745, "learning_rate": 9.615316484608181e-05, "loss": 3.238430404663086, "step": 103410 }, { "epoch": 0.0668, "grad_norm": 0.18453632295131683, "learning_rate": 9.614892369783892e-05, "loss": 3.2398128509521484, "step": 103420 }, { "epoch": 0.06686666666666667, "grad_norm": 0.17140458524227142, "learning_rate": 9.614468030658408e-05, "loss": 3.2692699432373047, "step": 103430 }, { "epoch": 0.06693333333333333, "grad_norm": 0.17366750538349152, "learning_rate": 9.614043467252349e-05, "loss": 3.200261688232422, "step": 103440 }, { "epoch": 0.067, "grad_norm": 0.17496447265148163, "learning_rate": 9.613618679586354e-05, "loss": 3.2075645446777346, "step": 103450 }, { "epoch": 0.06706666666666666, "grad_norm": 0.1925254762172699, "learning_rate": 9.613193667681066e-05, "loss": 3.2385700225830076, "step": 103460 }, { "epoch": 0.06713333333333334, "grad_norm": 0.19410157203674316, "learning_rate": 9.612768431557146e-05, "loss": 3.2959800720214845, "step": 103470 }, { "epoch": 0.0672, "grad_norm": 0.17732566595077515, "learning_rate": 9.61234297123526e-05, "loss": 3.221241760253906, "step": 103480 }, { "epoch": 0.06726666666666667, "grad_norm": 0.17975439131259918, "learning_rate": 9.611917286736088e-05, "loss": 3.2134654998779295, "step": 103490 }, { "epoch": 0.06733333333333333, "grad_norm": 0.19212664663791656, "learning_rate": 9.611491378080318e-05, "loss": 3.1767499923706053, "step": 103500 }, { "epoch": 0.0674, "grad_norm": 0.17872482538223267, "learning_rate": 9.61106524528865e-05, "loss": 3.1566009521484375, "step": 103510 }, { "epoch": 0.06746666666666666, "grad_norm": 0.18552570044994354, "learning_rate": 9.6106388883818e-05, "loss": 3.207967758178711, "step": 103520 }, { "epoch": 0.06753333333333333, "grad_norm": 0.18109670281410217, "learning_rate": 9.610212307380487e-05, "loss": 3.2828338623046873, "step": 103530 }, { "epoch": 0.0676, "grad_norm": 0.17156624794006348, "learning_rate": 9.609785502305446e-05, "loss": 3.155868148803711, "step": 103540 }, { "epoch": 0.06766666666666667, "grad_norm": 0.17508213222026825, "learning_rate": 9.60935847317742e-05, "loss": 3.236589050292969, "step": 103550 }, { "epoch": 0.06773333333333334, "grad_norm": 0.166759192943573, "learning_rate": 9.608931220017166e-05, "loss": 3.203769302368164, "step": 103560 }, { "epoch": 0.0678, "grad_norm": 0.17823083698749542, "learning_rate": 9.60850374284545e-05, "loss": 3.2152000427246095, "step": 103570 }, { "epoch": 0.06786666666666667, "grad_norm": 0.19545812904834747, "learning_rate": 9.608076041683047e-05, "loss": 3.1754236221313477, "step": 103580 }, { "epoch": 0.06793333333333333, "grad_norm": 0.18023699522018433, "learning_rate": 9.607648116550744e-05, "loss": 3.2332298278808596, "step": 103590 }, { "epoch": 0.068, "grad_norm": 0.16794560849666595, "learning_rate": 9.607219967469346e-05, "loss": 3.326029968261719, "step": 103600 }, { "epoch": 0.06806666666666666, "grad_norm": 1.0636405944824219, "learning_rate": 9.606791594459656e-05, "loss": 3.3367839813232423, "step": 103610 }, { "epoch": 0.06813333333333334, "grad_norm": 0.16972681879997253, "learning_rate": 9.606362997542497e-05, "loss": 3.43426628112793, "step": 103620 }, { "epoch": 0.0682, "grad_norm": 0.17763224244117737, "learning_rate": 9.605934176738702e-05, "loss": 3.1794322967529296, "step": 103630 }, { "epoch": 0.06826666666666667, "grad_norm": 0.27817845344543457, "learning_rate": 9.60550513206911e-05, "loss": 3.2320110321044924, "step": 103640 }, { "epoch": 0.06833333333333333, "grad_norm": 0.17546257376670837, "learning_rate": 9.605075863554579e-05, "loss": 3.256380081176758, "step": 103650 }, { "epoch": 0.0684, "grad_norm": 0.4330294728279114, "learning_rate": 9.604646371215966e-05, "loss": 3.3231510162353515, "step": 103660 }, { "epoch": 0.06846666666666666, "grad_norm": 0.16944079101085663, "learning_rate": 9.604216655074151e-05, "loss": 3.43016357421875, "step": 103670 }, { "epoch": 0.06853333333333333, "grad_norm": 0.18852224946022034, "learning_rate": 9.60378671515002e-05, "loss": 3.2603851318359376, "step": 103680 }, { "epoch": 0.0686, "grad_norm": 0.1799336075782776, "learning_rate": 9.60335655146447e-05, "loss": 3.2705345153808594, "step": 103690 }, { "epoch": 0.06866666666666667, "grad_norm": 0.17418649792671204, "learning_rate": 9.602926164038405e-05, "loss": 3.234897994995117, "step": 103700 }, { "epoch": 0.06873333333333333, "grad_norm": 0.21487200260162354, "learning_rate": 9.602495552892745e-05, "loss": 3.3171718597412108, "step": 103710 }, { "epoch": 0.0688, "grad_norm": 0.18209777772426605, "learning_rate": 9.602064718048423e-05, "loss": 3.4334789276123048, "step": 103720 }, { "epoch": 0.06886666666666667, "grad_norm": 0.17250029742717743, "learning_rate": 9.601633659526373e-05, "loss": 3.23535041809082, "step": 103730 }, { "epoch": 0.06893333333333333, "grad_norm": 0.17271699011325836, "learning_rate": 9.60120237734755e-05, "loss": 3.2523075103759767, "step": 103740 }, { "epoch": 0.069, "grad_norm": 0.1680082231760025, "learning_rate": 9.600770871532916e-05, "loss": 3.2957389831542967, "step": 103750 }, { "epoch": 0.06906666666666667, "grad_norm": 0.24388667941093445, "learning_rate": 9.600339142103441e-05, "loss": 3.259154510498047, "step": 103760 }, { "epoch": 0.06913333333333334, "grad_norm": 0.18223494291305542, "learning_rate": 9.599907189080113e-05, "loss": 3.2524391174316407, "step": 103770 }, { "epoch": 0.0692, "grad_norm": 0.1887563318014145, "learning_rate": 9.599475012483923e-05, "loss": 3.3216575622558593, "step": 103780 }, { "epoch": 0.06926666666666667, "grad_norm": 0.30939915776252747, "learning_rate": 9.599042612335877e-05, "loss": 3.3030818939208983, "step": 103790 }, { "epoch": 0.06933333333333333, "grad_norm": 0.19167962670326233, "learning_rate": 9.598609988656992e-05, "loss": 3.2458499908447265, "step": 103800 }, { "epoch": 0.0694, "grad_norm": 0.17220568656921387, "learning_rate": 9.598177141468296e-05, "loss": 3.1692310333251954, "step": 103810 }, { "epoch": 0.06946666666666666, "grad_norm": 0.1960216909646988, "learning_rate": 9.597744070790826e-05, "loss": 3.219563293457031, "step": 103820 }, { "epoch": 0.06953333333333334, "grad_norm": 0.29524192214012146, "learning_rate": 9.597310776645629e-05, "loss": 3.300878143310547, "step": 103830 }, { "epoch": 0.0696, "grad_norm": 0.1816003918647766, "learning_rate": 9.596877259053769e-05, "loss": 3.2980026245117187, "step": 103840 }, { "epoch": 0.06966666666666667, "grad_norm": 0.1706310212612152, "learning_rate": 9.596443518036313e-05, "loss": 3.2486602783203127, "step": 103850 }, { "epoch": 0.06973333333333333, "grad_norm": 0.18831536173820496, "learning_rate": 9.596009553614345e-05, "loss": 3.2845073699951173, "step": 103860 }, { "epoch": 0.0698, "grad_norm": 0.1753290295600891, "learning_rate": 9.595575365808953e-05, "loss": 3.273313522338867, "step": 103870 }, { "epoch": 0.06986666666666666, "grad_norm": 0.1853184700012207, "learning_rate": 9.595140954641247e-05, "loss": 3.2448246002197267, "step": 103880 }, { "epoch": 0.06993333333333333, "grad_norm": 0.18545493483543396, "learning_rate": 9.594706320132333e-05, "loss": 3.2158134460449217, "step": 103890 }, { "epoch": 0.07, "grad_norm": 0.17957569658756256, "learning_rate": 9.594271462303342e-05, "loss": 3.2409019470214844, "step": 103900 }, { "epoch": 0.07006666666666667, "grad_norm": 0.3266254663467407, "learning_rate": 9.593836381175408e-05, "loss": 3.337383270263672, "step": 103910 }, { "epoch": 0.07013333333333334, "grad_norm": 0.17881178855895996, "learning_rate": 9.593401076769679e-05, "loss": 3.242548370361328, "step": 103920 }, { "epoch": 0.0702, "grad_norm": 0.2730664610862732, "learning_rate": 9.592965549107308e-05, "loss": 3.2602500915527344, "step": 103930 }, { "epoch": 0.07026666666666667, "grad_norm": 0.18765117228031158, "learning_rate": 9.592529798209466e-05, "loss": 3.2608985900878906, "step": 103940 }, { "epoch": 0.07033333333333333, "grad_norm": 0.1810206174850464, "learning_rate": 9.592093824097335e-05, "loss": 3.296438980102539, "step": 103950 }, { "epoch": 0.0704, "grad_norm": 0.19060072302818298, "learning_rate": 9.5916576267921e-05, "loss": 3.2518875122070314, "step": 103960 }, { "epoch": 0.07046666666666666, "grad_norm": 0.1931489259004593, "learning_rate": 9.591221206314965e-05, "loss": 3.2466373443603516, "step": 103970 }, { "epoch": 0.07053333333333334, "grad_norm": 0.1643364131450653, "learning_rate": 9.59078456268714e-05, "loss": 3.2824466705322264, "step": 103980 }, { "epoch": 0.0706, "grad_norm": 0.9946746230125427, "learning_rate": 9.590347695929849e-05, "loss": 3.336544418334961, "step": 103990 }, { "epoch": 0.07066666666666667, "grad_norm": 0.23042508959770203, "learning_rate": 9.589910606064323e-05, "loss": 3.4883384704589844, "step": 104000 }, { "epoch": 0.07073333333333333, "grad_norm": 0.19064530730247498, "learning_rate": 9.589473293111809e-05, "loss": 3.2707229614257813, "step": 104010 }, { "epoch": 0.0708, "grad_norm": 0.18008007109165192, "learning_rate": 9.589035757093561e-05, "loss": 3.2288848876953127, "step": 104020 }, { "epoch": 0.07086666666666666, "grad_norm": 0.18725116550922394, "learning_rate": 9.588597998030844e-05, "loss": 3.296216583251953, "step": 104030 }, { "epoch": 0.07093333333333333, "grad_norm": 0.18550488352775574, "learning_rate": 9.588160015944939e-05, "loss": 3.279119873046875, "step": 104040 }, { "epoch": 0.071, "grad_norm": 0.16400296986103058, "learning_rate": 9.587721810857126e-05, "loss": 3.2855293273925783, "step": 104050 }, { "epoch": 0.07106666666666667, "grad_norm": 0.2572838068008423, "learning_rate": 9.587283382788709e-05, "loss": 3.3394611358642576, "step": 104060 }, { "epoch": 0.07113333333333334, "grad_norm": 0.1971157193183899, "learning_rate": 9.586844731760995e-05, "loss": 3.2548709869384767, "step": 104070 }, { "epoch": 0.0712, "grad_norm": 0.173478901386261, "learning_rate": 9.586405857795306e-05, "loss": 3.300617218017578, "step": 104080 }, { "epoch": 0.07126666666666667, "grad_norm": 0.16661249101161957, "learning_rate": 9.585966760912973e-05, "loss": 3.2724964141845705, "step": 104090 }, { "epoch": 0.07133333333333333, "grad_norm": 0.2127007097005844, "learning_rate": 9.585527441135334e-05, "loss": 3.2620948791503905, "step": 104100 }, { "epoch": 0.0714, "grad_norm": 0.2016989290714264, "learning_rate": 9.585087898483746e-05, "loss": 3.457813262939453, "step": 104110 }, { "epoch": 0.07146666666666666, "grad_norm": 0.18986405432224274, "learning_rate": 9.58464813297957e-05, "loss": 3.2532196044921875, "step": 104120 }, { "epoch": 0.07153333333333334, "grad_norm": 0.1818903088569641, "learning_rate": 9.58420814464418e-05, "loss": 3.261556625366211, "step": 104130 }, { "epoch": 0.0716, "grad_norm": 0.1746755838394165, "learning_rate": 9.583767933498964e-05, "loss": 3.2200271606445314, "step": 104140 }, { "epoch": 0.07166666666666667, "grad_norm": 0.21430960297584534, "learning_rate": 9.583327499565315e-05, "loss": 3.3041248321533203, "step": 104150 }, { "epoch": 0.07173333333333333, "grad_norm": 0.1874508261680603, "learning_rate": 9.58288684286464e-05, "loss": 3.289046859741211, "step": 104160 }, { "epoch": 0.0718, "grad_norm": 0.1600426882505417, "learning_rate": 9.582445963418359e-05, "loss": 3.273111343383789, "step": 104170 }, { "epoch": 0.07186666666666666, "grad_norm": 0.18449214100837708, "learning_rate": 9.582004861247897e-05, "loss": 3.2267822265625, "step": 104180 }, { "epoch": 0.07193333333333334, "grad_norm": 0.18376202881336212, "learning_rate": 9.581563536374694e-05, "loss": 3.2144546508789062, "step": 104190 }, { "epoch": 0.072, "grad_norm": 0.2142259180545807, "learning_rate": 9.581121988820203e-05, "loss": 3.228171539306641, "step": 104200 }, { "epoch": 0.07206666666666667, "grad_norm": 0.27300629019737244, "learning_rate": 9.580680218605881e-05, "loss": 3.596132278442383, "step": 104210 }, { "epoch": 0.07213333333333333, "grad_norm": 0.3733060359954834, "learning_rate": 9.580238225753204e-05, "loss": 3.2789966583251955, "step": 104220 }, { "epoch": 0.0722, "grad_norm": 0.17282719910144806, "learning_rate": 9.57979601028365e-05, "loss": 3.3007049560546875, "step": 104230 }, { "epoch": 0.07226666666666667, "grad_norm": 0.25795161724090576, "learning_rate": 9.579353572218716e-05, "loss": 3.3210548400878905, "step": 104240 }, { "epoch": 0.07233333333333333, "grad_norm": 0.18702687323093414, "learning_rate": 9.578910911579904e-05, "loss": 3.2821197509765625, "step": 104250 }, { "epoch": 0.0724, "grad_norm": 0.18487998843193054, "learning_rate": 9.578468028388729e-05, "loss": 3.232279968261719, "step": 104260 }, { "epoch": 0.07246666666666667, "grad_norm": 0.2070157676935196, "learning_rate": 9.578024922666717e-05, "loss": 3.2749462127685547, "step": 104270 }, { "epoch": 0.07253333333333334, "grad_norm": 0.5809149146080017, "learning_rate": 9.577581594435406e-05, "loss": 3.328290557861328, "step": 104280 }, { "epoch": 0.0726, "grad_norm": 0.1749495565891266, "learning_rate": 9.577138043716342e-05, "loss": 3.254985046386719, "step": 104290 }, { "epoch": 0.07266666666666667, "grad_norm": 0.16799384355545044, "learning_rate": 9.576694270531083e-05, "loss": 3.2917835235595705, "step": 104300 }, { "epoch": 0.07273333333333333, "grad_norm": 0.1776348203420639, "learning_rate": 9.5762502749012e-05, "loss": 3.3126197814941407, "step": 104310 }, { "epoch": 0.0728, "grad_norm": 0.18124128878116608, "learning_rate": 9.575806056848271e-05, "loss": 3.250205230712891, "step": 104320 }, { "epoch": 0.07286666666666666, "grad_norm": 0.19141705334186554, "learning_rate": 9.575361616393887e-05, "loss": 3.284702682495117, "step": 104330 }, { "epoch": 0.07293333333333334, "grad_norm": 0.18322788178920746, "learning_rate": 9.574916953559649e-05, "loss": 3.2574111938476564, "step": 104340 }, { "epoch": 0.073, "grad_norm": 0.19036279618740082, "learning_rate": 9.574472068367171e-05, "loss": 3.193827247619629, "step": 104350 }, { "epoch": 0.07306666666666667, "grad_norm": 0.22443412244319916, "learning_rate": 9.574026960838075e-05, "loss": 3.354604721069336, "step": 104360 }, { "epoch": 0.07313333333333333, "grad_norm": 0.16642139852046967, "learning_rate": 9.573581630993995e-05, "loss": 3.3459991455078124, "step": 104370 }, { "epoch": 0.0732, "grad_norm": 0.18798373639583588, "learning_rate": 9.573136078856575e-05, "loss": 3.238303375244141, "step": 104380 }, { "epoch": 0.07326666666666666, "grad_norm": 0.16786393523216248, "learning_rate": 9.572690304447471e-05, "loss": 3.2358875274658203, "step": 104390 }, { "epoch": 0.07333333333333333, "grad_norm": 0.16350562870502472, "learning_rate": 9.572244307788352e-05, "loss": 3.234714889526367, "step": 104400 }, { "epoch": 0.0734, "grad_norm": 0.16234862804412842, "learning_rate": 9.57179808890089e-05, "loss": 3.226913833618164, "step": 104410 }, { "epoch": 0.07346666666666667, "grad_norm": 0.7597988247871399, "learning_rate": 9.571351647806776e-05, "loss": 2.938052177429199, "step": 104420 }, { "epoch": 0.07353333333333334, "grad_norm": 0.18713238835334778, "learning_rate": 9.570904984527709e-05, "loss": 3.265636444091797, "step": 104430 }, { "epoch": 0.0736, "grad_norm": 0.1807105988264084, "learning_rate": 9.570458099085398e-05, "loss": 3.3065601348876954, "step": 104440 }, { "epoch": 0.07366666666666667, "grad_norm": 0.1824122965335846, "learning_rate": 9.570010991501563e-05, "loss": 3.253279113769531, "step": 104450 }, { "epoch": 0.07373333333333333, "grad_norm": 0.18600958585739136, "learning_rate": 9.569563661797935e-05, "loss": 3.2684661865234377, "step": 104460 }, { "epoch": 0.0738, "grad_norm": 0.17206113040447235, "learning_rate": 9.569116109996256e-05, "loss": 3.2823272705078126, "step": 104470 }, { "epoch": 0.07386666666666666, "grad_norm": 0.22594507038593292, "learning_rate": 9.568668336118278e-05, "loss": 3.230610656738281, "step": 104480 }, { "epoch": 0.07393333333333334, "grad_norm": 0.18189068138599396, "learning_rate": 9.568220340185768e-05, "loss": 3.29044189453125, "step": 104490 }, { "epoch": 0.074, "grad_norm": 0.16147229075431824, "learning_rate": 9.567772122220495e-05, "loss": 3.2405750274658205, "step": 104500 }, { "epoch": 0.07406666666666667, "grad_norm": 0.19718214869499207, "learning_rate": 9.567323682244248e-05, "loss": 3.2257339477539064, "step": 104510 }, { "epoch": 0.07413333333333333, "grad_norm": 0.1904936134815216, "learning_rate": 9.566875020278822e-05, "loss": 3.465182876586914, "step": 104520 }, { "epoch": 0.0742, "grad_norm": 0.17245836555957794, "learning_rate": 9.566426136346022e-05, "loss": 3.3208179473876953, "step": 104530 }, { "epoch": 0.07426666666666666, "grad_norm": 0.18593056499958038, "learning_rate": 9.565977030467668e-05, "loss": 3.2805084228515624, "step": 104540 }, { "epoch": 0.07433333333333333, "grad_norm": 0.1919175088405609, "learning_rate": 9.565527702665587e-05, "loss": 3.2116725921630858, "step": 104550 }, { "epoch": 0.0744, "grad_norm": 0.1729540377855301, "learning_rate": 9.565078152961618e-05, "loss": 3.2589694976806642, "step": 104560 }, { "epoch": 0.07446666666666667, "grad_norm": 0.16869677603244781, "learning_rate": 9.56462838137761e-05, "loss": 3.236023712158203, "step": 104570 }, { "epoch": 0.07453333333333333, "grad_norm": 0.1807403862476349, "learning_rate": 9.564178387935425e-05, "loss": 3.2539928436279295, "step": 104580 }, { "epoch": 0.0746, "grad_norm": 0.19483767449855804, "learning_rate": 9.563728172656934e-05, "loss": 3.2843158721923826, "step": 104590 }, { "epoch": 0.07466666666666667, "grad_norm": 0.16414682567119598, "learning_rate": 9.563277735564018e-05, "loss": 3.2530048370361326, "step": 104600 }, { "epoch": 0.07473333333333333, "grad_norm": 0.17174701392650604, "learning_rate": 9.562827076678572e-05, "loss": 3.2954750061035156, "step": 104610 }, { "epoch": 0.0748, "grad_norm": 0.1698242872953415, "learning_rate": 9.562376196022498e-05, "loss": 3.2893924713134766, "step": 104620 }, { "epoch": 0.07486666666666666, "grad_norm": 0.18751581013202667, "learning_rate": 9.561925093617712e-05, "loss": 3.278823471069336, "step": 104630 }, { "epoch": 0.07493333333333334, "grad_norm": 0.2033078521490097, "learning_rate": 9.561473769486138e-05, "loss": 3.2243511199951174, "step": 104640 }, { "epoch": 0.075, "grad_norm": 0.17238447070121765, "learning_rate": 9.561022223649712e-05, "loss": 3.2133033752441404, "step": 104650 }, { "epoch": 0.07506666666666667, "grad_norm": 0.17938381433486938, "learning_rate": 9.560570456130382e-05, "loss": 3.2306884765625, "step": 104660 }, { "epoch": 0.07513333333333333, "grad_norm": 0.19667799770832062, "learning_rate": 9.560118466950105e-05, "loss": 3.225042724609375, "step": 104670 }, { "epoch": 0.0752, "grad_norm": 0.18446427583694458, "learning_rate": 9.559666256130848e-05, "loss": 3.2688247680664064, "step": 104680 }, { "epoch": 0.07526666666666666, "grad_norm": 0.42311593890190125, "learning_rate": 9.559213823694593e-05, "loss": 3.4373592376708983, "step": 104690 }, { "epoch": 0.07533333333333334, "grad_norm": 0.17362354695796967, "learning_rate": 9.558761169663328e-05, "loss": 3.270587158203125, "step": 104700 }, { "epoch": 0.0754, "grad_norm": 0.17564018070697784, "learning_rate": 9.558308294059054e-05, "loss": 3.360302734375, "step": 104710 }, { "epoch": 0.07546666666666667, "grad_norm": 0.1744222193956375, "learning_rate": 9.557855196903782e-05, "loss": 3.2840927124023436, "step": 104720 }, { "epoch": 0.07553333333333333, "grad_norm": 0.18626278638839722, "learning_rate": 9.557401878219537e-05, "loss": 3.2657958984375, "step": 104730 }, { "epoch": 0.0756, "grad_norm": 0.18402379751205444, "learning_rate": 9.556948338028348e-05, "loss": 3.1266496658325194, "step": 104740 }, { "epoch": 0.07566666666666666, "grad_norm": 0.17413465678691864, "learning_rate": 9.55649457635226e-05, "loss": 3.2065498352050783, "step": 104750 }, { "epoch": 0.07573333333333333, "grad_norm": 0.22483965754508972, "learning_rate": 9.556040593213328e-05, "loss": 3.2719036102294923, "step": 104760 }, { "epoch": 0.0758, "grad_norm": 0.17098771035671234, "learning_rate": 9.55558638863362e-05, "loss": 3.254182815551758, "step": 104770 }, { "epoch": 0.07586666666666667, "grad_norm": 0.17474229633808136, "learning_rate": 9.555131962635206e-05, "loss": 3.2698135375976562, "step": 104780 }, { "epoch": 0.07593333333333334, "grad_norm": 0.169650599360466, "learning_rate": 9.554677315240178e-05, "loss": 3.2651603698730467, "step": 104790 }, { "epoch": 0.076, "grad_norm": 0.18649093806743622, "learning_rate": 9.554222446470632e-05, "loss": 3.217290496826172, "step": 104800 }, { "epoch": 0.07606666666666667, "grad_norm": 0.1752753108739853, "learning_rate": 9.553767356348677e-05, "loss": 3.2471435546875, "step": 104810 }, { "epoch": 0.07613333333333333, "grad_norm": 0.5352494120597839, "learning_rate": 9.55331204489643e-05, "loss": 3.2508922576904298, "step": 104820 }, { "epoch": 0.0762, "grad_norm": 0.22728674113750458, "learning_rate": 9.552856512136021e-05, "loss": 3.3102378845214844, "step": 104830 }, { "epoch": 0.07626666666666666, "grad_norm": 0.16458432376384735, "learning_rate": 9.552400758089594e-05, "loss": 3.264999771118164, "step": 104840 }, { "epoch": 0.07633333333333334, "grad_norm": 0.179599329829216, "learning_rate": 9.551944782779296e-05, "loss": 3.243885040283203, "step": 104850 }, { "epoch": 0.0764, "grad_norm": 0.1681358814239502, "learning_rate": 9.551488586227294e-05, "loss": 3.234059143066406, "step": 104860 }, { "epoch": 0.07646666666666667, "grad_norm": 0.17391042411327362, "learning_rate": 9.551032168455756e-05, "loss": 3.2527355194091796, "step": 104870 }, { "epoch": 0.07653333333333333, "grad_norm": 0.18539975583553314, "learning_rate": 9.550575529486867e-05, "loss": 3.257044219970703, "step": 104880 }, { "epoch": 0.0766, "grad_norm": 0.1764518767595291, "learning_rate": 9.550118669342825e-05, "loss": 3.289204788208008, "step": 104890 }, { "epoch": 0.07666666666666666, "grad_norm": 0.1865970641374588, "learning_rate": 9.54966158804583e-05, "loss": 3.2388885498046873, "step": 104900 }, { "epoch": 0.07673333333333333, "grad_norm": 0.20839735865592957, "learning_rate": 9.5492042856181e-05, "loss": 3.240564727783203, "step": 104910 }, { "epoch": 0.0768, "grad_norm": 0.1886114776134491, "learning_rate": 9.548746762081863e-05, "loss": 3.2396900177001955, "step": 104920 }, { "epoch": 0.07686666666666667, "grad_norm": 0.18824899196624756, "learning_rate": 9.548289017459355e-05, "loss": 3.2984897613525392, "step": 104930 }, { "epoch": 0.07693333333333334, "grad_norm": 0.16267135739326477, "learning_rate": 9.547831051772823e-05, "loss": 3.2399871826171873, "step": 104940 }, { "epoch": 0.077, "grad_norm": 0.17010027170181274, "learning_rate": 9.547372865044528e-05, "loss": 3.2356422424316404, "step": 104950 }, { "epoch": 0.07706666666666667, "grad_norm": 0.16866494715213776, "learning_rate": 9.546914457296738e-05, "loss": 3.292099380493164, "step": 104960 }, { "epoch": 0.07713333333333333, "grad_norm": 0.19142548739910126, "learning_rate": 9.546455828551735e-05, "loss": 3.160141944885254, "step": 104970 }, { "epoch": 0.0772, "grad_norm": 0.18838684260845184, "learning_rate": 9.54599697883181e-05, "loss": 3.2982322692871096, "step": 104980 }, { "epoch": 0.07726666666666666, "grad_norm": 0.1702679991722107, "learning_rate": 9.545537908159261e-05, "loss": 3.2336936950683595, "step": 104990 }, { "epoch": 0.07733333333333334, "grad_norm": 0.23448461294174194, "learning_rate": 9.545078616556406e-05, "loss": 3.1953758239746093, "step": 105000 }, { "epoch": 0.0774, "grad_norm": 0.16919133067131042, "learning_rate": 9.544619104045565e-05, "loss": 3.215805435180664, "step": 105010 }, { "epoch": 0.07746666666666667, "grad_norm": 0.22399018704891205, "learning_rate": 9.544159370649073e-05, "loss": 3.2653568267822264, "step": 105020 }, { "epoch": 0.07753333333333333, "grad_norm": 0.18972884118556976, "learning_rate": 9.543699416389276e-05, "loss": 3.2743556976318358, "step": 105030 }, { "epoch": 0.0776, "grad_norm": 0.19205254316329956, "learning_rate": 9.543239241288526e-05, "loss": 3.248259735107422, "step": 105040 }, { "epoch": 0.07766666666666666, "grad_norm": 0.16889232397079468, "learning_rate": 9.542778845369192e-05, "loss": 3.1537269592285155, "step": 105050 }, { "epoch": 0.07773333333333333, "grad_norm": 0.16967330873012543, "learning_rate": 9.54231822865365e-05, "loss": 3.2150222778320314, "step": 105060 }, { "epoch": 0.0778, "grad_norm": 0.19127407670021057, "learning_rate": 9.54185739116429e-05, "loss": 3.2893802642822267, "step": 105070 }, { "epoch": 0.07786666666666667, "grad_norm": 0.17100760340690613, "learning_rate": 9.541396332923507e-05, "loss": 3.245082473754883, "step": 105080 }, { "epoch": 0.07793333333333333, "grad_norm": 0.1979450136423111, "learning_rate": 9.540935053953713e-05, "loss": 3.3681560516357423, "step": 105090 }, { "epoch": 0.078, "grad_norm": 0.19073964655399323, "learning_rate": 9.540473554277325e-05, "loss": 3.2612743377685547, "step": 105100 }, { "epoch": 0.07806666666666667, "grad_norm": 0.16988885402679443, "learning_rate": 9.540011833916777e-05, "loss": 3.2788227081298826, "step": 105110 }, { "epoch": 0.07813333333333333, "grad_norm": 0.1673768013715744, "learning_rate": 9.539549892894506e-05, "loss": 3.2335433959960938, "step": 105120 }, { "epoch": 0.0782, "grad_norm": 0.20793947577476501, "learning_rate": 9.53908773123297e-05, "loss": 3.266001892089844, "step": 105130 }, { "epoch": 0.07826666666666666, "grad_norm": 0.19362354278564453, "learning_rate": 9.538625348954626e-05, "loss": 3.2456005096435545, "step": 105140 }, { "epoch": 0.07833333333333334, "grad_norm": 0.17328383028507233, "learning_rate": 9.538162746081951e-05, "loss": 3.233335494995117, "step": 105150 }, { "epoch": 0.0784, "grad_norm": 0.19509351253509521, "learning_rate": 9.537699922637428e-05, "loss": 3.22266845703125, "step": 105160 }, { "epoch": 0.07846666666666667, "grad_norm": 0.18734419345855713, "learning_rate": 9.537236878643553e-05, "loss": 3.342510223388672, "step": 105170 }, { "epoch": 0.07853333333333333, "grad_norm": 0.19318532943725586, "learning_rate": 9.536773614122831e-05, "loss": 3.2832561492919923, "step": 105180 }, { "epoch": 0.0786, "grad_norm": 0.18289333581924438, "learning_rate": 9.536310129097776e-05, "loss": 3.27650146484375, "step": 105190 }, { "epoch": 0.07866666666666666, "grad_norm": 0.1575540453195572, "learning_rate": 9.53584642359092e-05, "loss": 3.222021484375, "step": 105200 }, { "epoch": 0.07873333333333334, "grad_norm": 0.18135282397270203, "learning_rate": 9.535382497624798e-05, "loss": 3.2280223846435545, "step": 105210 }, { "epoch": 0.0788, "grad_norm": 0.1706049144268036, "learning_rate": 9.534918351221958e-05, "loss": 3.24902458190918, "step": 105220 }, { "epoch": 0.07886666666666667, "grad_norm": 0.17318592965602875, "learning_rate": 9.534453984404959e-05, "loss": 3.215680694580078, "step": 105230 }, { "epoch": 0.07893333333333333, "grad_norm": 0.16937319934368134, "learning_rate": 9.533989397196375e-05, "loss": 3.2385398864746096, "step": 105240 }, { "epoch": 0.079, "grad_norm": 0.17280720174312592, "learning_rate": 9.53352458961878e-05, "loss": 3.2544551849365235, "step": 105250 }, { "epoch": 0.07906666666666666, "grad_norm": 0.18932600319385529, "learning_rate": 9.533059561694773e-05, "loss": 3.2223743438720702, "step": 105260 }, { "epoch": 0.07913333333333333, "grad_norm": 0.1711229681968689, "learning_rate": 9.53259431344695e-05, "loss": 3.3224369049072267, "step": 105270 }, { "epoch": 0.0792, "grad_norm": 0.17960943281650543, "learning_rate": 9.532128844897928e-05, "loss": 3.380161666870117, "step": 105280 }, { "epoch": 0.07926666666666667, "grad_norm": 0.1798854023218155, "learning_rate": 9.531663156070328e-05, "loss": 3.2185443878173827, "step": 105290 }, { "epoch": 0.07933333333333334, "grad_norm": 0.22125165164470673, "learning_rate": 9.531197246986783e-05, "loss": 3.2053607940673827, "step": 105300 }, { "epoch": 0.0794, "grad_norm": 0.1939435452222824, "learning_rate": 9.530731117669941e-05, "loss": 3.222276306152344, "step": 105310 }, { "epoch": 0.07946666666666667, "grad_norm": 0.18355141580104828, "learning_rate": 9.530264768142456e-05, "loss": 3.358594512939453, "step": 105320 }, { "epoch": 0.07953333333333333, "grad_norm": 0.16745807230472565, "learning_rate": 9.529798198426996e-05, "loss": 3.2652664184570312, "step": 105330 }, { "epoch": 0.0796, "grad_norm": 0.18737535178661346, "learning_rate": 9.529331408546236e-05, "loss": 3.304409408569336, "step": 105340 }, { "epoch": 0.07966666666666666, "grad_norm": 0.1655913144350052, "learning_rate": 9.528864398522865e-05, "loss": 3.2832141876220704, "step": 105350 }, { "epoch": 0.07973333333333334, "grad_norm": 0.20039567351341248, "learning_rate": 9.528397168379582e-05, "loss": 3.215257263183594, "step": 105360 }, { "epoch": 0.0798, "grad_norm": 0.1688700169324875, "learning_rate": 9.527929718139094e-05, "loss": 3.1856159210205077, "step": 105370 }, { "epoch": 0.07986666666666667, "grad_norm": 0.1823968142271042, "learning_rate": 9.527462047824122e-05, "loss": 3.2909549713134765, "step": 105380 }, { "epoch": 0.07993333333333333, "grad_norm": 0.21341656148433685, "learning_rate": 9.526994157457398e-05, "loss": 3.2788909912109374, "step": 105390 }, { "epoch": 0.08, "grad_norm": 0.17018529772758484, "learning_rate": 9.52652604706166e-05, "loss": 3.299388122558594, "step": 105400 }, { "epoch": 0.08006666666666666, "grad_norm": 0.18481507897377014, "learning_rate": 9.526057716659663e-05, "loss": 3.2422412872314452, "step": 105410 }, { "epoch": 0.08013333333333333, "grad_norm": 0.17850187420845032, "learning_rate": 9.525589166274167e-05, "loss": 3.2116355895996094, "step": 105420 }, { "epoch": 0.0802, "grad_norm": 0.17312517762184143, "learning_rate": 9.525120395927949e-05, "loss": 3.2149589538574217, "step": 105430 }, { "epoch": 0.08026666666666667, "grad_norm": 0.16819007694721222, "learning_rate": 9.524651405643788e-05, "loss": 3.239655685424805, "step": 105440 }, { "epoch": 0.08033333333333334, "grad_norm": 0.17067621648311615, "learning_rate": 9.524182195444485e-05, "loss": 3.212749481201172, "step": 105450 }, { "epoch": 0.0804, "grad_norm": 0.16195814311504364, "learning_rate": 9.52371276535284e-05, "loss": 3.2273277282714843, "step": 105460 }, { "epoch": 0.08046666666666667, "grad_norm": 0.20392252504825592, "learning_rate": 9.523243115391669e-05, "loss": 3.252925491333008, "step": 105470 }, { "epoch": 0.08053333333333333, "grad_norm": 0.17169851064682007, "learning_rate": 9.522773245583802e-05, "loss": 3.2644073486328127, "step": 105480 }, { "epoch": 0.0806, "grad_norm": 0.1827407330274582, "learning_rate": 9.522303155952076e-05, "loss": 3.2574699401855467, "step": 105490 }, { "epoch": 0.08066666666666666, "grad_norm": 0.18752069771289825, "learning_rate": 9.521832846519337e-05, "loss": 3.225633239746094, "step": 105500 }, { "epoch": 0.08073333333333334, "grad_norm": 0.1657979041337967, "learning_rate": 9.521362317308446e-05, "loss": 3.2537521362304687, "step": 105510 }, { "epoch": 0.0808, "grad_norm": 0.1755383312702179, "learning_rate": 9.520891568342272e-05, "loss": 3.3080459594726563, "step": 105520 }, { "epoch": 0.08086666666666667, "grad_norm": 0.17602203786373138, "learning_rate": 9.520420599643694e-05, "loss": 3.262152099609375, "step": 105530 }, { "epoch": 0.08093333333333333, "grad_norm": 0.2159615457057953, "learning_rate": 9.519949411235602e-05, "loss": 3.253015899658203, "step": 105540 }, { "epoch": 0.081, "grad_norm": 0.17209331691265106, "learning_rate": 9.519478003140902e-05, "loss": 3.247713088989258, "step": 105550 }, { "epoch": 0.08106666666666666, "grad_norm": 0.18219690024852753, "learning_rate": 9.519006375382502e-05, "loss": 3.2508270263671877, "step": 105560 }, { "epoch": 0.08113333333333334, "grad_norm": 0.1587882786989212, "learning_rate": 9.518534527983327e-05, "loss": 3.247677230834961, "step": 105570 }, { "epoch": 0.0812, "grad_norm": 0.16423359513282776, "learning_rate": 9.518062460966308e-05, "loss": 3.221887969970703, "step": 105580 }, { "epoch": 0.08126666666666667, "grad_norm": 0.17331062257289886, "learning_rate": 9.517590174354391e-05, "loss": 3.181007957458496, "step": 105590 }, { "epoch": 0.08133333333333333, "grad_norm": 0.20870816707611084, "learning_rate": 9.517117668170533e-05, "loss": 3.194619369506836, "step": 105600 }, { "epoch": 0.0814, "grad_norm": 0.16957353055477142, "learning_rate": 9.516644942437696e-05, "loss": 3.144313430786133, "step": 105610 }, { "epoch": 0.08146666666666667, "grad_norm": 0.19391357898712158, "learning_rate": 9.51617199717886e-05, "loss": 3.221304702758789, "step": 105620 }, { "epoch": 0.08153333333333333, "grad_norm": 0.1715494841337204, "learning_rate": 9.515698832417007e-05, "loss": 3.2435317993164063, "step": 105630 }, { "epoch": 0.0816, "grad_norm": 0.17386266589164734, "learning_rate": 9.515225448175139e-05, "loss": 3.246813201904297, "step": 105640 }, { "epoch": 0.08166666666666667, "grad_norm": 0.16692717373371124, "learning_rate": 9.514751844476261e-05, "loss": 3.236827850341797, "step": 105650 }, { "epoch": 0.08173333333333334, "grad_norm": 0.22002141177654266, "learning_rate": 9.514278021343394e-05, "loss": 3.2431640625, "step": 105660 }, { "epoch": 0.0818, "grad_norm": 0.17640328407287598, "learning_rate": 9.513803978799569e-05, "loss": 3.2286350250244142, "step": 105670 }, { "epoch": 0.08186666666666667, "grad_norm": 0.18189987540245056, "learning_rate": 9.513329716867823e-05, "loss": 3.2583282470703123, "step": 105680 }, { "epoch": 0.08193333333333333, "grad_norm": 0.4795152246952057, "learning_rate": 9.512855235571207e-05, "loss": 3.298502731323242, "step": 105690 }, { "epoch": 0.082, "grad_norm": 0.188132643699646, "learning_rate": 9.512380534932785e-05, "loss": 3.2853282928466796, "step": 105700 }, { "epoch": 0.08206666666666666, "grad_norm": 0.17392274737358093, "learning_rate": 9.51190561497563e-05, "loss": 3.272445297241211, "step": 105710 }, { "epoch": 0.08213333333333334, "grad_norm": 0.18580321967601776, "learning_rate": 9.51143047572282e-05, "loss": 3.275166702270508, "step": 105720 }, { "epoch": 0.0822, "grad_norm": 0.18472038209438324, "learning_rate": 9.510955117197452e-05, "loss": 3.227761077880859, "step": 105730 }, { "epoch": 0.08226666666666667, "grad_norm": 0.19168642163276672, "learning_rate": 9.51047953942263e-05, "loss": 3.287099075317383, "step": 105740 }, { "epoch": 0.08233333333333333, "grad_norm": 0.18474842607975006, "learning_rate": 9.51000374242147e-05, "loss": 3.232560729980469, "step": 105750 }, { "epoch": 0.0824, "grad_norm": 0.16973643004894257, "learning_rate": 9.509527726217096e-05, "loss": 3.2212974548339846, "step": 105760 }, { "epoch": 0.08246666666666666, "grad_norm": 0.20722751319408417, "learning_rate": 9.509051490832643e-05, "loss": 3.283345413208008, "step": 105770 }, { "epoch": 0.08253333333333333, "grad_norm": 0.1840304136276245, "learning_rate": 9.50857503629126e-05, "loss": 3.302739715576172, "step": 105780 }, { "epoch": 0.0826, "grad_norm": 0.1799536645412445, "learning_rate": 9.508098362616102e-05, "loss": 3.235676956176758, "step": 105790 }, { "epoch": 0.08266666666666667, "grad_norm": 0.16596052050590515, "learning_rate": 9.507621469830342e-05, "loss": 3.201326370239258, "step": 105800 }, { "epoch": 0.08273333333333334, "grad_norm": 0.17189835011959076, "learning_rate": 9.507144357957154e-05, "loss": 3.237584686279297, "step": 105810 }, { "epoch": 0.0828, "grad_norm": 0.18697744607925415, "learning_rate": 9.506667027019728e-05, "loss": 3.1942859649658204, "step": 105820 }, { "epoch": 0.08286666666666667, "grad_norm": 0.2001459151506424, "learning_rate": 9.506189477041267e-05, "loss": 3.258147430419922, "step": 105830 }, { "epoch": 0.08293333333333333, "grad_norm": 0.2114773690700531, "learning_rate": 9.50571170804498e-05, "loss": 3.2456268310546874, "step": 105840 }, { "epoch": 0.083, "grad_norm": 0.1901511400938034, "learning_rate": 9.505233720054087e-05, "loss": 3.233293914794922, "step": 105850 }, { "epoch": 0.08306666666666666, "grad_norm": 0.17879270017147064, "learning_rate": 9.50475551309182e-05, "loss": 3.301906204223633, "step": 105860 }, { "epoch": 0.08313333333333334, "grad_norm": 0.17531050741672516, "learning_rate": 9.504277087181426e-05, "loss": 3.2139339447021484, "step": 105870 }, { "epoch": 0.0832, "grad_norm": 0.18858902156352997, "learning_rate": 9.503798442346154e-05, "loss": 3.248404693603516, "step": 105880 }, { "epoch": 0.08326666666666667, "grad_norm": 0.1629534363746643, "learning_rate": 9.503319578609268e-05, "loss": 3.2229682922363283, "step": 105890 }, { "epoch": 0.08333333333333333, "grad_norm": 0.17964892089366913, "learning_rate": 9.502840495994046e-05, "loss": 3.2886451721191405, "step": 105900 }, { "epoch": 0.0834, "grad_norm": 0.1743745058774948, "learning_rate": 9.50236119452377e-05, "loss": 3.23173828125, "step": 105910 }, { "epoch": 0.08346666666666666, "grad_norm": 0.18096038699150085, "learning_rate": 9.501881674221736e-05, "loss": 3.236540603637695, "step": 105920 }, { "epoch": 0.08353333333333333, "grad_norm": 0.189070463180542, "learning_rate": 9.501401935111253e-05, "loss": 3.2342674255371096, "step": 105930 }, { "epoch": 0.0836, "grad_norm": 0.18141844868659973, "learning_rate": 9.500921977215635e-05, "loss": 3.2335845947265627, "step": 105940 }, { "epoch": 0.08366666666666667, "grad_norm": 0.19833621382713318, "learning_rate": 9.500441800558213e-05, "loss": 3.2577590942382812, "step": 105950 }, { "epoch": 0.08373333333333334, "grad_norm": 0.19845394790172577, "learning_rate": 9.49996140516232e-05, "loss": 3.255332183837891, "step": 105960 }, { "epoch": 0.0838, "grad_norm": 0.17912058532238007, "learning_rate": 9.499480791051313e-05, "loss": 3.194189453125, "step": 105970 }, { "epoch": 0.08386666666666667, "grad_norm": 0.18199682235717773, "learning_rate": 9.498999958248546e-05, "loss": 3.2616573333740235, "step": 105980 }, { "epoch": 0.08393333333333333, "grad_norm": 0.19343431293964386, "learning_rate": 9.498518906777389e-05, "loss": 3.173295593261719, "step": 105990 }, { "epoch": 0.084, "grad_norm": 0.17379596829414368, "learning_rate": 9.498037636661228e-05, "loss": 3.229732894897461, "step": 106000 }, { "epoch": 0.08406666666666666, "grad_norm": 0.18641693890094757, "learning_rate": 9.497556147923448e-05, "loss": 3.211537551879883, "step": 106010 }, { "epoch": 0.08413333333333334, "grad_norm": 0.18123702704906464, "learning_rate": 9.497074440587456e-05, "loss": 3.207011032104492, "step": 106020 }, { "epoch": 0.0842, "grad_norm": 0.16698431968688965, "learning_rate": 9.49659251467666e-05, "loss": 3.2607154846191406, "step": 106030 }, { "epoch": 0.08426666666666667, "grad_norm": 0.17267490923404694, "learning_rate": 9.496110370214491e-05, "loss": 3.31190185546875, "step": 106040 }, { "epoch": 0.08433333333333333, "grad_norm": 0.16868606209754944, "learning_rate": 9.495628007224375e-05, "loss": 3.2002933502197264, "step": 106050 }, { "epoch": 0.0844, "grad_norm": 0.18253079056739807, "learning_rate": 9.495145425729764e-05, "loss": 3.1966926574707033, "step": 106060 }, { "epoch": 0.08446666666666666, "grad_norm": 0.16859881579875946, "learning_rate": 9.494662625754106e-05, "loss": 3.353255844116211, "step": 106070 }, { "epoch": 0.08453333333333334, "grad_norm": 0.17098337411880493, "learning_rate": 9.494179607320872e-05, "loss": 3.2222431182861326, "step": 106080 }, { "epoch": 0.0846, "grad_norm": 0.34647631645202637, "learning_rate": 9.493696370453535e-05, "loss": 3.2280696868896483, "step": 106090 }, { "epoch": 0.08466666666666667, "grad_norm": 0.1710755079984665, "learning_rate": 9.493212915175586e-05, "loss": 3.1738265991210937, "step": 106100 }, { "epoch": 0.08473333333333333, "grad_norm": 0.2445901483297348, "learning_rate": 9.492729241510521e-05, "loss": 3.23153076171875, "step": 106110 }, { "epoch": 0.0848, "grad_norm": 0.1866326779127121, "learning_rate": 9.492245349481848e-05, "loss": 3.2204212188720702, "step": 106120 }, { "epoch": 0.08486666666666667, "grad_norm": 0.17494656145572662, "learning_rate": 9.491761239113085e-05, "loss": 3.2349266052246093, "step": 106130 }, { "epoch": 0.08493333333333333, "grad_norm": 0.19977618753910065, "learning_rate": 9.491276910427764e-05, "loss": 3.2264862060546875, "step": 106140 }, { "epoch": 0.085, "grad_norm": 0.19955949485301971, "learning_rate": 9.490792363449424e-05, "loss": 3.1959508895874023, "step": 106150 }, { "epoch": 0.08506666666666667, "grad_norm": 0.1895098090171814, "learning_rate": 9.490307598201613e-05, "loss": 3.2397342681884767, "step": 106160 }, { "epoch": 0.08513333333333334, "grad_norm": 0.17948780953884125, "learning_rate": 9.489822614707897e-05, "loss": 3.305650329589844, "step": 106170 }, { "epoch": 0.0852, "grad_norm": 0.16857461631298065, "learning_rate": 9.489337412991848e-05, "loss": 3.280976486206055, "step": 106180 }, { "epoch": 0.08526666666666667, "grad_norm": 0.17448216676712036, "learning_rate": 9.488851993077045e-05, "loss": 3.2048870086669923, "step": 106190 }, { "epoch": 0.08533333333333333, "grad_norm": 0.1799643635749817, "learning_rate": 9.488366354987084e-05, "loss": 3.210471343994141, "step": 106200 }, { "epoch": 0.0854, "grad_norm": 0.18519675731658936, "learning_rate": 9.487880498745566e-05, "loss": 3.209813690185547, "step": 106210 }, { "epoch": 0.08546666666666666, "grad_norm": 0.17470605671405792, "learning_rate": 9.487394424376109e-05, "loss": 3.2405536651611326, "step": 106220 }, { "epoch": 0.08553333333333334, "grad_norm": 0.17532169818878174, "learning_rate": 9.486908131902336e-05, "loss": 3.2757171630859374, "step": 106230 }, { "epoch": 0.0856, "grad_norm": 0.18069204688072205, "learning_rate": 9.486421621347883e-05, "loss": 3.2237125396728517, "step": 106240 }, { "epoch": 0.08566666666666667, "grad_norm": 0.18036822974681854, "learning_rate": 9.485934892736397e-05, "loss": 3.2261260986328124, "step": 106250 }, { "epoch": 0.08573333333333333, "grad_norm": 0.18268969655036926, "learning_rate": 9.485447946091533e-05, "loss": 3.288920211791992, "step": 106260 }, { "epoch": 0.0858, "grad_norm": 0.1707400679588318, "learning_rate": 9.484960781436961e-05, "loss": 3.226581573486328, "step": 106270 }, { "epoch": 0.08586666666666666, "grad_norm": 0.16069364547729492, "learning_rate": 9.484473398796356e-05, "loss": 3.2291465759277345, "step": 106280 }, { "epoch": 0.08593333333333333, "grad_norm": 0.1694745123386383, "learning_rate": 9.48398579819341e-05, "loss": 3.1910755157470705, "step": 106290 }, { "epoch": 0.086, "grad_norm": 0.18905480206012726, "learning_rate": 9.483497979651819e-05, "loss": 3.2084903717041016, "step": 106300 }, { "epoch": 0.08606666666666667, "grad_norm": 0.18031416833400726, "learning_rate": 9.483009943195296e-05, "loss": 3.2135848999023438, "step": 106310 }, { "epoch": 0.08613333333333334, "grad_norm": 0.16255618631839752, "learning_rate": 9.482521688847559e-05, "loss": 3.1906824111938477, "step": 106320 }, { "epoch": 0.0862, "grad_norm": 0.17979420721530914, "learning_rate": 9.48203321663234e-05, "loss": 3.204943084716797, "step": 106330 }, { "epoch": 0.08626666666666667, "grad_norm": 0.18252816796302795, "learning_rate": 9.481544526573381e-05, "loss": 3.2337535858154296, "step": 106340 }, { "epoch": 0.08633333333333333, "grad_norm": 0.18388260900974274, "learning_rate": 9.481055618694433e-05, "loss": 3.1742036819458006, "step": 106350 }, { "epoch": 0.0864, "grad_norm": 0.4240354299545288, "learning_rate": 9.480566493019258e-05, "loss": 3.3653087615966797, "step": 106360 }, { "epoch": 0.08646666666666666, "grad_norm": 0.1864461749792099, "learning_rate": 9.480077149571631e-05, "loss": 3.279418182373047, "step": 106370 }, { "epoch": 0.08653333333333334, "grad_norm": 0.16763785481452942, "learning_rate": 9.479587588375336e-05, "loss": 3.2485374450683593, "step": 106380 }, { "epoch": 0.0866, "grad_norm": 0.17914314568042755, "learning_rate": 9.479097809454167e-05, "loss": 3.176812171936035, "step": 106390 }, { "epoch": 0.08666666666666667, "grad_norm": 0.17615285515785217, "learning_rate": 9.47860781283193e-05, "loss": 3.2891891479492186, "step": 106400 }, { "epoch": 0.08673333333333333, "grad_norm": 0.17619982361793518, "learning_rate": 9.47811759853244e-05, "loss": 3.233002471923828, "step": 106410 }, { "epoch": 0.0868, "grad_norm": 0.19470208883285522, "learning_rate": 9.477627166579524e-05, "loss": 3.4394561767578127, "step": 106420 }, { "epoch": 0.08686666666666666, "grad_norm": 0.19808389246463776, "learning_rate": 9.477136516997017e-05, "loss": 3.2830432891845702, "step": 106430 }, { "epoch": 0.08693333333333333, "grad_norm": 0.19141662120819092, "learning_rate": 9.476645649808767e-05, "loss": 3.2244247436523437, "step": 106440 }, { "epoch": 0.087, "grad_norm": 0.16887471079826355, "learning_rate": 9.476154565038632e-05, "loss": 3.2435375213623048, "step": 106450 }, { "epoch": 0.08706666666666667, "grad_norm": 0.17270001769065857, "learning_rate": 9.475663262710481e-05, "loss": 3.2241722106933595, "step": 106460 }, { "epoch": 0.08713333333333333, "grad_norm": 0.18566039204597473, "learning_rate": 9.475171742848194e-05, "loss": 3.2230167388916016, "step": 106470 }, { "epoch": 0.0872, "grad_norm": 0.17937760055065155, "learning_rate": 9.47468000547566e-05, "loss": 3.263715362548828, "step": 106480 }, { "epoch": 0.08726666666666667, "grad_norm": 0.16925020515918732, "learning_rate": 9.47418805061678e-05, "loss": 3.241045379638672, "step": 106490 }, { "epoch": 0.08733333333333333, "grad_norm": 0.17863613367080688, "learning_rate": 9.473695878295464e-05, "loss": 3.2574718475341795, "step": 106500 }, { "epoch": 0.0874, "grad_norm": 0.17741219699382782, "learning_rate": 9.473203488535631e-05, "loss": 3.213343048095703, "step": 106510 }, { "epoch": 0.08746666666666666, "grad_norm": 0.18949927389621735, "learning_rate": 9.472710881361218e-05, "loss": 3.273731231689453, "step": 106520 }, { "epoch": 0.08753333333333334, "grad_norm": 0.19603842496871948, "learning_rate": 9.472218056796164e-05, "loss": 3.258753204345703, "step": 106530 }, { "epoch": 0.0876, "grad_norm": 0.17424413561820984, "learning_rate": 9.471725014864424e-05, "loss": 3.2081588745117187, "step": 106540 }, { "epoch": 0.08766666666666667, "grad_norm": 0.17299388349056244, "learning_rate": 9.47123175558996e-05, "loss": 3.2108123779296873, "step": 106550 }, { "epoch": 0.08773333333333333, "grad_norm": 0.4882325232028961, "learning_rate": 9.470738278996748e-05, "loss": 3.332597351074219, "step": 106560 }, { "epoch": 0.0878, "grad_norm": 0.18348225951194763, "learning_rate": 9.47024458510877e-05, "loss": 3.264744186401367, "step": 106570 }, { "epoch": 0.08786666666666666, "grad_norm": 0.17102870345115662, "learning_rate": 9.469750673950025e-05, "loss": 3.2082851409912108, "step": 106580 }, { "epoch": 0.08793333333333334, "grad_norm": 0.17728173732757568, "learning_rate": 9.469256545544518e-05, "loss": 3.2062244415283203, "step": 106590 }, { "epoch": 0.088, "grad_norm": 24.65404510498047, "learning_rate": 9.468762199916263e-05, "loss": 3.661563491821289, "step": 106600 }, { "epoch": 0.08806666666666667, "grad_norm": 0.17172840237617493, "learning_rate": 9.46826763708929e-05, "loss": 3.4302310943603516, "step": 106610 }, { "epoch": 0.08813333333333333, "grad_norm": 0.2243252843618393, "learning_rate": 9.467772857087636e-05, "loss": 3.2259841918945313, "step": 106620 }, { "epoch": 0.0882, "grad_norm": 0.17258265614509583, "learning_rate": 9.467277859935348e-05, "loss": 3.202710723876953, "step": 106630 }, { "epoch": 0.08826666666666666, "grad_norm": 2.3277833461761475, "learning_rate": 9.466782645656485e-05, "loss": 3.1065521240234375, "step": 106640 }, { "epoch": 0.08833333333333333, "grad_norm": 0.388022243976593, "learning_rate": 9.466287214275118e-05, "loss": 2.314573287963867, "step": 106650 }, { "epoch": 0.0884, "grad_norm": 1.09025239944458, "learning_rate": 9.465791565815325e-05, "loss": 2.330702781677246, "step": 106660 }, { "epoch": 0.08846666666666667, "grad_norm": 0.4901118576526642, "learning_rate": 9.465295700301197e-05, "loss": 2.2171512603759767, "step": 106670 }, { "epoch": 0.08853333333333334, "grad_norm": 0.5984647274017334, "learning_rate": 9.464799617756835e-05, "loss": 2.372441291809082, "step": 106680 }, { "epoch": 0.0886, "grad_norm": 0.206095889210701, "learning_rate": 9.46430331820635e-05, "loss": 2.874693489074707, "step": 106690 }, { "epoch": 0.08866666666666667, "grad_norm": 0.20015880465507507, "learning_rate": 9.463806801673864e-05, "loss": 3.2949268341064455, "step": 106700 }, { "epoch": 0.08873333333333333, "grad_norm": 0.17337417602539062, "learning_rate": 9.46331006818351e-05, "loss": 3.327831268310547, "step": 106710 }, { "epoch": 0.0888, "grad_norm": 0.3450412452220917, "learning_rate": 9.462813117759433e-05, "loss": 3.4042728424072264, "step": 106720 }, { "epoch": 0.08886666666666666, "grad_norm": 0.3109739422798157, "learning_rate": 9.462315950425782e-05, "loss": 3.2701751708984377, "step": 106730 }, { "epoch": 0.08893333333333334, "grad_norm": 0.17139877378940582, "learning_rate": 9.461818566206726e-05, "loss": 3.2719139099121093, "step": 106740 }, { "epoch": 0.089, "grad_norm": 0.3862695097923279, "learning_rate": 9.461320965126436e-05, "loss": 3.2805992126464845, "step": 106750 }, { "epoch": 0.08906666666666667, "grad_norm": 0.19807898998260498, "learning_rate": 9.4608231472091e-05, "loss": 3.338426208496094, "step": 106760 }, { "epoch": 0.08913333333333333, "grad_norm": 0.18818332254886627, "learning_rate": 9.460325112478912e-05, "loss": 3.2397266387939454, "step": 106770 }, { "epoch": 0.0892, "grad_norm": 0.17732356488704681, "learning_rate": 9.459826860960082e-05, "loss": 3.24115104675293, "step": 106780 }, { "epoch": 0.08926666666666666, "grad_norm": 0.19964580237865448, "learning_rate": 9.459328392676821e-05, "loss": 3.2994052886962892, "step": 106790 }, { "epoch": 0.08933333333333333, "grad_norm": 0.17197377979755402, "learning_rate": 9.45882970765336e-05, "loss": 3.26002197265625, "step": 106800 }, { "epoch": 0.0894, "grad_norm": 0.1673242747783661, "learning_rate": 9.458330805913938e-05, "loss": 3.248828887939453, "step": 106810 }, { "epoch": 0.08946666666666667, "grad_norm": 0.16908836364746094, "learning_rate": 9.457831687482801e-05, "loss": 3.2866561889648436, "step": 106820 }, { "epoch": 0.08953333333333334, "grad_norm": 0.17830009758472443, "learning_rate": 9.457332352384209e-05, "loss": 3.26134033203125, "step": 106830 }, { "epoch": 0.0896, "grad_norm": 0.18573597073554993, "learning_rate": 9.456832800642433e-05, "loss": 3.2605533599853516, "step": 106840 }, { "epoch": 0.08966666666666667, "grad_norm": 0.1688704639673233, "learning_rate": 9.45633303228175e-05, "loss": 3.2302757263183595, "step": 106850 }, { "epoch": 0.08973333333333333, "grad_norm": 0.1724659502506256, "learning_rate": 9.455833047326453e-05, "loss": 3.235346221923828, "step": 106860 }, { "epoch": 0.0898, "grad_norm": 0.17335253953933716, "learning_rate": 9.455332845800843e-05, "loss": 3.191029739379883, "step": 106870 }, { "epoch": 0.08986666666666666, "grad_norm": 0.17962448298931122, "learning_rate": 9.45483242772923e-05, "loss": 3.2553348541259766, "step": 106880 }, { "epoch": 0.08993333333333334, "grad_norm": 0.16744931042194366, "learning_rate": 9.45433179313594e-05, "loss": 3.331866455078125, "step": 106890 }, { "epoch": 0.09, "grad_norm": 0.173506498336792, "learning_rate": 9.453830942045301e-05, "loss": 3.237092208862305, "step": 106900 }, { "epoch": 0.09006666666666667, "grad_norm": 0.1680525690317154, "learning_rate": 9.453329874481659e-05, "loss": 3.2051021575927736, "step": 106910 }, { "epoch": 0.09013333333333333, "grad_norm": 0.17091573774814606, "learning_rate": 9.452828590469366e-05, "loss": 3.238759994506836, "step": 106920 }, { "epoch": 0.0902, "grad_norm": 0.3177240490913391, "learning_rate": 9.45232709003279e-05, "loss": 3.302663803100586, "step": 106930 }, { "epoch": 0.09026666666666666, "grad_norm": 0.19517776370048523, "learning_rate": 9.451825373196302e-05, "loss": 3.2390510559082033, "step": 106940 }, { "epoch": 0.09033333333333333, "grad_norm": 0.17960330843925476, "learning_rate": 9.451323439984287e-05, "loss": 3.2275970458984373, "step": 106950 }, { "epoch": 0.0904, "grad_norm": 0.16342419385910034, "learning_rate": 9.450821290421146e-05, "loss": 3.2614826202392577, "step": 106960 }, { "epoch": 0.09046666666666667, "grad_norm": 0.1623694747686386, "learning_rate": 9.450318924531278e-05, "loss": 3.2619949340820313, "step": 106970 }, { "epoch": 0.09053333333333333, "grad_norm": 0.17109732329845428, "learning_rate": 9.449816342339106e-05, "loss": 3.1762876510620117, "step": 106980 }, { "epoch": 0.0906, "grad_norm": 0.1888803243637085, "learning_rate": 9.449313543869055e-05, "loss": 3.2448497772216798, "step": 106990 }, { "epoch": 0.09066666666666667, "grad_norm": 0.1747397631406784, "learning_rate": 9.448810529145564e-05, "loss": 3.2329994201660157, "step": 107000 }, { "epoch": 0.09073333333333333, "grad_norm": 0.18217921257019043, "learning_rate": 9.448307298193078e-05, "loss": 3.1979326248168944, "step": 107010 }, { "epoch": 0.0908, "grad_norm": 0.1802927553653717, "learning_rate": 9.447803851036062e-05, "loss": 3.2066722869873048, "step": 107020 }, { "epoch": 0.09086666666666667, "grad_norm": 0.1835036426782608, "learning_rate": 9.447300187698978e-05, "loss": 3.280942916870117, "step": 107030 }, { "epoch": 0.09093333333333334, "grad_norm": 0.1753305345773697, "learning_rate": 9.446796308206312e-05, "loss": 3.238352966308594, "step": 107040 }, { "epoch": 0.091, "grad_norm": 0.23360174894332886, "learning_rate": 9.44629221258255e-05, "loss": 3.1054868698120117, "step": 107050 }, { "epoch": 0.09106666666666667, "grad_norm": 0.18287959694862366, "learning_rate": 9.445787900852197e-05, "loss": 3.2303226470947264, "step": 107060 }, { "epoch": 0.09113333333333333, "grad_norm": 0.15951858460903168, "learning_rate": 9.445283373039761e-05, "loss": 3.1762767791748048, "step": 107070 }, { "epoch": 0.0912, "grad_norm": 0.1710486263036728, "learning_rate": 9.444778629169769e-05, "loss": 3.205057144165039, "step": 107080 }, { "epoch": 0.09126666666666666, "grad_norm": 0.31740063428878784, "learning_rate": 9.444273669266747e-05, "loss": 3.269954299926758, "step": 107090 }, { "epoch": 0.09133333333333334, "grad_norm": 0.18571870028972626, "learning_rate": 9.443768493355241e-05, "loss": 3.230540084838867, "step": 107100 }, { "epoch": 0.0914, "grad_norm": 0.19004711508750916, "learning_rate": 9.443263101459806e-05, "loss": 3.2293983459472657, "step": 107110 }, { "epoch": 0.09146666666666667, "grad_norm": 0.1677931547164917, "learning_rate": 9.442757493605003e-05, "loss": 3.2166984558105467, "step": 107120 }, { "epoch": 0.09153333333333333, "grad_norm": 0.187294602394104, "learning_rate": 9.442251669815407e-05, "loss": 3.2285541534423827, "step": 107130 }, { "epoch": 0.0916, "grad_norm": 0.20275968313217163, "learning_rate": 9.441745630115605e-05, "loss": 3.4212615966796873, "step": 107140 }, { "epoch": 0.09166666666666666, "grad_norm": 0.17821156978607178, "learning_rate": 9.441239374530192e-05, "loss": 3.2455112457275392, "step": 107150 }, { "epoch": 0.09173333333333333, "grad_norm": 0.18831168115139008, "learning_rate": 9.440732903083774e-05, "loss": 3.287977600097656, "step": 107160 }, { "epoch": 0.0918, "grad_norm": 0.1836439073085785, "learning_rate": 9.440226215800964e-05, "loss": 3.194653129577637, "step": 107170 }, { "epoch": 0.09186666666666667, "grad_norm": 0.20249606668949127, "learning_rate": 9.439719312706393e-05, "loss": 3.486019515991211, "step": 107180 }, { "epoch": 0.09193333333333334, "grad_norm": 0.1746971309185028, "learning_rate": 9.439212193824696e-05, "loss": 3.213307189941406, "step": 107190 }, { "epoch": 0.092, "grad_norm": 0.16892847418785095, "learning_rate": 9.438704859180522e-05, "loss": 3.2335296630859376, "step": 107200 }, { "epoch": 0.09206666666666667, "grad_norm": 0.1980552226305008, "learning_rate": 9.438197308798529e-05, "loss": 3.349812316894531, "step": 107210 }, { "epoch": 0.09213333333333333, "grad_norm": 3.1546471118927, "learning_rate": 9.437689542703388e-05, "loss": 3.1353288650512696, "step": 107220 }, { "epoch": 0.0922, "grad_norm": 0.19826437532901764, "learning_rate": 9.437181560919774e-05, "loss": 3.229267120361328, "step": 107230 }, { "epoch": 0.09226666666666666, "grad_norm": 0.18420256674289703, "learning_rate": 9.43667336347238e-05, "loss": 3.2216331481933596, "step": 107240 }, { "epoch": 0.09233333333333334, "grad_norm": 0.17882132530212402, "learning_rate": 9.436164950385905e-05, "loss": 3.2315792083740233, "step": 107250 }, { "epoch": 0.0924, "grad_norm": 0.1965702623128891, "learning_rate": 9.435656321685062e-05, "loss": 3.213526153564453, "step": 107260 }, { "epoch": 0.09246666666666667, "grad_norm": 0.4190846085548401, "learning_rate": 9.435147477394568e-05, "loss": 3.254383850097656, "step": 107270 }, { "epoch": 0.09253333333333333, "grad_norm": 0.20501193404197693, "learning_rate": 9.43463841753916e-05, "loss": 3.369354248046875, "step": 107280 }, { "epoch": 0.0926, "grad_norm": 0.1668914407491684, "learning_rate": 9.434129142143575e-05, "loss": 3.204964447021484, "step": 107290 }, { "epoch": 0.09266666666666666, "grad_norm": 0.20278973877429962, "learning_rate": 9.43361965123257e-05, "loss": 3.273732376098633, "step": 107300 }, { "epoch": 0.09273333333333333, "grad_norm": 0.17464156448841095, "learning_rate": 9.433109944830907e-05, "loss": 3.1920276641845704, "step": 107310 }, { "epoch": 0.0928, "grad_norm": 0.17039746046066284, "learning_rate": 9.432600022963358e-05, "loss": 3.219076919555664, "step": 107320 }, { "epoch": 0.09286666666666667, "grad_norm": 0.36445993185043335, "learning_rate": 9.432089885654709e-05, "loss": 3.3181106567382814, "step": 107330 }, { "epoch": 0.09293333333333334, "grad_norm": 0.16572542488574982, "learning_rate": 9.431579532929753e-05, "loss": 3.2241947174072267, "step": 107340 }, { "epoch": 0.093, "grad_norm": 0.16817046701908112, "learning_rate": 9.431068964813296e-05, "loss": 3.2168193817138673, "step": 107350 }, { "epoch": 0.09306666666666667, "grad_norm": 0.17175619304180145, "learning_rate": 9.430558181330156e-05, "loss": 3.2352603912353515, "step": 107360 }, { "epoch": 0.09313333333333333, "grad_norm": 0.16946673393249512, "learning_rate": 9.430047182505152e-05, "loss": 3.2261116027832033, "step": 107370 }, { "epoch": 0.0932, "grad_norm": 0.45790916681289673, "learning_rate": 9.429535968363129e-05, "loss": 3.245159912109375, "step": 107380 }, { "epoch": 0.09326666666666666, "grad_norm": 0.19904600083827972, "learning_rate": 9.429024538928927e-05, "loss": 3.2438274383544923, "step": 107390 }, { "epoch": 0.09333333333333334, "grad_norm": 0.17115525901317596, "learning_rate": 9.428512894227408e-05, "loss": 3.2642539978027343, "step": 107400 }, { "epoch": 0.0934, "grad_norm": 0.17783567309379578, "learning_rate": 9.428001034283438e-05, "loss": 3.2471500396728517, "step": 107410 }, { "epoch": 0.09346666666666667, "grad_norm": 0.1791919320821762, "learning_rate": 9.427488959121895e-05, "loss": 3.2789886474609373, "step": 107420 }, { "epoch": 0.09353333333333333, "grad_norm": 0.18195098638534546, "learning_rate": 9.426976668767669e-05, "loss": 3.2555702209472654, "step": 107430 }, { "epoch": 0.0936, "grad_norm": 0.19330552220344543, "learning_rate": 9.426464163245659e-05, "loss": 3.2573688507080076, "step": 107440 }, { "epoch": 0.09366666666666666, "grad_norm": 0.18244734406471252, "learning_rate": 9.425951442580772e-05, "loss": 3.239005279541016, "step": 107450 }, { "epoch": 0.09373333333333334, "grad_norm": 0.22869043052196503, "learning_rate": 9.425438506797933e-05, "loss": 3.1862239837646484, "step": 107460 }, { "epoch": 0.0938, "grad_norm": 0.46305879950523376, "learning_rate": 9.424925355922067e-05, "loss": 3.284545135498047, "step": 107470 }, { "epoch": 0.09386666666666667, "grad_norm": 0.21011953055858612, "learning_rate": 9.424411989978119e-05, "loss": 3.2100372314453125, "step": 107480 }, { "epoch": 0.09393333333333333, "grad_norm": 0.17093853652477264, "learning_rate": 9.42389840899104e-05, "loss": 3.1927207946777343, "step": 107490 }, { "epoch": 0.094, "grad_norm": 0.18140485882759094, "learning_rate": 9.423384612985793e-05, "loss": 3.251308822631836, "step": 107500 }, { "epoch": 0.09406666666666667, "grad_norm": 1.1364858150482178, "learning_rate": 9.422870601987348e-05, "loss": 3.2466510772705077, "step": 107510 }, { "epoch": 0.09413333333333333, "grad_norm": 0.2349347323179245, "learning_rate": 9.422356376020689e-05, "loss": 3.252573013305664, "step": 107520 }, { "epoch": 0.0942, "grad_norm": 0.18135644495487213, "learning_rate": 9.421841935110808e-05, "loss": 3.35919189453125, "step": 107530 }, { "epoch": 0.09426666666666667, "grad_norm": 0.17257235944271088, "learning_rate": 9.42132727928271e-05, "loss": 3.2143295288085936, "step": 107540 }, { "epoch": 0.09433333333333334, "grad_norm": 0.16180352866649628, "learning_rate": 9.42081240856141e-05, "loss": 3.2647045135498045, "step": 107550 }, { "epoch": 0.0944, "grad_norm": 0.19004657864570618, "learning_rate": 9.420297322971933e-05, "loss": 3.258927917480469, "step": 107560 }, { "epoch": 0.09446666666666667, "grad_norm": 0.2296941578388214, "learning_rate": 9.419782022539312e-05, "loss": 3.2038040161132812, "step": 107570 }, { "epoch": 0.09453333333333333, "grad_norm": 0.19567178189754486, "learning_rate": 9.419266507288593e-05, "loss": 3.172968864440918, "step": 107580 }, { "epoch": 0.0946, "grad_norm": 0.16847699880599976, "learning_rate": 9.418750777244833e-05, "loss": 3.3198978424072267, "step": 107590 }, { "epoch": 0.09466666666666666, "grad_norm": 0.1855407953262329, "learning_rate": 9.418234832433097e-05, "loss": 3.2243412017822264, "step": 107600 }, { "epoch": 0.09473333333333334, "grad_norm": 0.16964814066886902, "learning_rate": 9.417718672878463e-05, "loss": 3.2745925903320314, "step": 107610 }, { "epoch": 0.0948, "grad_norm": 0.1661946326494217, "learning_rate": 9.417202298606019e-05, "loss": 3.28997688293457, "step": 107620 }, { "epoch": 0.09486666666666667, "grad_norm": 0.3136678636074066, "learning_rate": 9.416685709640861e-05, "loss": 3.4081077575683594, "step": 107630 }, { "epoch": 0.09493333333333333, "grad_norm": 0.3405991494655609, "learning_rate": 9.416168906008098e-05, "loss": 3.333623504638672, "step": 107640 }, { "epoch": 0.095, "grad_norm": 0.16664300858974457, "learning_rate": 9.41565188773285e-05, "loss": 3.201995849609375, "step": 107650 }, { "epoch": 0.09506666666666666, "grad_norm": 0.3353555202484131, "learning_rate": 9.415134654840243e-05, "loss": 3.215223693847656, "step": 107660 }, { "epoch": 0.09513333333333333, "grad_norm": 0.1967964768409729, "learning_rate": 9.414617207355418e-05, "loss": 3.2692901611328127, "step": 107670 }, { "epoch": 0.0952, "grad_norm": 0.7263655662536621, "learning_rate": 9.414099545303527e-05, "loss": 3.328208160400391, "step": 107680 }, { "epoch": 0.09526666666666667, "grad_norm": 0.17751823365688324, "learning_rate": 9.413581668709727e-05, "loss": 3.3553489685058593, "step": 107690 }, { "epoch": 0.09533333333333334, "grad_norm": 0.1734212040901184, "learning_rate": 9.41306357759919e-05, "loss": 3.1971904754638674, "step": 107700 }, { "epoch": 0.0954, "grad_norm": 0.3169536888599396, "learning_rate": 9.412545271997098e-05, "loss": 3.2608261108398438, "step": 107710 }, { "epoch": 0.09546666666666667, "grad_norm": 0.19381065666675568, "learning_rate": 9.412026751928641e-05, "loss": 3.2312957763671877, "step": 107720 }, { "epoch": 0.09553333333333333, "grad_norm": 0.1722400188446045, "learning_rate": 9.411508017419022e-05, "loss": 3.2571945190429688, "step": 107730 }, { "epoch": 0.0956, "grad_norm": 0.2106781154870987, "learning_rate": 9.410989068493454e-05, "loss": 3.241619873046875, "step": 107740 }, { "epoch": 0.09566666666666666, "grad_norm": 0.19851933419704437, "learning_rate": 9.410469905177158e-05, "loss": 3.211236572265625, "step": 107750 }, { "epoch": 0.09573333333333334, "grad_norm": 0.19693273305892944, "learning_rate": 9.409950527495371e-05, "loss": 3.3758266448974608, "step": 107760 }, { "epoch": 0.0958, "grad_norm": 0.1917637437582016, "learning_rate": 9.409430935473331e-05, "loss": 3.1970619201660155, "step": 107770 }, { "epoch": 0.09586666666666667, "grad_norm": 0.17353734374046326, "learning_rate": 9.408911129136298e-05, "loss": 3.2270751953125, "step": 107780 }, { "epoch": 0.09593333333333333, "grad_norm": 0.1915530264377594, "learning_rate": 9.408391108509533e-05, "loss": 3.198129653930664, "step": 107790 }, { "epoch": 0.096, "grad_norm": 0.18481387197971344, "learning_rate": 9.407870873618312e-05, "loss": 3.1841800689697264, "step": 107800 }, { "epoch": 0.09606666666666666, "grad_norm": 0.16530464589595795, "learning_rate": 9.407350424487921e-05, "loss": 3.2179443359375, "step": 107810 }, { "epoch": 0.09613333333333333, "grad_norm": 0.18174108862876892, "learning_rate": 9.406829761143653e-05, "loss": 3.2030269622802736, "step": 107820 }, { "epoch": 0.0962, "grad_norm": 0.19453369081020355, "learning_rate": 9.40630888361082e-05, "loss": 3.1974666595458983, "step": 107830 }, { "epoch": 0.09626666666666667, "grad_norm": 0.17912447452545166, "learning_rate": 9.405787791914732e-05, "loss": 3.2060546875, "step": 107840 }, { "epoch": 0.09633333333333334, "grad_norm": 0.1840120404958725, "learning_rate": 9.40526648608072e-05, "loss": 3.2556522369384764, "step": 107850 }, { "epoch": 0.0964, "grad_norm": 0.18387891352176666, "learning_rate": 9.40474496613412e-05, "loss": 3.336039352416992, "step": 107860 }, { "epoch": 0.09646666666666667, "grad_norm": 0.18555156886577606, "learning_rate": 9.40422323210028e-05, "loss": 3.242298889160156, "step": 107870 }, { "epoch": 0.09653333333333333, "grad_norm": 0.16391944885253906, "learning_rate": 9.403701284004559e-05, "loss": 3.2504749298095703, "step": 107880 }, { "epoch": 0.0966, "grad_norm": 0.17585249245166779, "learning_rate": 9.403179121872325e-05, "loss": 3.1450428009033202, "step": 107890 }, { "epoch": 0.09666666666666666, "grad_norm": 0.1834709495306015, "learning_rate": 9.402656745728957e-05, "loss": 3.2270740509033202, "step": 107900 }, { "epoch": 0.09673333333333334, "grad_norm": 0.3867793679237366, "learning_rate": 9.402134155599845e-05, "loss": 3.246144485473633, "step": 107910 }, { "epoch": 0.0968, "grad_norm": 0.1650799810886383, "learning_rate": 9.401611351510388e-05, "loss": 3.2385532379150392, "step": 107920 }, { "epoch": 0.09686666666666667, "grad_norm": 0.20104095339775085, "learning_rate": 9.401088333485997e-05, "loss": 3.3231006622314454, "step": 107930 }, { "epoch": 0.09693333333333333, "grad_norm": 0.17631950974464417, "learning_rate": 9.400565101552093e-05, "loss": 3.2371620178222655, "step": 107940 }, { "epoch": 0.097, "grad_norm": 0.1700495332479477, "learning_rate": 9.400041655734104e-05, "loss": 3.2115402221679688, "step": 107950 }, { "epoch": 0.09706666666666666, "grad_norm": 0.1742597222328186, "learning_rate": 9.399517996057476e-05, "loss": 3.2166568756103517, "step": 107960 }, { "epoch": 0.09713333333333334, "grad_norm": 0.1751135140657425, "learning_rate": 9.398994122547658e-05, "loss": 3.232207489013672, "step": 107970 }, { "epoch": 0.0972, "grad_norm": 0.18533176183700562, "learning_rate": 9.398470035230114e-05, "loss": 3.2757003784179686, "step": 107980 }, { "epoch": 0.09726666666666667, "grad_norm": 0.196780264377594, "learning_rate": 9.397945734130315e-05, "loss": 3.2648109436035155, "step": 107990 }, { "epoch": 0.09733333333333333, "grad_norm": 0.17461957037448883, "learning_rate": 9.397421219273743e-05, "loss": 3.16708927154541, "step": 108000 }, { "epoch": 0.0974, "grad_norm": 0.26387637853622437, "learning_rate": 9.396896490685895e-05, "loss": 3.185939407348633, "step": 108010 }, { "epoch": 0.09746666666666666, "grad_norm": 0.21083839237689972, "learning_rate": 9.396371548392273e-05, "loss": 3.233509063720703, "step": 108020 }, { "epoch": 0.09753333333333333, "grad_norm": 0.1706472933292389, "learning_rate": 9.39584639241839e-05, "loss": 3.2364856719970705, "step": 108030 }, { "epoch": 0.0976, "grad_norm": 0.251444548368454, "learning_rate": 9.395321022789771e-05, "loss": 3.3415245056152343, "step": 108040 }, { "epoch": 0.09766666666666667, "grad_norm": 0.1666121482849121, "learning_rate": 9.394795439531952e-05, "loss": 3.2084224700927733, "step": 108050 }, { "epoch": 0.09773333333333334, "grad_norm": 0.19688867032527924, "learning_rate": 9.394269642670477e-05, "loss": 3.2306442260742188, "step": 108060 }, { "epoch": 0.0978, "grad_norm": 0.8704792857170105, "learning_rate": 9.393743632230904e-05, "loss": 3.1583383560180662, "step": 108070 }, { "epoch": 0.09786666666666667, "grad_norm": 0.18641163408756256, "learning_rate": 9.393217408238797e-05, "loss": 3.197026252746582, "step": 108080 }, { "epoch": 0.09793333333333333, "grad_norm": 0.3145439028739929, "learning_rate": 9.392690970719733e-05, "loss": 3.2385585784912108, "step": 108090 }, { "epoch": 0.098, "grad_norm": 0.1863429844379425, "learning_rate": 9.392164319699299e-05, "loss": 3.1837942123413088, "step": 108100 }, { "epoch": 0.09806666666666666, "grad_norm": 0.3867132067680359, "learning_rate": 9.391637455203091e-05, "loss": 3.3145706176757814, "step": 108110 }, { "epoch": 0.09813333333333334, "grad_norm": 0.5412503480911255, "learning_rate": 9.39111037725672e-05, "loss": 3.3822174072265625, "step": 108120 }, { "epoch": 0.0982, "grad_norm": 0.17123907804489136, "learning_rate": 9.3905830858858e-05, "loss": 3.2530921936035155, "step": 108130 }, { "epoch": 0.09826666666666667, "grad_norm": 0.17427770793437958, "learning_rate": 9.390055581115962e-05, "loss": 3.2551651000976562, "step": 108140 }, { "epoch": 0.09833333333333333, "grad_norm": 0.3175860345363617, "learning_rate": 9.389527862972843e-05, "loss": 3.3659347534179687, "step": 108150 }, { "epoch": 0.0984, "grad_norm": 0.17362120747566223, "learning_rate": 9.388999931482094e-05, "loss": 3.2158954620361326, "step": 108160 }, { "epoch": 0.09846666666666666, "grad_norm": 0.22479645907878876, "learning_rate": 9.388471786669373e-05, "loss": 3.2921939849853517, "step": 108170 }, { "epoch": 0.09853333333333333, "grad_norm": 0.17806002497673035, "learning_rate": 9.387943428560349e-05, "loss": 3.1955638885498048, "step": 108180 }, { "epoch": 0.0986, "grad_norm": 0.18553543090820312, "learning_rate": 9.387414857180705e-05, "loss": 3.256625747680664, "step": 108190 }, { "epoch": 0.09866666666666667, "grad_norm": 0.17537470161914825, "learning_rate": 9.386886072556129e-05, "loss": 3.236408233642578, "step": 108200 }, { "epoch": 0.09873333333333334, "grad_norm": 0.1713891327381134, "learning_rate": 9.386357074712323e-05, "loss": 3.2000053405761717, "step": 108210 }, { "epoch": 0.0988, "grad_norm": 0.17600412666797638, "learning_rate": 9.385827863674998e-05, "loss": 3.2406085968017577, "step": 108220 }, { "epoch": 0.09886666666666667, "grad_norm": 0.3171563148498535, "learning_rate": 9.385298439469877e-05, "loss": 3.2092342376708984, "step": 108230 }, { "epoch": 0.09893333333333333, "grad_norm": 0.18146520853042603, "learning_rate": 9.38476880212269e-05, "loss": 3.22479362487793, "step": 108240 }, { "epoch": 0.099, "grad_norm": 0.16903989017009735, "learning_rate": 9.384238951659179e-05, "loss": 3.1932605743408202, "step": 108250 }, { "epoch": 0.09906666666666666, "grad_norm": 0.1881149411201477, "learning_rate": 9.3837088881051e-05, "loss": 3.25708122253418, "step": 108260 }, { "epoch": 0.09913333333333334, "grad_norm": 0.16619203984737396, "learning_rate": 9.383178611486213e-05, "loss": 3.1921958923339844, "step": 108270 }, { "epoch": 0.0992, "grad_norm": 0.18236656486988068, "learning_rate": 9.382648121828292e-05, "loss": 3.2274742126464844, "step": 108280 }, { "epoch": 0.09926666666666667, "grad_norm": 0.18851430714130402, "learning_rate": 9.382117419157124e-05, "loss": 3.2400665283203125, "step": 108290 }, { "epoch": 0.09933333333333333, "grad_norm": 0.382865309715271, "learning_rate": 9.381586503498498e-05, "loss": 3.2250797271728517, "step": 108300 }, { "epoch": 0.0994, "grad_norm": 0.18830233812332153, "learning_rate": 9.38105537487822e-05, "loss": 3.2797725677490233, "step": 108310 }, { "epoch": 0.09946666666666666, "grad_norm": 0.18124231696128845, "learning_rate": 9.380524033322108e-05, "loss": 3.4106201171875, "step": 108320 }, { "epoch": 0.09953333333333333, "grad_norm": 0.19975440204143524, "learning_rate": 9.379992478855986e-05, "loss": 3.2333240509033203, "step": 108330 }, { "epoch": 0.0996, "grad_norm": 0.22519026696681976, "learning_rate": 9.379460711505685e-05, "loss": 3.2493667602539062, "step": 108340 }, { "epoch": 0.09966666666666667, "grad_norm": 0.16953827440738678, "learning_rate": 9.378928731297057e-05, "loss": 3.1950143814086913, "step": 108350 }, { "epoch": 0.09973333333333333, "grad_norm": 0.16662119328975677, "learning_rate": 9.378396538255956e-05, "loss": 3.2164230346679688, "step": 108360 }, { "epoch": 0.0998, "grad_norm": 0.185033917427063, "learning_rate": 9.377864132408247e-05, "loss": 3.2186988830566405, "step": 108370 }, { "epoch": 0.09986666666666667, "grad_norm": 0.17545448243618011, "learning_rate": 9.377331513779808e-05, "loss": 3.2157337188720705, "step": 108380 }, { "epoch": 0.09993333333333333, "grad_norm": 0.17785504460334778, "learning_rate": 9.376798682396529e-05, "loss": 3.183071327209473, "step": 108390 }, { "epoch": 0.1, "grad_norm": 0.4504942297935486, "learning_rate": 9.376265638284303e-05, "loss": 3.300009536743164, "step": 108400 }, { "epoch": 0.10006666666666666, "grad_norm": 0.1812923401594162, "learning_rate": 9.37573238146904e-05, "loss": 3.350973129272461, "step": 108410 }, { "epoch": 0.10013333333333334, "grad_norm": 0.19275973737239838, "learning_rate": 9.375198911976659e-05, "loss": 3.2448680877685545, "step": 108420 }, { "epoch": 0.1002, "grad_norm": 0.17853227257728577, "learning_rate": 9.374665229833089e-05, "loss": 3.1932113647460936, "step": 108430 }, { "epoch": 0.10026666666666667, "grad_norm": 0.18447332084178925, "learning_rate": 9.374131335064266e-05, "loss": 3.2144569396972655, "step": 108440 }, { "epoch": 0.10033333333333333, "grad_norm": 0.35231462121009827, "learning_rate": 9.373597227696143e-05, "loss": 3.242249298095703, "step": 108450 }, { "epoch": 0.1004, "grad_norm": 0.17900879681110382, "learning_rate": 9.373062907754677e-05, "loss": 3.2686222076416014, "step": 108460 }, { "epoch": 0.10046666666666666, "grad_norm": 0.17995642125606537, "learning_rate": 9.372528375265839e-05, "loss": 3.2831878662109375, "step": 108470 }, { "epoch": 0.10053333333333334, "grad_norm": 0.1752672642469406, "learning_rate": 9.371993630255609e-05, "loss": 3.1909021377563476, "step": 108480 }, { "epoch": 0.1006, "grad_norm": 0.1810501515865326, "learning_rate": 9.371458672749977e-05, "loss": 3.2498237609863283, "step": 108490 }, { "epoch": 0.10066666666666667, "grad_norm": 0.1874300241470337, "learning_rate": 9.370923502774947e-05, "loss": 3.2122386932373046, "step": 108500 }, { "epoch": 0.10073333333333333, "grad_norm": 0.2574571967124939, "learning_rate": 9.370388120356527e-05, "loss": 3.453685760498047, "step": 108510 }, { "epoch": 0.1008, "grad_norm": 0.18936283886432648, "learning_rate": 9.36985252552074e-05, "loss": 3.2456329345703123, "step": 108520 }, { "epoch": 0.10086666666666666, "grad_norm": 0.18231728672981262, "learning_rate": 9.369316718293617e-05, "loss": 3.369558334350586, "step": 108530 }, { "epoch": 0.10093333333333333, "grad_norm": 0.16402311623096466, "learning_rate": 9.3687806987012e-05, "loss": 3.206327438354492, "step": 108540 }, { "epoch": 0.101, "grad_norm": 0.17286989092826843, "learning_rate": 9.368244466769545e-05, "loss": 3.179379463195801, "step": 108550 }, { "epoch": 0.10106666666666667, "grad_norm": 0.1717694103717804, "learning_rate": 9.36770802252471e-05, "loss": 3.220286178588867, "step": 108560 }, { "epoch": 0.10113333333333334, "grad_norm": 0.18422475457191467, "learning_rate": 9.367171365992772e-05, "loss": 3.2455673217773438, "step": 108570 }, { "epoch": 0.1012, "grad_norm": 0.17051534354686737, "learning_rate": 9.366634497199813e-05, "loss": 3.2251815795898438, "step": 108580 }, { "epoch": 0.10126666666666667, "grad_norm": 0.17066915333271027, "learning_rate": 9.366097416171926e-05, "loss": 3.224117660522461, "step": 108590 }, { "epoch": 0.10133333333333333, "grad_norm": 0.17902611196041107, "learning_rate": 9.365560122935216e-05, "loss": 3.212969207763672, "step": 108600 }, { "epoch": 0.1014, "grad_norm": 0.18677660822868347, "learning_rate": 9.365022617515799e-05, "loss": 3.213025665283203, "step": 108610 }, { "epoch": 0.10146666666666666, "grad_norm": 0.18107101321220398, "learning_rate": 9.364484899939797e-05, "loss": 3.244974136352539, "step": 108620 }, { "epoch": 0.10153333333333334, "grad_norm": 0.18585656583309174, "learning_rate": 9.363946970233347e-05, "loss": 3.284011459350586, "step": 108630 }, { "epoch": 0.1016, "grad_norm": 0.16664272546768188, "learning_rate": 9.363408828422594e-05, "loss": 3.205975341796875, "step": 108640 }, { "epoch": 0.10166666666666667, "grad_norm": 0.24435682594776154, "learning_rate": 9.362870474533694e-05, "loss": 3.230483627319336, "step": 108650 }, { "epoch": 0.10173333333333333, "grad_norm": 0.18203693628311157, "learning_rate": 9.362331908592811e-05, "loss": 3.2060161590576173, "step": 108660 }, { "epoch": 0.1018, "grad_norm": 0.23063088953495026, "learning_rate": 9.361793130626126e-05, "loss": 3.236077880859375, "step": 108670 }, { "epoch": 0.10186666666666666, "grad_norm": 0.2398436963558197, "learning_rate": 9.361254140659821e-05, "loss": 3.158075141906738, "step": 108680 }, { "epoch": 0.10193333333333333, "grad_norm": 0.18778564035892487, "learning_rate": 9.360714938720093e-05, "loss": 3.189676284790039, "step": 108690 }, { "epoch": 0.102, "grad_norm": 0.17411211133003235, "learning_rate": 9.360175524833153e-05, "loss": 3.220009994506836, "step": 108700 }, { "epoch": 0.10206666666666667, "grad_norm": 0.16660840809345245, "learning_rate": 9.359635899025215e-05, "loss": 3.221352767944336, "step": 108710 }, { "epoch": 0.10213333333333334, "grad_norm": 0.1822863519191742, "learning_rate": 9.35909606132251e-05, "loss": 3.235634613037109, "step": 108720 }, { "epoch": 0.1022, "grad_norm": 0.19466522336006165, "learning_rate": 9.358556011751272e-05, "loss": 3.2129173278808594, "step": 108730 }, { "epoch": 0.10226666666666667, "grad_norm": 0.19304759800434113, "learning_rate": 9.358015750337754e-05, "loss": 3.1951602935791015, "step": 108740 }, { "epoch": 0.10233333333333333, "grad_norm": 0.19958677887916565, "learning_rate": 9.357475277108212e-05, "loss": 3.178843879699707, "step": 108750 }, { "epoch": 0.1024, "grad_norm": 0.17517249286174774, "learning_rate": 9.356934592088915e-05, "loss": 3.2345767974853517, "step": 108760 }, { "epoch": 0.10246666666666666, "grad_norm": 0.18047407269477844, "learning_rate": 9.356393695306143e-05, "loss": 3.2073516845703125, "step": 108770 }, { "epoch": 0.10253333333333334, "grad_norm": 0.1680058091878891, "learning_rate": 9.355852586786184e-05, "loss": 3.1539621353149414, "step": 108780 }, { "epoch": 0.1026, "grad_norm": 0.16785168647766113, "learning_rate": 9.355311266555342e-05, "loss": 3.2446052551269533, "step": 108790 }, { "epoch": 0.10266666666666667, "grad_norm": 0.2086949646472931, "learning_rate": 9.354769734639926e-05, "loss": 3.2466812133789062, "step": 108800 }, { "epoch": 0.10273333333333333, "grad_norm": 0.16388057172298431, "learning_rate": 9.354227991066253e-05, "loss": 3.2515399932861326, "step": 108810 }, { "epoch": 0.1028, "grad_norm": 0.18253323435783386, "learning_rate": 9.353686035860656e-05, "loss": 3.1439964294433596, "step": 108820 }, { "epoch": 0.10286666666666666, "grad_norm": 0.18649068474769592, "learning_rate": 9.353143869049476e-05, "loss": 3.2326515197753904, "step": 108830 }, { "epoch": 0.10293333333333334, "grad_norm": 0.17107978463172913, "learning_rate": 9.352601490659064e-05, "loss": 3.198199653625488, "step": 108840 }, { "epoch": 0.103, "grad_norm": 0.1612454056739807, "learning_rate": 9.352058900715784e-05, "loss": 3.205558013916016, "step": 108850 }, { "epoch": 0.10306666666666667, "grad_norm": 0.1730424016714096, "learning_rate": 9.351516099246003e-05, "loss": 3.211539459228516, "step": 108860 }, { "epoch": 0.10313333333333333, "grad_norm": 0.2343958020210266, "learning_rate": 9.350973086276109e-05, "loss": 3.17358512878418, "step": 108870 }, { "epoch": 0.1032, "grad_norm": 0.16428732872009277, "learning_rate": 9.350429861832491e-05, "loss": 3.191093635559082, "step": 108880 }, { "epoch": 0.10326666666666667, "grad_norm": 0.19870026409626007, "learning_rate": 9.349886425941552e-05, "loss": 3.2596229553222655, "step": 108890 }, { "epoch": 0.10333333333333333, "grad_norm": 0.9427034854888916, "learning_rate": 9.349342778629703e-05, "loss": 3.245658111572266, "step": 108900 }, { "epoch": 0.1034, "grad_norm": 0.48636573553085327, "learning_rate": 9.348798919923374e-05, "loss": 3.302033233642578, "step": 108910 }, { "epoch": 0.10346666666666667, "grad_norm": 0.1779370903968811, "learning_rate": 9.348254849848992e-05, "loss": 3.3435802459716797, "step": 108920 }, { "epoch": 0.10353333333333334, "grad_norm": 0.19383059442043304, "learning_rate": 9.347710568433003e-05, "loss": 3.2483314514160155, "step": 108930 }, { "epoch": 0.1036, "grad_norm": 0.1854577362537384, "learning_rate": 9.347166075701863e-05, "loss": 3.1650716781616213, "step": 108940 }, { "epoch": 0.10366666666666667, "grad_norm": 0.1743556410074234, "learning_rate": 9.346621371682034e-05, "loss": 3.2210872650146483, "step": 108950 }, { "epoch": 0.10373333333333333, "grad_norm": 0.19301621615886688, "learning_rate": 9.346076456399991e-05, "loss": 3.279525375366211, "step": 108960 }, { "epoch": 0.1038, "grad_norm": 0.21708695590496063, "learning_rate": 9.34553132988222e-05, "loss": 3.2047889709472654, "step": 108970 }, { "epoch": 0.10386666666666666, "grad_norm": 0.224630206823349, "learning_rate": 9.344985992155216e-05, "loss": 3.2394237518310547, "step": 108980 }, { "epoch": 0.10393333333333334, "grad_norm": 0.17901593446731567, "learning_rate": 9.344440443245482e-05, "loss": 3.2236953735351563, "step": 108990 }, { "epoch": 0.104, "grad_norm": 5.032718658447266, "learning_rate": 9.343894683179538e-05, "loss": 3.171239471435547, "step": 109000 }, { "epoch": 0.10406666666666667, "grad_norm": 0.19014839828014374, "learning_rate": 9.343348711983905e-05, "loss": 3.2190357208251954, "step": 109010 }, { "epoch": 0.10413333333333333, "grad_norm": 0.19578571617603302, "learning_rate": 9.342802529685124e-05, "loss": 3.2022781372070312, "step": 109020 }, { "epoch": 0.1042, "grad_norm": 0.18843501806259155, "learning_rate": 9.342256136309739e-05, "loss": 3.1808666229248046, "step": 109030 }, { "epoch": 0.10426666666666666, "grad_norm": 0.18782690167427063, "learning_rate": 9.341709531884309e-05, "loss": 3.2363311767578127, "step": 109040 }, { "epoch": 0.10433333333333333, "grad_norm": 0.18879693746566772, "learning_rate": 9.341162716435397e-05, "loss": 3.198792266845703, "step": 109050 }, { "epoch": 0.1044, "grad_norm": 0.21468302607536316, "learning_rate": 9.340615689989585e-05, "loss": 3.1908594131469727, "step": 109060 }, { "epoch": 0.10446666666666667, "grad_norm": 0.17413148283958435, "learning_rate": 9.340068452573456e-05, "loss": 3.198972702026367, "step": 109070 }, { "epoch": 0.10453333333333334, "grad_norm": 0.173643097281456, "learning_rate": 9.33952100421361e-05, "loss": 3.1891361236572267, "step": 109080 }, { "epoch": 0.1046, "grad_norm": 0.19496895372867584, "learning_rate": 9.338973344936657e-05, "loss": 3.1950313568115236, "step": 109090 }, { "epoch": 0.10466666666666667, "grad_norm": 0.207650288939476, "learning_rate": 9.338425474769212e-05, "loss": 3.24417724609375, "step": 109100 }, { "epoch": 0.10473333333333333, "grad_norm": 0.17154869437217712, "learning_rate": 9.337877393737905e-05, "loss": 3.1913965225219725, "step": 109110 }, { "epoch": 0.1048, "grad_norm": 0.1858222931623459, "learning_rate": 9.337329101869376e-05, "loss": 3.20311279296875, "step": 109120 }, { "epoch": 0.10486666666666666, "grad_norm": 0.1652139127254486, "learning_rate": 9.336780599190271e-05, "loss": 3.229369354248047, "step": 109130 }, { "epoch": 0.10493333333333334, "grad_norm": 0.20083686709403992, "learning_rate": 9.336231885727254e-05, "loss": 3.1963449478149415, "step": 109140 }, { "epoch": 0.105, "grad_norm": 0.19582712650299072, "learning_rate": 9.335682961506988e-05, "loss": 3.2062614440917967, "step": 109150 }, { "epoch": 0.10506666666666667, "grad_norm": 0.21086402237415314, "learning_rate": 9.335133826556159e-05, "loss": 3.2359970092773436, "step": 109160 }, { "epoch": 0.10513333333333333, "grad_norm": 0.17231912910938263, "learning_rate": 9.334584480901454e-05, "loss": 3.232046890258789, "step": 109170 }, { "epoch": 0.1052, "grad_norm": 0.5984296798706055, "learning_rate": 9.334034924569573e-05, "loss": 3.244123840332031, "step": 109180 }, { "epoch": 0.10526666666666666, "grad_norm": 0.1755087673664093, "learning_rate": 9.333485157587228e-05, "loss": 3.273574447631836, "step": 109190 }, { "epoch": 0.10533333333333333, "grad_norm": 0.1782408356666565, "learning_rate": 9.33293517998114e-05, "loss": 3.1514230728149415, "step": 109200 }, { "epoch": 0.1054, "grad_norm": 0.19906778633594513, "learning_rate": 9.332384991778036e-05, "loss": 3.158348274230957, "step": 109210 }, { "epoch": 0.10546666666666667, "grad_norm": 0.212272509932518, "learning_rate": 9.331834593004663e-05, "loss": 3.3057201385498045, "step": 109220 }, { "epoch": 0.10553333333333334, "grad_norm": 0.19976051151752472, "learning_rate": 9.331283983687769e-05, "loss": 3.197414016723633, "step": 109230 }, { "epoch": 0.1056, "grad_norm": 0.17274954915046692, "learning_rate": 9.330733163854115e-05, "loss": 3.17935791015625, "step": 109240 }, { "epoch": 0.10566666666666667, "grad_norm": 0.17264750599861145, "learning_rate": 9.330182133530476e-05, "loss": 3.211627960205078, "step": 109250 }, { "epoch": 0.10573333333333333, "grad_norm": 0.1766710728406906, "learning_rate": 9.329630892743632e-05, "loss": 3.209403228759766, "step": 109260 }, { "epoch": 0.1058, "grad_norm": 0.2103142887353897, "learning_rate": 9.329079441520377e-05, "loss": 3.2919178009033203, "step": 109270 }, { "epoch": 0.10586666666666666, "grad_norm": 0.17737281322479248, "learning_rate": 9.32852777988751e-05, "loss": 3.2491535186767577, "step": 109280 }, { "epoch": 0.10593333333333334, "grad_norm": 0.19233274459838867, "learning_rate": 9.327975907871847e-05, "loss": 3.1683366775512694, "step": 109290 }, { "epoch": 0.106, "grad_norm": 0.19851668179035187, "learning_rate": 9.327423825500213e-05, "loss": 3.1976383209228514, "step": 109300 }, { "epoch": 0.10606666666666667, "grad_norm": 0.2322150319814682, "learning_rate": 9.326871532799435e-05, "loss": 3.1705379486083984, "step": 109310 }, { "epoch": 0.10613333333333333, "grad_norm": 0.2059485763311386, "learning_rate": 9.326319029796362e-05, "loss": 3.2884990692138674, "step": 109320 }, { "epoch": 0.1062, "grad_norm": 0.18426169455051422, "learning_rate": 9.325766316517846e-05, "loss": 3.2227542877197264, "step": 109330 }, { "epoch": 0.10626666666666666, "grad_norm": 0.18756817281246185, "learning_rate": 9.325213392990751e-05, "loss": 3.183778190612793, "step": 109340 }, { "epoch": 0.10633333333333334, "grad_norm": 0.18360449373722076, "learning_rate": 9.32466025924195e-05, "loss": 3.215562438964844, "step": 109350 }, { "epoch": 0.1064, "grad_norm": 0.1891784965991974, "learning_rate": 9.324106915298329e-05, "loss": 3.2015060424804687, "step": 109360 }, { "epoch": 0.10646666666666667, "grad_norm": 0.21375629305839539, "learning_rate": 9.323553361186781e-05, "loss": 3.201441192626953, "step": 109370 }, { "epoch": 0.10653333333333333, "grad_norm": 0.16651180386543274, "learning_rate": 9.322999596934213e-05, "loss": 3.226927947998047, "step": 109380 }, { "epoch": 0.1066, "grad_norm": 0.1958107352256775, "learning_rate": 9.322445622567539e-05, "loss": 3.258595657348633, "step": 109390 }, { "epoch": 0.10666666666666667, "grad_norm": 0.20020346343517303, "learning_rate": 9.321891438113683e-05, "loss": 3.2267074584960938, "step": 109400 }, { "epoch": 0.10673333333333333, "grad_norm": 0.1885232925415039, "learning_rate": 9.321337043599583e-05, "loss": 3.303643798828125, "step": 109410 }, { "epoch": 0.1068, "grad_norm": 0.18923263251781464, "learning_rate": 9.320782439052182e-05, "loss": 3.2399749755859375, "step": 109420 }, { "epoch": 0.10686666666666667, "grad_norm": 0.16690129041671753, "learning_rate": 9.320227624498439e-05, "loss": 3.215964126586914, "step": 109430 }, { "epoch": 0.10693333333333334, "grad_norm": 0.17347757518291473, "learning_rate": 9.319672599965317e-05, "loss": 3.1499576568603516, "step": 109440 }, { "epoch": 0.107, "grad_norm": 0.16412819921970367, "learning_rate": 9.319117365479792e-05, "loss": 3.211117172241211, "step": 109450 }, { "epoch": 0.10706666666666667, "grad_norm": 0.17262110114097595, "learning_rate": 9.318561921068855e-05, "loss": 3.197846221923828, "step": 109460 }, { "epoch": 0.10713333333333333, "grad_norm": 0.18862618505954742, "learning_rate": 9.318006266759498e-05, "loss": 3.2177459716796877, "step": 109470 }, { "epoch": 0.1072, "grad_norm": 0.16999290883541107, "learning_rate": 9.317450402578729e-05, "loss": 3.1798439025878906, "step": 109480 }, { "epoch": 0.10726666666666666, "grad_norm": 0.20578084886074066, "learning_rate": 9.316894328553567e-05, "loss": 3.2057113647460938, "step": 109490 }, { "epoch": 0.10733333333333334, "grad_norm": 0.17642557621002197, "learning_rate": 9.316338044711037e-05, "loss": 3.1971820831298827, "step": 109500 }, { "epoch": 0.1074, "grad_norm": 0.17570234835147858, "learning_rate": 9.315781551078178e-05, "loss": 3.2340118408203127, "step": 109510 }, { "epoch": 0.10746666666666667, "grad_norm": 0.19452935457229614, "learning_rate": 9.315224847682037e-05, "loss": 3.231650543212891, "step": 109520 }, { "epoch": 0.10753333333333333, "grad_norm": 0.17492137849330902, "learning_rate": 9.314667934549672e-05, "loss": 3.214214324951172, "step": 109530 }, { "epoch": 0.1076, "grad_norm": 0.17902082204818726, "learning_rate": 9.314110811708151e-05, "loss": 3.197111892700195, "step": 109540 }, { "epoch": 0.10766666666666666, "grad_norm": 2.0666868686676025, "learning_rate": 9.313553479184553e-05, "loss": 2.8196739196777343, "step": 109550 }, { "epoch": 0.10773333333333333, "grad_norm": 0.1848868429660797, "learning_rate": 9.312995937005965e-05, "loss": 3.2526527404785157, "step": 109560 }, { "epoch": 0.1078, "grad_norm": 0.5704283118247986, "learning_rate": 9.312438185199487e-05, "loss": 2.628369140625, "step": 109570 }, { "epoch": 0.10786666666666667, "grad_norm": 0.48139557242393494, "learning_rate": 9.311880223792228e-05, "loss": 2.9373313903808596, "step": 109580 }, { "epoch": 0.10793333333333334, "grad_norm": 0.16929706931114197, "learning_rate": 9.311322052811305e-05, "loss": 3.2840003967285156, "step": 109590 }, { "epoch": 0.108, "grad_norm": 0.20020703971385956, "learning_rate": 9.310763672283849e-05, "loss": 3.192902374267578, "step": 109600 }, { "epoch": 0.10806666666666667, "grad_norm": 0.18664290010929108, "learning_rate": 9.310205082237e-05, "loss": 3.2242691040039064, "step": 109610 }, { "epoch": 0.10813333333333333, "grad_norm": 0.16946369409561157, "learning_rate": 9.309646282697906e-05, "loss": 3.2224246978759767, "step": 109620 }, { "epoch": 0.1082, "grad_norm": 0.2290266752243042, "learning_rate": 9.309087273693728e-05, "loss": 3.2180877685546876, "step": 109630 }, { "epoch": 0.10826666666666666, "grad_norm": 0.20343199372291565, "learning_rate": 9.308528055251634e-05, "loss": 3.2587055206298827, "step": 109640 }, { "epoch": 0.10833333333333334, "grad_norm": 0.19016703963279724, "learning_rate": 9.307968627398807e-05, "loss": 3.2060550689697265, "step": 109650 }, { "epoch": 0.1084, "grad_norm": 0.39406564831733704, "learning_rate": 9.307408990162434e-05, "loss": 3.239479827880859, "step": 109660 }, { "epoch": 0.10846666666666667, "grad_norm": 0.1996629387140274, "learning_rate": 9.306849143569717e-05, "loss": 3.2271774291992186, "step": 109670 }, { "epoch": 0.10853333333333333, "grad_norm": 0.19651848077774048, "learning_rate": 9.306289087647869e-05, "loss": 3.218290328979492, "step": 109680 }, { "epoch": 0.1086, "grad_norm": 0.269931823015213, "learning_rate": 9.305728822424108e-05, "loss": 3.1840044021606446, "step": 109690 }, { "epoch": 0.10866666666666666, "grad_norm": 0.32751160860061646, "learning_rate": 9.305168347925666e-05, "loss": 3.216344451904297, "step": 109700 }, { "epoch": 0.10873333333333333, "grad_norm": 0.18715180456638336, "learning_rate": 9.304607664179782e-05, "loss": 3.269866943359375, "step": 109710 }, { "epoch": 0.1088, "grad_norm": 0.1904134452342987, "learning_rate": 9.304046771213712e-05, "loss": 3.250564193725586, "step": 109720 }, { "epoch": 0.10886666666666667, "grad_norm": 0.26277777552604675, "learning_rate": 9.303485669054713e-05, "loss": 3.201845169067383, "step": 109730 }, { "epoch": 0.10893333333333333, "grad_norm": 0.172882542014122, "learning_rate": 9.302924357730059e-05, "loss": 3.2114566802978515, "step": 109740 }, { "epoch": 0.109, "grad_norm": 0.21314193308353424, "learning_rate": 9.302362837267031e-05, "loss": 3.335422897338867, "step": 109750 }, { "epoch": 0.10906666666666667, "grad_norm": 0.17357587814331055, "learning_rate": 9.301801107692922e-05, "loss": 3.2476097106933595, "step": 109760 }, { "epoch": 0.10913333333333333, "grad_norm": 0.17956697940826416, "learning_rate": 9.301239169035033e-05, "loss": 3.2490989685058596, "step": 109770 }, { "epoch": 0.1092, "grad_norm": 0.19083364307880402, "learning_rate": 9.300677021320677e-05, "loss": 3.301980972290039, "step": 109780 }, { "epoch": 0.10926666666666666, "grad_norm": 0.17542563378810883, "learning_rate": 9.300114664577176e-05, "loss": 3.1742849349975586, "step": 109790 }, { "epoch": 0.10933333333333334, "grad_norm": 0.186640202999115, "learning_rate": 9.299552098831863e-05, "loss": 3.2218055725097656, "step": 109800 }, { "epoch": 0.1094, "grad_norm": 0.17598439753055573, "learning_rate": 9.298989324112081e-05, "loss": 3.152638816833496, "step": 109810 }, { "epoch": 0.10946666666666667, "grad_norm": 0.17940866947174072, "learning_rate": 9.298426340445183e-05, "loss": 3.239529037475586, "step": 109820 }, { "epoch": 0.10953333333333333, "grad_norm": 0.17147323489189148, "learning_rate": 9.297863147858533e-05, "loss": 3.190868377685547, "step": 109830 }, { "epoch": 0.1096, "grad_norm": 0.17372068762779236, "learning_rate": 9.297299746379502e-05, "loss": 3.2155738830566407, "step": 109840 }, { "epoch": 0.10966666666666666, "grad_norm": 0.19146163761615753, "learning_rate": 9.296736136035474e-05, "loss": 3.232274627685547, "step": 109850 }, { "epoch": 0.10973333333333334, "grad_norm": 0.5797277092933655, "learning_rate": 9.296172316853845e-05, "loss": 3.2920494079589844, "step": 109860 }, { "epoch": 0.1098, "grad_norm": 0.1743949055671692, "learning_rate": 9.295608288862016e-05, "loss": 3.166634178161621, "step": 109870 }, { "epoch": 0.10986666666666667, "grad_norm": 0.18545939028263092, "learning_rate": 9.295044052087402e-05, "loss": 3.3911582946777346, "step": 109880 }, { "epoch": 0.10993333333333333, "grad_norm": 0.20755644142627716, "learning_rate": 9.294479606557427e-05, "loss": 3.224203872680664, "step": 109890 }, { "epoch": 0.11, "grad_norm": 0.19253137707710266, "learning_rate": 9.293914952299525e-05, "loss": 3.225690460205078, "step": 109900 }, { "epoch": 0.11006666666666666, "grad_norm": 0.23044724762439728, "learning_rate": 9.29335008934114e-05, "loss": 3.1864173889160154, "step": 109910 }, { "epoch": 0.11013333333333333, "grad_norm": 0.18605844676494598, "learning_rate": 9.292785017709726e-05, "loss": 3.2142307281494142, "step": 109920 }, { "epoch": 0.1102, "grad_norm": 0.16837111115455627, "learning_rate": 9.29221973743275e-05, "loss": 3.211568832397461, "step": 109930 }, { "epoch": 0.11026666666666667, "grad_norm": 0.18363016843795776, "learning_rate": 9.291654248537687e-05, "loss": 3.208856964111328, "step": 109940 }, { "epoch": 0.11033333333333334, "grad_norm": 0.19664856791496277, "learning_rate": 9.291088551052018e-05, "loss": 3.2013008117675783, "step": 109950 }, { "epoch": 0.1104, "grad_norm": 0.5188928842544556, "learning_rate": 9.29052264500324e-05, "loss": 3.247976303100586, "step": 109960 }, { "epoch": 0.11046666666666667, "grad_norm": 0.19013258814811707, "learning_rate": 9.289956530418858e-05, "loss": 3.167633056640625, "step": 109970 }, { "epoch": 0.11053333333333333, "grad_norm": 0.18233487010002136, "learning_rate": 9.28939020732639e-05, "loss": 3.202226257324219, "step": 109980 }, { "epoch": 0.1106, "grad_norm": 0.21617646515369415, "learning_rate": 9.288823675753358e-05, "loss": 3.22198371887207, "step": 109990 }, { "epoch": 0.11066666666666666, "grad_norm": 0.18457838892936707, "learning_rate": 9.288256935727298e-05, "loss": 3.2261001586914064, "step": 110000 }, { "epoch": 0.11073333333333334, "grad_norm": 0.18994663655757904, "learning_rate": 9.287689987275756e-05, "loss": 3.2031276702880858, "step": 110010 }, { "epoch": 0.1108, "grad_norm": 0.1798814833164215, "learning_rate": 9.287122830426289e-05, "loss": 3.1863029479980467, "step": 110020 }, { "epoch": 0.11086666666666667, "grad_norm": 0.2515169680118561, "learning_rate": 9.286555465206463e-05, "loss": 3.215082550048828, "step": 110030 }, { "epoch": 0.11093333333333333, "grad_norm": 0.18736062943935394, "learning_rate": 9.285987891643853e-05, "loss": 3.325109100341797, "step": 110040 }, { "epoch": 0.111, "grad_norm": 0.17622581124305725, "learning_rate": 9.285420109766046e-05, "loss": 3.141195869445801, "step": 110050 }, { "epoch": 0.11106666666666666, "grad_norm": 0.19203153252601624, "learning_rate": 9.284852119600636e-05, "loss": 3.227835464477539, "step": 110060 }, { "epoch": 0.11113333333333333, "grad_norm": 0.1706237941980362, "learning_rate": 9.284283921175233e-05, "loss": 3.2698997497558593, "step": 110070 }, { "epoch": 0.1112, "grad_norm": 0.17359957098960876, "learning_rate": 9.28371551451745e-05, "loss": 3.1908430099487304, "step": 110080 }, { "epoch": 0.11126666666666667, "grad_norm": 0.16876986622810364, "learning_rate": 9.283146899654918e-05, "loss": 3.2014114379882814, "step": 110090 }, { "epoch": 0.11133333333333334, "grad_norm": 0.18835635483264923, "learning_rate": 9.282578076615269e-05, "loss": 3.1730325698852537, "step": 110100 }, { "epoch": 0.1114, "grad_norm": 0.1716253012418747, "learning_rate": 9.282009045426155e-05, "loss": 3.2201679229736326, "step": 110110 }, { "epoch": 0.11146666666666667, "grad_norm": 0.18515262007713318, "learning_rate": 9.281439806115229e-05, "loss": 3.2129501342773437, "step": 110120 }, { "epoch": 0.11153333333333333, "grad_norm": 0.9887115359306335, "learning_rate": 9.28087035871016e-05, "loss": 2.9544729232788085, "step": 110130 }, { "epoch": 0.1116, "grad_norm": 1.1101323366165161, "learning_rate": 9.280300703238624e-05, "loss": 2.383194923400879, "step": 110140 }, { "epoch": 0.11166666666666666, "grad_norm": 0.8035651445388794, "learning_rate": 9.27973083972831e-05, "loss": 2.241897392272949, "step": 110150 }, { "epoch": 0.11173333333333334, "grad_norm": 0.6615997552871704, "learning_rate": 9.279160768206916e-05, "loss": 2.2110822677612303, "step": 110160 }, { "epoch": 0.1118, "grad_norm": 0.8342711925506592, "learning_rate": 9.278590488702147e-05, "loss": 2.0978181838989256, "step": 110170 }, { "epoch": 0.11186666666666667, "grad_norm": 0.37840431928634644, "learning_rate": 9.278020001241724e-05, "loss": 2.150665855407715, "step": 110180 }, { "epoch": 0.11193333333333333, "grad_norm": 0.48987120389938354, "learning_rate": 9.277449305853372e-05, "loss": 2.1005523681640623, "step": 110190 }, { "epoch": 0.112, "grad_norm": 0.353720486164093, "learning_rate": 9.276878402564831e-05, "loss": 2.1868900299072265, "step": 110200 }, { "epoch": 0.11206666666666666, "grad_norm": 0.29419273138046265, "learning_rate": 9.276307291403846e-05, "loss": 2.2053415298461916, "step": 110210 }, { "epoch": 0.11213333333333333, "grad_norm": 0.4426549971103668, "learning_rate": 9.275735972398178e-05, "loss": 2.356749153137207, "step": 110220 }, { "epoch": 0.1122, "grad_norm": 0.4477282166481018, "learning_rate": 9.275164445575595e-05, "loss": 2.09845027923584, "step": 110230 }, { "epoch": 0.11226666666666667, "grad_norm": 0.3643140196800232, "learning_rate": 9.274592710963876e-05, "loss": 2.0151901245117188, "step": 110240 }, { "epoch": 0.11233333333333333, "grad_norm": 0.3400585651397705, "learning_rate": 9.274020768590806e-05, "loss": 2.1065910339355467, "step": 110250 }, { "epoch": 0.1124, "grad_norm": 0.6971167922019958, "learning_rate": 9.273448618484187e-05, "loss": 2.199711799621582, "step": 110260 }, { "epoch": 0.11246666666666667, "grad_norm": 0.24216412007808685, "learning_rate": 9.272876260671828e-05, "loss": 3.6265033721923827, "step": 110270 }, { "epoch": 0.11253333333333333, "grad_norm": 0.20484653115272522, "learning_rate": 9.272303695181544e-05, "loss": 3.5608776092529295, "step": 110280 }, { "epoch": 0.1126, "grad_norm": 0.21159610152244568, "learning_rate": 9.271730922041166e-05, "loss": 3.418692779541016, "step": 110290 }, { "epoch": 0.11266666666666666, "grad_norm": 0.20971494913101196, "learning_rate": 9.271157941278536e-05, "loss": 3.406142807006836, "step": 110300 }, { "epoch": 0.11273333333333334, "grad_norm": 0.6782054901123047, "learning_rate": 9.270584752921497e-05, "loss": 3.3785327911376952, "step": 110310 }, { "epoch": 0.1128, "grad_norm": 0.3579425513744354, "learning_rate": 9.270011356997914e-05, "loss": 3.446002960205078, "step": 110320 }, { "epoch": 0.11286666666666667, "grad_norm": 0.19753357768058777, "learning_rate": 9.26943775353565e-05, "loss": 3.3427570343017576, "step": 110330 }, { "epoch": 0.11293333333333333, "grad_norm": 0.21670065820217133, "learning_rate": 9.268863942562591e-05, "loss": 3.4521766662597657, "step": 110340 }, { "epoch": 0.113, "grad_norm": 0.1805860996246338, "learning_rate": 9.268289924106622e-05, "loss": 3.365281677246094, "step": 110350 }, { "epoch": 0.11306666666666666, "grad_norm": 0.1824197620153427, "learning_rate": 9.267715698195643e-05, "loss": 3.3343532562255858, "step": 110360 }, { "epoch": 0.11313333333333334, "grad_norm": 0.20869001746177673, "learning_rate": 9.267141264857564e-05, "loss": 3.2905006408691406, "step": 110370 }, { "epoch": 0.1132, "grad_norm": 0.19875752925872803, "learning_rate": 9.266566624120306e-05, "loss": 3.2830215454101563, "step": 110380 }, { "epoch": 0.11326666666666667, "grad_norm": 0.18904197216033936, "learning_rate": 9.265991776011795e-05, "loss": 3.3061843872070313, "step": 110390 }, { "epoch": 0.11333333333333333, "grad_norm": 0.16458743810653687, "learning_rate": 9.265416720559976e-05, "loss": 3.219131088256836, "step": 110400 }, { "epoch": 0.1134, "grad_norm": 0.2061881721019745, "learning_rate": 9.264841457792795e-05, "loss": 3.242839050292969, "step": 110410 }, { "epoch": 0.11346666666666666, "grad_norm": 0.16463702917099, "learning_rate": 9.264265987738215e-05, "loss": 3.2261791229248047, "step": 110420 }, { "epoch": 0.11353333333333333, "grad_norm": 0.17636464536190033, "learning_rate": 9.2636903104242e-05, "loss": 3.251835250854492, "step": 110430 }, { "epoch": 0.1136, "grad_norm": 0.17139358818531036, "learning_rate": 9.263114425878737e-05, "loss": 3.2830177307128907, "step": 110440 }, { "epoch": 0.11366666666666667, "grad_norm": 0.21662026643753052, "learning_rate": 9.262538334129813e-05, "loss": 3.319408416748047, "step": 110450 }, { "epoch": 0.11373333333333334, "grad_norm": 0.17834024131298065, "learning_rate": 9.261962035205429e-05, "loss": 3.2540328979492186, "step": 110460 }, { "epoch": 0.1138, "grad_norm": 0.17686517536640167, "learning_rate": 9.261385529133594e-05, "loss": 3.234027862548828, "step": 110470 }, { "epoch": 0.11386666666666667, "grad_norm": 0.1778765171766281, "learning_rate": 9.26080881594233e-05, "loss": 3.282632827758789, "step": 110480 }, { "epoch": 0.11393333333333333, "grad_norm": 0.1677188277244568, "learning_rate": 9.260231895659665e-05, "loss": 3.2076881408691404, "step": 110490 }, { "epoch": 0.114, "grad_norm": 0.22586002945899963, "learning_rate": 9.259654768313644e-05, "loss": 3.2638439178466796, "step": 110500 }, { "epoch": 0.11406666666666666, "grad_norm": 0.1636440008878708, "learning_rate": 9.259077433932312e-05, "loss": 3.218471908569336, "step": 110510 }, { "epoch": 0.11413333333333334, "grad_norm": 0.20154546201229095, "learning_rate": 9.258499892543734e-05, "loss": 3.251149368286133, "step": 110520 }, { "epoch": 0.1142, "grad_norm": 0.1650281548500061, "learning_rate": 9.25792214417598e-05, "loss": 3.208812713623047, "step": 110530 }, { "epoch": 0.11426666666666667, "grad_norm": 0.16820915043354034, "learning_rate": 9.257344188857126e-05, "loss": 3.163133430480957, "step": 110540 }, { "epoch": 0.11433333333333333, "grad_norm": 0.1726948767900467, "learning_rate": 9.25676602661527e-05, "loss": 3.2837459564208986, "step": 110550 }, { "epoch": 0.1144, "grad_norm": 0.17700441181659698, "learning_rate": 9.256187657478509e-05, "loss": 3.208140182495117, "step": 110560 }, { "epoch": 0.11446666666666666, "grad_norm": 0.17085468769073486, "learning_rate": 9.255609081474955e-05, "loss": 3.284466552734375, "step": 110570 }, { "epoch": 0.11453333333333333, "grad_norm": 0.1950644850730896, "learning_rate": 9.255030298632727e-05, "loss": 3.243681716918945, "step": 110580 }, { "epoch": 0.1146, "grad_norm": 0.19787028431892395, "learning_rate": 9.254451308979957e-05, "loss": 3.206203079223633, "step": 110590 }, { "epoch": 0.11466666666666667, "grad_norm": 0.20068077743053436, "learning_rate": 9.253872112544788e-05, "loss": 3.1956567764282227, "step": 110600 }, { "epoch": 0.11473333333333334, "grad_norm": 0.17947939038276672, "learning_rate": 9.253292709355369e-05, "loss": 3.2326324462890623, "step": 110610 }, { "epoch": 0.1148, "grad_norm": 0.5168956518173218, "learning_rate": 9.252713099439863e-05, "loss": 3.317743682861328, "step": 110620 }, { "epoch": 0.11486666666666667, "grad_norm": 0.1624792218208313, "learning_rate": 9.252133282826438e-05, "loss": 3.2149730682373048, "step": 110630 }, { "epoch": 0.11493333333333333, "grad_norm": 0.1613030880689621, "learning_rate": 9.25155325954328e-05, "loss": 3.2808666229248047, "step": 110640 }, { "epoch": 0.115, "grad_norm": 0.3571198582649231, "learning_rate": 9.250973029618575e-05, "loss": 3.3516429901123046, "step": 110650 }, { "epoch": 0.11506666666666666, "grad_norm": 0.17104226350784302, "learning_rate": 9.250392593080529e-05, "loss": 3.2160408020019533, "step": 110660 }, { "epoch": 0.11513333333333334, "grad_norm": 0.17278003692626953, "learning_rate": 9.249811949957349e-05, "loss": 3.216230010986328, "step": 110670 }, { "epoch": 0.1152, "grad_norm": 0.1659902185201645, "learning_rate": 9.249231100277263e-05, "loss": 3.1900094985961913, "step": 110680 }, { "epoch": 0.11526666666666667, "grad_norm": 0.18818128108978271, "learning_rate": 9.248650044068495e-05, "loss": 3.315332794189453, "step": 110690 }, { "epoch": 0.11533333333333333, "grad_norm": 0.1763753890991211, "learning_rate": 9.248068781359291e-05, "loss": 3.200428771972656, "step": 110700 }, { "epoch": 0.1154, "grad_norm": 0.16747203469276428, "learning_rate": 9.247487312177903e-05, "loss": 3.2127120971679686, "step": 110710 }, { "epoch": 0.11546666666666666, "grad_norm": 0.1750430017709732, "learning_rate": 9.246905636552588e-05, "loss": 3.2116744995117186, "step": 110720 }, { "epoch": 0.11553333333333334, "grad_norm": 0.17145980894565582, "learning_rate": 9.246323754511623e-05, "loss": 3.2261566162109374, "step": 110730 }, { "epoch": 0.1156, "grad_norm": 0.18503350019454956, "learning_rate": 9.245741666083286e-05, "loss": 3.222332000732422, "step": 110740 }, { "epoch": 0.11566666666666667, "grad_norm": 0.17692987620830536, "learning_rate": 9.24515937129587e-05, "loss": 3.180293083190918, "step": 110750 }, { "epoch": 0.11573333333333333, "grad_norm": 0.17621447145938873, "learning_rate": 9.244576870177678e-05, "loss": 3.2334579467773437, "step": 110760 }, { "epoch": 0.1158, "grad_norm": 0.16273841261863708, "learning_rate": 9.24399416275702e-05, "loss": 3.2388080596923827, "step": 110770 }, { "epoch": 0.11586666666666667, "grad_norm": 0.1773068904876709, "learning_rate": 9.24341124906222e-05, "loss": 3.2089302062988283, "step": 110780 }, { "epoch": 0.11593333333333333, "grad_norm": 0.16514372825622559, "learning_rate": 9.242828129121606e-05, "loss": 3.1710094451904296, "step": 110790 }, { "epoch": 0.116, "grad_norm": 0.1922532469034195, "learning_rate": 9.242244802963522e-05, "loss": 3.237478256225586, "step": 110800 }, { "epoch": 0.11606666666666667, "grad_norm": 0.17402753233909607, "learning_rate": 9.24166127061632e-05, "loss": 3.2250743865966798, "step": 110810 }, { "epoch": 0.11613333333333334, "grad_norm": 0.23828741908073425, "learning_rate": 9.241077532108363e-05, "loss": 3.223643493652344, "step": 110820 }, { "epoch": 0.1162, "grad_norm": 0.17182910442352295, "learning_rate": 9.24049358746802e-05, "loss": 3.229136657714844, "step": 110830 }, { "epoch": 0.11626666666666667, "grad_norm": 0.17753024399280548, "learning_rate": 9.239909436723674e-05, "loss": 3.1950124740600585, "step": 110840 }, { "epoch": 0.11633333333333333, "grad_norm": 0.16869786381721497, "learning_rate": 9.23932507990372e-05, "loss": 3.3078048706054686, "step": 110850 }, { "epoch": 0.1164, "grad_norm": 0.17734025418758392, "learning_rate": 9.238740517036557e-05, "loss": 3.238301467895508, "step": 110860 }, { "epoch": 0.11646666666666666, "grad_norm": 0.19707641005516052, "learning_rate": 9.238155748150597e-05, "loss": 3.180228424072266, "step": 110870 }, { "epoch": 0.11653333333333334, "grad_norm": 0.16713286936283112, "learning_rate": 9.23757077327426e-05, "loss": 3.1605735778808595, "step": 110880 }, { "epoch": 0.1166, "grad_norm": 0.1755858212709427, "learning_rate": 9.236985592435983e-05, "loss": 3.2126869201660155, "step": 110890 }, { "epoch": 0.11666666666666667, "grad_norm": 0.1821666955947876, "learning_rate": 9.236400205664205e-05, "loss": 3.2380733489990234, "step": 110900 }, { "epoch": 0.11673333333333333, "grad_norm": 0.16460183262825012, "learning_rate": 9.235814612987377e-05, "loss": 3.1561511993408202, "step": 110910 }, { "epoch": 0.1168, "grad_norm": 0.17084473371505737, "learning_rate": 9.235228814433963e-05, "loss": 3.2050502777099608, "step": 110920 }, { "epoch": 0.11686666666666666, "grad_norm": 0.4522821009159088, "learning_rate": 9.234642810032434e-05, "loss": 3.1579519271850587, "step": 110930 }, { "epoch": 0.11693333333333333, "grad_norm": 0.19572332501411438, "learning_rate": 9.234056599811274e-05, "loss": 3.202436065673828, "step": 110940 }, { "epoch": 0.117, "grad_norm": 0.16134794056415558, "learning_rate": 9.233470183798972e-05, "loss": 3.181082344055176, "step": 110950 }, { "epoch": 0.11706666666666667, "grad_norm": 0.1633865237236023, "learning_rate": 9.232883562024031e-05, "loss": 3.2247383117675783, "step": 110960 }, { "epoch": 0.11713333333333334, "grad_norm": 0.1924436241388321, "learning_rate": 9.232296734514965e-05, "loss": 3.239014434814453, "step": 110970 }, { "epoch": 0.1172, "grad_norm": 0.20819240808486938, "learning_rate": 9.231709701300293e-05, "loss": 3.165486717224121, "step": 110980 }, { "epoch": 0.11726666666666667, "grad_norm": 0.20108428597450256, "learning_rate": 9.231122462408549e-05, "loss": 3.2281097412109374, "step": 110990 }, { "epoch": 0.11733333333333333, "grad_norm": 0.2087092101573944, "learning_rate": 9.230535017868275e-05, "loss": 3.183753776550293, "step": 111000 }, { "epoch": 0.1174, "grad_norm": 0.1749776154756546, "learning_rate": 9.229947367708023e-05, "loss": 3.1806859970092773, "step": 111010 }, { "epoch": 0.11746666666666666, "grad_norm": 0.1655922383069992, "learning_rate": 9.229359511956355e-05, "loss": 3.225376510620117, "step": 111020 }, { "epoch": 0.11753333333333334, "grad_norm": 0.17723453044891357, "learning_rate": 9.228771450641839e-05, "loss": 3.205007553100586, "step": 111030 }, { "epoch": 0.1176, "grad_norm": 0.17527306079864502, "learning_rate": 9.228183183793064e-05, "loss": 3.1974842071533205, "step": 111040 }, { "epoch": 0.11766666666666667, "grad_norm": 0.15982505679130554, "learning_rate": 9.227594711438618e-05, "loss": 3.2197601318359377, "step": 111050 }, { "epoch": 0.11773333333333333, "grad_norm": 0.17800623178482056, "learning_rate": 9.227006033607104e-05, "loss": 3.196685791015625, "step": 111060 }, { "epoch": 0.1178, "grad_norm": 0.18872642517089844, "learning_rate": 9.226417150327134e-05, "loss": 3.3200237274169924, "step": 111070 }, { "epoch": 0.11786666666666666, "grad_norm": 0.17758604884147644, "learning_rate": 9.225828061627328e-05, "loss": 3.220774841308594, "step": 111080 }, { "epoch": 0.11793333333333333, "grad_norm": 0.17707322537899017, "learning_rate": 9.22523876753632e-05, "loss": 3.2309818267822266, "step": 111090 }, { "epoch": 0.118, "grad_norm": 0.16896507143974304, "learning_rate": 9.224649268082753e-05, "loss": 3.2447032928466797, "step": 111100 }, { "epoch": 0.11806666666666667, "grad_norm": 0.17423062026500702, "learning_rate": 9.224059563295275e-05, "loss": 3.214163970947266, "step": 111110 }, { "epoch": 0.11813333333333334, "grad_norm": 0.17286866903305054, "learning_rate": 9.223469653202551e-05, "loss": 3.224813461303711, "step": 111120 }, { "epoch": 0.1182, "grad_norm": 0.1730978786945343, "learning_rate": 9.222879537833252e-05, "loss": 3.330349349975586, "step": 111130 }, { "epoch": 0.11826666666666667, "grad_norm": 0.1641434282064438, "learning_rate": 9.22228921721606e-05, "loss": 3.251364898681641, "step": 111140 }, { "epoch": 0.11833333333333333, "grad_norm": 0.2392062544822693, "learning_rate": 9.221698691379667e-05, "loss": 3.171377182006836, "step": 111150 }, { "epoch": 0.1184, "grad_norm": 0.19749240577220917, "learning_rate": 9.221107960352772e-05, "loss": 3.207162857055664, "step": 111160 }, { "epoch": 0.11846666666666666, "grad_norm": 0.18440796434879303, "learning_rate": 9.220517024164092e-05, "loss": 3.2015354156494142, "step": 111170 }, { "epoch": 0.11853333333333334, "grad_norm": 0.24437828361988068, "learning_rate": 9.219925882842345e-05, "loss": 3.166498374938965, "step": 111180 }, { "epoch": 0.1186, "grad_norm": 0.19547180831432343, "learning_rate": 9.219334536416265e-05, "loss": 3.2043704986572266, "step": 111190 }, { "epoch": 0.11866666666666667, "grad_norm": 0.185228630900383, "learning_rate": 9.21874298491459e-05, "loss": 3.2123912811279296, "step": 111200 }, { "epoch": 0.11873333333333333, "grad_norm": 0.1703331619501114, "learning_rate": 9.218151228366075e-05, "loss": 3.138707160949707, "step": 111210 }, { "epoch": 0.1188, "grad_norm": 0.1942654252052307, "learning_rate": 9.21755926679948e-05, "loss": 3.2374221801757814, "step": 111220 }, { "epoch": 0.11886666666666666, "grad_norm": 0.20253850519657135, "learning_rate": 9.216967100243579e-05, "loss": 3.2072998046875, "step": 111230 }, { "epoch": 0.11893333333333334, "grad_norm": 0.1862007975578308, "learning_rate": 9.21637472872715e-05, "loss": 3.2297996520996093, "step": 111240 }, { "epoch": 0.119, "grad_norm": 0.1716809868812561, "learning_rate": 9.215782152278986e-05, "loss": 3.204128646850586, "step": 111250 }, { "epoch": 0.11906666666666667, "grad_norm": 0.16961994767189026, "learning_rate": 9.21518937092789e-05, "loss": 3.1407089233398438, "step": 111260 }, { "epoch": 0.11913333333333333, "grad_norm": 0.17541857063770294, "learning_rate": 9.214596384702671e-05, "loss": 3.2095211029052733, "step": 111270 }, { "epoch": 0.1192, "grad_norm": 0.1859084814786911, "learning_rate": 9.21400319363215e-05, "loss": 3.169190216064453, "step": 111280 }, { "epoch": 0.11926666666666667, "grad_norm": 0.16746731102466583, "learning_rate": 9.213409797745161e-05, "loss": 3.1949438095092773, "step": 111290 }, { "epoch": 0.11933333333333333, "grad_norm": 0.1722993105649948, "learning_rate": 9.212816197070544e-05, "loss": 3.1954845428466796, "step": 111300 }, { "epoch": 0.1194, "grad_norm": 0.2833596169948578, "learning_rate": 9.212222391637151e-05, "loss": 3.220359039306641, "step": 111310 }, { "epoch": 0.11946666666666667, "grad_norm": 0.16329920291900635, "learning_rate": 9.211628381473842e-05, "loss": 3.2184368133544923, "step": 111320 }, { "epoch": 0.11953333333333334, "grad_norm": 0.18085215985774994, "learning_rate": 9.211034166609487e-05, "loss": 3.1956771850585937, "step": 111330 }, { "epoch": 0.1196, "grad_norm": 0.1721593290567398, "learning_rate": 9.21043974707297e-05, "loss": 3.285791778564453, "step": 111340 }, { "epoch": 0.11966666666666667, "grad_norm": 0.19937270879745483, "learning_rate": 9.209845122893181e-05, "loss": 3.196356773376465, "step": 111350 }, { "epoch": 0.11973333333333333, "grad_norm": 0.17191703617572784, "learning_rate": 9.20925029409902e-05, "loss": 3.187114715576172, "step": 111360 }, { "epoch": 0.1198, "grad_norm": 0.16937540471553802, "learning_rate": 9.208655260719398e-05, "loss": 3.2411472320556642, "step": 111370 }, { "epoch": 0.11986666666666666, "grad_norm": 0.18032759428024292, "learning_rate": 9.208060022783237e-05, "loss": 3.2094635009765624, "step": 111380 }, { "epoch": 0.11993333333333334, "grad_norm": 0.23904195427894592, "learning_rate": 9.20746458031947e-05, "loss": 3.2435970306396484, "step": 111390 }, { "epoch": 0.12, "grad_norm": 0.1936171054840088, "learning_rate": 9.206868933357031e-05, "loss": 3.1958034515380858, "step": 111400 }, { "epoch": 0.12006666666666667, "grad_norm": 0.166502445936203, "learning_rate": 9.206273081924876e-05, "loss": 3.1994949340820313, "step": 111410 }, { "epoch": 0.12013333333333333, "grad_norm": 0.4583588242530823, "learning_rate": 9.205677026051965e-05, "loss": 3.25643310546875, "step": 111420 }, { "epoch": 0.1202, "grad_norm": 1.327568769454956, "learning_rate": 9.205080765767266e-05, "loss": 3.015035057067871, "step": 111430 }, { "epoch": 0.12026666666666666, "grad_norm": 0.1666574627161026, "learning_rate": 9.204484301099763e-05, "loss": 3.15277156829834, "step": 111440 }, { "epoch": 0.12033333333333333, "grad_norm": 0.16911719739437103, "learning_rate": 9.203887632078445e-05, "loss": 3.23890380859375, "step": 111450 }, { "epoch": 0.1204, "grad_norm": 0.16492924094200134, "learning_rate": 9.203290758732312e-05, "loss": 3.2163280487060546, "step": 111460 }, { "epoch": 0.12046666666666667, "grad_norm": 0.20271241664886475, "learning_rate": 9.202693681090373e-05, "loss": 3.251280975341797, "step": 111470 }, { "epoch": 0.12053333333333334, "grad_norm": 0.3525190055370331, "learning_rate": 9.202096399181651e-05, "loss": 3.2368053436279296, "step": 111480 }, { "epoch": 0.1206, "grad_norm": 0.21899202466011047, "learning_rate": 9.201498913035175e-05, "loss": 3.235042190551758, "step": 111490 }, { "epoch": 0.12066666666666667, "grad_norm": 0.17557376623153687, "learning_rate": 9.200901222679985e-05, "loss": 3.233653259277344, "step": 111500 }, { "epoch": 0.12073333333333333, "grad_norm": 0.24472278356552124, "learning_rate": 9.20030332814513e-05, "loss": 3.196592903137207, "step": 111510 }, { "epoch": 0.1208, "grad_norm": 0.3189507722854614, "learning_rate": 9.199705229459672e-05, "loss": 3.150381660461426, "step": 111520 }, { "epoch": 0.12086666666666666, "grad_norm": 0.18759486079216003, "learning_rate": 9.199106926652678e-05, "loss": 3.2248607635498048, "step": 111530 }, { "epoch": 0.12093333333333334, "grad_norm": 0.1687501221895218, "learning_rate": 9.198508419753231e-05, "loss": 3.24305419921875, "step": 111540 }, { "epoch": 0.121, "grad_norm": 0.17173095047473907, "learning_rate": 9.19790970879042e-05, "loss": 3.231243896484375, "step": 111550 }, { "epoch": 0.12106666666666667, "grad_norm": 0.17069193720817566, "learning_rate": 9.197310793793343e-05, "loss": 3.2590118408203126, "step": 111560 }, { "epoch": 0.12113333333333333, "grad_norm": 0.24483971297740936, "learning_rate": 9.19671167479111e-05, "loss": 3.2371829986572265, "step": 111570 }, { "epoch": 0.1212, "grad_norm": 0.20851649343967438, "learning_rate": 9.19611235181284e-05, "loss": 3.1167606353759765, "step": 111580 }, { "epoch": 0.12126666666666666, "grad_norm": 0.1915956288576126, "learning_rate": 9.195512824887667e-05, "loss": 3.192208099365234, "step": 111590 }, { "epoch": 0.12133333333333333, "grad_norm": 0.17841607332229614, "learning_rate": 9.194913094044723e-05, "loss": 3.17147216796875, "step": 111600 }, { "epoch": 0.1214, "grad_norm": 0.16840782761573792, "learning_rate": 9.194313159313161e-05, "loss": 3.2042407989501953, "step": 111610 }, { "epoch": 0.12146666666666667, "grad_norm": 0.2175535410642624, "learning_rate": 9.19371302072214e-05, "loss": 3.196787452697754, "step": 111620 }, { "epoch": 0.12153333333333333, "grad_norm": 0.17690078914165497, "learning_rate": 9.193112678300828e-05, "loss": 3.2403987884521483, "step": 111630 }, { "epoch": 0.1216, "grad_norm": 0.16964232921600342, "learning_rate": 9.192512132078405e-05, "loss": 3.2102848052978517, "step": 111640 }, { "epoch": 0.12166666666666667, "grad_norm": 0.17691776156425476, "learning_rate": 9.191911382084061e-05, "loss": 3.174741744995117, "step": 111650 }, { "epoch": 0.12173333333333333, "grad_norm": 0.18694111704826355, "learning_rate": 9.191310428346992e-05, "loss": 3.18184757232666, "step": 111660 }, { "epoch": 0.1218, "grad_norm": 0.18384389579296112, "learning_rate": 9.190709270896407e-05, "loss": 3.179962921142578, "step": 111670 }, { "epoch": 0.12186666666666666, "grad_norm": 0.18085302412509918, "learning_rate": 9.190107909761527e-05, "loss": 3.1611557006835938, "step": 111680 }, { "epoch": 0.12193333333333334, "grad_norm": 0.20091961324214935, "learning_rate": 9.189506344971578e-05, "loss": 3.2287296295166015, "step": 111690 }, { "epoch": 0.122, "grad_norm": 0.18476040661334991, "learning_rate": 9.1889045765558e-05, "loss": 3.2361576080322267, "step": 111700 }, { "epoch": 0.12206666666666667, "grad_norm": 0.1894243061542511, "learning_rate": 9.188302604543438e-05, "loss": 3.233539581298828, "step": 111710 }, { "epoch": 0.12213333333333333, "grad_norm": 0.1938467025756836, "learning_rate": 9.187700428963753e-05, "loss": 3.2609710693359375, "step": 111720 }, { "epoch": 0.1222, "grad_norm": 0.1664406955242157, "learning_rate": 9.187098049846013e-05, "loss": 3.170698547363281, "step": 111730 }, { "epoch": 0.12226666666666666, "grad_norm": 0.16531212627887726, "learning_rate": 9.186495467219496e-05, "loss": 3.245777893066406, "step": 111740 }, { "epoch": 0.12233333333333334, "grad_norm": 0.17001208662986755, "learning_rate": 9.185892681113488e-05, "loss": 3.173684310913086, "step": 111750 }, { "epoch": 0.1224, "grad_norm": 0.1963704228401184, "learning_rate": 9.185289691557289e-05, "loss": 3.134219741821289, "step": 111760 }, { "epoch": 0.12246666666666667, "grad_norm": 0.17983707785606384, "learning_rate": 9.184686498580203e-05, "loss": 3.1770179748535154, "step": 111770 }, { "epoch": 0.12253333333333333, "grad_norm": 0.1690322458744049, "learning_rate": 9.184083102211552e-05, "loss": 3.153815269470215, "step": 111780 }, { "epoch": 0.1226, "grad_norm": 0.1797688752412796, "learning_rate": 9.183479502480661e-05, "loss": 3.1881683349609373, "step": 111790 }, { "epoch": 0.12266666666666666, "grad_norm": 0.2092755138874054, "learning_rate": 9.182875699416866e-05, "loss": 3.211425018310547, "step": 111800 }, { "epoch": 0.12273333333333333, "grad_norm": 0.17318150401115417, "learning_rate": 9.182271693049517e-05, "loss": 3.1946636199951173, "step": 111810 }, { "epoch": 0.1228, "grad_norm": 0.20933619141578674, "learning_rate": 9.181667483407968e-05, "loss": 3.2620265960693358, "step": 111820 }, { "epoch": 0.12286666666666667, "grad_norm": 0.21184378862380981, "learning_rate": 9.181063070521588e-05, "loss": 3.1933542251586915, "step": 111830 }, { "epoch": 0.12293333333333334, "grad_norm": 0.1723550260066986, "learning_rate": 9.180458454419754e-05, "loss": 3.2189640045166015, "step": 111840 }, { "epoch": 0.123, "grad_norm": 0.2228795289993286, "learning_rate": 9.179853635131849e-05, "loss": 3.137780952453613, "step": 111850 }, { "epoch": 0.12306666666666667, "grad_norm": 0.1712978035211563, "learning_rate": 9.179248612687274e-05, "loss": 3.2594284057617187, "step": 111860 }, { "epoch": 0.12313333333333333, "grad_norm": 0.17970669269561768, "learning_rate": 9.178643387115435e-05, "loss": 3.174802780151367, "step": 111870 }, { "epoch": 0.1232, "grad_norm": 0.16448679566383362, "learning_rate": 9.178037958445745e-05, "loss": 3.1961090087890627, "step": 111880 }, { "epoch": 0.12326666666666666, "grad_norm": 0.17198264598846436, "learning_rate": 9.177432326707632e-05, "loss": 3.227538299560547, "step": 111890 }, { "epoch": 0.12333333333333334, "grad_norm": 0.21197731792926788, "learning_rate": 9.176826491930533e-05, "loss": 3.1415964126586915, "step": 111900 }, { "epoch": 0.1234, "grad_norm": 0.17070335149765015, "learning_rate": 9.176220454143891e-05, "loss": 3.219564437866211, "step": 111910 }, { "epoch": 0.12346666666666667, "grad_norm": 0.17678944766521454, "learning_rate": 9.175614213377166e-05, "loss": 3.217927932739258, "step": 111920 }, { "epoch": 0.12353333333333333, "grad_norm": 0.16967208683490753, "learning_rate": 9.175007769659819e-05, "loss": 3.2160293579101564, "step": 111930 }, { "epoch": 0.1236, "grad_norm": 0.45333993434906006, "learning_rate": 9.174401123021327e-05, "loss": 3.302455520629883, "step": 111940 }, { "epoch": 0.12366666666666666, "grad_norm": 0.17688079178333282, "learning_rate": 9.173794273491179e-05, "loss": 3.137138557434082, "step": 111950 }, { "epoch": 0.12373333333333333, "grad_norm": 0.5639349818229675, "learning_rate": 9.173187221098865e-05, "loss": 3.1842557907104494, "step": 111960 }, { "epoch": 0.1238, "grad_norm": 0.18858906626701355, "learning_rate": 9.172579965873893e-05, "loss": 3.186284065246582, "step": 111970 }, { "epoch": 0.12386666666666667, "grad_norm": 0.17198888957500458, "learning_rate": 9.171972507845776e-05, "loss": 3.1738361358642577, "step": 111980 }, { "epoch": 0.12393333333333334, "grad_norm": 0.1681160032749176, "learning_rate": 9.17136484704404e-05, "loss": 3.2633644104003907, "step": 111990 }, { "epoch": 0.124, "grad_norm": 0.17795506119728088, "learning_rate": 9.170756983498219e-05, "loss": 3.2135459899902346, "step": 112000 }, { "epoch": 0.12406666666666667, "grad_norm": 0.16644634306430817, "learning_rate": 9.170148917237858e-05, "loss": 3.151529884338379, "step": 112010 }, { "epoch": 0.12413333333333333, "grad_norm": 0.1813119351863861, "learning_rate": 9.169540648292511e-05, "loss": 3.180362319946289, "step": 112020 }, { "epoch": 0.1242, "grad_norm": 0.18978190422058105, "learning_rate": 9.168932176691744e-05, "loss": 3.2166301727294924, "step": 112030 }, { "epoch": 0.12426666666666666, "grad_norm": 0.1855314075946808, "learning_rate": 9.168323502465128e-05, "loss": 3.1881370544433594, "step": 112040 }, { "epoch": 0.12433333333333334, "grad_norm": 0.18715757131576538, "learning_rate": 9.167714625642247e-05, "loss": 3.1764408111572267, "step": 112050 }, { "epoch": 0.1244, "grad_norm": 0.19104529917240143, "learning_rate": 9.167105546252698e-05, "loss": 3.200644683837891, "step": 112060 }, { "epoch": 0.12446666666666667, "grad_norm": 0.19038017094135284, "learning_rate": 9.166496264326082e-05, "loss": 3.2686782836914063, "step": 112070 }, { "epoch": 0.12453333333333333, "grad_norm": 0.1656234860420227, "learning_rate": 9.165886779892012e-05, "loss": 3.167560577392578, "step": 112080 }, { "epoch": 0.1246, "grad_norm": 0.173201322555542, "learning_rate": 9.165277092980114e-05, "loss": 3.220260238647461, "step": 112090 }, { "epoch": 0.12466666666666666, "grad_norm": 0.1828765869140625, "learning_rate": 9.164667203620016e-05, "loss": 3.2029067993164064, "step": 112100 }, { "epoch": 0.12473333333333333, "grad_norm": 0.1969306915998459, "learning_rate": 9.164057111841368e-05, "loss": 3.1892726898193358, "step": 112110 }, { "epoch": 0.1248, "grad_norm": 0.17336618900299072, "learning_rate": 9.163446817673817e-05, "loss": 3.2022586822509767, "step": 112120 }, { "epoch": 0.12486666666666667, "grad_norm": 0.1779400259256363, "learning_rate": 9.162836321147026e-05, "loss": 3.200323486328125, "step": 112130 }, { "epoch": 0.12493333333333333, "grad_norm": 0.20036451518535614, "learning_rate": 9.162225622290671e-05, "loss": 3.1662864685058594, "step": 112140 }, { "epoch": 0.125, "grad_norm": 0.17832280695438385, "learning_rate": 9.161614721134432e-05, "loss": 3.2955181121826174, "step": 112150 }, { "epoch": 0.12506666666666666, "grad_norm": 0.1814066767692566, "learning_rate": 9.161003617708001e-05, "loss": 3.188141441345215, "step": 112160 }, { "epoch": 0.12513333333333335, "grad_norm": 0.1910819560289383, "learning_rate": 9.16039231204108e-05, "loss": 3.1291236877441406, "step": 112170 }, { "epoch": 0.1252, "grad_norm": 0.18864473700523376, "learning_rate": 9.15978080416338e-05, "loss": 3.2372467041015627, "step": 112180 }, { "epoch": 0.12526666666666667, "grad_norm": 0.1850244700908661, "learning_rate": 9.159169094104625e-05, "loss": 3.4902416229248048, "step": 112190 }, { "epoch": 0.12533333333333332, "grad_norm": 0.17509028315544128, "learning_rate": 9.158557181894545e-05, "loss": 3.202339935302734, "step": 112200 }, { "epoch": 0.1254, "grad_norm": 0.18536582589149475, "learning_rate": 9.157945067562881e-05, "loss": 3.199332046508789, "step": 112210 }, { "epoch": 0.12546666666666667, "grad_norm": 0.1661641150712967, "learning_rate": 9.157332751139383e-05, "loss": 3.2391632080078123, "step": 112220 }, { "epoch": 0.12553333333333333, "grad_norm": 0.17555277049541473, "learning_rate": 9.156720232653815e-05, "loss": 3.2652351379394533, "step": 112230 }, { "epoch": 0.1256, "grad_norm": 0.6143915057182312, "learning_rate": 9.156107512135945e-05, "loss": 3.1560361862182615, "step": 112240 }, { "epoch": 0.12566666666666668, "grad_norm": 0.189873605966568, "learning_rate": 9.155494589615555e-05, "loss": 3.261944580078125, "step": 112250 }, { "epoch": 0.12573333333333334, "grad_norm": 0.18809537589550018, "learning_rate": 9.154881465122435e-05, "loss": 3.2358169555664062, "step": 112260 }, { "epoch": 0.1258, "grad_norm": 0.17801901698112488, "learning_rate": 9.154268138686386e-05, "loss": 3.244751739501953, "step": 112270 }, { "epoch": 0.12586666666666665, "grad_norm": 0.1870117038488388, "learning_rate": 9.153654610337214e-05, "loss": 3.1997940063476564, "step": 112280 }, { "epoch": 0.12593333333333334, "grad_norm": 0.1782100349664688, "learning_rate": 9.153040880104744e-05, "loss": 3.1507619857788085, "step": 112290 }, { "epoch": 0.126, "grad_norm": 0.1855946183204651, "learning_rate": 9.152426948018802e-05, "loss": 3.1772830963134764, "step": 112300 }, { "epoch": 0.12606666666666666, "grad_norm": 0.1768273413181305, "learning_rate": 9.15181281410923e-05, "loss": 3.206709289550781, "step": 112310 }, { "epoch": 0.12613333333333332, "grad_norm": 0.2009982168674469, "learning_rate": 9.151198478405875e-05, "loss": 3.2108695983886717, "step": 112320 }, { "epoch": 0.1262, "grad_norm": 0.1670883148908615, "learning_rate": 9.150583940938598e-05, "loss": 3.189632797241211, "step": 112330 }, { "epoch": 0.12626666666666667, "grad_norm": 0.770788848400116, "learning_rate": 9.149969201737267e-05, "loss": 3.308798599243164, "step": 112340 }, { "epoch": 0.12633333333333333, "grad_norm": 0.19286251068115234, "learning_rate": 9.14935426083176e-05, "loss": 3.1551946640014648, "step": 112350 }, { "epoch": 0.1264, "grad_norm": 0.1830107569694519, "learning_rate": 9.148739118251966e-05, "loss": 3.209461212158203, "step": 112360 }, { "epoch": 0.12646666666666667, "grad_norm": 0.4746004045009613, "learning_rate": 9.148123774027782e-05, "loss": 3.220672607421875, "step": 112370 }, { "epoch": 0.12653333333333333, "grad_norm": 0.18054059147834778, "learning_rate": 9.14750822818912e-05, "loss": 3.2473567962646483, "step": 112380 }, { "epoch": 0.1266, "grad_norm": 0.1803295910358429, "learning_rate": 9.146892480765892e-05, "loss": 3.2473915100097654, "step": 112390 }, { "epoch": 0.12666666666666668, "grad_norm": 0.18173012137413025, "learning_rate": 9.146276531788032e-05, "loss": 3.2134468078613283, "step": 112400 }, { "epoch": 0.12673333333333334, "grad_norm": 0.17763535678386688, "learning_rate": 9.145660381285471e-05, "loss": 3.3383079528808595, "step": 112410 }, { "epoch": 0.1268, "grad_norm": 0.16866415739059448, "learning_rate": 9.145044029288161e-05, "loss": 3.1969942092895507, "step": 112420 }, { "epoch": 0.12686666666666666, "grad_norm": 0.1838809698820114, "learning_rate": 9.144427475826058e-05, "loss": 3.165110778808594, "step": 112430 }, { "epoch": 0.12693333333333334, "grad_norm": 0.18570809066295624, "learning_rate": 9.143810720929129e-05, "loss": 3.2438304901123045, "step": 112440 }, { "epoch": 0.127, "grad_norm": 0.17428793013095856, "learning_rate": 9.143193764627348e-05, "loss": 3.2016574859619142, "step": 112450 }, { "epoch": 0.12706666666666666, "grad_norm": 0.17488564550876617, "learning_rate": 9.142576606950704e-05, "loss": 3.1855581283569334, "step": 112460 }, { "epoch": 0.12713333333333332, "grad_norm": 0.1769166886806488, "learning_rate": 9.141959247929193e-05, "loss": 3.1619800567626952, "step": 112470 }, { "epoch": 0.1272, "grad_norm": 0.1921638697385788, "learning_rate": 9.14134168759282e-05, "loss": 3.201795959472656, "step": 112480 }, { "epoch": 0.12726666666666667, "grad_norm": 0.19938601553440094, "learning_rate": 9.140723925971602e-05, "loss": 3.221731185913086, "step": 112490 }, { "epoch": 0.12733333333333333, "grad_norm": 0.1884896457195282, "learning_rate": 9.140105963095564e-05, "loss": 3.2144397735595702, "step": 112500 }, { "epoch": 0.1274, "grad_norm": 0.1795593500137329, "learning_rate": 9.139487798994739e-05, "loss": 3.1826641082763674, "step": 112510 }, { "epoch": 0.12746666666666667, "grad_norm": 0.4091011583805084, "learning_rate": 9.138869433699178e-05, "loss": 3.234404754638672, "step": 112520 }, { "epoch": 0.12753333333333333, "grad_norm": 0.16493229568004608, "learning_rate": 9.138250867238929e-05, "loss": 3.173012924194336, "step": 112530 }, { "epoch": 0.1276, "grad_norm": 0.17220903933048248, "learning_rate": 9.137632099644061e-05, "loss": 3.2449737548828126, "step": 112540 }, { "epoch": 0.12766666666666668, "grad_norm": 0.1906852126121521, "learning_rate": 9.137013130944647e-05, "loss": 3.2313941955566405, "step": 112550 }, { "epoch": 0.12773333333333334, "grad_norm": 0.17561602592468262, "learning_rate": 9.136393961170772e-05, "loss": 3.256842041015625, "step": 112560 }, { "epoch": 0.1278, "grad_norm": 0.18433788418769836, "learning_rate": 9.135774590352528e-05, "loss": 3.1840864181518556, "step": 112570 }, { "epoch": 0.12786666666666666, "grad_norm": 0.17279337346553802, "learning_rate": 9.135155018520023e-05, "loss": 3.187165451049805, "step": 112580 }, { "epoch": 0.12793333333333334, "grad_norm": 0.195037379860878, "learning_rate": 9.134535245703367e-05, "loss": 3.2171554565429688, "step": 112590 }, { "epoch": 0.128, "grad_norm": 0.24321947991847992, "learning_rate": 9.133915271932682e-05, "loss": 3.2502262115478517, "step": 112600 }, { "epoch": 0.12806666666666666, "grad_norm": 0.20539063215255737, "learning_rate": 9.133295097238104e-05, "loss": 3.3009159088134767, "step": 112610 }, { "epoch": 0.12813333333333332, "grad_norm": 0.1914736032485962, "learning_rate": 9.132674721649775e-05, "loss": 3.2300235748291017, "step": 112620 }, { "epoch": 0.1282, "grad_norm": 0.2713460624217987, "learning_rate": 9.132054145197848e-05, "loss": 3.4413421630859373, "step": 112630 }, { "epoch": 0.12826666666666667, "grad_norm": 0.1718793511390686, "learning_rate": 9.131433367912486e-05, "loss": 3.279451370239258, "step": 112640 }, { "epoch": 0.12833333333333333, "grad_norm": 0.1742272973060608, "learning_rate": 9.130812389823858e-05, "loss": 3.2277122497558595, "step": 112650 }, { "epoch": 0.1284, "grad_norm": 0.19044530391693115, "learning_rate": 9.130191210962149e-05, "loss": 3.248968505859375, "step": 112660 }, { "epoch": 0.12846666666666667, "grad_norm": 0.17481181025505066, "learning_rate": 9.129569831357549e-05, "loss": 3.2411270141601562, "step": 112670 }, { "epoch": 0.12853333333333333, "grad_norm": 0.2313205897808075, "learning_rate": 9.128948251040262e-05, "loss": 3.1697996139526365, "step": 112680 }, { "epoch": 0.1286, "grad_norm": 0.1728314906358719, "learning_rate": 9.128326470040495e-05, "loss": 3.2771602630615235, "step": 112690 }, { "epoch": 0.12866666666666668, "grad_norm": 0.18378204107284546, "learning_rate": 9.127704488388471e-05, "loss": 3.1674095153808595, "step": 112700 }, { "epoch": 0.12873333333333334, "grad_norm": 0.19730857014656067, "learning_rate": 9.127082306114422e-05, "loss": 3.2010524749755858, "step": 112710 }, { "epoch": 0.1288, "grad_norm": 0.19529220461845398, "learning_rate": 9.126459923248586e-05, "loss": 3.1797792434692385, "step": 112720 }, { "epoch": 0.12886666666666666, "grad_norm": 0.20870976150035858, "learning_rate": 9.125837339821214e-05, "loss": 3.1775665283203125, "step": 112730 }, { "epoch": 0.12893333333333334, "grad_norm": 0.1851048320531845, "learning_rate": 9.125214555862567e-05, "loss": 3.2170745849609377, "step": 112740 }, { "epoch": 0.129, "grad_norm": 0.1788458526134491, "learning_rate": 9.124591571402914e-05, "loss": 3.235788345336914, "step": 112750 }, { "epoch": 0.12906666666666666, "grad_norm": 0.16778619587421417, "learning_rate": 9.123968386472533e-05, "loss": 3.217586135864258, "step": 112760 }, { "epoch": 0.12913333333333332, "grad_norm": 0.1839594691991806, "learning_rate": 9.123345001101715e-05, "loss": 3.2219879150390627, "step": 112770 }, { "epoch": 0.1292, "grad_norm": 0.17065726220607758, "learning_rate": 9.122721415320759e-05, "loss": 3.200637435913086, "step": 112780 }, { "epoch": 0.12926666666666667, "grad_norm": 0.18054766952991486, "learning_rate": 9.122097629159971e-05, "loss": 3.1877891540527346, "step": 112790 }, { "epoch": 0.12933333333333333, "grad_norm": 0.16712810099124908, "learning_rate": 9.121473642649674e-05, "loss": 3.179124450683594, "step": 112800 }, { "epoch": 0.1294, "grad_norm": 0.46022871136665344, "learning_rate": 9.120849455820191e-05, "loss": 3.1668859481811524, "step": 112810 }, { "epoch": 0.12946666666666667, "grad_norm": 0.2039223164319992, "learning_rate": 9.120225068701861e-05, "loss": 3.2328968048095703, "step": 112820 }, { "epoch": 0.12953333333333333, "grad_norm": 0.19142772257328033, "learning_rate": 9.119600481325036e-05, "loss": 3.2289573669433596, "step": 112830 }, { "epoch": 0.1296, "grad_norm": 0.17681922018527985, "learning_rate": 9.118975693720069e-05, "loss": 3.185886764526367, "step": 112840 }, { "epoch": 0.12966666666666668, "grad_norm": 0.19459176063537598, "learning_rate": 9.118350705917327e-05, "loss": 3.205418014526367, "step": 112850 }, { "epoch": 0.12973333333333334, "grad_norm": 0.18502797186374664, "learning_rate": 9.117725517947188e-05, "loss": 3.0844961166381837, "step": 112860 }, { "epoch": 0.1298, "grad_norm": 0.17093487083911896, "learning_rate": 9.117100129840039e-05, "loss": 3.183925437927246, "step": 112870 }, { "epoch": 0.12986666666666666, "grad_norm": 0.6055653691291809, "learning_rate": 9.116474541626277e-05, "loss": 3.3043277740478514, "step": 112880 }, { "epoch": 0.12993333333333335, "grad_norm": 0.1970231831073761, "learning_rate": 9.115848753336303e-05, "loss": 3.2429759979248045, "step": 112890 }, { "epoch": 0.13, "grad_norm": 0.16846957802772522, "learning_rate": 9.115222765000538e-05, "loss": 3.233433151245117, "step": 112900 }, { "epoch": 0.13006666666666666, "grad_norm": 0.16942036151885986, "learning_rate": 9.114596576649406e-05, "loss": 3.217433547973633, "step": 112910 }, { "epoch": 0.13013333333333332, "grad_norm": 0.18028593063354492, "learning_rate": 9.113970188313341e-05, "loss": 3.2230449676513673, "step": 112920 }, { "epoch": 0.1302, "grad_norm": 0.17334508895874023, "learning_rate": 9.113343600022789e-05, "loss": 3.223783493041992, "step": 112930 }, { "epoch": 0.13026666666666667, "grad_norm": 0.18171875178813934, "learning_rate": 9.112716811808203e-05, "loss": 3.231059265136719, "step": 112940 }, { "epoch": 0.13033333333333333, "grad_norm": 0.1983022540807724, "learning_rate": 9.11208982370005e-05, "loss": 3.171686363220215, "step": 112950 }, { "epoch": 0.1304, "grad_norm": 0.17307350039482117, "learning_rate": 9.111462635728803e-05, "loss": 3.213716506958008, "step": 112960 }, { "epoch": 0.13046666666666668, "grad_norm": 0.5150696635246277, "learning_rate": 9.110835247924942e-05, "loss": 3.177633857727051, "step": 112970 }, { "epoch": 0.13053333333333333, "grad_norm": 0.26395556330680847, "learning_rate": 9.110207660318966e-05, "loss": 3.2012271881103516, "step": 112980 }, { "epoch": 0.1306, "grad_norm": 0.21589502692222595, "learning_rate": 9.109579872941374e-05, "loss": 3.215406036376953, "step": 112990 }, { "epoch": 0.13066666666666665, "grad_norm": 0.3070248067378998, "learning_rate": 9.10895188582268e-05, "loss": 3.230615234375, "step": 113000 }, { "epoch": 0.13073333333333334, "grad_norm": 0.17308887839317322, "learning_rate": 9.10832369899341e-05, "loss": 3.173346519470215, "step": 113010 }, { "epoch": 0.1308, "grad_norm": 0.23106347024440765, "learning_rate": 9.10769531248409e-05, "loss": 3.1923303604125977, "step": 113020 }, { "epoch": 0.13086666666666666, "grad_norm": 0.1701957881450653, "learning_rate": 9.107066726325267e-05, "loss": 3.1917856216430662, "step": 113030 }, { "epoch": 0.13093333333333335, "grad_norm": 0.18841411173343658, "learning_rate": 9.106437940547491e-05, "loss": 3.2415897369384767, "step": 113040 }, { "epoch": 0.131, "grad_norm": 0.21292553842067719, "learning_rate": 9.105808955181323e-05, "loss": 3.045858955383301, "step": 113050 }, { "epoch": 0.13106666666666666, "grad_norm": 0.18023090064525604, "learning_rate": 9.105179770257333e-05, "loss": 3.256398391723633, "step": 113060 }, { "epoch": 0.13113333333333332, "grad_norm": 0.2055298388004303, "learning_rate": 9.104550385806103e-05, "loss": 3.2250396728515627, "step": 113070 }, { "epoch": 0.1312, "grad_norm": 0.17475678026676178, "learning_rate": 9.103920801858225e-05, "loss": 3.261698913574219, "step": 113080 }, { "epoch": 0.13126666666666667, "grad_norm": 0.17855167388916016, "learning_rate": 9.103291018444297e-05, "loss": 3.207578659057617, "step": 113090 }, { "epoch": 0.13133333333333333, "grad_norm": 0.4113374650478363, "learning_rate": 9.10266103559493e-05, "loss": 3.2358306884765624, "step": 113100 }, { "epoch": 0.1314, "grad_norm": 0.19640536606311798, "learning_rate": 9.102030853340743e-05, "loss": 3.2008617401123045, "step": 113110 }, { "epoch": 0.13146666666666668, "grad_norm": 0.1936882883310318, "learning_rate": 9.101400471712364e-05, "loss": 3.2721485137939452, "step": 113120 }, { "epoch": 0.13153333333333334, "grad_norm": 0.1764295995235443, "learning_rate": 9.100769890740435e-05, "loss": 3.228588104248047, "step": 113130 }, { "epoch": 0.1316, "grad_norm": 0.17790333926677704, "learning_rate": 9.100139110455603e-05, "loss": 3.2047565460205076, "step": 113140 }, { "epoch": 0.13166666666666665, "grad_norm": 0.17653606832027435, "learning_rate": 9.099508130888524e-05, "loss": 3.2675209045410156, "step": 113150 }, { "epoch": 0.13173333333333334, "grad_norm": 0.16932667791843414, "learning_rate": 9.09887695206987e-05, "loss": 3.215300369262695, "step": 113160 }, { "epoch": 0.1318, "grad_norm": 0.20065909624099731, "learning_rate": 9.098245574030315e-05, "loss": 3.1823236465454103, "step": 113170 }, { "epoch": 0.13186666666666666, "grad_norm": 0.19858765602111816, "learning_rate": 9.097613996800549e-05, "loss": 3.170137405395508, "step": 113180 }, { "epoch": 0.13193333333333335, "grad_norm": 0.1749795377254486, "learning_rate": 9.09698222041127e-05, "loss": 3.1848285675048826, "step": 113190 }, { "epoch": 0.132, "grad_norm": 0.18668027222156525, "learning_rate": 9.096350244893182e-05, "loss": 3.231436920166016, "step": 113200 }, { "epoch": 0.13206666666666667, "grad_norm": 0.23210349678993225, "learning_rate": 9.095718070277001e-05, "loss": 3.2399707794189454, "step": 113210 }, { "epoch": 0.13213333333333332, "grad_norm": 0.2070767730474472, "learning_rate": 9.095085696593455e-05, "loss": 3.159801483154297, "step": 113220 }, { "epoch": 0.1322, "grad_norm": 0.18630200624465942, "learning_rate": 9.094453123873279e-05, "loss": 3.1840097427368166, "step": 113230 }, { "epoch": 0.13226666666666667, "grad_norm": 0.1910122036933899, "learning_rate": 9.093820352147219e-05, "loss": 3.1370716094970703, "step": 113240 }, { "epoch": 0.13233333333333333, "grad_norm": 0.1869906485080719, "learning_rate": 9.093187381446028e-05, "loss": 3.164831352233887, "step": 113250 }, { "epoch": 0.1324, "grad_norm": 0.1781224012374878, "learning_rate": 9.092554211800474e-05, "loss": 3.2084629058837892, "step": 113260 }, { "epoch": 0.13246666666666668, "grad_norm": 0.1806883066892624, "learning_rate": 9.091920843241331e-05, "loss": 3.208434295654297, "step": 113270 }, { "epoch": 0.13253333333333334, "grad_norm": 0.23964190483093262, "learning_rate": 9.09128727579938e-05, "loss": 3.184771728515625, "step": 113280 }, { "epoch": 0.1326, "grad_norm": 0.18390165269374847, "learning_rate": 9.090653509505418e-05, "loss": 3.179881477355957, "step": 113290 }, { "epoch": 0.13266666666666665, "grad_norm": 0.18001873791217804, "learning_rate": 9.090019544390246e-05, "loss": 3.146690559387207, "step": 113300 }, { "epoch": 0.13273333333333334, "grad_norm": 0.17776484787464142, "learning_rate": 9.08938538048468e-05, "loss": 3.1711780548095705, "step": 113310 }, { "epoch": 0.1328, "grad_norm": 0.1867818534374237, "learning_rate": 9.08875101781954e-05, "loss": 3.1843517303466795, "step": 113320 }, { "epoch": 0.13286666666666666, "grad_norm": 0.20086845755577087, "learning_rate": 9.088116456425659e-05, "loss": 3.2419551849365233, "step": 113330 }, { "epoch": 0.13293333333333332, "grad_norm": 0.22052516043186188, "learning_rate": 9.08748169633388e-05, "loss": 3.1849971771240235, "step": 113340 }, { "epoch": 0.133, "grad_norm": 0.19859808683395386, "learning_rate": 9.086846737575054e-05, "loss": 3.1890838623046873, "step": 113350 }, { "epoch": 0.13306666666666667, "grad_norm": 0.19316038489341736, "learning_rate": 9.086211580180044e-05, "loss": 3.1868522644042967, "step": 113360 }, { "epoch": 0.13313333333333333, "grad_norm": 0.1895970106124878, "learning_rate": 9.085576224179718e-05, "loss": 3.194872283935547, "step": 113370 }, { "epoch": 0.1332, "grad_norm": 0.18416474759578705, "learning_rate": 9.08494066960496e-05, "loss": 3.1911964416503906, "step": 113380 }, { "epoch": 0.13326666666666667, "grad_norm": 0.1822071075439453, "learning_rate": 9.084304916486657e-05, "loss": 3.183609962463379, "step": 113390 }, { "epoch": 0.13333333333333333, "grad_norm": 0.20603902637958527, "learning_rate": 9.083668964855712e-05, "loss": 3.21002197265625, "step": 113400 }, { "epoch": 0.1334, "grad_norm": 0.6076306104660034, "learning_rate": 9.083032814743033e-05, "loss": 3.2904727935791014, "step": 113410 }, { "epoch": 0.13346666666666668, "grad_norm": 0.1847466379404068, "learning_rate": 9.082396466179538e-05, "loss": 3.248929977416992, "step": 113420 }, { "epoch": 0.13353333333333334, "grad_norm": 0.1810804158449173, "learning_rate": 9.081759919196161e-05, "loss": 3.2467079162597656, "step": 113430 }, { "epoch": 0.1336, "grad_norm": 0.16459889709949493, "learning_rate": 9.081123173823836e-05, "loss": 3.1763912200927735, "step": 113440 }, { "epoch": 0.13366666666666666, "grad_norm": 0.1811525970697403, "learning_rate": 9.080486230093512e-05, "loss": 3.1693220138549805, "step": 113450 }, { "epoch": 0.13373333333333334, "grad_norm": 0.17141661047935486, "learning_rate": 9.079849088036147e-05, "loss": 3.2004619598388673, "step": 113460 }, { "epoch": 0.1338, "grad_norm": 0.16907645761966705, "learning_rate": 9.07921174768271e-05, "loss": 3.180263137817383, "step": 113470 }, { "epoch": 0.13386666666666666, "grad_norm": 0.18556618690490723, "learning_rate": 9.078574209064175e-05, "loss": 3.1830474853515627, "step": 113480 }, { "epoch": 0.13393333333333332, "grad_norm": 0.1725897639989853, "learning_rate": 9.077936472211532e-05, "loss": 3.150063133239746, "step": 113490 }, { "epoch": 0.134, "grad_norm": 0.21068304777145386, "learning_rate": 9.077298537155778e-05, "loss": 3.2592288970947267, "step": 113500 }, { "epoch": 0.13406666666666667, "grad_norm": 0.17300409078598022, "learning_rate": 9.076660403927914e-05, "loss": 3.1738107681274412, "step": 113510 }, { "epoch": 0.13413333333333333, "grad_norm": 0.1929457038640976, "learning_rate": 9.076022072558961e-05, "loss": 3.170249366760254, "step": 113520 }, { "epoch": 0.1342, "grad_norm": 0.19834600389003754, "learning_rate": 9.075383543079943e-05, "loss": 3.171519470214844, "step": 113530 }, { "epoch": 0.13426666666666667, "grad_norm": 0.45577773451805115, "learning_rate": 9.074744815521891e-05, "loss": 3.318266677856445, "step": 113540 }, { "epoch": 0.13433333333333333, "grad_norm": 0.1690557599067688, "learning_rate": 9.074105889915856e-05, "loss": 3.22008056640625, "step": 113550 }, { "epoch": 0.1344, "grad_norm": 0.16922929883003235, "learning_rate": 9.073466766292888e-05, "loss": 3.293321228027344, "step": 113560 }, { "epoch": 0.13446666666666668, "grad_norm": 0.17941083014011383, "learning_rate": 9.072827444684051e-05, "loss": 3.1760496139526366, "step": 113570 }, { "epoch": 0.13453333333333334, "grad_norm": 0.18559306859970093, "learning_rate": 9.07218792512042e-05, "loss": 3.25958137512207, "step": 113580 }, { "epoch": 0.1346, "grad_norm": 0.18160293996334076, "learning_rate": 9.071548207633077e-05, "loss": 3.203842544555664, "step": 113590 }, { "epoch": 0.13466666666666666, "grad_norm": 0.18580099940299988, "learning_rate": 9.070908292253115e-05, "loss": 3.1759241104125975, "step": 113600 }, { "epoch": 0.13473333333333334, "grad_norm": 0.1852956861257553, "learning_rate": 9.070268179011637e-05, "loss": 3.1352943420410155, "step": 113610 }, { "epoch": 0.1348, "grad_norm": 0.16744637489318848, "learning_rate": 9.069627867939754e-05, "loss": 3.2169933319091797, "step": 113620 }, { "epoch": 0.13486666666666666, "grad_norm": 0.17488180100917816, "learning_rate": 9.068987359068587e-05, "loss": 3.1901731491088867, "step": 113630 }, { "epoch": 0.13493333333333332, "grad_norm": 0.17688529193401337, "learning_rate": 9.068346652429268e-05, "loss": 3.1340761184692383, "step": 113640 }, { "epoch": 0.135, "grad_norm": 0.1906961351633072, "learning_rate": 9.067705748052938e-05, "loss": 3.1703218460083007, "step": 113650 }, { "epoch": 0.13506666666666667, "grad_norm": 0.16303399205207825, "learning_rate": 9.067064645970746e-05, "loss": 3.1840171813964844, "step": 113660 }, { "epoch": 0.13513333333333333, "grad_norm": 0.3460107147693634, "learning_rate": 9.066423346213855e-05, "loss": 3.1630191802978516, "step": 113670 }, { "epoch": 0.1352, "grad_norm": 0.29941242933273315, "learning_rate": 9.065781848813432e-05, "loss": 3.2462047576904296, "step": 113680 }, { "epoch": 0.13526666666666667, "grad_norm": 0.17341387271881104, "learning_rate": 9.065140153800656e-05, "loss": 3.244545745849609, "step": 113690 }, { "epoch": 0.13533333333333333, "grad_norm": 0.22260800004005432, "learning_rate": 9.064498261206717e-05, "loss": 3.169811248779297, "step": 113700 }, { "epoch": 0.1354, "grad_norm": 0.20994016528129578, "learning_rate": 9.063856171062813e-05, "loss": 3.1836095809936524, "step": 113710 }, { "epoch": 0.13546666666666668, "grad_norm": 0.1696045994758606, "learning_rate": 9.063213883400153e-05, "loss": 3.177886962890625, "step": 113720 }, { "epoch": 0.13553333333333334, "grad_norm": 0.17984582483768463, "learning_rate": 9.062571398249953e-05, "loss": 3.13616828918457, "step": 113730 }, { "epoch": 0.1356, "grad_norm": 0.18481217324733734, "learning_rate": 9.06192871564344e-05, "loss": 3.1803300857543944, "step": 113740 }, { "epoch": 0.13566666666666666, "grad_norm": 0.19947277009487152, "learning_rate": 9.061285835611853e-05, "loss": 2.780478668212891, "step": 113750 }, { "epoch": 0.13573333333333334, "grad_norm": 0.4731743633747101, "learning_rate": 9.060642758186434e-05, "loss": 3.25928955078125, "step": 113760 }, { "epoch": 0.1358, "grad_norm": 0.18187925219535828, "learning_rate": 9.059999483398446e-05, "loss": 3.222382354736328, "step": 113770 }, { "epoch": 0.13586666666666666, "grad_norm": 0.19144578278064728, "learning_rate": 9.05935601127915e-05, "loss": 3.1907352447509765, "step": 113780 }, { "epoch": 0.13593333333333332, "grad_norm": 0.1840675175189972, "learning_rate": 9.05871234185982e-05, "loss": 3.1247976303100584, "step": 113790 }, { "epoch": 0.136, "grad_norm": 0.19520743191242218, "learning_rate": 9.058068475171742e-05, "loss": 3.211517333984375, "step": 113800 }, { "epoch": 0.13606666666666667, "grad_norm": 0.16642089188098907, "learning_rate": 9.057424411246213e-05, "loss": 3.1919431686401367, "step": 113810 }, { "epoch": 0.13613333333333333, "grad_norm": 0.18517529964447021, "learning_rate": 9.056780150114535e-05, "loss": 3.175059700012207, "step": 113820 }, { "epoch": 0.1362, "grad_norm": 0.1794176697731018, "learning_rate": 9.056135691808019e-05, "loss": 3.261687469482422, "step": 113830 }, { "epoch": 0.13626666666666667, "grad_norm": 0.19457800686359406, "learning_rate": 9.055491036357992e-05, "loss": 3.2881961822509767, "step": 113840 }, { "epoch": 0.13633333333333333, "grad_norm": 0.18108032643795013, "learning_rate": 9.054846183795784e-05, "loss": 3.173267364501953, "step": 113850 }, { "epoch": 0.1364, "grad_norm": 0.18153244256973267, "learning_rate": 9.05420113415274e-05, "loss": 3.247394561767578, "step": 113860 }, { "epoch": 0.13646666666666665, "grad_norm": 0.17425650358200073, "learning_rate": 9.05355588746021e-05, "loss": 3.2729034423828125, "step": 113870 }, { "epoch": 0.13653333333333334, "grad_norm": 0.17988216876983643, "learning_rate": 9.052910443749554e-05, "loss": 3.1754806518554686, "step": 113880 }, { "epoch": 0.1366, "grad_norm": 0.17814460396766663, "learning_rate": 9.052264803052147e-05, "loss": 3.2257125854492186, "step": 113890 }, { "epoch": 0.13666666666666666, "grad_norm": 0.19629643857479095, "learning_rate": 9.051618965399367e-05, "loss": 3.1152921676635743, "step": 113900 }, { "epoch": 0.13673333333333335, "grad_norm": 0.18694061040878296, "learning_rate": 9.050972930822603e-05, "loss": 3.1845424652099608, "step": 113910 }, { "epoch": 0.1368, "grad_norm": 0.18583494424819946, "learning_rate": 9.050326699353257e-05, "loss": 3.1695350646972655, "step": 113920 }, { "epoch": 0.13686666666666666, "grad_norm": 0.3942601978778839, "learning_rate": 9.049680271022738e-05, "loss": 3.188716697692871, "step": 113930 }, { "epoch": 0.13693333333333332, "grad_norm": 0.21978148818016052, "learning_rate": 9.049033645862464e-05, "loss": 3.1533485412597657, "step": 113940 }, { "epoch": 0.137, "grad_norm": 0.17789925634860992, "learning_rate": 9.048386823903863e-05, "loss": 3.2980113983154298, "step": 113950 }, { "epoch": 0.13706666666666667, "grad_norm": 0.18772044777870178, "learning_rate": 9.047739805178375e-05, "loss": 3.1813386917114257, "step": 113960 }, { "epoch": 0.13713333333333333, "grad_norm": 0.19553543627262115, "learning_rate": 9.047092589717443e-05, "loss": 3.2116935729980467, "step": 113970 }, { "epoch": 0.1372, "grad_norm": 0.19662505388259888, "learning_rate": 9.046445177552531e-05, "loss": 3.1691091537475584, "step": 113980 }, { "epoch": 0.13726666666666668, "grad_norm": 0.21935807168483734, "learning_rate": 9.045797568715099e-05, "loss": 3.210488128662109, "step": 113990 }, { "epoch": 0.13733333333333334, "grad_norm": 0.43772196769714355, "learning_rate": 9.04514976323663e-05, "loss": 3.2749866485595702, "step": 114000 }, { "epoch": 0.1374, "grad_norm": 0.20814593136310577, "learning_rate": 9.044501761148603e-05, "loss": 3.2279659271240235, "step": 114010 }, { "epoch": 0.13746666666666665, "grad_norm": 0.32545289397239685, "learning_rate": 9.043853562482518e-05, "loss": 3.3052555084228517, "step": 114020 }, { "epoch": 0.13753333333333334, "grad_norm": 0.1885615885257721, "learning_rate": 9.04320516726988e-05, "loss": 3.198647880554199, "step": 114030 }, { "epoch": 0.1376, "grad_norm": 0.19361995160579681, "learning_rate": 9.042556575542198e-05, "loss": 3.2053661346435547, "step": 114040 }, { "epoch": 0.13766666666666666, "grad_norm": 0.16662664711475372, "learning_rate": 9.041907787331002e-05, "loss": 3.1342161178588865, "step": 114050 }, { "epoch": 0.13773333333333335, "grad_norm": 0.2946757376194, "learning_rate": 9.041258802667823e-05, "loss": 3.244381332397461, "step": 114060 }, { "epoch": 0.1378, "grad_norm": 0.2397407740354538, "learning_rate": 9.040609621584204e-05, "loss": 3.171159553527832, "step": 114070 }, { "epoch": 0.13786666666666667, "grad_norm": 0.17920348048210144, "learning_rate": 9.039960244111698e-05, "loss": 3.1746990203857424, "step": 114080 }, { "epoch": 0.13793333333333332, "grad_norm": 0.21274632215499878, "learning_rate": 9.039310670281869e-05, "loss": 3.2202735900878907, "step": 114090 }, { "epoch": 0.138, "grad_norm": 0.19215577840805054, "learning_rate": 9.038660900126286e-05, "loss": 3.2301326751708985, "step": 114100 }, { "epoch": 0.13806666666666667, "grad_norm": 0.18641048669815063, "learning_rate": 9.038010933676531e-05, "loss": 3.1675527572631834, "step": 114110 }, { "epoch": 0.13813333333333333, "grad_norm": 0.2627963125705719, "learning_rate": 9.037360770964195e-05, "loss": 3.2164947509765627, "step": 114120 }, { "epoch": 0.1382, "grad_norm": 0.18995587527751923, "learning_rate": 9.036710412020879e-05, "loss": 3.1909709930419923, "step": 114130 }, { "epoch": 0.13826666666666668, "grad_norm": 0.17934869229793549, "learning_rate": 9.036059856878192e-05, "loss": 3.1736129760742187, "step": 114140 }, { "epoch": 0.13833333333333334, "grad_norm": 0.1779748499393463, "learning_rate": 9.035409105567755e-05, "loss": 3.1349706649780273, "step": 114150 }, { "epoch": 0.1384, "grad_norm": 0.194613516330719, "learning_rate": 9.034758158121195e-05, "loss": 3.1492082595825197, "step": 114160 }, { "epoch": 0.13846666666666665, "grad_norm": 0.3132864832878113, "learning_rate": 9.03410701457015e-05, "loss": 3.2092910766601563, "step": 114170 }, { "epoch": 0.13853333333333334, "grad_norm": 0.19200824201107025, "learning_rate": 9.033455674946271e-05, "loss": 3.1764644622802733, "step": 114180 }, { "epoch": 0.1386, "grad_norm": 0.1788487732410431, "learning_rate": 9.032804139281214e-05, "loss": 3.235658645629883, "step": 114190 }, { "epoch": 0.13866666666666666, "grad_norm": 0.19541221857070923, "learning_rate": 9.032152407606646e-05, "loss": 3.1671470642089843, "step": 114200 }, { "epoch": 0.13873333333333332, "grad_norm": 0.19844593107700348, "learning_rate": 9.031500479954244e-05, "loss": 3.2051300048828124, "step": 114210 }, { "epoch": 0.1388, "grad_norm": 0.18376082181930542, "learning_rate": 9.030848356355692e-05, "loss": 3.2002235412597657, "step": 114220 }, { "epoch": 0.13886666666666667, "grad_norm": 0.19381237030029297, "learning_rate": 9.030196036842689e-05, "loss": 3.321832275390625, "step": 114230 }, { "epoch": 0.13893333333333333, "grad_norm": 0.19356265664100647, "learning_rate": 9.029543521446938e-05, "loss": 3.1363258361816406, "step": 114240 }, { "epoch": 0.139, "grad_norm": 0.1830986887216568, "learning_rate": 9.028890810200154e-05, "loss": 3.1875892639160157, "step": 114250 }, { "epoch": 0.13906666666666667, "grad_norm": 0.2150091528892517, "learning_rate": 9.028237903134063e-05, "loss": 3.201552963256836, "step": 114260 }, { "epoch": 0.13913333333333333, "grad_norm": 0.20294474065303802, "learning_rate": 9.027584800280398e-05, "loss": 3.1820343017578123, "step": 114270 }, { "epoch": 0.1392, "grad_norm": 0.18313606083393097, "learning_rate": 9.026931501670899e-05, "loss": 3.305310821533203, "step": 114280 }, { "epoch": 0.13926666666666668, "grad_norm": 0.18067806959152222, "learning_rate": 9.026278007337323e-05, "loss": 3.287954330444336, "step": 114290 }, { "epoch": 0.13933333333333334, "grad_norm": 0.2084859311580658, "learning_rate": 9.025624317311429e-05, "loss": 3.1991729736328125, "step": 114300 }, { "epoch": 0.1394, "grad_norm": 0.18028248846530914, "learning_rate": 9.024970431624993e-05, "loss": 3.125703239440918, "step": 114310 }, { "epoch": 0.13946666666666666, "grad_norm": 0.21896566450595856, "learning_rate": 9.024316350309793e-05, "loss": 3.146208381652832, "step": 114320 }, { "epoch": 0.13953333333333334, "grad_norm": 0.6390248537063599, "learning_rate": 9.02366207339762e-05, "loss": 3.3201904296875, "step": 114330 }, { "epoch": 0.1396, "grad_norm": 0.22961311042308807, "learning_rate": 9.023007600920275e-05, "loss": 3.3014434814453124, "step": 114340 }, { "epoch": 0.13966666666666666, "grad_norm": 0.2901480197906494, "learning_rate": 9.022352932909569e-05, "loss": 3.20198974609375, "step": 114350 }, { "epoch": 0.13973333333333332, "grad_norm": 0.4110122323036194, "learning_rate": 9.021698069397318e-05, "loss": 3.247548294067383, "step": 114360 }, { "epoch": 0.1398, "grad_norm": 0.1935773640871048, "learning_rate": 9.021043010415355e-05, "loss": 3.184442329406738, "step": 114370 }, { "epoch": 0.13986666666666667, "grad_norm": 0.17001038789749146, "learning_rate": 9.020387755995515e-05, "loss": 3.2104423522949217, "step": 114380 }, { "epoch": 0.13993333333333333, "grad_norm": 0.1939748078584671, "learning_rate": 9.019732306169648e-05, "loss": 3.1919153213500975, "step": 114390 }, { "epoch": 0.14, "grad_norm": 0.19822168350219727, "learning_rate": 9.01907666096961e-05, "loss": 3.131729507446289, "step": 114400 }, { "epoch": 0.14006666666666667, "grad_norm": 0.29900863766670227, "learning_rate": 9.018420820427269e-05, "loss": 3.249318313598633, "step": 114410 }, { "epoch": 0.14013333333333333, "grad_norm": 0.19753113389015198, "learning_rate": 9.0177647845745e-05, "loss": 3.173324394226074, "step": 114420 }, { "epoch": 0.1402, "grad_norm": 0.18416723608970642, "learning_rate": 9.017108553443189e-05, "loss": 3.2081817626953124, "step": 114430 }, { "epoch": 0.14026666666666668, "grad_norm": 0.21802785992622375, "learning_rate": 9.01645212706523e-05, "loss": 3.212560272216797, "step": 114440 }, { "epoch": 0.14033333333333334, "grad_norm": 0.1940755993127823, "learning_rate": 9.015795505472534e-05, "loss": 3.1659912109375, "step": 114450 }, { "epoch": 0.1404, "grad_norm": 0.1948695331811905, "learning_rate": 9.015138688697009e-05, "loss": 3.2107635498046876, "step": 114460 }, { "epoch": 0.14046666666666666, "grad_norm": 0.19362348318099976, "learning_rate": 9.01448167677058e-05, "loss": 3.2567279815673826, "step": 114470 }, { "epoch": 0.14053333333333334, "grad_norm": 0.1888050138950348, "learning_rate": 9.013824469725181e-05, "loss": 3.083976936340332, "step": 114480 }, { "epoch": 0.1406, "grad_norm": 0.22269375622272491, "learning_rate": 9.013167067592756e-05, "loss": 3.147875213623047, "step": 114490 }, { "epoch": 0.14066666666666666, "grad_norm": 0.17424488067626953, "learning_rate": 9.012509470405255e-05, "loss": 3.1685977935791017, "step": 114500 }, { "epoch": 0.14073333333333332, "grad_norm": 0.18194545805454254, "learning_rate": 9.01185167819464e-05, "loss": 3.134779930114746, "step": 114510 }, { "epoch": 0.1408, "grad_norm": 0.17762227356433868, "learning_rate": 9.011193690992884e-05, "loss": 3.147483253479004, "step": 114520 }, { "epoch": 0.14086666666666667, "grad_norm": 0.2029266506433487, "learning_rate": 9.010535508831965e-05, "loss": 3.166990280151367, "step": 114530 }, { "epoch": 0.14093333333333333, "grad_norm": 0.18297815322875977, "learning_rate": 9.009877131743874e-05, "loss": 3.168257141113281, "step": 114540 }, { "epoch": 0.141, "grad_norm": 0.1734992265701294, "learning_rate": 9.009218559760612e-05, "loss": 3.2384719848632812, "step": 114550 }, { "epoch": 0.14106666666666667, "grad_norm": 0.18154776096343994, "learning_rate": 9.008559792914188e-05, "loss": 3.175594711303711, "step": 114560 }, { "epoch": 0.14113333333333333, "grad_norm": 0.18030932545661926, "learning_rate": 9.007900831236619e-05, "loss": 3.1904226303100587, "step": 114570 }, { "epoch": 0.1412, "grad_norm": 0.17187371850013733, "learning_rate": 9.007241674759935e-05, "loss": 3.1994171142578125, "step": 114580 }, { "epoch": 0.14126666666666668, "grad_norm": 0.7026723623275757, "learning_rate": 9.006582323516173e-05, "loss": 3.174205780029297, "step": 114590 }, { "epoch": 0.14133333333333334, "grad_norm": 0.1738448292016983, "learning_rate": 9.005922777537377e-05, "loss": 3.2075061798095703, "step": 114600 }, { "epoch": 0.1414, "grad_norm": 0.1861000657081604, "learning_rate": 9.005263036855607e-05, "loss": 3.2447933197021483, "step": 114610 }, { "epoch": 0.14146666666666666, "grad_norm": 0.17767398059368134, "learning_rate": 9.004603101502926e-05, "loss": 3.2310203552246093, "step": 114620 }, { "epoch": 0.14153333333333334, "grad_norm": 0.17314204573631287, "learning_rate": 9.003942971511414e-05, "loss": 3.1545713424682615, "step": 114630 }, { "epoch": 0.1416, "grad_norm": 0.19053491950035095, "learning_rate": 9.00328264691315e-05, "loss": 3.2287673950195312, "step": 114640 }, { "epoch": 0.14166666666666666, "grad_norm": 0.19653521478176117, "learning_rate": 9.002622127740232e-05, "loss": 3.216690444946289, "step": 114650 }, { "epoch": 0.14173333333333332, "grad_norm": 0.1842586249113083, "learning_rate": 9.001961414024764e-05, "loss": 3.1622066497802734, "step": 114660 }, { "epoch": 0.1418, "grad_norm": 0.19822576642036438, "learning_rate": 9.001300505798857e-05, "loss": 3.0850605010986327, "step": 114670 }, { "epoch": 0.14186666666666667, "grad_norm": 0.17581920325756073, "learning_rate": 9.000639403094634e-05, "loss": 3.1727344512939455, "step": 114680 }, { "epoch": 0.14193333333333333, "grad_norm": 0.1730213463306427, "learning_rate": 8.999978105944228e-05, "loss": 3.1815057754516602, "step": 114690 }, { "epoch": 0.142, "grad_norm": 0.1835562139749527, "learning_rate": 8.99931661437978e-05, "loss": 3.1881704330444336, "step": 114700 }, { "epoch": 0.14206666666666667, "grad_norm": 0.18077601492404938, "learning_rate": 8.998654928433442e-05, "loss": 3.186404228210449, "step": 114710 }, { "epoch": 0.14213333333333333, "grad_norm": 0.1850963979959488, "learning_rate": 8.997993048137373e-05, "loss": 3.1238094329833985, "step": 114720 }, { "epoch": 0.1422, "grad_norm": 0.17055778205394745, "learning_rate": 8.997330973523743e-05, "loss": 3.184661102294922, "step": 114730 }, { "epoch": 0.14226666666666668, "grad_norm": 0.3685237765312195, "learning_rate": 8.996668704624734e-05, "loss": 3.2074970245361327, "step": 114740 }, { "epoch": 0.14233333333333334, "grad_norm": 0.26027196645736694, "learning_rate": 8.99600624147253e-05, "loss": 3.236650848388672, "step": 114750 }, { "epoch": 0.1424, "grad_norm": 0.1716344803571701, "learning_rate": 8.995343584099334e-05, "loss": 3.144558334350586, "step": 114760 }, { "epoch": 0.14246666666666666, "grad_norm": 0.16670696437358856, "learning_rate": 8.99468073253735e-05, "loss": 3.1602739334106444, "step": 114770 }, { "epoch": 0.14253333333333335, "grad_norm": 0.20760400593280792, "learning_rate": 8.994017686818798e-05, "loss": 3.182782745361328, "step": 114780 }, { "epoch": 0.1426, "grad_norm": 0.17987647652626038, "learning_rate": 8.993354446975901e-05, "loss": 3.221872329711914, "step": 114790 }, { "epoch": 0.14266666666666666, "grad_norm": 0.18277469277381897, "learning_rate": 8.992691013040899e-05, "loss": 3.195799446105957, "step": 114800 }, { "epoch": 0.14273333333333332, "grad_norm": 0.1963052749633789, "learning_rate": 8.992027385046034e-05, "loss": 3.1621688842773437, "step": 114810 }, { "epoch": 0.1428, "grad_norm": 0.17632681131362915, "learning_rate": 8.991363563023564e-05, "loss": 3.1762004852294923, "step": 114820 }, { "epoch": 0.14286666666666667, "grad_norm": 0.20575493574142456, "learning_rate": 8.99069954700575e-05, "loss": 3.200313186645508, "step": 114830 }, { "epoch": 0.14293333333333333, "grad_norm": 0.18156351149082184, "learning_rate": 8.990035337024867e-05, "loss": 3.216659164428711, "step": 114840 }, { "epoch": 0.143, "grad_norm": 0.1766965538263321, "learning_rate": 8.989370933113199e-05, "loss": 3.1877199172973634, "step": 114850 }, { "epoch": 0.14306666666666668, "grad_norm": 0.20732806622982025, "learning_rate": 8.988706335303038e-05, "loss": 3.1868366241455077, "step": 114860 }, { "epoch": 0.14313333333333333, "grad_norm": 0.20976994931697845, "learning_rate": 8.988041543626686e-05, "loss": 3.2011734008789063, "step": 114870 }, { "epoch": 0.1432, "grad_norm": 0.18196550011634827, "learning_rate": 8.987376558116454e-05, "loss": 3.2429080963134767, "step": 114880 }, { "epoch": 0.14326666666666665, "grad_norm": 0.18522854149341583, "learning_rate": 8.986711378804664e-05, "loss": 3.191069221496582, "step": 114890 }, { "epoch": 0.14333333333333334, "grad_norm": 0.4436974823474884, "learning_rate": 8.986046005723644e-05, "loss": 3.1583600997924806, "step": 114900 }, { "epoch": 0.1434, "grad_norm": 0.29821568727493286, "learning_rate": 8.985380438905735e-05, "loss": 3.100702476501465, "step": 114910 }, { "epoch": 0.14346666666666666, "grad_norm": 0.18407706916332245, "learning_rate": 8.984714678383287e-05, "loss": 3.1548463821411135, "step": 114920 }, { "epoch": 0.14353333333333335, "grad_norm": 0.2030358910560608, "learning_rate": 8.984048724188656e-05, "loss": 3.1971254348754883, "step": 114930 }, { "epoch": 0.1436, "grad_norm": 0.18311788141727448, "learning_rate": 8.983382576354212e-05, "loss": 3.154345703125, "step": 114940 }, { "epoch": 0.14366666666666666, "grad_norm": 0.17354696989059448, "learning_rate": 8.982716234912331e-05, "loss": 3.210905075073242, "step": 114950 }, { "epoch": 0.14373333333333332, "grad_norm": 0.18447045981884003, "learning_rate": 8.982049699895401e-05, "loss": 3.121708869934082, "step": 114960 }, { "epoch": 0.1438, "grad_norm": 0.2059606909751892, "learning_rate": 8.981382971335819e-05, "loss": 3.1766971588134765, "step": 114970 }, { "epoch": 0.14386666666666667, "grad_norm": 0.18898580968379974, "learning_rate": 8.980716049265987e-05, "loss": 3.178365135192871, "step": 114980 }, { "epoch": 0.14393333333333333, "grad_norm": 0.20749375224113464, "learning_rate": 8.980048933718323e-05, "loss": 3.1620510101318358, "step": 114990 }, { "epoch": 0.144, "grad_norm": 0.17467133700847626, "learning_rate": 8.97938162472525e-05, "loss": 3.2135353088378906, "step": 115000 }, { "epoch": 0.14406666666666668, "grad_norm": 0.20782822370529175, "learning_rate": 8.978714122319201e-05, "loss": 3.2226978302001954, "step": 115010 }, { "epoch": 0.14413333333333334, "grad_norm": 0.19982804358005524, "learning_rate": 8.97804642653262e-05, "loss": 3.2211883544921873, "step": 115020 }, { "epoch": 0.1442, "grad_norm": 0.19547010958194733, "learning_rate": 8.97737853739796e-05, "loss": 3.1696855545043947, "step": 115030 }, { "epoch": 0.14426666666666665, "grad_norm": 0.1943843960762024, "learning_rate": 8.976710454947683e-05, "loss": 3.1443670272827147, "step": 115040 }, { "epoch": 0.14433333333333334, "grad_norm": 0.18507641553878784, "learning_rate": 8.976042179214262e-05, "loss": 3.159477424621582, "step": 115050 }, { "epoch": 0.1444, "grad_norm": 0.1755301058292389, "learning_rate": 8.975373710230173e-05, "loss": 3.1217071533203127, "step": 115060 }, { "epoch": 0.14446666666666666, "grad_norm": 0.17738258838653564, "learning_rate": 8.974705048027909e-05, "loss": 3.1778539657592773, "step": 115070 }, { "epoch": 0.14453333333333335, "grad_norm": 0.19343037903308868, "learning_rate": 8.974036192639969e-05, "loss": 3.158251953125, "step": 115080 }, { "epoch": 0.1446, "grad_norm": 0.22535422444343567, "learning_rate": 8.973367144098863e-05, "loss": 3.129413032531738, "step": 115090 }, { "epoch": 0.14466666666666667, "grad_norm": 0.17406047880649567, "learning_rate": 8.972697902437108e-05, "loss": 3.1919225692749023, "step": 115100 }, { "epoch": 0.14473333333333332, "grad_norm": 0.18025706708431244, "learning_rate": 8.972028467687233e-05, "loss": 3.162981414794922, "step": 115110 }, { "epoch": 0.1448, "grad_norm": 0.2404307872056961, "learning_rate": 8.971358839881773e-05, "loss": 3.1934152603149415, "step": 115120 }, { "epoch": 0.14486666666666667, "grad_norm": 0.29389744997024536, "learning_rate": 8.970689019053275e-05, "loss": 3.1389835357666014, "step": 115130 }, { "epoch": 0.14493333333333333, "grad_norm": 0.23112912476062775, "learning_rate": 8.970019005234298e-05, "loss": 3.170174407958984, "step": 115140 }, { "epoch": 0.145, "grad_norm": 0.2429821789264679, "learning_rate": 8.969348798457404e-05, "loss": 3.195509338378906, "step": 115150 }, { "epoch": 0.14506666666666668, "grad_norm": 0.1767321228981018, "learning_rate": 8.968678398755166e-05, "loss": 3.215562438964844, "step": 115160 }, { "epoch": 0.14513333333333334, "grad_norm": 0.2414773404598236, "learning_rate": 8.968007806160172e-05, "loss": 3.2684005737304687, "step": 115170 }, { "epoch": 0.1452, "grad_norm": 0.20993423461914062, "learning_rate": 8.967337020705014e-05, "loss": 3.1631994247436523, "step": 115180 }, { "epoch": 0.14526666666666666, "grad_norm": 0.19327853620052338, "learning_rate": 8.966666042422294e-05, "loss": 3.205347442626953, "step": 115190 }, { "epoch": 0.14533333333333334, "grad_norm": 0.22929729521274567, "learning_rate": 8.965994871344623e-05, "loss": 3.179778480529785, "step": 115200 }, { "epoch": 0.1454, "grad_norm": 0.1789487898349762, "learning_rate": 8.965323507504624e-05, "loss": 3.2067626953125, "step": 115210 }, { "epoch": 0.14546666666666666, "grad_norm": 0.19531042873859406, "learning_rate": 8.964651950934929e-05, "loss": 3.2209110260009766, "step": 115220 }, { "epoch": 0.14553333333333332, "grad_norm": 0.246993288397789, "learning_rate": 8.963980201668176e-05, "loss": 3.181458282470703, "step": 115230 }, { "epoch": 0.1456, "grad_norm": 0.18307502567768097, "learning_rate": 8.963308259737016e-05, "loss": 3.3071914672851563, "step": 115240 }, { "epoch": 0.14566666666666667, "grad_norm": 0.17983435094356537, "learning_rate": 8.962636125174106e-05, "loss": 3.1394975662231444, "step": 115250 }, { "epoch": 0.14573333333333333, "grad_norm": 0.24881289899349213, "learning_rate": 8.961963798012115e-05, "loss": 3.188882255554199, "step": 115260 }, { "epoch": 0.1458, "grad_norm": 0.18036165833473206, "learning_rate": 8.961291278283722e-05, "loss": 3.186545181274414, "step": 115270 }, { "epoch": 0.14586666666666667, "grad_norm": 4.557909605296118e-09, "learning_rate": 8.960618566021613e-05, "loss": 5.0685783386230465, "step": 115280 }, { "epoch": 0.14593333333333333, "grad_norm": 0.178819939494133, "learning_rate": 8.959945661258485e-05, "loss": 1.441700553894043, "step": 115290 }, { "epoch": 0.146, "grad_norm": 0.1897207349538803, "learning_rate": 8.959272564027044e-05, "loss": 3.1920251846313477, "step": 115300 }, { "epoch": 0.14606666666666668, "grad_norm": 0.18675118684768677, "learning_rate": 8.958599274360002e-05, "loss": 3.2555484771728516, "step": 115310 }, { "epoch": 0.14613333333333334, "grad_norm": 0.17841550707817078, "learning_rate": 8.957925792290087e-05, "loss": 3.151851463317871, "step": 115320 }, { "epoch": 0.1462, "grad_norm": 0.20667503774166107, "learning_rate": 8.957252117850033e-05, "loss": 3.2338302612304686, "step": 115330 }, { "epoch": 0.14626666666666666, "grad_norm": 0.1871166080236435, "learning_rate": 8.95657825107258e-05, "loss": 3.196574401855469, "step": 115340 }, { "epoch": 0.14633333333333334, "grad_norm": 0.19130975008010864, "learning_rate": 8.95590419199048e-05, "loss": 3.192097473144531, "step": 115350 }, { "epoch": 0.1464, "grad_norm": 0.273000568151474, "learning_rate": 8.9552299406365e-05, "loss": 3.194003105163574, "step": 115360 }, { "epoch": 0.14646666666666666, "grad_norm": 0.18371795117855072, "learning_rate": 8.954555497043407e-05, "loss": 3.1831220626831054, "step": 115370 }, { "epoch": 0.14653333333333332, "grad_norm": 0.17748597264289856, "learning_rate": 8.953880861243983e-05, "loss": 3.214915084838867, "step": 115380 }, { "epoch": 0.1466, "grad_norm": 0.18889757990837097, "learning_rate": 8.953206033271016e-05, "loss": 3.1816131591796877, "step": 115390 }, { "epoch": 0.14666666666666667, "grad_norm": 0.1711314469575882, "learning_rate": 8.952531013157309e-05, "loss": 3.1554115295410154, "step": 115400 }, { "epoch": 0.14673333333333333, "grad_norm": 0.32204878330230713, "learning_rate": 8.951855800935665e-05, "loss": 3.212777328491211, "step": 115410 }, { "epoch": 0.1468, "grad_norm": 0.22848555445671082, "learning_rate": 8.951180396638907e-05, "loss": 3.18810977935791, "step": 115420 }, { "epoch": 0.14686666666666667, "grad_norm": 0.18211179971694946, "learning_rate": 8.95050480029986e-05, "loss": 3.194930648803711, "step": 115430 }, { "epoch": 0.14693333333333333, "grad_norm": 0.19413678348064423, "learning_rate": 8.949829011951361e-05, "loss": 3.238846206665039, "step": 115440 }, { "epoch": 0.147, "grad_norm": 0.17590893805027008, "learning_rate": 8.949153031626254e-05, "loss": 3.180173873901367, "step": 115450 }, { "epoch": 0.14706666666666668, "grad_norm": 0.26328110694885254, "learning_rate": 8.948476859357397e-05, "loss": 3.286937713623047, "step": 115460 }, { "epoch": 0.14713333333333334, "grad_norm": 0.22170816361904144, "learning_rate": 8.947800495177654e-05, "loss": 3.1817703247070312, "step": 115470 }, { "epoch": 0.1472, "grad_norm": 0.293414831161499, "learning_rate": 8.947123939119897e-05, "loss": 3.5015132904052733, "step": 115480 }, { "epoch": 0.14726666666666666, "grad_norm": 0.1782049685716629, "learning_rate": 8.946447191217012e-05, "loss": 3.2901996612548827, "step": 115490 }, { "epoch": 0.14733333333333334, "grad_norm": 0.17074903845787048, "learning_rate": 8.945770251501888e-05, "loss": 3.132657051086426, "step": 115500 }, { "epoch": 0.1474, "grad_norm": 0.18534399569034576, "learning_rate": 8.945093120007429e-05, "loss": 3.2516422271728516, "step": 115510 }, { "epoch": 0.14746666666666666, "grad_norm": 0.18468549847602844, "learning_rate": 8.944415796766546e-05, "loss": 3.1536474227905273, "step": 115520 }, { "epoch": 0.14753333333333332, "grad_norm": 0.18307669460773468, "learning_rate": 8.943738281812158e-05, "loss": 3.207419204711914, "step": 115530 }, { "epoch": 0.1476, "grad_norm": 0.25065797567367554, "learning_rate": 8.943060575177197e-05, "loss": 3.173893165588379, "step": 115540 }, { "epoch": 0.14766666666666667, "grad_norm": 0.16777868568897247, "learning_rate": 8.942382676894603e-05, "loss": 3.1967288970947267, "step": 115550 }, { "epoch": 0.14773333333333333, "grad_norm": 0.1713712513446808, "learning_rate": 8.94170458699732e-05, "loss": 3.217233657836914, "step": 115560 }, { "epoch": 0.1478, "grad_norm": 0.21578551828861237, "learning_rate": 8.941026305518309e-05, "loss": 3.2038482666015624, "step": 115570 }, { "epoch": 0.14786666666666667, "grad_norm": 0.18790622055530548, "learning_rate": 8.940347832490537e-05, "loss": 3.179693031311035, "step": 115580 }, { "epoch": 0.14793333333333333, "grad_norm": 0.1901494711637497, "learning_rate": 8.93966916794698e-05, "loss": 3.1948543548583985, "step": 115590 }, { "epoch": 0.148, "grad_norm": 0.1808430403470993, "learning_rate": 8.938990311920623e-05, "loss": 3.201068115234375, "step": 115600 }, { "epoch": 0.14806666666666668, "grad_norm": 0.19344674050807953, "learning_rate": 8.938311264444462e-05, "loss": 3.1865446090698244, "step": 115610 }, { "epoch": 0.14813333333333334, "grad_norm": 0.1824226826429367, "learning_rate": 8.937632025551498e-05, "loss": 3.2064128875732423, "step": 115620 }, { "epoch": 0.1482, "grad_norm": 0.18881726264953613, "learning_rate": 8.936952595274749e-05, "loss": 3.2079547882080077, "step": 115630 }, { "epoch": 0.14826666666666666, "grad_norm": 0.24450643360614777, "learning_rate": 8.936272973647236e-05, "loss": 3.340474319458008, "step": 115640 }, { "epoch": 0.14833333333333334, "grad_norm": 0.2553510367870331, "learning_rate": 8.935593160701992e-05, "loss": 3.1404937744140624, "step": 115650 }, { "epoch": 0.1484, "grad_norm": 0.18880610167980194, "learning_rate": 8.934913156472056e-05, "loss": 3.262364959716797, "step": 115660 }, { "epoch": 0.14846666666666666, "grad_norm": 0.18154381215572357, "learning_rate": 8.934232960990481e-05, "loss": 3.221924591064453, "step": 115670 }, { "epoch": 0.14853333333333332, "grad_norm": 0.18763086199760437, "learning_rate": 8.933552574290327e-05, "loss": 3.171453666687012, "step": 115680 }, { "epoch": 0.1486, "grad_norm": 0.1826278120279312, "learning_rate": 8.932871996404664e-05, "loss": 3.167572784423828, "step": 115690 }, { "epoch": 0.14866666666666667, "grad_norm": 0.22959361970424652, "learning_rate": 8.932191227366569e-05, "loss": 3.2160919189453123, "step": 115700 }, { "epoch": 0.14873333333333333, "grad_norm": 0.22545503079891205, "learning_rate": 8.931510267209128e-05, "loss": 3.2533859252929687, "step": 115710 }, { "epoch": 0.1488, "grad_norm": 0.4554384648799896, "learning_rate": 8.930829115965443e-05, "loss": 3.2530258178710936, "step": 115720 }, { "epoch": 0.14886666666666667, "grad_norm": 0.19763630628585815, "learning_rate": 8.930147773668618e-05, "loss": 3.1776432037353515, "step": 115730 }, { "epoch": 0.14893333333333333, "grad_norm": 0.18720343708992004, "learning_rate": 8.929466240351769e-05, "loss": 3.131260871887207, "step": 115740 }, { "epoch": 0.149, "grad_norm": 0.16500571370124817, "learning_rate": 8.928784516048022e-05, "loss": 3.1984603881835936, "step": 115750 }, { "epoch": 0.14906666666666665, "grad_norm": 0.17722585797309875, "learning_rate": 8.928102600790509e-05, "loss": 3.2673057556152343, "step": 115760 }, { "epoch": 0.14913333333333334, "grad_norm": 0.17225214838981628, "learning_rate": 8.927420494612376e-05, "loss": 3.1406682968139648, "step": 115770 }, { "epoch": 0.1492, "grad_norm": 0.18165288865566254, "learning_rate": 8.926738197546776e-05, "loss": 3.2668285369873047, "step": 115780 }, { "epoch": 0.14926666666666666, "grad_norm": 0.17759980261325836, "learning_rate": 8.926055709626868e-05, "loss": 3.1867038726806642, "step": 115790 }, { "epoch": 0.14933333333333335, "grad_norm": 0.18733304738998413, "learning_rate": 8.925373030885829e-05, "loss": 3.169986343383789, "step": 115800 }, { "epoch": 0.1494, "grad_norm": 0.3108231723308563, "learning_rate": 8.924690161356833e-05, "loss": 3.2664676666259767, "step": 115810 }, { "epoch": 0.14946666666666666, "grad_norm": 0.2222132682800293, "learning_rate": 8.924007101073075e-05, "loss": 3.2450653076171876, "step": 115820 }, { "epoch": 0.14953333333333332, "grad_norm": 0.19076670706272125, "learning_rate": 8.923323850067755e-05, "loss": 3.1924095153808594, "step": 115830 }, { "epoch": 0.1496, "grad_norm": 0.17377188801765442, "learning_rate": 8.922640408374078e-05, "loss": 3.208312225341797, "step": 115840 }, { "epoch": 0.14966666666666667, "grad_norm": 0.2106606662273407, "learning_rate": 8.921956776025263e-05, "loss": 3.1857141494750976, "step": 115850 }, { "epoch": 0.14973333333333333, "grad_norm": 0.1764078438282013, "learning_rate": 8.921272953054537e-05, "loss": 3.1342716217041016, "step": 115860 }, { "epoch": 0.1498, "grad_norm": 0.1757580190896988, "learning_rate": 8.920588939495139e-05, "loss": 3.189113807678223, "step": 115870 }, { "epoch": 0.14986666666666668, "grad_norm": 0.2026742547750473, "learning_rate": 8.919904735380311e-05, "loss": 3.1997133255004884, "step": 115880 }, { "epoch": 0.14993333333333334, "grad_norm": 0.18823206424713135, "learning_rate": 8.91922034074331e-05, "loss": 3.2390846252441405, "step": 115890 }, { "epoch": 0.15, "grad_norm": 0.18663419783115387, "learning_rate": 8.9185357556174e-05, "loss": 3.194029426574707, "step": 115900 }, { "epoch": 0.15006666666666665, "grad_norm": 0.27954551577568054, "learning_rate": 8.917850980035854e-05, "loss": 3.207720947265625, "step": 115910 }, { "epoch": 0.15013333333333334, "grad_norm": 0.19352658092975616, "learning_rate": 8.917166014031953e-05, "loss": 3.2405147552490234, "step": 115920 }, { "epoch": 0.1502, "grad_norm": 0.17494341731071472, "learning_rate": 8.916480857638991e-05, "loss": 3.152114486694336, "step": 115930 }, { "epoch": 0.15026666666666666, "grad_norm": 0.1839168220758438, "learning_rate": 8.915795510890271e-05, "loss": 3.2385658264160155, "step": 115940 }, { "epoch": 0.15033333333333335, "grad_norm": 0.1705285906791687, "learning_rate": 8.915109973819099e-05, "loss": 3.196470260620117, "step": 115950 }, { "epoch": 0.1504, "grad_norm": 0.16979536414146423, "learning_rate": 8.914424246458798e-05, "loss": 3.163599395751953, "step": 115960 }, { "epoch": 0.15046666666666667, "grad_norm": 0.17662031948566437, "learning_rate": 8.913738328842695e-05, "loss": 3.297653579711914, "step": 115970 }, { "epoch": 0.15053333333333332, "grad_norm": 0.19534194469451904, "learning_rate": 8.91305222100413e-05, "loss": 3.171773338317871, "step": 115980 }, { "epoch": 0.1506, "grad_norm": 0.18263614177703857, "learning_rate": 8.91236592297645e-05, "loss": 3.2344112396240234, "step": 115990 }, { "epoch": 0.15066666666666667, "grad_norm": 0.18084816634655, "learning_rate": 8.91167943479301e-05, "loss": 3.1272424697875976, "step": 116000 }, { "epoch": 0.15073333333333333, "grad_norm": 0.19284190237522125, "learning_rate": 8.910992756487177e-05, "loss": 3.1429513931274413, "step": 116010 }, { "epoch": 0.1508, "grad_norm": 0.20697033405303955, "learning_rate": 8.910305888092327e-05, "loss": 3.171030807495117, "step": 116020 }, { "epoch": 0.15086666666666668, "grad_norm": 0.21869546175003052, "learning_rate": 8.909618829641843e-05, "loss": 3.348674011230469, "step": 116030 }, { "epoch": 0.15093333333333334, "grad_norm": 0.17931675910949707, "learning_rate": 8.90893158116912e-05, "loss": 3.1992300033569334, "step": 116040 }, { "epoch": 0.151, "grad_norm": 0.18826885521411896, "learning_rate": 8.90824414270756e-05, "loss": 3.163996124267578, "step": 116050 }, { "epoch": 0.15106666666666665, "grad_norm": 0.1697552353143692, "learning_rate": 8.907556514290574e-05, "loss": 3.172346305847168, "step": 116060 }, { "epoch": 0.15113333333333334, "grad_norm": 0.23733562231063843, "learning_rate": 8.906868695951586e-05, "loss": 3.1142568588256836, "step": 116070 }, { "epoch": 0.1512, "grad_norm": 0.2329884171485901, "learning_rate": 8.906180687724025e-05, "loss": 3.184773826599121, "step": 116080 }, { "epoch": 0.15126666666666666, "grad_norm": 0.17212112247943878, "learning_rate": 8.90549248964133e-05, "loss": 3.1827396392822265, "step": 116090 }, { "epoch": 0.15133333333333332, "grad_norm": 0.17753633856773376, "learning_rate": 8.904804101736952e-05, "loss": 3.143393325805664, "step": 116100 }, { "epoch": 0.1514, "grad_norm": 0.2242303043603897, "learning_rate": 8.904115524044348e-05, "loss": 3.082079315185547, "step": 116110 }, { "epoch": 0.15146666666666667, "grad_norm": 0.1778675615787506, "learning_rate": 8.903426756596987e-05, "loss": 3.135373115539551, "step": 116120 }, { "epoch": 0.15153333333333333, "grad_norm": 0.1849077343940735, "learning_rate": 8.902737799428343e-05, "loss": 3.152600860595703, "step": 116130 }, { "epoch": 0.1516, "grad_norm": 0.18070769309997559, "learning_rate": 8.902048652571904e-05, "loss": 3.201785659790039, "step": 116140 }, { "epoch": 0.15166666666666667, "grad_norm": 0.2364789992570877, "learning_rate": 8.901359316061163e-05, "loss": 3.184540939331055, "step": 116150 }, { "epoch": 0.15173333333333333, "grad_norm": 0.2168954312801361, "learning_rate": 8.900669789929626e-05, "loss": 3.2120986938476563, "step": 116160 }, { "epoch": 0.1518, "grad_norm": 0.20524972677230835, "learning_rate": 8.899980074210807e-05, "loss": 3.1978063583374023, "step": 116170 }, { "epoch": 0.15186666666666668, "grad_norm": 0.1760595291852951, "learning_rate": 8.899290168938229e-05, "loss": 3.4033607482910155, "step": 116180 }, { "epoch": 0.15193333333333334, "grad_norm": 0.19901101291179657, "learning_rate": 8.898600074145424e-05, "loss": 3.1484678268432615, "step": 116190 }, { "epoch": 0.152, "grad_norm": 0.20948545634746552, "learning_rate": 8.89790978986593e-05, "loss": 3.1736804962158205, "step": 116200 }, { "epoch": 0.15206666666666666, "grad_norm": 0.19665394723415375, "learning_rate": 8.897219316133301e-05, "loss": 3.188533592224121, "step": 116210 }, { "epoch": 0.15213333333333334, "grad_norm": 0.1812409609556198, "learning_rate": 8.896528652981096e-05, "loss": 3.1459360122680664, "step": 116220 }, { "epoch": 0.1522, "grad_norm": 0.17385603487491608, "learning_rate": 8.895837800442884e-05, "loss": 3.186120796203613, "step": 116230 }, { "epoch": 0.15226666666666666, "grad_norm": 0.224608913064003, "learning_rate": 8.89514675855224e-05, "loss": 3.158353805541992, "step": 116240 }, { "epoch": 0.15233333333333332, "grad_norm": 0.19043293595314026, "learning_rate": 8.894455527342755e-05, "loss": 3.1972158432006834, "step": 116250 }, { "epoch": 0.1524, "grad_norm": 0.18545326590538025, "learning_rate": 8.893764106848022e-05, "loss": 3.1893714904785155, "step": 116260 }, { "epoch": 0.15246666666666667, "grad_norm": 0.2043672502040863, "learning_rate": 8.89307249710165e-05, "loss": 3.156164360046387, "step": 116270 }, { "epoch": 0.15253333333333333, "grad_norm": 0.18194140493869781, "learning_rate": 8.892380698137254e-05, "loss": 3.145594024658203, "step": 116280 }, { "epoch": 0.1526, "grad_norm": 0.4247041642665863, "learning_rate": 8.891688709988456e-05, "loss": 3.3859699249267576, "step": 116290 }, { "epoch": 0.15266666666666667, "grad_norm": 0.19413727521896362, "learning_rate": 8.890996532688888e-05, "loss": 3.194483757019043, "step": 116300 }, { "epoch": 0.15273333333333333, "grad_norm": 0.19387374818325043, "learning_rate": 8.890304166272196e-05, "loss": 3.168155860900879, "step": 116310 }, { "epoch": 0.1528, "grad_norm": 0.21814575791358948, "learning_rate": 8.88961161077203e-05, "loss": 3.187595176696777, "step": 116320 }, { "epoch": 0.15286666666666668, "grad_norm": 0.17702198028564453, "learning_rate": 8.88891886622205e-05, "loss": 3.2154788970947266, "step": 116330 }, { "epoch": 0.15293333333333334, "grad_norm": 0.1681031584739685, "learning_rate": 8.888225932655927e-05, "loss": 3.214625930786133, "step": 116340 }, { "epoch": 0.153, "grad_norm": 0.18359380960464478, "learning_rate": 8.88753281010734e-05, "loss": 3.1690040588378907, "step": 116350 }, { "epoch": 0.15306666666666666, "grad_norm": 0.18262095749378204, "learning_rate": 8.886839498609977e-05, "loss": 3.2448375701904295, "step": 116360 }, { "epoch": 0.15313333333333334, "grad_norm": 0.16890014708042145, "learning_rate": 8.886145998197535e-05, "loss": 3.1564550399780273, "step": 116370 }, { "epoch": 0.1532, "grad_norm": 0.18542420864105225, "learning_rate": 8.885452308903723e-05, "loss": 3.2260997772216795, "step": 116380 }, { "epoch": 0.15326666666666666, "grad_norm": 0.17674781382083893, "learning_rate": 8.884758430762255e-05, "loss": 3.1457576751708984, "step": 116390 }, { "epoch": 0.15333333333333332, "grad_norm": 0.19250452518463135, "learning_rate": 8.884064363806857e-05, "loss": 3.1844915390014648, "step": 116400 }, { "epoch": 0.1534, "grad_norm": 0.17716515064239502, "learning_rate": 8.88337010807126e-05, "loss": 3.1289310455322266, "step": 116410 }, { "epoch": 0.15346666666666667, "grad_norm": 0.17526598274707794, "learning_rate": 8.882675663589213e-05, "loss": 3.218434143066406, "step": 116420 }, { "epoch": 0.15353333333333333, "grad_norm": 0.23991592228412628, "learning_rate": 8.881981030394466e-05, "loss": 3.1509321212768553, "step": 116430 }, { "epoch": 0.1536, "grad_norm": 0.20371633768081665, "learning_rate": 8.88128620852078e-05, "loss": 3.1915716171264648, "step": 116440 }, { "epoch": 0.15366666666666667, "grad_norm": 0.1870700716972351, "learning_rate": 8.880591198001927e-05, "loss": 3.3475013732910157, "step": 116450 }, { "epoch": 0.15373333333333333, "grad_norm": 0.17328859865665436, "learning_rate": 8.879895998871686e-05, "loss": 3.177873992919922, "step": 116460 }, { "epoch": 0.1538, "grad_norm": 0.19497938454151154, "learning_rate": 8.879200611163848e-05, "loss": 3.167601203918457, "step": 116470 }, { "epoch": 0.15386666666666668, "grad_norm": 0.2118101269006729, "learning_rate": 8.87850503491221e-05, "loss": 3.1669828414916994, "step": 116480 }, { "epoch": 0.15393333333333334, "grad_norm": 0.17831072211265564, "learning_rate": 8.877809270150581e-05, "loss": 3.1753713607788088, "step": 116490 }, { "epoch": 0.154, "grad_norm": 0.18367049098014832, "learning_rate": 8.877113316912775e-05, "loss": 3.1381013870239256, "step": 116500 }, { "epoch": 0.15406666666666666, "grad_norm": 0.18699193000793457, "learning_rate": 8.876417175232623e-05, "loss": 3.14172248840332, "step": 116510 }, { "epoch": 0.15413333333333334, "grad_norm": 14.249421119689941, "learning_rate": 8.875720845143954e-05, "loss": 4.037147521972656, "step": 116520 }, { "epoch": 0.1542, "grad_norm": 0.36147600412368774, "learning_rate": 8.875024326680618e-05, "loss": 3.0568885803222656, "step": 116530 }, { "epoch": 0.15426666666666666, "grad_norm": 0.18450431525707245, "learning_rate": 8.874327619876464e-05, "loss": 3.1297407150268555, "step": 116540 }, { "epoch": 0.15433333333333332, "grad_norm": 0.25912296772003174, "learning_rate": 8.873630724765357e-05, "loss": 3.297260284423828, "step": 116550 }, { "epoch": 0.1544, "grad_norm": 0.20867633819580078, "learning_rate": 8.872933641381168e-05, "loss": 3.163220024108887, "step": 116560 }, { "epoch": 0.15446666666666667, "grad_norm": 0.19538186490535736, "learning_rate": 8.872236369757779e-05, "loss": 3.195733833312988, "step": 116570 }, { "epoch": 0.15453333333333333, "grad_norm": 0.1855723112821579, "learning_rate": 8.871538909929079e-05, "loss": 3.1573984146118166, "step": 116580 }, { "epoch": 0.1546, "grad_norm": 0.20923806726932526, "learning_rate": 8.870841261928965e-05, "loss": 3.205109786987305, "step": 116590 }, { "epoch": 0.15466666666666667, "grad_norm": 0.2000235617160797, "learning_rate": 8.870143425791349e-05, "loss": 3.1962629318237306, "step": 116600 }, { "epoch": 0.15473333333333333, "grad_norm": 0.19046354293823242, "learning_rate": 8.869445401550147e-05, "loss": 3.2120292663574217, "step": 116610 }, { "epoch": 0.1548, "grad_norm": 0.18727830052375793, "learning_rate": 8.868747189239284e-05, "loss": 3.1686189651489256, "step": 116620 }, { "epoch": 0.15486666666666668, "grad_norm": 0.18717165291309357, "learning_rate": 8.8680487888927e-05, "loss": 3.2592037200927733, "step": 116630 }, { "epoch": 0.15493333333333334, "grad_norm": 0.22730383276939392, "learning_rate": 8.867350200544336e-05, "loss": 3.2514949798583985, "step": 116640 }, { "epoch": 0.155, "grad_norm": 0.19303973019123077, "learning_rate": 8.866651424228147e-05, "loss": 3.174184226989746, "step": 116650 }, { "epoch": 0.15506666666666666, "grad_norm": 0.1826484352350235, "learning_rate": 8.865952459978097e-05, "loss": 3.1970355987548826, "step": 116660 }, { "epoch": 0.15513333333333335, "grad_norm": 0.17630185186862946, "learning_rate": 8.865253307828157e-05, "loss": 3.1922233581542967, "step": 116670 }, { "epoch": 0.1552, "grad_norm": 0.18326982855796814, "learning_rate": 8.86455396781231e-05, "loss": 3.157773971557617, "step": 116680 }, { "epoch": 0.15526666666666666, "grad_norm": 0.17667488753795624, "learning_rate": 8.863854439964545e-05, "loss": 3.2011913299560546, "step": 116690 }, { "epoch": 0.15533333333333332, "grad_norm": 0.9063477516174316, "learning_rate": 8.863154724318863e-05, "loss": 3.2470375061035157, "step": 116700 }, { "epoch": 0.1554, "grad_norm": 0.26685717701911926, "learning_rate": 8.862454820909272e-05, "loss": 3.265735626220703, "step": 116710 }, { "epoch": 0.15546666666666667, "grad_norm": 0.18550673127174377, "learning_rate": 8.86175472976979e-05, "loss": 3.2065502166748048, "step": 116720 }, { "epoch": 0.15553333333333333, "grad_norm": 0.27447056770324707, "learning_rate": 8.861054450934445e-05, "loss": 3.1186580657958984, "step": 116730 }, { "epoch": 0.1556, "grad_norm": 0.20383073389530182, "learning_rate": 8.860353984437273e-05, "loss": 3.1721511840820313, "step": 116740 }, { "epoch": 0.15566666666666668, "grad_norm": 0.19102488458156586, "learning_rate": 8.859653330312317e-05, "loss": 3.1429256439208983, "step": 116750 }, { "epoch": 0.15573333333333333, "grad_norm": 0.17617620527744293, "learning_rate": 8.858952488593635e-05, "loss": 3.1731185913085938, "step": 116760 }, { "epoch": 0.1558, "grad_norm": 0.17467689514160156, "learning_rate": 8.858251459315288e-05, "loss": 3.1383024215698243, "step": 116770 }, { "epoch": 0.15586666666666665, "grad_norm": 0.1825319081544876, "learning_rate": 8.85755024251135e-05, "loss": 3.157659912109375, "step": 116780 }, { "epoch": 0.15593333333333334, "grad_norm": 0.1760953813791275, "learning_rate": 8.856848838215901e-05, "loss": 3.170824432373047, "step": 116790 }, { "epoch": 0.156, "grad_norm": 0.17510949075222015, "learning_rate": 8.856147246463035e-05, "loss": 3.1675559997558596, "step": 116800 }, { "epoch": 0.15606666666666666, "grad_norm": 0.17412729561328888, "learning_rate": 8.85544546728685e-05, "loss": 3.125448989868164, "step": 116810 }, { "epoch": 0.15613333333333335, "grad_norm": 2.4424960613250732, "learning_rate": 8.854743500721453e-05, "loss": 3.1692649841308596, "step": 116820 }, { "epoch": 0.1562, "grad_norm": 0.18654651939868927, "learning_rate": 8.854041346800966e-05, "loss": 3.1800804138183594, "step": 116830 }, { "epoch": 0.15626666666666666, "grad_norm": 0.1777157038450241, "learning_rate": 8.853339005559515e-05, "loss": 3.180362319946289, "step": 116840 }, { "epoch": 0.15633333333333332, "grad_norm": 0.1863633692264557, "learning_rate": 8.852636477031236e-05, "loss": 3.146407127380371, "step": 116850 }, { "epoch": 0.1564, "grad_norm": 0.19077986478805542, "learning_rate": 8.851933761250276e-05, "loss": 3.1881534576416017, "step": 116860 }, { "epoch": 0.15646666666666667, "grad_norm": 0.1973438709974289, "learning_rate": 8.851230858250785e-05, "loss": 3.0105592727661135, "step": 116870 }, { "epoch": 0.15653333333333333, "grad_norm": 0.7302625179290771, "learning_rate": 8.850527768066935e-05, "loss": 3.2441402435302735, "step": 116880 }, { "epoch": 0.1566, "grad_norm": 0.4144805073738098, "learning_rate": 8.849824490732889e-05, "loss": 3.367179107666016, "step": 116890 }, { "epoch": 0.15666666666666668, "grad_norm": 0.22902406752109528, "learning_rate": 8.849121026282838e-05, "loss": 3.1121042251586912, "step": 116900 }, { "epoch": 0.15673333333333334, "grad_norm": 0.2246682196855545, "learning_rate": 8.848417374750967e-05, "loss": 3.211495208740234, "step": 116910 }, { "epoch": 0.1568, "grad_norm": 0.184931680560112, "learning_rate": 8.847713536171478e-05, "loss": 3.1547075271606446, "step": 116920 }, { "epoch": 0.15686666666666665, "grad_norm": 0.1804894655942917, "learning_rate": 8.847009510578581e-05, "loss": 3.177079772949219, "step": 116930 }, { "epoch": 0.15693333333333334, "grad_norm": 0.21325072646141052, "learning_rate": 8.846305298006494e-05, "loss": 3.139813232421875, "step": 116940 }, { "epoch": 0.157, "grad_norm": 0.20625725388526917, "learning_rate": 8.845600898489443e-05, "loss": 3.2014144897460937, "step": 116950 }, { "epoch": 0.15706666666666666, "grad_norm": 0.27701330184936523, "learning_rate": 8.844896312061668e-05, "loss": 3.1889745712280275, "step": 116960 }, { "epoch": 0.15713333333333335, "grad_norm": 0.1840047836303711, "learning_rate": 8.844191538757409e-05, "loss": 3.276389312744141, "step": 116970 }, { "epoch": 0.1572, "grad_norm": 0.23063434660434723, "learning_rate": 8.843486578610926e-05, "loss": 3.151161003112793, "step": 116980 }, { "epoch": 0.15726666666666667, "grad_norm": 0.20124812424182892, "learning_rate": 8.84278143165648e-05, "loss": 3.2036548614501954, "step": 116990 }, { "epoch": 0.15733333333333333, "grad_norm": 0.19604183733463287, "learning_rate": 8.842076097928342e-05, "loss": 3.2925956726074217, "step": 117000 }, { "epoch": 0.1574, "grad_norm": 0.18221943080425262, "learning_rate": 8.841370577460798e-05, "loss": 3.177325439453125, "step": 117010 }, { "epoch": 0.15746666666666667, "grad_norm": 0.19113025069236755, "learning_rate": 8.840664870288136e-05, "loss": 3.2173370361328124, "step": 117020 }, { "epoch": 0.15753333333333333, "grad_norm": 0.3318032920360565, "learning_rate": 8.83995897644466e-05, "loss": 3.215443420410156, "step": 117030 }, { "epoch": 0.1576, "grad_norm": 0.18770731985569, "learning_rate": 8.839252895964673e-05, "loss": 3.219461441040039, "step": 117040 }, { "epoch": 0.15766666666666668, "grad_norm": 0.5973770022392273, "learning_rate": 8.838546628882498e-05, "loss": 3.3355484008789062, "step": 117050 }, { "epoch": 0.15773333333333334, "grad_norm": 0.19254694879055023, "learning_rate": 8.837840175232459e-05, "loss": 3.144818878173828, "step": 117060 }, { "epoch": 0.1578, "grad_norm": 0.20289896428585052, "learning_rate": 8.837133535048895e-05, "loss": 3.266956329345703, "step": 117070 }, { "epoch": 0.15786666666666666, "grad_norm": 0.7363761067390442, "learning_rate": 8.836426708366153e-05, "loss": 3.3826854705810545, "step": 117080 }, { "epoch": 0.15793333333333334, "grad_norm": 0.19231964647769928, "learning_rate": 8.835719695218581e-05, "loss": 3.2313194274902344, "step": 117090 }, { "epoch": 0.158, "grad_norm": 0.1912682056427002, "learning_rate": 8.835012495640547e-05, "loss": 3.195576477050781, "step": 117100 }, { "epoch": 0.15806666666666666, "grad_norm": 0.17350253462791443, "learning_rate": 8.834305109666423e-05, "loss": 3.1645557403564455, "step": 117110 }, { "epoch": 0.15813333333333332, "grad_norm": 0.24080084264278412, "learning_rate": 8.833597537330593e-05, "loss": 3.2544395446777346, "step": 117120 }, { "epoch": 0.1582, "grad_norm": 0.1892646998167038, "learning_rate": 8.832889778667443e-05, "loss": 3.178272819519043, "step": 117130 }, { "epoch": 0.15826666666666667, "grad_norm": 0.18631179630756378, "learning_rate": 8.832181833711375e-05, "loss": 3.172731399536133, "step": 117140 }, { "epoch": 0.15833333333333333, "grad_norm": 0.22171571850776672, "learning_rate": 8.831473702496797e-05, "loss": 3.2104129791259766, "step": 117150 }, { "epoch": 0.1584, "grad_norm": 0.17646509408950806, "learning_rate": 8.830765385058128e-05, "loss": 3.168443298339844, "step": 117160 }, { "epoch": 0.15846666666666667, "grad_norm": 0.2238849550485611, "learning_rate": 8.830056881429795e-05, "loss": 3.1762840270996096, "step": 117170 }, { "epoch": 0.15853333333333333, "grad_norm": 0.17891579866409302, "learning_rate": 8.829348191646233e-05, "loss": 3.1535900115966795, "step": 117180 }, { "epoch": 0.1586, "grad_norm": 0.19416049122810364, "learning_rate": 8.82863931574189e-05, "loss": 3.1509578704833983, "step": 117190 }, { "epoch": 0.15866666666666668, "grad_norm": 0.287092000246048, "learning_rate": 8.827930253751215e-05, "loss": 3.338936996459961, "step": 117200 }, { "epoch": 0.15873333333333334, "grad_norm": 0.19743825495243073, "learning_rate": 8.827221005708673e-05, "loss": 3.1352378845214846, "step": 117210 }, { "epoch": 0.1588, "grad_norm": 0.16437336802482605, "learning_rate": 8.826511571648737e-05, "loss": 3.223967361450195, "step": 117220 }, { "epoch": 0.15886666666666666, "grad_norm": 0.17965194582939148, "learning_rate": 8.825801951605889e-05, "loss": 3.1863500595092775, "step": 117230 }, { "epoch": 0.15893333333333334, "grad_norm": 0.1864343136548996, "learning_rate": 8.825092145614617e-05, "loss": 3.1810861587524415, "step": 117240 }, { "epoch": 0.159, "grad_norm": 0.1970648169517517, "learning_rate": 8.824382153709421e-05, "loss": 3.184712219238281, "step": 117250 }, { "epoch": 0.15906666666666666, "grad_norm": 0.23680777847766876, "learning_rate": 8.823671975924812e-05, "loss": 3.3315616607666017, "step": 117260 }, { "epoch": 0.15913333333333332, "grad_norm": 0.19469675421714783, "learning_rate": 8.822961612295303e-05, "loss": 3.2997367858886717, "step": 117270 }, { "epoch": 0.1592, "grad_norm": 0.21063755452632904, "learning_rate": 8.822251062855422e-05, "loss": 3.1971153259277343, "step": 117280 }, { "epoch": 0.15926666666666667, "grad_norm": 0.31296876072883606, "learning_rate": 8.821540327639705e-05, "loss": 3.195456886291504, "step": 117290 }, { "epoch": 0.15933333333333333, "grad_norm": 0.21251054108142853, "learning_rate": 8.820829406682698e-05, "loss": 3.1842132568359376, "step": 117300 }, { "epoch": 0.1594, "grad_norm": 0.22218257188796997, "learning_rate": 8.820118300018951e-05, "loss": 3.1995241165161135, "step": 117310 }, { "epoch": 0.15946666666666667, "grad_norm": 0.17428213357925415, "learning_rate": 8.819407007683029e-05, "loss": 3.146713447570801, "step": 117320 }, { "epoch": 0.15953333333333333, "grad_norm": 0.20196178555488586, "learning_rate": 8.818695529709501e-05, "loss": 3.2218017578125, "step": 117330 }, { "epoch": 0.1596, "grad_norm": 0.19046969711780548, "learning_rate": 8.81798386613295e-05, "loss": 3.1591238021850585, "step": 117340 }, { "epoch": 0.15966666666666668, "grad_norm": 0.17580436170101166, "learning_rate": 8.817272016987963e-05, "loss": 3.167226219177246, "step": 117350 }, { "epoch": 0.15973333333333334, "grad_norm": 0.21187226474285126, "learning_rate": 8.816559982309143e-05, "loss": 3.2653968811035154, "step": 117360 }, { "epoch": 0.1598, "grad_norm": 0.2166254073381424, "learning_rate": 8.815847762131093e-05, "loss": 3.2055984497070313, "step": 117370 }, { "epoch": 0.15986666666666666, "grad_norm": 0.1823117733001709, "learning_rate": 8.815135356488431e-05, "loss": 3.1158773422241213, "step": 117380 }, { "epoch": 0.15993333333333334, "grad_norm": 0.30180105566978455, "learning_rate": 8.814422765415783e-05, "loss": 3.209910583496094, "step": 117390 }, { "epoch": 0.16, "grad_norm": 0.218704953789711, "learning_rate": 8.813709988947783e-05, "loss": 3.251177978515625, "step": 117400 }, { "epoch": 0.16006666666666666, "grad_norm": 0.17514339089393616, "learning_rate": 8.812997027119077e-05, "loss": 3.153173065185547, "step": 117410 }, { "epoch": 0.16013333333333332, "grad_norm": 0.17822924256324768, "learning_rate": 8.812283879964314e-05, "loss": 3.1177597045898438, "step": 117420 }, { "epoch": 0.1602, "grad_norm": 0.1771828830242157, "learning_rate": 8.811570547518159e-05, "loss": 3.1821035385131835, "step": 117430 }, { "epoch": 0.16026666666666667, "grad_norm": 0.1847875863313675, "learning_rate": 8.810857029815281e-05, "loss": 3.2196186065673826, "step": 117440 }, { "epoch": 0.16033333333333333, "grad_norm": 0.1942659616470337, "learning_rate": 8.810143326890359e-05, "loss": 3.151424789428711, "step": 117450 }, { "epoch": 0.1604, "grad_norm": 0.20954866707324982, "learning_rate": 8.809429438778081e-05, "loss": 3.203818511962891, "step": 117460 }, { "epoch": 0.16046666666666667, "grad_norm": 0.18898549675941467, "learning_rate": 8.808715365513148e-05, "loss": 3.1360681533813475, "step": 117470 }, { "epoch": 0.16053333333333333, "grad_norm": 0.20685654878616333, "learning_rate": 8.808001107130263e-05, "loss": 3.1711666107177736, "step": 117480 }, { "epoch": 0.1606, "grad_norm": 0.19471600651741028, "learning_rate": 8.807286663664144e-05, "loss": 3.1225584030151365, "step": 117490 }, { "epoch": 0.16066666666666668, "grad_norm": 0.1851060688495636, "learning_rate": 8.806572035149516e-05, "loss": 3.156396675109863, "step": 117500 }, { "epoch": 0.16073333333333334, "grad_norm": 0.2170630842447281, "learning_rate": 8.805857221621112e-05, "loss": 3.3176502227783202, "step": 117510 }, { "epoch": 0.1608, "grad_norm": 0.19249814748764038, "learning_rate": 8.805142223113673e-05, "loss": 3.2447303771972655, "step": 117520 }, { "epoch": 0.16086666666666666, "grad_norm": 0.20053061842918396, "learning_rate": 8.804427039661954e-05, "loss": 3.1586585998535157, "step": 117530 }, { "epoch": 0.16093333333333334, "grad_norm": 0.2391374111175537, "learning_rate": 8.803711671300712e-05, "loss": 3.209058380126953, "step": 117540 }, { "epoch": 0.161, "grad_norm": 0.20610934495925903, "learning_rate": 8.802996118064717e-05, "loss": 3.16064567565918, "step": 117550 }, { "epoch": 0.16106666666666666, "grad_norm": 0.1728743612766266, "learning_rate": 8.80228037998875e-05, "loss": 3.215135955810547, "step": 117560 }, { "epoch": 0.16113333333333332, "grad_norm": 0.21486517786979675, "learning_rate": 8.801564457107597e-05, "loss": 5.320541763305664, "step": 117570 }, { "epoch": 0.1612, "grad_norm": 0.19772259891033173, "learning_rate": 8.800848349456055e-05, "loss": 3.199207305908203, "step": 117580 }, { "epoch": 0.16126666666666667, "grad_norm": 0.24174395203590393, "learning_rate": 8.80013205706893e-05, "loss": 3.191281318664551, "step": 117590 }, { "epoch": 0.16133333333333333, "grad_norm": 0.18800696730613708, "learning_rate": 8.799415579981033e-05, "loss": 3.1980724334716797, "step": 117600 }, { "epoch": 0.1614, "grad_norm": 0.21889258921146393, "learning_rate": 8.798698918227194e-05, "loss": 3.1990880966186523, "step": 117610 }, { "epoch": 0.16146666666666668, "grad_norm": 0.1736421287059784, "learning_rate": 8.797982071842241e-05, "loss": 3.1829496383666993, "step": 117620 }, { "epoch": 0.16153333333333333, "grad_norm": 0.18490207195281982, "learning_rate": 8.797265040861015e-05, "loss": 3.163214111328125, "step": 117630 }, { "epoch": 0.1616, "grad_norm": 0.1981453001499176, "learning_rate": 8.796547825318368e-05, "loss": 3.2168319702148436, "step": 117640 }, { "epoch": 0.16166666666666665, "grad_norm": 0.1855965256690979, "learning_rate": 8.795830425249161e-05, "loss": 3.139117622375488, "step": 117650 }, { "epoch": 0.16173333333333334, "grad_norm": 0.1839778870344162, "learning_rate": 8.795112840688259e-05, "loss": 3.1780973434448243, "step": 117660 }, { "epoch": 0.1618, "grad_norm": 0.20665858685970306, "learning_rate": 8.79439507167054e-05, "loss": 3.1291399002075195, "step": 117670 }, { "epoch": 0.16186666666666666, "grad_norm": 0.2123611718416214, "learning_rate": 8.793677118230894e-05, "loss": 3.2495147705078127, "step": 117680 }, { "epoch": 0.16193333333333335, "grad_norm": 0.1998555213212967, "learning_rate": 8.79295898040421e-05, "loss": 3.3016120910644533, "step": 117690 }, { "epoch": 0.162, "grad_norm": 0.1688101589679718, "learning_rate": 8.792240658225396e-05, "loss": 3.0931991577148437, "step": 117700 }, { "epoch": 0.16206666666666666, "grad_norm": 0.17491811513900757, "learning_rate": 8.791522151729366e-05, "loss": 3.2187145233154295, "step": 117710 }, { "epoch": 0.16213333333333332, "grad_norm": 0.20334523916244507, "learning_rate": 8.79080346095104e-05, "loss": 3.2128765106201174, "step": 117720 }, { "epoch": 0.1622, "grad_norm": 0.20536507666110992, "learning_rate": 8.790084585925351e-05, "loss": 3.173935127258301, "step": 117730 }, { "epoch": 0.16226666666666667, "grad_norm": 0.19088861346244812, "learning_rate": 8.789365526687237e-05, "loss": 3.1833871841430663, "step": 117740 }, { "epoch": 0.16233333333333333, "grad_norm": 0.20249220728874207, "learning_rate": 8.78864628327165e-05, "loss": 3.137770080566406, "step": 117750 }, { "epoch": 0.1624, "grad_norm": 0.17734132707118988, "learning_rate": 8.787926855713542e-05, "loss": 3.2396385192871096, "step": 117760 }, { "epoch": 0.16246666666666668, "grad_norm": 0.1825917810201645, "learning_rate": 8.787207244047886e-05, "loss": 3.1760936737060548, "step": 117770 }, { "epoch": 0.16253333333333334, "grad_norm": 0.1753077656030655, "learning_rate": 8.786487448309654e-05, "loss": 3.1875642776489257, "step": 117780 }, { "epoch": 0.1626, "grad_norm": 0.18484348058700562, "learning_rate": 8.785767468533836e-05, "loss": 3.2001190185546875, "step": 117790 }, { "epoch": 0.16266666666666665, "grad_norm": 0.19780804216861725, "learning_rate": 8.785047304755419e-05, "loss": 3.1428800582885743, "step": 117800 }, { "epoch": 0.16273333333333334, "grad_norm": 0.19218385219573975, "learning_rate": 8.78432695700941e-05, "loss": 3.2437225341796876, "step": 117810 }, { "epoch": 0.1628, "grad_norm": 0.18323516845703125, "learning_rate": 8.783606425330819e-05, "loss": 3.1626264572143556, "step": 117820 }, { "epoch": 0.16286666666666666, "grad_norm": 0.19437408447265625, "learning_rate": 8.782885709754668e-05, "loss": 3.1544923782348633, "step": 117830 }, { "epoch": 0.16293333333333335, "grad_norm": 0.1842847615480423, "learning_rate": 8.782164810315984e-05, "loss": 3.184066963195801, "step": 117840 }, { "epoch": 0.163, "grad_norm": 0.17978887259960175, "learning_rate": 8.78144372704981e-05, "loss": 3.1840608596801756, "step": 117850 }, { "epoch": 0.16306666666666667, "grad_norm": 0.18121963739395142, "learning_rate": 8.780722459991186e-05, "loss": 3.1630910873413085, "step": 117860 }, { "epoch": 0.16313333333333332, "grad_norm": 0.3621894121170044, "learning_rate": 8.780001009175175e-05, "loss": 3.1456621170043944, "step": 117870 }, { "epoch": 0.1632, "grad_norm": 0.19030813872814178, "learning_rate": 8.779279374636841e-05, "loss": 3.078353500366211, "step": 117880 }, { "epoch": 0.16326666666666667, "grad_norm": 0.18776202201843262, "learning_rate": 8.778557556411255e-05, "loss": 3.127124214172363, "step": 117890 }, { "epoch": 0.16333333333333333, "grad_norm": 0.2003190964460373, "learning_rate": 8.777835554533502e-05, "loss": 3.208584213256836, "step": 117900 }, { "epoch": 0.1634, "grad_norm": 0.24278774857521057, "learning_rate": 8.777113369038676e-05, "loss": 3.1136566162109376, "step": 117910 }, { "epoch": 0.16346666666666668, "grad_norm": 0.18774348497390747, "learning_rate": 8.776390999961876e-05, "loss": 3.158965301513672, "step": 117920 }, { "epoch": 0.16353333333333334, "grad_norm": 0.3308679759502411, "learning_rate": 8.77566844733821e-05, "loss": 3.187213134765625, "step": 117930 }, { "epoch": 0.1636, "grad_norm": 0.17302174866199493, "learning_rate": 8.7749457112028e-05, "loss": 3.227678680419922, "step": 117940 }, { "epoch": 0.16366666666666665, "grad_norm": 0.20729248225688934, "learning_rate": 8.774222791590772e-05, "loss": 3.16510124206543, "step": 117950 }, { "epoch": 0.16373333333333334, "grad_norm": 0.19807036221027374, "learning_rate": 8.773499688537263e-05, "loss": 3.1703876495361327, "step": 117960 }, { "epoch": 0.1638, "grad_norm": 0.19292180240154266, "learning_rate": 8.772776402077419e-05, "loss": 3.142010307312012, "step": 117970 }, { "epoch": 0.16386666666666666, "grad_norm": 0.1874379962682724, "learning_rate": 8.772052932246393e-05, "loss": 3.146187400817871, "step": 117980 }, { "epoch": 0.16393333333333332, "grad_norm": 0.19777965545654297, "learning_rate": 8.77132927907935e-05, "loss": 3.1819225311279298, "step": 117990 }, { "epoch": 0.164, "grad_norm": 0.19324471056461334, "learning_rate": 8.770605442611462e-05, "loss": 3.1833654403686524, "step": 118000 }, { "epoch": 0.16406666666666667, "grad_norm": 0.19729483127593994, "learning_rate": 8.76988142287791e-05, "loss": 3.1816247940063476, "step": 118010 }, { "epoch": 0.16413333333333333, "grad_norm": 0.19861388206481934, "learning_rate": 8.769157219913884e-05, "loss": 3.1339513778686525, "step": 118020 }, { "epoch": 0.1642, "grad_norm": 0.17525865137577057, "learning_rate": 8.768432833754583e-05, "loss": 3.14807243347168, "step": 118030 }, { "epoch": 0.16426666666666667, "grad_norm": 0.20246951282024384, "learning_rate": 8.767708264435214e-05, "loss": 3.1532875061035157, "step": 118040 }, { "epoch": 0.16433333333333333, "grad_norm": 0.18260055780410767, "learning_rate": 8.766983511990996e-05, "loss": 3.1750335693359375, "step": 118050 }, { "epoch": 0.1644, "grad_norm": 0.18339918553829193, "learning_rate": 8.766258576457153e-05, "loss": 3.231616973876953, "step": 118060 }, { "epoch": 0.16446666666666668, "grad_norm": 0.1738063544034958, "learning_rate": 8.765533457868919e-05, "loss": 3.1226335525512696, "step": 118070 }, { "epoch": 0.16453333333333334, "grad_norm": 0.21040435135364532, "learning_rate": 8.76480815626154e-05, "loss": 3.238601303100586, "step": 118080 }, { "epoch": 0.1646, "grad_norm": 0.2079191654920578, "learning_rate": 8.764082671670266e-05, "loss": 3.185613822937012, "step": 118090 }, { "epoch": 0.16466666666666666, "grad_norm": 0.1996842324733734, "learning_rate": 8.76335700413036e-05, "loss": 3.2176795959472657, "step": 118100 }, { "epoch": 0.16473333333333334, "grad_norm": 0.187798872590065, "learning_rate": 8.76263115367709e-05, "loss": 3.2094093322753907, "step": 118110 }, { "epoch": 0.1648, "grad_norm": 0.1861574649810791, "learning_rate": 8.761905120345737e-05, "loss": 3.1645204544067385, "step": 118120 }, { "epoch": 0.16486666666666666, "grad_norm": 0.21548697352409363, "learning_rate": 8.761178904171587e-05, "loss": 3.4588787078857424, "step": 118130 }, { "epoch": 0.16493333333333332, "grad_norm": 0.19228944182395935, "learning_rate": 8.76045250518994e-05, "loss": 3.165936279296875, "step": 118140 }, { "epoch": 0.165, "grad_norm": 0.1786780208349228, "learning_rate": 8.7597259234361e-05, "loss": 3.1794971466064452, "step": 118150 }, { "epoch": 0.16506666666666667, "grad_norm": 0.18730124831199646, "learning_rate": 8.758999158945382e-05, "loss": 3.1643039703369142, "step": 118160 }, { "epoch": 0.16513333333333333, "grad_norm": 0.17566439509391785, "learning_rate": 8.758272211753108e-05, "loss": 3.246054458618164, "step": 118170 }, { "epoch": 0.1652, "grad_norm": 0.18864929676055908, "learning_rate": 8.757545081894611e-05, "loss": 3.1430696487426757, "step": 118180 }, { "epoch": 0.16526666666666667, "grad_norm": 0.17759177088737488, "learning_rate": 8.756817769405234e-05, "loss": 3.1584386825561523, "step": 118190 }, { "epoch": 0.16533333333333333, "grad_norm": 0.22381798923015594, "learning_rate": 8.756090274320325e-05, "loss": 3.1754123687744142, "step": 118200 }, { "epoch": 0.1654, "grad_norm": 0.17183011770248413, "learning_rate": 8.755362596675245e-05, "loss": 3.170297622680664, "step": 118210 }, { "epoch": 0.16546666666666668, "grad_norm": 0.19697821140289307, "learning_rate": 8.754634736505361e-05, "loss": 3.1365447998046876, "step": 118220 }, { "epoch": 0.16553333333333334, "grad_norm": 0.18117551505565643, "learning_rate": 8.753906693846047e-05, "loss": 3.1499969482421877, "step": 118230 }, { "epoch": 0.1656, "grad_norm": 0.18088267743587494, "learning_rate": 8.753178468732694e-05, "loss": 3.2250667572021485, "step": 118240 }, { "epoch": 0.16566666666666666, "grad_norm": 0.17758208513259888, "learning_rate": 8.752450061200692e-05, "loss": 3.192069435119629, "step": 118250 }, { "epoch": 0.16573333333333334, "grad_norm": 0.21192730963230133, "learning_rate": 8.751721471285447e-05, "loss": 3.133144569396973, "step": 118260 }, { "epoch": 0.1658, "grad_norm": 0.18822801113128662, "learning_rate": 8.75099269902237e-05, "loss": 3.1264150619506834, "step": 118270 }, { "epoch": 0.16586666666666666, "grad_norm": 0.1743152141571045, "learning_rate": 8.750263744446881e-05, "loss": 3.199213981628418, "step": 118280 }, { "epoch": 0.16593333333333332, "grad_norm": 0.18448546528816223, "learning_rate": 8.749534607594412e-05, "loss": 3.207162857055664, "step": 118290 }, { "epoch": 0.166, "grad_norm": 0.9267733097076416, "learning_rate": 8.7488052885004e-05, "loss": 3.2991085052490234, "step": 118300 }, { "epoch": 0.16606666666666667, "grad_norm": 0.21127013862133026, "learning_rate": 8.748075787200296e-05, "loss": 3.1743967056274416, "step": 118310 }, { "epoch": 0.16613333333333333, "grad_norm": 0.1915658712387085, "learning_rate": 8.747346103729552e-05, "loss": 3.2154052734375, "step": 118320 }, { "epoch": 0.1662, "grad_norm": 0.2372097223997116, "learning_rate": 8.746616238123637e-05, "loss": 3.127071189880371, "step": 118330 }, { "epoch": 0.16626666666666667, "grad_norm": 0.2435922920703888, "learning_rate": 8.745886190418024e-05, "loss": 3.1543212890625, "step": 118340 }, { "epoch": 0.16633333333333333, "grad_norm": 0.18592271208763123, "learning_rate": 8.745155960648195e-05, "loss": 3.168771171569824, "step": 118350 }, { "epoch": 0.1664, "grad_norm": 0.19534778594970703, "learning_rate": 8.744425548849643e-05, "loss": 3.1719980239868164, "step": 118360 }, { "epoch": 0.16646666666666668, "grad_norm": 0.18551334738731384, "learning_rate": 8.743694955057869e-05, "loss": 3.191775894165039, "step": 118370 }, { "epoch": 0.16653333333333334, "grad_norm": 0.19679895043373108, "learning_rate": 8.742964179308381e-05, "loss": 3.1450515747070313, "step": 118380 }, { "epoch": 0.1666, "grad_norm": 0.19031396508216858, "learning_rate": 8.7422332216367e-05, "loss": 3.1528509140014647, "step": 118390 }, { "epoch": 0.16666666666666666, "grad_norm": 0.19802504777908325, "learning_rate": 8.741502082078352e-05, "loss": 3.211081695556641, "step": 118400 }, { "epoch": 0.16673333333333334, "grad_norm": 0.19686496257781982, "learning_rate": 8.740770760668872e-05, "loss": 3.182558059692383, "step": 118410 }, { "epoch": 0.1668, "grad_norm": 0.2741928696632385, "learning_rate": 8.740039257443807e-05, "loss": 3.171588325500488, "step": 118420 }, { "epoch": 0.16686666666666666, "grad_norm": 0.19818370044231415, "learning_rate": 8.73930757243871e-05, "loss": 3.161284637451172, "step": 118430 }, { "epoch": 0.16693333333333332, "grad_norm": 0.21718089282512665, "learning_rate": 8.738575705689142e-05, "loss": 3.1920454025268556, "step": 118440 }, { "epoch": 0.167, "grad_norm": 0.1832529902458191, "learning_rate": 8.737843657230679e-05, "loss": 3.147245979309082, "step": 118450 }, { "epoch": 0.16706666666666667, "grad_norm": 0.186939999461174, "learning_rate": 8.737111427098897e-05, "loss": 3.1543275833129885, "step": 118460 }, { "epoch": 0.16713333333333333, "grad_norm": 0.18455784022808075, "learning_rate": 8.736379015329385e-05, "loss": 3.157526397705078, "step": 118470 }, { "epoch": 0.1672, "grad_norm": 0.24190956354141235, "learning_rate": 8.735646421957743e-05, "loss": 3.128617858886719, "step": 118480 }, { "epoch": 0.16726666666666667, "grad_norm": 0.18913628160953522, "learning_rate": 8.734913647019577e-05, "loss": 3.1342546463012697, "step": 118490 }, { "epoch": 0.16733333333333333, "grad_norm": 0.1858527660369873, "learning_rate": 8.734180690550505e-05, "loss": 3.174312400817871, "step": 118500 }, { "epoch": 0.1674, "grad_norm": 0.1835012137889862, "learning_rate": 8.733447552586149e-05, "loss": 3.149956703186035, "step": 118510 }, { "epoch": 0.16746666666666668, "grad_norm": 0.31534522771835327, "learning_rate": 8.732714233162141e-05, "loss": 3.207775115966797, "step": 118520 }, { "epoch": 0.16753333333333334, "grad_norm": 0.3189527094364166, "learning_rate": 8.731980732314126e-05, "loss": 3.184318733215332, "step": 118530 }, { "epoch": 0.1676, "grad_norm": 0.26977378129959106, "learning_rate": 8.731247050077753e-05, "loss": 3.2076148986816406, "step": 118540 }, { "epoch": 0.16766666666666666, "grad_norm": 0.18479400873184204, "learning_rate": 8.730513186488684e-05, "loss": 3.199959373474121, "step": 118550 }, { "epoch": 0.16773333333333335, "grad_norm": 0.1902797818183899, "learning_rate": 8.729779141582583e-05, "loss": 3.364847183227539, "step": 118560 }, { "epoch": 0.1678, "grad_norm": 0.17075678706169128, "learning_rate": 8.729044915395132e-05, "loss": 3.0954353332519533, "step": 118570 }, { "epoch": 0.16786666666666666, "grad_norm": 0.20461797714233398, "learning_rate": 8.728310507962016e-05, "loss": 3.16065673828125, "step": 118580 }, { "epoch": 0.16793333333333332, "grad_norm": 0.17398037016391754, "learning_rate": 8.727575919318929e-05, "loss": 3.1453731536865233, "step": 118590 }, { "epoch": 0.168, "grad_norm": 0.19193026423454285, "learning_rate": 8.726841149501576e-05, "loss": 3.1135431289672852, "step": 118600 }, { "epoch": 0.16806666666666667, "grad_norm": 0.19382672011852264, "learning_rate": 8.726106198545667e-05, "loss": 3.1894092559814453, "step": 118610 }, { "epoch": 0.16813333333333333, "grad_norm": 0.18831662833690643, "learning_rate": 8.725371066486926e-05, "loss": 3.2009811401367188, "step": 118620 }, { "epoch": 0.1682, "grad_norm": 0.1786315143108368, "learning_rate": 8.724635753361083e-05, "loss": 3.157421112060547, "step": 118630 }, { "epoch": 0.16826666666666668, "grad_norm": 0.2610945701599121, "learning_rate": 8.723900259203876e-05, "loss": 3.128001403808594, "step": 118640 }, { "epoch": 0.16833333333333333, "grad_norm": 0.18198204040527344, "learning_rate": 8.723164584051054e-05, "loss": 3.1593015670776365, "step": 118650 }, { "epoch": 0.1684, "grad_norm": 0.1884835809469223, "learning_rate": 8.72242872793837e-05, "loss": 3.1913896560668946, "step": 118660 }, { "epoch": 0.16846666666666665, "grad_norm": 0.18527862429618835, "learning_rate": 8.721692690901594e-05, "loss": 3.1750873565673827, "step": 118670 }, { "epoch": 0.16853333333333334, "grad_norm": 0.19908495247364044, "learning_rate": 8.720956472976498e-05, "loss": 3.1880687713623046, "step": 118680 }, { "epoch": 0.1686, "grad_norm": 0.18882408738136292, "learning_rate": 8.720220074198866e-05, "loss": 3.131977653503418, "step": 118690 }, { "epoch": 0.16866666666666666, "grad_norm": 0.18438304960727692, "learning_rate": 8.719483494604489e-05, "loss": 3.1559457778930664, "step": 118700 }, { "epoch": 0.16873333333333335, "grad_norm": 0.18139810860157013, "learning_rate": 8.718746734229166e-05, "loss": 3.1957555770874024, "step": 118710 }, { "epoch": 0.1688, "grad_norm": 0.19969123601913452, "learning_rate": 8.71800979310871e-05, "loss": 3.1381498336791993, "step": 118720 }, { "epoch": 0.16886666666666666, "grad_norm": 0.20507284998893738, "learning_rate": 8.717272671278936e-05, "loss": 3.2148765563964843, "step": 118730 }, { "epoch": 0.16893333333333332, "grad_norm": 0.18973971903324127, "learning_rate": 8.716535368775673e-05, "loss": 3.147504425048828, "step": 118740 }, { "epoch": 0.169, "grad_norm": 0.19109764695167542, "learning_rate": 8.715797885634755e-05, "loss": 3.132307434082031, "step": 118750 }, { "epoch": 0.16906666666666667, "grad_norm": 0.1956021636724472, "learning_rate": 8.715060221892026e-05, "loss": 3.274454116821289, "step": 118760 }, { "epoch": 0.16913333333333333, "grad_norm": 0.29708266258239746, "learning_rate": 8.714322377583341e-05, "loss": 3.217304992675781, "step": 118770 }, { "epoch": 0.1692, "grad_norm": 0.18544811010360718, "learning_rate": 8.713584352744563e-05, "loss": 3.2474990844726563, "step": 118780 }, { "epoch": 0.16926666666666668, "grad_norm": 0.2861015200614929, "learning_rate": 8.712846147411559e-05, "loss": 3.164324951171875, "step": 118790 }, { "epoch": 0.16933333333333334, "grad_norm": 0.1805543750524521, "learning_rate": 8.712107761620212e-05, "loss": 3.246195602416992, "step": 118800 }, { "epoch": 0.1694, "grad_norm": 0.17775726318359375, "learning_rate": 8.711369195406409e-05, "loss": 3.380698394775391, "step": 118810 }, { "epoch": 0.16946666666666665, "grad_norm": 0.19197595119476318, "learning_rate": 8.710630448806047e-05, "loss": 3.1894563674926757, "step": 118820 }, { "epoch": 0.16953333333333334, "grad_norm": 0.18715785443782806, "learning_rate": 8.709891521855033e-05, "loss": 3.15135498046875, "step": 118830 }, { "epoch": 0.1696, "grad_norm": 0.21918268501758575, "learning_rate": 8.709152414589278e-05, "loss": 3.168287467956543, "step": 118840 }, { "epoch": 0.16966666666666666, "grad_norm": 0.1940905898809433, "learning_rate": 8.708413127044712e-05, "loss": 3.199066925048828, "step": 118850 }, { "epoch": 0.16973333333333335, "grad_norm": 0.19896914064884186, "learning_rate": 8.70767365925726e-05, "loss": 3.13541259765625, "step": 118860 }, { "epoch": 0.1698, "grad_norm": 0.20658789575099945, "learning_rate": 8.706934011262867e-05, "loss": 3.145847129821777, "step": 118870 }, { "epoch": 0.16986666666666667, "grad_norm": 0.19688071310520172, "learning_rate": 8.706194183097482e-05, "loss": 3.1769432067871093, "step": 118880 }, { "epoch": 0.16993333333333333, "grad_norm": 0.19454804062843323, "learning_rate": 8.705454174797066e-05, "loss": 3.2523788452148437, "step": 118890 }, { "epoch": 0.17, "grad_norm": 0.19174379110336304, "learning_rate": 8.70471398639758e-05, "loss": 3.183890151977539, "step": 118900 }, { "epoch": 0.17006666666666667, "grad_norm": 0.223172128200531, "learning_rate": 8.703973617935004e-05, "loss": 3.1951446533203125, "step": 118910 }, { "epoch": 0.17013333333333333, "grad_norm": 0.23051464557647705, "learning_rate": 8.703233069445323e-05, "loss": 3.1598386764526367, "step": 118920 }, { "epoch": 0.1702, "grad_norm": 0.18712453544139862, "learning_rate": 8.70249234096453e-05, "loss": 3.167182731628418, "step": 118930 }, { "epoch": 0.17026666666666668, "grad_norm": 0.19015845656394958, "learning_rate": 8.701751432528627e-05, "loss": 3.1779735565185545, "step": 118940 }, { "epoch": 0.17033333333333334, "grad_norm": 0.18547670543193817, "learning_rate": 8.701010344173624e-05, "loss": 3.1653697967529295, "step": 118950 }, { "epoch": 0.1704, "grad_norm": 0.19738905131816864, "learning_rate": 8.700269075935541e-05, "loss": 3.1484216690063476, "step": 118960 }, { "epoch": 0.17046666666666666, "grad_norm": 0.17853909730911255, "learning_rate": 8.699527627850408e-05, "loss": 3.1729337692260744, "step": 118970 }, { "epoch": 0.17053333333333334, "grad_norm": 0.18060988187789917, "learning_rate": 8.69878599995426e-05, "loss": 3.1815900802612305, "step": 118980 }, { "epoch": 0.1706, "grad_norm": 0.17875535786151886, "learning_rate": 8.698044192283146e-05, "loss": 3.1416507720947267, "step": 118990 }, { "epoch": 0.17066666666666666, "grad_norm": 0.18246719241142273, "learning_rate": 8.697302204873116e-05, "loss": 3.2003250122070312, "step": 119000 }, { "epoch": 0.17073333333333332, "grad_norm": 0.1920650750398636, "learning_rate": 8.696560037760237e-05, "loss": 3.2084041595458985, "step": 119010 }, { "epoch": 0.1708, "grad_norm": 0.18611276149749756, "learning_rate": 8.69581769098058e-05, "loss": 3.1684825897216795, "step": 119020 }, { "epoch": 0.17086666666666667, "grad_norm": 0.19428297877311707, "learning_rate": 8.695075164570228e-05, "loss": 3.123139572143555, "step": 119030 }, { "epoch": 0.17093333333333333, "grad_norm": 0.17702791094779968, "learning_rate": 8.694332458565266e-05, "loss": 3.1312780380249023, "step": 119040 }, { "epoch": 0.171, "grad_norm": 0.1922427862882614, "learning_rate": 8.693589573001797e-05, "loss": 3.2262805938720702, "step": 119050 }, { "epoch": 0.17106666666666667, "grad_norm": 0.2221478521823883, "learning_rate": 8.692846507915926e-05, "loss": 3.140827178955078, "step": 119060 }, { "epoch": 0.17113333333333333, "grad_norm": 0.175761878490448, "learning_rate": 8.692103263343768e-05, "loss": 3.171834182739258, "step": 119070 }, { "epoch": 0.1712, "grad_norm": 0.1910477876663208, "learning_rate": 8.691359839321447e-05, "loss": 3.1929269790649415, "step": 119080 }, { "epoch": 0.17126666666666668, "grad_norm": 0.24511879682540894, "learning_rate": 8.6906162358851e-05, "loss": 3.22039794921875, "step": 119090 }, { "epoch": 0.17133333333333334, "grad_norm": 0.18123924732208252, "learning_rate": 8.689872453070864e-05, "loss": 3.155098533630371, "step": 119100 }, { "epoch": 0.1714, "grad_norm": 0.18578337132930756, "learning_rate": 8.689128490914893e-05, "loss": 3.1163230895996095, "step": 119110 }, { "epoch": 0.17146666666666666, "grad_norm": 0.18727408349514008, "learning_rate": 8.688384349453347e-05, "loss": 3.151838493347168, "step": 119120 }, { "epoch": 0.17153333333333334, "grad_norm": 0.1753525584936142, "learning_rate": 8.68764002872239e-05, "loss": 3.1310096740722657, "step": 119130 }, { "epoch": 0.1716, "grad_norm": 5.221131801605225, "learning_rate": 8.686895528758203e-05, "loss": 3.4974609375, "step": 119140 }, { "epoch": 0.17166666666666666, "grad_norm": 0.21390150487422943, "learning_rate": 8.686150849596969e-05, "loss": 3.1554855346679687, "step": 119150 }, { "epoch": 0.17173333333333332, "grad_norm": 0.19377711415290833, "learning_rate": 8.685405991274883e-05, "loss": 3.2987430572509764, "step": 119160 }, { "epoch": 0.1718, "grad_norm": 0.46389028429985046, "learning_rate": 8.684660953828149e-05, "loss": 3.1957406997680664, "step": 119170 }, { "epoch": 0.17186666666666667, "grad_norm": 0.7435762286186218, "learning_rate": 8.683915737292976e-05, "loss": 3.198529052734375, "step": 119180 }, { "epoch": 0.17193333333333333, "grad_norm": 0.19123999774456024, "learning_rate": 8.683170341705587e-05, "loss": 3.1861610412597656, "step": 119190 }, { "epoch": 0.172, "grad_norm": 0.1970609426498413, "learning_rate": 8.682424767102211e-05, "loss": 3.1705698013305663, "step": 119200 }, { "epoch": 0.17206666666666667, "grad_norm": 0.18046672642230988, "learning_rate": 8.681679013519081e-05, "loss": 3.1878488540649412, "step": 119210 }, { "epoch": 0.17213333333333333, "grad_norm": 0.2831767797470093, "learning_rate": 8.68093308099245e-05, "loss": 3.0795141220092774, "step": 119220 }, { "epoch": 0.1722, "grad_norm": 0.190758615732193, "learning_rate": 8.68018696955857e-05, "loss": 3.186728858947754, "step": 119230 }, { "epoch": 0.17226666666666668, "grad_norm": 0.17803426086902618, "learning_rate": 8.679440679253706e-05, "loss": 3.209795379638672, "step": 119240 }, { "epoch": 0.17233333333333334, "grad_norm": 0.17996187508106232, "learning_rate": 8.678694210114129e-05, "loss": 3.1254999160766603, "step": 119250 }, { "epoch": 0.1724, "grad_norm": 0.18574629724025726, "learning_rate": 8.67794756217612e-05, "loss": 3.14648494720459, "step": 119260 }, { "epoch": 0.17246666666666666, "grad_norm": 0.19396649301052094, "learning_rate": 8.677200735475971e-05, "loss": 3.147232246398926, "step": 119270 }, { "epoch": 0.17253333333333334, "grad_norm": 0.18598175048828125, "learning_rate": 8.676453730049979e-05, "loss": 3.07678108215332, "step": 119280 }, { "epoch": 0.1726, "grad_norm": 0.18581397831439972, "learning_rate": 8.675706545934451e-05, "loss": 3.151928520202637, "step": 119290 }, { "epoch": 0.17266666666666666, "grad_norm": 0.17795704305171967, "learning_rate": 8.674959183165705e-05, "loss": 3.144610786437988, "step": 119300 }, { "epoch": 0.17273333333333332, "grad_norm": 0.18495509028434753, "learning_rate": 8.674211641780063e-05, "loss": 3.209019088745117, "step": 119310 }, { "epoch": 0.1728, "grad_norm": 0.18757972121238708, "learning_rate": 8.67346392181386e-05, "loss": 3.160280227661133, "step": 119320 }, { "epoch": 0.17286666666666667, "grad_norm": 0.2514232397079468, "learning_rate": 8.672716023303437e-05, "loss": 3.147739028930664, "step": 119330 }, { "epoch": 0.17293333333333333, "grad_norm": 0.703903317451477, "learning_rate": 8.671967946285147e-05, "loss": 3.135213279724121, "step": 119340 }, { "epoch": 0.173, "grad_norm": 0.1667642742395401, "learning_rate": 8.671219690795346e-05, "loss": 3.2483123779296874, "step": 119350 }, { "epoch": 0.17306666666666667, "grad_norm": 0.21168306469917297, "learning_rate": 8.670471256870405e-05, "loss": 3.1238916397094725, "step": 119360 }, { "epoch": 0.17313333333333333, "grad_norm": 0.21186283230781555, "learning_rate": 8.669722644546698e-05, "loss": 3.1745298385620115, "step": 119370 }, { "epoch": 0.1732, "grad_norm": 0.21106873452663422, "learning_rate": 8.668973853860614e-05, "loss": 3.091720962524414, "step": 119380 }, { "epoch": 0.17326666666666668, "grad_norm": 0.20880410075187683, "learning_rate": 8.668224884848543e-05, "loss": 3.2654254913330076, "step": 119390 }, { "epoch": 0.17333333333333334, "grad_norm": 0.17858079075813293, "learning_rate": 8.66747573754689e-05, "loss": 3.136305046081543, "step": 119400 }, { "epoch": 0.1734, "grad_norm": 0.2725478708744049, "learning_rate": 8.666726411992065e-05, "loss": 3.213727569580078, "step": 119410 }, { "epoch": 0.17346666666666666, "grad_norm": 0.20742323994636536, "learning_rate": 8.665976908220493e-05, "loss": 3.161612701416016, "step": 119420 }, { "epoch": 0.17353333333333334, "grad_norm": 0.1780846267938614, "learning_rate": 8.665227226268596e-05, "loss": 3.1278915405273438, "step": 119430 }, { "epoch": 0.1736, "grad_norm": 0.21765148639678955, "learning_rate": 8.664477366172814e-05, "loss": 3.1699350357055662, "step": 119440 }, { "epoch": 0.17366666666666666, "grad_norm": 0.180785670876503, "learning_rate": 8.663727327969593e-05, "loss": 3.138622856140137, "step": 119450 }, { "epoch": 0.17373333333333332, "grad_norm": 0.18879829347133636, "learning_rate": 8.662977111695389e-05, "loss": 3.2911590576171874, "step": 119460 }, { "epoch": 0.1738, "grad_norm": 0.20278239250183105, "learning_rate": 8.662226717386663e-05, "loss": 3.1479358673095703, "step": 119470 }, { "epoch": 0.17386666666666667, "grad_norm": 0.18192796409130096, "learning_rate": 8.661476145079889e-05, "loss": 3.1395565032958985, "step": 119480 }, { "epoch": 0.17393333333333333, "grad_norm": 0.18961584568023682, "learning_rate": 8.660725394811546e-05, "loss": 3.151737594604492, "step": 119490 }, { "epoch": 0.174, "grad_norm": 0.21059201657772064, "learning_rate": 8.659974466618126e-05, "loss": 3.1007524490356446, "step": 119500 }, { "epoch": 0.17406666666666668, "grad_norm": 0.1874569207429886, "learning_rate": 8.659223360536124e-05, "loss": 3.1481950759887694, "step": 119510 }, { "epoch": 0.17413333333333333, "grad_norm": 0.20005691051483154, "learning_rate": 8.658472076602047e-05, "loss": 3.1530075073242188, "step": 119520 }, { "epoch": 0.1742, "grad_norm": 0.20185589790344238, "learning_rate": 8.657720614852411e-05, "loss": 3.1276504516601564, "step": 119530 }, { "epoch": 0.17426666666666665, "grad_norm": 0.1918351948261261, "learning_rate": 8.65696897532374e-05, "loss": 3.318642425537109, "step": 119540 }, { "epoch": 0.17433333333333334, "grad_norm": 0.1842793971300125, "learning_rate": 8.656217158052567e-05, "loss": 3.1643817901611326, "step": 119550 }, { "epoch": 0.1744, "grad_norm": 0.22758011519908905, "learning_rate": 8.655465163075432e-05, "loss": 3.1590723037719726, "step": 119560 }, { "epoch": 0.17446666666666666, "grad_norm": 0.19423648715019226, "learning_rate": 8.654712990428886e-05, "loss": 3.150889205932617, "step": 119570 }, { "epoch": 0.17453333333333335, "grad_norm": 0.18903464078903198, "learning_rate": 8.653960640149486e-05, "loss": 3.225634002685547, "step": 119580 }, { "epoch": 0.1746, "grad_norm": 0.1756870299577713, "learning_rate": 8.653208112273801e-05, "loss": 3.17054443359375, "step": 119590 }, { "epoch": 0.17466666666666666, "grad_norm": 0.21795551478862762, "learning_rate": 8.652455406838403e-05, "loss": 3.2260059356689452, "step": 119600 }, { "epoch": 0.17473333333333332, "grad_norm": 0.22275179624557495, "learning_rate": 8.651702523879882e-05, "loss": 3.4613914489746094, "step": 119610 }, { "epoch": 0.1748, "grad_norm": 0.18384477496147156, "learning_rate": 8.650949463434826e-05, "loss": 3.1252874374389648, "step": 119620 }, { "epoch": 0.17486666666666667, "grad_norm": 0.2353755384683609, "learning_rate": 8.650196225539837e-05, "loss": 3.139452362060547, "step": 119630 }, { "epoch": 0.17493333333333333, "grad_norm": 0.17586715519428253, "learning_rate": 8.649442810231528e-05, "loss": 3.176097106933594, "step": 119640 }, { "epoch": 0.175, "grad_norm": 0.20056401193141937, "learning_rate": 8.648689217546518e-05, "loss": 3.1695882797241213, "step": 119650 }, { "epoch": 0.17506666666666668, "grad_norm": 0.2065398246049881, "learning_rate": 8.64793544752143e-05, "loss": 3.1660440444946287, "step": 119660 }, { "epoch": 0.17513333333333334, "grad_norm": 0.1931607872247696, "learning_rate": 8.647181500192904e-05, "loss": 3.188939094543457, "step": 119670 }, { "epoch": 0.1752, "grad_norm": 0.18377768993377686, "learning_rate": 8.646427375597583e-05, "loss": 3.1722415924072265, "step": 119680 }, { "epoch": 0.17526666666666665, "grad_norm": 0.18549764156341553, "learning_rate": 8.645673073772123e-05, "loss": 3.152785301208496, "step": 119690 }, { "epoch": 0.17533333333333334, "grad_norm": 0.18733830749988556, "learning_rate": 8.644918594753183e-05, "loss": 3.090981674194336, "step": 119700 }, { "epoch": 0.1754, "grad_norm": 0.21250569820404053, "learning_rate": 8.644163938577431e-05, "loss": 3.1503644943237306, "step": 119710 }, { "epoch": 0.17546666666666666, "grad_norm": 0.19753150641918182, "learning_rate": 8.643409105281554e-05, "loss": 3.1679866790771483, "step": 119720 }, { "epoch": 0.17553333333333335, "grad_norm": 0.21240782737731934, "learning_rate": 8.642654094902234e-05, "loss": 3.1678457260131836, "step": 119730 }, { "epoch": 0.1756, "grad_norm": 0.19288457930088043, "learning_rate": 8.641898907476167e-05, "loss": 3.1280796051025392, "step": 119740 }, { "epoch": 0.17566666666666667, "grad_norm": 0.21556462347507477, "learning_rate": 8.64114354304006e-05, "loss": 3.136199188232422, "step": 119750 }, { "epoch": 0.17573333333333332, "grad_norm": 0.191057950258255, "learning_rate": 8.640388001630627e-05, "loss": 3.0898244857788084, "step": 119760 }, { "epoch": 0.1758, "grad_norm": 0.19989198446273804, "learning_rate": 8.639632283284588e-05, "loss": 3.195694160461426, "step": 119770 }, { "epoch": 0.17586666666666667, "grad_norm": 0.20003683865070343, "learning_rate": 8.638876388038677e-05, "loss": 3.2016441345214846, "step": 119780 }, { "epoch": 0.17593333333333333, "grad_norm": 0.18591344356536865, "learning_rate": 8.63812031592963e-05, "loss": 3.1578466415405275, "step": 119790 }, { "epoch": 0.176, "grad_norm": 0.31602728366851807, "learning_rate": 8.637364066994198e-05, "loss": 3.1897136688232424, "step": 119800 }, { "epoch": 0.17606666666666668, "grad_norm": 0.18654175102710724, "learning_rate": 8.636607641269134e-05, "loss": 3.157954788208008, "step": 119810 }, { "epoch": 0.17613333333333334, "grad_norm": 0.17319543659687042, "learning_rate": 8.635851038791204e-05, "loss": 3.121921730041504, "step": 119820 }, { "epoch": 0.1762, "grad_norm": 0.18633168935775757, "learning_rate": 8.635094259597187e-05, "loss": 3.1364097595214844, "step": 119830 }, { "epoch": 0.17626666666666665, "grad_norm": 0.18994519114494324, "learning_rate": 8.634337303723859e-05, "loss": 3.178271484375, "step": 119840 }, { "epoch": 0.17633333333333334, "grad_norm": 0.18767902255058289, "learning_rate": 8.633580171208011e-05, "loss": 3.22412223815918, "step": 119850 }, { "epoch": 0.1764, "grad_norm": 0.1945076733827591, "learning_rate": 8.632822862086447e-05, "loss": 3.159276580810547, "step": 119860 }, { "epoch": 0.17646666666666666, "grad_norm": 0.20464681088924408, "learning_rate": 8.632065376395973e-05, "loss": 3.2149673461914063, "step": 119870 }, { "epoch": 0.17653333333333332, "grad_norm": 0.2123376876115799, "learning_rate": 8.631307714173403e-05, "loss": 3.1606279373168946, "step": 119880 }, { "epoch": 0.1766, "grad_norm": 0.19205990433692932, "learning_rate": 8.630549875455566e-05, "loss": 3.15264778137207, "step": 119890 }, { "epoch": 0.17666666666666667, "grad_norm": 0.1901373416185379, "learning_rate": 8.629791860279294e-05, "loss": 3.1535999298095705, "step": 119900 }, { "epoch": 0.17673333333333333, "grad_norm": 0.21405717730522156, "learning_rate": 8.62903366868143e-05, "loss": 3.253282165527344, "step": 119910 }, { "epoch": 0.1768, "grad_norm": 0.1678513139486313, "learning_rate": 8.628275300698825e-05, "loss": 3.280496597290039, "step": 119920 }, { "epoch": 0.17686666666666667, "grad_norm": 0.18146590888500214, "learning_rate": 8.627516756368337e-05, "loss": 3.1363401412963867, "step": 119930 }, { "epoch": 0.17693333333333333, "grad_norm": 0.18811801075935364, "learning_rate": 8.626758035726837e-05, "loss": 3.110126495361328, "step": 119940 }, { "epoch": 0.177, "grad_norm": 0.2013738602399826, "learning_rate": 8.625999138811199e-05, "loss": 3.126674461364746, "step": 119950 }, { "epoch": 0.17706666666666668, "grad_norm": 0.2013985812664032, "learning_rate": 8.625240065658309e-05, "loss": 3.1573862075805663, "step": 119960 }, { "epoch": 0.17713333333333334, "grad_norm": 0.25219976902008057, "learning_rate": 8.624480816305061e-05, "loss": 3.1510942459106444, "step": 119970 }, { "epoch": 0.1772, "grad_norm": 0.18733727931976318, "learning_rate": 8.62372139078836e-05, "loss": 3.14178581237793, "step": 119980 }, { "epoch": 0.17726666666666666, "grad_norm": 0.17492598295211792, "learning_rate": 8.622961789145112e-05, "loss": 3.1732975006103517, "step": 119990 }, { "epoch": 0.17733333333333334, "grad_norm": 0.18227721750736237, "learning_rate": 8.622202011412239e-05, "loss": 3.1939001083374023, "step": 120000 }, { "epoch": 0.1774, "grad_norm": 0.23379863798618317, "learning_rate": 8.621442057626671e-05, "loss": 3.1571741104125977, "step": 120010 }, { "epoch": 0.17746666666666666, "grad_norm": 0.17814698815345764, "learning_rate": 8.620681927825343e-05, "loss": 3.1388940811157227, "step": 120020 }, { "epoch": 0.17753333333333332, "grad_norm": 0.2122572958469391, "learning_rate": 8.619921622045198e-05, "loss": 3.1595386505126952, "step": 120030 }, { "epoch": 0.1776, "grad_norm": 0.19371671974658966, "learning_rate": 8.61916114032319e-05, "loss": 3.1731698989868162, "step": 120040 }, { "epoch": 0.17766666666666667, "grad_norm": 0.2064049243927002, "learning_rate": 8.618400482696287e-05, "loss": 3.1245424270629885, "step": 120050 }, { "epoch": 0.17773333333333333, "grad_norm": 0.19864888489246368, "learning_rate": 8.617639649201455e-05, "loss": 3.1462995529174806, "step": 120060 }, { "epoch": 0.1778, "grad_norm": 0.18327797949314117, "learning_rate": 8.616878639875672e-05, "loss": 2.948857879638672, "step": 120070 }, { "epoch": 0.17786666666666667, "grad_norm": 0.1755279153585434, "learning_rate": 8.616117454755929e-05, "loss": 3.092691421508789, "step": 120080 }, { "epoch": 0.17793333333333333, "grad_norm": 0.20332209765911102, "learning_rate": 8.615356093879221e-05, "loss": 3.1583635330200197, "step": 120090 }, { "epoch": 0.178, "grad_norm": 0.19999848306179047, "learning_rate": 8.614594557282553e-05, "loss": 3.1731338500976562, "step": 120100 }, { "epoch": 0.17806666666666668, "grad_norm": 0.1801498681306839, "learning_rate": 8.61383284500294e-05, "loss": 3.157342720031738, "step": 120110 }, { "epoch": 0.17813333333333334, "grad_norm": 0.21340195834636688, "learning_rate": 8.613070957077404e-05, "loss": 3.1660736083984373, "step": 120120 }, { "epoch": 0.1782, "grad_norm": 0.20561528205871582, "learning_rate": 8.612308893542974e-05, "loss": 3.1180774688720705, "step": 120130 }, { "epoch": 0.17826666666666666, "grad_norm": 0.21911191940307617, "learning_rate": 8.611546654436691e-05, "loss": 3.198392868041992, "step": 120140 }, { "epoch": 0.17833333333333334, "grad_norm": 0.21200360357761383, "learning_rate": 8.610784239795599e-05, "loss": 3.159212112426758, "step": 120150 }, { "epoch": 0.1784, "grad_norm": 0.17524172365665436, "learning_rate": 8.610021649656759e-05, "loss": 3.166378402709961, "step": 120160 }, { "epoch": 0.17846666666666666, "grad_norm": 0.18298691511154175, "learning_rate": 8.609258884057235e-05, "loss": 3.1356861114501955, "step": 120170 }, { "epoch": 0.17853333333333332, "grad_norm": 0.1839464157819748, "learning_rate": 8.608495943034096e-05, "loss": 3.1779047012329102, "step": 120180 }, { "epoch": 0.1786, "grad_norm": 0.1878628134727478, "learning_rate": 8.607732826624429e-05, "loss": 3.187811851501465, "step": 120190 }, { "epoch": 0.17866666666666667, "grad_norm": 0.2021353393793106, "learning_rate": 8.60696953486532e-05, "loss": 3.179980659484863, "step": 120200 }, { "epoch": 0.17873333333333333, "grad_norm": 0.17716869711875916, "learning_rate": 8.606206067793871e-05, "loss": 3.2136138916015624, "step": 120210 }, { "epoch": 0.1788, "grad_norm": 0.18257783353328705, "learning_rate": 8.605442425447189e-05, "loss": 3.1500165939331053, "step": 120220 }, { "epoch": 0.17886666666666667, "grad_norm": 0.21726837754249573, "learning_rate": 8.604678607862389e-05, "loss": 3.1664941787719725, "step": 120230 }, { "epoch": 0.17893333333333333, "grad_norm": 0.17624028027057648, "learning_rate": 8.603914615076594e-05, "loss": 3.1594404220581054, "step": 120240 }, { "epoch": 0.179, "grad_norm": 0.20749004185199738, "learning_rate": 8.60315044712694e-05, "loss": 3.169297790527344, "step": 120250 }, { "epoch": 0.17906666666666668, "grad_norm": 0.20415370166301727, "learning_rate": 8.602386104050567e-05, "loss": 3.209854507446289, "step": 120260 }, { "epoch": 0.17913333333333334, "grad_norm": 0.1862858533859253, "learning_rate": 8.601621585884624e-05, "loss": 3.168831443786621, "step": 120270 }, { "epoch": 0.1792, "grad_norm": 0.20665806531906128, "learning_rate": 8.600856892666272e-05, "loss": 3.113919258117676, "step": 120280 }, { "epoch": 0.17926666666666666, "grad_norm": 0.26570889353752136, "learning_rate": 8.600092024432676e-05, "loss": 3.243244934082031, "step": 120290 }, { "epoch": 0.17933333333333334, "grad_norm": 0.1931823343038559, "learning_rate": 8.599326981221012e-05, "loss": 3.241015625, "step": 120300 }, { "epoch": 0.1794, "grad_norm": 0.1953902542591095, "learning_rate": 8.598561763068464e-05, "loss": 3.2210678100585937, "step": 120310 }, { "epoch": 0.17946666666666666, "grad_norm": 0.18352645635604858, "learning_rate": 8.597796370012223e-05, "loss": 3.173002815246582, "step": 120320 }, { "epoch": 0.17953333333333332, "grad_norm": 0.20248858630657196, "learning_rate": 8.597030802089494e-05, "loss": 3.1605430603027345, "step": 120330 }, { "epoch": 0.1796, "grad_norm": 0.2374042421579361, "learning_rate": 8.596265059337483e-05, "loss": 3.2418277740478514, "step": 120340 }, { "epoch": 0.17966666666666667, "grad_norm": 0.2201642543077469, "learning_rate": 8.595499141793409e-05, "loss": 3.1438180923461916, "step": 120350 }, { "epoch": 0.17973333333333333, "grad_norm": 0.1912516951560974, "learning_rate": 8.594733049494496e-05, "loss": 3.202425003051758, "step": 120360 }, { "epoch": 0.1798, "grad_norm": 0.19848047196865082, "learning_rate": 8.593966782477983e-05, "loss": 3.1062347412109377, "step": 120370 }, { "epoch": 0.17986666666666667, "grad_norm": 0.2241443395614624, "learning_rate": 8.593200340781113e-05, "loss": 3.269947052001953, "step": 120380 }, { "epoch": 0.17993333333333333, "grad_norm": 0.20158369839191437, "learning_rate": 8.592433724441135e-05, "loss": 3.1658870697021486, "step": 120390 }, { "epoch": 0.18, "grad_norm": 0.18215499818325043, "learning_rate": 8.591666933495313e-05, "loss": 3.1470979690551757, "step": 120400 }, { "epoch": 0.18006666666666668, "grad_norm": 0.17709815502166748, "learning_rate": 8.590899967980913e-05, "loss": 3.129801559448242, "step": 120410 }, { "epoch": 0.18013333333333334, "grad_norm": 0.19586800038814545, "learning_rate": 8.590132827935214e-05, "loss": 3.1603439331054686, "step": 120420 }, { "epoch": 0.1802, "grad_norm": 0.4365495443344116, "learning_rate": 8.5893655133955e-05, "loss": 3.210569381713867, "step": 120430 }, { "epoch": 0.18026666666666666, "grad_norm": 0.19283850491046906, "learning_rate": 8.588598024399068e-05, "loss": 3.306136703491211, "step": 120440 }, { "epoch": 0.18033333333333335, "grad_norm": 0.22231142222881317, "learning_rate": 8.58783036098322e-05, "loss": 3.1825325012207033, "step": 120450 }, { "epoch": 0.1804, "grad_norm": 0.197474405169487, "learning_rate": 8.587062523185268e-05, "loss": 3.2150222778320314, "step": 120460 }, { "epoch": 0.18046666666666666, "grad_norm": 0.19582702219486237, "learning_rate": 8.586294511042529e-05, "loss": 3.246807098388672, "step": 120470 }, { "epoch": 0.18053333333333332, "grad_norm": 0.189974844455719, "learning_rate": 8.585526324592335e-05, "loss": 3.179202842712402, "step": 120480 }, { "epoch": 0.1806, "grad_norm": 0.17900213599205017, "learning_rate": 8.58475796387202e-05, "loss": 3.139773368835449, "step": 120490 }, { "epoch": 0.18066666666666667, "grad_norm": 0.19089634716510773, "learning_rate": 8.583989428918931e-05, "loss": 3.1728517532348635, "step": 120500 }, { "epoch": 0.18073333333333333, "grad_norm": 0.1784435361623764, "learning_rate": 8.58322071977042e-05, "loss": 3.1515621185302733, "step": 120510 }, { "epoch": 0.1808, "grad_norm": 0.20231975615024567, "learning_rate": 8.58245183646385e-05, "loss": 3.6108688354492187, "step": 120520 }, { "epoch": 0.18086666666666668, "grad_norm": 0.19610503315925598, "learning_rate": 8.581682779036592e-05, "loss": 3.1839601516723635, "step": 120530 }, { "epoch": 0.18093333333333333, "grad_norm": 0.2099122256040573, "learning_rate": 8.580913547526023e-05, "loss": 3.1847095489501953, "step": 120540 }, { "epoch": 0.181, "grad_norm": 0.25131890177726746, "learning_rate": 8.580144141969535e-05, "loss": 3.1411237716674805, "step": 120550 }, { "epoch": 0.18106666666666665, "grad_norm": 0.2888810336589813, "learning_rate": 8.579374562404521e-05, "loss": 3.3330413818359377, "step": 120560 }, { "epoch": 0.18113333333333334, "grad_norm": 0.21883688867092133, "learning_rate": 8.578604808868384e-05, "loss": 3.2304336547851564, "step": 120570 }, { "epoch": 0.1812, "grad_norm": 0.1919129490852356, "learning_rate": 8.57783488139854e-05, "loss": 3.153928756713867, "step": 120580 }, { "epoch": 0.18126666666666666, "grad_norm": 0.7619603276252747, "learning_rate": 8.577064780032411e-05, "loss": 3.3652065277099608, "step": 120590 }, { "epoch": 0.18133333333333335, "grad_norm": 0.2506420910358429, "learning_rate": 8.576294504807423e-05, "loss": 3.251599884033203, "step": 120600 }, { "epoch": 0.1814, "grad_norm": 0.3113124668598175, "learning_rate": 8.575524055761018e-05, "loss": 3.096762847900391, "step": 120610 }, { "epoch": 0.18146666666666667, "grad_norm": 0.4423970878124237, "learning_rate": 8.574753432930638e-05, "loss": 3.193179702758789, "step": 120620 }, { "epoch": 0.18153333333333332, "grad_norm": 0.2058641016483307, "learning_rate": 8.573982636353743e-05, "loss": 3.1500539779663086, "step": 120630 }, { "epoch": 0.1816, "grad_norm": 0.1983710378408432, "learning_rate": 8.573211666067793e-05, "loss": 3.1642810821533205, "step": 120640 }, { "epoch": 0.18166666666666667, "grad_norm": 0.1785162389278412, "learning_rate": 8.572440522110264e-05, "loss": 3.1691614151000977, "step": 120650 }, { "epoch": 0.18173333333333333, "grad_norm": 0.28313568234443665, "learning_rate": 8.571669204518632e-05, "loss": 3.1666860580444336, "step": 120660 }, { "epoch": 0.1818, "grad_norm": 0.1864609569311142, "learning_rate": 8.570897713330393e-05, "loss": 3.1557119369506834, "step": 120670 }, { "epoch": 0.18186666666666668, "grad_norm": 0.2035660743713379, "learning_rate": 8.570126048583036e-05, "loss": 3.18792781829834, "step": 120680 }, { "epoch": 0.18193333333333334, "grad_norm": 0.17669576406478882, "learning_rate": 8.56935421031407e-05, "loss": 3.1398956298828127, "step": 120690 }, { "epoch": 0.182, "grad_norm": 0.20178170502185822, "learning_rate": 8.568582198561013e-05, "loss": 3.1616485595703123, "step": 120700 }, { "epoch": 0.18206666666666665, "grad_norm": 0.20660778880119324, "learning_rate": 8.567810013361382e-05, "loss": 3.1650487899780275, "step": 120710 }, { "epoch": 0.18213333333333334, "grad_norm": 0.22552303969860077, "learning_rate": 8.567037654752711e-05, "loss": 3.1882904052734373, "step": 120720 }, { "epoch": 0.1822, "grad_norm": 0.19813162088394165, "learning_rate": 8.566265122772539e-05, "loss": 3.1458904266357424, "step": 120730 }, { "epoch": 0.18226666666666666, "grad_norm": 0.6987144947052002, "learning_rate": 8.565492417458414e-05, "loss": 3.283124542236328, "step": 120740 }, { "epoch": 0.18233333333333332, "grad_norm": 0.1985749453306198, "learning_rate": 8.564719538847894e-05, "loss": 3.178812026977539, "step": 120750 }, { "epoch": 0.1824, "grad_norm": 0.1764683872461319, "learning_rate": 8.56394648697854e-05, "loss": 3.1730356216430664, "step": 120760 }, { "epoch": 0.18246666666666667, "grad_norm": 0.2056296020746231, "learning_rate": 8.563173261887929e-05, "loss": 3.2042041778564454, "step": 120770 }, { "epoch": 0.18253333333333333, "grad_norm": 0.22672230005264282, "learning_rate": 8.562399863613642e-05, "loss": 3.1781278610229493, "step": 120780 }, { "epoch": 0.1826, "grad_norm": 0.17615297436714172, "learning_rate": 8.561626292193268e-05, "loss": 3.1588489532470705, "step": 120790 }, { "epoch": 0.18266666666666667, "grad_norm": 0.21887566149234772, "learning_rate": 8.560852547664405e-05, "loss": 3.117685890197754, "step": 120800 }, { "epoch": 0.18273333333333333, "grad_norm": 0.18906837701797485, "learning_rate": 8.56007863006466e-05, "loss": 3.2848052978515625, "step": 120810 }, { "epoch": 0.1828, "grad_norm": 0.6222308278083801, "learning_rate": 8.55930453943165e-05, "loss": 3.2228622436523438, "step": 120820 }, { "epoch": 0.18286666666666668, "grad_norm": 0.20151154696941376, "learning_rate": 8.558530275802998e-05, "loss": 3.1825023651123048, "step": 120830 }, { "epoch": 0.18293333333333334, "grad_norm": 0.18550315499305725, "learning_rate": 8.557755839216334e-05, "loss": 3.2042327880859376, "step": 120840 }, { "epoch": 0.183, "grad_norm": 0.20477646589279175, "learning_rate": 8.556981229709303e-05, "loss": 3.1081199645996094, "step": 120850 }, { "epoch": 0.18306666666666666, "grad_norm": 0.19949103891849518, "learning_rate": 8.55620644731955e-05, "loss": 3.1598220825195313, "step": 120860 }, { "epoch": 0.18313333333333334, "grad_norm": 0.18899217247962952, "learning_rate": 8.555431492084734e-05, "loss": 3.15075740814209, "step": 120870 }, { "epoch": 0.1832, "grad_norm": 0.1805112063884735, "learning_rate": 8.554656364042521e-05, "loss": 3.144662094116211, "step": 120880 }, { "epoch": 0.18326666666666666, "grad_norm": 0.19792214035987854, "learning_rate": 8.553881063230585e-05, "loss": 3.1085329055786133, "step": 120890 }, { "epoch": 0.18333333333333332, "grad_norm": 0.18570548295974731, "learning_rate": 8.553105589686605e-05, "loss": 3.1796239852905273, "step": 120900 }, { "epoch": 0.1834, "grad_norm": 0.19503472745418549, "learning_rate": 8.552329943448278e-05, "loss": 3.166265296936035, "step": 120910 }, { "epoch": 0.18346666666666667, "grad_norm": 0.18124493956565857, "learning_rate": 8.551554124553301e-05, "loss": 3.1459114074707033, "step": 120920 }, { "epoch": 0.18353333333333333, "grad_norm": 0.2242344319820404, "learning_rate": 8.550778133039378e-05, "loss": 3.20946044921875, "step": 120930 }, { "epoch": 0.1836, "grad_norm": 0.17936305701732635, "learning_rate": 8.550001968944233e-05, "loss": 3.159326934814453, "step": 120940 }, { "epoch": 0.18366666666666667, "grad_norm": 0.1833767592906952, "learning_rate": 8.549225632305583e-05, "loss": 3.1454336166381838, "step": 120950 }, { "epoch": 0.18373333333333333, "grad_norm": 0.4267731010913849, "learning_rate": 8.548449123161163e-05, "loss": 3.240967559814453, "step": 120960 }, { "epoch": 0.1838, "grad_norm": 0.18036532402038574, "learning_rate": 8.547672441548716e-05, "loss": 3.145497131347656, "step": 120970 }, { "epoch": 0.18386666666666668, "grad_norm": 0.18356584012508392, "learning_rate": 8.54689558750599e-05, "loss": 3.146552085876465, "step": 120980 }, { "epoch": 0.18393333333333334, "grad_norm": 0.176442950963974, "learning_rate": 8.546118561070744e-05, "loss": 3.1718589782714846, "step": 120990 }, { "epoch": 0.184, "grad_norm": 0.16744060814380646, "learning_rate": 8.545341362280743e-05, "loss": 3.177459716796875, "step": 121000 }, { "epoch": 0.18406666666666666, "grad_norm": 0.17861029505729675, "learning_rate": 8.544563991173765e-05, "loss": 3.114496040344238, "step": 121010 }, { "epoch": 0.18413333333333334, "grad_norm": 0.9995338320732117, "learning_rate": 8.54378644778759e-05, "loss": 3.21118278503418, "step": 121020 }, { "epoch": 0.1842, "grad_norm": 0.19579210877418518, "learning_rate": 8.543008732160012e-05, "loss": 3.1725748062133787, "step": 121030 }, { "epoch": 0.18426666666666666, "grad_norm": 0.23137304186820984, "learning_rate": 8.542230844328827e-05, "loss": 3.221246337890625, "step": 121040 }, { "epoch": 0.18433333333333332, "grad_norm": 0.19076015055179596, "learning_rate": 8.541452784331847e-05, "loss": 3.160585403442383, "step": 121050 }, { "epoch": 0.1844, "grad_norm": 0.2068902850151062, "learning_rate": 8.540674552206887e-05, "loss": 3.160508918762207, "step": 121060 }, { "epoch": 0.18446666666666667, "grad_norm": 0.19586554169654846, "learning_rate": 8.539896147991773e-05, "loss": 3.1697837829589846, "step": 121070 }, { "epoch": 0.18453333333333333, "grad_norm": 0.22224575281143188, "learning_rate": 8.539117571724339e-05, "loss": 3.156399726867676, "step": 121080 }, { "epoch": 0.1846, "grad_norm": 0.48992490768432617, "learning_rate": 8.538338823442424e-05, "loss": 3.1756959915161134, "step": 121090 }, { "epoch": 0.18466666666666667, "grad_norm": 0.19770655035972595, "learning_rate": 8.537559903183881e-05, "loss": 3.1364414215087892, "step": 121100 }, { "epoch": 0.18473333333333333, "grad_norm": 0.24992205202579498, "learning_rate": 8.536780810986567e-05, "loss": 3.0870534896850588, "step": 121110 }, { "epoch": 0.1848, "grad_norm": 0.1959463655948639, "learning_rate": 8.536001546888348e-05, "loss": 3.2700904846191405, "step": 121120 }, { "epoch": 0.18486666666666668, "grad_norm": 0.37047988176345825, "learning_rate": 8.535222110927101e-05, "loss": 3.263217544555664, "step": 121130 }, { "epoch": 0.18493333333333334, "grad_norm": 0.18931439518928528, "learning_rate": 8.534442503140707e-05, "loss": 3.1750144958496094, "step": 121140 }, { "epoch": 0.185, "grad_norm": 0.18846739828586578, "learning_rate": 8.533662723567061e-05, "loss": 3.162487602233887, "step": 121150 }, { "epoch": 0.18506666666666666, "grad_norm": 0.3062122166156769, "learning_rate": 8.532882772244062e-05, "loss": 3.1803808212280273, "step": 121160 }, { "epoch": 0.18513333333333334, "grad_norm": 0.18317118287086487, "learning_rate": 8.532102649209619e-05, "loss": 3.120774841308594, "step": 121170 }, { "epoch": 0.1852, "grad_norm": 0.18721668422222137, "learning_rate": 8.531322354501647e-05, "loss": 3.127798843383789, "step": 121180 }, { "epoch": 0.18526666666666666, "grad_norm": 0.2711148262023926, "learning_rate": 8.530541888158072e-05, "loss": 3.160407829284668, "step": 121190 }, { "epoch": 0.18533333333333332, "grad_norm": 0.195384219288826, "learning_rate": 8.52976125021683e-05, "loss": 3.1337366104125977, "step": 121200 }, { "epoch": 0.1854, "grad_norm": 0.19433921575546265, "learning_rate": 8.528980440715862e-05, "loss": 3.151823616027832, "step": 121210 }, { "epoch": 0.18546666666666667, "grad_norm": 0.1776357889175415, "learning_rate": 8.528199459693115e-05, "loss": 3.0806161880493166, "step": 121220 }, { "epoch": 0.18553333333333333, "grad_norm": 0.18701951205730438, "learning_rate": 8.52741830718655e-05, "loss": 3.1467666625976562, "step": 121230 }, { "epoch": 0.1856, "grad_norm": 0.23141971230506897, "learning_rate": 8.526636983234135e-05, "loss": 3.151773452758789, "step": 121240 }, { "epoch": 0.18566666666666667, "grad_norm": 0.7081721425056458, "learning_rate": 8.525855487873846e-05, "loss": 3.1404325485229494, "step": 121250 }, { "epoch": 0.18573333333333333, "grad_norm": 0.22020962834358215, "learning_rate": 8.525073821143663e-05, "loss": 3.1858081817626953, "step": 121260 }, { "epoch": 0.1858, "grad_norm": 0.22574183344841003, "learning_rate": 8.52429198308158e-05, "loss": 3.177754211425781, "step": 121270 }, { "epoch": 0.18586666666666668, "grad_norm": 0.22238419950008392, "learning_rate": 8.523509973725599e-05, "loss": 3.157748222351074, "step": 121280 }, { "epoch": 0.18593333333333334, "grad_norm": 0.19297537207603455, "learning_rate": 8.522727793113725e-05, "loss": 3.141057014465332, "step": 121290 }, { "epoch": 0.186, "grad_norm": 0.18500012159347534, "learning_rate": 8.521945441283977e-05, "loss": 3.1612884521484377, "step": 121300 }, { "epoch": 0.18606666666666666, "grad_norm": 0.20025715231895447, "learning_rate": 8.52116291827438e-05, "loss": 3.0982765197753905, "step": 121310 }, { "epoch": 0.18613333333333335, "grad_norm": 0.19077159464359283, "learning_rate": 8.520380224122968e-05, "loss": 3.1992082595825195, "step": 121320 }, { "epoch": 0.1862, "grad_norm": 0.19508004188537598, "learning_rate": 8.519597358867782e-05, "loss": 3.0922367095947267, "step": 121330 }, { "epoch": 0.18626666666666666, "grad_norm": 0.21067450940608978, "learning_rate": 8.518814322546873e-05, "loss": 3.1426456451416014, "step": 121340 }, { "epoch": 0.18633333333333332, "grad_norm": 0.2028811126947403, "learning_rate": 8.518031115198298e-05, "loss": 3.161065864562988, "step": 121350 }, { "epoch": 0.1864, "grad_norm": 0.23740898072719574, "learning_rate": 8.517247736860126e-05, "loss": 3.2738876342773438, "step": 121360 }, { "epoch": 0.18646666666666667, "grad_norm": 0.18143799901008606, "learning_rate": 8.516464187570432e-05, "loss": 3.1690690994262694, "step": 121370 }, { "epoch": 0.18653333333333333, "grad_norm": 0.19605182111263275, "learning_rate": 8.515680467367297e-05, "loss": 3.1753334045410155, "step": 121380 }, { "epoch": 0.1866, "grad_norm": 0.1904231607913971, "learning_rate": 8.514896576288815e-05, "loss": 3.215808868408203, "step": 121390 }, { "epoch": 0.18666666666666668, "grad_norm": 0.18675272166728973, "learning_rate": 8.514112514373087e-05, "loss": 3.1553443908691405, "step": 121400 }, { "epoch": 0.18673333333333333, "grad_norm": 0.3737390637397766, "learning_rate": 8.513328281658219e-05, "loss": 3.1383012771606444, "step": 121410 }, { "epoch": 0.1868, "grad_norm": 0.22994381189346313, "learning_rate": 8.512543878182329e-05, "loss": 3.1976497650146483, "step": 121420 }, { "epoch": 0.18686666666666665, "grad_norm": 0.21511490643024445, "learning_rate": 8.51175930398354e-05, "loss": 3.1496051788330077, "step": 121430 }, { "epoch": 0.18693333333333334, "grad_norm": 0.1918988823890686, "learning_rate": 8.510974559099987e-05, "loss": 3.192185974121094, "step": 121440 }, { "epoch": 0.187, "grad_norm": 0.20392625033855438, "learning_rate": 8.510189643569812e-05, "loss": 3.1370967864990233, "step": 121450 }, { "epoch": 0.18706666666666666, "grad_norm": 0.1889914721250534, "learning_rate": 8.509404557431163e-05, "loss": 3.167232322692871, "step": 121460 }, { "epoch": 0.18713333333333335, "grad_norm": 0.19554783403873444, "learning_rate": 8.5086193007222e-05, "loss": 3.137557601928711, "step": 121470 }, { "epoch": 0.1872, "grad_norm": 0.18619318306446075, "learning_rate": 8.507833873481089e-05, "loss": 3.1566286087036133, "step": 121480 }, { "epoch": 0.18726666666666666, "grad_norm": 0.24922798573970795, "learning_rate": 8.507048275746006e-05, "loss": 3.1713945388793947, "step": 121490 }, { "epoch": 0.18733333333333332, "grad_norm": 0.19917674362659454, "learning_rate": 8.506262507555129e-05, "loss": 3.218016815185547, "step": 121500 }, { "epoch": 0.1874, "grad_norm": 0.17222127318382263, "learning_rate": 8.505476568946656e-05, "loss": 3.1725322723388674, "step": 121510 }, { "epoch": 0.18746666666666667, "grad_norm": 0.18405383825302124, "learning_rate": 8.504690459958782e-05, "loss": 3.1161882400512697, "step": 121520 }, { "epoch": 0.18753333333333333, "grad_norm": 0.21717144548892975, "learning_rate": 8.503904180629716e-05, "loss": 3.1213651657104493, "step": 121530 }, { "epoch": 0.1876, "grad_norm": 0.6112820506095886, "learning_rate": 8.503117730997674e-05, "loss": 3.271135711669922, "step": 121540 }, { "epoch": 0.18766666666666668, "grad_norm": 0.183763325214386, "learning_rate": 8.502331111100882e-05, "loss": 3.1714576721191405, "step": 121550 }, { "epoch": 0.18773333333333334, "grad_norm": 0.20529399812221527, "learning_rate": 8.501544320977571e-05, "loss": 3.137724685668945, "step": 121560 }, { "epoch": 0.1878, "grad_norm": 0.20820236206054688, "learning_rate": 8.500757360665983e-05, "loss": 3.116535949707031, "step": 121570 }, { "epoch": 0.18786666666666665, "grad_norm": 0.19342724978923798, "learning_rate": 8.499970230204366e-05, "loss": 3.157530975341797, "step": 121580 }, { "epoch": 0.18793333333333334, "grad_norm": 0.21487198770046234, "learning_rate": 8.499182929630979e-05, "loss": 3.127985382080078, "step": 121590 }, { "epoch": 0.188, "grad_norm": 0.19947011768817902, "learning_rate": 8.498395458984086e-05, "loss": 3.160859298706055, "step": 121600 }, { "epoch": 0.18806666666666666, "grad_norm": 0.18481506407260895, "learning_rate": 8.497607818301962e-05, "loss": 3.1643627166748045, "step": 121610 }, { "epoch": 0.18813333333333335, "grad_norm": 0.23086926341056824, "learning_rate": 8.49682000762289e-05, "loss": 3.142086982727051, "step": 121620 }, { "epoch": 0.1882, "grad_norm": 0.2247776836156845, "learning_rate": 8.496032026985161e-05, "loss": 3.280895233154297, "step": 121630 }, { "epoch": 0.18826666666666667, "grad_norm": 0.2087581902742386, "learning_rate": 8.495243876427072e-05, "loss": 3.1503686904907227, "step": 121640 }, { "epoch": 0.18833333333333332, "grad_norm": 0.1987176537513733, "learning_rate": 8.494455555986931e-05, "loss": 3.1407438278198243, "step": 121650 }, { "epoch": 0.1884, "grad_norm": 0.2316509187221527, "learning_rate": 8.493667065703052e-05, "loss": 3.1282327651977537, "step": 121660 }, { "epoch": 0.18846666666666667, "grad_norm": 0.1835762858390808, "learning_rate": 8.492878405613763e-05, "loss": 3.1584638595581054, "step": 121670 }, { "epoch": 0.18853333333333333, "grad_norm": 0.17785966396331787, "learning_rate": 8.492089575757389e-05, "loss": 3.1088541030883787, "step": 121680 }, { "epoch": 0.1886, "grad_norm": 0.19806362688541412, "learning_rate": 8.491300576172276e-05, "loss": 3.2024154663085938, "step": 121690 }, { "epoch": 0.18866666666666668, "grad_norm": 0.1878596842288971, "learning_rate": 8.490511406896768e-05, "loss": 3.1772369384765624, "step": 121700 }, { "epoch": 0.18873333333333334, "grad_norm": 0.20167583227157593, "learning_rate": 8.489722067969226e-05, "loss": 3.105642890930176, "step": 121710 }, { "epoch": 0.1888, "grad_norm": 0.18334804475307465, "learning_rate": 8.488932559428013e-05, "loss": 3.2914642333984374, "step": 121720 }, { "epoch": 0.18886666666666665, "grad_norm": 0.26581326127052307, "learning_rate": 8.4881428813115e-05, "loss": 3.1691974639892577, "step": 121730 }, { "epoch": 0.18893333333333334, "grad_norm": 0.21638232469558716, "learning_rate": 8.487353033658071e-05, "loss": 3.1412302017211915, "step": 121740 }, { "epoch": 0.189, "grad_norm": 0.1964891254901886, "learning_rate": 8.486563016506113e-05, "loss": 3.14970703125, "step": 121750 }, { "epoch": 0.18906666666666666, "grad_norm": 0.35423707962036133, "learning_rate": 8.485772829894027e-05, "loss": 3.25341911315918, "step": 121760 }, { "epoch": 0.18913333333333332, "grad_norm": 0.18316398561000824, "learning_rate": 8.484982473860219e-05, "loss": 3.0793346405029296, "step": 121770 }, { "epoch": 0.1892, "grad_norm": 0.1849530041217804, "learning_rate": 8.484191948443099e-05, "loss": 3.173531150817871, "step": 121780 }, { "epoch": 0.18926666666666667, "grad_norm": 1.1359325647354126, "learning_rate": 8.483401253681094e-05, "loss": 3.0436262130737304, "step": 121790 }, { "epoch": 0.18933333333333333, "grad_norm": 0.2004767656326294, "learning_rate": 8.482610389612633e-05, "loss": 3.106174087524414, "step": 121800 }, { "epoch": 0.1894, "grad_norm": 0.17457161843776703, "learning_rate": 8.481819356276154e-05, "loss": 3.174398994445801, "step": 121810 }, { "epoch": 0.18946666666666667, "grad_norm": 0.18731388449668884, "learning_rate": 8.481028153710107e-05, "loss": 3.209987258911133, "step": 121820 }, { "epoch": 0.18953333333333333, "grad_norm": 0.18289443850517273, "learning_rate": 8.480236781952944e-05, "loss": 3.1661014556884766, "step": 121830 }, { "epoch": 0.1896, "grad_norm": 0.19071203470230103, "learning_rate": 8.479445241043132e-05, "loss": 3.172917366027832, "step": 121840 }, { "epoch": 0.18966666666666668, "grad_norm": 0.17432621121406555, "learning_rate": 8.47865353101914e-05, "loss": 3.1905754089355467, "step": 121850 }, { "epoch": 0.18973333333333334, "grad_norm": 0.2047223001718521, "learning_rate": 8.477861651919448e-05, "loss": 3.1569101333618166, "step": 121860 }, { "epoch": 0.1898, "grad_norm": 0.19737067818641663, "learning_rate": 8.477069603782548e-05, "loss": 3.3347293853759767, "step": 121870 }, { "epoch": 0.18986666666666666, "grad_norm": 0.20777416229248047, "learning_rate": 8.476277386646933e-05, "loss": 3.188547134399414, "step": 121880 }, { "epoch": 0.18993333333333334, "grad_norm": 0.3070421516895294, "learning_rate": 8.47548500055111e-05, "loss": 3.1981998443603517, "step": 121890 }, { "epoch": 0.19, "grad_norm": 0.1797378957271576, "learning_rate": 8.47469244553359e-05, "loss": 3.151828956604004, "step": 121900 }, { "epoch": 0.19006666666666666, "grad_norm": 0.23872517049312592, "learning_rate": 8.473899721632895e-05, "loss": 3.137518310546875, "step": 121910 }, { "epoch": 0.19013333333333332, "grad_norm": 0.19045501947402954, "learning_rate": 8.473106828887556e-05, "loss": 3.1322757720947267, "step": 121920 }, { "epoch": 0.1902, "grad_norm": 0.19503550231456757, "learning_rate": 8.472313767336107e-05, "loss": 3.1191646575927736, "step": 121930 }, { "epoch": 0.19026666666666667, "grad_norm": 0.18020020425319672, "learning_rate": 8.471520537017097e-05, "loss": 3.126681900024414, "step": 121940 }, { "epoch": 0.19033333333333333, "grad_norm": 0.18761195242404938, "learning_rate": 8.470727137969076e-05, "loss": 3.139388084411621, "step": 121950 }, { "epoch": 0.1904, "grad_norm": 0.20950448513031006, "learning_rate": 8.469933570230613e-05, "loss": 3.131490707397461, "step": 121960 }, { "epoch": 0.19046666666666667, "grad_norm": 0.1774873435497284, "learning_rate": 8.469139833840272e-05, "loss": 3.1391977310180663, "step": 121970 }, { "epoch": 0.19053333333333333, "grad_norm": 0.1898418664932251, "learning_rate": 8.468345928836635e-05, "loss": 3.1158111572265623, "step": 121980 }, { "epoch": 0.1906, "grad_norm": 0.21607007086277008, "learning_rate": 8.467551855258287e-05, "loss": 3.1624120712280273, "step": 121990 }, { "epoch": 0.19066666666666668, "grad_norm": 1.1563204526901245, "learning_rate": 8.466757613143825e-05, "loss": 3.222623825073242, "step": 122000 }, { "epoch": 0.19073333333333334, "grad_norm": 0.18270635604858398, "learning_rate": 8.465963202531849e-05, "loss": 3.154124641418457, "step": 122010 }, { "epoch": 0.1908, "grad_norm": 0.18802598118782043, "learning_rate": 8.465168623460973e-05, "loss": 3.1917734146118164, "step": 122020 }, { "epoch": 0.19086666666666666, "grad_norm": 0.20063380897045135, "learning_rate": 8.464373875969816e-05, "loss": 3.158283996582031, "step": 122030 }, { "epoch": 0.19093333333333334, "grad_norm": 0.225837841629982, "learning_rate": 8.463578960097005e-05, "loss": 3.1537801742553713, "step": 122040 }, { "epoch": 0.191, "grad_norm": 0.19215288758277893, "learning_rate": 8.462783875881177e-05, "loss": 3.1839866638183594, "step": 122050 }, { "epoch": 0.19106666666666666, "grad_norm": 0.20209352672100067, "learning_rate": 8.461988623360974e-05, "loss": 3.1425148010253907, "step": 122060 }, { "epoch": 0.19113333333333332, "grad_norm": 0.19155184924602509, "learning_rate": 8.461193202575053e-05, "loss": 3.1849952697753907, "step": 122070 }, { "epoch": 0.1912, "grad_norm": 0.20910771191120148, "learning_rate": 8.460397613562067e-05, "loss": 3.120160675048828, "step": 122080 }, { "epoch": 0.19126666666666667, "grad_norm": 0.1860789805650711, "learning_rate": 8.459601856360692e-05, "loss": 3.131412124633789, "step": 122090 }, { "epoch": 0.19133333333333333, "grad_norm": 0.18574275076389313, "learning_rate": 8.458805931009602e-05, "loss": 3.146405029296875, "step": 122100 }, { "epoch": 0.1914, "grad_norm": 0.5677555203437805, "learning_rate": 8.458009837547479e-05, "loss": 3.2022796630859376, "step": 122110 }, { "epoch": 0.19146666666666667, "grad_norm": 0.22301149368286133, "learning_rate": 8.45721357601302e-05, "loss": 3.1723182678222654, "step": 122120 }, { "epoch": 0.19153333333333333, "grad_norm": 0.21427740156650543, "learning_rate": 8.456417146444926e-05, "loss": 3.148632621765137, "step": 122130 }, { "epoch": 0.1916, "grad_norm": 0.18685957789421082, "learning_rate": 8.455620548881906e-05, "loss": 3.1164100646972654, "step": 122140 }, { "epoch": 0.19166666666666668, "grad_norm": 0.20543576776981354, "learning_rate": 8.454823783362675e-05, "loss": 3.1291200637817385, "step": 122150 }, { "epoch": 0.19173333333333334, "grad_norm": 0.21439175307750702, "learning_rate": 8.454026849925962e-05, "loss": 3.1465789794921877, "step": 122160 }, { "epoch": 0.1918, "grad_norm": 0.19670286774635315, "learning_rate": 8.4532297486105e-05, "loss": 3.2105846405029297, "step": 122170 }, { "epoch": 0.19186666666666666, "grad_norm": 0.18872801959514618, "learning_rate": 8.452432479455032e-05, "loss": 3.092525672912598, "step": 122180 }, { "epoch": 0.19193333333333334, "grad_norm": 0.18634143471717834, "learning_rate": 8.451635042498307e-05, "loss": 3.1700611114501953, "step": 122190 }, { "epoch": 0.192, "grad_norm": 0.19841457903385162, "learning_rate": 8.450837437779084e-05, "loss": 3.1560522079467774, "step": 122200 }, { "epoch": 0.19206666666666666, "grad_norm": 0.21174372732639313, "learning_rate": 8.450039665336129e-05, "loss": 3.3449169158935548, "step": 122210 }, { "epoch": 0.19213333333333332, "grad_norm": 0.208480104804039, "learning_rate": 8.449241725208219e-05, "loss": 3.1554351806640626, "step": 122220 }, { "epoch": 0.1922, "grad_norm": 0.2194945365190506, "learning_rate": 8.448443617434133e-05, "loss": 3.0995655059814453, "step": 122230 }, { "epoch": 0.19226666666666667, "grad_norm": 0.18366089463233948, "learning_rate": 8.447645342052665e-05, "loss": 3.108304977416992, "step": 122240 }, { "epoch": 0.19233333333333333, "grad_norm": 0.31082215905189514, "learning_rate": 8.446846899102614e-05, "loss": 3.1782264709472656, "step": 122250 }, { "epoch": 0.1924, "grad_norm": 0.24182011187076569, "learning_rate": 8.446048288622786e-05, "loss": 3.1264772415161133, "step": 122260 }, { "epoch": 0.19246666666666667, "grad_norm": 0.19528914988040924, "learning_rate": 8.445249510651997e-05, "loss": 3.1377099990844726, "step": 122270 }, { "epoch": 0.19253333333333333, "grad_norm": 0.20808948576450348, "learning_rate": 8.444450565229072e-05, "loss": 3.1863542556762696, "step": 122280 }, { "epoch": 0.1926, "grad_norm": 0.1939326822757721, "learning_rate": 8.44365145239284e-05, "loss": 3.1582609176635743, "step": 122290 }, { "epoch": 0.19266666666666668, "grad_norm": 0.19418834149837494, "learning_rate": 8.442852172182142e-05, "loss": 3.105598258972168, "step": 122300 }, { "epoch": 0.19273333333333334, "grad_norm": 0.19236595928668976, "learning_rate": 8.442052724635829e-05, "loss": 3.278799819946289, "step": 122310 }, { "epoch": 0.1928, "grad_norm": 0.19918283820152283, "learning_rate": 8.441253109792755e-05, "loss": 3.1951120376586912, "step": 122320 }, { "epoch": 0.19286666666666666, "grad_norm": 0.2545427083969116, "learning_rate": 8.440453327691782e-05, "loss": 3.1245203018188477, "step": 122330 }, { "epoch": 0.19293333333333335, "grad_norm": 0.1924268752336502, "learning_rate": 8.439653378371785e-05, "loss": 3.1233404159545897, "step": 122340 }, { "epoch": 0.193, "grad_norm": 0.18317997455596924, "learning_rate": 8.438853261871644e-05, "loss": 3.1760515213012694, "step": 122350 }, { "epoch": 0.19306666666666666, "grad_norm": 0.18755333125591278, "learning_rate": 8.438052978230249e-05, "loss": 3.1784975051879885, "step": 122360 }, { "epoch": 0.19313333333333332, "grad_norm": 0.19723160564899445, "learning_rate": 8.437252527486494e-05, "loss": 3.2734653472900392, "step": 122370 }, { "epoch": 0.1932, "grad_norm": 0.21004053950309753, "learning_rate": 8.436451909679286e-05, "loss": 3.134242820739746, "step": 122380 }, { "epoch": 0.19326666666666667, "grad_norm": 0.48945116996765137, "learning_rate": 8.435651124847539e-05, "loss": 3.1141641616821287, "step": 122390 }, { "epoch": 0.19333333333333333, "grad_norm": 0.1956036239862442, "learning_rate": 8.434850173030171e-05, "loss": 3.126398468017578, "step": 122400 }, { "epoch": 0.1934, "grad_norm": 0.2104678899049759, "learning_rate": 8.434049054266113e-05, "loss": 3.212014389038086, "step": 122410 }, { "epoch": 0.19346666666666668, "grad_norm": 0.1948971301317215, "learning_rate": 8.433247768594301e-05, "loss": 3.166608428955078, "step": 122420 }, { "epoch": 0.19353333333333333, "grad_norm": 0.21619637310504913, "learning_rate": 8.432446316053683e-05, "loss": 3.134595489501953, "step": 122430 }, { "epoch": 0.1936, "grad_norm": 0.19040369987487793, "learning_rate": 8.431644696683213e-05, "loss": 3.148475456237793, "step": 122440 }, { "epoch": 0.19366666666666665, "grad_norm": 0.20498955249786377, "learning_rate": 8.43084291052185e-05, "loss": 3.2202632904052733, "step": 122450 }, { "epoch": 0.19373333333333334, "grad_norm": 0.1799214482307434, "learning_rate": 8.430040957608564e-05, "loss": 3.131497383117676, "step": 122460 }, { "epoch": 0.1938, "grad_norm": 0.19874869287014008, "learning_rate": 8.429238837982336e-05, "loss": 3.1965322494506836, "step": 122470 }, { "epoch": 0.19386666666666666, "grad_norm": 0.18891355395317078, "learning_rate": 8.428436551682149e-05, "loss": 3.167613410949707, "step": 122480 }, { "epoch": 0.19393333333333335, "grad_norm": 0.31869152188301086, "learning_rate": 8.427634098747e-05, "loss": 3.1984079360961912, "step": 122490 }, { "epoch": 0.194, "grad_norm": 0.20619089901447296, "learning_rate": 8.426831479215887e-05, "loss": 3.184630012512207, "step": 122500 }, { "epoch": 0.19406666666666667, "grad_norm": 0.1829196810722351, "learning_rate": 8.426028693127823e-05, "loss": 3.1527481079101562, "step": 122510 }, { "epoch": 0.19413333333333332, "grad_norm": 0.17712737619876862, "learning_rate": 8.425225740521827e-05, "loss": 3.1487504959106447, "step": 122520 }, { "epoch": 0.1942, "grad_norm": 0.20600616931915283, "learning_rate": 8.424422621436923e-05, "loss": 3.180978202819824, "step": 122530 }, { "epoch": 0.19426666666666667, "grad_norm": 0.21889670193195343, "learning_rate": 8.423619335912149e-05, "loss": 3.244008255004883, "step": 122540 }, { "epoch": 0.19433333333333333, "grad_norm": 0.18363068997859955, "learning_rate": 8.422815883986546e-05, "loss": 3.101370620727539, "step": 122550 }, { "epoch": 0.1944, "grad_norm": 0.21313820779323578, "learning_rate": 8.422012265699165e-05, "loss": 3.158548355102539, "step": 122560 }, { "epoch": 0.19446666666666668, "grad_norm": 0.193060964345932, "learning_rate": 8.421208481089065e-05, "loss": 3.1766143798828126, "step": 122570 }, { "epoch": 0.19453333333333334, "grad_norm": 0.2609567940235138, "learning_rate": 8.420404530195312e-05, "loss": 3.121845245361328, "step": 122580 }, { "epoch": 0.1946, "grad_norm": 0.19222506880760193, "learning_rate": 8.41960041305698e-05, "loss": 3.1617507934570312, "step": 122590 }, { "epoch": 0.19466666666666665, "grad_norm": 0.1971876323223114, "learning_rate": 8.418796129713156e-05, "loss": 3.1381263732910156, "step": 122600 }, { "epoch": 0.19473333333333334, "grad_norm": 0.2379298210144043, "learning_rate": 8.417991680202931e-05, "loss": 3.1716083526611327, "step": 122610 }, { "epoch": 0.1948, "grad_norm": 0.18853338062763214, "learning_rate": 8.417187064565399e-05, "loss": 3.1700973510742188, "step": 122620 }, { "epoch": 0.19486666666666666, "grad_norm": 0.20128875970840454, "learning_rate": 8.416382282839673e-05, "loss": 3.1465475082397463, "step": 122630 }, { "epoch": 0.19493333333333332, "grad_norm": 0.18512484431266785, "learning_rate": 8.415577335064864e-05, "loss": 3.1366687774658204, "step": 122640 }, { "epoch": 0.195, "grad_norm": 0.19837471842765808, "learning_rate": 8.414772221280099e-05, "loss": 3.163103485107422, "step": 122650 }, { "epoch": 0.19506666666666667, "grad_norm": 0.19793976843357086, "learning_rate": 8.413966941524507e-05, "loss": 3.1083192825317383, "step": 122660 }, { "epoch": 0.19513333333333333, "grad_norm": 0.21938402950763702, "learning_rate": 8.41316149583723e-05, "loss": 3.165411376953125, "step": 122670 }, { "epoch": 0.1952, "grad_norm": 0.1813003271818161, "learning_rate": 8.412355884257415e-05, "loss": 3.127127838134766, "step": 122680 }, { "epoch": 0.19526666666666667, "grad_norm": 0.1782514750957489, "learning_rate": 8.411550106824216e-05, "loss": 3.1385093688964845, "step": 122690 }, { "epoch": 0.19533333333333333, "grad_norm": 0.18536943197250366, "learning_rate": 8.410744163576801e-05, "loss": 3.1149085998535155, "step": 122700 }, { "epoch": 0.1954, "grad_norm": 0.19876304268836975, "learning_rate": 8.409938054554336e-05, "loss": 3.2360870361328127, "step": 122710 }, { "epoch": 0.19546666666666668, "grad_norm": 0.18610507249832153, "learning_rate": 8.409131779796004e-05, "loss": 3.0846250534057615, "step": 122720 }, { "epoch": 0.19553333333333334, "grad_norm": 0.20565837621688843, "learning_rate": 8.408325339340995e-05, "loss": 3.3332393646240233, "step": 122730 }, { "epoch": 0.1956, "grad_norm": 0.19205519556999207, "learning_rate": 8.407518733228502e-05, "loss": 3.1374208450317385, "step": 122740 }, { "epoch": 0.19566666666666666, "grad_norm": 0.19336596131324768, "learning_rate": 8.406711961497729e-05, "loss": 3.193259429931641, "step": 122750 }, { "epoch": 0.19573333333333334, "grad_norm": 0.24541525542736053, "learning_rate": 8.40590502418789e-05, "loss": 3.167156791687012, "step": 122760 }, { "epoch": 0.1958, "grad_norm": 0.21535624563694, "learning_rate": 8.405097921338205e-05, "loss": 3.192317581176758, "step": 122770 }, { "epoch": 0.19586666666666666, "grad_norm": 0.24307434260845184, "learning_rate": 8.404290652987902e-05, "loss": 3.2498920440673826, "step": 122780 }, { "epoch": 0.19593333333333332, "grad_norm": 0.20032115280628204, "learning_rate": 8.403483219176216e-05, "loss": 3.1280567169189455, "step": 122790 }, { "epoch": 0.196, "grad_norm": 0.1927787810564041, "learning_rate": 8.402675619942393e-05, "loss": 3.1797416687011717, "step": 122800 }, { "epoch": 0.19606666666666667, "grad_norm": 0.20632904767990112, "learning_rate": 8.401867855325684e-05, "loss": 3.1597280502319336, "step": 122810 }, { "epoch": 0.19613333333333333, "grad_norm": 0.26487383246421814, "learning_rate": 8.401059925365353e-05, "loss": 3.1263349533081053, "step": 122820 }, { "epoch": 0.1962, "grad_norm": 0.24448886513710022, "learning_rate": 8.400251830100663e-05, "loss": 3.12491397857666, "step": 122830 }, { "epoch": 0.19626666666666667, "grad_norm": 0.18925026059150696, "learning_rate": 8.399443569570893e-05, "loss": 3.1250274658203123, "step": 122840 }, { "epoch": 0.19633333333333333, "grad_norm": 0.21038544178009033, "learning_rate": 8.398635143815328e-05, "loss": 3.0922237396240235, "step": 122850 }, { "epoch": 0.1964, "grad_norm": 0.5459504723548889, "learning_rate": 8.39782655287326e-05, "loss": 3.172234535217285, "step": 122860 }, { "epoch": 0.19646666666666668, "grad_norm": 0.1848583221435547, "learning_rate": 8.39701779678399e-05, "loss": 3.0942501068115233, "step": 122870 }, { "epoch": 0.19653333333333334, "grad_norm": 0.19076019525527954, "learning_rate": 8.396208875586828e-05, "loss": 3.146830749511719, "step": 122880 }, { "epoch": 0.1966, "grad_norm": 1.0377821922302246, "learning_rate": 8.395399789321087e-05, "loss": 3.258612060546875, "step": 122890 }, { "epoch": 0.19666666666666666, "grad_norm": 0.19190236926078796, "learning_rate": 8.394590538026093e-05, "loss": 3.197412109375, "step": 122900 }, { "epoch": 0.19673333333333334, "grad_norm": 0.18836572766304016, "learning_rate": 8.39378112174118e-05, "loss": 3.161937713623047, "step": 122910 }, { "epoch": 0.1968, "grad_norm": 0.18169601261615753, "learning_rate": 8.392971540505688e-05, "loss": 3.093614959716797, "step": 122920 }, { "epoch": 0.19686666666666666, "grad_norm": 0.4405084550380707, "learning_rate": 8.392161794358966e-05, "loss": 3.151602363586426, "step": 122930 }, { "epoch": 0.19693333333333332, "grad_norm": 0.20971399545669556, "learning_rate": 8.39135188334037e-05, "loss": 3.1813823699951174, "step": 122940 }, { "epoch": 0.197, "grad_norm": 0.20744279026985168, "learning_rate": 8.390541807489265e-05, "loss": 3.1118974685668945, "step": 122950 }, { "epoch": 0.19706666666666667, "grad_norm": 0.19436198472976685, "learning_rate": 8.389731566845025e-05, "loss": 3.1509515762329103, "step": 122960 }, { "epoch": 0.19713333333333333, "grad_norm": 0.19184407591819763, "learning_rate": 8.388921161447027e-05, "loss": 3.1510066986083984, "step": 122970 }, { "epoch": 0.1972, "grad_norm": 0.19567444920539856, "learning_rate": 8.388110591334666e-05, "loss": 3.1300331115722657, "step": 122980 }, { "epoch": 0.19726666666666667, "grad_norm": 0.18446074426174164, "learning_rate": 8.387299856547332e-05, "loss": 3.161172294616699, "step": 122990 }, { "epoch": 0.19733333333333333, "grad_norm": 0.2094121277332306, "learning_rate": 8.386488957124434e-05, "loss": 3.135223388671875, "step": 123000 }, { "epoch": 0.1974, "grad_norm": 0.18276414275169373, "learning_rate": 8.385677893105383e-05, "loss": 3.2060264587402343, "step": 123010 }, { "epoch": 0.19746666666666668, "grad_norm": 0.19928087294101715, "learning_rate": 8.384866664529603e-05, "loss": 3.0863115310668947, "step": 123020 }, { "epoch": 0.19753333333333334, "grad_norm": 0.198073610663414, "learning_rate": 8.384055271436517e-05, "loss": 3.132269859313965, "step": 123030 }, { "epoch": 0.1976, "grad_norm": 0.22146429121494293, "learning_rate": 8.383243713865567e-05, "loss": 3.1416608810424806, "step": 123040 }, { "epoch": 0.19766666666666666, "grad_norm": 0.18958722054958344, "learning_rate": 8.382431991856195e-05, "loss": 3.1551630020141603, "step": 123050 }, { "epoch": 0.19773333333333334, "grad_norm": 0.1977592408657074, "learning_rate": 8.381620105447855e-05, "loss": 3.176200103759766, "step": 123060 }, { "epoch": 0.1978, "grad_norm": 0.20481370389461517, "learning_rate": 8.380808054680007e-05, "loss": 3.128420829772949, "step": 123070 }, { "epoch": 0.19786666666666666, "grad_norm": 0.24115681648254395, "learning_rate": 8.379995839592119e-05, "loss": 3.134942817687988, "step": 123080 }, { "epoch": 0.19793333333333332, "grad_norm": 1.1209418773651123, "learning_rate": 8.37918346022367e-05, "loss": 2.839239501953125, "step": 123090 }, { "epoch": 0.198, "grad_norm": 0.19471924006938934, "learning_rate": 8.378370916614143e-05, "loss": 3.095490837097168, "step": 123100 }, { "epoch": 0.19806666666666667, "grad_norm": 0.22475726902484894, "learning_rate": 8.377558208803033e-05, "loss": 3.112719917297363, "step": 123110 }, { "epoch": 0.19813333333333333, "grad_norm": 0.1995321363210678, "learning_rate": 8.376745336829836e-05, "loss": 3.156229591369629, "step": 123120 }, { "epoch": 0.1982, "grad_norm": 0.17550471425056458, "learning_rate": 8.375932300734064e-05, "loss": 3.1420637130737306, "step": 123130 }, { "epoch": 0.19826666666666667, "grad_norm": 0.2405775785446167, "learning_rate": 8.375119100555234e-05, "loss": 3.1557931900024414, "step": 123140 }, { "epoch": 0.19833333333333333, "grad_norm": 0.20054680109024048, "learning_rate": 8.374305736332869e-05, "loss": 3.1370492935180665, "step": 123150 }, { "epoch": 0.1984, "grad_norm": 0.25249508023262024, "learning_rate": 8.373492208106502e-05, "loss": 3.140548324584961, "step": 123160 }, { "epoch": 0.19846666666666668, "grad_norm": 0.19149698317050934, "learning_rate": 8.372678515915674e-05, "loss": 3.188545036315918, "step": 123170 }, { "epoch": 0.19853333333333334, "grad_norm": 0.17967607080936432, "learning_rate": 8.371864659799933e-05, "loss": 3.1330493927001952, "step": 123180 }, { "epoch": 0.1986, "grad_norm": 0.20919688045978546, "learning_rate": 8.371050639798836e-05, "loss": 3.19982967376709, "step": 123190 }, { "epoch": 0.19866666666666666, "grad_norm": 0.18869008123874664, "learning_rate": 8.370236455951947e-05, "loss": 3.144927215576172, "step": 123200 }, { "epoch": 0.19873333333333335, "grad_norm": 0.24785269796848297, "learning_rate": 8.36942210829884e-05, "loss": 2.908379554748535, "step": 123210 }, { "epoch": 0.1988, "grad_norm": 0.2040317952632904, "learning_rate": 8.368607596879092e-05, "loss": 2.811550521850586, "step": 123220 }, { "epoch": 0.19886666666666666, "grad_norm": 0.18924954533576965, "learning_rate": 8.367792921732297e-05, "loss": 3.157068061828613, "step": 123230 }, { "epoch": 0.19893333333333332, "grad_norm": 0.20646904408931732, "learning_rate": 8.366978082898043e-05, "loss": 3.375255584716797, "step": 123240 }, { "epoch": 0.199, "grad_norm": 0.17956314980983734, "learning_rate": 8.36616308041594e-05, "loss": 3.1873447418212892, "step": 123250 }, { "epoch": 0.19906666666666667, "grad_norm": 0.19786430895328522, "learning_rate": 8.3653479143256e-05, "loss": 3.2147369384765625, "step": 123260 }, { "epoch": 0.19913333333333333, "grad_norm": 0.18067795038223267, "learning_rate": 8.364532584666642e-05, "loss": 3.1728500366210937, "step": 123270 }, { "epoch": 0.1992, "grad_norm": 0.1806071400642395, "learning_rate": 8.363717091478696e-05, "loss": 3.1460155487060546, "step": 123280 }, { "epoch": 0.19926666666666668, "grad_norm": 0.9649947881698608, "learning_rate": 8.362901434801393e-05, "loss": 3.132940483093262, "step": 123290 }, { "epoch": 0.19933333333333333, "grad_norm": 0.18344983458518982, "learning_rate": 8.362085614674382e-05, "loss": 3.175698089599609, "step": 123300 }, { "epoch": 0.1994, "grad_norm": 0.1879209578037262, "learning_rate": 8.361269631137314e-05, "loss": 3.1590681076049805, "step": 123310 }, { "epoch": 0.19946666666666665, "grad_norm": 0.18874357640743256, "learning_rate": 8.360453484229847e-05, "loss": 3.174698066711426, "step": 123320 }, { "epoch": 0.19953333333333334, "grad_norm": 0.24120739102363586, "learning_rate": 8.35963717399165e-05, "loss": 3.14150333404541, "step": 123330 }, { "epoch": 0.1996, "grad_norm": 0.21908199787139893, "learning_rate": 8.3588207004624e-05, "loss": 3.1382837295532227, "step": 123340 }, { "epoch": 0.19966666666666666, "grad_norm": 0.19613707065582275, "learning_rate": 8.358004063681779e-05, "loss": 3.2209903717041017, "step": 123350 }, { "epoch": 0.19973333333333335, "grad_norm": 0.1852622777223587, "learning_rate": 8.357187263689478e-05, "loss": 3.092805099487305, "step": 123360 }, { "epoch": 0.1998, "grad_norm": 0.20194368064403534, "learning_rate": 8.356370300525198e-05, "loss": 3.14150390625, "step": 123370 }, { "epoch": 0.19986666666666666, "grad_norm": 0.1995520293712616, "learning_rate": 8.355553174228647e-05, "loss": 3.1414140701293944, "step": 123380 }, { "epoch": 0.19993333333333332, "grad_norm": 0.19043084979057312, "learning_rate": 8.354735884839538e-05, "loss": 3.1367172241210937, "step": 123390 }, { "epoch": 0.2, "grad_norm": 0.1880982369184494, "learning_rate": 8.353918432397598e-05, "loss": 3.2101001739501953, "step": 123400 }, { "epoch": 0.20006666666666667, "grad_norm": 0.177828848361969, "learning_rate": 8.353100816942554e-05, "loss": 3.1191688537597657, "step": 123410 }, { "epoch": 0.20013333333333333, "grad_norm": 0.1902078241109848, "learning_rate": 8.352283038514148e-05, "loss": 3.1095417022705076, "step": 123420 }, { "epoch": 0.2002, "grad_norm": 0.18368273973464966, "learning_rate": 8.351465097152129e-05, "loss": 3.1956892013549805, "step": 123430 }, { "epoch": 0.20026666666666668, "grad_norm": 0.3330329358577728, "learning_rate": 8.350646992896247e-05, "loss": 3.419098663330078, "step": 123440 }, { "epoch": 0.20033333333333334, "grad_norm": 0.307902455329895, "learning_rate": 8.349828725786268e-05, "loss": 3.344915008544922, "step": 123450 }, { "epoch": 0.2004, "grad_norm": 0.19308972358703613, "learning_rate": 8.349010295861962e-05, "loss": 3.1638587951660155, "step": 123460 }, { "epoch": 0.20046666666666665, "grad_norm": 0.18007683753967285, "learning_rate": 8.348191703163109e-05, "loss": 3.129079246520996, "step": 123470 }, { "epoch": 0.20053333333333334, "grad_norm": 0.3199161887168884, "learning_rate": 8.347372947729494e-05, "loss": 3.17480525970459, "step": 123480 }, { "epoch": 0.2006, "grad_norm": 0.18322430551052094, "learning_rate": 8.346554029600912e-05, "loss": 3.1549715042114257, "step": 123490 }, { "epoch": 0.20066666666666666, "grad_norm": 0.1968175172805786, "learning_rate": 8.345734948817168e-05, "loss": 3.098326873779297, "step": 123500 }, { "epoch": 0.20073333333333335, "grad_norm": 0.19988739490509033, "learning_rate": 8.344915705418068e-05, "loss": 3.133697509765625, "step": 123510 }, { "epoch": 0.2008, "grad_norm": 0.2111625224351883, "learning_rate": 8.344096299443434e-05, "loss": 3.2481536865234375, "step": 123520 }, { "epoch": 0.20086666666666667, "grad_norm": 0.21256953477859497, "learning_rate": 8.343276730933091e-05, "loss": 3.142940139770508, "step": 123530 }, { "epoch": 0.20093333333333332, "grad_norm": 0.19141319394111633, "learning_rate": 8.342456999926872e-05, "loss": 3.1496561050415037, "step": 123540 }, { "epoch": 0.201, "grad_norm": 0.18840323388576508, "learning_rate": 8.341637106464623e-05, "loss": 3.230164337158203, "step": 123550 }, { "epoch": 0.20106666666666667, "grad_norm": 0.18501001596450806, "learning_rate": 8.340817050586188e-05, "loss": 3.1426090240478515, "step": 123560 }, { "epoch": 0.20113333333333333, "grad_norm": 0.21881259977817535, "learning_rate": 8.339996832331429e-05, "loss": 3.156443214416504, "step": 123570 }, { "epoch": 0.2012, "grad_norm": 0.25038406252861023, "learning_rate": 8.33917645174021e-05, "loss": 3.1362924575805664, "step": 123580 }, { "epoch": 0.20126666666666668, "grad_norm": 0.18219931423664093, "learning_rate": 8.338355908852406e-05, "loss": 3.1227794647216798, "step": 123590 }, { "epoch": 0.20133333333333334, "grad_norm": 0.17746104300022125, "learning_rate": 8.337535203707898e-05, "loss": 3.170457649230957, "step": 123600 }, { "epoch": 0.2014, "grad_norm": 0.3173169791698456, "learning_rate": 8.336714336346574e-05, "loss": 3.3340774536132813, "step": 123610 }, { "epoch": 0.20146666666666666, "grad_norm": 0.20475777983665466, "learning_rate": 8.335893306808331e-05, "loss": 3.149312973022461, "step": 123620 }, { "epoch": 0.20153333333333334, "grad_norm": 0.23517157137393951, "learning_rate": 8.335072115133078e-05, "loss": 3.1126785278320312, "step": 123630 }, { "epoch": 0.2016, "grad_norm": 0.2861378490924835, "learning_rate": 8.334250761360724e-05, "loss": 3.250082015991211, "step": 123640 }, { "epoch": 0.20166666666666666, "grad_norm": 0.19273342192173004, "learning_rate": 8.33342924553119e-05, "loss": 3.159092903137207, "step": 123650 }, { "epoch": 0.20173333333333332, "grad_norm": 0.19955575466156006, "learning_rate": 8.332607567684407e-05, "loss": 3.1575901031494142, "step": 123660 }, { "epoch": 0.2018, "grad_norm": 0.20763236284255981, "learning_rate": 8.331785727860313e-05, "loss": 3.1616323471069334, "step": 123670 }, { "epoch": 0.20186666666666667, "grad_norm": 0.22479838132858276, "learning_rate": 8.330963726098848e-05, "loss": 3.1649154663085937, "step": 123680 }, { "epoch": 0.20193333333333333, "grad_norm": 0.17762504518032074, "learning_rate": 8.330141562439966e-05, "loss": 3.1523458480834963, "step": 123690 }, { "epoch": 0.202, "grad_norm": 0.1959788054227829, "learning_rate": 8.32931923692363e-05, "loss": 3.178416633605957, "step": 123700 }, { "epoch": 0.20206666666666667, "grad_norm": 0.23171071708202362, "learning_rate": 8.328496749589804e-05, "loss": 3.103141021728516, "step": 123710 }, { "epoch": 0.20213333333333333, "grad_norm": 0.20251859724521637, "learning_rate": 8.327674100478467e-05, "loss": 3.1581966400146486, "step": 123720 }, { "epoch": 0.2022, "grad_norm": 0.18631188571453094, "learning_rate": 8.326851289629601e-05, "loss": 3.1500228881835937, "step": 123730 }, { "epoch": 0.20226666666666668, "grad_norm": 0.18797710537910461, "learning_rate": 8.326028317083198e-05, "loss": 3.0904361724853517, "step": 123740 }, { "epoch": 0.20233333333333334, "grad_norm": 0.2327837198972702, "learning_rate": 8.325205182879261e-05, "loss": 3.1692073822021483, "step": 123750 }, { "epoch": 0.2024, "grad_norm": 0.21433445811271667, "learning_rate": 8.324381887057792e-05, "loss": 3.2061752319335937, "step": 123760 }, { "epoch": 0.20246666666666666, "grad_norm": 0.20067761838436127, "learning_rate": 8.323558429658808e-05, "loss": 3.146043395996094, "step": 123770 }, { "epoch": 0.20253333333333334, "grad_norm": 0.18243558704853058, "learning_rate": 8.322734810722335e-05, "loss": 3.1399675369262696, "step": 123780 }, { "epoch": 0.2026, "grad_norm": 0.19636990129947662, "learning_rate": 8.321911030288403e-05, "loss": 3.188319206237793, "step": 123790 }, { "epoch": 0.20266666666666666, "grad_norm": 0.35933834314346313, "learning_rate": 8.321087088397049e-05, "loss": 3.154468536376953, "step": 123800 }, { "epoch": 0.20273333333333332, "grad_norm": 0.21001634001731873, "learning_rate": 8.320262985088318e-05, "loss": 3.1560647964477537, "step": 123810 }, { "epoch": 0.2028, "grad_norm": 0.22036360204219818, "learning_rate": 8.319438720402269e-05, "loss": 3.1084680557250977, "step": 123820 }, { "epoch": 0.20286666666666667, "grad_norm": 0.17729797959327698, "learning_rate": 8.318614294378963e-05, "loss": 3.160260009765625, "step": 123830 }, { "epoch": 0.20293333333333333, "grad_norm": 0.18611615896224976, "learning_rate": 8.317789707058468e-05, "loss": 3.1196500778198244, "step": 123840 }, { "epoch": 0.203, "grad_norm": 0.180599182844162, "learning_rate": 8.316964958480865e-05, "loss": 3.1519496917724608, "step": 123850 }, { "epoch": 0.20306666666666667, "grad_norm": 0.19023264944553375, "learning_rate": 8.316140048686238e-05, "loss": 3.1345964431762696, "step": 123860 }, { "epoch": 0.20313333333333333, "grad_norm": 0.18376712501049042, "learning_rate": 8.315314977714682e-05, "loss": 3.137929344177246, "step": 123870 }, { "epoch": 0.2032, "grad_norm": 0.20028094947338104, "learning_rate": 8.314489745606296e-05, "loss": 3.148976707458496, "step": 123880 }, { "epoch": 0.20326666666666668, "grad_norm": 0.17690348625183105, "learning_rate": 8.313664352401192e-05, "loss": 3.1585853576660154, "step": 123890 }, { "epoch": 0.20333333333333334, "grad_norm": 0.19632785022258759, "learning_rate": 8.312838798139488e-05, "loss": 3.1760196685791016, "step": 123900 }, { "epoch": 0.2034, "grad_norm": 0.2628275454044342, "learning_rate": 8.312013082861306e-05, "loss": 3.242158126831055, "step": 123910 }, { "epoch": 0.20346666666666666, "grad_norm": 0.2107280045747757, "learning_rate": 8.311187206606781e-05, "loss": 3.1566232681274413, "step": 123920 }, { "epoch": 0.20353333333333334, "grad_norm": 0.18838590383529663, "learning_rate": 8.310361169416053e-05, "loss": 3.1166967391967773, "step": 123930 }, { "epoch": 0.2036, "grad_norm": 0.2363128662109375, "learning_rate": 8.309534971329271e-05, "loss": 3.1699188232421873, "step": 123940 }, { "epoch": 0.20366666666666666, "grad_norm": 0.19004277884960175, "learning_rate": 8.30870861238659e-05, "loss": 3.1420642852783205, "step": 123950 }, { "epoch": 0.20373333333333332, "grad_norm": 0.1845003366470337, "learning_rate": 8.307882092628177e-05, "loss": 3.1533031463623047, "step": 123960 }, { "epoch": 0.2038, "grad_norm": 0.19845126569271088, "learning_rate": 8.3070554120942e-05, "loss": 3.2381282806396485, "step": 123970 }, { "epoch": 0.20386666666666667, "grad_norm": 0.19092349708080292, "learning_rate": 8.306228570824843e-05, "loss": 3.140933036804199, "step": 123980 }, { "epoch": 0.20393333333333333, "grad_norm": 0.2084706723690033, "learning_rate": 8.305401568860289e-05, "loss": 3.260392761230469, "step": 123990 }, { "epoch": 0.204, "grad_norm": 0.17548403143882751, "learning_rate": 8.304574406240739e-05, "loss": 3.216571044921875, "step": 124000 }, { "epoch": 0.20406666666666667, "grad_norm": 0.21132583916187286, "learning_rate": 8.303747083006393e-05, "loss": 3.095011329650879, "step": 124010 }, { "epoch": 0.20413333333333333, "grad_norm": 0.18921761214733124, "learning_rate": 8.302919599197461e-05, "loss": 3.0967889785766602, "step": 124020 }, { "epoch": 0.2042, "grad_norm": 0.17685946822166443, "learning_rate": 8.302091954854162e-05, "loss": 3.090546226501465, "step": 124030 }, { "epoch": 0.20426666666666668, "grad_norm": 0.21164463460445404, "learning_rate": 8.301264150016725e-05, "loss": 3.085865783691406, "step": 124040 }, { "epoch": 0.20433333333333334, "grad_norm": 0.20423245429992676, "learning_rate": 8.300436184725381e-05, "loss": 3.505768966674805, "step": 124050 }, { "epoch": 0.2044, "grad_norm": 0.18207281827926636, "learning_rate": 8.299608059020378e-05, "loss": 3.189639091491699, "step": 124060 }, { "epoch": 0.20446666666666666, "grad_norm": 0.19033370912075043, "learning_rate": 8.29877977294196e-05, "loss": 3.1571029663085937, "step": 124070 }, { "epoch": 0.20453333333333334, "grad_norm": 0.19009023904800415, "learning_rate": 8.297951326530388e-05, "loss": 3.1536928176879884, "step": 124080 }, { "epoch": 0.2046, "grad_norm": 0.22112807631492615, "learning_rate": 8.297122719825927e-05, "loss": 3.1981874465942384, "step": 124090 }, { "epoch": 0.20466666666666666, "grad_norm": 0.17742492258548737, "learning_rate": 8.296293952868851e-05, "loss": 3.2923503875732423, "step": 124100 }, { "epoch": 0.20473333333333332, "grad_norm": 0.20772941410541534, "learning_rate": 8.295465025699439e-05, "loss": 3.1299602508544924, "step": 124110 }, { "epoch": 0.2048, "grad_norm": 0.188654825091362, "learning_rate": 8.294635938357982e-05, "loss": 3.1001148223876953, "step": 124120 }, { "epoch": 0.20486666666666667, "grad_norm": 0.34126588702201843, "learning_rate": 8.293806690884778e-05, "loss": 3.1444292068481445, "step": 124130 }, { "epoch": 0.20493333333333333, "grad_norm": 0.3495664596557617, "learning_rate": 8.292977283320128e-05, "loss": 3.1598220825195313, "step": 124140 }, { "epoch": 0.205, "grad_norm": 0.2379995584487915, "learning_rate": 8.292147715704347e-05, "loss": 3.118526840209961, "step": 124150 }, { "epoch": 0.20506666666666667, "grad_norm": 0.18077220022678375, "learning_rate": 8.291317988077753e-05, "loss": 3.1095531463623045, "step": 124160 }, { "epoch": 0.20513333333333333, "grad_norm": 0.18543517589569092, "learning_rate": 8.290488100480675e-05, "loss": 3.136759567260742, "step": 124170 }, { "epoch": 0.2052, "grad_norm": 0.2206006646156311, "learning_rate": 8.28965805295345e-05, "loss": 3.1754123687744142, "step": 124180 }, { "epoch": 0.20526666666666665, "grad_norm": 0.1767122745513916, "learning_rate": 8.28882784553642e-05, "loss": 3.0989397048950194, "step": 124190 }, { "epoch": 0.20533333333333334, "grad_norm": 0.20264361798763275, "learning_rate": 8.287997478269937e-05, "loss": 3.1904293060302735, "step": 124200 }, { "epoch": 0.2054, "grad_norm": 0.17902301251888275, "learning_rate": 8.28716695119436e-05, "loss": 3.167470169067383, "step": 124210 }, { "epoch": 0.20546666666666666, "grad_norm": 0.17446447908878326, "learning_rate": 8.286336264350054e-05, "loss": 3.129606819152832, "step": 124220 }, { "epoch": 0.20553333333333335, "grad_norm": 0.3454672694206238, "learning_rate": 8.285505417777395e-05, "loss": 3.225211334228516, "step": 124230 }, { "epoch": 0.2056, "grad_norm": 0.1922808140516281, "learning_rate": 8.284674411516766e-05, "loss": 3.3200653076171873, "step": 124240 }, { "epoch": 0.20566666666666666, "grad_norm": 0.2750762104988098, "learning_rate": 8.283843245608556e-05, "loss": 3.191142272949219, "step": 124250 }, { "epoch": 0.20573333333333332, "grad_norm": 1.618401288986206, "learning_rate": 8.283011920093163e-05, "loss": 3.099229621887207, "step": 124260 }, { "epoch": 0.2058, "grad_norm": 0.1951591819524765, "learning_rate": 8.282180435010991e-05, "loss": 2.8114322662353515, "step": 124270 }, { "epoch": 0.20586666666666667, "grad_norm": 0.19895803928375244, "learning_rate": 8.281348790402455e-05, "loss": 3.158582878112793, "step": 124280 }, { "epoch": 0.20593333333333333, "grad_norm": 0.20780344307422638, "learning_rate": 8.280516986307976e-05, "loss": 3.128569793701172, "step": 124290 }, { "epoch": 0.206, "grad_norm": 0.18380063772201538, "learning_rate": 8.279685022767982e-05, "loss": 3.119245719909668, "step": 124300 }, { "epoch": 0.20606666666666668, "grad_norm": 0.19566719233989716, "learning_rate": 8.278852899822913e-05, "loss": 3.1917726516723635, "step": 124310 }, { "epoch": 0.20613333333333334, "grad_norm": 0.20597653090953827, "learning_rate": 8.278020617513209e-05, "loss": 3.2167121887207033, "step": 124320 }, { "epoch": 0.2062, "grad_norm": 0.18649961054325104, "learning_rate": 8.277188175879323e-05, "loss": 3.128556251525879, "step": 124330 }, { "epoch": 0.20626666666666665, "grad_norm": 0.2135976254940033, "learning_rate": 8.276355574961716e-05, "loss": 3.114094924926758, "step": 124340 }, { "epoch": 0.20633333333333334, "grad_norm": 0.18661010265350342, "learning_rate": 8.275522814800854e-05, "loss": 3.1193471908569337, "step": 124350 }, { "epoch": 0.2064, "grad_norm": 0.18298685550689697, "learning_rate": 8.274689895437216e-05, "loss": 3.1376983642578127, "step": 124360 }, { "epoch": 0.20646666666666666, "grad_norm": 0.6247183680534363, "learning_rate": 8.273856816911278e-05, "loss": 3.2009281158447265, "step": 124370 }, { "epoch": 0.20653333333333335, "grad_norm": 0.20144405961036682, "learning_rate": 8.273023579263538e-05, "loss": 3.19107666015625, "step": 124380 }, { "epoch": 0.2066, "grad_norm": 0.2443840652704239, "learning_rate": 8.272190182534492e-05, "loss": 3.1242067337036135, "step": 124390 }, { "epoch": 0.20666666666666667, "grad_norm": 0.20903387665748596, "learning_rate": 8.271356626764644e-05, "loss": 3.212232971191406, "step": 124400 }, { "epoch": 0.20673333333333332, "grad_norm": 0.18989107012748718, "learning_rate": 8.27052291199451e-05, "loss": 3.1221343994140627, "step": 124410 }, { "epoch": 0.2068, "grad_norm": 0.2246219366788864, "learning_rate": 8.269689038264612e-05, "loss": 3.122291374206543, "step": 124420 }, { "epoch": 0.20686666666666667, "grad_norm": 0.17985914647579193, "learning_rate": 8.268855005615479e-05, "loss": 3.1362983703613283, "step": 124430 }, { "epoch": 0.20693333333333333, "grad_norm": 0.20162402093410492, "learning_rate": 8.268020814087647e-05, "loss": 3.1584497451782227, "step": 124440 }, { "epoch": 0.207, "grad_norm": 0.17510704696178436, "learning_rate": 8.267186463721664e-05, "loss": 3.1547267913818358, "step": 124450 }, { "epoch": 0.20706666666666668, "grad_norm": 0.18130891025066376, "learning_rate": 8.266351954558078e-05, "loss": 3.259846878051758, "step": 124460 }, { "epoch": 0.20713333333333334, "grad_norm": 0.19568510353565216, "learning_rate": 8.265517286637452e-05, "loss": 3.147854042053223, "step": 124470 }, { "epoch": 0.2072, "grad_norm": 0.19801992177963257, "learning_rate": 8.264682460000355e-05, "loss": 3.1369691848754884, "step": 124480 }, { "epoch": 0.20726666666666665, "grad_norm": 0.18977588415145874, "learning_rate": 8.263847474687361e-05, "loss": 3.1183300018310547, "step": 124490 }, { "epoch": 0.20733333333333334, "grad_norm": 0.1988006830215454, "learning_rate": 8.263012330739054e-05, "loss": 3.095139503479004, "step": 124500 }, { "epoch": 0.2074, "grad_norm": 0.18645644187927246, "learning_rate": 8.262177028196024e-05, "loss": 3.148762321472168, "step": 124510 }, { "epoch": 0.20746666666666666, "grad_norm": 0.18286539614200592, "learning_rate": 8.261341567098872e-05, "loss": 3.1776203155517577, "step": 124520 }, { "epoch": 0.20753333333333332, "grad_norm": 0.19618456065654755, "learning_rate": 8.260505947488202e-05, "loss": 3.1404918670654296, "step": 124530 }, { "epoch": 0.2076, "grad_norm": 0.2496548295021057, "learning_rate": 8.259670169404631e-05, "loss": 3.165487861633301, "step": 124540 }, { "epoch": 0.20766666666666667, "grad_norm": 0.2211676687002182, "learning_rate": 8.258834232888779e-05, "loss": 3.143267822265625, "step": 124550 }, { "epoch": 0.20773333333333333, "grad_norm": 0.18860644102096558, "learning_rate": 8.257998137981279e-05, "loss": 3.1336841583251953, "step": 124560 }, { "epoch": 0.2078, "grad_norm": 0.17945612967014313, "learning_rate": 8.257161884722763e-05, "loss": 3.1450618743896483, "step": 124570 }, { "epoch": 0.20786666666666667, "grad_norm": 0.18651118874549866, "learning_rate": 8.256325473153879e-05, "loss": 3.1739757537841795, "step": 124580 }, { "epoch": 0.20793333333333333, "grad_norm": 0.2168239951133728, "learning_rate": 8.255488903315281e-05, "loss": 3.145741844177246, "step": 124590 }, { "epoch": 0.208, "grad_norm": 0.2148510366678238, "learning_rate": 8.254652175247626e-05, "loss": 3.1476953506469725, "step": 124600 }, { "epoch": 0.20806666666666668, "grad_norm": 0.22016634047031403, "learning_rate": 8.253815288991584e-05, "loss": 3.1436485290527343, "step": 124610 }, { "epoch": 0.20813333333333334, "grad_norm": 0.17753875255584717, "learning_rate": 8.252978244587832e-05, "loss": 3.1740699768066407, "step": 124620 }, { "epoch": 0.2082, "grad_norm": 0.1831495314836502, "learning_rate": 8.252141042077053e-05, "loss": 3.1369958877563477, "step": 124630 }, { "epoch": 0.20826666666666666, "grad_norm": 0.19394159317016602, "learning_rate": 8.251303681499937e-05, "loss": 3.2652427673339846, "step": 124640 }, { "epoch": 0.20833333333333334, "grad_norm": 0.2086532562971115, "learning_rate": 8.250466162897185e-05, "loss": 3.2100894927978514, "step": 124650 }, { "epoch": 0.2084, "grad_norm": 0.19258274137973785, "learning_rate": 8.2496284863095e-05, "loss": 3.1167652130126955, "step": 124660 }, { "epoch": 0.20846666666666666, "grad_norm": 0.21619273722171783, "learning_rate": 8.2487906517776e-05, "loss": 3.19057674407959, "step": 124670 }, { "epoch": 0.20853333333333332, "grad_norm": 0.1813611388206482, "learning_rate": 8.247952659342205e-05, "loss": 3.2081321716308593, "step": 124680 }, { "epoch": 0.2086, "grad_norm": 0.2215304970741272, "learning_rate": 8.247114509044045e-05, "loss": 3.138285446166992, "step": 124690 }, { "epoch": 0.20866666666666667, "grad_norm": 0.1787818968296051, "learning_rate": 8.246276200923857e-05, "loss": 3.117198944091797, "step": 124700 }, { "epoch": 0.20873333333333333, "grad_norm": 0.18825025856494904, "learning_rate": 8.245437735022386e-05, "loss": 3.140029525756836, "step": 124710 }, { "epoch": 0.2088, "grad_norm": 0.21493247151374817, "learning_rate": 8.244599111380385e-05, "loss": 3.202648162841797, "step": 124720 }, { "epoch": 0.20886666666666667, "grad_norm": 0.4039885103702545, "learning_rate": 8.243760330038614e-05, "loss": 3.304362487792969, "step": 124730 }, { "epoch": 0.20893333333333333, "grad_norm": 0.26984986662864685, "learning_rate": 8.242921391037842e-05, "loss": 3.209406280517578, "step": 124740 }, { "epoch": 0.209, "grad_norm": 0.19002798199653625, "learning_rate": 8.242082294418841e-05, "loss": 3.150066947937012, "step": 124750 }, { "epoch": 0.20906666666666668, "grad_norm": 0.1987849920988083, "learning_rate": 8.241243040222399e-05, "loss": 3.114251708984375, "step": 124760 }, { "epoch": 0.20913333333333334, "grad_norm": 0.18030747771263123, "learning_rate": 8.240403628489304e-05, "loss": 3.139168548583984, "step": 124770 }, { "epoch": 0.2092, "grad_norm": 0.2083379328250885, "learning_rate": 8.239564059260354e-05, "loss": 3.2239288330078124, "step": 124780 }, { "epoch": 0.20926666666666666, "grad_norm": 0.19306203722953796, "learning_rate": 8.238724332576359e-05, "loss": 3.1049707412719725, "step": 124790 }, { "epoch": 0.20933333333333334, "grad_norm": 0.1870335340499878, "learning_rate": 8.237884448478129e-05, "loss": 3.170248794555664, "step": 124800 }, { "epoch": 0.2094, "grad_norm": 0.19970732927322388, "learning_rate": 8.237044407006487e-05, "loss": 3.131245803833008, "step": 124810 }, { "epoch": 0.20946666666666666, "grad_norm": 0.21311314404010773, "learning_rate": 8.236204208202262e-05, "loss": 3.1321266174316404, "step": 124820 }, { "epoch": 0.20953333333333332, "grad_norm": 0.17975930869579315, "learning_rate": 8.235363852106293e-05, "loss": 3.15737247467041, "step": 124830 }, { "epoch": 0.2096, "grad_norm": 0.22662238776683807, "learning_rate": 8.234523338759422e-05, "loss": 3.1444042205810545, "step": 124840 }, { "epoch": 0.20966666666666667, "grad_norm": 0.19349662959575653, "learning_rate": 8.233682668202499e-05, "loss": 3.1080156326293946, "step": 124850 }, { "epoch": 0.20973333333333333, "grad_norm": 0.1843966543674469, "learning_rate": 8.232841840476388e-05, "loss": 3.1357975006103516, "step": 124860 }, { "epoch": 0.2098, "grad_norm": 0.18526668846607208, "learning_rate": 8.232000855621955e-05, "loss": 3.1832757949829102, "step": 124870 }, { "epoch": 0.20986666666666667, "grad_norm": 0.29199880361557007, "learning_rate": 8.231159713680077e-05, "loss": 3.1881336212158202, "step": 124880 }, { "epoch": 0.20993333333333333, "grad_norm": 0.4477328658103943, "learning_rate": 8.230318414691632e-05, "loss": 3.1974279403686525, "step": 124890 }, { "epoch": 0.21, "grad_norm": 0.22264961898326874, "learning_rate": 8.229476958697513e-05, "loss": 3.1306549072265626, "step": 124900 }, { "epoch": 0.21006666666666668, "grad_norm": 0.2644058167934418, "learning_rate": 8.228635345738618e-05, "loss": 3.2195510864257812, "step": 124910 }, { "epoch": 0.21013333333333334, "grad_norm": 0.21535249054431915, "learning_rate": 8.227793575855852e-05, "loss": 3.2073192596435547, "step": 124920 }, { "epoch": 0.2102, "grad_norm": 0.19462069869041443, "learning_rate": 8.226951649090127e-05, "loss": 3.1075885772705076, "step": 124930 }, { "epoch": 0.21026666666666666, "grad_norm": 0.19670100510120392, "learning_rate": 8.226109565482367e-05, "loss": 3.1043909072875975, "step": 124940 }, { "epoch": 0.21033333333333334, "grad_norm": 0.22745662927627563, "learning_rate": 8.2252673250735e-05, "loss": 3.2289569854736326, "step": 124950 }, { "epoch": 0.2104, "grad_norm": 0.1963125467300415, "learning_rate": 8.22442492790446e-05, "loss": 3.1258548736572265, "step": 124960 }, { "epoch": 0.21046666666666666, "grad_norm": 0.21581092476844788, "learning_rate": 8.223582374016191e-05, "loss": 3.181705665588379, "step": 124970 }, { "epoch": 0.21053333333333332, "grad_norm": 0.1958688199520111, "learning_rate": 8.222739663449646e-05, "loss": 3.153123664855957, "step": 124980 }, { "epoch": 0.2106, "grad_norm": 0.21524637937545776, "learning_rate": 8.22189679624578e-05, "loss": 3.188059616088867, "step": 124990 }, { "epoch": 0.21066666666666667, "grad_norm": 0.2289552390575409, "learning_rate": 8.221053772445563e-05, "loss": 3.2796470642089846, "step": 125000 }, { "epoch": 0.21073333333333333, "grad_norm": 0.26305586099624634, "learning_rate": 8.22021059208997e-05, "loss": 3.167681121826172, "step": 125010 }, { "epoch": 0.2108, "grad_norm": 0.2105698138475418, "learning_rate": 8.219367255219982e-05, "loss": 3.1413951873779298, "step": 125020 }, { "epoch": 0.21086666666666667, "grad_norm": 0.1808946430683136, "learning_rate": 8.218523761876586e-05, "loss": 3.1433767318725585, "step": 125030 }, { "epoch": 0.21093333333333333, "grad_norm": 0.21614767611026764, "learning_rate": 8.217680112100781e-05, "loss": 3.1262639999389648, "step": 125040 }, { "epoch": 0.211, "grad_norm": 0.17882032692432404, "learning_rate": 8.21683630593357e-05, "loss": 3.1083473205566405, "step": 125050 }, { "epoch": 0.21106666666666668, "grad_norm": 0.21088990569114685, "learning_rate": 8.215992343415968e-05, "loss": 3.1888910293579102, "step": 125060 }, { "epoch": 0.21113333333333334, "grad_norm": 0.20737400650978088, "learning_rate": 8.215148224588992e-05, "loss": 3.1494012832641602, "step": 125070 }, { "epoch": 0.2112, "grad_norm": 0.19843755662441254, "learning_rate": 8.21430394949367e-05, "loss": 3.0961965560913085, "step": 125080 }, { "epoch": 0.21126666666666666, "grad_norm": 0.1747598648071289, "learning_rate": 8.213459518171038e-05, "loss": 3.1661331176757814, "step": 125090 }, { "epoch": 0.21133333333333335, "grad_norm": 0.19725480675697327, "learning_rate": 8.212614930662137e-05, "loss": 3.0550409317016602, "step": 125100 }, { "epoch": 0.2114, "grad_norm": 0.20595163106918335, "learning_rate": 8.211770187008019e-05, "loss": 3.1475030899047853, "step": 125110 }, { "epoch": 0.21146666666666666, "grad_norm": 0.18993596732616425, "learning_rate": 8.21092528724974e-05, "loss": 3.1468955993652346, "step": 125120 }, { "epoch": 0.21153333333333332, "grad_norm": 0.19340936839580536, "learning_rate": 8.210080231428366e-05, "loss": 3.1186077117919924, "step": 125130 }, { "epoch": 0.2116, "grad_norm": 0.1812203824520111, "learning_rate": 8.20923501958497e-05, "loss": 3.1765270233154297, "step": 125140 }, { "epoch": 0.21166666666666667, "grad_norm": 0.21595332026481628, "learning_rate": 8.208389651760633e-05, "loss": 3.116384506225586, "step": 125150 }, { "epoch": 0.21173333333333333, "grad_norm": 0.2130025029182434, "learning_rate": 8.207544127996442e-05, "loss": 3.160792350769043, "step": 125160 }, { "epoch": 0.2118, "grad_norm": 0.18578317761421204, "learning_rate": 8.206698448333492e-05, "loss": 3.1266183853149414, "step": 125170 }, { "epoch": 0.21186666666666668, "grad_norm": 0.18140268325805664, "learning_rate": 8.20585261281289e-05, "loss": 3.1545196533203126, "step": 125180 }, { "epoch": 0.21193333333333333, "grad_norm": 0.19515858590602875, "learning_rate": 8.205006621475742e-05, "loss": 3.1361860275268554, "step": 125190 }, { "epoch": 0.212, "grad_norm": 0.19055689871311188, "learning_rate": 8.20416047436317e-05, "loss": 3.111256980895996, "step": 125200 }, { "epoch": 0.21206666666666665, "grad_norm": 0.18539388477802277, "learning_rate": 8.2033141715163e-05, "loss": 3.138452339172363, "step": 125210 }, { "epoch": 0.21213333333333334, "grad_norm": 0.17313793301582336, "learning_rate": 8.20246771297626e-05, "loss": 3.0736312866210938, "step": 125220 }, { "epoch": 0.2122, "grad_norm": 0.23610210418701172, "learning_rate": 8.201621098784198e-05, "loss": 3.1133955001831053, "step": 125230 }, { "epoch": 0.21226666666666666, "grad_norm": 0.18844236433506012, "learning_rate": 8.200774328981258e-05, "loss": 3.1196128845214846, "step": 125240 }, { "epoch": 0.21233333333333335, "grad_norm": 0.20583060383796692, "learning_rate": 8.199927403608598e-05, "loss": 3.1252277374267576, "step": 125250 }, { "epoch": 0.2124, "grad_norm": 0.18588478863239288, "learning_rate": 8.199080322707384e-05, "loss": 3.1564615249633787, "step": 125260 }, { "epoch": 0.21246666666666666, "grad_norm": 0.19546616077423096, "learning_rate": 8.198233086318783e-05, "loss": 3.0547201156616213, "step": 125270 }, { "epoch": 0.21253333333333332, "grad_norm": 0.1861213594675064, "learning_rate": 8.197385694483976e-05, "loss": 3.1519166946411135, "step": 125280 }, { "epoch": 0.2126, "grad_norm": 0.18425846099853516, "learning_rate": 8.19653814724415e-05, "loss": 3.088614845275879, "step": 125290 }, { "epoch": 0.21266666666666667, "grad_norm": 0.23567548394203186, "learning_rate": 8.195690444640498e-05, "loss": 3.1581609725952147, "step": 125300 }, { "epoch": 0.21273333333333333, "grad_norm": 0.2016449123620987, "learning_rate": 8.194842586714222e-05, "loss": 3.1602039337158203, "step": 125310 }, { "epoch": 0.2128, "grad_norm": 0.1987210512161255, "learning_rate": 8.193994573506531e-05, "loss": 3.145290756225586, "step": 125320 }, { "epoch": 0.21286666666666668, "grad_norm": 0.2149377167224884, "learning_rate": 8.193146405058642e-05, "loss": 3.1987014770507813, "step": 125330 }, { "epoch": 0.21293333333333334, "grad_norm": 0.18978454172611237, "learning_rate": 8.19229808141178e-05, "loss": 3.1093759536743164, "step": 125340 }, { "epoch": 0.213, "grad_norm": 0.22420643270015717, "learning_rate": 8.191449602607174e-05, "loss": 3.1434654235839843, "step": 125350 }, { "epoch": 0.21306666666666665, "grad_norm": 0.18801644444465637, "learning_rate": 8.190600968686065e-05, "loss": 3.1122978210449217, "step": 125360 }, { "epoch": 0.21313333333333334, "grad_norm": 0.502730131149292, "learning_rate": 8.1897521796897e-05, "loss": 3.0742584228515626, "step": 125370 }, { "epoch": 0.2132, "grad_norm": 0.1980743706226349, "learning_rate": 8.188903235659333e-05, "loss": 3.156040000915527, "step": 125380 }, { "epoch": 0.21326666666666666, "grad_norm": 0.4411551058292389, "learning_rate": 8.188054136636226e-05, "loss": 3.280078125, "step": 125390 }, { "epoch": 0.21333333333333335, "grad_norm": 0.19457660615444183, "learning_rate": 8.187204882661649e-05, "loss": 3.1046606063842774, "step": 125400 }, { "epoch": 0.2134, "grad_norm": 0.20486539602279663, "learning_rate": 8.186355473776877e-05, "loss": 3.1281734466552735, "step": 125410 }, { "epoch": 0.21346666666666667, "grad_norm": 0.2178216278553009, "learning_rate": 8.185505910023197e-05, "loss": 3.134670639038086, "step": 125420 }, { "epoch": 0.21353333333333332, "grad_norm": 0.19691964983940125, "learning_rate": 8.184656191441898e-05, "loss": 3.1660512924194335, "step": 125430 }, { "epoch": 0.2136, "grad_norm": 0.19339635968208313, "learning_rate": 8.183806318074284e-05, "loss": 3.144388198852539, "step": 125440 }, { "epoch": 0.21366666666666667, "grad_norm": 0.19180931150913239, "learning_rate": 8.182956289961657e-05, "loss": 3.139883613586426, "step": 125450 }, { "epoch": 0.21373333333333333, "grad_norm": 0.17651310563087463, "learning_rate": 8.182106107145336e-05, "loss": 3.1435497283935545, "step": 125460 }, { "epoch": 0.2138, "grad_norm": 0.22126100957393646, "learning_rate": 8.181255769666638e-05, "loss": 3.139628219604492, "step": 125470 }, { "epoch": 0.21386666666666668, "grad_norm": 0.20683923363685608, "learning_rate": 8.180405277566899e-05, "loss": 3.1061229705810547, "step": 125480 }, { "epoch": 0.21393333333333334, "grad_norm": 0.188524529337883, "learning_rate": 8.17955463088745e-05, "loss": 3.1344541549682616, "step": 125490 }, { "epoch": 0.214, "grad_norm": 0.2208324670791626, "learning_rate": 8.178703829669639e-05, "loss": 3.1180591583251953, "step": 125500 }, { "epoch": 0.21406666666666666, "grad_norm": 0.1989527940750122, "learning_rate": 8.177852873954819e-05, "loss": 3.177195739746094, "step": 125510 }, { "epoch": 0.21413333333333334, "grad_norm": 0.23747316002845764, "learning_rate": 8.177001763784347e-05, "loss": 3.1214981079101562, "step": 125520 }, { "epoch": 0.2142, "grad_norm": 0.20011885464191437, "learning_rate": 8.176150499199593e-05, "loss": 3.169481086730957, "step": 125530 }, { "epoch": 0.21426666666666666, "grad_norm": 0.1880398839712143, "learning_rate": 8.175299080241928e-05, "loss": 3.141585922241211, "step": 125540 }, { "epoch": 0.21433333333333332, "grad_norm": 0.19078151881694794, "learning_rate": 8.174447506952737e-05, "loss": 3.157675361633301, "step": 125550 }, { "epoch": 0.2144, "grad_norm": 0.2368520349264145, "learning_rate": 8.173595779373408e-05, "loss": 3.179895210266113, "step": 125560 }, { "epoch": 0.21446666666666667, "grad_norm": 0.18757757544517517, "learning_rate": 8.172743897545341e-05, "loss": 3.139461326599121, "step": 125570 }, { "epoch": 0.21453333333333333, "grad_norm": 0.18264228105545044, "learning_rate": 8.171891861509939e-05, "loss": 3.169450378417969, "step": 125580 }, { "epoch": 0.2146, "grad_norm": 0.19435639679431915, "learning_rate": 8.171039671308612e-05, "loss": 3.107024574279785, "step": 125590 }, { "epoch": 0.21466666666666667, "grad_norm": 0.20481523871421814, "learning_rate": 8.170187326982782e-05, "loss": 3.140646743774414, "step": 125600 }, { "epoch": 0.21473333333333333, "grad_norm": 0.25467053055763245, "learning_rate": 8.169334828573878e-05, "loss": 3.1254505157470702, "step": 125610 }, { "epoch": 0.2148, "grad_norm": 0.2156999111175537, "learning_rate": 8.168482176123329e-05, "loss": 3.173487663269043, "step": 125620 }, { "epoch": 0.21486666666666668, "grad_norm": 0.21844346821308136, "learning_rate": 8.167629369672583e-05, "loss": 3.1635303497314453, "step": 125630 }, { "epoch": 0.21493333333333334, "grad_norm": 0.31997206807136536, "learning_rate": 8.166776409263086e-05, "loss": 3.187288284301758, "step": 125640 }, { "epoch": 0.215, "grad_norm": 0.19835270941257477, "learning_rate": 8.165923294936298e-05, "loss": 3.1433073043823243, "step": 125650 }, { "epoch": 0.21506666666666666, "grad_norm": 0.19407226145267487, "learning_rate": 8.165070026733679e-05, "loss": 3.1643810272216797, "step": 125660 }, { "epoch": 0.21513333333333334, "grad_norm": 0.2071222960948944, "learning_rate": 8.164216604696706e-05, "loss": 3.1612062454223633, "step": 125670 }, { "epoch": 0.2152, "grad_norm": 0.20005351305007935, "learning_rate": 8.163363028866856e-05, "loss": 3.1224288940429688, "step": 125680 }, { "epoch": 0.21526666666666666, "grad_norm": 0.18163973093032837, "learning_rate": 8.162509299285615e-05, "loss": 3.0925113677978517, "step": 125690 }, { "epoch": 0.21533333333333332, "grad_norm": 0.18734894692897797, "learning_rate": 8.16165541599448e-05, "loss": 3.1361618041992188, "step": 125700 }, { "epoch": 0.2154, "grad_norm": 0.5390262603759766, "learning_rate": 8.160801379034953e-05, "loss": 3.11651668548584, "step": 125710 }, { "epoch": 0.21546666666666667, "grad_norm": 0.19552336633205414, "learning_rate": 8.15994718844854e-05, "loss": 3.168461227416992, "step": 125720 }, { "epoch": 0.21553333333333333, "grad_norm": 0.2077396959066391, "learning_rate": 8.159092844276762e-05, "loss": 3.1236948013305663, "step": 125730 }, { "epoch": 0.2156, "grad_norm": 0.17985717952251434, "learning_rate": 8.158238346561142e-05, "loss": 3.1251218795776365, "step": 125740 }, { "epoch": 0.21566666666666667, "grad_norm": 0.19868513941764832, "learning_rate": 8.15738369534321e-05, "loss": 3.1584482192993164, "step": 125750 }, { "epoch": 0.21573333333333333, "grad_norm": 0.18663421273231506, "learning_rate": 8.156528890664505e-05, "loss": 3.200307846069336, "step": 125760 }, { "epoch": 0.2158, "grad_norm": 0.18627333641052246, "learning_rate": 8.155673932566578e-05, "loss": 3.166134834289551, "step": 125770 }, { "epoch": 0.21586666666666668, "grad_norm": 0.34805792570114136, "learning_rate": 8.15481882109098e-05, "loss": 3.1287155151367188, "step": 125780 }, { "epoch": 0.21593333333333334, "grad_norm": 0.19412727653980255, "learning_rate": 8.153963556279274e-05, "loss": 3.1224594116210938, "step": 125790 }, { "epoch": 0.216, "grad_norm": 0.2050856351852417, "learning_rate": 8.153108138173027e-05, "loss": 3.157893180847168, "step": 125800 }, { "epoch": 0.21606666666666666, "grad_norm": 0.18761664628982544, "learning_rate": 8.152252566813817e-05, "loss": 3.123712921142578, "step": 125810 }, { "epoch": 0.21613333333333334, "grad_norm": 0.18600544333457947, "learning_rate": 8.151396842243227e-05, "loss": 3.1606143951416015, "step": 125820 }, { "epoch": 0.2162, "grad_norm": 0.1909758746623993, "learning_rate": 8.150540964502851e-05, "loss": 3.137807846069336, "step": 125830 }, { "epoch": 0.21626666666666666, "grad_norm": 0.1988557130098343, "learning_rate": 8.149684933634285e-05, "loss": 3.2041011810302735, "step": 125840 }, { "epoch": 0.21633333333333332, "grad_norm": 0.2012307494878769, "learning_rate": 8.148828749679138e-05, "loss": 3.161425971984863, "step": 125850 }, { "epoch": 0.2164, "grad_norm": 0.18545810878276825, "learning_rate": 8.14797241267902e-05, "loss": 3.1308719635009767, "step": 125860 }, { "epoch": 0.21646666666666667, "grad_norm": 0.19071051478385925, "learning_rate": 8.147115922675557e-05, "loss": 3.1414859771728514, "step": 125870 }, { "epoch": 0.21653333333333333, "grad_norm": 0.1797081083059311, "learning_rate": 8.146259279710374e-05, "loss": 3.156478691101074, "step": 125880 }, { "epoch": 0.2166, "grad_norm": 0.19180795550346375, "learning_rate": 8.145402483825107e-05, "loss": 3.1201192855834963, "step": 125890 }, { "epoch": 0.21666666666666667, "grad_norm": 0.20089003443717957, "learning_rate": 8.1445455350614e-05, "loss": 3.1345529556274414, "step": 125900 }, { "epoch": 0.21673333333333333, "grad_norm": 0.19361750781536102, "learning_rate": 8.143688433460908e-05, "loss": 3.153101921081543, "step": 125910 }, { "epoch": 0.2168, "grad_norm": 0.21890772879123688, "learning_rate": 8.142831179065285e-05, "loss": 3.1351818084716796, "step": 125920 }, { "epoch": 0.21686666666666668, "grad_norm": 0.18898692727088928, "learning_rate": 8.141973771916198e-05, "loss": 3.1897207260131837, "step": 125930 }, { "epoch": 0.21693333333333334, "grad_norm": 0.18788857758045197, "learning_rate": 8.141116212055319e-05, "loss": 3.11600399017334, "step": 125940 }, { "epoch": 0.217, "grad_norm": 0.2831149101257324, "learning_rate": 8.140258499524329e-05, "loss": 3.211386871337891, "step": 125950 }, { "epoch": 0.21706666666666666, "grad_norm": 0.19148914515972137, "learning_rate": 8.139400634364918e-05, "loss": 3.1862239837646484, "step": 125960 }, { "epoch": 0.21713333333333334, "grad_norm": 0.18812227249145508, "learning_rate": 8.138542616618781e-05, "loss": 3.159276008605957, "step": 125970 }, { "epoch": 0.2172, "grad_norm": 0.1942042112350464, "learning_rate": 8.13768444632762e-05, "loss": 3.093227195739746, "step": 125980 }, { "epoch": 0.21726666666666666, "grad_norm": 0.29152214527130127, "learning_rate": 8.136826123533145e-05, "loss": 3.1544822692871093, "step": 125990 }, { "epoch": 0.21733333333333332, "grad_norm": 0.18293243646621704, "learning_rate": 8.135967648277074e-05, "loss": 3.0908641815185547, "step": 126000 }, { "epoch": 0.2174, "grad_norm": 0.19356849789619446, "learning_rate": 8.135109020601132e-05, "loss": 3.1015647888183593, "step": 126010 }, { "epoch": 0.21746666666666667, "grad_norm": 0.19057132303714752, "learning_rate": 8.134250240547053e-05, "loss": 3.133327102661133, "step": 126020 }, { "epoch": 0.21753333333333333, "grad_norm": 0.20071491599082947, "learning_rate": 8.133391308156575e-05, "loss": 3.119655227661133, "step": 126030 }, { "epoch": 0.2176, "grad_norm": 0.32346564531326294, "learning_rate": 8.132532223471447e-05, "loss": 3.1984153747558595, "step": 126040 }, { "epoch": 0.21766666666666667, "grad_norm": 0.18382775783538818, "learning_rate": 8.131672986533423e-05, "loss": 3.126692008972168, "step": 126050 }, { "epoch": 0.21773333333333333, "grad_norm": 0.18748033046722412, "learning_rate": 8.130813597384265e-05, "loss": 3.0837377548217773, "step": 126060 }, { "epoch": 0.2178, "grad_norm": 0.2976183295249939, "learning_rate": 8.129954056065743e-05, "loss": 3.142597198486328, "step": 126070 }, { "epoch": 0.21786666666666665, "grad_norm": 0.18055379390716553, "learning_rate": 8.129094362619632e-05, "loss": 3.1072355270385743, "step": 126080 }, { "epoch": 0.21793333333333334, "grad_norm": 0.19394050538539886, "learning_rate": 8.12823451708772e-05, "loss": 3.1512029647827147, "step": 126090 }, { "epoch": 0.218, "grad_norm": 0.2093820422887802, "learning_rate": 8.127374519511797e-05, "loss": 3.1004043579101563, "step": 126100 }, { "epoch": 0.21806666666666666, "grad_norm": 0.18363064527511597, "learning_rate": 8.12651436993366e-05, "loss": 3.194956398010254, "step": 126110 }, { "epoch": 0.21813333333333335, "grad_norm": 0.18575814366340637, "learning_rate": 8.12565406839512e-05, "loss": 3.1373838424682616, "step": 126120 }, { "epoch": 0.2182, "grad_norm": 0.18760763108730316, "learning_rate": 8.124793614937985e-05, "loss": 3.1432676315307617, "step": 126130 }, { "epoch": 0.21826666666666666, "grad_norm": 0.18149088323116302, "learning_rate": 8.12393300960408e-05, "loss": 3.159857177734375, "step": 126140 }, { "epoch": 0.21833333333333332, "grad_norm": 0.17813551425933838, "learning_rate": 8.123072252435235e-05, "loss": 3.1064664840698244, "step": 126150 }, { "epoch": 0.2184, "grad_norm": 0.18041487038135529, "learning_rate": 8.122211343473286e-05, "loss": 3.1291807174682615, "step": 126160 }, { "epoch": 0.21846666666666667, "grad_norm": 0.21260125935077667, "learning_rate": 8.121350282760072e-05, "loss": 3.1595989227294923, "step": 126170 }, { "epoch": 0.21853333333333333, "grad_norm": 0.19056172668933868, "learning_rate": 8.120489070337446e-05, "loss": 3.092814636230469, "step": 126180 }, { "epoch": 0.2186, "grad_norm": 0.33907121419906616, "learning_rate": 8.119627706247268e-05, "loss": 3.1963003158569334, "step": 126190 }, { "epoch": 0.21866666666666668, "grad_norm": 0.47373777627944946, "learning_rate": 8.118766190531403e-05, "loss": 3.149736785888672, "step": 126200 }, { "epoch": 0.21873333333333334, "grad_norm": 0.202372208237648, "learning_rate": 8.117904523231722e-05, "loss": 3.1767595291137694, "step": 126210 }, { "epoch": 0.2188, "grad_norm": 0.18441258370876312, "learning_rate": 8.117042704390107e-05, "loss": 3.170459175109863, "step": 126220 }, { "epoch": 0.21886666666666665, "grad_norm": 0.20307454466819763, "learning_rate": 8.116180734048444e-05, "loss": 3.057093048095703, "step": 126230 }, { "epoch": 0.21893333333333334, "grad_norm": 0.21206419169902802, "learning_rate": 8.11531861224863e-05, "loss": 3.121090126037598, "step": 126240 }, { "epoch": 0.219, "grad_norm": 0.2552371025085449, "learning_rate": 8.114456339032568e-05, "loss": 3.1614456176757812, "step": 126250 }, { "epoch": 0.21906666666666666, "grad_norm": 0.1937183290719986, "learning_rate": 8.113593914442164e-05, "loss": 3.1359167098999023, "step": 126260 }, { "epoch": 0.21913333333333335, "grad_norm": 0.19633488357067108, "learning_rate": 8.112731338519338e-05, "loss": 3.1333410263061525, "step": 126270 }, { "epoch": 0.2192, "grad_norm": 0.1892520934343338, "learning_rate": 8.111868611306014e-05, "loss": 3.1994613647460937, "step": 126280 }, { "epoch": 0.21926666666666667, "grad_norm": 0.18938620388507843, "learning_rate": 8.111005732844123e-05, "loss": 3.1127769470214846, "step": 126290 }, { "epoch": 0.21933333333333332, "grad_norm": 0.47469136118888855, "learning_rate": 8.110142703175605e-05, "loss": 3.222484588623047, "step": 126300 }, { "epoch": 0.2194, "grad_norm": 0.24158211052417755, "learning_rate": 8.109279522342407e-05, "loss": 3.1062124252319334, "step": 126310 }, { "epoch": 0.21946666666666667, "grad_norm": 0.2384992241859436, "learning_rate": 8.108416190386481e-05, "loss": 3.13287410736084, "step": 126320 }, { "epoch": 0.21953333333333333, "grad_norm": 0.19880913197994232, "learning_rate": 8.10755270734979e-05, "loss": 3.134501266479492, "step": 126330 }, { "epoch": 0.2196, "grad_norm": 0.19678230583667755, "learning_rate": 8.106689073274303e-05, "loss": 3.1760890960693358, "step": 126340 }, { "epoch": 0.21966666666666668, "grad_norm": 0.19064734876155853, "learning_rate": 8.105825288201993e-05, "loss": 3.139449119567871, "step": 126350 }, { "epoch": 0.21973333333333334, "grad_norm": 0.196461021900177, "learning_rate": 8.104961352174845e-05, "loss": 3.10949764251709, "step": 126360 }, { "epoch": 0.2198, "grad_norm": 0.22252047061920166, "learning_rate": 8.104097265234848e-05, "loss": 3.155080795288086, "step": 126370 }, { "epoch": 0.21986666666666665, "grad_norm": 0.24123342335224152, "learning_rate": 8.103233027424004e-05, "loss": 3.1053146362304687, "step": 126380 }, { "epoch": 0.21993333333333334, "grad_norm": 0.19470764696598053, "learning_rate": 8.102368638784314e-05, "loss": 3.149420166015625, "step": 126390 }, { "epoch": 0.22, "grad_norm": 0.19122762978076935, "learning_rate": 8.101504099357793e-05, "loss": 3.218730163574219, "step": 126400 }, { "epoch": 0.22006666666666666, "grad_norm": 0.19722211360931396, "learning_rate": 8.10063940918646e-05, "loss": 3.2186553955078123, "step": 126410 }, { "epoch": 0.22013333333333332, "grad_norm": 0.21412427723407745, "learning_rate": 8.099774568312343e-05, "loss": 3.3018226623535156, "step": 126420 }, { "epoch": 0.2202, "grad_norm": 0.1967540681362152, "learning_rate": 8.098909576777474e-05, "loss": 3.114565849304199, "step": 126430 }, { "epoch": 0.22026666666666667, "grad_norm": 0.17949044704437256, "learning_rate": 8.098044434623898e-05, "loss": 3.1401607513427736, "step": 126440 }, { "epoch": 0.22033333333333333, "grad_norm": 0.1869954615831375, "learning_rate": 8.097179141893662e-05, "loss": 3.1698389053344727, "step": 126450 }, { "epoch": 0.2204, "grad_norm": 0.18835854530334473, "learning_rate": 8.096313698628823e-05, "loss": 3.1224668502807615, "step": 126460 }, { "epoch": 0.22046666666666667, "grad_norm": 0.2098253071308136, "learning_rate": 8.095448104871446e-05, "loss": 3.1462100982666015, "step": 126470 }, { "epoch": 0.22053333333333333, "grad_norm": 0.19808077812194824, "learning_rate": 8.094582360663601e-05, "loss": 3.117228889465332, "step": 126480 }, { "epoch": 0.2206, "grad_norm": 0.2774171829223633, "learning_rate": 8.093716466047365e-05, "loss": 3.0980653762817383, "step": 126490 }, { "epoch": 0.22066666666666668, "grad_norm": 0.17910932004451752, "learning_rate": 8.092850421064829e-05, "loss": 3.0728260040283204, "step": 126500 }, { "epoch": 0.22073333333333334, "grad_norm": 0.26247456669807434, "learning_rate": 8.09198422575808e-05, "loss": 3.1543306350708007, "step": 126510 }, { "epoch": 0.2208, "grad_norm": 0.21116915345191956, "learning_rate": 8.09111788016922e-05, "loss": 3.178885269165039, "step": 126520 }, { "epoch": 0.22086666666666666, "grad_norm": 0.25877752900123596, "learning_rate": 8.090251384340358e-05, "loss": 3.1506181716918946, "step": 126530 }, { "epoch": 0.22093333333333334, "grad_norm": 0.19101426005363464, "learning_rate": 8.08938473831361e-05, "loss": 3.1661725997924806, "step": 126540 }, { "epoch": 0.221, "grad_norm": 0.21779100596904755, "learning_rate": 8.088517942131095e-05, "loss": 3.076762390136719, "step": 126550 }, { "epoch": 0.22106666666666666, "grad_norm": 0.2031068205833435, "learning_rate": 8.087650995834945e-05, "loss": 3.121101951599121, "step": 126560 }, { "epoch": 0.22113333333333332, "grad_norm": 0.2141915112733841, "learning_rate": 8.086783899467297e-05, "loss": 3.0931930541992188, "step": 126570 }, { "epoch": 0.2212, "grad_norm": 0.18956182897090912, "learning_rate": 8.085916653070293e-05, "loss": 3.1648326873779298, "step": 126580 }, { "epoch": 0.22126666666666667, "grad_norm": 0.1941467523574829, "learning_rate": 8.085049256686086e-05, "loss": 3.0704471588134767, "step": 126590 }, { "epoch": 0.22133333333333333, "grad_norm": 0.1853586882352829, "learning_rate": 8.084181710356835e-05, "loss": 3.1565439224243166, "step": 126600 }, { "epoch": 0.2214, "grad_norm": 0.19253221154212952, "learning_rate": 8.083314014124705e-05, "loss": 3.1074077606201174, "step": 126610 }, { "epoch": 0.22146666666666667, "grad_norm": 0.208744615316391, "learning_rate": 8.08244616803187e-05, "loss": 3.1252185821533205, "step": 126620 }, { "epoch": 0.22153333333333333, "grad_norm": 0.19386518001556396, "learning_rate": 8.081578172120509e-05, "loss": 3.128150749206543, "step": 126630 }, { "epoch": 0.2216, "grad_norm": 0.2200036495923996, "learning_rate": 8.080710026432814e-05, "loss": 3.2039844512939455, "step": 126640 }, { "epoch": 0.22166666666666668, "grad_norm": 0.23489226400852203, "learning_rate": 8.079841731010976e-05, "loss": 3.139604377746582, "step": 126650 }, { "epoch": 0.22173333333333334, "grad_norm": 0.19234725832939148, "learning_rate": 8.0789732858972e-05, "loss": 3.084590530395508, "step": 126660 }, { "epoch": 0.2218, "grad_norm": 0.19913072884082794, "learning_rate": 8.078104691133694e-05, "loss": 3.1469661712646486, "step": 126670 }, { "epoch": 0.22186666666666666, "grad_norm": 0.18839827179908752, "learning_rate": 8.077235946762676e-05, "loss": 3.1290058135986327, "step": 126680 }, { "epoch": 0.22193333333333334, "grad_norm": 0.18682706356048584, "learning_rate": 8.076367052826369e-05, "loss": 3.0984115600585938, "step": 126690 }, { "epoch": 0.222, "grad_norm": 0.2055714875459671, "learning_rate": 8.075498009367006e-05, "loss": 3.1627485275268556, "step": 126700 }, { "epoch": 0.22206666666666666, "grad_norm": 0.1854117512702942, "learning_rate": 8.074628816426825e-05, "loss": 3.157741355895996, "step": 126710 }, { "epoch": 0.22213333333333332, "grad_norm": 0.2006644606590271, "learning_rate": 8.073759474048071e-05, "loss": 3.1591033935546875, "step": 126720 }, { "epoch": 0.2222, "grad_norm": 0.25255775451660156, "learning_rate": 8.072889982273e-05, "loss": 3.1169368743896486, "step": 126730 }, { "epoch": 0.22226666666666667, "grad_norm": 0.18337838351726532, "learning_rate": 8.072020341143871e-05, "loss": 3.1013933181762696, "step": 126740 }, { "epoch": 0.22233333333333333, "grad_norm": 0.21053069829940796, "learning_rate": 8.071150550702953e-05, "loss": 3.1275960922241213, "step": 126750 }, { "epoch": 0.2224, "grad_norm": 0.19020436704158783, "learning_rate": 8.070280610992518e-05, "loss": 3.140028953552246, "step": 126760 }, { "epoch": 0.22246666666666667, "grad_norm": 0.20204482972621918, "learning_rate": 8.069410522054853e-05, "loss": 3.1315847396850587, "step": 126770 }, { "epoch": 0.22253333333333333, "grad_norm": 0.18884894251823425, "learning_rate": 8.068540283932242e-05, "loss": 3.172721290588379, "step": 126780 }, { "epoch": 0.2226, "grad_norm": 0.20188632607460022, "learning_rate": 8.067669896666987e-05, "loss": 3.149603271484375, "step": 126790 }, { "epoch": 0.22266666666666668, "grad_norm": 0.22231411933898926, "learning_rate": 8.066799360301389e-05, "loss": 3.1361005783081053, "step": 126800 }, { "epoch": 0.22273333333333334, "grad_norm": 0.21275117993354797, "learning_rate": 8.065928674877761e-05, "loss": 3.2138511657714846, "step": 126810 }, { "epoch": 0.2228, "grad_norm": 0.1987212896347046, "learning_rate": 8.065057840438421e-05, "loss": 3.1304073333740234, "step": 126820 }, { "epoch": 0.22286666666666666, "grad_norm": 0.18106873333454132, "learning_rate": 8.064186857025695e-05, "loss": 3.1275089263916014, "step": 126830 }, { "epoch": 0.22293333333333334, "grad_norm": 0.18402960896492004, "learning_rate": 8.063315724681917e-05, "loss": 3.1537508010864257, "step": 126840 }, { "epoch": 0.223, "grad_norm": 0.19464503228664398, "learning_rate": 8.062444443449424e-05, "loss": 3.0697288513183594, "step": 126850 }, { "epoch": 0.22306666666666666, "grad_norm": 0.19809356331825256, "learning_rate": 8.061573013370567e-05, "loss": 3.1122161865234377, "step": 126860 }, { "epoch": 0.22313333333333332, "grad_norm": 0.20613707602024078, "learning_rate": 8.060701434487698e-05, "loss": 3.146551513671875, "step": 126870 }, { "epoch": 0.2232, "grad_norm": 0.2551702558994293, "learning_rate": 8.059829706843183e-05, "loss": 3.1562526702880858, "step": 126880 }, { "epoch": 0.22326666666666667, "grad_norm": 0.2098216414451599, "learning_rate": 8.058957830479387e-05, "loss": 3.1725341796875, "step": 126890 }, { "epoch": 0.22333333333333333, "grad_norm": 0.1818893402814865, "learning_rate": 8.05808580543869e-05, "loss": 3.1159114837646484, "step": 126900 }, { "epoch": 0.2234, "grad_norm": 0.3002387583255768, "learning_rate": 8.057213631763474e-05, "loss": 3.1586582183837892, "step": 126910 }, { "epoch": 0.22346666666666667, "grad_norm": 0.1890110820531845, "learning_rate": 8.056341309496128e-05, "loss": 3.1100910186767576, "step": 126920 }, { "epoch": 0.22353333333333333, "grad_norm": 0.19473281502723694, "learning_rate": 8.055468838679054e-05, "loss": 3.083287811279297, "step": 126930 }, { "epoch": 0.2236, "grad_norm": 0.1932666301727295, "learning_rate": 8.054596219354654e-05, "loss": 3.155905342102051, "step": 126940 }, { "epoch": 0.22366666666666668, "grad_norm": 0.19296088814735413, "learning_rate": 8.053723451565343e-05, "loss": 3.1104158401489257, "step": 126950 }, { "epoch": 0.22373333333333334, "grad_norm": 0.20426121354103088, "learning_rate": 8.05285053535354e-05, "loss": 3.1045427322387695, "step": 126960 }, { "epoch": 0.2238, "grad_norm": 0.19656942784786224, "learning_rate": 8.051977470761671e-05, "loss": 3.1132469177246094, "step": 126970 }, { "epoch": 0.22386666666666666, "grad_norm": 0.19426877796649933, "learning_rate": 8.051104257832174e-05, "loss": 3.118735504150391, "step": 126980 }, { "epoch": 0.22393333333333335, "grad_norm": 0.1999381184577942, "learning_rate": 8.050230896607485e-05, "loss": 3.116728591918945, "step": 126990 }, { "epoch": 0.224, "grad_norm": 0.18930858373641968, "learning_rate": 8.049357387130057e-05, "loss": 3.1460079193115233, "step": 127000 }, { "epoch": 0.22406666666666666, "grad_norm": 0.17948728799819946, "learning_rate": 8.048483729442342e-05, "loss": 3.192322540283203, "step": 127010 }, { "epoch": 0.22413333333333332, "grad_norm": 0.2433280050754547, "learning_rate": 8.047609923586806e-05, "loss": 3.144515037536621, "step": 127020 }, { "epoch": 0.2242, "grad_norm": 0.19593632221221924, "learning_rate": 8.046735969605918e-05, "loss": 2.8360666275024413, "step": 127030 }, { "epoch": 0.22426666666666667, "grad_norm": 0.1887250393629074, "learning_rate": 8.045861867542157e-05, "loss": 3.1209089279174806, "step": 127040 }, { "epoch": 0.22433333333333333, "grad_norm": 0.18481147289276123, "learning_rate": 8.044987617438007e-05, "loss": 3.0652162551879885, "step": 127050 }, { "epoch": 0.2244, "grad_norm": 0.3177352249622345, "learning_rate": 8.044113219335961e-05, "loss": 3.1537858963012697, "step": 127060 }, { "epoch": 0.22446666666666668, "grad_norm": 0.27753645181655884, "learning_rate": 8.043238673278512e-05, "loss": 3.2848987579345703, "step": 127070 }, { "epoch": 0.22453333333333333, "grad_norm": 0.688001275062561, "learning_rate": 8.042363979308174e-05, "loss": 3.5504680633544923, "step": 127080 }, { "epoch": 0.2246, "grad_norm": 0.18517334759235382, "learning_rate": 8.041489137467456e-05, "loss": 3.195246696472168, "step": 127090 }, { "epoch": 0.22466666666666665, "grad_norm": 0.18953508138656616, "learning_rate": 8.040614147798879e-05, "loss": 3.173606109619141, "step": 127100 }, { "epoch": 0.22473333333333334, "grad_norm": 0.18557070195674896, "learning_rate": 8.039739010344973e-05, "loss": 3.141640281677246, "step": 127110 }, { "epoch": 0.2248, "grad_norm": 0.1922931969165802, "learning_rate": 8.03886372514827e-05, "loss": 3.157114791870117, "step": 127120 }, { "epoch": 0.22486666666666666, "grad_norm": 0.19058912992477417, "learning_rate": 8.037988292251317e-05, "loss": 3.153568649291992, "step": 127130 }, { "epoch": 0.22493333333333335, "grad_norm": 0.19084811210632324, "learning_rate": 8.037112711696659e-05, "loss": 3.194266128540039, "step": 127140 }, { "epoch": 0.225, "grad_norm": 0.237528994679451, "learning_rate": 8.036236983526853e-05, "loss": 3.201992416381836, "step": 127150 }, { "epoch": 0.22506666666666666, "grad_norm": 0.202072411775589, "learning_rate": 8.035361107784462e-05, "loss": 3.1698724746704103, "step": 127160 }, { "epoch": 0.22513333333333332, "grad_norm": 0.21182098984718323, "learning_rate": 8.03448508451206e-05, "loss": 3.2243572235107423, "step": 127170 }, { "epoch": 0.2252, "grad_norm": 1.8349145650863647, "learning_rate": 8.033608913752222e-05, "loss": 3.083407402038574, "step": 127180 }, { "epoch": 0.22526666666666667, "grad_norm": 0.19248731434345245, "learning_rate": 8.032732595547534e-05, "loss": 3.1425144195556642, "step": 127190 }, { "epoch": 0.22533333333333333, "grad_norm": 0.18302316963672638, "learning_rate": 8.03185612994059e-05, "loss": 3.1057401657104493, "step": 127200 }, { "epoch": 0.2254, "grad_norm": 0.421371728181839, "learning_rate": 8.030979516973989e-05, "loss": 3.1446094512939453, "step": 127210 }, { "epoch": 0.22546666666666668, "grad_norm": 0.1916179656982422, "learning_rate": 8.030102756690337e-05, "loss": 3.1208755493164064, "step": 127220 }, { "epoch": 0.22553333333333334, "grad_norm": 0.1898062527179718, "learning_rate": 8.029225849132247e-05, "loss": 3.0654874801635743, "step": 127230 }, { "epoch": 0.2256, "grad_norm": 0.24118733406066895, "learning_rate": 8.02834879434234e-05, "loss": 3.097855567932129, "step": 127240 }, { "epoch": 0.22566666666666665, "grad_norm": 0.18039610981941223, "learning_rate": 8.027471592363245e-05, "loss": 3.1890058517456055, "step": 127250 }, { "epoch": 0.22573333333333334, "grad_norm": 0.2136910855770111, "learning_rate": 8.0265942432376e-05, "loss": 3.1093502044677734, "step": 127260 }, { "epoch": 0.2258, "grad_norm": 0.18207032978534698, "learning_rate": 8.025716747008043e-05, "loss": 3.1516685485839844, "step": 127270 }, { "epoch": 0.22586666666666666, "grad_norm": 0.18998666107654572, "learning_rate": 8.024839103717226e-05, "loss": 3.1328229904174805, "step": 127280 }, { "epoch": 0.22593333333333335, "grad_norm": 0.1956258863210678, "learning_rate": 8.023961313407806e-05, "loss": 3.1427066802978514, "step": 127290 }, { "epoch": 0.226, "grad_norm": 0.23768319189548492, "learning_rate": 8.023083376122445e-05, "loss": 3.130507469177246, "step": 127300 }, { "epoch": 0.22606666666666667, "grad_norm": 0.18786586821079254, "learning_rate": 8.022205291903816e-05, "loss": 3.1048879623413086, "step": 127310 }, { "epoch": 0.22613333333333333, "grad_norm": 0.19463007152080536, "learning_rate": 8.021327060794596e-05, "loss": 3.1318716049194335, "step": 127320 }, { "epoch": 0.2262, "grad_norm": 0.20057313144207, "learning_rate": 8.020448682837471e-05, "loss": 3.1133453369140627, "step": 127330 }, { "epoch": 0.22626666666666667, "grad_norm": 0.19872184097766876, "learning_rate": 8.019570158075134e-05, "loss": 3.1292655944824217, "step": 127340 }, { "epoch": 0.22633333333333333, "grad_norm": 0.19821274280548096, "learning_rate": 8.018691486550282e-05, "loss": 3.1261587142944336, "step": 127350 }, { "epoch": 0.2264, "grad_norm": 0.1989864856004715, "learning_rate": 8.017812668305626e-05, "loss": 3.1082807540893556, "step": 127360 }, { "epoch": 0.22646666666666668, "grad_norm": 0.2091100811958313, "learning_rate": 8.016933703383878e-05, "loss": 3.143501853942871, "step": 127370 }, { "epoch": 0.22653333333333334, "grad_norm": 0.20064328610897064, "learning_rate": 8.016054591827758e-05, "loss": 3.1081336975097655, "step": 127380 }, { "epoch": 0.2266, "grad_norm": 0.36215704679489136, "learning_rate": 8.015175333679994e-05, "loss": 3.2148368835449217, "step": 127390 }, { "epoch": 0.22666666666666666, "grad_norm": 0.17220012843608856, "learning_rate": 8.014295928983322e-05, "loss": 3.322887420654297, "step": 127400 }, { "epoch": 0.22673333333333334, "grad_norm": 0.20895101130008698, "learning_rate": 8.013416377780485e-05, "loss": 3.1501386642456053, "step": 127410 }, { "epoch": 0.2268, "grad_norm": 0.18328309059143066, "learning_rate": 8.012536680114231e-05, "loss": 3.094955825805664, "step": 127420 }, { "epoch": 0.22686666666666666, "grad_norm": 0.1927691400051117, "learning_rate": 8.01165683602732e-05, "loss": 3.129623222351074, "step": 127430 }, { "epoch": 0.22693333333333332, "grad_norm": 0.1837124079465866, "learning_rate": 8.01077684556251e-05, "loss": 3.0864631652832033, "step": 127440 }, { "epoch": 0.227, "grad_norm": 0.20536687970161438, "learning_rate": 8.009896708762576e-05, "loss": 3.1130741119384764, "step": 127450 }, { "epoch": 0.22706666666666667, "grad_norm": 0.1983313262462616, "learning_rate": 8.009016425670297e-05, "loss": 3.186729621887207, "step": 127460 }, { "epoch": 0.22713333333333333, "grad_norm": 0.2379244863986969, "learning_rate": 8.008135996328456e-05, "loss": 3.1528255462646486, "step": 127470 }, { "epoch": 0.2272, "grad_norm": 0.21164827048778534, "learning_rate": 8.007255420779843e-05, "loss": 3.1575347900390627, "step": 127480 }, { "epoch": 0.22726666666666667, "grad_norm": 0.20149554312229156, "learning_rate": 8.006374699067261e-05, "loss": 3.127351760864258, "step": 127490 }, { "epoch": 0.22733333333333333, "grad_norm": 0.1959124654531479, "learning_rate": 8.005493831233515e-05, "loss": 3.091054916381836, "step": 127500 }, { "epoch": 0.2274, "grad_norm": 0.21485759317874908, "learning_rate": 8.00461281732142e-05, "loss": 3.0528472900390624, "step": 127510 }, { "epoch": 0.22746666666666668, "grad_norm": 0.19609355926513672, "learning_rate": 8.003731657373795e-05, "loss": 3.074630546569824, "step": 127520 }, { "epoch": 0.22753333333333334, "grad_norm": 0.213448166847229, "learning_rate": 8.002850351433466e-05, "loss": 3.167665481567383, "step": 127530 }, { "epoch": 0.2276, "grad_norm": 0.21803340315818787, "learning_rate": 8.001968899543271e-05, "loss": 3.1461700439453124, "step": 127540 }, { "epoch": 0.22766666666666666, "grad_norm": 0.1985510140657425, "learning_rate": 8.00108730174605e-05, "loss": 3.0782293319702148, "step": 127550 }, { "epoch": 0.22773333333333334, "grad_norm": 0.21261556446552277, "learning_rate": 8.000205558084655e-05, "loss": 3.2083805084228514, "step": 127560 }, { "epoch": 0.2278, "grad_norm": 0.20729495584964752, "learning_rate": 7.999323668601937e-05, "loss": 3.1114007949829103, "step": 127570 }, { "epoch": 0.22786666666666666, "grad_norm": 0.18819984793663025, "learning_rate": 7.998441633340763e-05, "loss": 3.1306888580322267, "step": 127580 }, { "epoch": 0.22793333333333332, "grad_norm": 0.19631487131118774, "learning_rate": 7.997559452344e-05, "loss": 3.1131567001342773, "step": 127590 }, { "epoch": 0.228, "grad_norm": 0.18057169020175934, "learning_rate": 7.996677125654531e-05, "loss": 3.120519256591797, "step": 127600 }, { "epoch": 0.22806666666666667, "grad_norm": 0.1737239509820938, "learning_rate": 7.995794653315233e-05, "loss": 3.1177806854248047, "step": 127610 }, { "epoch": 0.22813333333333333, "grad_norm": 0.1958797425031662, "learning_rate": 7.994912035369004e-05, "loss": 3.112110137939453, "step": 127620 }, { "epoch": 0.2282, "grad_norm": 0.18881909549236298, "learning_rate": 7.994029271858739e-05, "loss": 3.1974466323852537, "step": 127630 }, { "epoch": 0.22826666666666667, "grad_norm": 0.3220820724964142, "learning_rate": 7.993146362827345e-05, "loss": 3.236745071411133, "step": 127640 }, { "epoch": 0.22833333333333333, "grad_norm": 0.20372609794139862, "learning_rate": 7.992263308317734e-05, "loss": 3.153462600708008, "step": 127650 }, { "epoch": 0.2284, "grad_norm": 0.2002248764038086, "learning_rate": 7.991380108372826e-05, "loss": 3.0276737213134766, "step": 127660 }, { "epoch": 0.22846666666666668, "grad_norm": 0.1840684413909912, "learning_rate": 7.990496763035547e-05, "loss": 3.0927204132080077, "step": 127670 }, { "epoch": 0.22853333333333334, "grad_norm": 0.176760733127594, "learning_rate": 7.989613272348832e-05, "loss": 3.1549930572509766, "step": 127680 }, { "epoch": 0.2286, "grad_norm": 0.20377032458782196, "learning_rate": 7.988729636355622e-05, "loss": 3.1343000411987303, "step": 127690 }, { "epoch": 0.22866666666666666, "grad_norm": 0.1842782348394394, "learning_rate": 7.987845855098864e-05, "loss": 3.091546630859375, "step": 127700 }, { "epoch": 0.22873333333333334, "grad_norm": 0.19829946756362915, "learning_rate": 7.986961928621517e-05, "loss": 3.127602767944336, "step": 127710 }, { "epoch": 0.2288, "grad_norm": 0.19094783067703247, "learning_rate": 7.986077856966537e-05, "loss": 3.153526496887207, "step": 127720 }, { "epoch": 0.22886666666666666, "grad_norm": 0.19980305433273315, "learning_rate": 7.985193640176898e-05, "loss": 3.10943603515625, "step": 127730 }, { "epoch": 0.22893333333333332, "grad_norm": 0.18867281079292297, "learning_rate": 7.984309278295573e-05, "loss": 3.135318565368652, "step": 127740 }, { "epoch": 0.229, "grad_norm": 0.19454915821552277, "learning_rate": 7.983424771365548e-05, "loss": 3.1519025802612304, "step": 127750 }, { "epoch": 0.22906666666666667, "grad_norm": 0.20739524066448212, "learning_rate": 7.982540119429811e-05, "loss": 3.143718147277832, "step": 127760 }, { "epoch": 0.22913333333333333, "grad_norm": 0.20108094811439514, "learning_rate": 7.981655322531362e-05, "loss": 3.168092155456543, "step": 127770 }, { "epoch": 0.2292, "grad_norm": 0.21347425878047943, "learning_rate": 7.980770380713203e-05, "loss": 3.1022125244140626, "step": 127780 }, { "epoch": 0.22926666666666667, "grad_norm": 0.17257040739059448, "learning_rate": 7.979885294018349e-05, "loss": 3.063897705078125, "step": 127790 }, { "epoch": 0.22933333333333333, "grad_norm": 0.1998368352651596, "learning_rate": 7.979000062489814e-05, "loss": 3.1073474884033203, "step": 127800 }, { "epoch": 0.2294, "grad_norm": 0.18412069976329803, "learning_rate": 7.978114686170627e-05, "loss": 3.1341167449951173, "step": 127810 }, { "epoch": 0.22946666666666668, "grad_norm": 0.21461369097232819, "learning_rate": 7.97722916510382e-05, "loss": 3.077344512939453, "step": 127820 }, { "epoch": 0.22953333333333334, "grad_norm": 0.19368772208690643, "learning_rate": 7.976343499332431e-05, "loss": 3.131825065612793, "step": 127830 }, { "epoch": 0.2296, "grad_norm": 0.19293223321437836, "learning_rate": 7.975457688899507e-05, "loss": 3.1621246337890625, "step": 127840 }, { "epoch": 0.22966666666666666, "grad_norm": 0.19171568751335144, "learning_rate": 7.974571733848103e-05, "loss": 3.0725004196166994, "step": 127850 }, { "epoch": 0.22973333333333334, "grad_norm": 0.19997037947177887, "learning_rate": 7.97368563422128e-05, "loss": 3.1411794662475585, "step": 127860 }, { "epoch": 0.2298, "grad_norm": 0.20734265446662903, "learning_rate": 7.972799390062104e-05, "loss": 3.1545896530151367, "step": 127870 }, { "epoch": 0.22986666666666666, "grad_norm": 0.17838487029075623, "learning_rate": 7.971913001413653e-05, "loss": 3.1111661911010744, "step": 127880 }, { "epoch": 0.22993333333333332, "grad_norm": 0.2312740534543991, "learning_rate": 7.971026468319005e-05, "loss": 3.1658018112182615, "step": 127890 }, { "epoch": 0.23, "grad_norm": 0.1877313256263733, "learning_rate": 7.970139790821251e-05, "loss": 3.1159292221069337, "step": 127900 }, { "epoch": 0.23006666666666667, "grad_norm": 0.2628721296787262, "learning_rate": 7.969252968963488e-05, "loss": 3.168522071838379, "step": 127910 }, { "epoch": 0.23013333333333333, "grad_norm": 0.18705862760543823, "learning_rate": 7.968366002788817e-05, "loss": 3.14650936126709, "step": 127920 }, { "epoch": 0.2302, "grad_norm": 0.28435027599334717, "learning_rate": 7.967478892340345e-05, "loss": 3.11767635345459, "step": 127930 }, { "epoch": 0.23026666666666668, "grad_norm": 0.18831069767475128, "learning_rate": 7.966591637661197e-05, "loss": 3.139825439453125, "step": 127940 }, { "epoch": 0.23033333333333333, "grad_norm": 0.1858104169368744, "learning_rate": 7.965704238794489e-05, "loss": 3.0976686477661133, "step": 127950 }, { "epoch": 0.2304, "grad_norm": 0.21632622182369232, "learning_rate": 7.964816695783357e-05, "loss": 3.1246368408203127, "step": 127960 }, { "epoch": 0.23046666666666665, "grad_norm": 0.18731167912483215, "learning_rate": 7.963929008670935e-05, "loss": 3.1492137908935547, "step": 127970 }, { "epoch": 0.23053333333333334, "grad_norm": 0.26501014828681946, "learning_rate": 7.963041177500371e-05, "loss": 3.2328086853027345, "step": 127980 }, { "epoch": 0.2306, "grad_norm": 0.19290655851364136, "learning_rate": 7.962153202314815e-05, "loss": 3.0934799194335936, "step": 127990 }, { "epoch": 0.23066666666666666, "grad_norm": 0.22000642120838165, "learning_rate": 7.961265083157427e-05, "loss": 3.1559331893920897, "step": 128000 }, { "epoch": 0.23073333333333335, "grad_norm": 0.20008501410484314, "learning_rate": 7.960376820071375e-05, "loss": 3.186306953430176, "step": 128010 }, { "epoch": 0.2308, "grad_norm": 0.1764357089996338, "learning_rate": 7.959488413099827e-05, "loss": 3.137787628173828, "step": 128020 }, { "epoch": 0.23086666666666666, "grad_norm": 0.20149679481983185, "learning_rate": 7.958599862285968e-05, "loss": 3.1268993377685548, "step": 128030 }, { "epoch": 0.23093333333333332, "grad_norm": 0.21033979952335358, "learning_rate": 7.957711167672981e-05, "loss": 3.0925601959228515, "step": 128040 }, { "epoch": 0.231, "grad_norm": 0.19726897776126862, "learning_rate": 7.956822329304063e-05, "loss": 3.0884639739990236, "step": 128050 }, { "epoch": 0.23106666666666667, "grad_norm": 0.18488731980323792, "learning_rate": 7.955933347222412e-05, "loss": 3.1089498519897463, "step": 128060 }, { "epoch": 0.23113333333333333, "grad_norm": 0.25161120295524597, "learning_rate": 7.955044221471237e-05, "loss": 3.1103435516357423, "step": 128070 }, { "epoch": 0.2312, "grad_norm": 0.21152015030384064, "learning_rate": 7.954154952093755e-05, "loss": 3.092744255065918, "step": 128080 }, { "epoch": 0.23126666666666668, "grad_norm": 0.18958896398544312, "learning_rate": 7.953265539133185e-05, "loss": 3.112745475769043, "step": 128090 }, { "epoch": 0.23133333333333334, "grad_norm": 0.18906421959400177, "learning_rate": 7.952375982632757e-05, "loss": 3.09996337890625, "step": 128100 }, { "epoch": 0.2314, "grad_norm": 0.1809881031513214, "learning_rate": 7.951486282635706e-05, "loss": 3.0981712341308594, "step": 128110 }, { "epoch": 0.23146666666666665, "grad_norm": 0.22081880271434784, "learning_rate": 7.950596439185276e-05, "loss": 3.1840984344482424, "step": 128120 }, { "epoch": 0.23153333333333334, "grad_norm": 0.21068938076496124, "learning_rate": 7.949706452324717e-05, "loss": 3.187857818603516, "step": 128130 }, { "epoch": 0.2316, "grad_norm": 0.18273469805717468, "learning_rate": 7.948816322097284e-05, "loss": 3.678753662109375, "step": 128140 }, { "epoch": 0.23166666666666666, "grad_norm": 0.20825204253196716, "learning_rate": 7.947926048546241e-05, "loss": 3.2222129821777346, "step": 128150 }, { "epoch": 0.23173333333333335, "grad_norm": 0.20254966616630554, "learning_rate": 7.94703563171486e-05, "loss": 3.182545471191406, "step": 128160 }, { "epoch": 0.2318, "grad_norm": 0.4768636226654053, "learning_rate": 7.946145071646417e-05, "loss": 3.270386505126953, "step": 128170 }, { "epoch": 0.23186666666666667, "grad_norm": 0.543443500995636, "learning_rate": 7.945254368384199e-05, "loss": 3.1500410079956054, "step": 128180 }, { "epoch": 0.23193333333333332, "grad_norm": 0.19320379197597504, "learning_rate": 7.944363521971495e-05, "loss": 3.156943702697754, "step": 128190 }, { "epoch": 0.232, "grad_norm": 0.19049964845180511, "learning_rate": 7.943472532451605e-05, "loss": 3.1012773513793945, "step": 128200 }, { "epoch": 0.23206666666666667, "grad_norm": 0.2319997251033783, "learning_rate": 7.942581399867834e-05, "loss": 3.147959327697754, "step": 128210 }, { "epoch": 0.23213333333333333, "grad_norm": 0.19310809671878815, "learning_rate": 7.941690124263494e-05, "loss": 3.1521820068359374, "step": 128220 }, { "epoch": 0.2322, "grad_norm": 0.18664909899234772, "learning_rate": 7.940798705681905e-05, "loss": 3.151180076599121, "step": 128230 }, { "epoch": 0.23226666666666668, "grad_norm": 0.1910763680934906, "learning_rate": 7.939907144166392e-05, "loss": 3.116770553588867, "step": 128240 }, { "epoch": 0.23233333333333334, "grad_norm": 0.1799648106098175, "learning_rate": 7.93901543976029e-05, "loss": 3.1658811569213867, "step": 128250 }, { "epoch": 0.2324, "grad_norm": 0.2080504298210144, "learning_rate": 7.938123592506939e-05, "loss": 3.1260540008544924, "step": 128260 }, { "epoch": 0.23246666666666665, "grad_norm": 0.5630660057067871, "learning_rate": 7.937231602449687e-05, "loss": 3.227899169921875, "step": 128270 }, { "epoch": 0.23253333333333334, "grad_norm": 0.18608857691287994, "learning_rate": 7.936339469631883e-05, "loss": 3.2898700714111326, "step": 128280 }, { "epoch": 0.2326, "grad_norm": 0.21287183463573456, "learning_rate": 7.935447194096894e-05, "loss": 3.083957481384277, "step": 128290 }, { "epoch": 0.23266666666666666, "grad_norm": 0.19178377091884613, "learning_rate": 7.934554775888086e-05, "loss": 3.181930732727051, "step": 128300 }, { "epoch": 0.23273333333333332, "grad_norm": 0.20487910509109497, "learning_rate": 7.933662215048833e-05, "loss": 3.1052154541015624, "step": 128310 }, { "epoch": 0.2328, "grad_norm": 0.19209212064743042, "learning_rate": 7.932769511622518e-05, "loss": 3.1439044952392576, "step": 128320 }, { "epoch": 0.23286666666666667, "grad_norm": 0.2024378478527069, "learning_rate": 7.931876665652528e-05, "loss": 3.1220964431762694, "step": 128330 }, { "epoch": 0.23293333333333333, "grad_norm": 0.22158226370811462, "learning_rate": 7.93098367718226e-05, "loss": 3.163105010986328, "step": 128340 }, { "epoch": 0.233, "grad_norm": 0.17357364296913147, "learning_rate": 7.930090546255117e-05, "loss": 3.159689712524414, "step": 128350 }, { "epoch": 0.23306666666666667, "grad_norm": 0.19162680208683014, "learning_rate": 7.929197272914509e-05, "loss": 3.1277912139892576, "step": 128360 }, { "epoch": 0.23313333333333333, "grad_norm": 0.20223022997379303, "learning_rate": 7.92830385720385e-05, "loss": 3.098118019104004, "step": 128370 }, { "epoch": 0.2332, "grad_norm": 0.21542736887931824, "learning_rate": 7.927410299166567e-05, "loss": 3.084253692626953, "step": 128380 }, { "epoch": 0.23326666666666668, "grad_norm": 0.18070662021636963, "learning_rate": 7.926516598846086e-05, "loss": 3.135278511047363, "step": 128390 }, { "epoch": 0.23333333333333334, "grad_norm": 0.19037103652954102, "learning_rate": 7.925622756285846e-05, "loss": 3.0589010238647463, "step": 128400 }, { "epoch": 0.2334, "grad_norm": 0.20477844774723053, "learning_rate": 7.924728771529292e-05, "loss": 3.143839645385742, "step": 128410 }, { "epoch": 0.23346666666666666, "grad_norm": 0.1795760989189148, "learning_rate": 7.923834644619876e-05, "loss": 3.091497039794922, "step": 128420 }, { "epoch": 0.23353333333333334, "grad_norm": 0.9676828980445862, "learning_rate": 7.922940375601053e-05, "loss": 3.3424636840820314, "step": 128430 }, { "epoch": 0.2336, "grad_norm": 0.44083210825920105, "learning_rate": 7.922045964516292e-05, "loss": 3.4531749725341796, "step": 128440 }, { "epoch": 0.23366666666666666, "grad_norm": 0.21383243799209595, "learning_rate": 7.921151411409058e-05, "loss": 3.1720956802368163, "step": 128450 }, { "epoch": 0.23373333333333332, "grad_norm": 0.22341835498809814, "learning_rate": 7.920256716322836e-05, "loss": 3.1318691253662108, "step": 128460 }, { "epoch": 0.2338, "grad_norm": 0.2647055387496948, "learning_rate": 7.919361879301109e-05, "loss": 3.1085229873657227, "step": 128470 }, { "epoch": 0.23386666666666667, "grad_norm": 0.19642651081085205, "learning_rate": 7.918466900387371e-05, "loss": 3.085905838012695, "step": 128480 }, { "epoch": 0.23393333333333333, "grad_norm": 0.24700625240802765, "learning_rate": 7.917571779625118e-05, "loss": 3.155970001220703, "step": 128490 }, { "epoch": 0.234, "grad_norm": 0.20120012760162354, "learning_rate": 7.91667651705786e-05, "loss": 3.146698760986328, "step": 128500 }, { "epoch": 0.23406666666666667, "grad_norm": 0.22334331274032593, "learning_rate": 7.915781112729108e-05, "loss": 3.0757896423339846, "step": 128510 }, { "epoch": 0.23413333333333333, "grad_norm": 0.17361505329608917, "learning_rate": 7.914885566682382e-05, "loss": 3.132646179199219, "step": 128520 }, { "epoch": 0.2342, "grad_norm": 0.19003774225711823, "learning_rate": 7.91398987896121e-05, "loss": 3.1022060394287108, "step": 128530 }, { "epoch": 0.23426666666666668, "grad_norm": 0.1788550317287445, "learning_rate": 7.913094049609124e-05, "loss": 3.1864967346191406, "step": 128540 }, { "epoch": 0.23433333333333334, "grad_norm": 0.25168684124946594, "learning_rate": 7.912198078669667e-05, "loss": 3.1767982482910155, "step": 128550 }, { "epoch": 0.2344, "grad_norm": 0.22256514430046082, "learning_rate": 7.911301966186385e-05, "loss": 3.0837120056152343, "step": 128560 }, { "epoch": 0.23446666666666666, "grad_norm": 0.18927450478076935, "learning_rate": 7.910405712202833e-05, "loss": 3.122620391845703, "step": 128570 }, { "epoch": 0.23453333333333334, "grad_norm": 0.18320134282112122, "learning_rate": 7.909509316762573e-05, "loss": 3.1207984924316405, "step": 128580 }, { "epoch": 0.2346, "grad_norm": 0.19305957853794098, "learning_rate": 7.908612779909172e-05, "loss": 3.2088775634765625, "step": 128590 }, { "epoch": 0.23466666666666666, "grad_norm": 0.19854436814785004, "learning_rate": 7.907716101686206e-05, "loss": 3.1859672546386717, "step": 128600 }, { "epoch": 0.23473333333333332, "grad_norm": 0.1907212883234024, "learning_rate": 7.906819282137257e-05, "loss": 3.1320404052734374, "step": 128610 }, { "epoch": 0.2348, "grad_norm": 0.1956409215927124, "learning_rate": 7.905922321305912e-05, "loss": 3.0586456298828124, "step": 128620 }, { "epoch": 0.23486666666666667, "grad_norm": 0.1918478012084961, "learning_rate": 7.905025219235769e-05, "loss": 3.118429183959961, "step": 128630 }, { "epoch": 0.23493333333333333, "grad_norm": 0.21495753526687622, "learning_rate": 7.90412797597043e-05, "loss": 3.094618225097656, "step": 128640 }, { "epoch": 0.235, "grad_norm": 0.21450714766979218, "learning_rate": 7.903230591553504e-05, "loss": 3.0937116622924803, "step": 128650 }, { "epoch": 0.23506666666666667, "grad_norm": 0.1983068734407425, "learning_rate": 7.902333066028605e-05, "loss": 3.3576480865478517, "step": 128660 }, { "epoch": 0.23513333333333333, "grad_norm": 0.2218797355890274, "learning_rate": 7.90143539943936e-05, "loss": 3.1102523803710938, "step": 128670 }, { "epoch": 0.2352, "grad_norm": 0.20208746194839478, "learning_rate": 7.900537591829398e-05, "loss": 3.14366455078125, "step": 128680 }, { "epoch": 0.23526666666666668, "grad_norm": 0.22155267000198364, "learning_rate": 7.899639643242355e-05, "loss": 3.089147186279297, "step": 128690 }, { "epoch": 0.23533333333333334, "grad_norm": 0.2185213416814804, "learning_rate": 7.898741553721874e-05, "loss": 3.124662399291992, "step": 128700 }, { "epoch": 0.2354, "grad_norm": 0.18035852909088135, "learning_rate": 7.897843323311606e-05, "loss": 3.115731620788574, "step": 128710 }, { "epoch": 0.23546666666666666, "grad_norm": 0.18147562444210052, "learning_rate": 7.89694495205521e-05, "loss": 3.093244743347168, "step": 128720 }, { "epoch": 0.23553333333333334, "grad_norm": 0.2109653353691101, "learning_rate": 7.89604643999635e-05, "loss": 3.1504741668701173, "step": 128730 }, { "epoch": 0.2356, "grad_norm": 0.18552541732788086, "learning_rate": 7.895147787178694e-05, "loss": 3.123581314086914, "step": 128740 }, { "epoch": 0.23566666666666666, "grad_norm": 0.20104867219924927, "learning_rate": 7.894248993645923e-05, "loss": 3.143523597717285, "step": 128750 }, { "epoch": 0.23573333333333332, "grad_norm": 0.1965888887643814, "learning_rate": 7.893350059441722e-05, "loss": 3.1141162872314454, "step": 128760 }, { "epoch": 0.2358, "grad_norm": 0.19214174151420593, "learning_rate": 7.89245098460978e-05, "loss": 3.120587921142578, "step": 128770 }, { "epoch": 0.23586666666666667, "grad_norm": 0.19033685326576233, "learning_rate": 7.891551769193797e-05, "loss": 3.0819751739501955, "step": 128780 }, { "epoch": 0.23593333333333333, "grad_norm": 0.23561891913414001, "learning_rate": 7.890652413237478e-05, "loss": 3.0887107849121094, "step": 128790 }, { "epoch": 0.236, "grad_norm": 0.18494555354118347, "learning_rate": 7.889752916784537e-05, "loss": 3.109671974182129, "step": 128800 }, { "epoch": 0.23606666666666667, "grad_norm": 0.1869208961725235, "learning_rate": 7.888853279878688e-05, "loss": 3.0832395553588867, "step": 128810 }, { "epoch": 0.23613333333333333, "grad_norm": 0.2025923728942871, "learning_rate": 7.887953502563662e-05, "loss": 3.158454704284668, "step": 128820 }, { "epoch": 0.2362, "grad_norm": 0.2051381766796112, "learning_rate": 7.887053584883188e-05, "loss": 3.158624267578125, "step": 128830 }, { "epoch": 0.23626666666666668, "grad_norm": 0.19580161571502686, "learning_rate": 7.88615352688101e-05, "loss": 3.144570159912109, "step": 128840 }, { "epoch": 0.23633333333333334, "grad_norm": 0.2834929823875427, "learning_rate": 7.885253328600869e-05, "loss": 3.144870567321777, "step": 128850 }, { "epoch": 0.2364, "grad_norm": 0.19416211545467377, "learning_rate": 7.884352990086522e-05, "loss": 3.109302520751953, "step": 128860 }, { "epoch": 0.23646666666666666, "grad_norm": 0.18590520322322845, "learning_rate": 7.883452511381724e-05, "loss": 3.100794219970703, "step": 128870 }, { "epoch": 0.23653333333333335, "grad_norm": 0.21596644818782806, "learning_rate": 7.882551892530246e-05, "loss": 3.098802375793457, "step": 128880 }, { "epoch": 0.2366, "grad_norm": 0.19624997675418854, "learning_rate": 7.881651133575859e-05, "loss": 3.1792575836181642, "step": 128890 }, { "epoch": 0.23666666666666666, "grad_norm": 0.1949058175086975, "learning_rate": 7.880750234562345e-05, "loss": 3.1532314300537108, "step": 128900 }, { "epoch": 0.23673333333333332, "grad_norm": 0.34584054350852966, "learning_rate": 7.879849195533491e-05, "loss": 3.1642885208129883, "step": 128910 }, { "epoch": 0.2368, "grad_norm": 0.19392672181129456, "learning_rate": 7.878948016533091e-05, "loss": 3.0955669403076174, "step": 128920 }, { "epoch": 0.23686666666666667, "grad_norm": 0.20051176846027374, "learning_rate": 7.878046697604944e-05, "loss": 3.1395362854003905, "step": 128930 }, { "epoch": 0.23693333333333333, "grad_norm": 0.20705413818359375, "learning_rate": 7.87714523879286e-05, "loss": 3.1457422256469725, "step": 128940 }, { "epoch": 0.237, "grad_norm": 0.1851872205734253, "learning_rate": 7.87624364014065e-05, "loss": 3.1650554656982424, "step": 128950 }, { "epoch": 0.23706666666666668, "grad_norm": 0.20501430332660675, "learning_rate": 7.875341901692138e-05, "loss": 3.1250295639038086, "step": 128960 }, { "epoch": 0.23713333333333333, "grad_norm": 0.19467218220233917, "learning_rate": 7.874440023491151e-05, "loss": 3.0969621658325197, "step": 128970 }, { "epoch": 0.2372, "grad_norm": 0.1930338442325592, "learning_rate": 7.873538005581524e-05, "loss": 3.0705282211303713, "step": 128980 }, { "epoch": 0.23726666666666665, "grad_norm": 0.18512633442878723, "learning_rate": 7.872635848007098e-05, "loss": 3.124013900756836, "step": 128990 }, { "epoch": 0.23733333333333334, "grad_norm": 0.1947951465845108, "learning_rate": 7.87173355081172e-05, "loss": 3.209329605102539, "step": 129000 }, { "epoch": 0.2374, "grad_norm": 0.21121875941753387, "learning_rate": 7.870831114039248e-05, "loss": 3.1321413040161135, "step": 129010 }, { "epoch": 0.23746666666666666, "grad_norm": 0.19686919450759888, "learning_rate": 7.869928537733542e-05, "loss": 3.113859176635742, "step": 129020 }, { "epoch": 0.23753333333333335, "grad_norm": 0.205851212143898, "learning_rate": 7.86902582193847e-05, "loss": 3.109118843078613, "step": 129030 }, { "epoch": 0.2376, "grad_norm": 0.1994025558233261, "learning_rate": 7.868122966697908e-05, "loss": 3.1023271560668944, "step": 129040 }, { "epoch": 0.23766666666666666, "grad_norm": 0.20446105301380157, "learning_rate": 7.867219972055739e-05, "loss": 5.2427978515625, "step": 129050 }, { "epoch": 0.23773333333333332, "grad_norm": 0.22388149797916412, "learning_rate": 7.866316838055852e-05, "loss": 3.075715255737305, "step": 129060 }, { "epoch": 0.2378, "grad_norm": 0.1884075105190277, "learning_rate": 7.86541356474214e-05, "loss": 3.1289228439331054, "step": 129070 }, { "epoch": 0.23786666666666667, "grad_norm": 0.19466781616210938, "learning_rate": 7.86451015215851e-05, "loss": 3.138962173461914, "step": 129080 }, { "epoch": 0.23793333333333333, "grad_norm": 0.1797778308391571, "learning_rate": 7.863606600348868e-05, "loss": 3.1027219772338865, "step": 129090 }, { "epoch": 0.238, "grad_norm": 0.19058558344841003, "learning_rate": 7.862702909357132e-05, "loss": 3.1238868713378904, "step": 129100 }, { "epoch": 0.23806666666666668, "grad_norm": 0.2386757880449295, "learning_rate": 7.861799079227221e-05, "loss": 3.1892009735107423, "step": 129110 }, { "epoch": 0.23813333333333334, "grad_norm": 0.2122185230255127, "learning_rate": 7.860895110003069e-05, "loss": 3.074086570739746, "step": 129120 }, { "epoch": 0.2382, "grad_norm": 0.3677828907966614, "learning_rate": 7.85999100172861e-05, "loss": 3.1142709732055662, "step": 129130 }, { "epoch": 0.23826666666666665, "grad_norm": 0.42794859409332275, "learning_rate": 7.859086754447788e-05, "loss": 3.414458465576172, "step": 129140 }, { "epoch": 0.23833333333333334, "grad_norm": 0.18192049860954285, "learning_rate": 7.858182368204551e-05, "loss": 3.0812376022338865, "step": 129150 }, { "epoch": 0.2384, "grad_norm": 0.20953388512134552, "learning_rate": 7.85727784304286e-05, "loss": 3.158494567871094, "step": 129160 }, { "epoch": 0.23846666666666666, "grad_norm": 0.204961895942688, "learning_rate": 7.856373179006674e-05, "loss": 3.158390426635742, "step": 129170 }, { "epoch": 0.23853333333333335, "grad_norm": 0.19244657456874847, "learning_rate": 7.855468376139965e-05, "loss": 3.1106426239013674, "step": 129180 }, { "epoch": 0.2386, "grad_norm": 0.19005396962165833, "learning_rate": 7.854563434486708e-05, "loss": 3.06595516204834, "step": 129190 }, { "epoch": 0.23866666666666667, "grad_norm": 0.19923843443393707, "learning_rate": 7.853658354090888e-05, "loss": 3.1383945465087892, "step": 129200 }, { "epoch": 0.23873333333333333, "grad_norm": 0.1934855729341507, "learning_rate": 7.852753134996495e-05, "loss": 3.1900081634521484, "step": 129210 }, { "epoch": 0.2388, "grad_norm": 0.19148525595664978, "learning_rate": 7.851847777247528e-05, "loss": 3.0929357528686525, "step": 129220 }, { "epoch": 0.23886666666666667, "grad_norm": 0.18901968002319336, "learning_rate": 7.850942280887987e-05, "loss": 3.1271202087402346, "step": 129230 }, { "epoch": 0.23893333333333333, "grad_norm": 0.19676053524017334, "learning_rate": 7.850036645961887e-05, "loss": 3.1170866012573244, "step": 129240 }, { "epoch": 0.239, "grad_norm": 0.20981627702713013, "learning_rate": 7.849130872513242e-05, "loss": 3.1327390670776367, "step": 129250 }, { "epoch": 0.23906666666666668, "grad_norm": 0.18850399553775787, "learning_rate": 7.848224960586076e-05, "loss": 3.0972537994384766, "step": 129260 }, { "epoch": 0.23913333333333334, "grad_norm": 0.20540854334831238, "learning_rate": 7.847318910224422e-05, "loss": 3.1312305450439455, "step": 129270 }, { "epoch": 0.2392, "grad_norm": 0.2058604508638382, "learning_rate": 7.846412721472314e-05, "loss": 3.10922908782959, "step": 129280 }, { "epoch": 0.23926666666666666, "grad_norm": 0.18611253798007965, "learning_rate": 7.8455063943738e-05, "loss": 3.1126567840576174, "step": 129290 }, { "epoch": 0.23933333333333334, "grad_norm": 0.20237497985363007, "learning_rate": 7.844599928972928e-05, "loss": 3.096471405029297, "step": 129300 }, { "epoch": 0.2394, "grad_norm": 0.19229869544506073, "learning_rate": 7.843693325313756e-05, "loss": 3.1175567626953127, "step": 129310 }, { "epoch": 0.23946666666666666, "grad_norm": 0.1991586536169052, "learning_rate": 7.842786583440354e-05, "loss": 3.1207754135131838, "step": 129320 }, { "epoch": 0.23953333333333332, "grad_norm": 0.27051272988319397, "learning_rate": 7.841879703396784e-05, "loss": 3.157331085205078, "step": 129330 }, { "epoch": 0.2396, "grad_norm": 0.2405741959810257, "learning_rate": 7.84097268522713e-05, "loss": 3.0732526779174805, "step": 129340 }, { "epoch": 0.23966666666666667, "grad_norm": 0.18476566672325134, "learning_rate": 7.840065528975473e-05, "loss": 3.1221994400024413, "step": 129350 }, { "epoch": 0.23973333333333333, "grad_norm": 0.20881636440753937, "learning_rate": 7.839158234685908e-05, "loss": 3.1387786865234375, "step": 129360 }, { "epoch": 0.2398, "grad_norm": 0.20852269232273102, "learning_rate": 7.83825080240253e-05, "loss": 3.1423675537109377, "step": 129370 }, { "epoch": 0.23986666666666667, "grad_norm": 0.19812458753585815, "learning_rate": 7.837343232169442e-05, "loss": 3.1923154830932616, "step": 129380 }, { "epoch": 0.23993333333333333, "grad_norm": 0.3055170178413391, "learning_rate": 7.836435524030761e-05, "loss": 3.1126129150390627, "step": 129390 }, { "epoch": 0.24, "grad_norm": 0.23380047082901, "learning_rate": 7.835527678030601e-05, "loss": 3.116643714904785, "step": 129400 }, { "epoch": 0.24006666666666668, "grad_norm": 0.21099914610385895, "learning_rate": 7.834619694213087e-05, "loss": 3.0845285415649415, "step": 129410 }, { "epoch": 0.24013333333333334, "grad_norm": 0.19518707692623138, "learning_rate": 7.83371157262235e-05, "loss": 3.090993118286133, "step": 129420 }, { "epoch": 0.2402, "grad_norm": 0.19565246999263763, "learning_rate": 7.832803313302531e-05, "loss": 3.1683399200439455, "step": 129430 }, { "epoch": 0.24026666666666666, "grad_norm": 0.1887797713279724, "learning_rate": 7.831894916297771e-05, "loss": 3.1070404052734375, "step": 129440 }, { "epoch": 0.24033333333333334, "grad_norm": 0.18403683602809906, "learning_rate": 7.830986381652226e-05, "loss": 3.0849870681762694, "step": 129450 }, { "epoch": 0.2404, "grad_norm": 0.19134047627449036, "learning_rate": 7.83007770941005e-05, "loss": 3.1591363906860352, "step": 129460 }, { "epoch": 0.24046666666666666, "grad_norm": 0.18047071993350983, "learning_rate": 7.829168899615409e-05, "loss": 3.101612854003906, "step": 129470 }, { "epoch": 0.24053333333333332, "grad_norm": 0.21558399498462677, "learning_rate": 7.828259952312477e-05, "loss": 3.1141742706298827, "step": 129480 }, { "epoch": 0.2406, "grad_norm": 0.2004968374967575, "learning_rate": 7.827350867545428e-05, "loss": 3.0907142639160154, "step": 129490 }, { "epoch": 0.24066666666666667, "grad_norm": 0.22857148945331573, "learning_rate": 7.826441645358452e-05, "loss": 3.070152473449707, "step": 129500 }, { "epoch": 0.24073333333333333, "grad_norm": 0.2017117589712143, "learning_rate": 7.825532285795737e-05, "loss": 3.09793701171875, "step": 129510 }, { "epoch": 0.2408, "grad_norm": 0.18654818832874298, "learning_rate": 7.824622788901482e-05, "loss": 3.1623268127441406, "step": 129520 }, { "epoch": 0.24086666666666667, "grad_norm": 0.19253438711166382, "learning_rate": 7.823713154719893e-05, "loss": 3.1436981201171874, "step": 129530 }, { "epoch": 0.24093333333333333, "grad_norm": 0.20858557522296906, "learning_rate": 7.82280338329518e-05, "loss": 3.106964874267578, "step": 129540 }, { "epoch": 0.241, "grad_norm": 0.2097966969013214, "learning_rate": 7.821893474671563e-05, "loss": 3.0792562484741213, "step": 129550 }, { "epoch": 0.24106666666666668, "grad_norm": 0.19747111201286316, "learning_rate": 7.820983428893267e-05, "loss": 3.07620792388916, "step": 129560 }, { "epoch": 0.24113333333333334, "grad_norm": 0.20009911060333252, "learning_rate": 7.820073246004523e-05, "loss": 3.1056539535522463, "step": 129570 }, { "epoch": 0.2412, "grad_norm": 0.23199118673801422, "learning_rate": 7.819162926049568e-05, "loss": 3.1885473251342775, "step": 129580 }, { "epoch": 0.24126666666666666, "grad_norm": 0.276076078414917, "learning_rate": 7.818252469072649e-05, "loss": 3.1550481796264647, "step": 129590 }, { "epoch": 0.24133333333333334, "grad_norm": 0.2018890529870987, "learning_rate": 7.817341875118016e-05, "loss": 3.110218620300293, "step": 129600 }, { "epoch": 0.2414, "grad_norm": 0.23187041282653809, "learning_rate": 7.81643114422993e-05, "loss": 3.130044937133789, "step": 129610 }, { "epoch": 0.24146666666666666, "grad_norm": 0.19122877717018127, "learning_rate": 7.815520276452652e-05, "loss": 3.1263723373413086, "step": 129620 }, { "epoch": 0.24153333333333332, "grad_norm": 0.203164204955101, "learning_rate": 7.814609271830457e-05, "loss": 3.1002439498901366, "step": 129630 }, { "epoch": 0.2416, "grad_norm": 0.2009083330631256, "learning_rate": 7.813698130407623e-05, "loss": 3.1223194122314455, "step": 129640 }, { "epoch": 0.24166666666666667, "grad_norm": 0.21814198791980743, "learning_rate": 7.812786852228433e-05, "loss": 3.0749860763549806, "step": 129650 }, { "epoch": 0.24173333333333333, "grad_norm": 0.20039315521717072, "learning_rate": 7.81187543733718e-05, "loss": 3.0212690353393556, "step": 129660 }, { "epoch": 0.2418, "grad_norm": 0.19228285551071167, "learning_rate": 7.810963885778162e-05, "loss": 3.116402435302734, "step": 129670 }, { "epoch": 0.24186666666666667, "grad_norm": 0.183794766664505, "learning_rate": 7.810052197595683e-05, "loss": 3.1070959091186525, "step": 129680 }, { "epoch": 0.24193333333333333, "grad_norm": 0.1852482408285141, "learning_rate": 7.809140372834054e-05, "loss": 3.0976541519165037, "step": 129690 }, { "epoch": 0.242, "grad_norm": 0.2469191700220108, "learning_rate": 7.808228411537596e-05, "loss": 3.1224361419677735, "step": 129700 }, { "epoch": 0.24206666666666668, "grad_norm": 0.197658970952034, "learning_rate": 7.807316313750631e-05, "loss": 3.1251630783081055, "step": 129710 }, { "epoch": 0.24213333333333334, "grad_norm": 0.1856827288866043, "learning_rate": 7.806404079517493e-05, "loss": 3.130922317504883, "step": 129720 }, { "epoch": 0.2422, "grad_norm": 0.21898658573627472, "learning_rate": 7.805491708882516e-05, "loss": 3.111405372619629, "step": 129730 }, { "epoch": 0.24226666666666666, "grad_norm": 0.21423667669296265, "learning_rate": 7.804579201890049e-05, "loss": 3.2154579162597656, "step": 129740 }, { "epoch": 0.24233333333333335, "grad_norm": 0.19157442450523376, "learning_rate": 7.80366655858444e-05, "loss": 3.130908966064453, "step": 129750 }, { "epoch": 0.2424, "grad_norm": 0.2176418900489807, "learning_rate": 7.802753779010049e-05, "loss": 3.1033584594726564, "step": 129760 }, { "epoch": 0.24246666666666666, "grad_norm": 0.18478159606456757, "learning_rate": 7.80184086321124e-05, "loss": 3.1766887664794923, "step": 129770 }, { "epoch": 0.24253333333333332, "grad_norm": 0.32628607749938965, "learning_rate": 7.800927811232384e-05, "loss": 3.1245609283447267, "step": 129780 }, { "epoch": 0.2426, "grad_norm": 0.20412087440490723, "learning_rate": 7.800014623117857e-05, "loss": 3.1022762298583983, "step": 129790 }, { "epoch": 0.24266666666666667, "grad_norm": 0.19191405177116394, "learning_rate": 7.799101298912046e-05, "loss": 3.101492500305176, "step": 129800 }, { "epoch": 0.24273333333333333, "grad_norm": 0.22093628346920013, "learning_rate": 7.798187838659343e-05, "loss": 3.1290258407592773, "step": 129810 }, { "epoch": 0.2428, "grad_norm": 0.1969684660434723, "learning_rate": 7.797274242404143e-05, "loss": 3.1243206024169923, "step": 129820 }, { "epoch": 0.24286666666666668, "grad_norm": 0.19538657367229462, "learning_rate": 7.796360510190849e-05, "loss": 3.150746154785156, "step": 129830 }, { "epoch": 0.24293333333333333, "grad_norm": 0.19428271055221558, "learning_rate": 7.795446642063874e-05, "loss": 3.127995491027832, "step": 129840 }, { "epoch": 0.243, "grad_norm": 0.18956232070922852, "learning_rate": 7.794532638067638e-05, "loss": 3.1072269439697267, "step": 129850 }, { "epoch": 0.24306666666666665, "grad_norm": 0.28544241189956665, "learning_rate": 7.79361849824656e-05, "loss": 3.1311420440673827, "step": 129860 }, { "epoch": 0.24313333333333334, "grad_norm": 0.22687481343746185, "learning_rate": 7.792704222645073e-05, "loss": 3.1950393676757813, "step": 129870 }, { "epoch": 0.2432, "grad_norm": 0.1839340627193451, "learning_rate": 7.791789811307614e-05, "loss": 3.12241153717041, "step": 129880 }, { "epoch": 0.24326666666666666, "grad_norm": 0.19680246710777283, "learning_rate": 7.79087526427863e-05, "loss": 3.155438041687012, "step": 129890 }, { "epoch": 0.24333333333333335, "grad_norm": 0.21327312290668488, "learning_rate": 7.789960581602566e-05, "loss": 3.0595401763916015, "step": 129900 }, { "epoch": 0.2434, "grad_norm": 0.19874624907970428, "learning_rate": 7.789045763323881e-05, "loss": 3.1211368560791017, "step": 129910 }, { "epoch": 0.24346666666666666, "grad_norm": 0.2048627883195877, "learning_rate": 7.78813080948704e-05, "loss": 3.0904514312744142, "step": 129920 }, { "epoch": 0.24353333333333332, "grad_norm": 0.19842354953289032, "learning_rate": 7.787215720136513e-05, "loss": 3.1289796829223633, "step": 129930 }, { "epoch": 0.2436, "grad_norm": 0.43757450580596924, "learning_rate": 7.786300495316776e-05, "loss": 3.168450927734375, "step": 129940 }, { "epoch": 0.24366666666666667, "grad_norm": 0.19777542352676392, "learning_rate": 7.785385135072312e-05, "loss": 3.1608152389526367, "step": 129950 }, { "epoch": 0.24373333333333333, "grad_norm": 0.29431554675102234, "learning_rate": 7.78446963944761e-05, "loss": 3.1140233993530275, "step": 129960 }, { "epoch": 0.2438, "grad_norm": 0.22406014800071716, "learning_rate": 7.783554008487171e-05, "loss": 3.0903446197509767, "step": 129970 }, { "epoch": 0.24386666666666668, "grad_norm": 0.19644206762313843, "learning_rate": 7.782638242235493e-05, "loss": 3.1190515518188477, "step": 129980 }, { "epoch": 0.24393333333333334, "grad_norm": 0.20410388708114624, "learning_rate": 7.781722340737088e-05, "loss": 3.110364532470703, "step": 129990 }, { "epoch": 0.244, "grad_norm": 0.1865970641374588, "learning_rate": 7.780806304036471e-05, "loss": 3.0826671600341795, "step": 130000 }, { "epoch": 0.24406666666666665, "grad_norm": 0.19349081814289093, "learning_rate": 7.77989013217817e-05, "loss": 3.123083305358887, "step": 130010 }, { "epoch": 0.24413333333333334, "grad_norm": 0.18096202611923218, "learning_rate": 7.778973825206707e-05, "loss": 3.179860305786133, "step": 130020 }, { "epoch": 0.2442, "grad_norm": 0.35122621059417725, "learning_rate": 7.778057383166622e-05, "loss": 3.0763593673706056, "step": 130030 }, { "epoch": 0.24426666666666666, "grad_norm": 0.20089606940746307, "learning_rate": 7.777140806102457e-05, "loss": 3.1384885787963865, "step": 130040 }, { "epoch": 0.24433333333333335, "grad_norm": 0.2377898395061493, "learning_rate": 7.776224094058762e-05, "loss": 3.161794662475586, "step": 130050 }, { "epoch": 0.2444, "grad_norm": 0.20747947692871094, "learning_rate": 7.775307247080091e-05, "loss": 3.1101661682128907, "step": 130060 }, { "epoch": 0.24446666666666667, "grad_norm": 0.21462030708789825, "learning_rate": 7.774390265211007e-05, "loss": 3.09183349609375, "step": 130070 }, { "epoch": 0.24453333333333332, "grad_norm": 0.1807118058204651, "learning_rate": 7.773473148496078e-05, "loss": 3.067582702636719, "step": 130080 }, { "epoch": 0.2446, "grad_norm": 0.2122848480939865, "learning_rate": 7.772555896979881e-05, "loss": 3.1308338165283205, "step": 130090 }, { "epoch": 0.24466666666666667, "grad_norm": 0.20097436010837555, "learning_rate": 7.771638510706996e-05, "loss": 3.174765396118164, "step": 130100 }, { "epoch": 0.24473333333333333, "grad_norm": 0.2760918438434601, "learning_rate": 7.770720989722014e-05, "loss": 3.119975280761719, "step": 130110 }, { "epoch": 0.2448, "grad_norm": 0.20985813438892365, "learning_rate": 7.769803334069525e-05, "loss": 3.126809310913086, "step": 130120 }, { "epoch": 0.24486666666666668, "grad_norm": 0.2089572548866272, "learning_rate": 7.768885543794138e-05, "loss": 3.0515417098999023, "step": 130130 }, { "epoch": 0.24493333333333334, "grad_norm": 0.2083936333656311, "learning_rate": 7.767967618940454e-05, "loss": 3.088283920288086, "step": 130140 }, { "epoch": 0.245, "grad_norm": 0.18939723074436188, "learning_rate": 7.767049559553093e-05, "loss": 3.129694175720215, "step": 130150 }, { "epoch": 0.24506666666666665, "grad_norm": 0.19185718894004822, "learning_rate": 7.766131365676671e-05, "loss": 3.0896556854248045, "step": 130160 }, { "epoch": 0.24513333333333334, "grad_norm": 0.27521276473999023, "learning_rate": 7.76521303735582e-05, "loss": 3.0616025924682617, "step": 130170 }, { "epoch": 0.2452, "grad_norm": 0.18886210024356842, "learning_rate": 7.764294574635172e-05, "loss": 3.105845260620117, "step": 130180 }, { "epoch": 0.24526666666666666, "grad_norm": 0.20202931761741638, "learning_rate": 7.763375977559368e-05, "loss": 3.1290849685668944, "step": 130190 }, { "epoch": 0.24533333333333332, "grad_norm": 0.19320796430110931, "learning_rate": 7.762457246173055e-05, "loss": 3.0866527557373047, "step": 130200 }, { "epoch": 0.2454, "grad_norm": 0.19791024923324585, "learning_rate": 7.76153838052089e-05, "loss": 3.1412517547607424, "step": 130210 }, { "epoch": 0.24546666666666667, "grad_norm": 0.20490702986717224, "learning_rate": 7.760619380647529e-05, "loss": 3.106547546386719, "step": 130220 }, { "epoch": 0.24553333333333333, "grad_norm": 0.20612628757953644, "learning_rate": 7.759700246597642e-05, "loss": 3.105611801147461, "step": 130230 }, { "epoch": 0.2456, "grad_norm": 0.18293292820453644, "learning_rate": 7.7587809784159e-05, "loss": 3.144797706604004, "step": 130240 }, { "epoch": 0.24566666666666667, "grad_norm": 0.20494122803211212, "learning_rate": 7.757861576146984e-05, "loss": 3.10308723449707, "step": 130250 }, { "epoch": 0.24573333333333333, "grad_norm": 0.6598485708236694, "learning_rate": 7.75694203983558e-05, "loss": 3.090372085571289, "step": 130260 }, { "epoch": 0.2458, "grad_norm": 0.21450838446617126, "learning_rate": 7.756022369526383e-05, "loss": 3.0993701934814455, "step": 130270 }, { "epoch": 0.24586666666666668, "grad_norm": 0.20600757002830505, "learning_rate": 7.755102565264089e-05, "loss": 3.136579895019531, "step": 130280 }, { "epoch": 0.24593333333333334, "grad_norm": 0.19058267772197723, "learning_rate": 7.754182627093407e-05, "loss": 3.1025758743286134, "step": 130290 }, { "epoch": 0.246, "grad_norm": 0.3747956454753876, "learning_rate": 7.753262555059048e-05, "loss": 3.193290138244629, "step": 130300 }, { "epoch": 0.24606666666666666, "grad_norm": 0.20493488013744354, "learning_rate": 7.752342349205731e-05, "loss": 3.07657527923584, "step": 130310 }, { "epoch": 0.24613333333333334, "grad_norm": 0.2656843066215515, "learning_rate": 7.751422009578181e-05, "loss": 3.1245546340942383, "step": 130320 }, { "epoch": 0.2462, "grad_norm": 0.24261479079723358, "learning_rate": 7.750501536221131e-05, "loss": 3.0969030380249025, "step": 130330 }, { "epoch": 0.24626666666666666, "grad_norm": 0.2081269770860672, "learning_rate": 7.74958092917932e-05, "loss": 3.054640197753906, "step": 130340 }, { "epoch": 0.24633333333333332, "grad_norm": 0.19526933133602142, "learning_rate": 7.748660188497492e-05, "loss": 3.1528013229370115, "step": 130350 }, { "epoch": 0.2464, "grad_norm": 0.20064017176628113, "learning_rate": 7.747739314220396e-05, "loss": 3.0920642852783202, "step": 130360 }, { "epoch": 0.24646666666666667, "grad_norm": 0.44463515281677246, "learning_rate": 7.746818306392796e-05, "loss": 3.0486770629882813, "step": 130370 }, { "epoch": 0.24653333333333333, "grad_norm": 0.3214130103588104, "learning_rate": 7.745897165059451e-05, "loss": 3.246694564819336, "step": 130380 }, { "epoch": 0.2466, "grad_norm": 0.1958009898662567, "learning_rate": 7.744975890265134e-05, "loss": 3.160366249084473, "step": 130390 }, { "epoch": 0.24666666666666667, "grad_norm": 0.20793341100215912, "learning_rate": 7.744054482054624e-05, "loss": 3.159522819519043, "step": 130400 }, { "epoch": 0.24673333333333333, "grad_norm": 0.6958319544792175, "learning_rate": 7.743132940472702e-05, "loss": 3.1575433731079103, "step": 130410 }, { "epoch": 0.2468, "grad_norm": 0.1838734745979309, "learning_rate": 7.74221126556416e-05, "loss": 3.062670135498047, "step": 130420 }, { "epoch": 0.24686666666666668, "grad_norm": 0.20754264295101166, "learning_rate": 7.741289457373795e-05, "loss": 3.1181955337524414, "step": 130430 }, { "epoch": 0.24693333333333334, "grad_norm": 0.1902933418750763, "learning_rate": 7.74036751594641e-05, "loss": 3.129838562011719, "step": 130440 }, { "epoch": 0.247, "grad_norm": 0.24422617256641388, "learning_rate": 7.739445441326813e-05, "loss": 3.227000427246094, "step": 130450 }, { "epoch": 0.24706666666666666, "grad_norm": 0.4229270815849304, "learning_rate": 7.738523233559825e-05, "loss": 3.139012908935547, "step": 130460 }, { "epoch": 0.24713333333333334, "grad_norm": 0.21502043306827545, "learning_rate": 7.737600892690263e-05, "loss": 3.17025203704834, "step": 130470 }, { "epoch": 0.2472, "grad_norm": 0.220823734998703, "learning_rate": 7.736678418762962e-05, "loss": 3.1793437957763673, "step": 130480 }, { "epoch": 0.24726666666666666, "grad_norm": 0.21424704790115356, "learning_rate": 7.735755811822751e-05, "loss": 3.2100337982177733, "step": 130490 }, { "epoch": 0.24733333333333332, "grad_norm": 0.44343000650405884, "learning_rate": 7.734833071914478e-05, "loss": 3.179047966003418, "step": 130500 }, { "epoch": 0.2474, "grad_norm": 0.20584286749362946, "learning_rate": 7.733910199082991e-05, "loss": 3.1605161666870116, "step": 130510 }, { "epoch": 0.24746666666666667, "grad_norm": 0.21955521404743195, "learning_rate": 7.732987193373143e-05, "loss": 3.1246692657470705, "step": 130520 }, { "epoch": 0.24753333333333333, "grad_norm": 0.1932043731212616, "learning_rate": 7.732064054829795e-05, "loss": 3.186284828186035, "step": 130530 }, { "epoch": 0.2476, "grad_norm": 0.21017810702323914, "learning_rate": 7.731140783497818e-05, "loss": 3.1099538803100586, "step": 130540 }, { "epoch": 0.24766666666666667, "grad_norm": 0.20173309743404388, "learning_rate": 7.730217379422085e-05, "loss": 3.137156867980957, "step": 130550 }, { "epoch": 0.24773333333333333, "grad_norm": 1.4309595823287964, "learning_rate": 7.729293842647476e-05, "loss": 3.104152870178223, "step": 130560 }, { "epoch": 0.2478, "grad_norm": 0.1927039623260498, "learning_rate": 7.728370173218878e-05, "loss": 3.1010250091552733, "step": 130570 }, { "epoch": 0.24786666666666668, "grad_norm": 0.20927287638187408, "learning_rate": 7.727446371181187e-05, "loss": 3.110220527648926, "step": 130580 }, { "epoch": 0.24793333333333334, "grad_norm": 0.19950976967811584, "learning_rate": 7.726522436579302e-05, "loss": 3.05800838470459, "step": 130590 }, { "epoch": 0.248, "grad_norm": 0.22317920625209808, "learning_rate": 7.725598369458131e-05, "loss": 3.1005685806274412, "step": 130600 }, { "epoch": 0.24806666666666666, "grad_norm": 0.1852644830942154, "learning_rate": 7.724674169862586e-05, "loss": 3.118148422241211, "step": 130610 }, { "epoch": 0.24813333333333334, "grad_norm": 0.20242933928966522, "learning_rate": 7.723749837837586e-05, "loss": 3.115787124633789, "step": 130620 }, { "epoch": 0.2482, "grad_norm": 0.25317543745040894, "learning_rate": 7.722825373428058e-05, "loss": 3.2229694366455077, "step": 130630 }, { "epoch": 0.24826666666666666, "grad_norm": 0.31111419200897217, "learning_rate": 7.721900776678932e-05, "loss": 3.177674674987793, "step": 130640 }, { "epoch": 0.24833333333333332, "grad_norm": 1.2280418872833252, "learning_rate": 7.720976047635151e-05, "loss": 3.0677967071533203, "step": 130650 }, { "epoch": 0.2484, "grad_norm": 0.18696750700473785, "learning_rate": 7.72005118634166e-05, "loss": 3.0856590270996094, "step": 130660 }, { "epoch": 0.24846666666666667, "grad_norm": 0.542655348777771, "learning_rate": 7.719126192843406e-05, "loss": 3.1448089599609377, "step": 130670 }, { "epoch": 0.24853333333333333, "grad_norm": 0.18834935128688812, "learning_rate": 7.71820106718535e-05, "loss": 3.0835309982299806, "step": 130680 }, { "epoch": 0.2486, "grad_norm": 0.6409217119216919, "learning_rate": 7.717275809412461e-05, "loss": 3.135593605041504, "step": 130690 }, { "epoch": 0.24866666666666667, "grad_norm": 0.22523550689220428, "learning_rate": 7.716350419569703e-05, "loss": 3.1824068069458007, "step": 130700 }, { "epoch": 0.24873333333333333, "grad_norm": 0.19868077337741852, "learning_rate": 7.715424897702057e-05, "loss": 3.1162811279296876, "step": 130710 }, { "epoch": 0.2488, "grad_norm": 0.19609664380550385, "learning_rate": 7.714499243854505e-05, "loss": 3.0799842834472657, "step": 130720 }, { "epoch": 0.24886666666666668, "grad_norm": 0.184064581990242, "learning_rate": 7.713573458072042e-05, "loss": 3.139324951171875, "step": 130730 }, { "epoch": 0.24893333333333334, "grad_norm": 0.18962138891220093, "learning_rate": 7.712647540399657e-05, "loss": 3.0745416641235352, "step": 130740 }, { "epoch": 0.249, "grad_norm": 0.19830356538295746, "learning_rate": 7.71172149088236e-05, "loss": 3.1069169998168946, "step": 130750 }, { "epoch": 0.24906666666666666, "grad_norm": 0.21197432279586792, "learning_rate": 7.710795309565155e-05, "loss": 3.1705413818359376, "step": 130760 }, { "epoch": 0.24913333333333335, "grad_norm": 0.3304200768470764, "learning_rate": 7.709868996493063e-05, "loss": 3.091227149963379, "step": 130770 }, { "epoch": 0.2492, "grad_norm": 0.22616362571716309, "learning_rate": 7.708942551711104e-05, "loss": 3.211791229248047, "step": 130780 }, { "epoch": 0.24926666666666666, "grad_norm": 0.21792897582054138, "learning_rate": 7.708015975264307e-05, "loss": 3.0710853576660155, "step": 130790 }, { "epoch": 0.24933333333333332, "grad_norm": 0.1842462569475174, "learning_rate": 7.707089267197706e-05, "loss": 3.112366485595703, "step": 130800 }, { "epoch": 0.2494, "grad_norm": 0.20462875068187714, "learning_rate": 7.706162427556343e-05, "loss": 3.102873611450195, "step": 130810 }, { "epoch": 0.24946666666666667, "grad_norm": 0.1847117394208908, "learning_rate": 7.705235456385266e-05, "loss": 3.1189123153686524, "step": 130820 }, { "epoch": 0.24953333333333333, "grad_norm": 0.20086945593357086, "learning_rate": 7.704308353729531e-05, "loss": 3.097393608093262, "step": 130830 }, { "epoch": 0.2496, "grad_norm": 0.2115822434425354, "learning_rate": 7.703381119634197e-05, "loss": 3.0962574005126955, "step": 130840 }, { "epoch": 0.24966666666666668, "grad_norm": 0.18000033497810364, "learning_rate": 7.702453754144332e-05, "loss": 3.096761131286621, "step": 130850 }, { "epoch": 0.24973333333333333, "grad_norm": 0.25529199838638306, "learning_rate": 7.701526257305007e-05, "loss": 3.1101190567016603, "step": 130860 }, { "epoch": 0.2498, "grad_norm": 0.20311905443668365, "learning_rate": 7.700598629161304e-05, "loss": 3.1747623443603517, "step": 130870 }, { "epoch": 0.24986666666666665, "grad_norm": 0.2020729035139084, "learning_rate": 7.699670869758309e-05, "loss": 3.127811050415039, "step": 130880 }, { "epoch": 0.24993333333333334, "grad_norm": 0.20837697386741638, "learning_rate": 7.698742979141113e-05, "loss": 3.140334701538086, "step": 130890 }, { "epoch": 0.25, "grad_norm": 0.19381238520145416, "learning_rate": 7.697814957354819e-05, "loss": 3.1220001220703124, "step": 130900 }, { "epoch": 0.25006666666666666, "grad_norm": 0.20099198818206787, "learning_rate": 7.69688680444453e-05, "loss": 3.073299217224121, "step": 130910 }, { "epoch": 0.2501333333333333, "grad_norm": 0.2005809247493744, "learning_rate": 7.695958520455355e-05, "loss": 3.101541519165039, "step": 130920 }, { "epoch": 0.2502, "grad_norm": 0.19838783144950867, "learning_rate": 7.695030105432417e-05, "loss": 3.094756317138672, "step": 130930 }, { "epoch": 0.2502666666666667, "grad_norm": 0.21376672387123108, "learning_rate": 7.694101559420837e-05, "loss": 3.1461042404174804, "step": 130940 }, { "epoch": 0.25033333333333335, "grad_norm": 0.1880926936864853, "learning_rate": 7.693172882465748e-05, "loss": 3.0917789459228517, "step": 130950 }, { "epoch": 0.2504, "grad_norm": 0.19503068923950195, "learning_rate": 7.692244074612285e-05, "loss": 3.1348398208618162, "step": 130960 }, { "epoch": 0.25046666666666667, "grad_norm": 0.17917245626449585, "learning_rate": 7.691315135905595e-05, "loss": 3.1019851684570314, "step": 130970 }, { "epoch": 0.25053333333333333, "grad_norm": 0.3140735328197479, "learning_rate": 7.690386066390822e-05, "loss": 3.0731340408325196, "step": 130980 }, { "epoch": 0.2506, "grad_norm": 0.24666203558444977, "learning_rate": 7.68945686611313e-05, "loss": 3.0892635345458985, "step": 130990 }, { "epoch": 0.25066666666666665, "grad_norm": 0.19600367546081543, "learning_rate": 7.688527535117675e-05, "loss": 3.066097640991211, "step": 131000 }, { "epoch": 0.2507333333333333, "grad_norm": 0.3283669948577881, "learning_rate": 7.687598073449628e-05, "loss": 3.097826385498047, "step": 131010 }, { "epoch": 0.2508, "grad_norm": 0.20759394764900208, "learning_rate": 7.686668481154167e-05, "loss": 3.0902950286865236, "step": 131020 }, { "epoch": 0.2508666666666667, "grad_norm": 0.19426025450229645, "learning_rate": 7.685738758276471e-05, "loss": 3.109091377258301, "step": 131030 }, { "epoch": 0.25093333333333334, "grad_norm": 0.46763691306114197, "learning_rate": 7.684808904861728e-05, "loss": 3.167312240600586, "step": 131040 }, { "epoch": 0.251, "grad_norm": 0.21592260897159576, "learning_rate": 7.683878920955135e-05, "loss": 3.0964334487915037, "step": 131050 }, { "epoch": 0.25106666666666666, "grad_norm": 0.19615115225315094, "learning_rate": 7.682948806601888e-05, "loss": 3.0835765838623046, "step": 131060 }, { "epoch": 0.2511333333333333, "grad_norm": 0.20492210984230042, "learning_rate": 7.6820185618472e-05, "loss": 3.064090919494629, "step": 131070 }, { "epoch": 0.2512, "grad_norm": 0.18885301053524017, "learning_rate": 7.681088186736278e-05, "loss": 3.091996192932129, "step": 131080 }, { "epoch": 0.2512666666666667, "grad_norm": 0.21573273837566376, "learning_rate": 7.680157681314349e-05, "loss": 3.1009237289428713, "step": 131090 }, { "epoch": 0.25133333333333335, "grad_norm": 0.17836618423461914, "learning_rate": 7.679227045626633e-05, "loss": 3.1048952102661134, "step": 131100 }, { "epoch": 0.2514, "grad_norm": 0.21474595367908478, "learning_rate": 7.678296279718364e-05, "loss": 3.1237125396728516, "step": 131110 }, { "epoch": 0.25146666666666667, "grad_norm": 0.26227203011512756, "learning_rate": 7.677365383634782e-05, "loss": 3.1128471374511717, "step": 131120 }, { "epoch": 0.25153333333333333, "grad_norm": 0.20067629218101501, "learning_rate": 7.67643435742113e-05, "loss": 3.1654659271240235, "step": 131130 }, { "epoch": 0.2516, "grad_norm": 0.20465698838233948, "learning_rate": 7.675503201122663e-05, "loss": 3.0780624389648437, "step": 131140 }, { "epoch": 0.25166666666666665, "grad_norm": 0.1915210336446762, "learning_rate": 7.674571914784635e-05, "loss": 3.0960338592529295, "step": 131150 }, { "epoch": 0.2517333333333333, "grad_norm": 0.20574969053268433, "learning_rate": 7.673640498452311e-05, "loss": 3.092115783691406, "step": 131160 }, { "epoch": 0.2518, "grad_norm": 0.2250441312789917, "learning_rate": 7.672708952170962e-05, "loss": 3.099943733215332, "step": 131170 }, { "epoch": 0.2518666666666667, "grad_norm": 0.18265187740325928, "learning_rate": 7.671777275985866e-05, "loss": 3.09893913269043, "step": 131180 }, { "epoch": 0.25193333333333334, "grad_norm": 0.18874385952949524, "learning_rate": 7.670845469942304e-05, "loss": 3.0699174880981444, "step": 131190 }, { "epoch": 0.252, "grad_norm": 0.23396532237529755, "learning_rate": 7.669913534085565e-05, "loss": 3.138132858276367, "step": 131200 }, { "epoch": 0.25206666666666666, "grad_norm": 0.19594243168830872, "learning_rate": 7.668981468460946e-05, "loss": 3.1604066848754884, "step": 131210 }, { "epoch": 0.2521333333333333, "grad_norm": 0.19283434748649597, "learning_rate": 7.668049273113747e-05, "loss": 3.074832725524902, "step": 131220 }, { "epoch": 0.2522, "grad_norm": 0.19939422607421875, "learning_rate": 7.667116948089279e-05, "loss": 3.1302860260009764, "step": 131230 }, { "epoch": 0.25226666666666664, "grad_norm": 0.2094760686159134, "learning_rate": 7.666184493432855e-05, "loss": 3.1960660934448244, "step": 131240 }, { "epoch": 0.25233333333333335, "grad_norm": 0.21854250133037567, "learning_rate": 7.665251909189795e-05, "loss": 3.113912010192871, "step": 131250 }, { "epoch": 0.2524, "grad_norm": 0.2030598372220993, "learning_rate": 7.664319195405427e-05, "loss": 3.1666793823242188, "step": 131260 }, { "epoch": 0.2524666666666667, "grad_norm": 0.19328442215919495, "learning_rate": 7.663386352125087e-05, "loss": 3.0825252532958984, "step": 131270 }, { "epoch": 0.25253333333333333, "grad_norm": 0.58448725938797, "learning_rate": 7.662453379394107e-05, "loss": 3.0957033157348635, "step": 131280 }, { "epoch": 0.2526, "grad_norm": 0.19897449016571045, "learning_rate": 7.661520277257842e-05, "loss": 3.097664451599121, "step": 131290 }, { "epoch": 0.25266666666666665, "grad_norm": 0.20914138853549957, "learning_rate": 7.660587045761638e-05, "loss": 3.1423152923583983, "step": 131300 }, { "epoch": 0.2527333333333333, "grad_norm": 0.18076956272125244, "learning_rate": 7.659653684950858e-05, "loss": 3.121456527709961, "step": 131310 }, { "epoch": 0.2528, "grad_norm": 0.21648748219013214, "learning_rate": 7.658720194870863e-05, "loss": 3.1404762268066406, "step": 131320 }, { "epoch": 0.2528666666666667, "grad_norm": 0.19014327228069305, "learning_rate": 7.657786575567027e-05, "loss": 3.0800708770751952, "step": 131330 }, { "epoch": 0.25293333333333334, "grad_norm": 0.23843728005886078, "learning_rate": 7.656852827084728e-05, "loss": 3.054199981689453, "step": 131340 }, { "epoch": 0.253, "grad_norm": 0.3071354925632477, "learning_rate": 7.655918949469345e-05, "loss": 3.119614601135254, "step": 131350 }, { "epoch": 0.25306666666666666, "grad_norm": 0.22083532810211182, "learning_rate": 7.654984942766272e-05, "loss": 3.1534540176391603, "step": 131360 }, { "epoch": 0.2531333333333333, "grad_norm": 0.2949063777923584, "learning_rate": 7.654050807020906e-05, "loss": 3.120478630065918, "step": 131370 }, { "epoch": 0.2532, "grad_norm": 0.21545208990573883, "learning_rate": 7.653116542278645e-05, "loss": 3.109859657287598, "step": 131380 }, { "epoch": 0.25326666666666664, "grad_norm": 0.19328367710113525, "learning_rate": 7.652182148584903e-05, "loss": 3.1209564208984375, "step": 131390 }, { "epoch": 0.25333333333333335, "grad_norm": 0.18428869545459747, "learning_rate": 7.651247625985091e-05, "loss": 3.106141471862793, "step": 131400 }, { "epoch": 0.2534, "grad_norm": 0.2172003835439682, "learning_rate": 7.650312974524632e-05, "loss": 3.2643665313720702, "step": 131410 }, { "epoch": 0.2534666666666667, "grad_norm": 0.19496457278728485, "learning_rate": 7.649378194248955e-05, "loss": 3.0972217559814452, "step": 131420 }, { "epoch": 0.25353333333333333, "grad_norm": 0.18633554875850677, "learning_rate": 7.648443285203492e-05, "loss": 3.100021553039551, "step": 131430 }, { "epoch": 0.2536, "grad_norm": 0.22328804433345795, "learning_rate": 7.647508247433684e-05, "loss": 3.1317447662353515, "step": 131440 }, { "epoch": 0.25366666666666665, "grad_norm": 0.2073354572057724, "learning_rate": 7.646573080984975e-05, "loss": 3.1126747131347656, "step": 131450 }, { "epoch": 0.2537333333333333, "grad_norm": 0.19602303206920624, "learning_rate": 7.645637785902821e-05, "loss": 3.120380401611328, "step": 131460 }, { "epoch": 0.2538, "grad_norm": 0.19591465592384338, "learning_rate": 7.644702362232679e-05, "loss": 3.1172462463378907, "step": 131470 }, { "epoch": 0.2538666666666667, "grad_norm": 0.1886298656463623, "learning_rate": 7.643766810020015e-05, "loss": 3.1300954818725586, "step": 131480 }, { "epoch": 0.25393333333333334, "grad_norm": 0.19617930054664612, "learning_rate": 7.642831129310298e-05, "loss": 3.0855655670166016, "step": 131490 }, { "epoch": 0.254, "grad_norm": 0.20393683016300201, "learning_rate": 7.641895320149008e-05, "loss": 3.1232578277587892, "step": 131500 }, { "epoch": 0.25406666666666666, "grad_norm": 0.6660206913948059, "learning_rate": 7.640959382581631e-05, "loss": 3.1426136016845705, "step": 131510 }, { "epoch": 0.2541333333333333, "grad_norm": 0.199762225151062, "learning_rate": 7.640023316653653e-05, "loss": 3.1114324569702148, "step": 131520 }, { "epoch": 0.2542, "grad_norm": 0.23541301488876343, "learning_rate": 7.639087122410571e-05, "loss": 3.1933364868164062, "step": 131530 }, { "epoch": 0.25426666666666664, "grad_norm": 0.18946394324302673, "learning_rate": 7.63815079989789e-05, "loss": 3.1071638107299804, "step": 131540 }, { "epoch": 0.25433333333333336, "grad_norm": 0.1886608600616455, "learning_rate": 7.637214349161115e-05, "loss": 3.1361194610595704, "step": 131550 }, { "epoch": 0.2544, "grad_norm": 0.19660130143165588, "learning_rate": 7.636277770245765e-05, "loss": 3.0953773498535155, "step": 131560 }, { "epoch": 0.2544666666666667, "grad_norm": 0.19135749340057373, "learning_rate": 7.635341063197359e-05, "loss": 3.133963203430176, "step": 131570 }, { "epoch": 0.25453333333333333, "grad_norm": 0.18914949893951416, "learning_rate": 7.634404228061424e-05, "loss": 3.072311210632324, "step": 131580 }, { "epoch": 0.2546, "grad_norm": 0.25436875224113464, "learning_rate": 7.633467264883497e-05, "loss": 3.0430665969848634, "step": 131590 }, { "epoch": 0.25466666666666665, "grad_norm": 0.20219625532627106, "learning_rate": 7.632530173709115e-05, "loss": 3.105142021179199, "step": 131600 }, { "epoch": 0.2547333333333333, "grad_norm": 0.20409247279167175, "learning_rate": 7.631592954583824e-05, "loss": 3.101444625854492, "step": 131610 }, { "epoch": 0.2548, "grad_norm": 0.21752382814884186, "learning_rate": 7.630655607553178e-05, "loss": 3.1334089279174804, "step": 131620 }, { "epoch": 0.2548666666666667, "grad_norm": 0.218354731798172, "learning_rate": 7.629718132662734e-05, "loss": 3.105608367919922, "step": 131630 }, { "epoch": 0.25493333333333335, "grad_norm": 0.22354857623577118, "learning_rate": 7.628780529958058e-05, "loss": 3.1104766845703127, "step": 131640 }, { "epoch": 0.255, "grad_norm": 0.21189667284488678, "learning_rate": 7.627842799484721e-05, "loss": 3.0645204544067384, "step": 131650 }, { "epoch": 0.25506666666666666, "grad_norm": 0.21187496185302734, "learning_rate": 7.6269049412883e-05, "loss": 3.1188837051391602, "step": 131660 }, { "epoch": 0.2551333333333333, "grad_norm": 0.37335631251335144, "learning_rate": 7.625966955414378e-05, "loss": 3.0670204162597656, "step": 131670 }, { "epoch": 0.2552, "grad_norm": 0.18035072088241577, "learning_rate": 7.625028841908546e-05, "loss": 3.0181116104125976, "step": 131680 }, { "epoch": 0.25526666666666664, "grad_norm": 0.23173385858535767, "learning_rate": 7.624090600816397e-05, "loss": 3.1655654907226562, "step": 131690 }, { "epoch": 0.25533333333333336, "grad_norm": 0.1963232457637787, "learning_rate": 7.623152232183537e-05, "loss": 3.2957271575927733, "step": 131700 }, { "epoch": 0.2554, "grad_norm": 0.2092929631471634, "learning_rate": 7.622213736055573e-05, "loss": 3.1136663436889647, "step": 131710 }, { "epoch": 0.2554666666666667, "grad_norm": 0.19098572432994843, "learning_rate": 7.621275112478116e-05, "loss": 3.1705158233642576, "step": 131720 }, { "epoch": 0.25553333333333333, "grad_norm": 0.21361896395683289, "learning_rate": 7.62033636149679e-05, "loss": 3.0777267456054687, "step": 131730 }, { "epoch": 0.2556, "grad_norm": 0.22326962649822235, "learning_rate": 7.619397483157223e-05, "loss": 3.0656314849853517, "step": 131740 }, { "epoch": 0.25566666666666665, "grad_norm": 0.18675856292247772, "learning_rate": 7.618458477505043e-05, "loss": 3.1513578414916994, "step": 131750 }, { "epoch": 0.2557333333333333, "grad_norm": 0.29793956875801086, "learning_rate": 7.617519344585895e-05, "loss": 3.173833465576172, "step": 131760 }, { "epoch": 0.2558, "grad_norm": 0.22142262756824493, "learning_rate": 7.616580084445421e-05, "loss": 3.142588996887207, "step": 131770 }, { "epoch": 0.2558666666666667, "grad_norm": 0.18635593354701996, "learning_rate": 7.615640697129273e-05, "loss": 3.1377079010009767, "step": 131780 }, { "epoch": 0.25593333333333335, "grad_norm": 0.23020139336585999, "learning_rate": 7.61470118268311e-05, "loss": 3.1477592468261717, "step": 131790 }, { "epoch": 0.256, "grad_norm": 0.2112380564212799, "learning_rate": 7.613761541152596e-05, "loss": 3.1278011322021486, "step": 131800 }, { "epoch": 0.25606666666666666, "grad_norm": 0.20457592606544495, "learning_rate": 7.6128217725834e-05, "loss": 3.0592008590698243, "step": 131810 }, { "epoch": 0.2561333333333333, "grad_norm": 0.20253172516822815, "learning_rate": 7.611881877021198e-05, "loss": 3.126668930053711, "step": 131820 }, { "epoch": 0.2562, "grad_norm": 0.21946214139461517, "learning_rate": 7.610941854511672e-05, "loss": 3.1356380462646483, "step": 131830 }, { "epoch": 0.25626666666666664, "grad_norm": 0.20620959997177124, "learning_rate": 7.610001705100513e-05, "loss": 3.077481269836426, "step": 131840 }, { "epoch": 0.25633333333333336, "grad_norm": 0.2033991664648056, "learning_rate": 7.609061428833415e-05, "loss": 3.1237628936767576, "step": 131850 }, { "epoch": 0.2564, "grad_norm": 0.20671768486499786, "learning_rate": 7.608121025756077e-05, "loss": 3.3243621826171874, "step": 131860 }, { "epoch": 0.2564666666666667, "grad_norm": 0.21810367703437805, "learning_rate": 7.607180495914208e-05, "loss": 3.109395217895508, "step": 131870 }, { "epoch": 0.25653333333333334, "grad_norm": 0.3314041495323181, "learning_rate": 7.606239839353522e-05, "loss": 3.0829845428466798, "step": 131880 }, { "epoch": 0.2566, "grad_norm": 0.2564932703971863, "learning_rate": 7.605299056119737e-05, "loss": 3.058031463623047, "step": 131890 }, { "epoch": 0.25666666666666665, "grad_norm": 0.26344209909439087, "learning_rate": 7.604358146258578e-05, "loss": 3.137917709350586, "step": 131900 }, { "epoch": 0.2567333333333333, "grad_norm": 0.24726185202598572, "learning_rate": 7.603417109815777e-05, "loss": 3.21533203125, "step": 131910 }, { "epoch": 0.2568, "grad_norm": 0.1838756799697876, "learning_rate": 7.602475946837075e-05, "loss": 3.178369331359863, "step": 131920 }, { "epoch": 0.2568666666666667, "grad_norm": 0.20542095601558685, "learning_rate": 7.601534657368212e-05, "loss": 3.081593704223633, "step": 131930 }, { "epoch": 0.25693333333333335, "grad_norm": 0.44237709045410156, "learning_rate": 7.600593241454942e-05, "loss": 3.2133289337158204, "step": 131940 }, { "epoch": 0.257, "grad_norm": 0.449504017829895, "learning_rate": 7.599651699143018e-05, "loss": 3.179610252380371, "step": 131950 }, { "epoch": 0.25706666666666667, "grad_norm": 0.22625629603862762, "learning_rate": 7.598710030478205e-05, "loss": 3.2067806243896486, "step": 131960 }, { "epoch": 0.2571333333333333, "grad_norm": 0.24302294850349426, "learning_rate": 7.597768235506268e-05, "loss": 3.1702436447143554, "step": 131970 }, { "epoch": 0.2572, "grad_norm": 0.21498417854309082, "learning_rate": 7.596826314272988e-05, "loss": 3.1148910522460938, "step": 131980 }, { "epoch": 0.25726666666666664, "grad_norm": 0.1955830454826355, "learning_rate": 7.59588426682414e-05, "loss": 3.1157772064208986, "step": 131990 }, { "epoch": 0.25733333333333336, "grad_norm": 0.18618197739124298, "learning_rate": 7.594942093205514e-05, "loss": 3.147664451599121, "step": 132000 }, { "epoch": 0.2574, "grad_norm": 0.41500481963157654, "learning_rate": 7.593999793462902e-05, "loss": 3.065707206726074, "step": 132010 }, { "epoch": 0.2574666666666667, "grad_norm": 0.1959788203239441, "learning_rate": 7.593057367642105e-05, "loss": 3.0479686737060545, "step": 132020 }, { "epoch": 0.25753333333333334, "grad_norm": 0.19796961545944214, "learning_rate": 7.592114815788926e-05, "loss": 3.1381784439086915, "step": 132030 }, { "epoch": 0.2576, "grad_norm": 0.18788960576057434, "learning_rate": 7.591172137949177e-05, "loss": 3.124369812011719, "step": 132040 }, { "epoch": 0.25766666666666665, "grad_norm": 0.21633397042751312, "learning_rate": 7.590229334168678e-05, "loss": 3.1006376266479494, "step": 132050 }, { "epoch": 0.2577333333333333, "grad_norm": 0.2126448005437851, "learning_rate": 7.589286404493252e-05, "loss": 3.1215641021728517, "step": 132060 }, { "epoch": 0.2578, "grad_norm": 0.19629278779029846, "learning_rate": 7.588343348968727e-05, "loss": 3.092130661010742, "step": 132070 }, { "epoch": 0.2578666666666667, "grad_norm": 0.17957495152950287, "learning_rate": 7.587400167640942e-05, "loss": 3.0850803375244142, "step": 132080 }, { "epoch": 0.25793333333333335, "grad_norm": 0.18157535791397095, "learning_rate": 7.586456860555738e-05, "loss": 3.074777030944824, "step": 132090 }, { "epoch": 0.258, "grad_norm": 0.22074094414710999, "learning_rate": 7.58551342775896e-05, "loss": 3.0707677841186523, "step": 132100 }, { "epoch": 0.25806666666666667, "grad_norm": 0.2056787759065628, "learning_rate": 7.584569869296467e-05, "loss": 3.0567514419555666, "step": 132110 }, { "epoch": 0.2581333333333333, "grad_norm": 0.18549613654613495, "learning_rate": 7.583626185214116e-05, "loss": 3.0922334671020506, "step": 132120 }, { "epoch": 0.2582, "grad_norm": 0.22267542779445648, "learning_rate": 7.582682375557778e-05, "loss": 3.1059192657470702, "step": 132130 }, { "epoch": 0.25826666666666664, "grad_norm": 0.2108243852853775, "learning_rate": 7.581738440373323e-05, "loss": 3.0710487365722656, "step": 132140 }, { "epoch": 0.25833333333333336, "grad_norm": 0.19634418189525604, "learning_rate": 7.580794379706628e-05, "loss": 3.228224182128906, "step": 132150 }, { "epoch": 0.2584, "grad_norm": 0.23406067490577698, "learning_rate": 7.579850193603582e-05, "loss": 3.1226198196411135, "step": 132160 }, { "epoch": 0.2584666666666667, "grad_norm": 0.188054621219635, "learning_rate": 7.578905882110072e-05, "loss": 3.1146240234375, "step": 132170 }, { "epoch": 0.25853333333333334, "grad_norm": 0.19870629906654358, "learning_rate": 7.577961445271998e-05, "loss": 3.1507871627807615, "step": 132180 }, { "epoch": 0.2586, "grad_norm": 0.20437955856323242, "learning_rate": 7.577016883135261e-05, "loss": 3.12508487701416, "step": 132190 }, { "epoch": 0.25866666666666666, "grad_norm": 0.19799001514911652, "learning_rate": 7.576072195745773e-05, "loss": 3.0830039978027344, "step": 132200 }, { "epoch": 0.2587333333333333, "grad_norm": 0.23014678061008453, "learning_rate": 7.575127383149447e-05, "loss": 3.1263404846191407, "step": 132210 }, { "epoch": 0.2588, "grad_norm": 0.7838783264160156, "learning_rate": 7.574182445392205e-05, "loss": 3.002673530578613, "step": 132220 }, { "epoch": 0.2588666666666667, "grad_norm": 0.19614499807357788, "learning_rate": 7.573237382519976e-05, "loss": 3.0965879440307615, "step": 132230 }, { "epoch": 0.25893333333333335, "grad_norm": 0.1872715801000595, "learning_rate": 7.57229219457869e-05, "loss": 3.111216354370117, "step": 132240 }, { "epoch": 0.259, "grad_norm": 0.19942858815193176, "learning_rate": 7.57134688161429e-05, "loss": 3.103302764892578, "step": 132250 }, { "epoch": 0.25906666666666667, "grad_norm": 0.20907889306545258, "learning_rate": 7.570401443672722e-05, "loss": 3.1263477325439455, "step": 132260 }, { "epoch": 0.2591333333333333, "grad_norm": 0.20114527642726898, "learning_rate": 7.569455880799936e-05, "loss": 3.110331916809082, "step": 132270 }, { "epoch": 0.2592, "grad_norm": 0.1883893609046936, "learning_rate": 7.568510193041892e-05, "loss": 3.080101203918457, "step": 132280 }, { "epoch": 0.25926666666666665, "grad_norm": 0.18975497782230377, "learning_rate": 7.567564380444551e-05, "loss": 3.224177932739258, "step": 132290 }, { "epoch": 0.25933333333333336, "grad_norm": 0.20442336797714233, "learning_rate": 7.566618443053885e-05, "loss": 3.0591073989868165, "step": 132300 }, { "epoch": 0.2594, "grad_norm": 0.2470877319574356, "learning_rate": 7.565672380915871e-05, "loss": 3.108767509460449, "step": 132310 }, { "epoch": 0.2594666666666667, "grad_norm": 0.21689723432064056, "learning_rate": 7.56472619407649e-05, "loss": 3.198388862609863, "step": 132320 }, { "epoch": 0.25953333333333334, "grad_norm": 0.2596255838871002, "learning_rate": 7.56377988258173e-05, "loss": 3.1133909225463867, "step": 132330 }, { "epoch": 0.2596, "grad_norm": 0.20692887902259827, "learning_rate": 7.562833446477586e-05, "loss": 3.079119110107422, "step": 132340 }, { "epoch": 0.25966666666666666, "grad_norm": 0.1953042447566986, "learning_rate": 7.561886885810057e-05, "loss": 3.113859939575195, "step": 132350 }, { "epoch": 0.2597333333333333, "grad_norm": 0.23592793941497803, "learning_rate": 7.56094020062515e-05, "loss": 3.0775081634521486, "step": 132360 }, { "epoch": 0.2598, "grad_norm": 0.22590172290802002, "learning_rate": 7.55999339096888e-05, "loss": 3.11263427734375, "step": 132370 }, { "epoch": 0.2598666666666667, "grad_norm": 0.40807560086250305, "learning_rate": 7.559046456887262e-05, "loss": 3.1142406463623047, "step": 132380 }, { "epoch": 0.25993333333333335, "grad_norm": 0.2520703971385956, "learning_rate": 7.558099398426323e-05, "loss": 3.119576263427734, "step": 132390 }, { "epoch": 0.26, "grad_norm": 0.1916477084159851, "learning_rate": 7.557152215632092e-05, "loss": 3.126235580444336, "step": 132400 }, { "epoch": 0.26006666666666667, "grad_norm": 0.23806917667388916, "learning_rate": 7.556204908550607e-05, "loss": 3.1108558654785154, "step": 132410 }, { "epoch": 0.2601333333333333, "grad_norm": 0.24594269692897797, "learning_rate": 7.555257477227909e-05, "loss": 3.2152225494384767, "step": 132420 }, { "epoch": 0.2602, "grad_norm": 0.20112007856369019, "learning_rate": 7.554309921710047e-05, "loss": 3.1027971267700196, "step": 132430 }, { "epoch": 0.26026666666666665, "grad_norm": 0.18043828010559082, "learning_rate": 7.553362242043077e-05, "loss": 3.0867176055908203, "step": 132440 }, { "epoch": 0.26033333333333336, "grad_norm": 0.22307385504245758, "learning_rate": 7.55241443827306e-05, "loss": 3.072601890563965, "step": 132450 }, { "epoch": 0.2604, "grad_norm": 0.2772645950317383, "learning_rate": 7.551466510446061e-05, "loss": 3.1671417236328123, "step": 132460 }, { "epoch": 0.2604666666666667, "grad_norm": 0.25439655780792236, "learning_rate": 7.550518458608157e-05, "loss": 3.2467761993408204, "step": 132470 }, { "epoch": 0.26053333333333334, "grad_norm": 0.19844377040863037, "learning_rate": 7.549570282805421e-05, "loss": 3.1703212738037108, "step": 132480 }, { "epoch": 0.2606, "grad_norm": 0.19453582167625427, "learning_rate": 7.548621983083942e-05, "loss": 3.066040802001953, "step": 132490 }, { "epoch": 0.26066666666666666, "grad_norm": 0.2048194855451584, "learning_rate": 7.54767355948981e-05, "loss": 3.125718116760254, "step": 132500 }, { "epoch": 0.2607333333333333, "grad_norm": 0.22788941860198975, "learning_rate": 7.54672501206912e-05, "loss": 3.1154436111450194, "step": 132510 }, { "epoch": 0.2608, "grad_norm": 0.20346185564994812, "learning_rate": 7.545776340867978e-05, "loss": 3.0343902587890623, "step": 132520 }, { "epoch": 0.2608666666666667, "grad_norm": 0.18529774248600006, "learning_rate": 7.544827545932492e-05, "loss": 3.093495178222656, "step": 132530 }, { "epoch": 0.26093333333333335, "grad_norm": 0.21118316054344177, "learning_rate": 7.543878627308777e-05, "loss": 3.069477844238281, "step": 132540 }, { "epoch": 0.261, "grad_norm": 0.21427223086357117, "learning_rate": 7.542929585042955e-05, "loss": 3.1492341995239257, "step": 132550 }, { "epoch": 0.26106666666666667, "grad_norm": 0.19941505789756775, "learning_rate": 7.54198041918115e-05, "loss": 3.094634246826172, "step": 132560 }, { "epoch": 0.26113333333333333, "grad_norm": 0.2033727467060089, "learning_rate": 7.541031129769496e-05, "loss": 3.133271598815918, "step": 132570 }, { "epoch": 0.2612, "grad_norm": 0.1906985342502594, "learning_rate": 7.540081716854135e-05, "loss": 3.067905616760254, "step": 132580 }, { "epoch": 0.26126666666666665, "grad_norm": 0.24753740429878235, "learning_rate": 7.539132180481208e-05, "loss": 3.166851043701172, "step": 132590 }, { "epoch": 0.2613333333333333, "grad_norm": 0.18032486736774445, "learning_rate": 7.53818252069687e-05, "loss": 3.054424285888672, "step": 132600 }, { "epoch": 0.2614, "grad_norm": 0.19013787806034088, "learning_rate": 7.537232737547276e-05, "loss": 3.0807064056396483, "step": 132610 }, { "epoch": 0.2614666666666667, "grad_norm": 0.190599262714386, "learning_rate": 7.536282831078587e-05, "loss": 3.079762840270996, "step": 132620 }, { "epoch": 0.26153333333333334, "grad_norm": 0.2034352570772171, "learning_rate": 7.535332801336976e-05, "loss": 3.1380632400512694, "step": 132630 }, { "epoch": 0.2616, "grad_norm": 0.22660960257053375, "learning_rate": 7.534382648368616e-05, "loss": 3.111302947998047, "step": 132640 }, { "epoch": 0.26166666666666666, "grad_norm": 0.2128271609544754, "learning_rate": 7.533432372219687e-05, "loss": 3.1131277084350586, "step": 132650 }, { "epoch": 0.2617333333333333, "grad_norm": 0.2140427678823471, "learning_rate": 7.532481972936379e-05, "loss": 3.1226789474487306, "step": 132660 }, { "epoch": 0.2618, "grad_norm": 0.23680883646011353, "learning_rate": 7.531531450564883e-05, "loss": 3.143629264831543, "step": 132670 }, { "epoch": 0.2618666666666667, "grad_norm": 0.20178160071372986, "learning_rate": 7.530580805151398e-05, "loss": 3.077263069152832, "step": 132680 }, { "epoch": 0.26193333333333335, "grad_norm": 0.21379952132701874, "learning_rate": 7.529630036742128e-05, "loss": 3.104401779174805, "step": 132690 }, { "epoch": 0.262, "grad_norm": 0.2088557928800583, "learning_rate": 7.528679145383286e-05, "loss": 3.080929183959961, "step": 132700 }, { "epoch": 0.26206666666666667, "grad_norm": 0.18566477298736572, "learning_rate": 7.52772813112109e-05, "loss": 3.0789779663085937, "step": 132710 }, { "epoch": 0.26213333333333333, "grad_norm": 0.3187240958213806, "learning_rate": 7.526776994001758e-05, "loss": 3.1704906463623046, "step": 132720 }, { "epoch": 0.2622, "grad_norm": 0.4172568619251251, "learning_rate": 7.525825734071523e-05, "loss": 3.333565902709961, "step": 132730 }, { "epoch": 0.26226666666666665, "grad_norm": 0.1987871676683426, "learning_rate": 7.524874351376619e-05, "loss": 3.0943756103515625, "step": 132740 }, { "epoch": 0.2623333333333333, "grad_norm": 0.23481807112693787, "learning_rate": 7.523922845963285e-05, "loss": 3.081447982788086, "step": 132750 }, { "epoch": 0.2624, "grad_norm": 0.21594534814357758, "learning_rate": 7.52297121787777e-05, "loss": 3.1279937744140627, "step": 132760 }, { "epoch": 0.2624666666666667, "grad_norm": 0.28253114223480225, "learning_rate": 7.522019467166326e-05, "loss": 3.119947624206543, "step": 132770 }, { "epoch": 0.26253333333333334, "grad_norm": 0.3352331817150116, "learning_rate": 7.521067593875211e-05, "loss": 3.1102542877197266, "step": 132780 }, { "epoch": 0.2626, "grad_norm": 0.1956866979598999, "learning_rate": 7.520115598050693e-05, "loss": 3.1755548477172852, "step": 132790 }, { "epoch": 0.26266666666666666, "grad_norm": 0.19929173588752747, "learning_rate": 7.519163479739036e-05, "loss": 3.1425472259521485, "step": 132800 }, { "epoch": 0.2627333333333333, "grad_norm": 0.2015686333179474, "learning_rate": 7.518211238986521e-05, "loss": 3.127678108215332, "step": 132810 }, { "epoch": 0.2628, "grad_norm": 0.20105239748954773, "learning_rate": 7.517258875839431e-05, "loss": 3.1200773239135744, "step": 132820 }, { "epoch": 0.2628666666666667, "grad_norm": 0.18759244680404663, "learning_rate": 7.516306390344052e-05, "loss": 3.075087547302246, "step": 132830 }, { "epoch": 0.26293333333333335, "grad_norm": 0.22557255625724792, "learning_rate": 7.515353782546681e-05, "loss": 3.118935775756836, "step": 132840 }, { "epoch": 0.263, "grad_norm": 0.1982867568731308, "learning_rate": 7.514401052493616e-05, "loss": 3.123307228088379, "step": 132850 }, { "epoch": 0.26306666666666667, "grad_norm": 3.092207908630371, "learning_rate": 7.513448200231166e-05, "loss": 3.0654375076293947, "step": 132860 }, { "epoch": 0.26313333333333333, "grad_norm": 0.20948153734207153, "learning_rate": 7.51249522580564e-05, "loss": 3.1319753646850588, "step": 132870 }, { "epoch": 0.2632, "grad_norm": 0.1944577544927597, "learning_rate": 7.511542129263358e-05, "loss": 3.108662414550781, "step": 132880 }, { "epoch": 0.26326666666666665, "grad_norm": 0.19391584396362305, "learning_rate": 7.510588910650643e-05, "loss": 3.114826774597168, "step": 132890 }, { "epoch": 0.2633333333333333, "grad_norm": 0.21516507863998413, "learning_rate": 7.509635570013826e-05, "loss": 3.100732612609863, "step": 132900 }, { "epoch": 0.2634, "grad_norm": 0.2762870788574219, "learning_rate": 7.508682107399243e-05, "loss": 3.0352405548095702, "step": 132910 }, { "epoch": 0.2634666666666667, "grad_norm": 0.20512856543064117, "learning_rate": 7.507728522853236e-05, "loss": 3.181723403930664, "step": 132920 }, { "epoch": 0.26353333333333334, "grad_norm": 0.20049987733364105, "learning_rate": 7.506774816422151e-05, "loss": 3.1425264358520506, "step": 132930 }, { "epoch": 0.2636, "grad_norm": 0.20745477080345154, "learning_rate": 7.505820988152346e-05, "loss": 3.127569389343262, "step": 132940 }, { "epoch": 0.26366666666666666, "grad_norm": 0.20781022310256958, "learning_rate": 7.504867038090175e-05, "loss": 3.155399513244629, "step": 132950 }, { "epoch": 0.2637333333333333, "grad_norm": 0.21094894409179688, "learning_rate": 7.503912966282007e-05, "loss": 3.119636344909668, "step": 132960 }, { "epoch": 0.2638, "grad_norm": 0.2244199961423874, "learning_rate": 7.502958772774213e-05, "loss": 3.0757678985595702, "step": 132970 }, { "epoch": 0.2638666666666667, "grad_norm": 1.0661320686340332, "learning_rate": 7.50200445761317e-05, "loss": 3.0998113632202147, "step": 132980 }, { "epoch": 0.26393333333333335, "grad_norm": 0.30002060532569885, "learning_rate": 7.501050020845262e-05, "loss": 3.08496036529541, "step": 132990 }, { "epoch": 0.264, "grad_norm": 0.20627222955226898, "learning_rate": 7.500095462516878e-05, "loss": 3.118402671813965, "step": 133000 }, { "epoch": 0.26406666666666667, "grad_norm": 0.2056003212928772, "learning_rate": 7.499140782674414e-05, "loss": 3.0862831115722655, "step": 133010 }, { "epoch": 0.26413333333333333, "grad_norm": 0.1917048990726471, "learning_rate": 7.498185981364268e-05, "loss": 3.110786247253418, "step": 133020 }, { "epoch": 0.2642, "grad_norm": 0.254085510969162, "learning_rate": 7.49723105863285e-05, "loss": 3.096898078918457, "step": 133030 }, { "epoch": 0.26426666666666665, "grad_norm": 0.24820265173912048, "learning_rate": 7.496276014526572e-05, "loss": 3.1158050537109374, "step": 133040 }, { "epoch": 0.2643333333333333, "grad_norm": 0.20706363022327423, "learning_rate": 7.495320849091853e-05, "loss": 3.119660568237305, "step": 133050 }, { "epoch": 0.2644, "grad_norm": 0.19205087423324585, "learning_rate": 7.494365562375116e-05, "loss": 3.1060197830200194, "step": 133060 }, { "epoch": 0.2644666666666667, "grad_norm": 0.1933826357126236, "learning_rate": 7.493410154422793e-05, "loss": 3.096670150756836, "step": 133070 }, { "epoch": 0.26453333333333334, "grad_norm": 0.25355586409568787, "learning_rate": 7.49245462528132e-05, "loss": 3.077116584777832, "step": 133080 }, { "epoch": 0.2646, "grad_norm": 0.2868587374687195, "learning_rate": 7.49149897499714e-05, "loss": 3.1009883880615234, "step": 133090 }, { "epoch": 0.26466666666666666, "grad_norm": 0.21626774966716766, "learning_rate": 7.4905432036167e-05, "loss": 3.112152671813965, "step": 133100 }, { "epoch": 0.2647333333333333, "grad_norm": 0.21215620636940002, "learning_rate": 7.489587311186457e-05, "loss": 3.0804553985595704, "step": 133110 }, { "epoch": 0.2648, "grad_norm": 0.22820328176021576, "learning_rate": 7.488631297752866e-05, "loss": 3.1313749313354493, "step": 133120 }, { "epoch": 0.26486666666666664, "grad_norm": 0.1957819163799286, "learning_rate": 7.487675163362397e-05, "loss": 3.043170928955078, "step": 133130 }, { "epoch": 0.26493333333333335, "grad_norm": 0.20300862193107605, "learning_rate": 7.48671890806152e-05, "loss": 3.0658599853515627, "step": 133140 }, { "epoch": 0.265, "grad_norm": 0.18417109549045563, "learning_rate": 7.485762531896714e-05, "loss": 3.085042190551758, "step": 133150 }, { "epoch": 0.2650666666666667, "grad_norm": 0.19986990094184875, "learning_rate": 7.48480603491446e-05, "loss": 3.0879940032958983, "step": 133160 }, { "epoch": 0.26513333333333333, "grad_norm": 0.2315804362297058, "learning_rate": 7.48384941716125e-05, "loss": 3.0464765548706056, "step": 133170 }, { "epoch": 0.2652, "grad_norm": 0.19167311489582062, "learning_rate": 7.482892678683577e-05, "loss": 3.1310157775878906, "step": 133180 }, { "epoch": 0.26526666666666665, "grad_norm": 0.20256644487380981, "learning_rate": 7.481935819527946e-05, "loss": 3.166621208190918, "step": 133190 }, { "epoch": 0.2653333333333333, "grad_norm": 0.1928977519273758, "learning_rate": 7.48097883974086e-05, "loss": 3.0995121002197266, "step": 133200 }, { "epoch": 0.2654, "grad_norm": 0.21286746859550476, "learning_rate": 7.480021739368831e-05, "loss": 3.125872039794922, "step": 133210 }, { "epoch": 0.2654666666666667, "grad_norm": 0.21209055185317993, "learning_rate": 7.479064518458381e-05, "loss": 3.0546735763549804, "step": 133220 }, { "epoch": 0.26553333333333334, "grad_norm": 0.26824653148651123, "learning_rate": 7.478107177056033e-05, "loss": 3.102033233642578, "step": 133230 }, { "epoch": 0.2656, "grad_norm": 0.19953741133213043, "learning_rate": 7.477149715208318e-05, "loss": 3.1017181396484377, "step": 133240 }, { "epoch": 0.26566666666666666, "grad_norm": 0.23043522238731384, "learning_rate": 7.476192132961773e-05, "loss": 3.108574104309082, "step": 133250 }, { "epoch": 0.2657333333333333, "grad_norm": 0.18925504386425018, "learning_rate": 7.475234430362937e-05, "loss": 3.084564781188965, "step": 133260 }, { "epoch": 0.2658, "grad_norm": 0.2281927615404129, "learning_rate": 7.474276607458361e-05, "loss": 3.1782033920288084, "step": 133270 }, { "epoch": 0.26586666666666664, "grad_norm": 0.21876119077205658, "learning_rate": 7.473318664294599e-05, "loss": 3.1257482528686524, "step": 133280 }, { "epoch": 0.26593333333333335, "grad_norm": 0.2086677849292755, "learning_rate": 7.472360600918208e-05, "loss": 3.083843994140625, "step": 133290 }, { "epoch": 0.266, "grad_norm": 0.2142460197210312, "learning_rate": 7.471402417375755e-05, "loss": 3.0105236053466795, "step": 133300 }, { "epoch": 0.2660666666666667, "grad_norm": 0.22230127453804016, "learning_rate": 7.470444113713811e-05, "loss": 3.1816381454467773, "step": 133310 }, { "epoch": 0.26613333333333333, "grad_norm": 0.27080923318862915, "learning_rate": 7.469485689978954e-05, "loss": 3.171385955810547, "step": 133320 }, { "epoch": 0.2662, "grad_norm": 0.208296537399292, "learning_rate": 7.468527146217768e-05, "loss": 3.1139570236206056, "step": 133330 }, { "epoch": 0.26626666666666665, "grad_norm": 0.2906053066253662, "learning_rate": 7.467568482476837e-05, "loss": 3.126170349121094, "step": 133340 }, { "epoch": 0.2663333333333333, "grad_norm": 0.19445247948169708, "learning_rate": 7.466609698802761e-05, "loss": 3.0475101470947266, "step": 133350 }, { "epoch": 0.2664, "grad_norm": 0.23922765254974365, "learning_rate": 7.465650795242139e-05, "loss": 3.10037784576416, "step": 133360 }, { "epoch": 0.2664666666666667, "grad_norm": 0.2661675810813904, "learning_rate": 7.464691771841576e-05, "loss": 3.1025699615478515, "step": 133370 }, { "epoch": 0.26653333333333334, "grad_norm": 0.2107594609260559, "learning_rate": 7.463732628647687e-05, "loss": 3.1544370651245117, "step": 133380 }, { "epoch": 0.2666, "grad_norm": 0.20045825839042664, "learning_rate": 7.462773365707085e-05, "loss": 3.1570764541625977, "step": 133390 }, { "epoch": 0.26666666666666666, "grad_norm": 0.41731807589530945, "learning_rate": 7.461813983066398e-05, "loss": 3.1288843154907227, "step": 133400 }, { "epoch": 0.2667333333333333, "grad_norm": 0.23598088324069977, "learning_rate": 7.460854480772255e-05, "loss": 3.0999420166015623, "step": 133410 }, { "epoch": 0.2668, "grad_norm": 0.21479061245918274, "learning_rate": 7.45989485887129e-05, "loss": 3.112569808959961, "step": 133420 }, { "epoch": 0.26686666666666664, "grad_norm": 0.1984972357749939, "learning_rate": 7.458935117410146e-05, "loss": 3.1168575286865234, "step": 133430 }, { "epoch": 0.26693333333333336, "grad_norm": 0.1953693926334381, "learning_rate": 7.45797525643547e-05, "loss": 3.1515764236450194, "step": 133440 }, { "epoch": 0.267, "grad_norm": 0.1857457011938095, "learning_rate": 7.457015275993912e-05, "loss": 2.9669994354248046, "step": 133450 }, { "epoch": 0.2670666666666667, "grad_norm": 0.19327855110168457, "learning_rate": 7.456055176132134e-05, "loss": 3.1438608169555664, "step": 133460 }, { "epoch": 0.26713333333333333, "grad_norm": 0.20275767147541046, "learning_rate": 7.455094956896798e-05, "loss": 3.186603546142578, "step": 133470 }, { "epoch": 0.2672, "grad_norm": 0.19151173532009125, "learning_rate": 7.454134618334576e-05, "loss": 3.204872894287109, "step": 133480 }, { "epoch": 0.26726666666666665, "grad_norm": 0.3338609039783478, "learning_rate": 7.453174160492144e-05, "loss": 3.1510993957519533, "step": 133490 }, { "epoch": 0.2673333333333333, "grad_norm": 0.18796168267726898, "learning_rate": 7.452213583416183e-05, "loss": 3.162104034423828, "step": 133500 }, { "epoch": 0.2674, "grad_norm": 0.17908744513988495, "learning_rate": 7.451252887153381e-05, "loss": 3.095016288757324, "step": 133510 }, { "epoch": 0.2674666666666667, "grad_norm": 0.2017337530851364, "learning_rate": 7.450292071750433e-05, "loss": 3.112526512145996, "step": 133520 }, { "epoch": 0.26753333333333335, "grad_norm": 0.20294135808944702, "learning_rate": 7.449331137254036e-05, "loss": 3.140985107421875, "step": 133530 }, { "epoch": 0.2676, "grad_norm": 0.18939770758152008, "learning_rate": 7.448370083710897e-05, "loss": 3.113382911682129, "step": 133540 }, { "epoch": 0.26766666666666666, "grad_norm": 0.7951088547706604, "learning_rate": 7.447408911167723e-05, "loss": 3.0747913360595702, "step": 133550 }, { "epoch": 0.2677333333333333, "grad_norm": 0.1956586390733719, "learning_rate": 7.446447619671235e-05, "loss": 3.1155683517456056, "step": 133560 }, { "epoch": 0.2678, "grad_norm": 0.32875099778175354, "learning_rate": 7.445486209268153e-05, "loss": 3.049757385253906, "step": 133570 }, { "epoch": 0.26786666666666664, "grad_norm": 0.2088843286037445, "learning_rate": 7.444524680005207e-05, "loss": 3.1631534576416014, "step": 133580 }, { "epoch": 0.26793333333333336, "grad_norm": 0.1964053511619568, "learning_rate": 7.44356303192913e-05, "loss": 3.053297233581543, "step": 133590 }, { "epoch": 0.268, "grad_norm": 0.18201935291290283, "learning_rate": 7.442601265086661e-05, "loss": 3.1159088134765627, "step": 133600 }, { "epoch": 0.2680666666666667, "grad_norm": 0.3498184084892273, "learning_rate": 7.441639379524545e-05, "loss": 3.171310043334961, "step": 133610 }, { "epoch": 0.26813333333333333, "grad_norm": 0.204649418592453, "learning_rate": 7.440677375289535e-05, "loss": 3.1913713455200194, "step": 133620 }, { "epoch": 0.2682, "grad_norm": 0.19182538986206055, "learning_rate": 7.439715252428389e-05, "loss": 3.1507522583007814, "step": 133630 }, { "epoch": 0.26826666666666665, "grad_norm": 0.19924664497375488, "learning_rate": 7.438753010987867e-05, "loss": 3.0985157012939455, "step": 133640 }, { "epoch": 0.2683333333333333, "grad_norm": 0.1993686705827713, "learning_rate": 7.437790651014738e-05, "loss": 3.1182737350463867, "step": 133650 }, { "epoch": 0.2684, "grad_norm": 0.24974462389945984, "learning_rate": 7.436828172555778e-05, "loss": 3.0732051849365236, "step": 133660 }, { "epoch": 0.2684666666666667, "grad_norm": 0.20657570660114288, "learning_rate": 7.435865575657766e-05, "loss": 3.050934982299805, "step": 133670 }, { "epoch": 0.26853333333333335, "grad_norm": 0.191284641623497, "learning_rate": 7.434902860367488e-05, "loss": 3.110609436035156, "step": 133680 }, { "epoch": 0.2686, "grad_norm": 0.20239923894405365, "learning_rate": 7.433940026731735e-05, "loss": 3.090116500854492, "step": 133690 }, { "epoch": 0.26866666666666666, "grad_norm": 0.2143649309873581, "learning_rate": 7.432977074797305e-05, "loss": 3.0842666625976562, "step": 133700 }, { "epoch": 0.2687333333333333, "grad_norm": 0.19850626587867737, "learning_rate": 7.432014004611001e-05, "loss": 3.046321678161621, "step": 133710 }, { "epoch": 0.2688, "grad_norm": 0.2217162549495697, "learning_rate": 7.431050816219633e-05, "loss": 3.121613883972168, "step": 133720 }, { "epoch": 0.26886666666666664, "grad_norm": 0.20359677076339722, "learning_rate": 7.430087509670014e-05, "loss": 3.0983417510986326, "step": 133730 }, { "epoch": 0.26893333333333336, "grad_norm": 0.19187843799591064, "learning_rate": 7.429124085008965e-05, "loss": 3.0802947998046877, "step": 133740 }, { "epoch": 0.269, "grad_norm": 0.2135349065065384, "learning_rate": 7.428160542283311e-05, "loss": 3.1037673950195312, "step": 133750 }, { "epoch": 0.2690666666666667, "grad_norm": 0.21584221720695496, "learning_rate": 7.427196881539887e-05, "loss": 3.1136171340942385, "step": 133760 }, { "epoch": 0.26913333333333334, "grad_norm": 0.35736557841300964, "learning_rate": 7.426233102825528e-05, "loss": 3.2825218200683595, "step": 133770 }, { "epoch": 0.2692, "grad_norm": 0.22493433952331543, "learning_rate": 7.425269206187075e-05, "loss": 3.1122116088867187, "step": 133780 }, { "epoch": 0.26926666666666665, "grad_norm": 0.2228381335735321, "learning_rate": 7.424305191671381e-05, "loss": 3.0855968475341795, "step": 133790 }, { "epoch": 0.2693333333333333, "grad_norm": 0.1933923363685608, "learning_rate": 7.423341059325299e-05, "loss": 3.1511131286621095, "step": 133800 }, { "epoch": 0.2694, "grad_norm": 0.23663592338562012, "learning_rate": 7.422376809195691e-05, "loss": 3.1773248672485352, "step": 133810 }, { "epoch": 0.2694666666666667, "grad_norm": 0.17536406219005585, "learning_rate": 7.421412441329422e-05, "loss": 3.05360107421875, "step": 133820 }, { "epoch": 0.26953333333333335, "grad_norm": 0.27041831612586975, "learning_rate": 7.420447955773362e-05, "loss": 3.3474239349365233, "step": 133830 }, { "epoch": 0.2696, "grad_norm": 13.269535064697266, "learning_rate": 7.419483352574394e-05, "loss": 6.03665771484375, "step": 133840 }, { "epoch": 0.26966666666666667, "grad_norm": 0.2497134655714035, "learning_rate": 7.418518631779395e-05, "loss": 5.275164031982422, "step": 133850 }, { "epoch": 0.2697333333333333, "grad_norm": 0.24080006778240204, "learning_rate": 7.417553793435258e-05, "loss": 3.0855052947998045, "step": 133860 }, { "epoch": 0.2698, "grad_norm": 0.19015109539031982, "learning_rate": 7.416588837588875e-05, "loss": 3.1045516967773437, "step": 133870 }, { "epoch": 0.26986666666666664, "grad_norm": 0.1921290010213852, "learning_rate": 7.415623764287151e-05, "loss": 3.1313331604003904, "step": 133880 }, { "epoch": 0.26993333333333336, "grad_norm": 0.2821393311023712, "learning_rate": 7.414658573576988e-05, "loss": 3.1435077667236326, "step": 133890 }, { "epoch": 0.27, "grad_norm": 0.2047576755285263, "learning_rate": 7.413693265505301e-05, "loss": 3.1506818771362304, "step": 133900 }, { "epoch": 0.2700666666666667, "grad_norm": 0.24451269209384918, "learning_rate": 7.412727840119003e-05, "loss": 3.1250373840332033, "step": 133910 }, { "epoch": 0.27013333333333334, "grad_norm": 0.1941368132829666, "learning_rate": 7.411762297465024e-05, "loss": 3.106261444091797, "step": 133920 }, { "epoch": 0.2702, "grad_norm": 0.20134776830673218, "learning_rate": 7.410796637590287e-05, "loss": 3.084756851196289, "step": 133930 }, { "epoch": 0.27026666666666666, "grad_norm": 0.23843252658843994, "learning_rate": 7.40983086054173e-05, "loss": 3.119174003601074, "step": 133940 }, { "epoch": 0.2703333333333333, "grad_norm": 0.21384552121162415, "learning_rate": 7.408864966366293e-05, "loss": 3.0664669036865235, "step": 133950 }, { "epoch": 0.2704, "grad_norm": 0.24186080694198608, "learning_rate": 7.40789895511092e-05, "loss": 3.030487632751465, "step": 133960 }, { "epoch": 0.2704666666666667, "grad_norm": 0.21141204237937927, "learning_rate": 7.406932826822564e-05, "loss": 3.0483020782470702, "step": 133970 }, { "epoch": 0.27053333333333335, "grad_norm": 0.19125162065029144, "learning_rate": 7.405966581548185e-05, "loss": 3.1303379058837892, "step": 133980 }, { "epoch": 0.2706, "grad_norm": 0.19534151256084442, "learning_rate": 7.405000219334743e-05, "loss": 3.112082290649414, "step": 133990 }, { "epoch": 0.27066666666666667, "grad_norm": 0.19458423554897308, "learning_rate": 7.404033740229208e-05, "loss": 3.07997989654541, "step": 134000 }, { "epoch": 0.2707333333333333, "grad_norm": 0.2016034722328186, "learning_rate": 7.403067144278555e-05, "loss": 3.078660011291504, "step": 134010 }, { "epoch": 0.2708, "grad_norm": 0.25284600257873535, "learning_rate": 7.402100431529763e-05, "loss": 3.0510269165039063, "step": 134020 }, { "epoch": 0.27086666666666664, "grad_norm": 0.1904573142528534, "learning_rate": 7.40113360202982e-05, "loss": 3.179186248779297, "step": 134030 }, { "epoch": 0.27093333333333336, "grad_norm": 0.23069605231285095, "learning_rate": 7.400166655825713e-05, "loss": 3.1008575439453123, "step": 134040 }, { "epoch": 0.271, "grad_norm": 0.19449234008789062, "learning_rate": 7.399199592964445e-05, "loss": 3.0918739318847654, "step": 134050 }, { "epoch": 0.2710666666666667, "grad_norm": 0.23147566616535187, "learning_rate": 7.398232413493014e-05, "loss": 3.059198570251465, "step": 134060 }, { "epoch": 0.27113333333333334, "grad_norm": 0.22112716734409332, "learning_rate": 7.397265117458432e-05, "loss": 3.0894275665283204, "step": 134070 }, { "epoch": 0.2712, "grad_norm": 0.1998993158340454, "learning_rate": 7.396297704907714e-05, "loss": 3.06519775390625, "step": 134080 }, { "epoch": 0.27126666666666666, "grad_norm": 0.19499999284744263, "learning_rate": 7.395330175887875e-05, "loss": 3.079549026489258, "step": 134090 }, { "epoch": 0.2713333333333333, "grad_norm": 0.21091867983341217, "learning_rate": 7.394362530445945e-05, "loss": 3.1021934509277345, "step": 134100 }, { "epoch": 0.2714, "grad_norm": 0.4076251685619354, "learning_rate": 7.393394768628954e-05, "loss": 3.0671525955200196, "step": 134110 }, { "epoch": 0.2714666666666667, "grad_norm": 0.24760350584983826, "learning_rate": 7.392426890483937e-05, "loss": 3.0737625122070313, "step": 134120 }, { "epoch": 0.27153333333333335, "grad_norm": 0.24694116413593292, "learning_rate": 7.39145889605794e-05, "loss": 3.057008171081543, "step": 134130 }, { "epoch": 0.2716, "grad_norm": 0.2502225935459137, "learning_rate": 7.390490785398009e-05, "loss": 3.064742851257324, "step": 134140 }, { "epoch": 0.27166666666666667, "grad_norm": 0.2134014219045639, "learning_rate": 7.389522558551198e-05, "loss": 3.132498550415039, "step": 134150 }, { "epoch": 0.2717333333333333, "grad_norm": 0.2023645043373108, "learning_rate": 7.388554215564567e-05, "loss": 3.116108703613281, "step": 134160 }, { "epoch": 0.2718, "grad_norm": 0.1918283998966217, "learning_rate": 7.38758575648518e-05, "loss": 3.0535709381103517, "step": 134170 }, { "epoch": 0.27186666666666665, "grad_norm": 0.1949814110994339, "learning_rate": 7.38661718136011e-05, "loss": 3.157307815551758, "step": 134180 }, { "epoch": 0.27193333333333336, "grad_norm": 0.24040280282497406, "learning_rate": 7.38564849023643e-05, "loss": 3.081077003479004, "step": 134190 }, { "epoch": 0.272, "grad_norm": 0.23605339229106903, "learning_rate": 7.384679683161225e-05, "loss": 3.1000900268554688, "step": 134200 }, { "epoch": 0.2720666666666667, "grad_norm": 0.29754704236984253, "learning_rate": 7.383710760181581e-05, "loss": 3.285557174682617, "step": 134210 }, { "epoch": 0.27213333333333334, "grad_norm": 0.20965149998664856, "learning_rate": 7.382741721344594e-05, "loss": 3.104419708251953, "step": 134220 }, { "epoch": 0.2722, "grad_norm": 0.18226826190948486, "learning_rate": 7.381772566697359e-05, "loss": 3.0643503189086916, "step": 134230 }, { "epoch": 0.27226666666666666, "grad_norm": 0.1924256831407547, "learning_rate": 7.380803296286984e-05, "loss": 3.052046012878418, "step": 134240 }, { "epoch": 0.2723333333333333, "grad_norm": 0.18970367312431335, "learning_rate": 7.379833910160578e-05, "loss": 3.093863868713379, "step": 134250 }, { "epoch": 0.2724, "grad_norm": 0.21271127462387085, "learning_rate": 7.378864408365255e-05, "loss": 3.098672866821289, "step": 134260 }, { "epoch": 0.2724666666666667, "grad_norm": 0.3401971161365509, "learning_rate": 7.377894790948139e-05, "loss": 3.148152542114258, "step": 134270 }, { "epoch": 0.27253333333333335, "grad_norm": 0.21635308861732483, "learning_rate": 7.376925057956355e-05, "loss": 3.0935964584350586, "step": 134280 }, { "epoch": 0.2726, "grad_norm": 0.2198425531387329, "learning_rate": 7.375955209437038e-05, "loss": 3.1023962020874025, "step": 134290 }, { "epoch": 0.27266666666666667, "grad_norm": 0.21156102418899536, "learning_rate": 7.374985245437323e-05, "loss": 3.104592132568359, "step": 134300 }, { "epoch": 0.2727333333333333, "grad_norm": 0.21053335070610046, "learning_rate": 7.374015166004358e-05, "loss": 3.121542739868164, "step": 134310 }, { "epoch": 0.2728, "grad_norm": 0.1968613862991333, "learning_rate": 7.373044971185291e-05, "loss": 3.1498128890991213, "step": 134320 }, { "epoch": 0.27286666666666665, "grad_norm": 0.1989702731370926, "learning_rate": 7.372074661027276e-05, "loss": 3.124240684509277, "step": 134330 }, { "epoch": 0.2729333333333333, "grad_norm": 0.27111825346946716, "learning_rate": 7.371104235577474e-05, "loss": 3.1046321868896483, "step": 134340 }, { "epoch": 0.273, "grad_norm": 0.19378654658794403, "learning_rate": 7.370133694883051e-05, "loss": 3.048709678649902, "step": 134350 }, { "epoch": 0.2730666666666667, "grad_norm": 0.18691390752792358, "learning_rate": 7.36916303899118e-05, "loss": 3.10750675201416, "step": 134360 }, { "epoch": 0.27313333333333334, "grad_norm": 0.2201625555753708, "learning_rate": 7.368192267949039e-05, "loss": 3.107248878479004, "step": 134370 }, { "epoch": 0.2732, "grad_norm": 0.20307864248752594, "learning_rate": 7.36722138180381e-05, "loss": 3.144384765625, "step": 134380 }, { "epoch": 0.27326666666666666, "grad_norm": 0.197172611951828, "learning_rate": 7.366250380602682e-05, "loss": 3.0581939697265623, "step": 134390 }, { "epoch": 0.2733333333333333, "grad_norm": 0.20068129897117615, "learning_rate": 7.36527926439285e-05, "loss": 3.0725566864013674, "step": 134400 }, { "epoch": 0.2734, "grad_norm": 0.18621718883514404, "learning_rate": 7.364308033221512e-05, "loss": 3.1021514892578126, "step": 134410 }, { "epoch": 0.2734666666666667, "grad_norm": 0.2860851585865021, "learning_rate": 7.363336687135875e-05, "loss": 3.0683385848999025, "step": 134420 }, { "epoch": 0.27353333333333335, "grad_norm": 0.22512899339199066, "learning_rate": 7.362365226183152e-05, "loss": 3.099903678894043, "step": 134430 }, { "epoch": 0.2736, "grad_norm": 0.20756562054157257, "learning_rate": 7.361393650410555e-05, "loss": 3.0647819519042967, "step": 134440 }, { "epoch": 0.27366666666666667, "grad_norm": 0.21397359669208527, "learning_rate": 7.360421959865311e-05, "loss": 3.1037654876708984, "step": 134450 }, { "epoch": 0.27373333333333333, "grad_norm": 0.26557862758636475, "learning_rate": 7.359450154594644e-05, "loss": 3.126969909667969, "step": 134460 }, { "epoch": 0.2738, "grad_norm": 0.21124398708343506, "learning_rate": 7.35847823464579e-05, "loss": 3.111252212524414, "step": 134470 }, { "epoch": 0.27386666666666665, "grad_norm": 0.2068970650434494, "learning_rate": 7.357506200065987e-05, "loss": 3.174595069885254, "step": 134480 }, { "epoch": 0.2739333333333333, "grad_norm": 0.19095183908939362, "learning_rate": 7.356534050902479e-05, "loss": 3.0438385009765625, "step": 134490 }, { "epoch": 0.274, "grad_norm": 0.20683947205543518, "learning_rate": 7.355561787202519e-05, "loss": 3.097141456604004, "step": 134500 }, { "epoch": 0.2740666666666667, "grad_norm": 0.2336668223142624, "learning_rate": 7.354589409013357e-05, "loss": 3.3810089111328123, "step": 134510 }, { "epoch": 0.27413333333333334, "grad_norm": 0.20158912241458893, "learning_rate": 7.35361691638226e-05, "loss": 3.1220523834228517, "step": 134520 }, { "epoch": 0.2742, "grad_norm": 0.1926807314157486, "learning_rate": 7.352644309356492e-05, "loss": 3.0901767730712892, "step": 134530 }, { "epoch": 0.27426666666666666, "grad_norm": 0.21388843655586243, "learning_rate": 7.351671587983324e-05, "loss": 3.036776542663574, "step": 134540 }, { "epoch": 0.2743333333333333, "grad_norm": 0.19268257915973663, "learning_rate": 7.350698752310037e-05, "loss": 3.1131452560424804, "step": 134550 }, { "epoch": 0.2744, "grad_norm": 0.20978009700775146, "learning_rate": 7.349725802383914e-05, "loss": 3.105458068847656, "step": 134560 }, { "epoch": 0.2744666666666667, "grad_norm": 0.18905165791511536, "learning_rate": 7.348752738252244e-05, "loss": 3.130253219604492, "step": 134570 }, { "epoch": 0.27453333333333335, "grad_norm": 0.2142236828804016, "learning_rate": 7.34777955996232e-05, "loss": 3.090705680847168, "step": 134580 }, { "epoch": 0.2746, "grad_norm": 0.29366472363471985, "learning_rate": 7.346806267561444e-05, "loss": 3.1377573013305664, "step": 134590 }, { "epoch": 0.27466666666666667, "grad_norm": 0.261595219373703, "learning_rate": 7.345832861096918e-05, "loss": 3.1011077880859377, "step": 134600 }, { "epoch": 0.27473333333333333, "grad_norm": 0.30754294991493225, "learning_rate": 7.344859340616059e-05, "loss": 3.1221046447753906, "step": 134610 }, { "epoch": 0.2748, "grad_norm": 0.21175730228424072, "learning_rate": 7.343885706166178e-05, "loss": 3.0796236038208007, "step": 134620 }, { "epoch": 0.27486666666666665, "grad_norm": 0.2154357135295868, "learning_rate": 7.342911957794602e-05, "loss": 3.1025903701782225, "step": 134630 }, { "epoch": 0.2749333333333333, "grad_norm": 0.1937161386013031, "learning_rate": 7.341938095548657e-05, "loss": 3.1182439804077147, "step": 134640 }, { "epoch": 0.275, "grad_norm": 0.19995974004268646, "learning_rate": 7.340964119475676e-05, "loss": 3.103485870361328, "step": 134650 }, { "epoch": 0.2750666666666667, "grad_norm": 0.18674497306346893, "learning_rate": 7.339990029622997e-05, "loss": 3.05023250579834, "step": 134660 }, { "epoch": 0.27513333333333334, "grad_norm": 0.24366851150989532, "learning_rate": 7.339015826037967e-05, "loss": 3.1528688430786134, "step": 134670 }, { "epoch": 0.2752, "grad_norm": 0.22504529356956482, "learning_rate": 7.338041508767934e-05, "loss": 3.080894660949707, "step": 134680 }, { "epoch": 0.27526666666666666, "grad_norm": 0.19625116884708405, "learning_rate": 7.337067077860254e-05, "loss": 3.1360904693603517, "step": 134690 }, { "epoch": 0.2753333333333333, "grad_norm": 0.21911031007766724, "learning_rate": 7.336092533362288e-05, "loss": 3.067639923095703, "step": 134700 }, { "epoch": 0.2754, "grad_norm": 0.2038031369447708, "learning_rate": 7.335117875321402e-05, "loss": 3.1099891662597656, "step": 134710 }, { "epoch": 0.2754666666666667, "grad_norm": 0.22058255970478058, "learning_rate": 7.33414310378497e-05, "loss": 3.0550409317016602, "step": 134720 }, { "epoch": 0.27553333333333335, "grad_norm": 0.19328975677490234, "learning_rate": 7.33316821880037e-05, "loss": 3.0823444366455077, "step": 134730 }, { "epoch": 0.2756, "grad_norm": 0.20407716929912567, "learning_rate": 7.33219322041498e-05, "loss": 3.0930912017822267, "step": 134740 }, { "epoch": 0.27566666666666667, "grad_norm": 0.19886519014835358, "learning_rate": 7.331218108676194e-05, "loss": 3.0671642303466795, "step": 134750 }, { "epoch": 0.27573333333333333, "grad_norm": 0.1958458572626114, "learning_rate": 7.330242883631403e-05, "loss": 3.0615156173706053, "step": 134760 }, { "epoch": 0.2758, "grad_norm": 0.4259364604949951, "learning_rate": 7.329267545328008e-05, "loss": 3.2334251403808594, "step": 134770 }, { "epoch": 0.27586666666666665, "grad_norm": 0.20543938875198364, "learning_rate": 7.328292093813414e-05, "loss": 3.1181428909301756, "step": 134780 }, { "epoch": 0.2759333333333333, "grad_norm": 0.20845936238765717, "learning_rate": 7.327316529135032e-05, "loss": 3.116670036315918, "step": 134790 }, { "epoch": 0.276, "grad_norm": 0.19170987606048584, "learning_rate": 7.326340851340276e-05, "loss": 3.0764705657958986, "step": 134800 }, { "epoch": 0.2760666666666667, "grad_norm": 0.18155130743980408, "learning_rate": 7.325365060476571e-05, "loss": 3.0689504623413084, "step": 134810 }, { "epoch": 0.27613333333333334, "grad_norm": 0.19840967655181885, "learning_rate": 7.324389156591343e-05, "loss": 3.1909130096435545, "step": 134820 }, { "epoch": 0.2762, "grad_norm": 0.19265837967395782, "learning_rate": 7.323413139732021e-05, "loss": 3.0820133209228517, "step": 134830 }, { "epoch": 0.27626666666666666, "grad_norm": 0.2234729379415512, "learning_rate": 7.322437009946048e-05, "loss": 3.10168399810791, "step": 134840 }, { "epoch": 0.2763333333333333, "grad_norm": 0.1887475997209549, "learning_rate": 7.321460767280867e-05, "loss": 3.107189750671387, "step": 134850 }, { "epoch": 0.2764, "grad_norm": 0.246076762676239, "learning_rate": 7.320484411783924e-05, "loss": 3.0398956298828126, "step": 134860 }, { "epoch": 0.2764666666666667, "grad_norm": 0.215611070394516, "learning_rate": 7.319507943502675e-05, "loss": 3.1660173416137694, "step": 134870 }, { "epoch": 0.27653333333333335, "grad_norm": 0.2145218700170517, "learning_rate": 7.31853136248458e-05, "loss": 3.0788217544555665, "step": 134880 }, { "epoch": 0.2766, "grad_norm": 0.20573213696479797, "learning_rate": 7.317554668777108e-05, "loss": 3.051458549499512, "step": 134890 }, { "epoch": 0.27666666666666667, "grad_norm": 0.20435939729213715, "learning_rate": 7.316577862427724e-05, "loss": 3.2715526580810548, "step": 134900 }, { "epoch": 0.27673333333333333, "grad_norm": 0.23743869364261627, "learning_rate": 7.315600943483909e-05, "loss": 3.047878074645996, "step": 134910 }, { "epoch": 0.2768, "grad_norm": 0.19719374179840088, "learning_rate": 7.314623911993142e-05, "loss": 3.0448740005493162, "step": 134920 }, { "epoch": 0.27686666666666665, "grad_norm": 0.20045331120491028, "learning_rate": 7.313646768002913e-05, "loss": 3.114451599121094, "step": 134930 }, { "epoch": 0.2769333333333333, "grad_norm": 0.23126038908958435, "learning_rate": 7.312669511560713e-05, "loss": 3.0455894470214844, "step": 134940 }, { "epoch": 0.277, "grad_norm": 0.23679879307746887, "learning_rate": 7.311692142714041e-05, "loss": 3.0630767822265623, "step": 134950 }, { "epoch": 0.2770666666666667, "grad_norm": 0.20713621377944946, "learning_rate": 7.3107146615104e-05, "loss": 3.111878204345703, "step": 134960 }, { "epoch": 0.27713333333333334, "grad_norm": 0.20014235377311707, "learning_rate": 7.309737067997303e-05, "loss": 3.062550735473633, "step": 134970 }, { "epoch": 0.2772, "grad_norm": 0.20272691547870636, "learning_rate": 7.30875936222226e-05, "loss": 3.051674461364746, "step": 134980 }, { "epoch": 0.27726666666666666, "grad_norm": 0.18445643782615662, "learning_rate": 7.307781544232791e-05, "loss": 3.1141803741455076, "step": 134990 }, { "epoch": 0.2773333333333333, "grad_norm": 0.19292689859867096, "learning_rate": 7.306803614076425e-05, "loss": 3.1270544052124025, "step": 135000 }, { "epoch": 0.2774, "grad_norm": 0.24535305798053741, "learning_rate": 7.305825571800693e-05, "loss": 3.1001205444335938, "step": 135010 }, { "epoch": 0.27746666666666664, "grad_norm": 0.20619742572307587, "learning_rate": 7.304847417453129e-05, "loss": 3.1294504165649415, "step": 135020 }, { "epoch": 0.27753333333333335, "grad_norm": 0.19140364229679108, "learning_rate": 7.303869151081278e-05, "loss": 3.1339330673217773, "step": 135030 }, { "epoch": 0.2776, "grad_norm": 0.20656096935272217, "learning_rate": 7.302890772732684e-05, "loss": 3.237970733642578, "step": 135040 }, { "epoch": 0.2776666666666667, "grad_norm": 0.23879766464233398, "learning_rate": 7.301912282454902e-05, "loss": 3.095878791809082, "step": 135050 }, { "epoch": 0.27773333333333333, "grad_norm": 0.1913091391324997, "learning_rate": 7.30093368029549e-05, "loss": 3.1678558349609376, "step": 135060 }, { "epoch": 0.2778, "grad_norm": 0.21187421679496765, "learning_rate": 7.299954966302012e-05, "loss": 3.1048383712768555, "step": 135070 }, { "epoch": 0.27786666666666665, "grad_norm": 0.2017248570919037, "learning_rate": 7.298976140522037e-05, "loss": 3.077295112609863, "step": 135080 }, { "epoch": 0.2779333333333333, "grad_norm": 0.363299697637558, "learning_rate": 7.297997203003138e-05, "loss": 3.1285730361938477, "step": 135090 }, { "epoch": 0.278, "grad_norm": 0.20892050862312317, "learning_rate": 7.297018153792898e-05, "loss": 3.1025869369506838, "step": 135100 }, { "epoch": 0.2780666666666667, "grad_norm": 0.2741938531398773, "learning_rate": 7.296038992938902e-05, "loss": 3.081759452819824, "step": 135110 }, { "epoch": 0.27813333333333334, "grad_norm": 0.23444442451000214, "learning_rate": 7.295059720488739e-05, "loss": 3.0927349090576173, "step": 135120 }, { "epoch": 0.2782, "grad_norm": 0.2294454127550125, "learning_rate": 7.294080336490005e-05, "loss": 3.073099136352539, "step": 135130 }, { "epoch": 0.27826666666666666, "grad_norm": 0.20826095342636108, "learning_rate": 7.293100840990305e-05, "loss": 3.0966537475585936, "step": 135140 }, { "epoch": 0.2783333333333333, "grad_norm": 0.2228473275899887, "learning_rate": 7.292121234037243e-05, "loss": 3.1006813049316406, "step": 135150 }, { "epoch": 0.2784, "grad_norm": 0.1958552747964859, "learning_rate": 7.291141515678434e-05, "loss": 3.166895294189453, "step": 135160 }, { "epoch": 0.27846666666666664, "grad_norm": 0.2322738617658615, "learning_rate": 7.290161685961494e-05, "loss": 3.1087202072143554, "step": 135170 }, { "epoch": 0.27853333333333335, "grad_norm": 0.22587135434150696, "learning_rate": 7.289181744934048e-05, "loss": 3.1003589630126953, "step": 135180 }, { "epoch": 0.2786, "grad_norm": 1.2448137998580933, "learning_rate": 7.288201692643723e-05, "loss": 2.9218515396118163, "step": 135190 }, { "epoch": 0.2786666666666667, "grad_norm": 0.6249520778656006, "learning_rate": 7.287221529138157e-05, "loss": 3.0058099746704103, "step": 135200 }, { "epoch": 0.27873333333333333, "grad_norm": 0.20508508384227753, "learning_rate": 7.286241254464984e-05, "loss": 3.0540750503540037, "step": 135210 }, { "epoch": 0.2788, "grad_norm": 0.2091277539730072, "learning_rate": 7.285260868671855e-05, "loss": 3.08648681640625, "step": 135220 }, { "epoch": 0.27886666666666665, "grad_norm": 0.24524623155593872, "learning_rate": 7.284280371806415e-05, "loss": 3.116571617126465, "step": 135230 }, { "epoch": 0.2789333333333333, "grad_norm": 0.23484712839126587, "learning_rate": 7.283299763916324e-05, "loss": 3.6850948333740234, "step": 135240 }, { "epoch": 0.279, "grad_norm": 0.18721537292003632, "learning_rate": 7.28231904504924e-05, "loss": 3.060105323791504, "step": 135250 }, { "epoch": 0.2790666666666667, "grad_norm": 0.19440560042858124, "learning_rate": 7.281338215252833e-05, "loss": 3.1406534194946287, "step": 135260 }, { "epoch": 0.27913333333333334, "grad_norm": 0.19960108399391174, "learning_rate": 7.280357274574772e-05, "loss": 3.1146213531494142, "step": 135270 }, { "epoch": 0.2792, "grad_norm": 0.20562557876110077, "learning_rate": 7.279376223062738e-05, "loss": 3.123423767089844, "step": 135280 }, { "epoch": 0.27926666666666666, "grad_norm": 0.20420007407665253, "learning_rate": 7.278395060764409e-05, "loss": 3.2533847808837892, "step": 135290 }, { "epoch": 0.2793333333333333, "grad_norm": 0.19779573380947113, "learning_rate": 7.277413787727477e-05, "loss": 3.079606628417969, "step": 135300 }, { "epoch": 0.2794, "grad_norm": 0.21706652641296387, "learning_rate": 7.276432403999633e-05, "loss": 3.097688102722168, "step": 135310 }, { "epoch": 0.27946666666666664, "grad_norm": 0.3385089933872223, "learning_rate": 7.275450909628579e-05, "loss": 3.072599983215332, "step": 135320 }, { "epoch": 0.27953333333333336, "grad_norm": 0.2991317808628082, "learning_rate": 7.274469304662017e-05, "loss": 3.127715301513672, "step": 135330 }, { "epoch": 0.2796, "grad_norm": 0.25209322571754456, "learning_rate": 7.273487589147656e-05, "loss": 3.2544570922851563, "step": 135340 }, { "epoch": 0.2796666666666667, "grad_norm": 0.19478070735931396, "learning_rate": 7.272505763133214e-05, "loss": 3.1083347320556642, "step": 135350 }, { "epoch": 0.27973333333333333, "grad_norm": 0.20810149610042572, "learning_rate": 7.271523826666409e-05, "loss": 3.1442737579345703, "step": 135360 }, { "epoch": 0.2798, "grad_norm": 0.2017703354358673, "learning_rate": 7.27054177979497e-05, "loss": 3.0428924560546875, "step": 135370 }, { "epoch": 0.27986666666666665, "grad_norm": 0.20417873561382294, "learning_rate": 7.269559622566623e-05, "loss": 3.127569007873535, "step": 135380 }, { "epoch": 0.2799333333333333, "grad_norm": 0.21798557043075562, "learning_rate": 7.268577355029108e-05, "loss": 3.1303260803222654, "step": 135390 }, { "epoch": 0.28, "grad_norm": 0.18594203889369965, "learning_rate": 7.267594977230166e-05, "loss": 3.1670677185058596, "step": 135400 }, { "epoch": 0.2800666666666667, "grad_norm": 0.18593917787075043, "learning_rate": 7.266612489217546e-05, "loss": 3.171974754333496, "step": 135410 }, { "epoch": 0.28013333333333335, "grad_norm": 0.27888232469558716, "learning_rate": 7.265629891038999e-05, "loss": 3.145647430419922, "step": 135420 }, { "epoch": 0.2802, "grad_norm": 0.1944991648197174, "learning_rate": 7.264647182742282e-05, "loss": 3.088460350036621, "step": 135430 }, { "epoch": 0.28026666666666666, "grad_norm": 0.18983863294124603, "learning_rate": 7.26366436437516e-05, "loss": 3.2194725036621095, "step": 135440 }, { "epoch": 0.2803333333333333, "grad_norm": 0.2256973534822464, "learning_rate": 7.262681435985401e-05, "loss": 3.117356872558594, "step": 135450 }, { "epoch": 0.2804, "grad_norm": 0.21810144186019897, "learning_rate": 7.26169839762078e-05, "loss": 3.083418846130371, "step": 135460 }, { "epoch": 0.28046666666666664, "grad_norm": 0.23035523295402527, "learning_rate": 7.260715249329075e-05, "loss": 3.082942008972168, "step": 135470 }, { "epoch": 0.28053333333333336, "grad_norm": 0.24343855679035187, "learning_rate": 7.25973199115807e-05, "loss": 3.1280950546264648, "step": 135480 }, { "epoch": 0.2806, "grad_norm": 0.2454870343208313, "learning_rate": 7.258748623155558e-05, "loss": 3.0903175354003904, "step": 135490 }, { "epoch": 0.2806666666666667, "grad_norm": 0.2033456712961197, "learning_rate": 7.257765145369332e-05, "loss": 3.018671226501465, "step": 135500 }, { "epoch": 0.28073333333333333, "grad_norm": 0.3966335952281952, "learning_rate": 7.256781557847196e-05, "loss": 3.142201805114746, "step": 135510 }, { "epoch": 0.2808, "grad_norm": 0.23108384013175964, "learning_rate": 7.255797860636951e-05, "loss": 3.1468574523925783, "step": 135520 }, { "epoch": 0.28086666666666665, "grad_norm": 0.20694789290428162, "learning_rate": 7.254814053786413e-05, "loss": 3.133475685119629, "step": 135530 }, { "epoch": 0.2809333333333333, "grad_norm": 0.21877174079418182, "learning_rate": 7.253830137343396e-05, "loss": 3.0722782135009767, "step": 135540 }, { "epoch": 0.281, "grad_norm": 0.20354413986206055, "learning_rate": 7.252846111355722e-05, "loss": 3.0857988357543946, "step": 135550 }, { "epoch": 0.2810666666666667, "grad_norm": 0.21611465513706207, "learning_rate": 7.251861975871219e-05, "loss": 3.109951400756836, "step": 135560 }, { "epoch": 0.28113333333333335, "grad_norm": 0.19647854566574097, "learning_rate": 7.25087773093772e-05, "loss": 3.076380157470703, "step": 135570 }, { "epoch": 0.2812, "grad_norm": 0.2356613576412201, "learning_rate": 7.249893376603063e-05, "loss": 3.106260871887207, "step": 135580 }, { "epoch": 0.28126666666666666, "grad_norm": 0.22371603548526764, "learning_rate": 7.248908912915092e-05, "loss": 3.0517635345458984, "step": 135590 }, { "epoch": 0.2813333333333333, "grad_norm": 0.19725416600704193, "learning_rate": 7.247924339921655e-05, "loss": 3.0804182052612306, "step": 135600 }, { "epoch": 0.2814, "grad_norm": 0.21652981638908386, "learning_rate": 7.246939657670604e-05, "loss": 3.0814865112304686, "step": 135610 }, { "epoch": 0.28146666666666664, "grad_norm": 0.21851813793182373, "learning_rate": 7.245954866209803e-05, "loss": 3.1377553939819336, "step": 135620 }, { "epoch": 0.28153333333333336, "grad_norm": 0.19520120322704315, "learning_rate": 7.244969965587112e-05, "loss": 3.0723945617675783, "step": 135630 }, { "epoch": 0.2816, "grad_norm": 0.23173393309116364, "learning_rate": 7.243984955850402e-05, "loss": 3.089372444152832, "step": 135640 }, { "epoch": 0.2816666666666667, "grad_norm": 0.22523672878742218, "learning_rate": 7.242999837047549e-05, "loss": 3.139412689208984, "step": 135650 }, { "epoch": 0.28173333333333334, "grad_norm": 0.19419468939304352, "learning_rate": 7.242014609226435e-05, "loss": 3.0754770278930663, "step": 135660 }, { "epoch": 0.2818, "grad_norm": 0.19469429552555084, "learning_rate": 7.241029272434942e-05, "loss": 3.097929573059082, "step": 135670 }, { "epoch": 0.28186666666666665, "grad_norm": 0.20259028673171997, "learning_rate": 7.240043826720963e-05, "loss": 3.1047689437866213, "step": 135680 }, { "epoch": 0.2819333333333333, "grad_norm": 0.18869540095329285, "learning_rate": 7.239058272132397e-05, "loss": 3.062444305419922, "step": 135690 }, { "epoch": 0.282, "grad_norm": 0.19659289717674255, "learning_rate": 7.238072608717141e-05, "loss": 3.0304920196533205, "step": 135700 }, { "epoch": 0.2820666666666667, "grad_norm": 0.2381151169538498, "learning_rate": 7.237086836523105e-05, "loss": 3.0444339752197265, "step": 135710 }, { "epoch": 0.28213333333333335, "grad_norm": 0.20561634004116058, "learning_rate": 7.2361009555982e-05, "loss": 3.0765640258789064, "step": 135720 }, { "epoch": 0.2822, "grad_norm": 0.20907653868198395, "learning_rate": 7.235114965990345e-05, "loss": 3.1131284713745115, "step": 135730 }, { "epoch": 0.28226666666666667, "grad_norm": 0.19360916316509247, "learning_rate": 7.23412886774746e-05, "loss": 3.161656379699707, "step": 135740 }, { "epoch": 0.2823333333333333, "grad_norm": 0.21554872393608093, "learning_rate": 7.233142660917477e-05, "loss": 3.053024673461914, "step": 135750 }, { "epoch": 0.2824, "grad_norm": 0.21936388313770294, "learning_rate": 7.232156345548327e-05, "loss": 3.064804458618164, "step": 135760 }, { "epoch": 0.28246666666666664, "grad_norm": 0.20975705981254578, "learning_rate": 7.231169921687947e-05, "loss": 3.043684387207031, "step": 135770 }, { "epoch": 0.28253333333333336, "grad_norm": 0.1909835934638977, "learning_rate": 7.230183389384285e-05, "loss": 3.076956558227539, "step": 135780 }, { "epoch": 0.2826, "grad_norm": 0.20936565101146698, "learning_rate": 7.229196748685287e-05, "loss": 3.1177928924560545, "step": 135790 }, { "epoch": 0.2826666666666667, "grad_norm": 0.18323653936386108, "learning_rate": 7.228209999638908e-05, "loss": 3.106785774230957, "step": 135800 }, { "epoch": 0.28273333333333334, "grad_norm": 0.1953027993440628, "learning_rate": 7.227223142293109e-05, "loss": 3.093608283996582, "step": 135810 }, { "epoch": 0.2828, "grad_norm": 0.26185402274131775, "learning_rate": 7.226236176695855e-05, "loss": 3.1174678802490234, "step": 135820 }, { "epoch": 0.28286666666666666, "grad_norm": 0.2126893252134323, "learning_rate": 7.225249102895113e-05, "loss": 3.1201751708984373, "step": 135830 }, { "epoch": 0.2829333333333333, "grad_norm": 0.22818569839000702, "learning_rate": 7.224261920938863e-05, "loss": 3.088941192626953, "step": 135840 }, { "epoch": 0.283, "grad_norm": 0.23217108845710754, "learning_rate": 7.223274630875084e-05, "loss": 3.0391862869262694, "step": 135850 }, { "epoch": 0.2830666666666667, "grad_norm": 0.20320576429367065, "learning_rate": 7.22228723275176e-05, "loss": 3.0559268951416017, "step": 135860 }, { "epoch": 0.28313333333333335, "grad_norm": 0.20092031359672546, "learning_rate": 7.221299726616885e-05, "loss": 3.0751482009887696, "step": 135870 }, { "epoch": 0.2832, "grad_norm": 0.22583970427513123, "learning_rate": 7.220312112518455e-05, "loss": 3.097934341430664, "step": 135880 }, { "epoch": 0.28326666666666667, "grad_norm": 0.2565787434577942, "learning_rate": 7.21932439050447e-05, "loss": 2.788398551940918, "step": 135890 }, { "epoch": 0.2833333333333333, "grad_norm": 0.20063650608062744, "learning_rate": 7.21833656062294e-05, "loss": 3.1457149505615236, "step": 135900 }, { "epoch": 0.2834, "grad_norm": 0.19000130891799927, "learning_rate": 7.217348622921874e-05, "loss": 3.0394187927246095, "step": 135910 }, { "epoch": 0.28346666666666664, "grad_norm": 0.20711173117160797, "learning_rate": 7.216360577449293e-05, "loss": 3.050811004638672, "step": 135920 }, { "epoch": 0.28353333333333336, "grad_norm": 0.20018059015274048, "learning_rate": 7.215372424253217e-05, "loss": 3.0642040252685545, "step": 135930 }, { "epoch": 0.2836, "grad_norm": 0.21177633106708527, "learning_rate": 7.214384163381676e-05, "loss": 3.1510316848754885, "step": 135940 }, { "epoch": 0.2836666666666667, "grad_norm": 0.25847524404525757, "learning_rate": 7.2133957948827e-05, "loss": 3.1531244277954102, "step": 135950 }, { "epoch": 0.28373333333333334, "grad_norm": 0.21611525118350983, "learning_rate": 7.21240731880433e-05, "loss": 3.1213178634643555, "step": 135960 }, { "epoch": 0.2838, "grad_norm": 0.39061591029167175, "learning_rate": 7.21141873519461e-05, "loss": 3.239899444580078, "step": 135970 }, { "epoch": 0.28386666666666666, "grad_norm": 0.22428454458713531, "learning_rate": 7.210430044101588e-05, "loss": 3.102464294433594, "step": 135980 }, { "epoch": 0.2839333333333333, "grad_norm": 0.21592313051223755, "learning_rate": 7.209441245573319e-05, "loss": 3.0595006942749023, "step": 135990 }, { "epoch": 0.284, "grad_norm": 0.19925080239772797, "learning_rate": 7.208452339657861e-05, "loss": 3.12554931640625, "step": 136000 }, { "epoch": 0.2840666666666667, "grad_norm": 0.21965795755386353, "learning_rate": 7.20746332640328e-05, "loss": 3.055919075012207, "step": 136010 }, { "epoch": 0.28413333333333335, "grad_norm": 0.20372635126113892, "learning_rate": 7.206474205857643e-05, "loss": 3.1201408386230467, "step": 136020 }, { "epoch": 0.2842, "grad_norm": 0.21526721119880676, "learning_rate": 7.205484978069029e-05, "loss": 3.086966133117676, "step": 136030 }, { "epoch": 0.28426666666666667, "grad_norm": 0.20530687272548676, "learning_rate": 7.204495643085513e-05, "loss": 3.0939573287963866, "step": 136040 }, { "epoch": 0.2843333333333333, "grad_norm": 0.23252488672733307, "learning_rate": 7.203506200955186e-05, "loss": 3.10339298248291, "step": 136050 }, { "epoch": 0.2844, "grad_norm": 0.1870289146900177, "learning_rate": 7.202516651726134e-05, "loss": 3.0764421463012694, "step": 136060 }, { "epoch": 0.28446666666666665, "grad_norm": 0.19931121170520782, "learning_rate": 7.201526995446457e-05, "loss": 3.057769012451172, "step": 136070 }, { "epoch": 0.28453333333333336, "grad_norm": 0.20449919998645782, "learning_rate": 7.200537232164252e-05, "loss": 3.062640380859375, "step": 136080 }, { "epoch": 0.2846, "grad_norm": 0.20297344028949738, "learning_rate": 7.19954736192763e-05, "loss": 3.0893875122070313, "step": 136090 }, { "epoch": 0.2846666666666667, "grad_norm": 0.2513751685619354, "learning_rate": 7.198557384784699e-05, "loss": 3.1101404190063477, "step": 136100 }, { "epoch": 0.28473333333333334, "grad_norm": 0.2522098422050476, "learning_rate": 7.197567300783575e-05, "loss": 3.288222885131836, "step": 136110 }, { "epoch": 0.2848, "grad_norm": 0.2214130312204361, "learning_rate": 7.196577109972382e-05, "loss": 3.146269607543945, "step": 136120 }, { "epoch": 0.28486666666666666, "grad_norm": 0.2347007542848587, "learning_rate": 7.195586812399245e-05, "loss": 3.278798294067383, "step": 136130 }, { "epoch": 0.2849333333333333, "grad_norm": 0.20018842816352844, "learning_rate": 7.194596408112299e-05, "loss": 3.119205284118652, "step": 136140 }, { "epoch": 0.285, "grad_norm": 0.2539750933647156, "learning_rate": 7.193605897159678e-05, "loss": 3.1335948944091796, "step": 136150 }, { "epoch": 0.2850666666666667, "grad_norm": 0.26662084460258484, "learning_rate": 7.192615279589527e-05, "loss": 3.0790733337402343, "step": 136160 }, { "epoch": 0.28513333333333335, "grad_norm": 0.31818559765815735, "learning_rate": 7.191624555449995e-05, "loss": 3.2900146484375, "step": 136170 }, { "epoch": 0.2852, "grad_norm": 0.1890367716550827, "learning_rate": 7.190633724789229e-05, "loss": 3.032508659362793, "step": 136180 }, { "epoch": 0.28526666666666667, "grad_norm": 0.2000933587551117, "learning_rate": 7.189642787655393e-05, "loss": 3.138959503173828, "step": 136190 }, { "epoch": 0.2853333333333333, "grad_norm": 0.20357511937618256, "learning_rate": 7.18865174409665e-05, "loss": 3.099522018432617, "step": 136200 }, { "epoch": 0.2854, "grad_norm": 0.3971244990825653, "learning_rate": 7.187660594161165e-05, "loss": 3.123660659790039, "step": 136210 }, { "epoch": 0.28546666666666665, "grad_norm": 0.23080074787139893, "learning_rate": 7.186669337897113e-05, "loss": 3.1101476669311525, "step": 136220 }, { "epoch": 0.2855333333333333, "grad_norm": 0.2071463167667389, "learning_rate": 7.185677975352675e-05, "loss": 3.070368003845215, "step": 136230 }, { "epoch": 0.2856, "grad_norm": 0.21620628237724304, "learning_rate": 7.184686506576032e-05, "loss": 3.2308120727539062, "step": 136240 }, { "epoch": 0.2856666666666667, "grad_norm": 0.2341943234205246, "learning_rate": 7.183694931615374e-05, "loss": 3.0994022369384764, "step": 136250 }, { "epoch": 0.28573333333333334, "grad_norm": 0.2403384894132614, "learning_rate": 7.182703250518899e-05, "loss": 3.1255632400512696, "step": 136260 }, { "epoch": 0.2858, "grad_norm": 0.19767217338085175, "learning_rate": 7.181711463334799e-05, "loss": 3.0888626098632814, "step": 136270 }, { "epoch": 0.28586666666666666, "grad_norm": 0.21136432886123657, "learning_rate": 7.180719570111285e-05, "loss": 3.1114776611328123, "step": 136280 }, { "epoch": 0.2859333333333333, "grad_norm": 0.2109096795320511, "learning_rate": 7.179727570896564e-05, "loss": 3.114798355102539, "step": 136290 }, { "epoch": 0.286, "grad_norm": 0.22856128215789795, "learning_rate": 7.178735465738851e-05, "loss": 3.118935203552246, "step": 136300 }, { "epoch": 0.2860666666666667, "grad_norm": 0.20958562195301056, "learning_rate": 7.177743254686366e-05, "loss": 3.031927299499512, "step": 136310 }, { "epoch": 0.28613333333333335, "grad_norm": 0.18594703078269958, "learning_rate": 7.176750937787336e-05, "loss": 3.0633020401000977, "step": 136320 }, { "epoch": 0.2862, "grad_norm": 0.27606725692749023, "learning_rate": 7.175758515089987e-05, "loss": 2.933796691894531, "step": 136330 }, { "epoch": 0.28626666666666667, "grad_norm": 0.27443215250968933, "learning_rate": 7.174765986642561e-05, "loss": 3.0108369827270507, "step": 136340 }, { "epoch": 0.28633333333333333, "grad_norm": 0.19178229570388794, "learning_rate": 7.173773352493294e-05, "loss": 3.1025800704956055, "step": 136350 }, { "epoch": 0.2864, "grad_norm": 0.2080274522304535, "learning_rate": 7.172780612690432e-05, "loss": 3.045489501953125, "step": 136360 }, { "epoch": 0.28646666666666665, "grad_norm": 0.20466715097427368, "learning_rate": 7.171787767282228e-05, "loss": 3.1842041015625, "step": 136370 }, { "epoch": 0.2865333333333333, "grad_norm": 0.2586720585823059, "learning_rate": 7.170794816316935e-05, "loss": 3.1025009155273438, "step": 136380 }, { "epoch": 0.2866, "grad_norm": 0.5389622449874878, "learning_rate": 7.169801759842817e-05, "loss": 2.488631820678711, "step": 136390 }, { "epoch": 0.2866666666666667, "grad_norm": 0.1900252103805542, "learning_rate": 7.16880859790814e-05, "loss": 3.1381397247314453, "step": 136400 }, { "epoch": 0.28673333333333334, "grad_norm": 0.2485426962375641, "learning_rate": 7.167815330561174e-05, "loss": 3.1579294204711914, "step": 136410 }, { "epoch": 0.2868, "grad_norm": 0.2103429138660431, "learning_rate": 7.166821957850197e-05, "loss": 3.084157180786133, "step": 136420 }, { "epoch": 0.28686666666666666, "grad_norm": 0.21548989415168762, "learning_rate": 7.165828479823489e-05, "loss": 3.140024757385254, "step": 136430 }, { "epoch": 0.2869333333333333, "grad_norm": 0.2476048469543457, "learning_rate": 7.164834896529338e-05, "loss": 3.071320343017578, "step": 136440 }, { "epoch": 0.287, "grad_norm": 0.20481398701667786, "learning_rate": 7.163841208016034e-05, "loss": 3.134907531738281, "step": 136450 }, { "epoch": 0.2870666666666667, "grad_norm": 0.2585482895374298, "learning_rate": 7.162847414331876e-05, "loss": 2.867279815673828, "step": 136460 }, { "epoch": 0.28713333333333335, "grad_norm": 0.1901901513338089, "learning_rate": 7.161853515525167e-05, "loss": 3.1216970443725587, "step": 136470 }, { "epoch": 0.2872, "grad_norm": 0.2008020132780075, "learning_rate": 7.160859511644214e-05, "loss": 3.254364013671875, "step": 136480 }, { "epoch": 0.28726666666666667, "grad_norm": 0.1920188069343567, "learning_rate": 7.159865402737326e-05, "loss": 3.1195100784301757, "step": 136490 }, { "epoch": 0.28733333333333333, "grad_norm": 0.2507277727127075, "learning_rate": 7.158871188852825e-05, "loss": 3.0720703125, "step": 136500 }, { "epoch": 0.2874, "grad_norm": 0.21268922090530396, "learning_rate": 7.15787687003903e-05, "loss": 3.1071266174316405, "step": 136510 }, { "epoch": 0.28746666666666665, "grad_norm": 0.19107100367546082, "learning_rate": 7.15688244634427e-05, "loss": 3.0500213623046877, "step": 136520 }, { "epoch": 0.2875333333333333, "grad_norm": 0.20185601711273193, "learning_rate": 7.155887917816877e-05, "loss": 3.110744667053223, "step": 136530 }, { "epoch": 0.2876, "grad_norm": 0.2274792194366455, "learning_rate": 7.15489328450519e-05, "loss": 3.0606706619262694, "step": 136540 }, { "epoch": 0.2876666666666667, "grad_norm": 0.20935213565826416, "learning_rate": 7.153898546457551e-05, "loss": 3.0732030868530273, "step": 136550 }, { "epoch": 0.28773333333333334, "grad_norm": 0.2191091924905777, "learning_rate": 7.152903703722309e-05, "loss": 3.0868898391723634, "step": 136560 }, { "epoch": 0.2878, "grad_norm": 0.19853366911411285, "learning_rate": 7.151908756347817e-05, "loss": 3.0869729995727537, "step": 136570 }, { "epoch": 0.28786666666666666, "grad_norm": 0.194431334733963, "learning_rate": 7.150913704382432e-05, "loss": 3.1221837997436523, "step": 136580 }, { "epoch": 0.2879333333333333, "grad_norm": 0.25424760580062866, "learning_rate": 7.149918547874518e-05, "loss": 3.0725732803344727, "step": 136590 }, { "epoch": 0.288, "grad_norm": 0.19171319901943207, "learning_rate": 7.148923286872445e-05, "loss": 3.0639884948730467, "step": 136600 }, { "epoch": 0.2880666666666667, "grad_norm": 0.21412049233913422, "learning_rate": 7.147927921424582e-05, "loss": 3.134748649597168, "step": 136610 }, { "epoch": 0.28813333333333335, "grad_norm": 0.19339898228645325, "learning_rate": 7.146932451579313e-05, "loss": 3.114295768737793, "step": 136620 }, { "epoch": 0.2882, "grad_norm": 0.20135948061943054, "learning_rate": 7.145936877385018e-05, "loss": 3.1024702072143553, "step": 136630 }, { "epoch": 0.28826666666666667, "grad_norm": 0.18952438235282898, "learning_rate": 7.144941198890087e-05, "loss": 3.1383129119873048, "step": 136640 }, { "epoch": 0.28833333333333333, "grad_norm": 0.21303464472293854, "learning_rate": 7.143945416142915e-05, "loss": 3.1718299865722654, "step": 136650 }, { "epoch": 0.2884, "grad_norm": 0.20283012092113495, "learning_rate": 7.142949529191898e-05, "loss": 3.1434324264526365, "step": 136660 }, { "epoch": 0.28846666666666665, "grad_norm": 0.21064493060112, "learning_rate": 7.14195353808544e-05, "loss": 3.0694562911987306, "step": 136670 }, { "epoch": 0.2885333333333333, "grad_norm": 0.2100248783826828, "learning_rate": 7.140957442871952e-05, "loss": 3.18924617767334, "step": 136680 }, { "epoch": 0.2886, "grad_norm": 0.19801892340183258, "learning_rate": 7.139961243599847e-05, "loss": 3.0729475021362305, "step": 136690 }, { "epoch": 0.2886666666666667, "grad_norm": 0.22349946200847626, "learning_rate": 7.138964940317543e-05, "loss": 3.0589632034301757, "step": 136700 }, { "epoch": 0.28873333333333334, "grad_norm": 0.21331153810024261, "learning_rate": 7.137968533073466e-05, "loss": 3.060628318786621, "step": 136710 }, { "epoch": 0.2888, "grad_norm": 0.21133846044540405, "learning_rate": 7.136972021916043e-05, "loss": 3.1037691116333006, "step": 136720 }, { "epoch": 0.28886666666666666, "grad_norm": 0.261549711227417, "learning_rate": 7.135975406893711e-05, "loss": 3.062620735168457, "step": 136730 }, { "epoch": 0.2889333333333333, "grad_norm": 0.24864065647125244, "learning_rate": 7.134978688054909e-05, "loss": 3.0650955200195313, "step": 136740 }, { "epoch": 0.289, "grad_norm": 0.20554371178150177, "learning_rate": 7.133981865448078e-05, "loss": 3.0212610244750975, "step": 136750 }, { "epoch": 0.2890666666666667, "grad_norm": 0.199406236410141, "learning_rate": 7.132984939121671e-05, "loss": 3.04685001373291, "step": 136760 }, { "epoch": 0.28913333333333335, "grad_norm": 0.19843536615371704, "learning_rate": 7.13198790912414e-05, "loss": 3.0953180313110353, "step": 136770 }, { "epoch": 0.2892, "grad_norm": 0.2016967236995697, "learning_rate": 7.130990775503944e-05, "loss": 3.106058883666992, "step": 136780 }, { "epoch": 0.28926666666666667, "grad_norm": 0.19465401768684387, "learning_rate": 7.129993538309549e-05, "loss": 3.1575033187866213, "step": 136790 }, { "epoch": 0.28933333333333333, "grad_norm": 0.19944801926612854, "learning_rate": 7.128996197589425e-05, "loss": 3.072457122802734, "step": 136800 }, { "epoch": 0.2894, "grad_norm": 0.8915180563926697, "learning_rate": 7.127998753392046e-05, "loss": 3.100258445739746, "step": 136810 }, { "epoch": 0.28946666666666665, "grad_norm": 0.24848267436027527, "learning_rate": 7.12700120576589e-05, "loss": 3.0477781295776367, "step": 136820 }, { "epoch": 0.2895333333333333, "grad_norm": 0.20503342151641846, "learning_rate": 7.126003554759444e-05, "loss": 3.091659736633301, "step": 136830 }, { "epoch": 0.2896, "grad_norm": 0.21840302646160126, "learning_rate": 7.125005800421196e-05, "loss": 3.106566047668457, "step": 136840 }, { "epoch": 0.2896666666666667, "grad_norm": 0.19728368520736694, "learning_rate": 7.124007942799641e-05, "loss": 3.2096458435058595, "step": 136850 }, { "epoch": 0.28973333333333334, "grad_norm": 0.2313506007194519, "learning_rate": 7.123009981943278e-05, "loss": 3.010733222961426, "step": 136860 }, { "epoch": 0.2898, "grad_norm": 0.22091570496559143, "learning_rate": 7.122011917900614e-05, "loss": 3.136876678466797, "step": 136870 }, { "epoch": 0.28986666666666666, "grad_norm": 0.18412034213542938, "learning_rate": 7.121013750720155e-05, "loss": 3.0908748626708986, "step": 136880 }, { "epoch": 0.2899333333333333, "grad_norm": 0.1962573081254959, "learning_rate": 7.12001548045042e-05, "loss": 3.083729934692383, "step": 136890 }, { "epoch": 0.29, "grad_norm": 0.8177154660224915, "learning_rate": 7.119017107139925e-05, "loss": 2.8990684509277345, "step": 136900 }, { "epoch": 0.29006666666666664, "grad_norm": 0.2153201848268509, "learning_rate": 7.118018630837196e-05, "loss": 2.9529756546020507, "step": 136910 }, { "epoch": 0.29013333333333335, "grad_norm": 0.18571707606315613, "learning_rate": 7.117020051590764e-05, "loss": 3.0710451126098635, "step": 136920 }, { "epoch": 0.2902, "grad_norm": 0.1930873990058899, "learning_rate": 7.116021369449162e-05, "loss": 3.119340515136719, "step": 136930 }, { "epoch": 0.2902666666666667, "grad_norm": 0.2851179838180542, "learning_rate": 7.11502258446093e-05, "loss": 3.1800065994262696, "step": 136940 }, { "epoch": 0.29033333333333333, "grad_norm": 0.19861751794815063, "learning_rate": 7.114023696674614e-05, "loss": 3.0199201583862303, "step": 136950 }, { "epoch": 0.2904, "grad_norm": 0.21045956015586853, "learning_rate": 7.113024706138762e-05, "loss": 3.114371681213379, "step": 136960 }, { "epoch": 0.29046666666666665, "grad_norm": 0.2543427050113678, "learning_rate": 7.112025612901929e-05, "loss": 3.09041805267334, "step": 136970 }, { "epoch": 0.2905333333333333, "grad_norm": 0.1945609748363495, "learning_rate": 7.111026417012676e-05, "loss": 3.1167236328125, "step": 136980 }, { "epoch": 0.2906, "grad_norm": 0.21260832250118256, "learning_rate": 7.110027118519567e-05, "loss": 3.1129974365234374, "step": 136990 }, { "epoch": 0.2906666666666667, "grad_norm": 0.20032647252082825, "learning_rate": 7.109027717471172e-05, "loss": 3.073011016845703, "step": 137000 }, { "epoch": 0.29073333333333334, "grad_norm": 0.1869666576385498, "learning_rate": 7.108028213916064e-05, "loss": 3.0656381607055665, "step": 137010 }, { "epoch": 0.2908, "grad_norm": 0.2084699273109436, "learning_rate": 7.107028607902825e-05, "loss": 3.0537628173828124, "step": 137020 }, { "epoch": 0.29086666666666666, "grad_norm": 0.21786218881607056, "learning_rate": 7.106028899480037e-05, "loss": 3.1208173751831056, "step": 137030 }, { "epoch": 0.2909333333333333, "grad_norm": 0.35820820927619934, "learning_rate": 7.105029088696293e-05, "loss": 3.0994842529296873, "step": 137040 }, { "epoch": 0.291, "grad_norm": 0.20154331624507904, "learning_rate": 7.104029175600184e-05, "loss": 3.1352108001708983, "step": 137050 }, { "epoch": 0.29106666666666664, "grad_norm": 0.19447945058345795, "learning_rate": 7.103029160240312e-05, "loss": 3.0907367706298827, "step": 137060 }, { "epoch": 0.29113333333333336, "grad_norm": 0.2048235833644867, "learning_rate": 7.102029042665283e-05, "loss": 3.1129018783569338, "step": 137070 }, { "epoch": 0.2912, "grad_norm": 0.22078727185726166, "learning_rate": 7.101028822923701e-05, "loss": 3.0684484481811523, "step": 137080 }, { "epoch": 0.2912666666666667, "grad_norm": 0.20776714384555817, "learning_rate": 7.100028501064184e-05, "loss": 3.093784713745117, "step": 137090 }, { "epoch": 0.29133333333333333, "grad_norm": 0.18362274765968323, "learning_rate": 7.099028077135352e-05, "loss": 3.0565715789794923, "step": 137100 }, { "epoch": 0.2914, "grad_norm": 0.20187899470329285, "learning_rate": 7.098027551185828e-05, "loss": 3.095266914367676, "step": 137110 }, { "epoch": 0.29146666666666665, "grad_norm": 0.22705647349357605, "learning_rate": 7.097026923264243e-05, "loss": 3.1060720443725587, "step": 137120 }, { "epoch": 0.2915333333333333, "grad_norm": 0.23059114813804626, "learning_rate": 7.09602619341923e-05, "loss": 3.262329864501953, "step": 137130 }, { "epoch": 0.2916, "grad_norm": 0.19224102795124054, "learning_rate": 7.095025361699427e-05, "loss": 3.1151718139648437, "step": 137140 }, { "epoch": 0.2916666666666667, "grad_norm": 0.20059853792190552, "learning_rate": 7.094024428153481e-05, "loss": 3.070237159729004, "step": 137150 }, { "epoch": 0.29173333333333334, "grad_norm": 0.19345928728580475, "learning_rate": 7.09302339283004e-05, "loss": 3.0561925888061525, "step": 137160 }, { "epoch": 0.2918, "grad_norm": 0.19455765187740326, "learning_rate": 7.092022255777756e-05, "loss": 3.3019275665283203, "step": 137170 }, { "epoch": 0.29186666666666666, "grad_norm": 0.22596625983715057, "learning_rate": 7.091021017045292e-05, "loss": 3.089734649658203, "step": 137180 }, { "epoch": 0.2919333333333333, "grad_norm": 0.19806218147277832, "learning_rate": 7.090019676681308e-05, "loss": 3.183668327331543, "step": 137190 }, { "epoch": 0.292, "grad_norm": 0.19947172701358795, "learning_rate": 7.089018234734476e-05, "loss": 3.034767913818359, "step": 137200 }, { "epoch": 0.29206666666666664, "grad_norm": 0.19026567041873932, "learning_rate": 7.088016691253467e-05, "loss": 3.0895498275756834, "step": 137210 }, { "epoch": 0.29213333333333336, "grad_norm": 0.2060554474592209, "learning_rate": 7.087015046286964e-05, "loss": 3.0533140182495115, "step": 137220 }, { "epoch": 0.2922, "grad_norm": 0.2022400200366974, "learning_rate": 7.086013299883647e-05, "loss": 3.100572204589844, "step": 137230 }, { "epoch": 0.2922666666666667, "grad_norm": 0.3483610153198242, "learning_rate": 7.085011452092206e-05, "loss": 3.0758947372436523, "step": 137240 }, { "epoch": 0.29233333333333333, "grad_norm": 0.19862988591194153, "learning_rate": 7.084009502961333e-05, "loss": 3.092565155029297, "step": 137250 }, { "epoch": 0.2924, "grad_norm": 0.21472668647766113, "learning_rate": 7.08300745253973e-05, "loss": 3.098770332336426, "step": 137260 }, { "epoch": 0.29246666666666665, "grad_norm": 0.2085365206003189, "learning_rate": 7.082005300876097e-05, "loss": 3.0949195861816405, "step": 137270 }, { "epoch": 0.2925333333333333, "grad_norm": 0.1859356015920639, "learning_rate": 7.081003048019144e-05, "loss": 3.080963706970215, "step": 137280 }, { "epoch": 0.2926, "grad_norm": 0.4375040531158447, "learning_rate": 7.080000694017585e-05, "loss": 3.1405982971191406, "step": 137290 }, { "epoch": 0.2926666666666667, "grad_norm": 0.2092631757259369, "learning_rate": 7.078998238920136e-05, "loss": 3.0886863708496093, "step": 137300 }, { "epoch": 0.29273333333333335, "grad_norm": 0.20896661281585693, "learning_rate": 7.077995682775522e-05, "loss": 3.0997766494750976, "step": 137310 }, { "epoch": 0.2928, "grad_norm": 0.20747292041778564, "learning_rate": 7.07699302563247e-05, "loss": 3.142325592041016, "step": 137320 }, { "epoch": 0.29286666666666666, "grad_norm": 0.20106492936611176, "learning_rate": 7.075990267539712e-05, "loss": 3.073158073425293, "step": 137330 }, { "epoch": 0.2929333333333333, "grad_norm": 0.20087140798568726, "learning_rate": 7.074987408545989e-05, "loss": 3.0752416610717774, "step": 137340 }, { "epoch": 0.293, "grad_norm": 0.18908005952835083, "learning_rate": 7.073984448700042e-05, "loss": 3.0922687530517576, "step": 137350 }, { "epoch": 0.29306666666666664, "grad_norm": 0.1974479854106903, "learning_rate": 7.072981388050617e-05, "loss": 3.0900291442871093, "step": 137360 }, { "epoch": 0.29313333333333336, "grad_norm": 0.21394824981689453, "learning_rate": 7.071978226646468e-05, "loss": 3.0959882736206055, "step": 137370 }, { "epoch": 0.2932, "grad_norm": 0.24948999285697937, "learning_rate": 7.070974964536353e-05, "loss": 3.049463081359863, "step": 137380 }, { "epoch": 0.2932666666666667, "grad_norm": 0.20914334058761597, "learning_rate": 7.069971601769034e-05, "loss": 3.1065292358398438, "step": 137390 }, { "epoch": 0.29333333333333333, "grad_norm": 0.19937172532081604, "learning_rate": 7.068968138393278e-05, "loss": 3.0584169387817384, "step": 137400 }, { "epoch": 0.2934, "grad_norm": 0.208944171667099, "learning_rate": 7.067964574457857e-05, "loss": 3.0846351623535155, "step": 137410 }, { "epoch": 0.29346666666666665, "grad_norm": 0.2073621302843094, "learning_rate": 7.066960910011548e-05, "loss": 3.11908016204834, "step": 137420 }, { "epoch": 0.2935333333333333, "grad_norm": 0.2650955617427826, "learning_rate": 7.065957145103133e-05, "loss": 3.0891456604003906, "step": 137430 }, { "epoch": 0.2936, "grad_norm": 0.23657487332820892, "learning_rate": 7.064953279781397e-05, "loss": 3.105772590637207, "step": 137440 }, { "epoch": 0.2936666666666667, "grad_norm": 0.21536923944950104, "learning_rate": 7.063949314095135e-05, "loss": 3.128805160522461, "step": 137450 }, { "epoch": 0.29373333333333335, "grad_norm": 0.20362411439418793, "learning_rate": 7.062945248093142e-05, "loss": 3.2511417388916017, "step": 137460 }, { "epoch": 0.2938, "grad_norm": 0.213265061378479, "learning_rate": 7.061941081824219e-05, "loss": 3.0784765243530274, "step": 137470 }, { "epoch": 0.29386666666666666, "grad_norm": 0.19258184731006622, "learning_rate": 7.060936815337172e-05, "loss": 3.049722671508789, "step": 137480 }, { "epoch": 0.2939333333333333, "grad_norm": 0.2120281457901001, "learning_rate": 7.059932448680814e-05, "loss": 3.051565933227539, "step": 137490 }, { "epoch": 0.294, "grad_norm": 0.2017063945531845, "learning_rate": 7.058927981903959e-05, "loss": 3.15212459564209, "step": 137500 }, { "epoch": 0.29406666666666664, "grad_norm": 0.2181047797203064, "learning_rate": 7.057923415055427e-05, "loss": 3.0655849456787108, "step": 137510 }, { "epoch": 0.29413333333333336, "grad_norm": 0.23232340812683105, "learning_rate": 7.056918748184047e-05, "loss": 3.1067022323608398, "step": 137520 }, { "epoch": 0.2942, "grad_norm": 0.19611306488513947, "learning_rate": 7.055913981338648e-05, "loss": 3.070867347717285, "step": 137530 }, { "epoch": 0.2942666666666667, "grad_norm": 0.2876184582710266, "learning_rate": 7.054909114568065e-05, "loss": 3.054581642150879, "step": 137540 }, { "epoch": 0.29433333333333334, "grad_norm": 0.22589001059532166, "learning_rate": 7.053904147921139e-05, "loss": 3.071952056884766, "step": 137550 }, { "epoch": 0.2944, "grad_norm": 0.2521408796310425, "learning_rate": 7.052899081446716e-05, "loss": 3.0619224548339843, "step": 137560 }, { "epoch": 0.29446666666666665, "grad_norm": 0.20664498209953308, "learning_rate": 7.051893915193643e-05, "loss": 3.277660369873047, "step": 137570 }, { "epoch": 0.2945333333333333, "grad_norm": 0.19727374613285065, "learning_rate": 7.050888649210777e-05, "loss": 3.0616390228271486, "step": 137580 }, { "epoch": 0.2946, "grad_norm": 0.2336261123418808, "learning_rate": 7.049883283546978e-05, "loss": 3.069724464416504, "step": 137590 }, { "epoch": 0.2946666666666667, "grad_norm": 0.19704565405845642, "learning_rate": 7.04887781825111e-05, "loss": 3.045753288269043, "step": 137600 }, { "epoch": 0.29473333333333335, "grad_norm": 0.20195762813091278, "learning_rate": 7.047872253372042e-05, "loss": 3.1285989761352537, "step": 137610 }, { "epoch": 0.2948, "grad_norm": 0.21567216515541077, "learning_rate": 7.046866588958649e-05, "loss": 3.1080841064453124, "step": 137620 }, { "epoch": 0.29486666666666667, "grad_norm": 0.2315462827682495, "learning_rate": 7.04586082505981e-05, "loss": 3.2125797271728516, "step": 137630 }, { "epoch": 0.2949333333333333, "grad_norm": 0.1978050172328949, "learning_rate": 7.044854961724409e-05, "loss": 3.1586635589599608, "step": 137640 }, { "epoch": 0.295, "grad_norm": 0.2721981704235077, "learning_rate": 7.043848999001335e-05, "loss": 3.125861930847168, "step": 137650 }, { "epoch": 0.29506666666666664, "grad_norm": 0.1993376761674881, "learning_rate": 7.04284293693948e-05, "loss": 3.116773796081543, "step": 137660 }, { "epoch": 0.29513333333333336, "grad_norm": 0.18807050585746765, "learning_rate": 7.041836775587743e-05, "loss": 3.079759216308594, "step": 137670 }, { "epoch": 0.2952, "grad_norm": 0.20705968141555786, "learning_rate": 7.040830514995029e-05, "loss": 3.084111785888672, "step": 137680 }, { "epoch": 0.2952666666666667, "grad_norm": 0.218755841255188, "learning_rate": 7.039824155210244e-05, "loss": 3.094685745239258, "step": 137690 }, { "epoch": 0.29533333333333334, "grad_norm": 0.1894661784172058, "learning_rate": 7.038817696282302e-05, "loss": 3.0748252868652344, "step": 137700 }, { "epoch": 0.2954, "grad_norm": 0.24771423637866974, "learning_rate": 7.037811138260122e-05, "loss": 3.0588333129882814, "step": 137710 }, { "epoch": 0.29546666666666666, "grad_norm": 0.2148604542016983, "learning_rate": 7.036804481192622e-05, "loss": 3.106441116333008, "step": 137720 }, { "epoch": 0.2955333333333333, "grad_norm": 0.20729181170463562, "learning_rate": 7.035797725128734e-05, "loss": 3.1197046279907226, "step": 137730 }, { "epoch": 0.2956, "grad_norm": 0.2130853831768036, "learning_rate": 7.034790870117389e-05, "loss": 3.0710823059082033, "step": 137740 }, { "epoch": 0.2956666666666667, "grad_norm": 0.2547794580459595, "learning_rate": 7.033783916207522e-05, "loss": 3.0886728286743166, "step": 137750 }, { "epoch": 0.29573333333333335, "grad_norm": 0.25626662373542786, "learning_rate": 7.032776863448078e-05, "loss": 3.0919910430908204, "step": 137760 }, { "epoch": 0.2958, "grad_norm": 0.20598922669887543, "learning_rate": 7.031769711888e-05, "loss": 3.002806854248047, "step": 137770 }, { "epoch": 0.29586666666666667, "grad_norm": 0.21441859006881714, "learning_rate": 7.030762461576241e-05, "loss": 3.1282543182373046, "step": 137780 }, { "epoch": 0.2959333333333333, "grad_norm": 0.22438788414001465, "learning_rate": 7.029755112561758e-05, "loss": 3.04992790222168, "step": 137790 }, { "epoch": 0.296, "grad_norm": 0.20007109642028809, "learning_rate": 7.028747664893511e-05, "loss": 3.101877784729004, "step": 137800 }, { "epoch": 0.29606666666666664, "grad_norm": 0.21545526385307312, "learning_rate": 7.027740118620466e-05, "loss": 3.0661611557006836, "step": 137810 }, { "epoch": 0.29613333333333336, "grad_norm": 0.19303251802921295, "learning_rate": 7.026732473791593e-05, "loss": 3.0736972808837892, "step": 137820 }, { "epoch": 0.2962, "grad_norm": 0.2035951167345047, "learning_rate": 7.025724730455868e-05, "loss": 3.0748023986816406, "step": 137830 }, { "epoch": 0.2962666666666667, "grad_norm": 0.20536097884178162, "learning_rate": 7.024716888662271e-05, "loss": 3.1255889892578126, "step": 137840 }, { "epoch": 0.29633333333333334, "grad_norm": 0.19274236261844635, "learning_rate": 7.023708948459787e-05, "loss": 3.0350624084472657, "step": 137850 }, { "epoch": 0.2964, "grad_norm": 0.25230297446250916, "learning_rate": 7.022700909897406e-05, "loss": 3.0531248092651366, "step": 137860 }, { "epoch": 0.29646666666666666, "grad_norm": 0.1984786093235016, "learning_rate": 7.021692773024123e-05, "loss": 3.086394119262695, "step": 137870 }, { "epoch": 0.2965333333333333, "grad_norm": 0.20042215287685394, "learning_rate": 7.020684537888935e-05, "loss": 3.079954719543457, "step": 137880 }, { "epoch": 0.2966, "grad_norm": 0.2009027749300003, "learning_rate": 7.019676204540848e-05, "loss": 3.0982437133789062, "step": 137890 }, { "epoch": 0.2966666666666667, "grad_norm": 0.21792061626911163, "learning_rate": 7.01866777302887e-05, "loss": 3.1463600158691407, "step": 137900 }, { "epoch": 0.29673333333333335, "grad_norm": 0.21175651252269745, "learning_rate": 7.017659243402014e-05, "loss": 3.0901111602783202, "step": 137910 }, { "epoch": 0.2968, "grad_norm": 0.1796213686466217, "learning_rate": 7.0166506157093e-05, "loss": 3.100010108947754, "step": 137920 }, { "epoch": 0.29686666666666667, "grad_norm": 0.20904043316841125, "learning_rate": 7.015641889999749e-05, "loss": 3.1102573394775392, "step": 137930 }, { "epoch": 0.2969333333333333, "grad_norm": 0.21445800364017487, "learning_rate": 7.014633066322391e-05, "loss": 3.087591552734375, "step": 137940 }, { "epoch": 0.297, "grad_norm": 0.2097627818584442, "learning_rate": 7.013624144726258e-05, "loss": 2.9044977188110352, "step": 137950 }, { "epoch": 0.29706666666666665, "grad_norm": 0.21276485919952393, "learning_rate": 7.012615125260388e-05, "loss": 3.075885009765625, "step": 137960 }, { "epoch": 0.29713333333333336, "grad_norm": 0.26894062757492065, "learning_rate": 7.011606007973821e-05, "loss": 3.0527921676635743, "step": 137970 }, { "epoch": 0.2972, "grad_norm": 0.2500566244125366, "learning_rate": 7.010596792915606e-05, "loss": 3.0748308181762694, "step": 137980 }, { "epoch": 0.2972666666666667, "grad_norm": 0.1904960572719574, "learning_rate": 7.009587480134792e-05, "loss": 3.069287872314453, "step": 137990 }, { "epoch": 0.29733333333333334, "grad_norm": 0.19659000635147095, "learning_rate": 7.00857806968044e-05, "loss": 3.0831745147705076, "step": 138000 }, { "epoch": 0.2974, "grad_norm": 0.19241903722286224, "learning_rate": 7.007568561601606e-05, "loss": 3.1071279525756834, "step": 138010 }, { "epoch": 0.29746666666666666, "grad_norm": 0.2111819088459015, "learning_rate": 7.00655895594736e-05, "loss": 3.0398160934448244, "step": 138020 }, { "epoch": 0.2975333333333333, "grad_norm": 0.19702719151973724, "learning_rate": 7.00554925276677e-05, "loss": 3.1013349533081054, "step": 138030 }, { "epoch": 0.2976, "grad_norm": 0.1889553815126419, "learning_rate": 7.004539452108914e-05, "loss": 3.0378150939941406, "step": 138040 }, { "epoch": 0.2976666666666667, "grad_norm": 0.1924562007188797, "learning_rate": 7.003529554022869e-05, "loss": 3.0574907302856444, "step": 138050 }, { "epoch": 0.29773333333333335, "grad_norm": 0.2007715255022049, "learning_rate": 7.00251955855772e-05, "loss": 3.0952606201171875, "step": 138060 }, { "epoch": 0.2978, "grad_norm": 0.21771076321601868, "learning_rate": 7.001509465762561e-05, "loss": 3.0986961364746093, "step": 138070 }, { "epoch": 0.29786666666666667, "grad_norm": 0.21373620629310608, "learning_rate": 7.000499275686482e-05, "loss": 3.1123451232910155, "step": 138080 }, { "epoch": 0.29793333333333333, "grad_norm": 0.19171831011772156, "learning_rate": 6.999488988378584e-05, "loss": 3.098918914794922, "step": 138090 }, { "epoch": 0.298, "grad_norm": 0.8777772784233093, "learning_rate": 6.99847860388797e-05, "loss": 3.041713333129883, "step": 138100 }, { "epoch": 0.29806666666666665, "grad_norm": 0.20087043941020966, "learning_rate": 6.997468122263747e-05, "loss": 3.0751363754272463, "step": 138110 }, { "epoch": 0.2981333333333333, "grad_norm": 0.20881898701190948, "learning_rate": 6.996457543555033e-05, "loss": 3.037791442871094, "step": 138120 }, { "epoch": 0.2982, "grad_norm": 0.26065558195114136, "learning_rate": 6.995446867810941e-05, "loss": 3.1847103118896483, "step": 138130 }, { "epoch": 0.2982666666666667, "grad_norm": 0.1975654810667038, "learning_rate": 6.994436095080594e-05, "loss": 3.1423389434814455, "step": 138140 }, { "epoch": 0.29833333333333334, "grad_norm": 0.260308176279068, "learning_rate": 6.993425225413122e-05, "loss": 3.1274871826171875, "step": 138150 }, { "epoch": 0.2984, "grad_norm": 0.2270546555519104, "learning_rate": 6.992414258857657e-05, "loss": 3.0999847412109376, "step": 138160 }, { "epoch": 0.29846666666666666, "grad_norm": 0.2049383670091629, "learning_rate": 6.991403195463334e-05, "loss": 3.0628429412841798, "step": 138170 }, { "epoch": 0.2985333333333333, "grad_norm": 0.2019924521446228, "learning_rate": 6.990392035279296e-05, "loss": 3.0709457397460938, "step": 138180 }, { "epoch": 0.2986, "grad_norm": 0.2591339647769928, "learning_rate": 6.989380778354686e-05, "loss": 3.0631549835205076, "step": 138190 }, { "epoch": 0.2986666666666667, "grad_norm": 0.19830600917339325, "learning_rate": 6.98836942473866e-05, "loss": 3.039948081970215, "step": 138200 }, { "epoch": 0.29873333333333335, "grad_norm": 0.20629370212554932, "learning_rate": 6.987357974480369e-05, "loss": 3.144840621948242, "step": 138210 }, { "epoch": 0.2988, "grad_norm": 0.19798250496387482, "learning_rate": 6.986346427628977e-05, "loss": 2.883602523803711, "step": 138220 }, { "epoch": 0.29886666666666667, "grad_norm": 0.19626381993293762, "learning_rate": 6.985334784233646e-05, "loss": 3.0349822998046876, "step": 138230 }, { "epoch": 0.29893333333333333, "grad_norm": 0.21443071961402893, "learning_rate": 6.984323044343547e-05, "loss": 3.1572647094726562, "step": 138240 }, { "epoch": 0.299, "grad_norm": 0.2029469758272171, "learning_rate": 6.983311208007854e-05, "loss": 3.0929244995117187, "step": 138250 }, { "epoch": 0.29906666666666665, "grad_norm": 0.2652130722999573, "learning_rate": 6.982299275275747e-05, "loss": 2.6183494567871093, "step": 138260 }, { "epoch": 0.2991333333333333, "grad_norm": 0.2321552187204361, "learning_rate": 6.981287246196409e-05, "loss": 3.0760780334472657, "step": 138270 }, { "epoch": 0.2992, "grad_norm": 0.23843063414096832, "learning_rate": 6.980275120819029e-05, "loss": 3.1627620697021483, "step": 138280 }, { "epoch": 0.2992666666666667, "grad_norm": 0.20686255395412445, "learning_rate": 6.979262899192799e-05, "loss": 3.070756721496582, "step": 138290 }, { "epoch": 0.29933333333333334, "grad_norm": 0.33995091915130615, "learning_rate": 6.978250581366918e-05, "loss": 3.3632110595703124, "step": 138300 }, { "epoch": 0.2994, "grad_norm": 0.19193331897258759, "learning_rate": 6.977238167390587e-05, "loss": 3.0775861740112305, "step": 138310 }, { "epoch": 0.29946666666666666, "grad_norm": 0.21166174113750458, "learning_rate": 6.976225657313013e-05, "loss": 3.0716119766235352, "step": 138320 }, { "epoch": 0.2995333333333333, "grad_norm": 0.35215720534324646, "learning_rate": 6.97521305118341e-05, "loss": 3.0730594635009765, "step": 138330 }, { "epoch": 0.2996, "grad_norm": 0.2438032627105713, "learning_rate": 6.974200349050996e-05, "loss": 3.096489143371582, "step": 138340 }, { "epoch": 0.2996666666666667, "grad_norm": 0.20769309997558594, "learning_rate": 6.973187550964986e-05, "loss": 3.121146583557129, "step": 138350 }, { "epoch": 0.29973333333333335, "grad_norm": 0.24014955759048462, "learning_rate": 6.97217465697461e-05, "loss": 3.0967739105224608, "step": 138360 }, { "epoch": 0.2998, "grad_norm": 0.2026800662279129, "learning_rate": 6.971161667129099e-05, "loss": 3.056631851196289, "step": 138370 }, { "epoch": 0.29986666666666667, "grad_norm": 0.2136913388967514, "learning_rate": 6.970148581477686e-05, "loss": 3.0626594543457033, "step": 138380 }, { "epoch": 0.29993333333333333, "grad_norm": 0.22886276245117188, "learning_rate": 6.969135400069613e-05, "loss": 3.088131332397461, "step": 138390 }, { "epoch": 0.3, "grad_norm": 0.22036772966384888, "learning_rate": 6.968122122954121e-05, "loss": 3.149226760864258, "step": 138400 }, { "epoch": 0.30006666666666665, "grad_norm": 0.2053080052137375, "learning_rate": 6.967108750180463e-05, "loss": 2.937725639343262, "step": 138410 }, { "epoch": 0.3001333333333333, "grad_norm": 0.19501782953739166, "learning_rate": 6.96609528179789e-05, "loss": 3.0929447174072267, "step": 138420 }, { "epoch": 0.3002, "grad_norm": 0.20018990337848663, "learning_rate": 6.965081717855662e-05, "loss": 3.0787620544433594, "step": 138430 }, { "epoch": 0.3002666666666667, "grad_norm": 0.19199293851852417, "learning_rate": 6.964068058403042e-05, "loss": 3.0976337432861327, "step": 138440 }, { "epoch": 0.30033333333333334, "grad_norm": 0.22511474788188934, "learning_rate": 6.963054303489295e-05, "loss": 3.1317230224609376, "step": 138450 }, { "epoch": 0.3004, "grad_norm": 0.22138658165931702, "learning_rate": 6.962040453163699e-05, "loss": 3.1261341094970705, "step": 138460 }, { "epoch": 0.30046666666666666, "grad_norm": 0.19719652831554413, "learning_rate": 6.961026507475524e-05, "loss": 3.0882614135742186, "step": 138470 }, { "epoch": 0.3005333333333333, "grad_norm": 0.19879530370235443, "learning_rate": 6.960012466474056e-05, "loss": 3.07407283782959, "step": 138480 }, { "epoch": 0.3006, "grad_norm": 0.1965930014848709, "learning_rate": 6.95899833020858e-05, "loss": 3.2560283660888674, "step": 138490 }, { "epoch": 0.3006666666666667, "grad_norm": 0.21964006125926971, "learning_rate": 6.957984098728387e-05, "loss": 3.033539581298828, "step": 138500 }, { "epoch": 0.30073333333333335, "grad_norm": 0.19920310378074646, "learning_rate": 6.956969772082773e-05, "loss": 3.1072528839111326, "step": 138510 }, { "epoch": 0.3008, "grad_norm": 0.24680741131305695, "learning_rate": 6.955955350321039e-05, "loss": 3.053150177001953, "step": 138520 }, { "epoch": 0.30086666666666667, "grad_norm": 0.18688981235027313, "learning_rate": 6.954940833492487e-05, "loss": 3.0465599060058595, "step": 138530 }, { "epoch": 0.30093333333333333, "grad_norm": 0.19974873960018158, "learning_rate": 6.953926221646428e-05, "loss": 3.1178251266479493, "step": 138540 }, { "epoch": 0.301, "grad_norm": 0.2100582867860794, "learning_rate": 6.952911514832175e-05, "loss": 3.125465202331543, "step": 138550 }, { "epoch": 0.30106666666666665, "grad_norm": 0.37841665744781494, "learning_rate": 6.951896713099047e-05, "loss": 3.0027294158935547, "step": 138560 }, { "epoch": 0.3011333333333333, "grad_norm": 0.20812398195266724, "learning_rate": 6.950881816496367e-05, "loss": 3.0704946517944336, "step": 138570 }, { "epoch": 0.3012, "grad_norm": 0.29568931460380554, "learning_rate": 6.949866825073465e-05, "loss": 3.099392318725586, "step": 138580 }, { "epoch": 0.3012666666666667, "grad_norm": 0.2666790187358856, "learning_rate": 6.94885173887967e-05, "loss": 3.0644628524780275, "step": 138590 }, { "epoch": 0.30133333333333334, "grad_norm": 0.5929621458053589, "learning_rate": 6.947836557964322e-05, "loss": 3.0584552764892576, "step": 138600 }, { "epoch": 0.3014, "grad_norm": 0.21050433814525604, "learning_rate": 6.946821282376762e-05, "loss": 3.057097625732422, "step": 138610 }, { "epoch": 0.30146666666666666, "grad_norm": 0.18785248696804047, "learning_rate": 6.945805912166335e-05, "loss": 3.10050106048584, "step": 138620 }, { "epoch": 0.3015333333333333, "grad_norm": 0.19747047126293182, "learning_rate": 6.944790447382391e-05, "loss": 3.0728321075439453, "step": 138630 }, { "epoch": 0.3016, "grad_norm": 0.20521345734596252, "learning_rate": 6.943774888074287e-05, "loss": 3.0808202743530275, "step": 138640 }, { "epoch": 0.3016666666666667, "grad_norm": 0.45973554253578186, "learning_rate": 6.942759234291384e-05, "loss": 3.074161911010742, "step": 138650 }, { "epoch": 0.30173333333333335, "grad_norm": 0.28100186586380005, "learning_rate": 6.941743486083044e-05, "loss": 3.083689498901367, "step": 138660 }, { "epoch": 0.3018, "grad_norm": 0.19127477705478668, "learning_rate": 6.940727643498639e-05, "loss": 3.1151268005371096, "step": 138670 }, { "epoch": 0.30186666666666667, "grad_norm": 0.7901930809020996, "learning_rate": 6.939711706587541e-05, "loss": 3.138861656188965, "step": 138680 }, { "epoch": 0.30193333333333333, "grad_norm": 0.5943142175674438, "learning_rate": 6.938695675399129e-05, "loss": 3.171053886413574, "step": 138690 }, { "epoch": 0.302, "grad_norm": 0.19604934751987457, "learning_rate": 6.937679549982786e-05, "loss": 3.058670997619629, "step": 138700 }, { "epoch": 0.30206666666666665, "grad_norm": 0.21157841384410858, "learning_rate": 6.936663330387898e-05, "loss": 3.078403091430664, "step": 138710 }, { "epoch": 0.3021333333333333, "grad_norm": 0.19457733631134033, "learning_rate": 6.935647016663859e-05, "loss": 3.0293701171875, "step": 138720 }, { "epoch": 0.3022, "grad_norm": 0.21564054489135742, "learning_rate": 6.934630608860064e-05, "loss": 3.072559928894043, "step": 138730 }, { "epoch": 0.3022666666666667, "grad_norm": 0.21210245788097382, "learning_rate": 6.933614107025917e-05, "loss": 3.076827812194824, "step": 138740 }, { "epoch": 0.30233333333333334, "grad_norm": 0.24799363315105438, "learning_rate": 6.932597511210821e-05, "loss": 3.131361198425293, "step": 138750 }, { "epoch": 0.3024, "grad_norm": 0.19939954578876495, "learning_rate": 6.931580821464188e-05, "loss": 3.0900259017944336, "step": 138760 }, { "epoch": 0.30246666666666666, "grad_norm": 0.22199614346027374, "learning_rate": 6.930564037835434e-05, "loss": 3.083889389038086, "step": 138770 }, { "epoch": 0.3025333333333333, "grad_norm": 0.19597983360290527, "learning_rate": 6.929547160373975e-05, "loss": 3.046815872192383, "step": 138780 }, { "epoch": 0.3026, "grad_norm": 0.31329336762428284, "learning_rate": 6.928530189129236e-05, "loss": 3.181880760192871, "step": 138790 }, { "epoch": 0.30266666666666664, "grad_norm": 0.20352190732955933, "learning_rate": 6.92751312415065e-05, "loss": 3.099446105957031, "step": 138800 }, { "epoch": 0.30273333333333335, "grad_norm": 0.1922585368156433, "learning_rate": 6.926495965487644e-05, "loss": 3.0661033630371093, "step": 138810 }, { "epoch": 0.3028, "grad_norm": 0.497308611869812, "learning_rate": 6.925478713189662e-05, "loss": 3.177608108520508, "step": 138820 }, { "epoch": 0.3028666666666667, "grad_norm": 0.20898671448230743, "learning_rate": 6.92446136730614e-05, "loss": 3.1787200927734376, "step": 138830 }, { "epoch": 0.30293333333333333, "grad_norm": 0.24918466806411743, "learning_rate": 6.92344392788653e-05, "loss": 3.0915224075317385, "step": 138840 }, { "epoch": 0.303, "grad_norm": 0.18936964869499207, "learning_rate": 6.92242639498028e-05, "loss": 3.028412437438965, "step": 138850 }, { "epoch": 0.30306666666666665, "grad_norm": 0.22826065123081207, "learning_rate": 6.921408768636848e-05, "loss": 3.077797508239746, "step": 138860 }, { "epoch": 0.3031333333333333, "grad_norm": 0.5974269509315491, "learning_rate": 6.920391048905692e-05, "loss": 3.0128074645996095, "step": 138870 }, { "epoch": 0.3032, "grad_norm": 0.19506694376468658, "learning_rate": 6.919373235836281e-05, "loss": 2.8129962921142577, "step": 138880 }, { "epoch": 0.3032666666666667, "grad_norm": 0.22668279707431793, "learning_rate": 6.91835532947808e-05, "loss": 3.1212364196777345, "step": 138890 }, { "epoch": 0.30333333333333334, "grad_norm": 0.22738666832447052, "learning_rate": 6.917337329880567e-05, "loss": 3.0926658630371096, "step": 138900 }, { "epoch": 0.3034, "grad_norm": 0.21600233018398285, "learning_rate": 6.916319237093219e-05, "loss": 3.102016067504883, "step": 138910 }, { "epoch": 0.30346666666666666, "grad_norm": 0.21514756977558136, "learning_rate": 6.915301051165519e-05, "loss": 3.10846004486084, "step": 138920 }, { "epoch": 0.3035333333333333, "grad_norm": 0.2231314480304718, "learning_rate": 6.914282772146954e-05, "loss": 3.0694612503051757, "step": 138930 }, { "epoch": 0.3036, "grad_norm": 0.20520047843456268, "learning_rate": 6.913264400087017e-05, "loss": 3.083135795593262, "step": 138940 }, { "epoch": 0.30366666666666664, "grad_norm": 0.19830606877803802, "learning_rate": 6.912245935035206e-05, "loss": 3.0311916351318358, "step": 138950 }, { "epoch": 0.30373333333333336, "grad_norm": 0.20894299447536469, "learning_rate": 6.91122737704102e-05, "loss": 3.103026008605957, "step": 138960 }, { "epoch": 0.3038, "grad_norm": 0.20514193177223206, "learning_rate": 6.910208726153966e-05, "loss": 3.1080780029296875, "step": 138970 }, { "epoch": 0.3038666666666667, "grad_norm": 0.1990991234779358, "learning_rate": 6.909189982423554e-05, "loss": 3.101220703125, "step": 138980 }, { "epoch": 0.30393333333333333, "grad_norm": 0.2032502144575119, "learning_rate": 6.908171145899298e-05, "loss": 3.1119195938110353, "step": 138990 }, { "epoch": 0.304, "grad_norm": 0.19888916611671448, "learning_rate": 6.90715221663072e-05, "loss": 3.1111454010009765, "step": 139000 }, { "epoch": 0.30406666666666665, "grad_norm": 0.21503935754299164, "learning_rate": 6.906133194667342e-05, "loss": 3.0988838195800783, "step": 139010 }, { "epoch": 0.3041333333333333, "grad_norm": 0.22336094081401825, "learning_rate": 6.90511408005869e-05, "loss": 3.0642936706542967, "step": 139020 }, { "epoch": 0.3042, "grad_norm": 0.21699269115924835, "learning_rate": 6.904094872854301e-05, "loss": 3.0945926666259767, "step": 139030 }, { "epoch": 0.3042666666666667, "grad_norm": 0.2373618632555008, "learning_rate": 6.90307557310371e-05, "loss": 3.0985719680786135, "step": 139040 }, { "epoch": 0.30433333333333334, "grad_norm": 0.20259226858615875, "learning_rate": 6.902056180856458e-05, "loss": 3.1020530700683593, "step": 139050 }, { "epoch": 0.3044, "grad_norm": 0.2236437201499939, "learning_rate": 6.901036696162093e-05, "loss": 3.0681083679199217, "step": 139060 }, { "epoch": 0.30446666666666666, "grad_norm": 0.21444597840309143, "learning_rate": 6.900017119070167e-05, "loss": 3.1357044219970702, "step": 139070 }, { "epoch": 0.3045333333333333, "grad_norm": 0.19875115156173706, "learning_rate": 6.898997449630232e-05, "loss": 3.089300346374512, "step": 139080 }, { "epoch": 0.3046, "grad_norm": 0.21510550379753113, "learning_rate": 6.89797768789185e-05, "loss": 3.079701614379883, "step": 139090 }, { "epoch": 0.30466666666666664, "grad_norm": 0.19024361670017242, "learning_rate": 6.896957833904585e-05, "loss": 3.092775344848633, "step": 139100 }, { "epoch": 0.30473333333333336, "grad_norm": 0.20595255494117737, "learning_rate": 6.895937887718005e-05, "loss": 3.0658153533935546, "step": 139110 }, { "epoch": 0.3048, "grad_norm": 0.23896799981594086, "learning_rate": 6.894917849381683e-05, "loss": 3.052048110961914, "step": 139120 }, { "epoch": 0.3048666666666667, "grad_norm": 0.23609614372253418, "learning_rate": 6.893897718945198e-05, "loss": 3.3824493408203127, "step": 139130 }, { "epoch": 0.30493333333333333, "grad_norm": 0.2392929494380951, "learning_rate": 6.892877496458132e-05, "loss": 3.276982879638672, "step": 139140 }, { "epoch": 0.305, "grad_norm": 0.19565606117248535, "learning_rate": 6.89185718197007e-05, "loss": 3.06204833984375, "step": 139150 }, { "epoch": 0.30506666666666665, "grad_norm": 0.2037235051393509, "learning_rate": 6.890836775530606e-05, "loss": 3.081349754333496, "step": 139160 }, { "epoch": 0.3051333333333333, "grad_norm": 0.22220715880393982, "learning_rate": 6.889816277189334e-05, "loss": 3.1258249282836914, "step": 139170 }, { "epoch": 0.3052, "grad_norm": 0.22871173918247223, "learning_rate": 6.888795686995853e-05, "loss": 3.2207733154296876, "step": 139180 }, { "epoch": 0.3052666666666667, "grad_norm": 0.20038574934005737, "learning_rate": 6.887775004999771e-05, "loss": 3.091124153137207, "step": 139190 }, { "epoch": 0.30533333333333335, "grad_norm": 0.2554948925971985, "learning_rate": 6.886754231250692e-05, "loss": 3.0687707901000976, "step": 139200 }, { "epoch": 0.3054, "grad_norm": 0.20602336525917053, "learning_rate": 6.885733365798233e-05, "loss": 3.1032758712768556, "step": 139210 }, { "epoch": 0.30546666666666666, "grad_norm": 0.22918394207954407, "learning_rate": 6.88471240869201e-05, "loss": 3.0584505081176756, "step": 139220 }, { "epoch": 0.3055333333333333, "grad_norm": 0.2129095196723938, "learning_rate": 6.883691359981646e-05, "loss": 3.0632400512695312, "step": 139230 }, { "epoch": 0.3056, "grad_norm": 0.23772713541984558, "learning_rate": 6.88267021971677e-05, "loss": 3.205921173095703, "step": 139240 }, { "epoch": 0.30566666666666664, "grad_norm": 0.19628731906414032, "learning_rate": 6.881648987947012e-05, "loss": 3.1264400482177734, "step": 139250 }, { "epoch": 0.30573333333333336, "grad_norm": 0.21052905917167664, "learning_rate": 6.880627664722006e-05, "loss": 3.074654769897461, "step": 139260 }, { "epoch": 0.3058, "grad_norm": 0.2389458864927292, "learning_rate": 6.879606250091393e-05, "loss": 3.0604551315307615, "step": 139270 }, { "epoch": 0.3058666666666667, "grad_norm": 0.221536323428154, "learning_rate": 6.878584744104818e-05, "loss": 3.090751838684082, "step": 139280 }, { "epoch": 0.30593333333333333, "grad_norm": 0.1911926418542862, "learning_rate": 6.877563146811931e-05, "loss": 3.100711631774902, "step": 139290 }, { "epoch": 0.306, "grad_norm": 0.21355073153972626, "learning_rate": 6.876541458262383e-05, "loss": 3.049273872375488, "step": 139300 }, { "epoch": 0.30606666666666665, "grad_norm": 0.22626928985118866, "learning_rate": 6.875519678505834e-05, "loss": 3.121961975097656, "step": 139310 }, { "epoch": 0.3061333333333333, "grad_norm": 0.191472589969635, "learning_rate": 6.874497807591947e-05, "loss": 3.0502315521240235, "step": 139320 }, { "epoch": 0.3062, "grad_norm": 0.21936841309070587, "learning_rate": 6.873475845570386e-05, "loss": 3.05587158203125, "step": 139330 }, { "epoch": 0.3062666666666667, "grad_norm": 0.223373144865036, "learning_rate": 6.872453792490825e-05, "loss": 3.1137413024902343, "step": 139340 }, { "epoch": 0.30633333333333335, "grad_norm": 0.19664816558361053, "learning_rate": 6.871431648402938e-05, "loss": 3.140751075744629, "step": 139350 }, { "epoch": 0.3064, "grad_norm": 0.23665273189544678, "learning_rate": 6.870409413356405e-05, "loss": 3.0911752700805666, "step": 139360 }, { "epoch": 0.30646666666666667, "grad_norm": 0.2029453068971634, "learning_rate": 6.86938708740091e-05, "loss": 3.058778190612793, "step": 139370 }, { "epoch": 0.3065333333333333, "grad_norm": 0.5420721173286438, "learning_rate": 6.868364670586144e-05, "loss": 3.199982261657715, "step": 139380 }, { "epoch": 0.3066, "grad_norm": 0.2123294621706009, "learning_rate": 6.867342162961799e-05, "loss": 3.1446441650390624, "step": 139390 }, { "epoch": 0.30666666666666664, "grad_norm": 0.31133878231048584, "learning_rate": 6.866319564577572e-05, "loss": 3.0534736633300783, "step": 139400 }, { "epoch": 0.30673333333333336, "grad_norm": 0.2310098260641098, "learning_rate": 6.865296875483169e-05, "loss": 3.091577911376953, "step": 139410 }, { "epoch": 0.3068, "grad_norm": 0.20822307467460632, "learning_rate": 6.864274095728291e-05, "loss": 2.9813913345336913, "step": 139420 }, { "epoch": 0.3068666666666667, "grad_norm": 0.20814929902553558, "learning_rate": 6.863251225362652e-05, "loss": 3.1256189346313477, "step": 139430 }, { "epoch": 0.30693333333333334, "grad_norm": 0.22174383699893951, "learning_rate": 6.862228264435965e-05, "loss": 3.0353816986083983, "step": 139440 }, { "epoch": 0.307, "grad_norm": 0.19834136962890625, "learning_rate": 6.861205212997956e-05, "loss": 3.137867736816406, "step": 139450 }, { "epoch": 0.30706666666666665, "grad_norm": 0.21123896539211273, "learning_rate": 6.860182071098342e-05, "loss": 3.0934391021728516, "step": 139460 }, { "epoch": 0.3071333333333333, "grad_norm": 0.1972113996744156, "learning_rate": 6.859158838786855e-05, "loss": 3.0697418212890626, "step": 139470 }, { "epoch": 0.3072, "grad_norm": 0.24906133115291595, "learning_rate": 6.858135516113226e-05, "loss": 3.213915252685547, "step": 139480 }, { "epoch": 0.3072666666666667, "grad_norm": 0.2071509212255478, "learning_rate": 6.857112103127196e-05, "loss": 3.0392759323120115, "step": 139490 }, { "epoch": 0.30733333333333335, "grad_norm": 0.1950642168521881, "learning_rate": 6.856088599878505e-05, "loss": 3.103363609313965, "step": 139500 }, { "epoch": 0.3074, "grad_norm": 0.21418048441410065, "learning_rate": 6.855065006416897e-05, "loss": 3.065860939025879, "step": 139510 }, { "epoch": 0.30746666666666667, "grad_norm": 0.23431771993637085, "learning_rate": 6.854041322792125e-05, "loss": 3.124747467041016, "step": 139520 }, { "epoch": 0.3075333333333333, "grad_norm": 0.19492579996585846, "learning_rate": 6.85301754905394e-05, "loss": 3.140982818603516, "step": 139530 }, { "epoch": 0.3076, "grad_norm": 0.20039723813533783, "learning_rate": 6.851993685252107e-05, "loss": 3.069913864135742, "step": 139540 }, { "epoch": 0.30766666666666664, "grad_norm": 0.25051695108413696, "learning_rate": 6.850969731436386e-05, "loss": 3.120174217224121, "step": 139550 }, { "epoch": 0.30773333333333336, "grad_norm": 0.19719168543815613, "learning_rate": 6.849945687656547e-05, "loss": 3.089107131958008, "step": 139560 }, { "epoch": 0.3078, "grad_norm": 0.18904338777065277, "learning_rate": 6.848921553962361e-05, "loss": 3.1164794921875, "step": 139570 }, { "epoch": 0.3078666666666667, "grad_norm": 0.21656514704227448, "learning_rate": 6.847897330403604e-05, "loss": 3.033525848388672, "step": 139580 }, { "epoch": 0.30793333333333334, "grad_norm": 0.20271499454975128, "learning_rate": 6.84687301703006e-05, "loss": 3.130241584777832, "step": 139590 }, { "epoch": 0.308, "grad_norm": 0.21505261957645416, "learning_rate": 6.845848613891511e-05, "loss": 3.0810998916625976, "step": 139600 }, { "epoch": 0.30806666666666666, "grad_norm": 0.20338183641433716, "learning_rate": 6.844824121037748e-05, "loss": 3.0966888427734376, "step": 139610 }, { "epoch": 0.3081333333333333, "grad_norm": 0.20057275891304016, "learning_rate": 6.843799538518568e-05, "loss": 3.10211067199707, "step": 139620 }, { "epoch": 0.3082, "grad_norm": 0.2040339857339859, "learning_rate": 6.842774866383766e-05, "loss": 3.127766990661621, "step": 139630 }, { "epoch": 0.3082666666666667, "grad_norm": 0.20557798445224762, "learning_rate": 6.841750104683146e-05, "loss": 3.0412309646606444, "step": 139640 }, { "epoch": 0.30833333333333335, "grad_norm": 0.19968414306640625, "learning_rate": 6.840725253466517e-05, "loss": 3.063773345947266, "step": 139650 }, { "epoch": 0.3084, "grad_norm": 0.2110573798418045, "learning_rate": 6.839700312783687e-05, "loss": 3.238547134399414, "step": 139660 }, { "epoch": 0.30846666666666667, "grad_norm": 0.2179730385541916, "learning_rate": 6.838675282684477e-05, "loss": 3.1357452392578127, "step": 139670 }, { "epoch": 0.3085333333333333, "grad_norm": 0.22888344526290894, "learning_rate": 6.837650163218703e-05, "loss": 3.0841306686401366, "step": 139680 }, { "epoch": 0.3086, "grad_norm": 0.18929359316825867, "learning_rate": 6.83662495443619e-05, "loss": 3.085018348693848, "step": 139690 }, { "epoch": 0.30866666666666664, "grad_norm": 0.19693683087825775, "learning_rate": 6.83559965638677e-05, "loss": 3.31237907409668, "step": 139700 }, { "epoch": 0.30873333333333336, "grad_norm": 0.19431902468204498, "learning_rate": 6.834574269120274e-05, "loss": 3.117441177368164, "step": 139710 }, { "epoch": 0.3088, "grad_norm": 0.2297990769147873, "learning_rate": 6.83354879268654e-05, "loss": 3.068415069580078, "step": 139720 }, { "epoch": 0.3088666666666667, "grad_norm": 0.1964755356311798, "learning_rate": 6.832523227135412e-05, "loss": 3.1017297744750976, "step": 139730 }, { "epoch": 0.30893333333333334, "grad_norm": 0.21714633703231812, "learning_rate": 6.831497572516733e-05, "loss": 3.110983657836914, "step": 139740 }, { "epoch": 0.309, "grad_norm": 0.23034130036830902, "learning_rate": 6.830471828880356e-05, "loss": 3.0239484786987303, "step": 139750 }, { "epoch": 0.30906666666666666, "grad_norm": 0.18116582930088043, "learning_rate": 6.829445996276135e-05, "loss": 3.0802698135375977, "step": 139760 }, { "epoch": 0.3091333333333333, "grad_norm": 0.21478214859962463, "learning_rate": 6.828420074753929e-05, "loss": 3.13503475189209, "step": 139770 }, { "epoch": 0.3092, "grad_norm": 0.22300070524215698, "learning_rate": 6.827394064363604e-05, "loss": 3.1648881912231444, "step": 139780 }, { "epoch": 0.3092666666666667, "grad_norm": 0.22808215022087097, "learning_rate": 6.826367965155026e-05, "loss": 3.119496154785156, "step": 139790 }, { "epoch": 0.30933333333333335, "grad_norm": 0.19503530859947205, "learning_rate": 6.825341777178066e-05, "loss": 3.0847394943237303, "step": 139800 }, { "epoch": 0.3094, "grad_norm": 0.20515869557857513, "learning_rate": 6.824315500482604e-05, "loss": 3.0601831436157227, "step": 139810 }, { "epoch": 0.30946666666666667, "grad_norm": 0.19352827966213226, "learning_rate": 6.823289135118518e-05, "loss": 3.1043466567993163, "step": 139820 }, { "epoch": 0.3095333333333333, "grad_norm": 0.2891380488872528, "learning_rate": 6.822262681135695e-05, "loss": 3.085840034484863, "step": 139830 }, { "epoch": 0.3096, "grad_norm": 0.20934328436851501, "learning_rate": 6.821236138584024e-05, "loss": 3.1227699279785157, "step": 139840 }, { "epoch": 0.30966666666666665, "grad_norm": 0.2167099416255951, "learning_rate": 6.820209507513399e-05, "loss": 3.018851089477539, "step": 139850 }, { "epoch": 0.30973333333333336, "grad_norm": 0.2239229381084442, "learning_rate": 6.819182787973717e-05, "loss": 3.082673454284668, "step": 139860 }, { "epoch": 0.3098, "grad_norm": 0.26627957820892334, "learning_rate": 6.818155980014883e-05, "loss": 3.0735231399536134, "step": 139870 }, { "epoch": 0.3098666666666667, "grad_norm": 0.22904953360557556, "learning_rate": 6.817129083686801e-05, "loss": 2.9424068450927736, "step": 139880 }, { "epoch": 0.30993333333333334, "grad_norm": 0.19006206095218658, "learning_rate": 6.816102099039384e-05, "loss": 3.081417465209961, "step": 139890 }, { "epoch": 0.31, "grad_norm": 0.201414555311203, "learning_rate": 6.815075026122546e-05, "loss": 3.109160041809082, "step": 139900 }, { "epoch": 0.31006666666666666, "grad_norm": 0.222655788064003, "learning_rate": 6.814047864986208e-05, "loss": 3.084964370727539, "step": 139910 }, { "epoch": 0.3101333333333333, "grad_norm": 0.3084982633590698, "learning_rate": 6.813020615680292e-05, "loss": 3.070184898376465, "step": 139920 }, { "epoch": 0.3102, "grad_norm": 0.22777625918388367, "learning_rate": 6.811993278254726e-05, "loss": 3.1250900268554687, "step": 139930 }, { "epoch": 0.3102666666666667, "grad_norm": 0.18983320891857147, "learning_rate": 6.810965852759447e-05, "loss": 3.0381378173828124, "step": 139940 }, { "epoch": 0.31033333333333335, "grad_norm": 0.20058122277259827, "learning_rate": 6.809938339244386e-05, "loss": 3.1043222427368162, "step": 139950 }, { "epoch": 0.3104, "grad_norm": 0.19299490749835968, "learning_rate": 6.808910737759487e-05, "loss": 3.0564842224121094, "step": 139960 }, { "epoch": 0.31046666666666667, "grad_norm": 0.218187615275383, "learning_rate": 6.807883048354696e-05, "loss": 3.06080322265625, "step": 139970 }, { "epoch": 0.31053333333333333, "grad_norm": 0.21189254522323608, "learning_rate": 6.80685527107996e-05, "loss": 3.100642204284668, "step": 139980 }, { "epoch": 0.3106, "grad_norm": 0.25885990262031555, "learning_rate": 6.805827405985234e-05, "loss": 3.053976821899414, "step": 139990 }, { "epoch": 0.31066666666666665, "grad_norm": 0.9711171388626099, "learning_rate": 6.804799453120478e-05, "loss": 3.214427185058594, "step": 140000 }, { "epoch": 0.3107333333333333, "grad_norm": 0.2316991537809372, "learning_rate": 6.803771412535652e-05, "loss": 3.127761650085449, "step": 140010 }, { "epoch": 0.3108, "grad_norm": 0.2840670645236969, "learning_rate": 6.802743284280724e-05, "loss": 3.178848075866699, "step": 140020 }, { "epoch": 0.3108666666666667, "grad_norm": 0.18879127502441406, "learning_rate": 6.801715068405663e-05, "loss": 3.0999338150024416, "step": 140030 }, { "epoch": 0.31093333333333334, "grad_norm": 0.21694500744342804, "learning_rate": 6.800686764960447e-05, "loss": 3.0129919052124023, "step": 140040 }, { "epoch": 0.311, "grad_norm": 0.19087764620780945, "learning_rate": 6.799658373995053e-05, "loss": 3.0955955505371096, "step": 140050 }, { "epoch": 0.31106666666666666, "grad_norm": 0.1908525675535202, "learning_rate": 6.798629895559466e-05, "loss": 3.046487236022949, "step": 140060 }, { "epoch": 0.3111333333333333, "grad_norm": 0.1963142603635788, "learning_rate": 6.797601329703673e-05, "loss": 3.069183921813965, "step": 140070 }, { "epoch": 0.3112, "grad_norm": 0.19702982902526855, "learning_rate": 6.796572676477669e-05, "loss": 3.025160026550293, "step": 140080 }, { "epoch": 0.3112666666666667, "grad_norm": 0.21282470226287842, "learning_rate": 6.795543935931446e-05, "loss": 3.122618865966797, "step": 140090 }, { "epoch": 0.31133333333333335, "grad_norm": 0.28768473863601685, "learning_rate": 6.794515108115007e-05, "loss": 3.0334548950195312, "step": 140100 }, { "epoch": 0.3114, "grad_norm": 0.2205188274383545, "learning_rate": 6.793486193078357e-05, "loss": 3.2441337585449217, "step": 140110 }, { "epoch": 0.31146666666666667, "grad_norm": 0.2023680955171585, "learning_rate": 6.792457190871504e-05, "loss": 3.0578945159912108, "step": 140120 }, { "epoch": 0.31153333333333333, "grad_norm": 0.1987197995185852, "learning_rate": 6.791428101544464e-05, "loss": 3.1039093017578123, "step": 140130 }, { "epoch": 0.3116, "grad_norm": 0.2109653353691101, "learning_rate": 6.790398925147252e-05, "loss": 3.064649200439453, "step": 140140 }, { "epoch": 0.31166666666666665, "grad_norm": 0.2521439790725708, "learning_rate": 6.789369661729892e-05, "loss": 3.3103843688964845, "step": 140150 }, { "epoch": 0.3117333333333333, "grad_norm": 0.20793600380420685, "learning_rate": 6.788340311342407e-05, "loss": 3.0580814361572264, "step": 140160 }, { "epoch": 0.3118, "grad_norm": 0.23062004148960114, "learning_rate": 6.787310874034832e-05, "loss": 3.1660882949829103, "step": 140170 }, { "epoch": 0.3118666666666667, "grad_norm": 0.2280472069978714, "learning_rate": 6.786281349857197e-05, "loss": 3.017110824584961, "step": 140180 }, { "epoch": 0.31193333333333334, "grad_norm": 0.24389949440956116, "learning_rate": 6.785251738859541e-05, "loss": 3.081784248352051, "step": 140190 }, { "epoch": 0.312, "grad_norm": 0.2595084607601166, "learning_rate": 6.784222041091911e-05, "loss": 3.1600053787231444, "step": 140200 }, { "epoch": 0.31206666666666666, "grad_norm": 0.1956637054681778, "learning_rate": 6.783192256604352e-05, "loss": 3.0585275650024415, "step": 140210 }, { "epoch": 0.3121333333333333, "grad_norm": 0.25966596603393555, "learning_rate": 6.782162385446914e-05, "loss": 3.1160676956176756, "step": 140220 }, { "epoch": 0.3122, "grad_norm": 0.2055167853832245, "learning_rate": 6.781132427669654e-05, "loss": 3.042675590515137, "step": 140230 }, { "epoch": 0.3122666666666667, "grad_norm": 0.21680104732513428, "learning_rate": 6.780102383322631e-05, "loss": 3.0854578018188477, "step": 140240 }, { "epoch": 0.31233333333333335, "grad_norm": 0.21142493188381195, "learning_rate": 6.77907225245591e-05, "loss": 3.079006576538086, "step": 140250 }, { "epoch": 0.3124, "grad_norm": 0.19989915192127228, "learning_rate": 6.778042035119559e-05, "loss": 3.0236795425415037, "step": 140260 }, { "epoch": 0.31246666666666667, "grad_norm": 0.21415391564369202, "learning_rate": 6.77701173136365e-05, "loss": 3.0414716720581056, "step": 140270 }, { "epoch": 0.31253333333333333, "grad_norm": 0.20429645478725433, "learning_rate": 6.775981341238262e-05, "loss": 3.1007221221923826, "step": 140280 }, { "epoch": 0.3126, "grad_norm": 0.20403583347797394, "learning_rate": 6.774950864793472e-05, "loss": 3.038703727722168, "step": 140290 }, { "epoch": 0.31266666666666665, "grad_norm": 0.2133440226316452, "learning_rate": 6.773920302079367e-05, "loss": 3.0609199523925783, "step": 140300 }, { "epoch": 0.3127333333333333, "grad_norm": 0.24648506939411163, "learning_rate": 6.772889653146037e-05, "loss": 3.2336910247802733, "step": 140310 }, { "epoch": 0.3128, "grad_norm": 0.2570726275444031, "learning_rate": 6.771858918043574e-05, "loss": 3.060990905761719, "step": 140320 }, { "epoch": 0.3128666666666667, "grad_norm": 0.19890287518501282, "learning_rate": 6.770828096822076e-05, "loss": 3.016658973693848, "step": 140330 }, { "epoch": 0.31293333333333334, "grad_norm": 0.1918763667345047, "learning_rate": 6.769797189531645e-05, "loss": 3.0999780654907227, "step": 140340 }, { "epoch": 0.313, "grad_norm": 0.22319021821022034, "learning_rate": 6.768766196222386e-05, "loss": 3.063760185241699, "step": 140350 }, { "epoch": 0.31306666666666666, "grad_norm": 0.3278812766075134, "learning_rate": 6.767735116944412e-05, "loss": 3.011438179016113, "step": 140360 }, { "epoch": 0.3131333333333333, "grad_norm": 0.7012143135070801, "learning_rate": 6.766703951747835e-05, "loss": 3.050065803527832, "step": 140370 }, { "epoch": 0.3132, "grad_norm": 0.19477517902851105, "learning_rate": 6.765672700682775e-05, "loss": 3.0795566558837892, "step": 140380 }, { "epoch": 0.3132666666666667, "grad_norm": 0.21479998528957367, "learning_rate": 6.764641363799352e-05, "loss": 3.089567184448242, "step": 140390 }, { "epoch": 0.31333333333333335, "grad_norm": 0.1940528303384781, "learning_rate": 6.763609941147695e-05, "loss": 3.0314340591430664, "step": 140400 }, { "epoch": 0.3134, "grad_norm": 0.2001204490661621, "learning_rate": 6.762578432777935e-05, "loss": 3.085813522338867, "step": 140410 }, { "epoch": 0.31346666666666667, "grad_norm": 0.20861899852752686, "learning_rate": 6.761546838740207e-05, "loss": 3.0824634552001955, "step": 140420 }, { "epoch": 0.31353333333333333, "grad_norm": 0.19163531064987183, "learning_rate": 6.76051515908465e-05, "loss": 3.2043231964111327, "step": 140430 }, { "epoch": 0.3136, "grad_norm": 0.20445723831653595, "learning_rate": 6.759483393861406e-05, "loss": 3.137096405029297, "step": 140440 }, { "epoch": 0.31366666666666665, "grad_norm": 0.20745949447155, "learning_rate": 6.758451543120627e-05, "loss": 3.0884334564208986, "step": 140450 }, { "epoch": 0.3137333333333333, "grad_norm": 0.1953427791595459, "learning_rate": 6.757419606912461e-05, "loss": 3.0447109222412108, "step": 140460 }, { "epoch": 0.3138, "grad_norm": 0.20250147581100464, "learning_rate": 6.756387585287065e-05, "loss": 3.0747556686401367, "step": 140470 }, { "epoch": 0.3138666666666667, "grad_norm": 0.19081012904644012, "learning_rate": 6.7553554782946e-05, "loss": 3.039894676208496, "step": 140480 }, { "epoch": 0.31393333333333334, "grad_norm": 0.19516366720199585, "learning_rate": 6.75432328598523e-05, "loss": 3.0702804565429687, "step": 140490 }, { "epoch": 0.314, "grad_norm": 0.19527298212051392, "learning_rate": 6.753291008409122e-05, "loss": 3.0460220336914063, "step": 140500 }, { "epoch": 0.31406666666666666, "grad_norm": 0.2196791023015976, "learning_rate": 6.752258645616451e-05, "loss": 3.1118539810180663, "step": 140510 }, { "epoch": 0.3141333333333333, "grad_norm": 0.19996723532676697, "learning_rate": 6.751226197657392e-05, "loss": 3.104959487915039, "step": 140520 }, { "epoch": 0.3142, "grad_norm": 0.19527727365493774, "learning_rate": 6.750193664582125e-05, "loss": 3.0895471572875977, "step": 140530 }, { "epoch": 0.3142666666666667, "grad_norm": 0.20018216967582703, "learning_rate": 6.749161046440837e-05, "loss": 3.048689079284668, "step": 140540 }, { "epoch": 0.31433333333333335, "grad_norm": 0.1948484182357788, "learning_rate": 6.748128343283718e-05, "loss": 3.0755441665649412, "step": 140550 }, { "epoch": 0.3144, "grad_norm": 0.21792587637901306, "learning_rate": 6.747095555160959e-05, "loss": 3.0926183700561523, "step": 140560 }, { "epoch": 0.31446666666666667, "grad_norm": 0.2992475628852844, "learning_rate": 6.746062682122757e-05, "loss": 3.0976219177246094, "step": 140570 }, { "epoch": 0.31453333333333333, "grad_norm": 0.19283992052078247, "learning_rate": 6.745029724219317e-05, "loss": 3.170955276489258, "step": 140580 }, { "epoch": 0.3146, "grad_norm": 0.18588483333587646, "learning_rate": 6.74399668150084e-05, "loss": 3.0847846984863283, "step": 140590 }, { "epoch": 0.31466666666666665, "grad_norm": 0.20049689710140228, "learning_rate": 6.742963554017539e-05, "loss": 3.0704269409179688, "step": 140600 }, { "epoch": 0.3147333333333333, "grad_norm": 0.22825652360916138, "learning_rate": 6.741930341819626e-05, "loss": 3.4071815490722654, "step": 140610 }, { "epoch": 0.3148, "grad_norm": 0.20136235654354095, "learning_rate": 6.740897044957322e-05, "loss": 3.0710094451904295, "step": 140620 }, { "epoch": 0.3148666666666667, "grad_norm": 1.2207684516906738, "learning_rate": 6.739863663480846e-05, "loss": 3.2085479736328124, "step": 140630 }, { "epoch": 0.31493333333333334, "grad_norm": 0.23502662777900696, "learning_rate": 6.738830197440425e-05, "loss": 3.1717395782470703, "step": 140640 }, { "epoch": 0.315, "grad_norm": 0.21450576186180115, "learning_rate": 6.737796646886291e-05, "loss": 3.1008068084716798, "step": 140650 }, { "epoch": 0.31506666666666666, "grad_norm": 0.1995629519224167, "learning_rate": 6.736763011868677e-05, "loss": 3.099322509765625, "step": 140660 }, { "epoch": 0.3151333333333333, "grad_norm": 0.21452948451042175, "learning_rate": 6.735729292437821e-05, "loss": 3.0791385650634764, "step": 140670 }, { "epoch": 0.3152, "grad_norm": 0.20947498083114624, "learning_rate": 6.734695488643966e-05, "loss": 3.032111930847168, "step": 140680 }, { "epoch": 0.31526666666666664, "grad_norm": 0.2210196554660797, "learning_rate": 6.73366160053736e-05, "loss": 3.1039093017578123, "step": 140690 }, { "epoch": 0.31533333333333335, "grad_norm": 0.21105177700519562, "learning_rate": 6.732627628168255e-05, "loss": 3.1231481552124025, "step": 140700 }, { "epoch": 0.3154, "grad_norm": 0.1967947781085968, "learning_rate": 6.731593571586901e-05, "loss": 3.054948616027832, "step": 140710 }, { "epoch": 0.3154666666666667, "grad_norm": 0.24488309025764465, "learning_rate": 6.730559430843561e-05, "loss": 3.10216064453125, "step": 140720 }, { "epoch": 0.31553333333333333, "grad_norm": 0.229944109916687, "learning_rate": 6.729525205988497e-05, "loss": 3.042781448364258, "step": 140730 }, { "epoch": 0.3156, "grad_norm": 0.26117458939552307, "learning_rate": 6.728490897071977e-05, "loss": 3.072224998474121, "step": 140740 }, { "epoch": 0.31566666666666665, "grad_norm": 0.19931882619857788, "learning_rate": 6.727456504144272e-05, "loss": 3.030923271179199, "step": 140750 }, { "epoch": 0.3157333333333333, "grad_norm": 0.22038312256336212, "learning_rate": 6.726422027255657e-05, "loss": 3.1462087631225586, "step": 140760 }, { "epoch": 0.3158, "grad_norm": 0.19723229110240936, "learning_rate": 6.725387466456411e-05, "loss": 3.0990718841552733, "step": 140770 }, { "epoch": 0.3158666666666667, "grad_norm": 0.3323909640312195, "learning_rate": 6.72435282179682e-05, "loss": 3.078787994384766, "step": 140780 }, { "epoch": 0.31593333333333334, "grad_norm": 0.21231094002723694, "learning_rate": 6.723318093327169e-05, "loss": 3.0302228927612305, "step": 140790 }, { "epoch": 0.316, "grad_norm": 0.20697714388370514, "learning_rate": 6.72228328109775e-05, "loss": 3.05413761138916, "step": 140800 }, { "epoch": 0.31606666666666666, "grad_norm": 0.21142169833183289, "learning_rate": 6.721248385158859e-05, "loss": 3.0739007949829102, "step": 140810 }, { "epoch": 0.3161333333333333, "grad_norm": 0.25109249353408813, "learning_rate": 6.720213405560796e-05, "loss": 3.2473304748535154, "step": 140820 }, { "epoch": 0.3162, "grad_norm": 0.20905302464962006, "learning_rate": 6.719178342353866e-05, "loss": 3.0756473541259766, "step": 140830 }, { "epoch": 0.31626666666666664, "grad_norm": 0.20422272384166718, "learning_rate": 6.718143195588377e-05, "loss": 3.0650157928466797, "step": 140840 }, { "epoch": 0.31633333333333336, "grad_norm": 0.19387975335121155, "learning_rate": 6.71710796531464e-05, "loss": 3.149542808532715, "step": 140850 }, { "epoch": 0.3164, "grad_norm": 0.21226131916046143, "learning_rate": 6.71607265158297e-05, "loss": 3.1234607696533203, "step": 140860 }, { "epoch": 0.3164666666666667, "grad_norm": 0.21255388855934143, "learning_rate": 6.715037254443689e-05, "loss": 3.1558420181274416, "step": 140870 }, { "epoch": 0.31653333333333333, "grad_norm": 0.22059133648872375, "learning_rate": 6.714001773947121e-05, "loss": 3.0698268890380858, "step": 140880 }, { "epoch": 0.3166, "grad_norm": 0.233964741230011, "learning_rate": 6.712966210143593e-05, "loss": 3.0924453735351562, "step": 140890 }, { "epoch": 0.31666666666666665, "grad_norm": 0.23055796325206757, "learning_rate": 6.711930563083438e-05, "loss": 3.1862245559692384, "step": 140900 }, { "epoch": 0.3167333333333333, "grad_norm": 0.2275809645652771, "learning_rate": 6.710894832816993e-05, "loss": 3.1190614700317383, "step": 140910 }, { "epoch": 0.3168, "grad_norm": 0.23531852662563324, "learning_rate": 6.709859019394599e-05, "loss": 3.071307373046875, "step": 140920 }, { "epoch": 0.3168666666666667, "grad_norm": 0.21176017820835114, "learning_rate": 6.7088231228666e-05, "loss": 3.051597785949707, "step": 140930 }, { "epoch": 0.31693333333333334, "grad_norm": 0.1990339457988739, "learning_rate": 6.707787143283341e-05, "loss": 3.015188217163086, "step": 140940 }, { "epoch": 0.317, "grad_norm": 0.21077241003513336, "learning_rate": 6.706751080695179e-05, "loss": 3.1236446380615233, "step": 140950 }, { "epoch": 0.31706666666666666, "grad_norm": 0.19058682024478912, "learning_rate": 6.70571493515247e-05, "loss": 3.0484867095947266, "step": 140960 }, { "epoch": 0.3171333333333333, "grad_norm": 0.22973166406154633, "learning_rate": 6.704678706705572e-05, "loss": 3.077609825134277, "step": 140970 }, { "epoch": 0.3172, "grad_norm": 0.19723263382911682, "learning_rate": 6.703642395404852e-05, "loss": 3.053659439086914, "step": 140980 }, { "epoch": 0.31726666666666664, "grad_norm": 0.19673094153404236, "learning_rate": 6.702606001300678e-05, "loss": 3.0768049240112303, "step": 140990 }, { "epoch": 0.31733333333333336, "grad_norm": 0.20383068919181824, "learning_rate": 6.70156952444342e-05, "loss": 3.056406021118164, "step": 141000 }, { "epoch": 0.3174, "grad_norm": 0.2045264095067978, "learning_rate": 6.70053296488346e-05, "loss": 3.075966644287109, "step": 141010 }, { "epoch": 0.3174666666666667, "grad_norm": 0.2485523372888565, "learning_rate": 6.699496322671176e-05, "loss": 3.0906652450561523, "step": 141020 }, { "epoch": 0.31753333333333333, "grad_norm": 0.24038656055927277, "learning_rate": 6.698459597856952e-05, "loss": 3.039254570007324, "step": 141030 }, { "epoch": 0.3176, "grad_norm": 0.19763639569282532, "learning_rate": 6.697422790491177e-05, "loss": 3.089058685302734, "step": 141040 }, { "epoch": 0.31766666666666665, "grad_norm": 0.20064018666744232, "learning_rate": 6.696385900624244e-05, "loss": 3.045425796508789, "step": 141050 }, { "epoch": 0.3177333333333333, "grad_norm": 0.20466159284114838, "learning_rate": 6.69534892830655e-05, "loss": 3.0456306457519533, "step": 141060 }, { "epoch": 0.3178, "grad_norm": 0.19693423807621002, "learning_rate": 6.694311873588495e-05, "loss": 3.0190217971801756, "step": 141070 }, { "epoch": 0.3178666666666667, "grad_norm": 0.2798132300376892, "learning_rate": 6.693274736520485e-05, "loss": 3.0563884735107423, "step": 141080 }, { "epoch": 0.31793333333333335, "grad_norm": 0.2218409925699234, "learning_rate": 6.692237517152928e-05, "loss": 3.0668804168701174, "step": 141090 }, { "epoch": 0.318, "grad_norm": 0.21749259531497955, "learning_rate": 6.691200215536237e-05, "loss": 3.0528961181640626, "step": 141100 }, { "epoch": 0.31806666666666666, "grad_norm": 0.3296882212162018, "learning_rate": 6.69016283172083e-05, "loss": 2.720260238647461, "step": 141110 }, { "epoch": 0.3181333333333333, "grad_norm": 0.2000923752784729, "learning_rate": 6.689125365757125e-05, "loss": 3.0277191162109376, "step": 141120 }, { "epoch": 0.3182, "grad_norm": 0.1944703608751297, "learning_rate": 6.688087817695548e-05, "loss": 3.034079170227051, "step": 141130 }, { "epoch": 0.31826666666666664, "grad_norm": 0.19559912383556366, "learning_rate": 6.68705018758653e-05, "loss": 3.082249641418457, "step": 141140 }, { "epoch": 0.31833333333333336, "grad_norm": 0.19209378957748413, "learning_rate": 6.6860124754805e-05, "loss": 3.0871555328369142, "step": 141150 }, { "epoch": 0.3184, "grad_norm": 0.20067058503627777, "learning_rate": 6.684974681427897e-05, "loss": 3.0932680130004884, "step": 141160 }, { "epoch": 0.3184666666666667, "grad_norm": 0.32111525535583496, "learning_rate": 6.68393680547916e-05, "loss": 3.0623912811279297, "step": 141170 }, { "epoch": 0.31853333333333333, "grad_norm": 0.193163201212883, "learning_rate": 6.682898847684738e-05, "loss": 3.073040008544922, "step": 141180 }, { "epoch": 0.3186, "grad_norm": 0.19617559015750885, "learning_rate": 6.681860808095074e-05, "loss": 3.063777732849121, "step": 141190 }, { "epoch": 0.31866666666666665, "grad_norm": 0.2042887657880783, "learning_rate": 6.680822686760623e-05, "loss": 3.1113853454589844, "step": 141200 }, { "epoch": 0.3187333333333333, "grad_norm": 0.215925931930542, "learning_rate": 6.679784483731843e-05, "loss": 3.057962417602539, "step": 141210 }, { "epoch": 0.3188, "grad_norm": 0.23575522005558014, "learning_rate": 6.678746199059194e-05, "loss": 3.059622955322266, "step": 141220 }, { "epoch": 0.3188666666666667, "grad_norm": 0.19467128813266754, "learning_rate": 6.677707832793138e-05, "loss": 3.07348690032959, "step": 141230 }, { "epoch": 0.31893333333333335, "grad_norm": 0.20028367638587952, "learning_rate": 6.676669384984148e-05, "loss": 3.044710159301758, "step": 141240 }, { "epoch": 0.319, "grad_norm": 0.2386050969362259, "learning_rate": 6.675630855682692e-05, "loss": 3.074580764770508, "step": 141250 }, { "epoch": 0.31906666666666667, "grad_norm": 0.19161567091941833, "learning_rate": 6.674592244939248e-05, "loss": 3.043287467956543, "step": 141260 }, { "epoch": 0.3191333333333333, "grad_norm": 0.20833732187747955, "learning_rate": 6.673553552804299e-05, "loss": 3.1538211822509767, "step": 141270 }, { "epoch": 0.3192, "grad_norm": 0.20054127275943756, "learning_rate": 6.672514779328326e-05, "loss": 3.0550052642822267, "step": 141280 }, { "epoch": 0.31926666666666664, "grad_norm": 0.19310972094535828, "learning_rate": 6.671475924561818e-05, "loss": 3.125807762145996, "step": 141290 }, { "epoch": 0.31933333333333336, "grad_norm": 0.21451027691364288, "learning_rate": 6.670436988555269e-05, "loss": 3.174092674255371, "step": 141300 }, { "epoch": 0.3194, "grad_norm": 0.20129257440567017, "learning_rate": 6.669397971359173e-05, "loss": 3.0658411026000976, "step": 141310 }, { "epoch": 0.3194666666666667, "grad_norm": 0.2202337086200714, "learning_rate": 6.668358873024031e-05, "loss": 3.0798343658447265, "step": 141320 }, { "epoch": 0.31953333333333334, "grad_norm": 0.20883667469024658, "learning_rate": 6.667319693600348e-05, "loss": 3.0887788772583007, "step": 141330 }, { "epoch": 0.3196, "grad_norm": 0.19863103330135345, "learning_rate": 6.666280433138632e-05, "loss": 3.107638931274414, "step": 141340 }, { "epoch": 0.31966666666666665, "grad_norm": 0.20564590394496918, "learning_rate": 6.665241091689393e-05, "loss": 3.110915946960449, "step": 141350 }, { "epoch": 0.3197333333333333, "grad_norm": 0.28493398427963257, "learning_rate": 6.664201669303148e-05, "loss": 3.076621437072754, "step": 141360 }, { "epoch": 0.3198, "grad_norm": 0.20019112527370453, "learning_rate": 6.663162166030417e-05, "loss": 3.0320068359375, "step": 141370 }, { "epoch": 0.3198666666666667, "grad_norm": 0.2968514859676361, "learning_rate": 6.662122581921726e-05, "loss": 3.0391807556152344, "step": 141380 }, { "epoch": 0.31993333333333335, "grad_norm": 0.2020113170146942, "learning_rate": 6.661082917027598e-05, "loss": 3.0557727813720703, "step": 141390 }, { "epoch": 0.32, "grad_norm": 0.20554804801940918, "learning_rate": 6.660043171398569e-05, "loss": 3.1499906539916993, "step": 141400 }, { "epoch": 0.32006666666666667, "grad_norm": 0.20624572038650513, "learning_rate": 6.65900334508517e-05, "loss": 3.0447601318359374, "step": 141410 }, { "epoch": 0.3201333333333333, "grad_norm": 0.23198524117469788, "learning_rate": 6.657963438137947e-05, "loss": 3.0876394271850587, "step": 141420 }, { "epoch": 0.3202, "grad_norm": 0.22250328958034515, "learning_rate": 6.656923450607437e-05, "loss": 3.1058328628540037, "step": 141430 }, { "epoch": 0.32026666666666664, "grad_norm": 0.28902673721313477, "learning_rate": 6.655883382544192e-05, "loss": 3.1057348251342773, "step": 141440 }, { "epoch": 0.32033333333333336, "grad_norm": 0.19543632864952087, "learning_rate": 6.65484323399876e-05, "loss": 3.0570980072021485, "step": 141450 }, { "epoch": 0.3204, "grad_norm": 0.20620104670524597, "learning_rate": 6.653803005021697e-05, "loss": 3.0905006408691404, "step": 141460 }, { "epoch": 0.3204666666666667, "grad_norm": 0.19606634974479675, "learning_rate": 6.652762695663562e-05, "loss": 3.109770393371582, "step": 141470 }, { "epoch": 0.32053333333333334, "grad_norm": 0.21684013307094574, "learning_rate": 6.651722305974919e-05, "loss": 3.1166109085083007, "step": 141480 }, { "epoch": 0.3206, "grad_norm": 0.6075031161308289, "learning_rate": 6.650681836006334e-05, "loss": 3.1310260772705076, "step": 141490 }, { "epoch": 0.32066666666666666, "grad_norm": 0.20025984942913055, "learning_rate": 6.649641285808379e-05, "loss": 3.170131874084473, "step": 141500 }, { "epoch": 0.3207333333333333, "grad_norm": 3.950049638748169, "learning_rate": 6.648600655431627e-05, "loss": 4.056352233886718, "step": 141510 }, { "epoch": 0.3208, "grad_norm": 0.21925601363182068, "learning_rate": 6.647559944926658e-05, "loss": 3.4692630767822266, "step": 141520 }, { "epoch": 0.3208666666666667, "grad_norm": 0.23255404829978943, "learning_rate": 6.646519154344053e-05, "loss": 3.0988742828369142, "step": 141530 }, { "epoch": 0.32093333333333335, "grad_norm": 0.23969772458076477, "learning_rate": 6.645478283734399e-05, "loss": 3.189076805114746, "step": 141540 }, { "epoch": 0.321, "grad_norm": 0.2640736401081085, "learning_rate": 6.644437333148285e-05, "loss": 3.024021530151367, "step": 141550 }, { "epoch": 0.32106666666666667, "grad_norm": 0.2128422111272812, "learning_rate": 6.643396302636309e-05, "loss": 3.1781217575073244, "step": 141560 }, { "epoch": 0.3211333333333333, "grad_norm": 0.22104446589946747, "learning_rate": 6.642355192249065e-05, "loss": 3.070182991027832, "step": 141570 }, { "epoch": 0.3212, "grad_norm": 0.23469041287899017, "learning_rate": 6.641314002037156e-05, "loss": 3.096900749206543, "step": 141580 }, { "epoch": 0.32126666666666664, "grad_norm": 0.20325268805027008, "learning_rate": 6.640272732051189e-05, "loss": 3.0774309158325197, "step": 141590 }, { "epoch": 0.32133333333333336, "grad_norm": 0.2165001779794693, "learning_rate": 6.639231382341772e-05, "loss": 3.0847755432128907, "step": 141600 }, { "epoch": 0.3214, "grad_norm": 0.20487043261528015, "learning_rate": 6.638189952959519e-05, "loss": 3.0757162094116213, "step": 141610 }, { "epoch": 0.3214666666666667, "grad_norm": 0.20170100033283234, "learning_rate": 6.637148443955048e-05, "loss": 3.061941719055176, "step": 141620 }, { "epoch": 0.32153333333333334, "grad_norm": 0.2554703652858734, "learning_rate": 6.636106855378981e-05, "loss": 3.11771240234375, "step": 141630 }, { "epoch": 0.3216, "grad_norm": 0.2261139154434204, "learning_rate": 6.635065187281941e-05, "loss": 3.0975584030151366, "step": 141640 }, { "epoch": 0.32166666666666666, "grad_norm": 0.25172117352485657, "learning_rate": 6.634023439714558e-05, "loss": 3.1170631408691407, "step": 141650 }, { "epoch": 0.3217333333333333, "grad_norm": 0.20548219978809357, "learning_rate": 6.632981612727466e-05, "loss": 3.0820491790771483, "step": 141660 }, { "epoch": 0.3218, "grad_norm": 0.201654314994812, "learning_rate": 6.6319397063713e-05, "loss": 3.0390867233276366, "step": 141670 }, { "epoch": 0.3218666666666667, "grad_norm": 0.22895249724388123, "learning_rate": 6.630897720696703e-05, "loss": 2.9968284606933593, "step": 141680 }, { "epoch": 0.32193333333333335, "grad_norm": 0.19959981739521027, "learning_rate": 6.629855655754316e-05, "loss": 3.0777435302734375, "step": 141690 }, { "epoch": 0.322, "grad_norm": 0.7148136496543884, "learning_rate": 6.628813511594789e-05, "loss": 3.384320831298828, "step": 141700 }, { "epoch": 0.32206666666666667, "grad_norm": 0.20335261523723602, "learning_rate": 6.627771288268774e-05, "loss": 3.1072179794311525, "step": 141710 }, { "epoch": 0.3221333333333333, "grad_norm": 0.24886304140090942, "learning_rate": 6.626728985826927e-05, "loss": 3.0577167510986327, "step": 141720 }, { "epoch": 0.3222, "grad_norm": 0.20839519798755646, "learning_rate": 6.625686604319909e-05, "loss": 3.046116065979004, "step": 141730 }, { "epoch": 0.32226666666666665, "grad_norm": 0.2211132049560547, "learning_rate": 6.624644143798383e-05, "loss": 3.107321357727051, "step": 141740 }, { "epoch": 0.32233333333333336, "grad_norm": 0.197379469871521, "learning_rate": 6.623601604313016e-05, "loss": 3.077088165283203, "step": 141750 }, { "epoch": 0.3224, "grad_norm": 0.21090474724769592, "learning_rate": 6.622558985914477e-05, "loss": 3.1290159225463867, "step": 141760 }, { "epoch": 0.3224666666666667, "grad_norm": 0.19444601237773895, "learning_rate": 6.621516288653446e-05, "loss": 3.0520298004150392, "step": 141770 }, { "epoch": 0.32253333333333334, "grad_norm": 0.197790265083313, "learning_rate": 6.6204735125806e-05, "loss": 3.084744453430176, "step": 141780 }, { "epoch": 0.3226, "grad_norm": 0.20964941382408142, "learning_rate": 6.619430657746621e-05, "loss": 3.0466312408447265, "step": 141790 }, { "epoch": 0.32266666666666666, "grad_norm": 0.19316516816616058, "learning_rate": 6.618387724202196e-05, "loss": 2.9872695922851564, "step": 141800 }, { "epoch": 0.3227333333333333, "grad_norm": 0.20276102423667908, "learning_rate": 6.617344711998016e-05, "loss": 3.0540420532226564, "step": 141810 }, { "epoch": 0.3228, "grad_norm": 0.3270992338657379, "learning_rate": 6.616301621184775e-05, "loss": 3.146649932861328, "step": 141820 }, { "epoch": 0.3228666666666667, "grad_norm": 0.20351862907409668, "learning_rate": 6.615258451813173e-05, "loss": 3.1254629135131835, "step": 141830 }, { "epoch": 0.32293333333333335, "grad_norm": 0.24153663218021393, "learning_rate": 6.614215203933909e-05, "loss": 3.061900329589844, "step": 141840 }, { "epoch": 0.323, "grad_norm": 0.2060500979423523, "learning_rate": 6.61317187759769e-05, "loss": 3.0779935836791994, "step": 141850 }, { "epoch": 0.32306666666666667, "grad_norm": 0.21465618908405304, "learning_rate": 6.612128472855224e-05, "loss": 3.055446815490723, "step": 141860 }, { "epoch": 0.32313333333333333, "grad_norm": 0.1996673196554184, "learning_rate": 6.611084989757228e-05, "loss": 3.076155090332031, "step": 141870 }, { "epoch": 0.3232, "grad_norm": 0.20946669578552246, "learning_rate": 6.610041428354416e-05, "loss": 3.03954963684082, "step": 141880 }, { "epoch": 0.32326666666666665, "grad_norm": 0.202721506357193, "learning_rate": 6.608997788697511e-05, "loss": 3.0880901336669924, "step": 141890 }, { "epoch": 0.3233333333333333, "grad_norm": 0.24608214199543, "learning_rate": 6.607954070837238e-05, "loss": 3.069511604309082, "step": 141900 }, { "epoch": 0.3234, "grad_norm": 0.21804389357566833, "learning_rate": 6.606910274824324e-05, "loss": 3.0535795211791994, "step": 141910 }, { "epoch": 0.3234666666666667, "grad_norm": 0.22120966017246246, "learning_rate": 6.605866400709503e-05, "loss": 3.0902395248413086, "step": 141920 }, { "epoch": 0.32353333333333334, "grad_norm": 0.20027396082878113, "learning_rate": 6.604822448543508e-05, "loss": 3.0814905166625977, "step": 141930 }, { "epoch": 0.3236, "grad_norm": 0.21746215224266052, "learning_rate": 6.603778418377083e-05, "loss": 3.0529674530029296, "step": 141940 }, { "epoch": 0.32366666666666666, "grad_norm": 0.19826188683509827, "learning_rate": 6.60273431026097e-05, "loss": 3.044199562072754, "step": 141950 }, { "epoch": 0.3237333333333333, "grad_norm": 0.20546935498714447, "learning_rate": 6.601690124245919e-05, "loss": 3.0539134979248046, "step": 141960 }, { "epoch": 0.3238, "grad_norm": 0.20666714012622833, "learning_rate": 6.600645860382675e-05, "loss": 3.0523916244506837, "step": 141970 }, { "epoch": 0.3238666666666667, "grad_norm": 0.19691677391529083, "learning_rate": 6.599601518722001e-05, "loss": 3.0429691314697265, "step": 141980 }, { "epoch": 0.32393333333333335, "grad_norm": 0.20288024842739105, "learning_rate": 6.598557099314651e-05, "loss": 3.1651548385620116, "step": 141990 }, { "epoch": 0.324, "grad_norm": 0.26636266708374023, "learning_rate": 6.597512602211389e-05, "loss": 3.058724021911621, "step": 142000 }, { "epoch": 0.32406666666666667, "grad_norm": 0.8114754557609558, "learning_rate": 6.596468027462981e-05, "loss": 3.0780920028686523, "step": 142010 }, { "epoch": 0.32413333333333333, "grad_norm": 0.19648000597953796, "learning_rate": 6.595423375120197e-05, "loss": 3.095498466491699, "step": 142020 }, { "epoch": 0.3242, "grad_norm": 0.2001439481973648, "learning_rate": 6.594378645233814e-05, "loss": 3.1574512481689454, "step": 142030 }, { "epoch": 0.32426666666666665, "grad_norm": 0.23710420727729797, "learning_rate": 6.593333837854607e-05, "loss": 3.0156978607177733, "step": 142040 }, { "epoch": 0.3243333333333333, "grad_norm": 0.19072726368904114, "learning_rate": 6.592288953033358e-05, "loss": 3.007805824279785, "step": 142050 }, { "epoch": 0.3244, "grad_norm": 0.20497344434261322, "learning_rate": 6.591243990820852e-05, "loss": 3.07656307220459, "step": 142060 }, { "epoch": 0.3244666666666667, "grad_norm": 0.2153298407793045, "learning_rate": 6.590198951267882e-05, "loss": 3.133647155761719, "step": 142070 }, { "epoch": 0.32453333333333334, "grad_norm": 0.22376836836338043, "learning_rate": 6.589153834425235e-05, "loss": 3.0710512161254884, "step": 142080 }, { "epoch": 0.3246, "grad_norm": 0.20900607109069824, "learning_rate": 6.58810864034371e-05, "loss": 3.0820686340332033, "step": 142090 }, { "epoch": 0.32466666666666666, "grad_norm": 0.24319341778755188, "learning_rate": 6.587063369074108e-05, "loss": 3.0780080795288085, "step": 142100 }, { "epoch": 0.3247333333333333, "grad_norm": 0.20712266862392426, "learning_rate": 6.586018020667233e-05, "loss": 3.097504806518555, "step": 142110 }, { "epoch": 0.3248, "grad_norm": 0.2099536657333374, "learning_rate": 6.584972595173893e-05, "loss": 3.1266056060791017, "step": 142120 }, { "epoch": 0.3248666666666667, "grad_norm": 0.2108115255832672, "learning_rate": 6.583927092644898e-05, "loss": 3.019967460632324, "step": 142130 }, { "epoch": 0.32493333333333335, "grad_norm": 0.21178174018859863, "learning_rate": 6.582881513131066e-05, "loss": 3.0213035583496093, "step": 142140 }, { "epoch": 0.325, "grad_norm": 0.21247880160808563, "learning_rate": 6.581835856683214e-05, "loss": 3.06943359375, "step": 142150 }, { "epoch": 0.32506666666666667, "grad_norm": 0.20007942616939545, "learning_rate": 6.580790123352166e-05, "loss": 3.074766731262207, "step": 142160 }, { "epoch": 0.32513333333333333, "grad_norm": 0.41712966561317444, "learning_rate": 6.579744313188749e-05, "loss": 3.5889877319335937, "step": 142170 }, { "epoch": 0.3252, "grad_norm": 0.2419971227645874, "learning_rate": 6.578698426243791e-05, "loss": 3.5337448120117188, "step": 142180 }, { "epoch": 0.32526666666666665, "grad_norm": 0.1935780942440033, "learning_rate": 6.577652462568126e-05, "loss": 3.00515193939209, "step": 142190 }, { "epoch": 0.3253333333333333, "grad_norm": 0.19898483157157898, "learning_rate": 6.576606422212597e-05, "loss": 3.0632583618164064, "step": 142200 }, { "epoch": 0.3254, "grad_norm": 0.1908819079399109, "learning_rate": 6.575560305228039e-05, "loss": 3.065135192871094, "step": 142210 }, { "epoch": 0.3254666666666667, "grad_norm": 0.264557421207428, "learning_rate": 6.574514111665302e-05, "loss": 3.1376615524291993, "step": 142220 }, { "epoch": 0.32553333333333334, "grad_norm": 0.23119772970676422, "learning_rate": 6.573467841575233e-05, "loss": 3.0508514404296876, "step": 142230 }, { "epoch": 0.3256, "grad_norm": 0.2030603140592575, "learning_rate": 6.572421495008683e-05, "loss": 3.0896299362182615, "step": 142240 }, { "epoch": 0.32566666666666666, "grad_norm": 0.22288836538791656, "learning_rate": 6.571375072016512e-05, "loss": 3.0750049591064452, "step": 142250 }, { "epoch": 0.3257333333333333, "grad_norm": 0.1981414556503296, "learning_rate": 6.570328572649577e-05, "loss": 3.050869369506836, "step": 142260 }, { "epoch": 0.3258, "grad_norm": 0.24864934384822845, "learning_rate": 6.569281996958743e-05, "loss": 3.1067073822021483, "step": 142270 }, { "epoch": 0.3258666666666667, "grad_norm": 0.21134118735790253, "learning_rate": 6.568235344994879e-05, "loss": 3.0691205978393556, "step": 142280 }, { "epoch": 0.32593333333333335, "grad_norm": 0.2155112773180008, "learning_rate": 6.567188616808854e-05, "loss": 3.247811508178711, "step": 142290 }, { "epoch": 0.326, "grad_norm": 0.3526473641395569, "learning_rate": 6.566141812451543e-05, "loss": 3.139781379699707, "step": 142300 }, { "epoch": 0.32606666666666667, "grad_norm": 0.21003997325897217, "learning_rate": 6.565094931973827e-05, "loss": 3.154277229309082, "step": 142310 }, { "epoch": 0.32613333333333333, "grad_norm": 0.22467350959777832, "learning_rate": 6.564047975426586e-05, "loss": 3.1287168502807616, "step": 142320 }, { "epoch": 0.3262, "grad_norm": 0.2341964691877365, "learning_rate": 6.563000942860706e-05, "loss": 3.0468576431274412, "step": 142330 }, { "epoch": 0.32626666666666665, "grad_norm": 0.2101985514163971, "learning_rate": 6.561953834327078e-05, "loss": 3.042616271972656, "step": 142340 }, { "epoch": 0.3263333333333333, "grad_norm": 0.19335944950580597, "learning_rate": 6.560906649876594e-05, "loss": 3.02301025390625, "step": 142350 }, { "epoch": 0.3264, "grad_norm": 0.2252260148525238, "learning_rate": 6.559859389560155e-05, "loss": 3.1544269561767577, "step": 142360 }, { "epoch": 0.3264666666666667, "grad_norm": 0.19061268866062164, "learning_rate": 6.558812053428657e-05, "loss": 3.095094108581543, "step": 142370 }, { "epoch": 0.32653333333333334, "grad_norm": 0.19587811827659607, "learning_rate": 6.557764641533006e-05, "loss": 3.1044601440429687, "step": 142380 }, { "epoch": 0.3266, "grad_norm": 0.2106415331363678, "learning_rate": 6.556717153924112e-05, "loss": 3.106619453430176, "step": 142390 }, { "epoch": 0.32666666666666666, "grad_norm": 0.2395533174276352, "learning_rate": 6.555669590652885e-05, "loss": 3.0192827224731444, "step": 142400 }, { "epoch": 0.3267333333333333, "grad_norm": 0.2234927862882614, "learning_rate": 6.55462195177024e-05, "loss": 3.127855110168457, "step": 142410 }, { "epoch": 0.3268, "grad_norm": 0.19717641174793243, "learning_rate": 6.553574237327097e-05, "loss": 3.124235725402832, "step": 142420 }, { "epoch": 0.3268666666666667, "grad_norm": 0.2155969887971878, "learning_rate": 6.55252644737438e-05, "loss": 3.06220703125, "step": 142430 }, { "epoch": 0.32693333333333335, "grad_norm": 0.22725126147270203, "learning_rate": 6.551478581963015e-05, "loss": 3.065756988525391, "step": 142440 }, { "epoch": 0.327, "grad_norm": 0.2080198973417282, "learning_rate": 6.55043064114393e-05, "loss": 3.030874252319336, "step": 142450 }, { "epoch": 0.32706666666666667, "grad_norm": 0.20068293809890747, "learning_rate": 6.549382624968062e-05, "loss": 3.068501663208008, "step": 142460 }, { "epoch": 0.32713333333333333, "grad_norm": 0.21008791029453278, "learning_rate": 6.548334533486347e-05, "loss": 3.0595537185668946, "step": 142470 }, { "epoch": 0.3272, "grad_norm": 0.21017126739025116, "learning_rate": 6.547286366749726e-05, "loss": 3.04055118560791, "step": 142480 }, { "epoch": 0.32726666666666665, "grad_norm": 0.23922812938690186, "learning_rate": 6.546238124809145e-05, "loss": 3.0351583480834963, "step": 142490 }, { "epoch": 0.3273333333333333, "grad_norm": 0.2281261384487152, "learning_rate": 6.545189807715551e-05, "loss": 3.1263656616210938, "step": 142500 }, { "epoch": 0.3274, "grad_norm": 0.29017317295074463, "learning_rate": 6.544141415519897e-05, "loss": 3.1446697235107424, "step": 142510 }, { "epoch": 0.3274666666666667, "grad_norm": 0.22727684676647186, "learning_rate": 6.54309294827314e-05, "loss": 2.932598114013672, "step": 142520 }, { "epoch": 0.32753333333333334, "grad_norm": 0.23506124317646027, "learning_rate": 6.542044406026236e-05, "loss": 3.101615333557129, "step": 142530 }, { "epoch": 0.3276, "grad_norm": 0.3234335780143738, "learning_rate": 6.540995788830153e-05, "loss": 3.0567018508911135, "step": 142540 }, { "epoch": 0.32766666666666666, "grad_norm": 0.2052137851715088, "learning_rate": 6.539947096735854e-05, "loss": 3.065748405456543, "step": 142550 }, { "epoch": 0.3277333333333333, "grad_norm": 0.2023831158876419, "learning_rate": 6.53889832979431e-05, "loss": 3.040292167663574, "step": 142560 }, { "epoch": 0.3278, "grad_norm": 0.20058061182498932, "learning_rate": 6.537849488056496e-05, "loss": 3.0663625717163088, "step": 142570 }, { "epoch": 0.32786666666666664, "grad_norm": 0.1999233514070511, "learning_rate": 6.536800571573389e-05, "loss": 3.017694664001465, "step": 142580 }, { "epoch": 0.32793333333333335, "grad_norm": 0.19381386041641235, "learning_rate": 6.535751580395972e-05, "loss": 3.0701831817626952, "step": 142590 }, { "epoch": 0.328, "grad_norm": 0.2285601943731308, "learning_rate": 6.534702514575226e-05, "loss": 3.0164194107055664, "step": 142600 }, { "epoch": 0.3280666666666667, "grad_norm": 0.20723797380924225, "learning_rate": 6.533653374162143e-05, "loss": 3.0267881393432616, "step": 142610 }, { "epoch": 0.32813333333333333, "grad_norm": 0.21616613864898682, "learning_rate": 6.532604159207714e-05, "loss": 3.051464080810547, "step": 142620 }, { "epoch": 0.3282, "grad_norm": 0.20502933859825134, "learning_rate": 6.531554869762936e-05, "loss": 3.018601417541504, "step": 142630 }, { "epoch": 0.32826666666666665, "grad_norm": 0.21382132172584534, "learning_rate": 6.530505505878808e-05, "loss": 3.144314765930176, "step": 142640 }, { "epoch": 0.3283333333333333, "grad_norm": 0.22845439612865448, "learning_rate": 6.52945606760633e-05, "loss": 3.0532066345214846, "step": 142650 }, { "epoch": 0.3284, "grad_norm": 0.2328505665063858, "learning_rate": 6.528406554996513e-05, "loss": 3.0626548767089843, "step": 142660 }, { "epoch": 0.3284666666666667, "grad_norm": 0.1936771273612976, "learning_rate": 6.527356968100366e-05, "loss": 3.0758710861206056, "step": 142670 }, { "epoch": 0.32853333333333334, "grad_norm": 0.20706139504909515, "learning_rate": 6.5263073069689e-05, "loss": 3.027085876464844, "step": 142680 }, { "epoch": 0.3286, "grad_norm": 0.22533617913722992, "learning_rate": 6.525257571653138e-05, "loss": 3.0359724044799803, "step": 142690 }, { "epoch": 0.32866666666666666, "grad_norm": 0.21748220920562744, "learning_rate": 6.524207762204095e-05, "loss": 3.051622772216797, "step": 142700 }, { "epoch": 0.3287333333333333, "grad_norm": 0.2645784914493561, "learning_rate": 6.523157878672799e-05, "loss": 3.035235786437988, "step": 142710 }, { "epoch": 0.3288, "grad_norm": 0.19765403866767883, "learning_rate": 6.522107921110278e-05, "loss": 3.1728858947753906, "step": 142720 }, { "epoch": 0.32886666666666664, "grad_norm": 0.31293928623199463, "learning_rate": 6.521057889567565e-05, "loss": 3.102817344665527, "step": 142730 }, { "epoch": 0.32893333333333336, "grad_norm": 0.20152895152568817, "learning_rate": 6.520007784095695e-05, "loss": 3.0634870529174805, "step": 142740 }, { "epoch": 0.329, "grad_norm": 0.2044340968132019, "learning_rate": 6.518957604745704e-05, "loss": 3.075705146789551, "step": 142750 }, { "epoch": 0.3290666666666667, "grad_norm": 0.23252452909946442, "learning_rate": 6.517907351568637e-05, "loss": 3.1478126525878904, "step": 142760 }, { "epoch": 0.32913333333333333, "grad_norm": 0.21991021931171417, "learning_rate": 6.516857024615543e-05, "loss": 3.061351203918457, "step": 142770 }, { "epoch": 0.3292, "grad_norm": 0.19404709339141846, "learning_rate": 6.515806623937466e-05, "loss": 3.0874284744262694, "step": 142780 }, { "epoch": 0.32926666666666665, "grad_norm": 0.19655288755893707, "learning_rate": 6.514756149585466e-05, "loss": 3.088675308227539, "step": 142790 }, { "epoch": 0.3293333333333333, "grad_norm": 0.21087664365768433, "learning_rate": 6.513705601610596e-05, "loss": 3.069390869140625, "step": 142800 }, { "epoch": 0.3294, "grad_norm": 0.20438751578330994, "learning_rate": 6.512654980063915e-05, "loss": 3.08284912109375, "step": 142810 }, { "epoch": 0.3294666666666667, "grad_norm": 0.19620326161384583, "learning_rate": 6.511604284996491e-05, "loss": 3.0911272048950194, "step": 142820 }, { "epoch": 0.32953333333333334, "grad_norm": 0.2204519510269165, "learning_rate": 6.510553516459391e-05, "loss": 3.01390380859375, "step": 142830 }, { "epoch": 0.3296, "grad_norm": 0.2256813496351242, "learning_rate": 6.509502674503685e-05, "loss": 2.9218990325927736, "step": 142840 }, { "epoch": 0.32966666666666666, "grad_norm": 0.25128546357154846, "learning_rate": 6.508451759180447e-05, "loss": 3.117828369140625, "step": 142850 }, { "epoch": 0.3297333333333333, "grad_norm": 0.21911221742630005, "learning_rate": 6.507400770540758e-05, "loss": 3.165704345703125, "step": 142860 }, { "epoch": 0.3298, "grad_norm": 0.19048592448234558, "learning_rate": 6.506349708635699e-05, "loss": 3.0502294540405273, "step": 142870 }, { "epoch": 0.32986666666666664, "grad_norm": 0.2078658789396286, "learning_rate": 6.505298573516357e-05, "loss": 3.061653709411621, "step": 142880 }, { "epoch": 0.32993333333333336, "grad_norm": 0.1990717053413391, "learning_rate": 6.504247365233818e-05, "loss": 3.123483085632324, "step": 142890 }, { "epoch": 0.33, "grad_norm": 0.2069348841905594, "learning_rate": 6.503196083839174e-05, "loss": 3.079271697998047, "step": 142900 }, { "epoch": 0.3300666666666667, "grad_norm": 0.22102993726730347, "learning_rate": 6.502144729383527e-05, "loss": 3.039951515197754, "step": 142910 }, { "epoch": 0.33013333333333333, "grad_norm": 0.20475584268569946, "learning_rate": 6.501093301917971e-05, "loss": 3.021999168395996, "step": 142920 }, { "epoch": 0.3302, "grad_norm": 0.2299342304468155, "learning_rate": 6.500041801493614e-05, "loss": 3.0812110900878906, "step": 142930 }, { "epoch": 0.33026666666666665, "grad_norm": 0.1976166069507599, "learning_rate": 6.498990228161559e-05, "loss": 3.081429100036621, "step": 142940 }, { "epoch": 0.3303333333333333, "grad_norm": 0.1969723105430603, "learning_rate": 6.497938581972918e-05, "loss": 3.2893344879150392, "step": 142950 }, { "epoch": 0.3304, "grad_norm": 0.20128069818019867, "learning_rate": 6.496886862978806e-05, "loss": 3.0635719299316406, "step": 142960 }, { "epoch": 0.3304666666666667, "grad_norm": 0.2301284372806549, "learning_rate": 6.495835071230338e-05, "loss": 3.0478757858276366, "step": 142970 }, { "epoch": 0.33053333333333335, "grad_norm": 0.19415351748466492, "learning_rate": 6.494783206778639e-05, "loss": 3.0592626571655273, "step": 142980 }, { "epoch": 0.3306, "grad_norm": 0.20271123945713043, "learning_rate": 6.493731269674829e-05, "loss": 2.9932661056518555, "step": 142990 }, { "epoch": 0.33066666666666666, "grad_norm": 0.19259299337863922, "learning_rate": 6.492679259970039e-05, "loss": 3.076495552062988, "step": 143000 }, { "epoch": 0.3307333333333333, "grad_norm": 0.20085643231868744, "learning_rate": 6.491627177715398e-05, "loss": 3.0022525787353516, "step": 143010 }, { "epoch": 0.3308, "grad_norm": 0.2042025923728943, "learning_rate": 6.490575022962046e-05, "loss": 3.1495676040649414, "step": 143020 }, { "epoch": 0.33086666666666664, "grad_norm": 0.19770000874996185, "learning_rate": 6.489522795761116e-05, "loss": 3.0330860137939455, "step": 143030 }, { "epoch": 0.33093333333333336, "grad_norm": 0.26228031516075134, "learning_rate": 6.488470496163754e-05, "loss": 3.0227262496948244, "step": 143040 }, { "epoch": 0.331, "grad_norm": 0.20324614644050598, "learning_rate": 6.487418124221106e-05, "loss": 3.022336006164551, "step": 143050 }, { "epoch": 0.3310666666666667, "grad_norm": 0.20943526923656464, "learning_rate": 6.486365679984318e-05, "loss": 3.0228652954101562, "step": 143060 }, { "epoch": 0.33113333333333334, "grad_norm": 0.20707252621650696, "learning_rate": 6.485313163504547e-05, "loss": 3.085906219482422, "step": 143070 }, { "epoch": 0.3312, "grad_norm": 0.20516763627529144, "learning_rate": 6.484260574832945e-05, "loss": 3.0330318450927733, "step": 143080 }, { "epoch": 0.33126666666666665, "grad_norm": 0.2821410298347473, "learning_rate": 6.483207914020675e-05, "loss": 3.1178926467895507, "step": 143090 }, { "epoch": 0.3313333333333333, "grad_norm": 0.20684832334518433, "learning_rate": 6.482155181118898e-05, "loss": 3.0953012466430665, "step": 143100 }, { "epoch": 0.3314, "grad_norm": 0.18878930807113647, "learning_rate": 6.481102376178783e-05, "loss": 3.0179107666015623, "step": 143110 }, { "epoch": 0.3314666666666667, "grad_norm": 0.21173465251922607, "learning_rate": 6.4800494992515e-05, "loss": 3.071986770629883, "step": 143120 }, { "epoch": 0.33153333333333335, "grad_norm": 0.22755934298038483, "learning_rate": 6.47899655038822e-05, "loss": 3.0792844772338865, "step": 143130 }, { "epoch": 0.3316, "grad_norm": 0.2572055757045746, "learning_rate": 6.477943529640123e-05, "loss": 3.104561614990234, "step": 143140 }, { "epoch": 0.33166666666666667, "grad_norm": 0.20776621997356415, "learning_rate": 6.476890437058389e-05, "loss": 3.089533042907715, "step": 143150 }, { "epoch": 0.3317333333333333, "grad_norm": 0.4120592176914215, "learning_rate": 6.475837272694202e-05, "loss": 3.1076269149780273, "step": 143160 }, { "epoch": 0.3318, "grad_norm": 0.21580569446086884, "learning_rate": 6.47478403659875e-05, "loss": 3.102906608581543, "step": 143170 }, { "epoch": 0.33186666666666664, "grad_norm": 0.24597753584384918, "learning_rate": 6.473730728823225e-05, "loss": 3.033608818054199, "step": 143180 }, { "epoch": 0.33193333333333336, "grad_norm": 0.23150718212127686, "learning_rate": 6.47267734941882e-05, "loss": 3.0681787490844727, "step": 143190 }, { "epoch": 0.332, "grad_norm": 0.2376984804868698, "learning_rate": 6.471623898436737e-05, "loss": 3.0184988021850585, "step": 143200 }, { "epoch": 0.3320666666666667, "grad_norm": 0.22384308278560638, "learning_rate": 6.470570375928174e-05, "loss": 3.1253101348876955, "step": 143210 }, { "epoch": 0.33213333333333334, "grad_norm": 0.20905005931854248, "learning_rate": 6.469516781944335e-05, "loss": 3.0666505813598635, "step": 143220 }, { "epoch": 0.3322, "grad_norm": 0.19987310469150543, "learning_rate": 6.468463116536432e-05, "loss": 3.02681941986084, "step": 143230 }, { "epoch": 0.33226666666666665, "grad_norm": 0.2254449874162674, "learning_rate": 6.467409379755676e-05, "loss": 3.0960336685180665, "step": 143240 }, { "epoch": 0.3323333333333333, "grad_norm": 0.19855529069900513, "learning_rate": 6.466355571653282e-05, "loss": 3.0169822692871096, "step": 143250 }, { "epoch": 0.3324, "grad_norm": 0.20523753762245178, "learning_rate": 6.46530169228047e-05, "loss": 3.0237396240234373, "step": 143260 }, { "epoch": 0.3324666666666667, "grad_norm": 0.19186890125274658, "learning_rate": 6.464247741688461e-05, "loss": 3.0554941177368162, "step": 143270 }, { "epoch": 0.33253333333333335, "grad_norm": 0.21353252232074738, "learning_rate": 6.463193719928484e-05, "loss": 3.0209827423095703, "step": 143280 }, { "epoch": 0.3326, "grad_norm": 0.23972566425800323, "learning_rate": 6.462139627051766e-05, "loss": 2.933207130432129, "step": 143290 }, { "epoch": 0.33266666666666667, "grad_norm": 0.1953059434890747, "learning_rate": 6.461085463109539e-05, "loss": 3.079942512512207, "step": 143300 }, { "epoch": 0.3327333333333333, "grad_norm": 0.19979333877563477, "learning_rate": 6.460031228153043e-05, "loss": 3.0393447875976562, "step": 143310 }, { "epoch": 0.3328, "grad_norm": 0.20097561180591583, "learning_rate": 6.458976922233514e-05, "loss": 3.089723014831543, "step": 143320 }, { "epoch": 0.33286666666666664, "grad_norm": 0.20768263936042786, "learning_rate": 6.457922545402197e-05, "loss": 3.0342493057250977, "step": 143330 }, { "epoch": 0.33293333333333336, "grad_norm": 0.19759680330753326, "learning_rate": 6.456868097710338e-05, "loss": 3.1348236083984373, "step": 143340 }, { "epoch": 0.333, "grad_norm": 0.29210489988327026, "learning_rate": 6.455813579209187e-05, "loss": 3.0639114379882812, "step": 143350 }, { "epoch": 0.3330666666666667, "grad_norm": 0.20635297894477844, "learning_rate": 6.454758989950002e-05, "loss": 3.0139835357666014, "step": 143360 }, { "epoch": 0.33313333333333334, "grad_norm": 0.22692975401878357, "learning_rate": 6.453704329984033e-05, "loss": 2.99603385925293, "step": 143370 }, { "epoch": 0.3332, "grad_norm": 0.2114311009645462, "learning_rate": 6.452649599362544e-05, "loss": 3.0653354644775392, "step": 143380 }, { "epoch": 0.33326666666666666, "grad_norm": 0.19097666442394257, "learning_rate": 6.451594798136798e-05, "loss": 3.0433204650878904, "step": 143390 }, { "epoch": 0.3333333333333333, "grad_norm": 0.24960152804851532, "learning_rate": 6.450539926358065e-05, "loss": 3.1101861953735352, "step": 143400 }, { "epoch": 0.3334, "grad_norm": 0.3425430655479431, "learning_rate": 6.449484984077612e-05, "loss": 3.351131820678711, "step": 143410 }, { "epoch": 0.3334666666666667, "grad_norm": 0.2066217064857483, "learning_rate": 6.448429971346714e-05, "loss": 3.0724723815917967, "step": 143420 }, { "epoch": 0.33353333333333335, "grad_norm": 0.20615282654762268, "learning_rate": 6.44737488821665e-05, "loss": 3.0598392486572266, "step": 143430 }, { "epoch": 0.3336, "grad_norm": 0.22299374639987946, "learning_rate": 6.446319734738702e-05, "loss": 3.0162275314331053, "step": 143440 }, { "epoch": 0.33366666666666667, "grad_norm": 0.20442818105220795, "learning_rate": 6.445264510964151e-05, "loss": 3.1248619079589846, "step": 143450 }, { "epoch": 0.3337333333333333, "grad_norm": 0.21617019176483154, "learning_rate": 6.444209216944289e-05, "loss": 3.0903564453125, "step": 143460 }, { "epoch": 0.3338, "grad_norm": 0.22483006119728088, "learning_rate": 6.443153852730404e-05, "loss": 3.0690393447875977, "step": 143470 }, { "epoch": 0.33386666666666664, "grad_norm": 0.24315941333770752, "learning_rate": 6.442098418373791e-05, "loss": 3.049068832397461, "step": 143480 }, { "epoch": 0.33393333333333336, "grad_norm": 0.2010088562965393, "learning_rate": 6.441042913925748e-05, "loss": 3.0657787322998047, "step": 143490 }, { "epoch": 0.334, "grad_norm": 0.2138679474592209, "learning_rate": 6.439987339437579e-05, "loss": 3.0903385162353514, "step": 143500 }, { "epoch": 0.3340666666666667, "grad_norm": 0.21066369116306305, "learning_rate": 6.438931694960587e-05, "loss": 3.197052574157715, "step": 143510 }, { "epoch": 0.33413333333333334, "grad_norm": 0.19785690307617188, "learning_rate": 6.437875980546082e-05, "loss": 3.0533721923828123, "step": 143520 }, { "epoch": 0.3342, "grad_norm": 0.1974993795156479, "learning_rate": 6.436820196245372e-05, "loss": 3.0345975875854494, "step": 143530 }, { "epoch": 0.33426666666666666, "grad_norm": 0.18594308197498322, "learning_rate": 6.435764342109774e-05, "loss": 3.1382015228271483, "step": 143540 }, { "epoch": 0.3343333333333333, "grad_norm": 0.2221164107322693, "learning_rate": 6.434708418190609e-05, "loss": 3.0630489349365235, "step": 143550 }, { "epoch": 0.3344, "grad_norm": 0.26199397444725037, "learning_rate": 6.433652424539195e-05, "loss": 3.1040489196777346, "step": 143560 }, { "epoch": 0.3344666666666667, "grad_norm": 0.21439939737319946, "learning_rate": 6.43259636120686e-05, "loss": 3.075539398193359, "step": 143570 }, { "epoch": 0.33453333333333335, "grad_norm": 0.19322636723518372, "learning_rate": 6.43154022824493e-05, "loss": 3.0268217086791993, "step": 143580 }, { "epoch": 0.3346, "grad_norm": 0.2508547306060791, "learning_rate": 6.43048402570474e-05, "loss": 3.0744487762451174, "step": 143590 }, { "epoch": 0.33466666666666667, "grad_norm": 0.20599007606506348, "learning_rate": 6.429427753637625e-05, "loss": 2.992021369934082, "step": 143600 }, { "epoch": 0.3347333333333333, "grad_norm": 0.21092478930950165, "learning_rate": 6.428371412094922e-05, "loss": 3.0589262008666993, "step": 143610 }, { "epoch": 0.3348, "grad_norm": 0.21820197999477386, "learning_rate": 6.427315001127974e-05, "loss": 3.04748420715332, "step": 143620 }, { "epoch": 0.33486666666666665, "grad_norm": 0.19954784214496613, "learning_rate": 6.426258520788126e-05, "loss": 3.074074935913086, "step": 143630 }, { "epoch": 0.33493333333333336, "grad_norm": 0.2544414699077606, "learning_rate": 6.42520197112673e-05, "loss": 3.1102340698242186, "step": 143640 }, { "epoch": 0.335, "grad_norm": 0.20351733267307281, "learning_rate": 6.424145352195133e-05, "loss": 3.0798099517822264, "step": 143650 }, { "epoch": 0.3350666666666667, "grad_norm": 0.20988470315933228, "learning_rate": 6.423088664044696e-05, "loss": 3.040192222595215, "step": 143660 }, { "epoch": 0.33513333333333334, "grad_norm": 0.26113656163215637, "learning_rate": 6.422031906726774e-05, "loss": 3.052179145812988, "step": 143670 }, { "epoch": 0.3352, "grad_norm": 0.19892948865890503, "learning_rate": 6.420975080292734e-05, "loss": 3.0482101440429688, "step": 143680 }, { "epoch": 0.33526666666666666, "grad_norm": 0.19800062477588654, "learning_rate": 6.419918184793936e-05, "loss": 3.02667179107666, "step": 143690 }, { "epoch": 0.3353333333333333, "grad_norm": 0.19950070977210999, "learning_rate": 6.418861220281752e-05, "loss": 3.0624956130981444, "step": 143700 }, { "epoch": 0.3354, "grad_norm": 0.22113053500652313, "learning_rate": 6.417804186807556e-05, "loss": 3.0458820343017576, "step": 143710 }, { "epoch": 0.3354666666666667, "grad_norm": 0.20483750104904175, "learning_rate": 6.416747084422721e-05, "loss": 3.089750862121582, "step": 143720 }, { "epoch": 0.33553333333333335, "grad_norm": 0.2157076746225357, "learning_rate": 6.415689913178628e-05, "loss": 3.057352638244629, "step": 143730 }, { "epoch": 0.3356, "grad_norm": 0.2210099846124649, "learning_rate": 6.414632673126657e-05, "loss": 3.013254737854004, "step": 143740 }, { "epoch": 0.33566666666666667, "grad_norm": 0.22202527523040771, "learning_rate": 6.413575364318198e-05, "loss": 3.042171096801758, "step": 143750 }, { "epoch": 0.33573333333333333, "grad_norm": 0.22410793602466583, "learning_rate": 6.412517986804637e-05, "loss": 3.0488422393798826, "step": 143760 }, { "epoch": 0.3358, "grad_norm": 0.21804364025592804, "learning_rate": 6.411460540637368e-05, "loss": 2.97921142578125, "step": 143770 }, { "epoch": 0.33586666666666665, "grad_norm": 95.32162475585938, "learning_rate": 6.410403025867786e-05, "loss": 4.418306732177735, "step": 143780 }, { "epoch": 0.3359333333333333, "grad_norm": 0.22873073816299438, "learning_rate": 6.40934544254729e-05, "loss": 3.551525115966797, "step": 143790 }, { "epoch": 0.336, "grad_norm": 0.24834178388118744, "learning_rate": 6.408287790727284e-05, "loss": 3.048597526550293, "step": 143800 }, { "epoch": 0.3360666666666667, "grad_norm": 0.23675456643104553, "learning_rate": 6.407230070459173e-05, "loss": 3.0942333221435545, "step": 143810 }, { "epoch": 0.33613333333333334, "grad_norm": 0.21008117496967316, "learning_rate": 6.406172281794366e-05, "loss": 3.0825998306274416, "step": 143820 }, { "epoch": 0.3362, "grad_norm": 0.3462832272052765, "learning_rate": 6.405114424784276e-05, "loss": 3.740850830078125, "step": 143830 }, { "epoch": 0.33626666666666666, "grad_norm": 0.22462709248065948, "learning_rate": 6.404056499480317e-05, "loss": 3.088753890991211, "step": 143840 }, { "epoch": 0.3363333333333333, "grad_norm": 0.27706876397132874, "learning_rate": 6.402998505933913e-05, "loss": 3.1606489181518556, "step": 143850 }, { "epoch": 0.3364, "grad_norm": 0.21441234648227692, "learning_rate": 6.401940444196481e-05, "loss": 3.0553457260131838, "step": 143860 }, { "epoch": 0.3364666666666667, "grad_norm": 0.21536894142627716, "learning_rate": 6.400882314319449e-05, "loss": 3.0606252670288088, "step": 143870 }, { "epoch": 0.33653333333333335, "grad_norm": 0.19252891838550568, "learning_rate": 6.399824116354246e-05, "loss": 3.1788299560546873, "step": 143880 }, { "epoch": 0.3366, "grad_norm": 0.2228247970342636, "learning_rate": 6.398765850352305e-05, "loss": 3.0434591293334963, "step": 143890 }, { "epoch": 0.33666666666666667, "grad_norm": 0.2677527368068695, "learning_rate": 6.397707516365061e-05, "loss": 3.0138246536254885, "step": 143900 }, { "epoch": 0.33673333333333333, "grad_norm": 0.19876864552497864, "learning_rate": 6.396649114443953e-05, "loss": 3.051939010620117, "step": 143910 }, { "epoch": 0.3368, "grad_norm": 0.2546871304512024, "learning_rate": 6.395590644640424e-05, "loss": 3.3947582244873047, "step": 143920 }, { "epoch": 0.33686666666666665, "grad_norm": 0.21449020504951477, "learning_rate": 6.394532107005921e-05, "loss": 3.0822626113891602, "step": 143930 }, { "epoch": 0.3369333333333333, "grad_norm": 0.21304596960544586, "learning_rate": 6.393473501591889e-05, "loss": 3.025815010070801, "step": 143940 }, { "epoch": 0.337, "grad_norm": 0.2072671502828598, "learning_rate": 6.392414828449783e-05, "loss": 3.076980400085449, "step": 143950 }, { "epoch": 0.3370666666666667, "grad_norm": 0.3780979514122009, "learning_rate": 6.391356087631057e-05, "loss": 3.0210439682006838, "step": 143960 }, { "epoch": 0.33713333333333334, "grad_norm": 0.21303388476371765, "learning_rate": 6.390297279187173e-05, "loss": 3.0685302734375, "step": 143970 }, { "epoch": 0.3372, "grad_norm": 0.6424124836921692, "learning_rate": 6.389238403169588e-05, "loss": 3.1205745697021485, "step": 143980 }, { "epoch": 0.33726666666666666, "grad_norm": 0.20216470956802368, "learning_rate": 6.388179459629773e-05, "loss": 3.0396541595458983, "step": 143990 }, { "epoch": 0.3373333333333333, "grad_norm": 0.21712006628513336, "learning_rate": 6.387120448619192e-05, "loss": 3.111580657958984, "step": 144000 }, { "epoch": 0.3374, "grad_norm": 0.21016773581504822, "learning_rate": 6.38606137018932e-05, "loss": 3.260028076171875, "step": 144010 }, { "epoch": 0.3374666666666667, "grad_norm": 0.20969440042972565, "learning_rate": 6.38500222439163e-05, "loss": 3.030069923400879, "step": 144020 }, { "epoch": 0.33753333333333335, "grad_norm": 0.19960136711597443, "learning_rate": 6.383943011277601e-05, "loss": 3.0739978790283202, "step": 144030 }, { "epoch": 0.3376, "grad_norm": 0.2137286216020584, "learning_rate": 6.382883730898717e-05, "loss": 3.048274612426758, "step": 144040 }, { "epoch": 0.33766666666666667, "grad_norm": 0.2907031178474426, "learning_rate": 6.38182438330646e-05, "loss": 3.0631065368652344, "step": 144050 }, { "epoch": 0.33773333333333333, "grad_norm": 0.19733582437038422, "learning_rate": 6.380764968552319e-05, "loss": 3.030510902404785, "step": 144060 }, { "epoch": 0.3378, "grad_norm": 0.21764753758907318, "learning_rate": 6.379705486687786e-05, "loss": 3.15054988861084, "step": 144070 }, { "epoch": 0.33786666666666665, "grad_norm": 0.20293281972408295, "learning_rate": 6.378645937764357e-05, "loss": 3.054685592651367, "step": 144080 }, { "epoch": 0.3379333333333333, "grad_norm": 0.21371352672576904, "learning_rate": 6.377586321833529e-05, "loss": 3.1199668884277343, "step": 144090 }, { "epoch": 0.338, "grad_norm": 0.2094542384147644, "learning_rate": 6.376526638946803e-05, "loss": 3.014533805847168, "step": 144100 }, { "epoch": 0.3380666666666667, "grad_norm": 0.2064414769411087, "learning_rate": 6.375466889155683e-05, "loss": 3.0306140899658205, "step": 144110 }, { "epoch": 0.33813333333333334, "grad_norm": 0.19492371380329132, "learning_rate": 6.37440707251168e-05, "loss": 3.040548324584961, "step": 144120 }, { "epoch": 0.3382, "grad_norm": 0.21258552372455597, "learning_rate": 6.3733471890663e-05, "loss": 3.0664831161499024, "step": 144130 }, { "epoch": 0.33826666666666666, "grad_norm": 0.20232534408569336, "learning_rate": 6.372287238871063e-05, "loss": 3.037627029418945, "step": 144140 }, { "epoch": 0.3383333333333333, "grad_norm": 0.20626765489578247, "learning_rate": 6.37122722197748e-05, "loss": 3.1106796264648438, "step": 144150 }, { "epoch": 0.3384, "grad_norm": 0.210426464676857, "learning_rate": 6.370167138437079e-05, "loss": 3.093125343322754, "step": 144160 }, { "epoch": 0.3384666666666667, "grad_norm": 0.19718457758426666, "learning_rate": 6.369106988301382e-05, "loss": 3.088723373413086, "step": 144170 }, { "epoch": 0.33853333333333335, "grad_norm": 0.2168850600719452, "learning_rate": 6.368046771621912e-05, "loss": 2.996918487548828, "step": 144180 }, { "epoch": 0.3386, "grad_norm": 0.22999975085258484, "learning_rate": 6.366986488450203e-05, "loss": 3.0690975189208984, "step": 144190 }, { "epoch": 0.33866666666666667, "grad_norm": 0.1988411694765091, "learning_rate": 6.365926138837788e-05, "loss": 3.022164535522461, "step": 144200 }, { "epoch": 0.33873333333333333, "grad_norm": 0.19535589218139648, "learning_rate": 6.364865722836204e-05, "loss": 3.0110544204711913, "step": 144210 }, { "epoch": 0.3388, "grad_norm": 0.2812189757823944, "learning_rate": 6.363805240496993e-05, "loss": 3.078213691711426, "step": 144220 }, { "epoch": 0.33886666666666665, "grad_norm": 0.24574606120586395, "learning_rate": 6.362744691871697e-05, "loss": 3.063526725769043, "step": 144230 }, { "epoch": 0.3389333333333333, "grad_norm": 0.2105480432510376, "learning_rate": 6.36168407701186e-05, "loss": 3.0978372573852537, "step": 144240 }, { "epoch": 0.339, "grad_norm": 0.19981542229652405, "learning_rate": 6.360623395969038e-05, "loss": 3.0092830657958984, "step": 144250 }, { "epoch": 0.3390666666666667, "grad_norm": 0.22131188213825226, "learning_rate": 6.359562648794779e-05, "loss": 3.067815971374512, "step": 144260 }, { "epoch": 0.33913333333333334, "grad_norm": 0.21729017794132233, "learning_rate": 6.35850183554064e-05, "loss": 3.103898811340332, "step": 144270 }, { "epoch": 0.3392, "grad_norm": 0.2065008282661438, "learning_rate": 6.357440956258183e-05, "loss": 3.1405109405517577, "step": 144280 }, { "epoch": 0.33926666666666666, "grad_norm": 0.19536684453487396, "learning_rate": 6.356380010998968e-05, "loss": 3.035267639160156, "step": 144290 }, { "epoch": 0.3393333333333333, "grad_norm": 0.20113976299762726, "learning_rate": 6.355318999814563e-05, "loss": 3.0704078674316406, "step": 144300 }, { "epoch": 0.3394, "grad_norm": 0.23258726298809052, "learning_rate": 6.354257922756535e-05, "loss": 3.088663864135742, "step": 144310 }, { "epoch": 0.3394666666666667, "grad_norm": 0.4890526235103607, "learning_rate": 6.353196779876458e-05, "loss": 3.064902496337891, "step": 144320 }, { "epoch": 0.33953333333333335, "grad_norm": 0.21474775671958923, "learning_rate": 6.352135571225909e-05, "loss": 3.041058158874512, "step": 144330 }, { "epoch": 0.3396, "grad_norm": 0.23964408040046692, "learning_rate": 6.351074296856463e-05, "loss": 3.028184509277344, "step": 144340 }, { "epoch": 0.3396666666666667, "grad_norm": 0.2108258306980133, "learning_rate": 6.350012956819704e-05, "loss": 3.0878551483154295, "step": 144350 }, { "epoch": 0.33973333333333333, "grad_norm": 0.2186507284641266, "learning_rate": 6.348951551167217e-05, "loss": 3.022142219543457, "step": 144360 }, { "epoch": 0.3398, "grad_norm": 0.21884728968143463, "learning_rate": 6.347890079950591e-05, "loss": 3.045118522644043, "step": 144370 }, { "epoch": 0.33986666666666665, "grad_norm": 0.34176746010780334, "learning_rate": 6.346828543221417e-05, "loss": 3.128133010864258, "step": 144380 }, { "epoch": 0.3399333333333333, "grad_norm": 0.24034816026687622, "learning_rate": 6.345766941031289e-05, "loss": 3.1506475448608398, "step": 144390 }, { "epoch": 0.34, "grad_norm": 0.25112032890319824, "learning_rate": 6.344705273431806e-05, "loss": 3.060677719116211, "step": 144400 }, { "epoch": 0.3400666666666667, "grad_norm": 0.21940836310386658, "learning_rate": 6.343643540474569e-05, "loss": 3.0987367630004883, "step": 144410 }, { "epoch": 0.34013333333333334, "grad_norm": 0.20978078246116638, "learning_rate": 6.342581742211179e-05, "loss": 3.1736785888671877, "step": 144420 }, { "epoch": 0.3402, "grad_norm": 0.20704695582389832, "learning_rate": 6.341519878693249e-05, "loss": 3.042494201660156, "step": 144430 }, { "epoch": 0.34026666666666666, "grad_norm": 0.2295542061328888, "learning_rate": 6.340457949972385e-05, "loss": 3.108312797546387, "step": 144440 }, { "epoch": 0.3403333333333333, "grad_norm": 0.21046291291713715, "learning_rate": 6.339395956100202e-05, "loss": 3.0576778411865235, "step": 144450 }, { "epoch": 0.3404, "grad_norm": 0.20828558504581451, "learning_rate": 6.338333897128318e-05, "loss": 3.1188114166259764, "step": 144460 }, { "epoch": 0.34046666666666664, "grad_norm": 0.2237098217010498, "learning_rate": 6.337271773108353e-05, "loss": 3.0295658111572266, "step": 144470 }, { "epoch": 0.34053333333333335, "grad_norm": 0.20449499785900116, "learning_rate": 6.336209584091929e-05, "loss": 3.06260986328125, "step": 144480 }, { "epoch": 0.3406, "grad_norm": 0.23365619778633118, "learning_rate": 6.335147330130673e-05, "loss": 3.1002212524414063, "step": 144490 }, { "epoch": 0.3406666666666667, "grad_norm": 0.21750976145267487, "learning_rate": 6.334085011276215e-05, "loss": 3.0977073669433595, "step": 144500 }, { "epoch": 0.34073333333333333, "grad_norm": 0.2402481734752655, "learning_rate": 6.333022627580188e-05, "loss": 3.0346216201782226, "step": 144510 }, { "epoch": 0.3408, "grad_norm": 0.22278955578804016, "learning_rate": 6.331960179094225e-05, "loss": 3.029402732849121, "step": 144520 }, { "epoch": 0.34086666666666665, "grad_norm": 0.20168721675872803, "learning_rate": 6.33089766586997e-05, "loss": 3.1090484619140626, "step": 144530 }, { "epoch": 0.3409333333333333, "grad_norm": 0.229123055934906, "learning_rate": 6.329835087959059e-05, "loss": 3.081011199951172, "step": 144540 }, { "epoch": 0.341, "grad_norm": 0.19569812715053558, "learning_rate": 6.328772445413143e-05, "loss": 3.1474557876586915, "step": 144550 }, { "epoch": 0.3410666666666667, "grad_norm": 0.23307360708713531, "learning_rate": 6.327709738283867e-05, "loss": 3.055642509460449, "step": 144560 }, { "epoch": 0.34113333333333334, "grad_norm": 0.19948223233222961, "learning_rate": 6.326646966622886e-05, "loss": 3.022195816040039, "step": 144570 }, { "epoch": 0.3412, "grad_norm": 0.24865736067295074, "learning_rate": 6.32558413048185e-05, "loss": 3.0528465270996095, "step": 144580 }, { "epoch": 0.34126666666666666, "grad_norm": 0.20070825517177582, "learning_rate": 6.32452122991242e-05, "loss": 3.0722124099731447, "step": 144590 }, { "epoch": 0.3413333333333333, "grad_norm": 0.23433318734169006, "learning_rate": 6.323458264966255e-05, "loss": 3.0452142715454102, "step": 144600 }, { "epoch": 0.3414, "grad_norm": 0.2078062742948532, "learning_rate": 6.322395235695022e-05, "loss": 3.031927299499512, "step": 144610 }, { "epoch": 0.34146666666666664, "grad_norm": 0.21626022458076477, "learning_rate": 6.321332142150385e-05, "loss": 3.0758153915405275, "step": 144620 }, { "epoch": 0.34153333333333336, "grad_norm": 0.19759781658649445, "learning_rate": 6.320268984384018e-05, "loss": 3.045274353027344, "step": 144630 }, { "epoch": 0.3416, "grad_norm": 0.22323454916477203, "learning_rate": 6.319205762447591e-05, "loss": 3.0001205444335937, "step": 144640 }, { "epoch": 0.3416666666666667, "grad_norm": 0.22754546999931335, "learning_rate": 6.318142476392784e-05, "loss": 3.1496183395385744, "step": 144650 }, { "epoch": 0.34173333333333333, "grad_norm": 0.2460154891014099, "learning_rate": 6.317079126271272e-05, "loss": 3.050302505493164, "step": 144660 }, { "epoch": 0.3418, "grad_norm": 0.19315461814403534, "learning_rate": 6.316015712134741e-05, "loss": 3.021599769592285, "step": 144670 }, { "epoch": 0.34186666666666665, "grad_norm": 0.20455001294612885, "learning_rate": 6.314952234034876e-05, "loss": 3.033883476257324, "step": 144680 }, { "epoch": 0.3419333333333333, "grad_norm": 0.20469553768634796, "learning_rate": 6.313888692023368e-05, "loss": 3.0460439682006837, "step": 144690 }, { "epoch": 0.342, "grad_norm": 0.2019001692533493, "learning_rate": 6.312825086151908e-05, "loss": 3.2954898834228517, "step": 144700 }, { "epoch": 0.3420666666666667, "grad_norm": 0.24127180874347687, "learning_rate": 6.31176141647219e-05, "loss": 3.05926570892334, "step": 144710 }, { "epoch": 0.34213333333333334, "grad_norm": 0.1960514336824417, "learning_rate": 6.310697683035913e-05, "loss": 3.0417407989501952, "step": 144720 }, { "epoch": 0.3422, "grad_norm": 0.23327812552452087, "learning_rate": 6.30963388589478e-05, "loss": 3.0306774139404298, "step": 144730 }, { "epoch": 0.34226666666666666, "grad_norm": 0.3269047141075134, "learning_rate": 6.308570025100494e-05, "loss": 2.968241882324219, "step": 144740 }, { "epoch": 0.3423333333333333, "grad_norm": 0.2203007936477661, "learning_rate": 6.307506100704761e-05, "loss": 3.0859607696533202, "step": 144750 }, { "epoch": 0.3424, "grad_norm": 0.19776912033557892, "learning_rate": 6.306442112759297e-05, "loss": 3.0208091735839844, "step": 144760 }, { "epoch": 0.34246666666666664, "grad_norm": 0.2198261171579361, "learning_rate": 6.305378061315809e-05, "loss": 3.059035301208496, "step": 144770 }, { "epoch": 0.34253333333333336, "grad_norm": 0.23733758926391602, "learning_rate": 6.30431394642602e-05, "loss": 3.032197380065918, "step": 144780 }, { "epoch": 0.3426, "grad_norm": 0.435032457113266, "learning_rate": 6.303249768141647e-05, "loss": 3.1108875274658203, "step": 144790 }, { "epoch": 0.3426666666666667, "grad_norm": 0.22706696391105652, "learning_rate": 6.302185526514413e-05, "loss": 3.016348457336426, "step": 144800 }, { "epoch": 0.34273333333333333, "grad_norm": 0.36312198638916016, "learning_rate": 6.301121221596045e-05, "loss": 3.0566219329833983, "step": 144810 }, { "epoch": 0.3428, "grad_norm": 0.21021367609500885, "learning_rate": 6.300056853438273e-05, "loss": 3.0366483688354493, "step": 144820 }, { "epoch": 0.34286666666666665, "grad_norm": 0.20528864860534668, "learning_rate": 6.298992422092827e-05, "loss": 2.990726280212402, "step": 144830 }, { "epoch": 0.3429333333333333, "grad_norm": 0.33287855982780457, "learning_rate": 6.297927927611444e-05, "loss": 3.011208724975586, "step": 144840 }, { "epoch": 0.343, "grad_norm": 0.1985446661710739, "learning_rate": 6.296863370045861e-05, "loss": 3.065607261657715, "step": 144850 }, { "epoch": 0.3430666666666667, "grad_norm": 0.8844954967498779, "learning_rate": 6.295798749447822e-05, "loss": 3.0682819366455076, "step": 144860 }, { "epoch": 0.34313333333333335, "grad_norm": 0.2007124274969101, "learning_rate": 6.29473406586907e-05, "loss": 3.034929656982422, "step": 144870 }, { "epoch": 0.3432, "grad_norm": 0.19717322289943695, "learning_rate": 6.293669319361352e-05, "loss": 3.053482246398926, "step": 144880 }, { "epoch": 0.34326666666666666, "grad_norm": 0.21283920109272003, "learning_rate": 6.292604509976421e-05, "loss": 3.010713577270508, "step": 144890 }, { "epoch": 0.3433333333333333, "grad_norm": 0.21988746523857117, "learning_rate": 6.29153963776603e-05, "loss": 3.0236913681030275, "step": 144900 }, { "epoch": 0.3434, "grad_norm": 0.2745862603187561, "learning_rate": 6.290474702781934e-05, "loss": 3.02712459564209, "step": 144910 }, { "epoch": 0.34346666666666664, "grad_norm": 0.2126852422952652, "learning_rate": 6.289409705075893e-05, "loss": 3.095321464538574, "step": 144920 }, { "epoch": 0.34353333333333336, "grad_norm": 0.22864346206188202, "learning_rate": 6.288344644699674e-05, "loss": 3.1023271560668944, "step": 144930 }, { "epoch": 0.3436, "grad_norm": 0.5476874113082886, "learning_rate": 6.287279521705036e-05, "loss": 3.0118602752685546, "step": 144940 }, { "epoch": 0.3436666666666667, "grad_norm": 0.21252869069576263, "learning_rate": 6.286214336143753e-05, "loss": 3.040223503112793, "step": 144950 }, { "epoch": 0.34373333333333334, "grad_norm": 0.3782886564731598, "learning_rate": 6.285149088067596e-05, "loss": 3.084750175476074, "step": 144960 }, { "epoch": 0.3438, "grad_norm": 0.20419596135616302, "learning_rate": 6.28408377752834e-05, "loss": 3.115876007080078, "step": 144970 }, { "epoch": 0.34386666666666665, "grad_norm": 0.27835607528686523, "learning_rate": 6.283018404577765e-05, "loss": 3.042608642578125, "step": 144980 }, { "epoch": 0.3439333333333333, "grad_norm": 0.8909023404121399, "learning_rate": 6.281952969267649e-05, "loss": 2.968600273132324, "step": 144990 }, { "epoch": 0.344, "grad_norm": 0.23347435891628265, "learning_rate": 6.280887471649777e-05, "loss": 2.8796382904052735, "step": 145000 }, { "epoch": 0.3440666666666667, "grad_norm": 0.20093412697315216, "learning_rate": 6.279821911775937e-05, "loss": 3.0641021728515625, "step": 145010 }, { "epoch": 0.34413333333333335, "grad_norm": 0.2048221379518509, "learning_rate": 6.278756289697918e-05, "loss": 3.0645687103271486, "step": 145020 }, { "epoch": 0.3442, "grad_norm": 0.21884037554264069, "learning_rate": 6.277690605467516e-05, "loss": 3.1376325607299806, "step": 145030 }, { "epoch": 0.34426666666666667, "grad_norm": 0.22793512046337128, "learning_rate": 6.276624859136524e-05, "loss": 3.0627864837646483, "step": 145040 }, { "epoch": 0.3443333333333333, "grad_norm": 0.2314969301223755, "learning_rate": 6.275559050756744e-05, "loss": 3.0734901428222656, "step": 145050 }, { "epoch": 0.3444, "grad_norm": 0.21759657561779022, "learning_rate": 6.274493180379975e-05, "loss": 3.0600826263427736, "step": 145060 }, { "epoch": 0.34446666666666664, "grad_norm": 0.2244385927915573, "learning_rate": 6.273427248058025e-05, "loss": 3.0255542755126954, "step": 145070 }, { "epoch": 0.34453333333333336, "grad_norm": 0.22372496128082275, "learning_rate": 6.272361253842702e-05, "loss": 3.0716678619384767, "step": 145080 }, { "epoch": 0.3446, "grad_norm": 0.1891488879919052, "learning_rate": 6.271295197785818e-05, "loss": 3.023055648803711, "step": 145090 }, { "epoch": 0.3446666666666667, "grad_norm": 0.31489551067352295, "learning_rate": 6.270229079939185e-05, "loss": 3.1096622467041017, "step": 145100 }, { "epoch": 0.34473333333333334, "grad_norm": 0.20210625231266022, "learning_rate": 6.269162900354622e-05, "loss": 3.056776428222656, "step": 145110 }, { "epoch": 0.3448, "grad_norm": 0.22890211641788483, "learning_rate": 6.268096659083949e-05, "loss": 3.1161128997802736, "step": 145120 }, { "epoch": 0.34486666666666665, "grad_norm": 0.22090493142604828, "learning_rate": 6.267030356178989e-05, "loss": 3.1201545715332033, "step": 145130 }, { "epoch": 0.3449333333333333, "grad_norm": 0.20048896968364716, "learning_rate": 6.26596399169157e-05, "loss": 3.3004226684570312, "step": 145140 }, { "epoch": 0.345, "grad_norm": 0.2139034867286682, "learning_rate": 6.26489756567352e-05, "loss": 3.054941749572754, "step": 145150 }, { "epoch": 0.3450666666666667, "grad_norm": 0.38366395235061646, "learning_rate": 6.26383107817667e-05, "loss": 3.061714935302734, "step": 145160 }, { "epoch": 0.34513333333333335, "grad_norm": 0.2104494720697403, "learning_rate": 6.262764529252858e-05, "loss": 3.0899240493774416, "step": 145170 }, { "epoch": 0.3452, "grad_norm": 0.31299248337745667, "learning_rate": 6.261697918953921e-05, "loss": 3.0486076354980467, "step": 145180 }, { "epoch": 0.34526666666666667, "grad_norm": 0.2039875090122223, "learning_rate": 6.260631247331701e-05, "loss": 3.0414323806762695, "step": 145190 }, { "epoch": 0.3453333333333333, "grad_norm": 0.2565118372440338, "learning_rate": 6.25956451443804e-05, "loss": 3.143378257751465, "step": 145200 }, { "epoch": 0.3454, "grad_norm": 0.1968095302581787, "learning_rate": 6.258497720324787e-05, "loss": 3.0197219848632812, "step": 145210 }, { "epoch": 0.34546666666666664, "grad_norm": 0.23158316314220428, "learning_rate": 6.257430865043793e-05, "loss": 3.0598058700561523, "step": 145220 }, { "epoch": 0.34553333333333336, "grad_norm": 0.21479417383670807, "learning_rate": 6.25636394864691e-05, "loss": 3.1613346099853517, "step": 145230 }, { "epoch": 0.3456, "grad_norm": 0.2196379005908966, "learning_rate": 6.255296971185994e-05, "loss": 3.0127561569213865, "step": 145240 }, { "epoch": 0.3456666666666667, "grad_norm": 0.21661540865898132, "learning_rate": 6.254229932712905e-05, "loss": 3.049694633483887, "step": 145250 }, { "epoch": 0.34573333333333334, "grad_norm": 0.23352943360805511, "learning_rate": 6.253162833279506e-05, "loss": 3.0928693771362306, "step": 145260 }, { "epoch": 0.3458, "grad_norm": 0.2193913608789444, "learning_rate": 6.25209567293766e-05, "loss": 3.0485567092895507, "step": 145270 }, { "epoch": 0.34586666666666666, "grad_norm": 0.20274202525615692, "learning_rate": 6.251028451739234e-05, "loss": 3.120919036865234, "step": 145280 }, { "epoch": 0.3459333333333333, "grad_norm": 0.19938334822654724, "learning_rate": 6.249961169736103e-05, "loss": 3.011946678161621, "step": 145290 }, { "epoch": 0.346, "grad_norm": 0.2107277512550354, "learning_rate": 6.248893826980137e-05, "loss": 3.0424495697021485, "step": 145300 }, { "epoch": 0.3460666666666667, "grad_norm": 0.20675648748874664, "learning_rate": 6.247826423523216e-05, "loss": 3.1551387786865233, "step": 145310 }, { "epoch": 0.34613333333333335, "grad_norm": 0.24882863461971283, "learning_rate": 6.246758959417219e-05, "loss": 3.03810977935791, "step": 145320 }, { "epoch": 0.3462, "grad_norm": 0.20768103003501892, "learning_rate": 6.245691434714026e-05, "loss": 3.0721439361572265, "step": 145330 }, { "epoch": 0.34626666666666667, "grad_norm": 0.20885005593299866, "learning_rate": 6.244623849465526e-05, "loss": 3.0685327529907225, "step": 145340 }, { "epoch": 0.3463333333333333, "grad_norm": 0.21770864725112915, "learning_rate": 6.243556203723607e-05, "loss": 3.0496292114257812, "step": 145350 }, { "epoch": 0.3464, "grad_norm": 0.2936326265335083, "learning_rate": 6.242488497540159e-05, "loss": 3.0918880462646485, "step": 145360 }, { "epoch": 0.34646666666666665, "grad_norm": 0.2580738067626953, "learning_rate": 6.241420730967079e-05, "loss": 3.0176841735839846, "step": 145370 }, { "epoch": 0.34653333333333336, "grad_norm": 0.21747970581054688, "learning_rate": 6.240352904056264e-05, "loss": 3.046396255493164, "step": 145380 }, { "epoch": 0.3466, "grad_norm": 0.23339882493019104, "learning_rate": 6.239285016859612e-05, "loss": 2.9893802642822265, "step": 145390 }, { "epoch": 0.3466666666666667, "grad_norm": 0.21176014840602875, "learning_rate": 6.238217069429029e-05, "loss": 3.0766674041748048, "step": 145400 }, { "epoch": 0.34673333333333334, "grad_norm": 0.20193378627300262, "learning_rate": 6.23714906181642e-05, "loss": 3.135784912109375, "step": 145410 }, { "epoch": 0.3468, "grad_norm": 0.2408638596534729, "learning_rate": 6.236080994073693e-05, "loss": 3.039097213745117, "step": 145420 }, { "epoch": 0.34686666666666666, "grad_norm": 0.23088976740837097, "learning_rate": 6.235012866252764e-05, "loss": 3.024058532714844, "step": 145430 }, { "epoch": 0.3469333333333333, "grad_norm": 0.20768985152244568, "learning_rate": 6.233944678405545e-05, "loss": 3.0717519760131835, "step": 145440 }, { "epoch": 0.347, "grad_norm": 0.20328694581985474, "learning_rate": 6.232876430583954e-05, "loss": 3.0005531311035156, "step": 145450 }, { "epoch": 0.3470666666666667, "grad_norm": 0.37051844596862793, "learning_rate": 6.231808122839915e-05, "loss": 3.162669563293457, "step": 145460 }, { "epoch": 0.34713333333333335, "grad_norm": 0.20068150758743286, "learning_rate": 6.230739755225347e-05, "loss": 3.0622913360595705, "step": 145470 }, { "epoch": 0.3472, "grad_norm": 0.20426587760448456, "learning_rate": 6.22967132779218e-05, "loss": 3.0212568283081054, "step": 145480 }, { "epoch": 0.34726666666666667, "grad_norm": 0.3265894949436188, "learning_rate": 6.228602840592341e-05, "loss": 3.2596408843994142, "step": 145490 }, { "epoch": 0.3473333333333333, "grad_norm": 0.43589526414871216, "learning_rate": 6.227534293677766e-05, "loss": 3.043394660949707, "step": 145500 }, { "epoch": 0.3474, "grad_norm": 0.21450646221637726, "learning_rate": 6.226465687100386e-05, "loss": 3.0901241302490234, "step": 145510 }, { "epoch": 0.34746666666666665, "grad_norm": 0.21181577444076538, "learning_rate": 6.225397020912145e-05, "loss": 3.1660429000854493, "step": 145520 }, { "epoch": 0.34753333333333336, "grad_norm": 0.25768664479255676, "learning_rate": 6.224328295164979e-05, "loss": 3.0075759887695312, "step": 145530 }, { "epoch": 0.3476, "grad_norm": 0.20610682666301727, "learning_rate": 6.223259509910835e-05, "loss": 3.1446760177612303, "step": 145540 }, { "epoch": 0.3476666666666667, "grad_norm": 0.40326470136642456, "learning_rate": 6.222190665201659e-05, "loss": 3.128560256958008, "step": 145550 }, { "epoch": 0.34773333333333334, "grad_norm": 0.18730266392230988, "learning_rate": 6.221121761089402e-05, "loss": 3.0688665390014647, "step": 145560 }, { "epoch": 0.3478, "grad_norm": 0.2078181505203247, "learning_rate": 6.220052797626015e-05, "loss": 3.0591461181640627, "step": 145570 }, { "epoch": 0.34786666666666666, "grad_norm": 0.2245773822069168, "learning_rate": 6.218983774863454e-05, "loss": 3.14644718170166, "step": 145580 }, { "epoch": 0.3479333333333333, "grad_norm": 0.3612389862537384, "learning_rate": 6.217914692853679e-05, "loss": 3.1216875076293946, "step": 145590 }, { "epoch": 0.348, "grad_norm": 0.20528544485569, "learning_rate": 6.21684555164865e-05, "loss": 3.105733108520508, "step": 145600 }, { "epoch": 0.3480666666666667, "grad_norm": 0.2624904215335846, "learning_rate": 6.215776351300332e-05, "loss": 3.1174150466918946, "step": 145610 }, { "epoch": 0.34813333333333335, "grad_norm": 0.19106867909431458, "learning_rate": 6.214707091860694e-05, "loss": 3.0791229248046874, "step": 145620 }, { "epoch": 0.3482, "grad_norm": 0.21698634326457977, "learning_rate": 6.2136377733817e-05, "loss": 3.0551891326904297, "step": 145630 }, { "epoch": 0.34826666666666667, "grad_norm": 0.2522886097431183, "learning_rate": 6.21256839591533e-05, "loss": 3.1035091400146486, "step": 145640 }, { "epoch": 0.34833333333333333, "grad_norm": 0.20479236543178558, "learning_rate": 6.211498959513555e-05, "loss": 3.1020830154418944, "step": 145650 }, { "epoch": 0.3484, "grad_norm": 0.19965171813964844, "learning_rate": 6.210429464228357e-05, "loss": 3.02185173034668, "step": 145660 }, { "epoch": 0.34846666666666665, "grad_norm": 0.22805172204971313, "learning_rate": 6.209359910111715e-05, "loss": 3.026559829711914, "step": 145670 }, { "epoch": 0.3485333333333333, "grad_norm": 0.2053983211517334, "learning_rate": 6.208290297215615e-05, "loss": 3.0165180206298827, "step": 145680 }, { "epoch": 0.3486, "grad_norm": 0.24573162198066711, "learning_rate": 6.207220625592042e-05, "loss": 3.084007453918457, "step": 145690 }, { "epoch": 0.3486666666666667, "grad_norm": 0.2130766212940216, "learning_rate": 6.206150895292988e-05, "loss": 3.025757598876953, "step": 145700 }, { "epoch": 0.34873333333333334, "grad_norm": 0.20552058517932892, "learning_rate": 6.205081106370446e-05, "loss": 3.083974075317383, "step": 145710 }, { "epoch": 0.3488, "grad_norm": 0.20602571964263916, "learning_rate": 6.204011258876411e-05, "loss": 3.0693641662597657, "step": 145720 }, { "epoch": 0.34886666666666666, "grad_norm": 0.2048279345035553, "learning_rate": 6.202941352862882e-05, "loss": 3.0234970092773437, "step": 145730 }, { "epoch": 0.3489333333333333, "grad_norm": 0.20787493884563446, "learning_rate": 6.20187138838186e-05, "loss": 3.0855573654174804, "step": 145740 }, { "epoch": 0.349, "grad_norm": 0.23717819154262543, "learning_rate": 6.20080136548535e-05, "loss": 3.0696550369262696, "step": 145750 }, { "epoch": 0.3490666666666667, "grad_norm": 0.22454410791397095, "learning_rate": 6.199731284225359e-05, "loss": 3.1033098220825197, "step": 145760 }, { "epoch": 0.34913333333333335, "grad_norm": 0.22534514963626862, "learning_rate": 6.198661144653896e-05, "loss": 3.243834686279297, "step": 145770 }, { "epoch": 0.3492, "grad_norm": 0.20838381350040436, "learning_rate": 6.197590946822976e-05, "loss": 3.0473098754882812, "step": 145780 }, { "epoch": 0.34926666666666667, "grad_norm": 0.20164959132671356, "learning_rate": 6.196520690784613e-05, "loss": 3.1573562622070312, "step": 145790 }, { "epoch": 0.34933333333333333, "grad_norm": 0.1945868581533432, "learning_rate": 6.195450376590826e-05, "loss": 3.0755014419555664, "step": 145800 }, { "epoch": 0.3494, "grad_norm": 0.2197469025850296, "learning_rate": 6.194380004293635e-05, "loss": 3.061570167541504, "step": 145810 }, { "epoch": 0.34946666666666665, "grad_norm": 0.22697389125823975, "learning_rate": 6.193309573945065e-05, "loss": 3.073002815246582, "step": 145820 }, { "epoch": 0.3495333333333333, "grad_norm": 0.2544826567173004, "learning_rate": 6.192239085597144e-05, "loss": 3.1050251007080076, "step": 145830 }, { "epoch": 0.3496, "grad_norm": 0.21357353031635284, "learning_rate": 6.191168539301902e-05, "loss": 3.0561954498291017, "step": 145840 }, { "epoch": 0.3496666666666667, "grad_norm": 0.20204298198223114, "learning_rate": 6.190097935111369e-05, "loss": 3.0517515182495116, "step": 145850 }, { "epoch": 0.34973333333333334, "grad_norm": 0.2975344657897949, "learning_rate": 6.189027273077583e-05, "loss": 3.0537076950073243, "step": 145860 }, { "epoch": 0.3498, "grad_norm": 0.24432970583438873, "learning_rate": 6.18795655325258e-05, "loss": 3.0769771575927733, "step": 145870 }, { "epoch": 0.34986666666666666, "grad_norm": 0.2166474163532257, "learning_rate": 6.186885775688403e-05, "loss": 3.0537656784057616, "step": 145880 }, { "epoch": 0.3499333333333333, "grad_norm": 0.41151174902915955, "learning_rate": 6.185814940437094e-05, "loss": 3.0606204986572267, "step": 145890 }, { "epoch": 0.35, "grad_norm": 0.20154425501823425, "learning_rate": 6.184744047550703e-05, "loss": 3.051986503601074, "step": 145900 }, { "epoch": 0.3500666666666667, "grad_norm": 0.2718218266963959, "learning_rate": 6.183673097081275e-05, "loss": 3.0313690185546873, "step": 145910 }, { "epoch": 0.35013333333333335, "grad_norm": 0.22412483394145966, "learning_rate": 6.182602089080866e-05, "loss": 3.0579544067382813, "step": 145920 }, { "epoch": 0.3502, "grad_norm": 0.20945699512958527, "learning_rate": 6.181531023601528e-05, "loss": 3.0234500885009767, "step": 145930 }, { "epoch": 0.35026666666666667, "grad_norm": 0.2804655134677887, "learning_rate": 6.18045990069532e-05, "loss": 3.1086328506469725, "step": 145940 }, { "epoch": 0.35033333333333333, "grad_norm": 0.21031762659549713, "learning_rate": 6.179388720414303e-05, "loss": 3.0708261489868165, "step": 145950 }, { "epoch": 0.3504, "grad_norm": 0.23249779641628265, "learning_rate": 6.178317482810542e-05, "loss": 2.9847253799438476, "step": 145960 }, { "epoch": 0.35046666666666665, "grad_norm": 0.318538099527359, "learning_rate": 6.1772461879361e-05, "loss": 3.051965522766113, "step": 145970 }, { "epoch": 0.3505333333333333, "grad_norm": 0.22343537211418152, "learning_rate": 6.176174835843048e-05, "loss": 3.094122123718262, "step": 145980 }, { "epoch": 0.3506, "grad_norm": 0.20301182568073273, "learning_rate": 6.175103426583457e-05, "loss": 3.0837175369262697, "step": 145990 }, { "epoch": 0.3506666666666667, "grad_norm": 0.21651455760002136, "learning_rate": 6.1740319602094e-05, "loss": 2.9882349014282226, "step": 146000 }, { "epoch": 0.35073333333333334, "grad_norm": 0.22560180723667145, "learning_rate": 6.172960436772957e-05, "loss": 2.994365692138672, "step": 146010 }, { "epoch": 0.3508, "grad_norm": 0.25947508215904236, "learning_rate": 6.171888856326211e-05, "loss": 3.0501083374023437, "step": 146020 }, { "epoch": 0.35086666666666666, "grad_norm": 0.2122042179107666, "learning_rate": 6.170817218921236e-05, "loss": 3.068577766418457, "step": 146030 }, { "epoch": 0.3509333333333333, "grad_norm": 0.23252397775650024, "learning_rate": 6.169745524610125e-05, "loss": 3.1980648040771484, "step": 146040 }, { "epoch": 0.351, "grad_norm": 0.223749041557312, "learning_rate": 6.168673773444963e-05, "loss": 3.000300407409668, "step": 146050 }, { "epoch": 0.3510666666666667, "grad_norm": 0.20096570253372192, "learning_rate": 6.167601965477841e-05, "loss": 3.059388542175293, "step": 146060 }, { "epoch": 0.35113333333333335, "grad_norm": 0.2284896969795227, "learning_rate": 6.166530100760857e-05, "loss": 3.0404287338256837, "step": 146070 }, { "epoch": 0.3512, "grad_norm": 0.2102699726819992, "learning_rate": 6.165458179346103e-05, "loss": 3.091171455383301, "step": 146080 }, { "epoch": 0.35126666666666667, "grad_norm": 0.22922159731388092, "learning_rate": 6.16438620128568e-05, "loss": 3.028310203552246, "step": 146090 }, { "epoch": 0.35133333333333333, "grad_norm": 0.21369053423404694, "learning_rate": 6.163314166631691e-05, "loss": 3.008572006225586, "step": 146100 }, { "epoch": 0.3514, "grad_norm": 0.19483685493469238, "learning_rate": 6.16224207543624e-05, "loss": 3.0509082794189455, "step": 146110 }, { "epoch": 0.35146666666666665, "grad_norm": 0.21263174712657928, "learning_rate": 6.161169927751434e-05, "loss": 3.0414138793945313, "step": 146120 }, { "epoch": 0.3515333333333333, "grad_norm": 0.20763593912124634, "learning_rate": 6.160097723629386e-05, "loss": 2.9988967895507814, "step": 146130 }, { "epoch": 0.3516, "grad_norm": 0.21219904720783234, "learning_rate": 6.159025463122207e-05, "loss": 3.0405204772949217, "step": 146140 }, { "epoch": 0.3516666666666667, "grad_norm": 0.19318406283855438, "learning_rate": 6.157953146282012e-05, "loss": 3.0523923873901366, "step": 146150 }, { "epoch": 0.35173333333333334, "grad_norm": 0.2132415622472763, "learning_rate": 6.156880773160924e-05, "loss": 3.268269729614258, "step": 146160 }, { "epoch": 0.3518, "grad_norm": 0.2070237696170807, "learning_rate": 6.15580834381106e-05, "loss": 3.0634176254272463, "step": 146170 }, { "epoch": 0.35186666666666666, "grad_norm": 0.34811291098594666, "learning_rate": 6.154735858284545e-05, "loss": 3.085504722595215, "step": 146180 }, { "epoch": 0.3519333333333333, "grad_norm": 0.20568911731243134, "learning_rate": 6.153663316633508e-05, "loss": 3.074115753173828, "step": 146190 }, { "epoch": 0.352, "grad_norm": 0.20072771608829498, "learning_rate": 6.152590718910076e-05, "loss": 2.9877553939819337, "step": 146200 }, { "epoch": 0.3520666666666667, "grad_norm": 0.21810638904571533, "learning_rate": 6.151518065166382e-05, "loss": 3.007970428466797, "step": 146210 }, { "epoch": 0.35213333333333335, "grad_norm": 0.22370095551013947, "learning_rate": 6.150445355454562e-05, "loss": 3.0385936737060546, "step": 146220 }, { "epoch": 0.3522, "grad_norm": 0.23047374188899994, "learning_rate": 6.149372589826752e-05, "loss": 3.0835546493530273, "step": 146230 }, { "epoch": 0.3522666666666667, "grad_norm": 0.20593708753585815, "learning_rate": 6.148299768335094e-05, "loss": 3.0067068099975587, "step": 146240 }, { "epoch": 0.35233333333333333, "grad_norm": 0.19416995346546173, "learning_rate": 6.147226891031731e-05, "loss": 3.0392370223999023, "step": 146250 }, { "epoch": 0.3524, "grad_norm": 0.20200178027153015, "learning_rate": 6.146153957968809e-05, "loss": 3.0333446502685546, "step": 146260 }, { "epoch": 0.35246666666666665, "grad_norm": 0.19921909272670746, "learning_rate": 6.145080969198475e-05, "loss": 3.0496023178100584, "step": 146270 }, { "epoch": 0.3525333333333333, "grad_norm": 0.24218103289604187, "learning_rate": 6.144007924772883e-05, "loss": 2.9981733322143556, "step": 146280 }, { "epoch": 0.3526, "grad_norm": 0.23431342840194702, "learning_rate": 6.142934824744184e-05, "loss": 3.059451103210449, "step": 146290 }, { "epoch": 0.3526666666666667, "grad_norm": 0.2457919418811798, "learning_rate": 6.141861669164537e-05, "loss": 3.114111328125, "step": 146300 }, { "epoch": 0.35273333333333334, "grad_norm": 0.2756574749946594, "learning_rate": 6.140788458086101e-05, "loss": 3.0736337661743165, "step": 146310 }, { "epoch": 0.3528, "grad_norm": 0.21438592672348022, "learning_rate": 6.139715191561038e-05, "loss": 3.063591194152832, "step": 146320 }, { "epoch": 0.35286666666666666, "grad_norm": 0.24220533668994904, "learning_rate": 6.138641869641512e-05, "loss": 3.0537851333618162, "step": 146330 }, { "epoch": 0.3529333333333333, "grad_norm": 0.20888088643550873, "learning_rate": 6.13756849237969e-05, "loss": 3.0622400283813476, "step": 146340 }, { "epoch": 0.353, "grad_norm": 0.2196826934814453, "learning_rate": 6.136495059827747e-05, "loss": 3.0394853591918944, "step": 146350 }, { "epoch": 0.35306666666666664, "grad_norm": 0.20439793169498444, "learning_rate": 6.13542157203785e-05, "loss": 3.00854377746582, "step": 146360 }, { "epoch": 0.35313333333333335, "grad_norm": 0.23027534782886505, "learning_rate": 6.134348029062175e-05, "loss": 3.020013618469238, "step": 146370 }, { "epoch": 0.3532, "grad_norm": 0.20925234258174896, "learning_rate": 6.133274430952904e-05, "loss": 3.0258787155151365, "step": 146380 }, { "epoch": 0.3532666666666667, "grad_norm": 0.19749069213867188, "learning_rate": 6.132200777762215e-05, "loss": 3.016649627685547, "step": 146390 }, { "epoch": 0.35333333333333333, "grad_norm": 0.2404709905385971, "learning_rate": 6.131127069542293e-05, "loss": 3.0227678298950194, "step": 146400 }, { "epoch": 0.3534, "grad_norm": 0.2261573225259781, "learning_rate": 6.130053306345323e-05, "loss": 3.0175348281860352, "step": 146410 }, { "epoch": 0.35346666666666665, "grad_norm": 0.2263140231370926, "learning_rate": 6.128979488223495e-05, "loss": 3.0411922454833986, "step": 146420 }, { "epoch": 0.3535333333333333, "grad_norm": 0.26048973202705383, "learning_rate": 6.127905615229002e-05, "loss": 2.9942033767700194, "step": 146430 }, { "epoch": 0.3536, "grad_norm": 0.20903533697128296, "learning_rate": 6.126831687414034e-05, "loss": 3.029634475708008, "step": 146440 }, { "epoch": 0.3536666666666667, "grad_norm": 0.2051391899585724, "learning_rate": 6.125757704830791e-05, "loss": 3.074419403076172, "step": 146450 }, { "epoch": 0.35373333333333334, "grad_norm": 0.20271119475364685, "learning_rate": 6.124683667531474e-05, "loss": 3.017755317687988, "step": 146460 }, { "epoch": 0.3538, "grad_norm": 0.6552635431289673, "learning_rate": 6.123609575568281e-05, "loss": 3.144153022766113, "step": 146470 }, { "epoch": 0.35386666666666666, "grad_norm": 0.2377835214138031, "learning_rate": 6.12253542899342e-05, "loss": 3.0781890869140627, "step": 146480 }, { "epoch": 0.3539333333333333, "grad_norm": 0.2626859247684479, "learning_rate": 6.121461227859099e-05, "loss": 3.0631532669067383, "step": 146490 }, { "epoch": 0.354, "grad_norm": 0.22353582084178925, "learning_rate": 6.120386972217526e-05, "loss": 2.967671775817871, "step": 146500 }, { "epoch": 0.35406666666666664, "grad_norm": 0.23260121047496796, "learning_rate": 6.119312662120916e-05, "loss": 3.0392082214355467, "step": 146510 }, { "epoch": 0.35413333333333336, "grad_norm": 0.21251127123832703, "learning_rate": 6.118238297621484e-05, "loss": 3.0367841720581055, "step": 146520 }, { "epoch": 0.3542, "grad_norm": 0.23988816142082214, "learning_rate": 6.117163878771446e-05, "loss": 3.189604949951172, "step": 146530 }, { "epoch": 0.3542666666666667, "grad_norm": 0.2194833904504776, "learning_rate": 6.116089405623026e-05, "loss": 3.0476316452026366, "step": 146540 }, { "epoch": 0.35433333333333333, "grad_norm": 0.20514783263206482, "learning_rate": 6.115014878228445e-05, "loss": 3.0562057495117188, "step": 146550 }, { "epoch": 0.3544, "grad_norm": 0.21562506258487701, "learning_rate": 6.11394029663993e-05, "loss": 3.069367027282715, "step": 146560 }, { "epoch": 0.35446666666666665, "grad_norm": 0.34438371658325195, "learning_rate": 6.11286566090971e-05, "loss": 3.1294363021850584, "step": 146570 }, { "epoch": 0.3545333333333333, "grad_norm": 0.21033591032028198, "learning_rate": 6.111790971090016e-05, "loss": 2.9865732192993164, "step": 146580 }, { "epoch": 0.3546, "grad_norm": 0.2135368287563324, "learning_rate": 6.110716227233084e-05, "loss": 3.0494470596313477, "step": 146590 }, { "epoch": 0.3546666666666667, "grad_norm": 0.2330072522163391, "learning_rate": 6.109641429391147e-05, "loss": 3.1161861419677734, "step": 146600 }, { "epoch": 0.35473333333333334, "grad_norm": 0.5130770802497864, "learning_rate": 6.108566577616447e-05, "loss": 3.0514572143554686, "step": 146610 }, { "epoch": 0.3548, "grad_norm": 0.21807773411273956, "learning_rate": 6.107491671961224e-05, "loss": 2.9580764770507812, "step": 146620 }, { "epoch": 0.35486666666666666, "grad_norm": 0.21951259672641754, "learning_rate": 6.106416712477724e-05, "loss": 3.0463518142700194, "step": 146630 }, { "epoch": 0.3549333333333333, "grad_norm": 0.19859692454338074, "learning_rate": 6.105341699218193e-05, "loss": 3.019088363647461, "step": 146640 }, { "epoch": 0.355, "grad_norm": 0.21285592019557953, "learning_rate": 6.104266632234881e-05, "loss": 3.055490493774414, "step": 146650 }, { "epoch": 0.35506666666666664, "grad_norm": 0.2035180777311325, "learning_rate": 6.10319151158004e-05, "loss": 3.070675277709961, "step": 146660 }, { "epoch": 0.35513333333333336, "grad_norm": 0.21592287719249725, "learning_rate": 6.102116337305925e-05, "loss": 3.083593559265137, "step": 146670 }, { "epoch": 0.3552, "grad_norm": 0.21732261776924133, "learning_rate": 6.1010411094647954e-05, "loss": 2.995553398132324, "step": 146680 }, { "epoch": 0.3552666666666667, "grad_norm": 0.23001989722251892, "learning_rate": 6.0999658281089086e-05, "loss": 3.014800262451172, "step": 146690 }, { "epoch": 0.35533333333333333, "grad_norm": 0.21097198128700256, "learning_rate": 6.098890493290529e-05, "loss": 3.0482580184936525, "step": 146700 }, { "epoch": 0.3554, "grad_norm": 0.4198441803455353, "learning_rate": 6.0978151050619215e-05, "loss": 2.802774429321289, "step": 146710 }, { "epoch": 0.35546666666666665, "grad_norm": 0.20360077917575836, "learning_rate": 6.0967396634753545e-05, "loss": 2.8281318664550783, "step": 146720 }, { "epoch": 0.3555333333333333, "grad_norm": 0.21561023592948914, "learning_rate": 6.0956641685830974e-05, "loss": 3.027931785583496, "step": 146730 }, { "epoch": 0.3556, "grad_norm": 0.2065499871969223, "learning_rate": 6.0945886204374234e-05, "loss": 3.0685136795043944, "step": 146740 }, { "epoch": 0.3556666666666667, "grad_norm": 0.19705398380756378, "learning_rate": 6.093513019090611e-05, "loss": 3.0529815673828127, "step": 146750 }, { "epoch": 0.35573333333333335, "grad_norm": 0.3041519820690155, "learning_rate": 6.0924373645949354e-05, "loss": 3.123940849304199, "step": 146760 }, { "epoch": 0.3558, "grad_norm": 0.20625129342079163, "learning_rate": 6.0913616570026776e-05, "loss": 3.0273597717285154, "step": 146770 }, { "epoch": 0.35586666666666666, "grad_norm": 0.19363628327846527, "learning_rate": 6.090285896366121e-05, "loss": 3.017694091796875, "step": 146780 }, { "epoch": 0.3559333333333333, "grad_norm": 0.20420709252357483, "learning_rate": 6.0892100827375534e-05, "loss": 3.038407325744629, "step": 146790 }, { "epoch": 0.356, "grad_norm": 0.24940314888954163, "learning_rate": 6.0881342161692624e-05, "loss": 3.0921133041381834, "step": 146800 }, { "epoch": 0.35606666666666664, "grad_norm": 0.2621418535709381, "learning_rate": 6.087058296713539e-05, "loss": 3.0497648239135744, "step": 146810 }, { "epoch": 0.35613333333333336, "grad_norm": 0.21117796003818512, "learning_rate": 6.085982324422678e-05, "loss": 3.046395492553711, "step": 146820 }, { "epoch": 0.3562, "grad_norm": 0.21764212846755981, "learning_rate": 6.0849062993489744e-05, "loss": 3.0203752517700195, "step": 146830 }, { "epoch": 0.3562666666666667, "grad_norm": 0.2191578894853592, "learning_rate": 6.083830221544727e-05, "loss": 3.0419488906860352, "step": 146840 }, { "epoch": 0.35633333333333334, "grad_norm": 0.20346254110336304, "learning_rate": 6.082754091062238e-05, "loss": 3.052067184448242, "step": 146850 }, { "epoch": 0.3564, "grad_norm": 0.2041531503200531, "learning_rate": 6.081677907953811e-05, "loss": 2.939175033569336, "step": 146860 }, { "epoch": 0.35646666666666665, "grad_norm": 0.20596256852149963, "learning_rate": 6.080601672271753e-05, "loss": 3.0704875946044923, "step": 146870 }, { "epoch": 0.3565333333333333, "grad_norm": 0.20456422865390778, "learning_rate": 6.079525384068372e-05, "loss": 2.988974761962891, "step": 146880 }, { "epoch": 0.3566, "grad_norm": 0.22354933619499207, "learning_rate": 6.078449043395982e-05, "loss": 3.0389360427856444, "step": 146890 }, { "epoch": 0.3566666666666667, "grad_norm": 0.1986459195613861, "learning_rate": 6.077372650306894e-05, "loss": 3.097547721862793, "step": 146900 }, { "epoch": 0.35673333333333335, "grad_norm": 0.21982961893081665, "learning_rate": 6.076296204853429e-05, "loss": 3.1652219772338865, "step": 146910 }, { "epoch": 0.3568, "grad_norm": 0.6785557866096497, "learning_rate": 6.075219707087902e-05, "loss": 3.0594881057739256, "step": 146920 }, { "epoch": 0.35686666666666667, "grad_norm": 0.27008500695228577, "learning_rate": 6.074143157062637e-05, "loss": 3.083014488220215, "step": 146930 }, { "epoch": 0.3569333333333333, "grad_norm": 0.2120303511619568, "learning_rate": 6.073066554829958e-05, "loss": 3.0369935989379884, "step": 146940 }, { "epoch": 0.357, "grad_norm": 0.21019603312015533, "learning_rate": 6.0719899004421924e-05, "loss": 3.0054210662841796, "step": 146950 }, { "epoch": 0.35706666666666664, "grad_norm": 0.3224093019962311, "learning_rate": 6.070913193951668e-05, "loss": 3.088928985595703, "step": 146960 }, { "epoch": 0.35713333333333336, "grad_norm": 0.20247620344161987, "learning_rate": 6.0698364354107195e-05, "loss": 3.06226806640625, "step": 146970 }, { "epoch": 0.3572, "grad_norm": 0.19968442618846893, "learning_rate": 6.0687596248716806e-05, "loss": 3.0448587417602537, "step": 146980 }, { "epoch": 0.3572666666666667, "grad_norm": 0.2125001698732376, "learning_rate": 6.067682762386886e-05, "loss": 3.028939437866211, "step": 146990 }, { "epoch": 0.35733333333333334, "grad_norm": 0.20290814340114594, "learning_rate": 6.0666058480086786e-05, "loss": 2.9927898406982423, "step": 147000 }, { "epoch": 0.3574, "grad_norm": 0.20855756103992462, "learning_rate": 6.065528881789397e-05, "loss": 3.050901412963867, "step": 147010 }, { "epoch": 0.35746666666666665, "grad_norm": 0.23380866646766663, "learning_rate": 6.06445186378139e-05, "loss": 3.3480579376220705, "step": 147020 }, { "epoch": 0.3575333333333333, "grad_norm": 0.20225323736667633, "learning_rate": 6.063374794037001e-05, "loss": 3.066212272644043, "step": 147030 }, { "epoch": 0.3576, "grad_norm": 0.20749762654304504, "learning_rate": 6.0622976726085824e-05, "loss": 3.0891632080078124, "step": 147040 }, { "epoch": 0.3576666666666667, "grad_norm": 0.23289638757705688, "learning_rate": 6.061220499548484e-05, "loss": 3.0236181259155273, "step": 147050 }, { "epoch": 0.35773333333333335, "grad_norm": 0.2247457653284073, "learning_rate": 6.060143274909062e-05, "loss": 3.008365249633789, "step": 147060 }, { "epoch": 0.3578, "grad_norm": 0.27683839201927185, "learning_rate": 6.0590659987426744e-05, "loss": 3.084278678894043, "step": 147070 }, { "epoch": 0.35786666666666667, "grad_norm": 0.2038893848657608, "learning_rate": 6.0579886711016784e-05, "loss": 3.0759340286254884, "step": 147080 }, { "epoch": 0.3579333333333333, "grad_norm": 0.2425788938999176, "learning_rate": 6.056911292038438e-05, "loss": 3.068401908874512, "step": 147090 }, { "epoch": 0.358, "grad_norm": 0.20968881249427795, "learning_rate": 6.055833861605318e-05, "loss": 3.025800323486328, "step": 147100 }, { "epoch": 0.35806666666666664, "grad_norm": 0.2074478566646576, "learning_rate": 6.054756379854684e-05, "loss": 3.0688493728637694, "step": 147110 }, { "epoch": 0.35813333333333336, "grad_norm": 0.21850602328777313, "learning_rate": 6.0536788468389074e-05, "loss": 3.0403255462646483, "step": 147120 }, { "epoch": 0.3582, "grad_norm": 0.24608634412288666, "learning_rate": 6.052601262610359e-05, "loss": 3.1702350616455077, "step": 147130 }, { "epoch": 0.3582666666666667, "grad_norm": 0.22337715327739716, "learning_rate": 6.051523627221414e-05, "loss": 3.0510934829711913, "step": 147140 }, { "epoch": 0.35833333333333334, "grad_norm": 0.21571455895900726, "learning_rate": 6.050445940724451e-05, "loss": 3.0902450561523436, "step": 147150 }, { "epoch": 0.3584, "grad_norm": 0.21531888842582703, "learning_rate": 6.049368203171847e-05, "loss": 3.209867477416992, "step": 147160 }, { "epoch": 0.35846666666666666, "grad_norm": 0.22265410423278809, "learning_rate": 6.048290414615986e-05, "loss": 3.014289665222168, "step": 147170 }, { "epoch": 0.3585333333333333, "grad_norm": 0.20152322947978973, "learning_rate": 6.0472125751092515e-05, "loss": 3.0871068954467775, "step": 147180 }, { "epoch": 0.3586, "grad_norm": 0.20514127612113953, "learning_rate": 6.046134684704031e-05, "loss": 3.0418067932128907, "step": 147190 }, { "epoch": 0.3586666666666667, "grad_norm": 0.21317806839942932, "learning_rate": 6.045056743452714e-05, "loss": 3.0678247451782226, "step": 147200 }, { "epoch": 0.35873333333333335, "grad_norm": 0.2053796947002411, "learning_rate": 6.043978751407693e-05, "loss": 3.000249481201172, "step": 147210 }, { "epoch": 0.3588, "grad_norm": 0.21657226979732513, "learning_rate": 6.0429007086213615e-05, "loss": 3.0108118057250977, "step": 147220 }, { "epoch": 0.35886666666666667, "grad_norm": 0.26780152320861816, "learning_rate": 6.04182261514612e-05, "loss": 3.0358806610107423, "step": 147230 }, { "epoch": 0.3589333333333333, "grad_norm": 0.24653847515583038, "learning_rate": 6.0407444710343616e-05, "loss": 3.080570411682129, "step": 147240 }, { "epoch": 0.359, "grad_norm": 0.2176237851381302, "learning_rate": 6.0396662763384934e-05, "loss": 3.042424964904785, "step": 147250 }, { "epoch": 0.35906666666666665, "grad_norm": 0.22874514758586884, "learning_rate": 6.038588031110916e-05, "loss": 3.1000165939331055, "step": 147260 }, { "epoch": 0.35913333333333336, "grad_norm": 0.2490989714860916, "learning_rate": 6.03750973540404e-05, "loss": 3.066173553466797, "step": 147270 }, { "epoch": 0.3592, "grad_norm": 0.3597398102283478, "learning_rate": 6.036431389270272e-05, "loss": 3.0770681381225584, "step": 147280 }, { "epoch": 0.3592666666666667, "grad_norm": 0.22112299501895905, "learning_rate": 6.035352992762025e-05, "loss": 3.034270095825195, "step": 147290 }, { "epoch": 0.35933333333333334, "grad_norm": 0.20775876939296722, "learning_rate": 6.0342745459317104e-05, "loss": 3.0382965087890623, "step": 147300 }, { "epoch": 0.3594, "grad_norm": 0.22446362674236298, "learning_rate": 6.03319604883175e-05, "loss": 3.0374914169311524, "step": 147310 }, { "epoch": 0.35946666666666666, "grad_norm": 0.22251033782958984, "learning_rate": 6.032117501514558e-05, "loss": 3.036248779296875, "step": 147320 }, { "epoch": 0.3595333333333333, "grad_norm": 0.20362447202205658, "learning_rate": 6.0310389040325586e-05, "loss": 3.053523826599121, "step": 147330 }, { "epoch": 0.3596, "grad_norm": 0.21156606078147888, "learning_rate": 6.029960256438174e-05, "loss": 3.0285572052001952, "step": 147340 }, { "epoch": 0.3596666666666667, "grad_norm": 0.21383832395076752, "learning_rate": 6.028881558783831e-05, "loss": 3.0337347030639648, "step": 147350 }, { "epoch": 0.35973333333333335, "grad_norm": 0.21645520627498627, "learning_rate": 6.0278028111219584e-05, "loss": 3.0500659942626953, "step": 147360 }, { "epoch": 0.3598, "grad_norm": 0.21217328310012817, "learning_rate": 6.02672401350499e-05, "loss": 3.087873840332031, "step": 147370 }, { "epoch": 0.35986666666666667, "grad_norm": 0.2107846885919571, "learning_rate": 6.025645165985354e-05, "loss": 2.9909109115600585, "step": 147380 }, { "epoch": 0.3599333333333333, "grad_norm": 0.24395355582237244, "learning_rate": 6.024566268615492e-05, "loss": 2.98240966796875, "step": 147390 }, { "epoch": 0.36, "grad_norm": 0.21743124723434448, "learning_rate": 6.023487321447839e-05, "loss": 3.11021728515625, "step": 147400 }, { "epoch": 0.36006666666666665, "grad_norm": 0.23704563081264496, "learning_rate": 6.022408324534837e-05, "loss": 3.0793628692626953, "step": 147410 }, { "epoch": 0.36013333333333336, "grad_norm": 0.21206034719944, "learning_rate": 6.02132927792893e-05, "loss": 3.076348876953125, "step": 147420 }, { "epoch": 0.3602, "grad_norm": 0.22187967598438263, "learning_rate": 6.020250181682562e-05, "loss": 3.047744941711426, "step": 147430 }, { "epoch": 0.3602666666666667, "grad_norm": 0.21413789689540863, "learning_rate": 6.0191710358481835e-05, "loss": 3.0682676315307615, "step": 147440 }, { "epoch": 0.36033333333333334, "grad_norm": 0.1912863552570343, "learning_rate": 6.018091840478243e-05, "loss": 2.9614501953125, "step": 147450 }, { "epoch": 0.3604, "grad_norm": 0.21921853721141815, "learning_rate": 6.0170125956251934e-05, "loss": 3.0119098663330077, "step": 147460 }, { "epoch": 0.36046666666666666, "grad_norm": 0.21239855885505676, "learning_rate": 6.015933301341492e-05, "loss": 3.0444177627563476, "step": 147470 }, { "epoch": 0.3605333333333333, "grad_norm": 0.20675082504749298, "learning_rate": 6.014853957679597e-05, "loss": 3.033530616760254, "step": 147480 }, { "epoch": 0.3606, "grad_norm": 0.20150429010391235, "learning_rate": 6.013774564691965e-05, "loss": 3.0369890213012694, "step": 147490 }, { "epoch": 0.3606666666666667, "grad_norm": 0.20525583624839783, "learning_rate": 6.012695122431061e-05, "loss": 3.0970272064208983, "step": 147500 }, { "epoch": 0.36073333333333335, "grad_norm": 0.23435722291469574, "learning_rate": 6.01161563094935e-05, "loss": 3.0391656875610353, "step": 147510 }, { "epoch": 0.3608, "grad_norm": 0.3711802065372467, "learning_rate": 6.010536090299299e-05, "loss": 3.1259593963623047, "step": 147520 }, { "epoch": 0.36086666666666667, "grad_norm": 0.22185340523719788, "learning_rate": 6.009456500533377e-05, "loss": 3.0567115783691405, "step": 147530 }, { "epoch": 0.36093333333333333, "grad_norm": 0.22935952246189117, "learning_rate": 6.008376861704057e-05, "loss": 3.0535816192626952, "step": 147540 }, { "epoch": 0.361, "grad_norm": 0.20697903633117676, "learning_rate": 6.007297173863814e-05, "loss": 3.052383613586426, "step": 147550 }, { "epoch": 0.36106666666666665, "grad_norm": 0.2321557104587555, "learning_rate": 6.006217437065126e-05, "loss": 3.0046016693115236, "step": 147560 }, { "epoch": 0.3611333333333333, "grad_norm": 0.23449872434139252, "learning_rate": 6.005137651360468e-05, "loss": 2.9801111221313477, "step": 147570 }, { "epoch": 0.3612, "grad_norm": 0.21560801565647125, "learning_rate": 6.004057816802325e-05, "loss": 3.036289596557617, "step": 147580 }, { "epoch": 0.3612666666666667, "grad_norm": 0.4250737130641937, "learning_rate": 6.0029779334431804e-05, "loss": 3.0269683837890624, "step": 147590 }, { "epoch": 0.36133333333333334, "grad_norm": 0.2813136577606201, "learning_rate": 6.00189800133552e-05, "loss": 3.105910873413086, "step": 147600 }, { "epoch": 0.3614, "grad_norm": 0.22196269035339355, "learning_rate": 6.000818020531833e-05, "loss": 3.012718391418457, "step": 147610 }, { "epoch": 0.36146666666666666, "grad_norm": 0.23489317297935486, "learning_rate": 5.999737991084612e-05, "loss": 3.098996162414551, "step": 147620 }, { "epoch": 0.3615333333333333, "grad_norm": 0.28486400842666626, "learning_rate": 5.9986579130463486e-05, "loss": 3.0297964096069334, "step": 147630 }, { "epoch": 0.3616, "grad_norm": 0.2086504101753235, "learning_rate": 5.997577786469539e-05, "loss": 3.167852783203125, "step": 147640 }, { "epoch": 0.3616666666666667, "grad_norm": 0.2140302062034607, "learning_rate": 5.996497611406682e-05, "loss": 3.053493690490723, "step": 147650 }, { "epoch": 0.36173333333333335, "grad_norm": 0.22731885313987732, "learning_rate": 5.995417387910277e-05, "loss": 3.0417034149169924, "step": 147660 }, { "epoch": 0.3618, "grad_norm": 0.20007763803005219, "learning_rate": 5.994337116032829e-05, "loss": 3.0178764343261717, "step": 147670 }, { "epoch": 0.36186666666666667, "grad_norm": 0.2353745400905609, "learning_rate": 5.99325679582684e-05, "loss": 3.0403966903686523, "step": 147680 }, { "epoch": 0.36193333333333333, "grad_norm": 0.21362653374671936, "learning_rate": 5.992176427344821e-05, "loss": 3.0889089584350584, "step": 147690 }, { "epoch": 0.362, "grad_norm": 0.2199268341064453, "learning_rate": 5.9910960106392813e-05, "loss": 3.0158102035522463, "step": 147700 }, { "epoch": 0.36206666666666665, "grad_norm": 0.2003437876701355, "learning_rate": 5.9900155457627313e-05, "loss": 3.067193603515625, "step": 147710 }, { "epoch": 0.3621333333333333, "grad_norm": 0.21973755955696106, "learning_rate": 5.988935032767688e-05, "loss": 3.014522171020508, "step": 147720 }, { "epoch": 0.3622, "grad_norm": 0.19737301766872406, "learning_rate": 5.9878544717066665e-05, "loss": 3.0430288314819336, "step": 147730 }, { "epoch": 0.3622666666666667, "grad_norm": 0.2195676863193512, "learning_rate": 5.986773862632188e-05, "loss": 2.9963623046875, "step": 147740 }, { "epoch": 0.36233333333333334, "grad_norm": 0.21269288659095764, "learning_rate": 5.985693205596773e-05, "loss": 3.0837871551513674, "step": 147750 }, { "epoch": 0.3624, "grad_norm": 0.21306376159191132, "learning_rate": 5.984612500652945e-05, "loss": 3.0473657608032227, "step": 147760 }, { "epoch": 0.36246666666666666, "grad_norm": 0.20870031416416168, "learning_rate": 5.983531747853231e-05, "loss": 3.0270305633544923, "step": 147770 }, { "epoch": 0.3625333333333333, "grad_norm": 0.3461677134037018, "learning_rate": 5.982450947250161e-05, "loss": 3.017169189453125, "step": 147780 }, { "epoch": 0.3626, "grad_norm": 0.22816288471221924, "learning_rate": 5.981370098896264e-05, "loss": 3.044194984436035, "step": 147790 }, { "epoch": 0.3626666666666667, "grad_norm": 0.24362348020076752, "learning_rate": 5.980289202844076e-05, "loss": 2.921190643310547, "step": 147800 }, { "epoch": 0.36273333333333335, "grad_norm": 0.9556068181991577, "learning_rate": 5.9792082591461285e-05, "loss": 3.0023996353149416, "step": 147810 }, { "epoch": 0.3628, "grad_norm": 0.2330692708492279, "learning_rate": 5.9781272678549623e-05, "loss": 3.0650632858276365, "step": 147820 }, { "epoch": 0.36286666666666667, "grad_norm": 0.2187574803829193, "learning_rate": 5.9770462290231166e-05, "loss": 3.165844535827637, "step": 147830 }, { "epoch": 0.36293333333333333, "grad_norm": 0.20325803756713867, "learning_rate": 5.975965142703135e-05, "loss": 3.075797271728516, "step": 147840 }, { "epoch": 0.363, "grad_norm": 0.2230132520198822, "learning_rate": 5.974884008947561e-05, "loss": 3.086116409301758, "step": 147850 }, { "epoch": 0.36306666666666665, "grad_norm": 0.23532713949680328, "learning_rate": 5.9738028278089434e-05, "loss": 3.0833974838256837, "step": 147860 }, { "epoch": 0.3631333333333333, "grad_norm": 0.3977736532688141, "learning_rate": 5.97272159933983e-05, "loss": 3.044137191772461, "step": 147870 }, { "epoch": 0.3632, "grad_norm": 0.22163310647010803, "learning_rate": 5.971640323592775e-05, "loss": 3.0790374755859373, "step": 147880 }, { "epoch": 0.3632666666666667, "grad_norm": 0.3001177906990051, "learning_rate": 5.9705590006203296e-05, "loss": 3.024612045288086, "step": 147890 }, { "epoch": 0.36333333333333334, "grad_norm": 0.252520889043808, "learning_rate": 5.969477630475051e-05, "loss": 3.1953447341918944, "step": 147900 }, { "epoch": 0.3634, "grad_norm": 0.20024515688419342, "learning_rate": 5.9683962132094994e-05, "loss": 3.061273765563965, "step": 147910 }, { "epoch": 0.36346666666666666, "grad_norm": 0.23649144172668457, "learning_rate": 5.9673147488762336e-05, "loss": 3.0465938568115236, "step": 147920 }, { "epoch": 0.3635333333333333, "grad_norm": 0.21040071547031403, "learning_rate": 5.96623323752782e-05, "loss": 3.0978418350219727, "step": 147930 }, { "epoch": 0.3636, "grad_norm": 0.22715766727924347, "learning_rate": 5.965151679216819e-05, "loss": 3.0432226181030275, "step": 147940 }, { "epoch": 0.3636666666666667, "grad_norm": 0.22129330039024353, "learning_rate": 5.964070073995804e-05, "loss": 2.975424385070801, "step": 147950 }, { "epoch": 0.36373333333333335, "grad_norm": 0.21552199125289917, "learning_rate": 5.962988421917343e-05, "loss": 3.0629766464233397, "step": 147960 }, { "epoch": 0.3638, "grad_norm": 0.2185899317264557, "learning_rate": 5.961906723034006e-05, "loss": 3.052250099182129, "step": 147970 }, { "epoch": 0.36386666666666667, "grad_norm": 0.2589299976825714, "learning_rate": 5.9608249773983705e-05, "loss": 3.00665168762207, "step": 147980 }, { "epoch": 0.36393333333333333, "grad_norm": 0.20999866724014282, "learning_rate": 5.9597431850630125e-05, "loss": 3.0650285720825194, "step": 147990 }, { "epoch": 0.364, "grad_norm": 0.21707651019096375, "learning_rate": 5.958661346080512e-05, "loss": 3.0213748931884767, "step": 148000 }, { "epoch": 0.36406666666666665, "grad_norm": 0.22311173379421234, "learning_rate": 5.9575794605034486e-05, "loss": 3.0328229904174804, "step": 148010 }, { "epoch": 0.3641333333333333, "grad_norm": 0.19711469113826752, "learning_rate": 5.956497528384407e-05, "loss": 3.0817222595214844, "step": 148020 }, { "epoch": 0.3642, "grad_norm": 0.2042665183544159, "learning_rate": 5.955415549775974e-05, "loss": 3.007679557800293, "step": 148030 }, { "epoch": 0.3642666666666667, "grad_norm": 0.21380282938480377, "learning_rate": 5.954333524730739e-05, "loss": 3.033221435546875, "step": 148040 }, { "epoch": 0.36433333333333334, "grad_norm": 0.22022099792957306, "learning_rate": 5.9532514533012875e-05, "loss": 3.029957962036133, "step": 148050 }, { "epoch": 0.3644, "grad_norm": 0.2534361779689789, "learning_rate": 5.952169335540216e-05, "loss": 3.1073539733886717, "step": 148060 }, { "epoch": 0.36446666666666666, "grad_norm": 0.21759629249572754, "learning_rate": 5.9510871715001206e-05, "loss": 3.0361553192138673, "step": 148070 }, { "epoch": 0.3645333333333333, "grad_norm": 0.20632843673229218, "learning_rate": 5.950004961233595e-05, "loss": 3.0227567672729494, "step": 148080 }, { "epoch": 0.3646, "grad_norm": 0.2893393933773041, "learning_rate": 5.9489227047932416e-05, "loss": 3.106182670593262, "step": 148090 }, { "epoch": 0.36466666666666664, "grad_norm": 0.21856829524040222, "learning_rate": 5.9478404022316615e-05, "loss": 3.033547592163086, "step": 148100 }, { "epoch": 0.36473333333333335, "grad_norm": 0.22888319194316864, "learning_rate": 5.946758053601458e-05, "loss": 2.892223358154297, "step": 148110 }, { "epoch": 0.3648, "grad_norm": 0.23237158358097076, "learning_rate": 5.9456756589552376e-05, "loss": 3.0043087005615234, "step": 148120 }, { "epoch": 0.3648666666666667, "grad_norm": 0.3328093886375427, "learning_rate": 5.944593218345609e-05, "loss": 3.1624250411987305, "step": 148130 }, { "epoch": 0.36493333333333333, "grad_norm": 0.20941944420337677, "learning_rate": 5.943510731825183e-05, "loss": 3.0675632476806642, "step": 148140 }, { "epoch": 0.365, "grad_norm": 0.21729691326618195, "learning_rate": 5.9424281994465714e-05, "loss": 3.043931770324707, "step": 148150 }, { "epoch": 0.36506666666666665, "grad_norm": 0.2893616557121277, "learning_rate": 5.941345621262391e-05, "loss": 3.0315118789672852, "step": 148160 }, { "epoch": 0.3651333333333333, "grad_norm": 0.23890651762485504, "learning_rate": 5.940262997325258e-05, "loss": 3.0272794723510743, "step": 148170 }, { "epoch": 0.3652, "grad_norm": 0.22568295896053314, "learning_rate": 5.9391803276877924e-05, "loss": 3.0329986572265626, "step": 148180 }, { "epoch": 0.3652666666666667, "grad_norm": 0.20997315645217896, "learning_rate": 5.9380976124026156e-05, "loss": 3.045069122314453, "step": 148190 }, { "epoch": 0.36533333333333334, "grad_norm": 0.23450340330600739, "learning_rate": 5.937014851522353e-05, "loss": 3.0133108139038085, "step": 148200 }, { "epoch": 0.3654, "grad_norm": 0.2160722017288208, "learning_rate": 5.9359320450996303e-05, "loss": 3.0374073028564452, "step": 148210 }, { "epoch": 0.36546666666666666, "grad_norm": 0.21258865296840668, "learning_rate": 5.9348491931870756e-05, "loss": 3.0413965225219726, "step": 148220 }, { "epoch": 0.3655333333333333, "grad_norm": 0.20468290150165558, "learning_rate": 5.9337662958373194e-05, "loss": 3.096142578125, "step": 148230 }, { "epoch": 0.3656, "grad_norm": 0.22586670517921448, "learning_rate": 5.9326833531029945e-05, "loss": 3.0109987258911133, "step": 148240 }, { "epoch": 0.36566666666666664, "grad_norm": 0.20486146211624146, "learning_rate": 5.931600365036737e-05, "loss": 3.0174970626831055, "step": 148250 }, { "epoch": 0.36573333333333335, "grad_norm": 0.21383945643901825, "learning_rate": 5.930517331691183e-05, "loss": 2.9980747222900392, "step": 148260 }, { "epoch": 0.3658, "grad_norm": 0.19956208765506744, "learning_rate": 5.929434253118973e-05, "loss": 3.035451889038086, "step": 148270 }, { "epoch": 0.3658666666666667, "grad_norm": 0.4813164472579956, "learning_rate": 5.928351129372749e-05, "loss": 3.1110368728637696, "step": 148280 }, { "epoch": 0.36593333333333333, "grad_norm": 0.20751109719276428, "learning_rate": 5.9272679605051526e-05, "loss": 2.9754533767700195, "step": 148290 }, { "epoch": 0.366, "grad_norm": 0.21517795324325562, "learning_rate": 5.9261847465688324e-05, "loss": 3.0170406341552733, "step": 148300 }, { "epoch": 0.36606666666666665, "grad_norm": 0.21180234849452972, "learning_rate": 5.925101487616436e-05, "loss": 3.099349784851074, "step": 148310 }, { "epoch": 0.3661333333333333, "grad_norm": 0.2017754167318344, "learning_rate": 5.924018183700613e-05, "loss": 3.022739601135254, "step": 148320 }, { "epoch": 0.3662, "grad_norm": 0.22006280720233917, "learning_rate": 5.9229348348740165e-05, "loss": 3.0756149291992188, "step": 148330 }, { "epoch": 0.3662666666666667, "grad_norm": 0.21217437088489532, "learning_rate": 5.9218514411893034e-05, "loss": 3.036087417602539, "step": 148340 }, { "epoch": 0.36633333333333334, "grad_norm": 0.30035290122032166, "learning_rate": 5.9207680026991265e-05, "loss": 3.036271095275879, "step": 148350 }, { "epoch": 0.3664, "grad_norm": 0.20931562781333923, "learning_rate": 5.91968451945615e-05, "loss": 3.0527681350708007, "step": 148360 }, { "epoch": 0.36646666666666666, "grad_norm": 0.2092559039592743, "learning_rate": 5.9186009915130314e-05, "loss": 3.0688270568847655, "step": 148370 }, { "epoch": 0.3665333333333333, "grad_norm": 0.2038225382566452, "learning_rate": 5.917517418922436e-05, "loss": 3.1158000946044924, "step": 148380 }, { "epoch": 0.3666, "grad_norm": 0.21710270643234253, "learning_rate": 5.916433801737028e-05, "loss": 2.972727394104004, "step": 148390 }, { "epoch": 0.36666666666666664, "grad_norm": 0.2203679084777832, "learning_rate": 5.915350140009477e-05, "loss": 3.0882051467895506, "step": 148400 }, { "epoch": 0.36673333333333336, "grad_norm": 0.22664612531661987, "learning_rate": 5.914266433792451e-05, "loss": 3.022066116333008, "step": 148410 }, { "epoch": 0.3668, "grad_norm": 0.2000519186258316, "learning_rate": 5.913182683138625e-05, "loss": 3.0329113006591797, "step": 148420 }, { "epoch": 0.3668666666666667, "grad_norm": 0.20985707640647888, "learning_rate": 5.912098888100672e-05, "loss": 3.043448257446289, "step": 148430 }, { "epoch": 0.36693333333333333, "grad_norm": 0.3119560480117798, "learning_rate": 5.9110150487312676e-05, "loss": 3.0117412567138673, "step": 148440 }, { "epoch": 0.367, "grad_norm": 0.24005846679210663, "learning_rate": 5.9099311650830926e-05, "loss": 2.9977533340454103, "step": 148450 }, { "epoch": 0.36706666666666665, "grad_norm": 0.20461656153202057, "learning_rate": 5.9088472372088264e-05, "loss": 3.0561933517456055, "step": 148460 }, { "epoch": 0.3671333333333333, "grad_norm": 0.19151513278484344, "learning_rate": 5.9077632651611515e-05, "loss": 2.9975263595581056, "step": 148470 }, { "epoch": 0.3672, "grad_norm": 0.21025897562503815, "learning_rate": 5.906679248992755e-05, "loss": 3.0780433654785155, "step": 148480 }, { "epoch": 0.3672666666666667, "grad_norm": 0.21544349193572998, "learning_rate": 5.905595188756321e-05, "loss": 3.0571861267089844, "step": 148490 }, { "epoch": 0.36733333333333335, "grad_norm": 0.21675002574920654, "learning_rate": 5.9045110845045424e-05, "loss": 3.009504699707031, "step": 148500 }, { "epoch": 0.3674, "grad_norm": 0.21508841216564178, "learning_rate": 5.903426936290108e-05, "loss": 2.968495559692383, "step": 148510 }, { "epoch": 0.36746666666666666, "grad_norm": 0.21594266593456268, "learning_rate": 5.902342744165714e-05, "loss": 3.121001434326172, "step": 148520 }, { "epoch": 0.3675333333333333, "grad_norm": 0.24459126591682434, "learning_rate": 5.901258508184056e-05, "loss": 3.0138195037841795, "step": 148530 }, { "epoch": 0.3676, "grad_norm": 0.2030535191297531, "learning_rate": 5.90017422839783e-05, "loss": 3.0289632797241213, "step": 148540 }, { "epoch": 0.36766666666666664, "grad_norm": 0.23285990953445435, "learning_rate": 5.899089904859736e-05, "loss": 3.0377777099609373, "step": 148550 }, { "epoch": 0.36773333333333336, "grad_norm": 0.20432056486606598, "learning_rate": 5.898005537622477e-05, "loss": 3.0071949005126952, "step": 148560 }, { "epoch": 0.3678, "grad_norm": 0.2818230390548706, "learning_rate": 5.8969211267387594e-05, "loss": 3.0974822998046876, "step": 148570 }, { "epoch": 0.3678666666666667, "grad_norm": 0.21039295196533203, "learning_rate": 5.8958366722612855e-05, "loss": 3.011369514465332, "step": 148580 }, { "epoch": 0.36793333333333333, "grad_norm": 0.20855854451656342, "learning_rate": 5.894752174242768e-05, "loss": 3.0938716888427735, "step": 148590 }, { "epoch": 0.368, "grad_norm": 0.23269009590148926, "learning_rate": 5.8936676327359154e-05, "loss": 3.141713523864746, "step": 148600 }, { "epoch": 0.36806666666666665, "grad_norm": 0.2419523149728775, "learning_rate": 5.8925830477934417e-05, "loss": 3.0665382385253905, "step": 148610 }, { "epoch": 0.3681333333333333, "grad_norm": 0.20924724638462067, "learning_rate": 5.89149841946806e-05, "loss": 3.004747009277344, "step": 148620 }, { "epoch": 0.3682, "grad_norm": 0.20317701995372772, "learning_rate": 5.890413747812489e-05, "loss": 3.0151445388793947, "step": 148630 }, { "epoch": 0.3682666666666667, "grad_norm": 0.23327073454856873, "learning_rate": 5.889329032879446e-05, "loss": 3.038560485839844, "step": 148640 }, { "epoch": 0.36833333333333335, "grad_norm": 0.20957084000110626, "learning_rate": 5.888244274721655e-05, "loss": 3.0324064254760743, "step": 148650 }, { "epoch": 0.3684, "grad_norm": 0.19972467422485352, "learning_rate": 5.887159473391837e-05, "loss": 3.048392677307129, "step": 148660 }, { "epoch": 0.36846666666666666, "grad_norm": 0.20658430457115173, "learning_rate": 5.886074628942718e-05, "loss": 3.025222969055176, "step": 148670 }, { "epoch": 0.3685333333333333, "grad_norm": 0.24655090272426605, "learning_rate": 5.884989741427026e-05, "loss": 3.0757518768310548, "step": 148680 }, { "epoch": 0.3686, "grad_norm": 0.24038143455982208, "learning_rate": 5.883904810897492e-05, "loss": 3.032400131225586, "step": 148690 }, { "epoch": 0.36866666666666664, "grad_norm": 0.2437821626663208, "learning_rate": 5.882819837406845e-05, "loss": 2.641427993774414, "step": 148700 }, { "epoch": 0.36873333333333336, "grad_norm": 0.21173441410064697, "learning_rate": 5.8817348210078186e-05, "loss": 2.2928085327148438, "step": 148710 }, { "epoch": 0.3688, "grad_norm": 0.35175564885139465, "learning_rate": 5.880649761753151e-05, "loss": 2.388190269470215, "step": 148720 }, { "epoch": 0.3688666666666667, "grad_norm": 0.24073341488838196, "learning_rate": 5.879564659695579e-05, "loss": 1.9797348022460937, "step": 148730 }, { "epoch": 0.36893333333333334, "grad_norm": 0.2297505885362625, "learning_rate": 5.8784795148878434e-05, "loss": 2.8536596298217773, "step": 148740 }, { "epoch": 0.369, "grad_norm": 0.2094852328300476, "learning_rate": 5.877394327382686e-05, "loss": 3.070954132080078, "step": 148750 }, { "epoch": 0.36906666666666665, "grad_norm": 0.20946626365184784, "learning_rate": 5.876309097232849e-05, "loss": 3.1518875122070313, "step": 148760 }, { "epoch": 0.3691333333333333, "grad_norm": 0.2759895920753479, "learning_rate": 5.875223824491083e-05, "loss": 3.064415168762207, "step": 148770 }, { "epoch": 0.3692, "grad_norm": 0.2041255384683609, "learning_rate": 5.874138509210132e-05, "loss": 3.0220449447631834, "step": 148780 }, { "epoch": 0.3692666666666667, "grad_norm": 0.24146749079227448, "learning_rate": 5.873053151442749e-05, "loss": 3.1220640182495116, "step": 148790 }, { "epoch": 0.36933333333333335, "grad_norm": 0.20629781484603882, "learning_rate": 5.871967751241686e-05, "loss": 3.0705589294433593, "step": 148800 }, { "epoch": 0.3694, "grad_norm": 0.22497080266475677, "learning_rate": 5.8708823086596975e-05, "loss": 3.0399871826171876, "step": 148810 }, { "epoch": 0.36946666666666667, "grad_norm": 0.2521445155143738, "learning_rate": 5.869796823749539e-05, "loss": 3.0763320922851562, "step": 148820 }, { "epoch": 0.3695333333333333, "grad_norm": 0.19885118305683136, "learning_rate": 5.8687112965639714e-05, "loss": 3.075191116333008, "step": 148830 }, { "epoch": 0.3696, "grad_norm": 0.23274463415145874, "learning_rate": 5.867625727155753e-05, "loss": 3.0361358642578127, "step": 148840 }, { "epoch": 0.36966666666666664, "grad_norm": 0.2077152580022812, "learning_rate": 5.8665401155776486e-05, "loss": 3.0423700332641603, "step": 148850 }, { "epoch": 0.36973333333333336, "grad_norm": 0.6224363446235657, "learning_rate": 5.8654544618824225e-05, "loss": 3.065578079223633, "step": 148860 }, { "epoch": 0.3698, "grad_norm": 0.2306647002696991, "learning_rate": 5.8643687661228396e-05, "loss": 3.047718048095703, "step": 148870 }, { "epoch": 0.3698666666666667, "grad_norm": 0.22279994189739227, "learning_rate": 5.8632830283516714e-05, "loss": 3.0429128646850585, "step": 148880 }, { "epoch": 0.36993333333333334, "grad_norm": 0.2465888112783432, "learning_rate": 5.862197248621688e-05, "loss": 3.011934280395508, "step": 148890 }, { "epoch": 0.37, "grad_norm": 0.6442394256591797, "learning_rate": 5.8611114269856617e-05, "loss": 3.2186073303222655, "step": 148900 }, { "epoch": 0.37006666666666665, "grad_norm": 0.23191983997821808, "learning_rate": 5.860025563496367e-05, "loss": 3.172338676452637, "step": 148910 }, { "epoch": 0.3701333333333333, "grad_norm": 0.24338941276073456, "learning_rate": 5.8589396582065836e-05, "loss": 3.0464515686035156, "step": 148920 }, { "epoch": 0.3702, "grad_norm": 0.23036624491214752, "learning_rate": 5.85785371116909e-05, "loss": 3.0753135681152344, "step": 148930 }, { "epoch": 0.3702666666666667, "grad_norm": 0.23169836401939392, "learning_rate": 5.856767722436664e-05, "loss": 3.042465591430664, "step": 148940 }, { "epoch": 0.37033333333333335, "grad_norm": 0.4136826992034912, "learning_rate": 5.855681692062094e-05, "loss": 3.1325368881225586, "step": 148950 }, { "epoch": 0.3704, "grad_norm": 0.22516992688179016, "learning_rate": 5.85459562009816e-05, "loss": 3.0672481536865233, "step": 148960 }, { "epoch": 0.37046666666666667, "grad_norm": 0.25442683696746826, "learning_rate": 5.853509506597652e-05, "loss": 3.1204721450805666, "step": 148970 }, { "epoch": 0.3705333333333333, "grad_norm": 0.19995155930519104, "learning_rate": 5.8524233516133585e-05, "loss": 3.0414281845092774, "step": 148980 }, { "epoch": 0.3706, "grad_norm": 0.2414809614419937, "learning_rate": 5.851337155198071e-05, "loss": 3.1213823318481446, "step": 148990 }, { "epoch": 0.37066666666666664, "grad_norm": 0.20424404740333557, "learning_rate": 5.8502509174045825e-05, "loss": 3.0637643814086912, "step": 149000 }, { "epoch": 0.37073333333333336, "grad_norm": 0.3084029257297516, "learning_rate": 5.84916463828569e-05, "loss": 3.2730873107910154, "step": 149010 }, { "epoch": 0.3708, "grad_norm": 0.2100885510444641, "learning_rate": 5.848078317894188e-05, "loss": 3.0348155975341795, "step": 149020 }, { "epoch": 0.3708666666666667, "grad_norm": 0.21500293910503387, "learning_rate": 5.846991956282877e-05, "loss": 3.0222541809082033, "step": 149030 }, { "epoch": 0.37093333333333334, "grad_norm": 0.20682094991207123, "learning_rate": 5.845905553504558e-05, "loss": 3.0358823776245116, "step": 149040 }, { "epoch": 0.371, "grad_norm": 0.20159895718097687, "learning_rate": 5.844819109612035e-05, "loss": 2.999139976501465, "step": 149050 }, { "epoch": 0.37106666666666666, "grad_norm": 0.21361008286476135, "learning_rate": 5.8437326246581125e-05, "loss": 3.043686294555664, "step": 149060 }, { "epoch": 0.3711333333333333, "grad_norm": 0.21905438601970673, "learning_rate": 5.842646098695599e-05, "loss": 3.0270196914672853, "step": 149070 }, { "epoch": 0.3712, "grad_norm": 0.2116727977991104, "learning_rate": 5.841559531777302e-05, "loss": 3.0232858657836914, "step": 149080 }, { "epoch": 0.3712666666666667, "grad_norm": 0.22814038395881653, "learning_rate": 5.840472923956034e-05, "loss": 3.0567771911621096, "step": 149090 }, { "epoch": 0.37133333333333335, "grad_norm": 0.21293026208877563, "learning_rate": 5.839386275284608e-05, "loss": 3.0908655166625976, "step": 149100 }, { "epoch": 0.3714, "grad_norm": 0.2050376534461975, "learning_rate": 5.8382995858158386e-05, "loss": 3.0081090927124023, "step": 149110 }, { "epoch": 0.37146666666666667, "grad_norm": 0.20627515017986298, "learning_rate": 5.837212855602544e-05, "loss": 2.9890605926513674, "step": 149120 }, { "epoch": 0.3715333333333333, "grad_norm": 0.2237691730260849, "learning_rate": 5.836126084697542e-05, "loss": 3.0674598693847654, "step": 149130 }, { "epoch": 0.3716, "grad_norm": 0.3802986443042755, "learning_rate": 5.835039273153655e-05, "loss": 3.0521366119384767, "step": 149140 }, { "epoch": 0.37166666666666665, "grad_norm": 0.21574454009532928, "learning_rate": 5.833952421023706e-05, "loss": 3.016092300415039, "step": 149150 }, { "epoch": 0.37173333333333336, "grad_norm": 0.21179154515266418, "learning_rate": 5.8328655283605204e-05, "loss": 3.078965759277344, "step": 149160 }, { "epoch": 0.3718, "grad_norm": 0.23722171783447266, "learning_rate": 5.831778595216924e-05, "loss": 3.0166175842285154, "step": 149170 }, { "epoch": 0.3718666666666667, "grad_norm": 0.2338137924671173, "learning_rate": 5.8306916216457473e-05, "loss": 3.098880577087402, "step": 149180 }, { "epoch": 0.37193333333333334, "grad_norm": 0.22421719133853912, "learning_rate": 5.8296046076998213e-05, "loss": 3.045833206176758, "step": 149190 }, { "epoch": 0.372, "grad_norm": 0.22763946652412415, "learning_rate": 5.828517553431977e-05, "loss": 3.053907012939453, "step": 149200 }, { "epoch": 0.37206666666666666, "grad_norm": 0.24432598054409027, "learning_rate": 5.8274304588950515e-05, "loss": 3.015933036804199, "step": 149210 }, { "epoch": 0.3721333333333333, "grad_norm": 0.22479958832263947, "learning_rate": 5.826343324141881e-05, "loss": 3.0169757843017577, "step": 149220 }, { "epoch": 0.3722, "grad_norm": 0.20588479936122894, "learning_rate": 5.825256149225303e-05, "loss": 3.0286970138549805, "step": 149230 }, { "epoch": 0.3722666666666667, "grad_norm": 0.23667727410793304, "learning_rate": 5.824168934198161e-05, "loss": 3.0423301696777343, "step": 149240 }, { "epoch": 0.37233333333333335, "grad_norm": 0.21750719845294952, "learning_rate": 5.823081679113297e-05, "loss": 2.9773277282714843, "step": 149250 }, { "epoch": 0.3724, "grad_norm": 0.22019992768764496, "learning_rate": 5.8219943840235534e-05, "loss": 3.0367332458496095, "step": 149260 }, { "epoch": 0.37246666666666667, "grad_norm": 0.23138241469860077, "learning_rate": 5.82090704898178e-05, "loss": 3.112187957763672, "step": 149270 }, { "epoch": 0.3725333333333333, "grad_norm": 0.22728726267814636, "learning_rate": 5.819819674040823e-05, "loss": 3.0049631118774416, "step": 149280 }, { "epoch": 0.3726, "grad_norm": 0.4766903817653656, "learning_rate": 5.818732259253533e-05, "loss": 3.0776142120361327, "step": 149290 }, { "epoch": 0.37266666666666665, "grad_norm": 0.22389526665210724, "learning_rate": 5.8176448046727635e-05, "loss": 3.2678268432617186, "step": 149300 }, { "epoch": 0.37273333333333336, "grad_norm": 0.20370084047317505, "learning_rate": 5.816557310351369e-05, "loss": 3.018631172180176, "step": 149310 }, { "epoch": 0.3728, "grad_norm": 0.2226530909538269, "learning_rate": 5.815469776342206e-05, "loss": 3.094321441650391, "step": 149320 }, { "epoch": 0.3728666666666667, "grad_norm": 0.22466179728507996, "learning_rate": 5.814382202698133e-05, "loss": 3.0338584899902346, "step": 149330 }, { "epoch": 0.37293333333333334, "grad_norm": 0.20812709629535675, "learning_rate": 5.813294589472009e-05, "loss": 3.0615158081054688, "step": 149340 }, { "epoch": 0.373, "grad_norm": 0.22262994945049286, "learning_rate": 5.8122069367166955e-05, "loss": 3.0815860748291017, "step": 149350 }, { "epoch": 0.37306666666666666, "grad_norm": 0.2765413522720337, "learning_rate": 5.811119244485058e-05, "loss": 3.0533868789672853, "step": 149360 }, { "epoch": 0.3731333333333333, "grad_norm": 0.22458383440971375, "learning_rate": 5.810031512829963e-05, "loss": 3.0247398376464845, "step": 149370 }, { "epoch": 0.3732, "grad_norm": 0.2135460525751114, "learning_rate": 5.808943741804276e-05, "loss": 3.008017730712891, "step": 149380 }, { "epoch": 0.3732666666666667, "grad_norm": 0.23484919965267181, "learning_rate": 5.807855931460868e-05, "loss": 3.0997220993041994, "step": 149390 }, { "epoch": 0.37333333333333335, "grad_norm": 0.26615190505981445, "learning_rate": 5.8067680818526126e-05, "loss": 3.0054805755615233, "step": 149400 }, { "epoch": 0.3734, "grad_norm": 0.4713717997074127, "learning_rate": 5.805680193032381e-05, "loss": 3.1708208084106446, "step": 149410 }, { "epoch": 0.37346666666666667, "grad_norm": 0.21897174417972565, "learning_rate": 5.804592265053049e-05, "loss": 3.002455139160156, "step": 149420 }, { "epoch": 0.37353333333333333, "grad_norm": 0.22881749272346497, "learning_rate": 5.803504297967495e-05, "loss": 3.0312665939331054, "step": 149430 }, { "epoch": 0.3736, "grad_norm": 0.21878927946090698, "learning_rate": 5.802416291828597e-05, "loss": 3.014921188354492, "step": 149440 }, { "epoch": 0.37366666666666665, "grad_norm": 0.23970356583595276, "learning_rate": 5.801328246689237e-05, "loss": 3.0246692657470704, "step": 149450 }, { "epoch": 0.3737333333333333, "grad_norm": 0.2868172526359558, "learning_rate": 5.8002401626022985e-05, "loss": 3.069691467285156, "step": 149460 }, { "epoch": 0.3738, "grad_norm": 0.256011426448822, "learning_rate": 5.799152039620666e-05, "loss": 3.0530242919921875, "step": 149470 }, { "epoch": 0.3738666666666667, "grad_norm": 0.203923299908638, "learning_rate": 5.798063877797225e-05, "loss": 2.993233871459961, "step": 149480 }, { "epoch": 0.37393333333333334, "grad_norm": 0.22051315009593964, "learning_rate": 5.796975677184867e-05, "loss": 3.141311454772949, "step": 149490 }, { "epoch": 0.374, "grad_norm": 0.20907463133335114, "learning_rate": 5.7958874378364814e-05, "loss": 3.0641805648803713, "step": 149500 }, { "epoch": 0.37406666666666666, "grad_norm": 0.22136227786540985, "learning_rate": 5.79479915980496e-05, "loss": 3.0473520278930666, "step": 149510 }, { "epoch": 0.3741333333333333, "grad_norm": 0.42857232689857483, "learning_rate": 5.7937108431431976e-05, "loss": 3.0450071334838866, "step": 149520 }, { "epoch": 0.3742, "grad_norm": 0.3161044716835022, "learning_rate": 5.792622487904091e-05, "loss": 3.069741058349609, "step": 149530 }, { "epoch": 0.3742666666666667, "grad_norm": 0.24301062524318695, "learning_rate": 5.7915340941405394e-05, "loss": 3.0131814956665037, "step": 149540 }, { "epoch": 0.37433333333333335, "grad_norm": 0.2593287527561188, "learning_rate": 5.79044566190544e-05, "loss": 3.0588537216186524, "step": 149550 }, { "epoch": 0.3744, "grad_norm": 0.29706278443336487, "learning_rate": 5.7893571912516974e-05, "loss": 3.0124908447265626, "step": 149560 }, { "epoch": 0.37446666666666667, "grad_norm": 0.2136470377445221, "learning_rate": 5.788268682232215e-05, "loss": 3.059620475769043, "step": 149570 }, { "epoch": 0.37453333333333333, "grad_norm": 0.23700794577598572, "learning_rate": 5.787180134899897e-05, "loss": 2.9979841232299806, "step": 149580 }, { "epoch": 0.3746, "grad_norm": 0.2106659710407257, "learning_rate": 5.786091549307651e-05, "loss": 3.026190757751465, "step": 149590 }, { "epoch": 0.37466666666666665, "grad_norm": 0.23901018500328064, "learning_rate": 5.785002925508387e-05, "loss": 3.0139066696166994, "step": 149600 }, { "epoch": 0.3747333333333333, "grad_norm": 0.8736511468887329, "learning_rate": 5.783914263555017e-05, "loss": 3.0240671157836916, "step": 149610 }, { "epoch": 0.3748, "grad_norm": 0.22861970961093903, "learning_rate": 5.7828255635004534e-05, "loss": 3.1316781997680665, "step": 149620 }, { "epoch": 0.3748666666666667, "grad_norm": 0.2270156592130661, "learning_rate": 5.78173682539761e-05, "loss": 3.0802080154418947, "step": 149630 }, { "epoch": 0.37493333333333334, "grad_norm": 0.21904754638671875, "learning_rate": 5.780648049299406e-05, "loss": 3.1109426498413084, "step": 149640 }, { "epoch": 0.375, "grad_norm": 0.2901405394077301, "learning_rate": 5.779559235258758e-05, "loss": 3.054947090148926, "step": 149650 }, { "epoch": 0.37506666666666666, "grad_norm": 0.3586965799331665, "learning_rate": 5.778470383328588e-05, "loss": 3.0834070205688477, "step": 149660 }, { "epoch": 0.3751333333333333, "grad_norm": 0.2032257318496704, "learning_rate": 5.777381493561818e-05, "loss": 3.022391128540039, "step": 149670 }, { "epoch": 0.3752, "grad_norm": 0.23431552946567535, "learning_rate": 5.77629256601137e-05, "loss": 3.1774877548217773, "step": 149680 }, { "epoch": 0.3752666666666667, "grad_norm": 0.22486165165901184, "learning_rate": 5.7752036007301726e-05, "loss": 3.000980567932129, "step": 149690 }, { "epoch": 0.37533333333333335, "grad_norm": 0.23745808005332947, "learning_rate": 5.774114597771152e-05, "loss": 3.0328886032104494, "step": 149700 }, { "epoch": 0.3754, "grad_norm": 0.29480454325675964, "learning_rate": 5.7730255571872386e-05, "loss": 2.8332260131835936, "step": 149710 }, { "epoch": 0.37546666666666667, "grad_norm": 0.209975466132164, "learning_rate": 5.771936479031363e-05, "loss": 3.3119503021240235, "step": 149720 }, { "epoch": 0.37553333333333333, "grad_norm": 0.3741937279701233, "learning_rate": 5.770847363356461e-05, "loss": 3.2186569213867187, "step": 149730 }, { "epoch": 0.3756, "grad_norm": 0.268950492143631, "learning_rate": 5.769758210215466e-05, "loss": 3.0318233489990236, "step": 149740 }, { "epoch": 0.37566666666666665, "grad_norm": 0.2230360209941864, "learning_rate": 5.768669019661315e-05, "loss": 3.0113605499267577, "step": 149750 }, { "epoch": 0.3757333333333333, "grad_norm": 0.19972044229507446, "learning_rate": 5.7675797917469455e-05, "loss": 3.070932960510254, "step": 149760 }, { "epoch": 0.3758, "grad_norm": 0.2837403416633606, "learning_rate": 5.7664905265253e-05, "loss": 3.0494150161743163, "step": 149770 }, { "epoch": 0.3758666666666667, "grad_norm": 0.23756776750087738, "learning_rate": 5.765401224049319e-05, "loss": 3.0697465896606446, "step": 149780 }, { "epoch": 0.37593333333333334, "grad_norm": 0.21732982993125916, "learning_rate": 5.76431188437195e-05, "loss": 3.017709732055664, "step": 149790 }, { "epoch": 0.376, "grad_norm": 0.2053588181734085, "learning_rate": 5.7632225075461354e-05, "loss": 2.972257614135742, "step": 149800 }, { "epoch": 0.37606666666666666, "grad_norm": 0.5916385054588318, "learning_rate": 5.762133093624826e-05, "loss": 3.0669439315795897, "step": 149810 }, { "epoch": 0.3761333333333333, "grad_norm": 0.2583186626434326, "learning_rate": 5.76104364266097e-05, "loss": 3.0221818923950194, "step": 149820 }, { "epoch": 0.3762, "grad_norm": 0.22499816119670868, "learning_rate": 5.7599541547075184e-05, "loss": 3.026020812988281, "step": 149830 }, { "epoch": 0.3762666666666667, "grad_norm": 0.9135274887084961, "learning_rate": 5.758864629817425e-05, "loss": 3.1537405014038087, "step": 149840 }, { "epoch": 0.37633333333333335, "grad_norm": 0.6330942511558533, "learning_rate": 5.757775068043645e-05, "loss": 3.1689174652099608, "step": 149850 }, { "epoch": 0.3764, "grad_norm": 0.23326243460178375, "learning_rate": 5.756685469439135e-05, "loss": 3.1568330764770507, "step": 149860 }, { "epoch": 0.37646666666666667, "grad_norm": 0.23684240877628326, "learning_rate": 5.755595834056853e-05, "loss": 3.0528120040893554, "step": 149870 }, { "epoch": 0.37653333333333333, "grad_norm": 0.24838267266750336, "learning_rate": 5.7545061619497596e-05, "loss": 3.032523345947266, "step": 149880 }, { "epoch": 0.3766, "grad_norm": 0.22662831842899323, "learning_rate": 5.753416453170819e-05, "loss": 3.032366943359375, "step": 149890 }, { "epoch": 0.37666666666666665, "grad_norm": 0.25278371572494507, "learning_rate": 5.7523267077729925e-05, "loss": 3.11844539642334, "step": 149900 }, { "epoch": 0.3767333333333333, "grad_norm": 0.22413523495197296, "learning_rate": 5.751236925809247e-05, "loss": 3.2397533416748048, "step": 149910 }, { "epoch": 0.3768, "grad_norm": 0.20588405430316925, "learning_rate": 5.75014710733255e-05, "loss": 3.0597219467163086, "step": 149920 }, { "epoch": 0.3768666666666667, "grad_norm": 0.22342941164970398, "learning_rate": 5.7490572523958696e-05, "loss": 3.0473508834838867, "step": 149930 }, { "epoch": 0.37693333333333334, "grad_norm": 0.22142717242240906, "learning_rate": 5.747967361052179e-05, "loss": 3.034083938598633, "step": 149940 }, { "epoch": 0.377, "grad_norm": 0.28968706727027893, "learning_rate": 5.7468774333544495e-05, "loss": 3.0370914459228517, "step": 149950 }, { "epoch": 0.37706666666666666, "grad_norm": 0.21531942486763, "learning_rate": 5.745787469355657e-05, "loss": 3.0610857009887695, "step": 149960 }, { "epoch": 0.3771333333333333, "grad_norm": 0.2010510116815567, "learning_rate": 5.7446974691087754e-05, "loss": 3.046231269836426, "step": 149970 }, { "epoch": 0.3772, "grad_norm": 0.44266095757484436, "learning_rate": 5.7436074326667864e-05, "loss": 3.073024940490723, "step": 149980 }, { "epoch": 0.37726666666666664, "grad_norm": 0.21715234220027924, "learning_rate": 5.742517360082667e-05, "loss": 3.0857959747314454, "step": 149990 }, { "epoch": 0.37733333333333335, "grad_norm": 0.2350039780139923, "learning_rate": 5.7414272514094e-05, "loss": 3.013338088989258, "step": 150000 } ], "logging_steps": 10, "max_steps": 150000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.0102047604736e+19, "train_batch_size": 20, "trial_name": null, "trial_params": null }