[ { "loss": 1.5673105239868164, "grad_norm": 17.69691276550293, "learning_rate": 1.8e-05, "epoch": 0.0003215744284014535, "step": 10 }, { "loss": 0.3935260772705078, "grad_norm": 4.892251491546631, "learning_rate": 3.8e-05, "epoch": 0.000643148856802907, "step": 20 }, { "loss": 0.28224611282348633, "grad_norm": 3.7167558670043945, "learning_rate": 5.8e-05, "epoch": 0.0009647232852043606, "step": 30 }, { "loss": 0.21302554607391358, "grad_norm": 2.2682483196258545, "learning_rate": 7.800000000000001e-05, "epoch": 0.001286297713605814, "step": 40 }, { "loss": 0.19408349990844725, "grad_norm": 2.2181177139282227, "learning_rate": 9.8e-05, "epoch": 0.0016078721420072675, "step": 50 }, { "loss": 0.21122798919677735, "grad_norm": 1.552700400352478, "learning_rate": 0.000118, "epoch": 0.0019294465704087212, "step": 60 }, { "loss": 0.17427901029586793, "grad_norm": 1.7993711233139038, "learning_rate": 0.000138, "epoch": 0.0022510209988101746, "step": 70 }, { "loss": 0.18862361907958985, "grad_norm": 1.6078184843063354, "learning_rate": 0.00015800000000000002, "epoch": 0.002572595427211628, "step": 80 }, { "loss": 0.15153205394744873, "grad_norm": 3.307420492172241, "learning_rate": 0.00017800000000000002, "epoch": 0.0028941698556130815, "step": 90 }, { "loss": 0.19316819906234742, "grad_norm": 1.4174200296401978, "learning_rate": 0.00019800000000000002, "epoch": 0.003215744284014535, "step": 100 }, { "loss": 0.18397116661071777, "grad_norm": 2.051401138305664, "learning_rate": 0.00019999995839788484, "epoch": 0.003537318712415989, "step": 110 }, { "loss": 0.15460578203201295, "grad_norm": 1.2434544563293457, "learning_rate": 0.00019999981458814847, "epoch": 0.0038588931408174423, "step": 120 }, { "loss": 0.16346523761749268, "grad_norm": 2.292675256729126, "learning_rate": 0.00019999956805733223, "epoch": 0.004180467569218895, "step": 130 }, { "loss": 0.1633118748664856, "grad_norm": 1.3027211427688599, "learning_rate": 0.00019999921880568935, "epoch": 0.004502041997620349, "step": 140 }, { "loss": 0.14462895393371583, "grad_norm": 1.0612585544586182, "learning_rate": 0.00019999876683357855, "epoch": 0.004823616426021803, "step": 150 }, { "loss": 0.16672265529632568, "grad_norm": 1.4711265563964844, "learning_rate": 0.00019999821214146418, "epoch": 0.005145190854423256, "step": 160 }, { "loss": 0.1713968276977539, "grad_norm": 0.9445065259933472, "learning_rate": 0.00019999755472991594, "epoch": 0.00546676528282471, "step": 170 }, { "loss": 0.15581530332565308, "grad_norm": 1.4779735803604126, "learning_rate": 0.0001999967945996092, "epoch": 0.005788339711226163, "step": 180 }, { "loss": 0.15593582391738892, "grad_norm": 1.2822918891906738, "learning_rate": 0.00019999593175132476, "epoch": 0.006109914139627617, "step": 190 }, { "loss": 0.19365686178207397, "grad_norm": 1.0924097299575806, "learning_rate": 0.00019999496618594893, "epoch": 0.00643148856802907, "step": 200 }, { "loss": 0.15486612319946289, "grad_norm": 0.7953126430511475, "learning_rate": 0.00019999389790447355, "epoch": 0.006753062996430524, "step": 210 }, { "loss": 0.16763465404510497, "grad_norm": 1.3703619241714478, "learning_rate": 0.00019999272690799603, "epoch": 0.007074637424831978, "step": 220 }, { "loss": 0.15374833345413208, "grad_norm": 0.6760343909263611, "learning_rate": 0.00019999145319771913, "epoch": 0.007396211853233431, "step": 230 }, { "loss": 0.17234739065170288, "grad_norm": 1.6318228244781494, "learning_rate": 0.00019999007677495127, "epoch": 0.007717786281634885, "step": 240 }, { "loss": 0.1620724081993103, "grad_norm": 0.681673526763916, "learning_rate": 0.00019998859764110637, "epoch": 0.008039360710036338, "step": 250 }, { "loss": 0.16377369165420533, "grad_norm": 1.023579716682434, "learning_rate": 0.00019998701579770377, "epoch": 0.00836093513843779, "step": 260 }, { "loss": 0.15195246934890747, "grad_norm": 1.2718433141708374, "learning_rate": 0.00019998533124636838, "epoch": 0.008682509566839245, "step": 270 }, { "loss": 0.19328283071517943, "grad_norm": 1.8994773626327515, "learning_rate": 0.00019998354398883056, "epoch": 0.009004083995240698, "step": 280 }, { "loss": 0.13401613235473633, "grad_norm": 0.902221143245697, "learning_rate": 0.00019998165402692624, "epoch": 0.009325658423642151, "step": 290 }, { "loss": 0.17590770721435547, "grad_norm": 0.6986466646194458, "learning_rate": 0.0001999796613625968, "epoch": 0.009647232852043606, "step": 300 }, { "loss": 0.15391089916229247, "grad_norm": 1.071215033531189, "learning_rate": 0.00019997756599788913, "epoch": 0.00996880728044506, "step": 310 }, { "loss": 0.156918466091156, "grad_norm": 0.8470816612243652, "learning_rate": 0.0001999753679349556, "epoch": 0.010290381708846512, "step": 320 }, { "loss": 0.1681678533554077, "grad_norm": 1.6104198694229126, "learning_rate": 0.00019997306717605413, "epoch": 0.010611956137247965, "step": 330 }, { "loss": 0.13588624000549315, "grad_norm": 0.7630122900009155, "learning_rate": 0.00019997066372354804, "epoch": 0.01093353056564942, "step": 340 }, { "loss": 0.16677809953689576, "grad_norm": 1.205472707748413, "learning_rate": 0.0001999681575799062, "epoch": 0.011255104994050873, "step": 350 }, { "loss": 0.12492926120758056, "grad_norm": 0.7292776107788086, "learning_rate": 0.000199965548747703, "epoch": 0.011576679422452326, "step": 360 }, { "loss": 0.14817183017730712, "grad_norm": 0.6910330653190613, "learning_rate": 0.0001999628372296182, "epoch": 0.01189825385085378, "step": 370 }, { "loss": 0.15440335273742675, "grad_norm": 1.0198880434036255, "learning_rate": 0.00019996002302843712, "epoch": 0.012219828279255234, "step": 380 }, { "loss": 0.1331814169883728, "grad_norm": 0.7604308128356934, "learning_rate": 0.00019995710614705058, "epoch": 0.012541402707656687, "step": 390 }, { "loss": 0.10600050687789916, "grad_norm": 0.8126104474067688, "learning_rate": 0.00019995408658845482, "epoch": 0.01286297713605814, "step": 400 }, { "loss": 0.14562102556228637, "grad_norm": 0.6844388842582703, "learning_rate": 0.00019995096435575157, "epoch": 0.013184551564459595, "step": 410 }, { "loss": 0.12237757444381714, "grad_norm": 0.8213316798210144, "learning_rate": 0.00019994773945214797, "epoch": 0.013506125992861048, "step": 420 }, { "loss": 0.16479880809783937, "grad_norm": 1.0815225839614868, "learning_rate": 0.00019994441188095677, "epoch": 0.0138277004212625, "step": 430 }, { "loss": 0.14453794956207275, "grad_norm": 1.1022553443908691, "learning_rate": 0.00019994098164559601, "epoch": 0.014149274849663955, "step": 440 }, { "loss": 0.15858374834060668, "grad_norm": 0.820088267326355, "learning_rate": 0.00019993744874958937, "epoch": 0.014470849278065408, "step": 450 }, { "loss": 0.12721436023712157, "grad_norm": 1.0894126892089844, "learning_rate": 0.00019993381319656582, "epoch": 0.014792423706466861, "step": 460 }, { "loss": 0.15983102321624756, "grad_norm": 1.233209252357483, "learning_rate": 0.00019993007499025984, "epoch": 0.015113998134868314, "step": 470 }, { "loss": 0.12201125621795654, "grad_norm": 0.6897937655448914, "learning_rate": 0.0001999262341345114, "epoch": 0.01543557256326977, "step": 480 }, { "loss": 0.13091821670532228, "grad_norm": 1.1130845546722412, "learning_rate": 0.00019992229063326583, "epoch": 0.015757146991671224, "step": 490 }, { "loss": 0.11298969984054566, "grad_norm": 1.0091015100479126, "learning_rate": 0.000199918244490574, "epoch": 0.016078721420072675, "step": 500 }, { "loss": 0.133112633228302, "grad_norm": 0.7309413552284241, "learning_rate": 0.0001999140957105921, "epoch": 0.01640029584847413, "step": 510 }, { "loss": 0.13967225551605225, "grad_norm": 0.8078439235687256, "learning_rate": 0.00019990984429758187, "epoch": 0.01672187027687558, "step": 520 }, { "loss": 0.15573676824569702, "grad_norm": 0.8659370541572571, "learning_rate": 0.00019990549025591036, "epoch": 0.017043444705277036, "step": 530 }, { "loss": 0.1315290927886963, "grad_norm": 0.9505926966667175, "learning_rate": 0.00019990103359005014, "epoch": 0.01736501913367849, "step": 540 }, { "loss": 0.12422749996185303, "grad_norm": 0.8183798789978027, "learning_rate": 0.00019989647430457912, "epoch": 0.017686593562079942, "step": 550 }, { "loss": 0.14997694492340088, "grad_norm": 1.2214590311050415, "learning_rate": 0.00019989181240418069, "epoch": 0.018008167990481397, "step": 560 }, { "loss": 0.13226089477539063, "grad_norm": 0.6610310673713684, "learning_rate": 0.0001998870478936436, "epoch": 0.01832974241888285, "step": 570 }, { "loss": 0.13494133949279785, "grad_norm": 0.7559733986854553, "learning_rate": 0.000199882180777862, "epoch": 0.018651316847284303, "step": 580 }, { "loss": 0.13185596466064453, "grad_norm": 0.9044352173805237, "learning_rate": 0.00019987721106183546, "epoch": 0.018972891275685758, "step": 590 }, { "loss": 0.14558923244476318, "grad_norm": 0.9192641377449036, "learning_rate": 0.00019987213875066894, "epoch": 0.019294465704087212, "step": 600 }, { "loss": 0.1581730842590332, "grad_norm": 0.5637726187705994, "learning_rate": 0.0001998669638495728, "epoch": 0.019616040132488664, "step": 610 }, { "loss": 0.11631412506103515, "grad_norm": 0.7248178124427795, "learning_rate": 0.0001998616863638627, "epoch": 0.01993761456089012, "step": 620 }, { "loss": 0.13501194715499878, "grad_norm": 1.1470476388931274, "learning_rate": 0.00019985630629895985, "epoch": 0.020259188989291573, "step": 630 }, { "loss": 0.14213244915008544, "grad_norm": 1.4807144403457642, "learning_rate": 0.0001998508236603906, "epoch": 0.020580763417693024, "step": 640 }, { "loss": 0.14146710634231568, "grad_norm": 0.8103519678115845, "learning_rate": 0.0001998452384537869, "epoch": 0.02090233784609448, "step": 650 }, { "loss": 0.1671810507774353, "grad_norm": 1.4063045978546143, "learning_rate": 0.00019983955068488584, "epoch": 0.02122391227449593, "step": 660 }, { "loss": 0.1154186487197876, "grad_norm": 0.6153373122215271, "learning_rate": 0.00019983376035953004, "epoch": 0.021545486702897385, "step": 670 }, { "loss": 0.13083285093307495, "grad_norm": 0.7791343927383423, "learning_rate": 0.0001998278674836674, "epoch": 0.02186706113129884, "step": 680 }, { "loss": 0.09962020516395569, "grad_norm": 0.7736439108848572, "learning_rate": 0.00019982187206335107, "epoch": 0.02218863555970029, "step": 690 }, { "loss": 0.12725504636764526, "grad_norm": 0.6886352300643921, "learning_rate": 0.00019981577410473972, "epoch": 0.022510209988101746, "step": 700 }, { "loss": 0.13899757862091064, "grad_norm": 0.837552011013031, "learning_rate": 0.0001998095736140972, "epoch": 0.0228317844165032, "step": 710 }, { "loss": 0.15298879146575928, "grad_norm": 0.9249375462532043, "learning_rate": 0.00019980327059779276, "epoch": 0.023153358844904652, "step": 720 }, { "loss": 0.10059181451797486, "grad_norm": 0.5768576264381409, "learning_rate": 0.00019979686506230092, "epoch": 0.023474933273306107, "step": 730 }, { "loss": 0.13777350187301635, "grad_norm": 1.2064447402954102, "learning_rate": 0.0001997903570142015, "epoch": 0.02379650770170756, "step": 740 }, { "loss": 0.1392066478729248, "grad_norm": 0.5575463771820068, "learning_rate": 0.00019978374646017973, "epoch": 0.024118082130109013, "step": 750 }, { "loss": 0.14810303449630738, "grad_norm": 0.7672901153564453, "learning_rate": 0.00019977703340702595, "epoch": 0.024439656558510468, "step": 760 }, { "loss": 0.150481915473938, "grad_norm": 0.8861374258995056, "learning_rate": 0.00019977021786163598, "epoch": 0.024761230986911922, "step": 770 }, { "loss": 0.1382421851158142, "grad_norm": 0.7194581627845764, "learning_rate": 0.00019976329983101084, "epoch": 0.025082805415313374, "step": 780 }, { "loss": 0.12950103282928466, "grad_norm": 1.3259282112121582, "learning_rate": 0.00019975627932225675, "epoch": 0.02540437984371483, "step": 790 }, { "loss": 0.11794104576110839, "grad_norm": 0.7138015031814575, "learning_rate": 0.0001997491563425853, "epoch": 0.02572595427211628, "step": 800 }, { "loss": 0.11722338199615479, "grad_norm": 0.9002670049667358, "learning_rate": 0.00019974193089931328, "epoch": 0.026047528700517734, "step": 810 }, { "loss": 0.1378617525100708, "grad_norm": 0.7109375596046448, "learning_rate": 0.0001997346029998628, "epoch": 0.02636910312891919, "step": 820 }, { "loss": 0.1343904733657837, "grad_norm": 0.6565284729003906, "learning_rate": 0.00019972717265176115, "epoch": 0.02669067755732064, "step": 830 }, { "loss": 0.11249516010284424, "grad_norm": 0.5646419525146484, "learning_rate": 0.0001997196398626409, "epoch": 0.027012251985722095, "step": 840 }, { "loss": 0.11767094135284424, "grad_norm": 0.6782432198524475, "learning_rate": 0.00019971200464023977, "epoch": 0.02733382641412355, "step": 850 }, { "loss": 0.11552731990814209, "grad_norm": 0.748511016368866, "learning_rate": 0.00019970426699240083, "epoch": 0.027655400842525, "step": 860 }, { "loss": 0.12385071516036987, "grad_norm": 0.6928240060806274, "learning_rate": 0.00019969642692707225, "epoch": 0.027976975270926456, "step": 870 }, { "loss": 0.13992183208465575, "grad_norm": 1.11572265625, "learning_rate": 0.00019968848445230743, "epoch": 0.02829854969932791, "step": 880 }, { "loss": 0.15401984453201295, "grad_norm": 0.6393543481826782, "learning_rate": 0.00019968043957626502, "epoch": 0.028620124127729362, "step": 890 }, { "loss": 0.12747691869735717, "grad_norm": 0.7813221216201782, "learning_rate": 0.00019967229230720881, "epoch": 0.028941698556130817, "step": 900 }, { "loss": 0.12719355821609496, "grad_norm": 0.7403981685638428, "learning_rate": 0.00019966404265350774, "epoch": 0.02926327298453227, "step": 910 }, { "loss": 0.10643273591995239, "grad_norm": 0.48086461424827576, "learning_rate": 0.000199655690623636, "epoch": 0.029584847412933723, "step": 920 }, { "loss": 0.12535429000854492, "grad_norm": 1.4569687843322754, "learning_rate": 0.0001996472362261729, "epoch": 0.029906421841335178, "step": 930 }, { "loss": 0.11564887762069702, "grad_norm": 0.6724082827568054, "learning_rate": 0.00019963867946980285, "epoch": 0.03022799626973663, "step": 940 }, { "loss": 0.12263286113739014, "grad_norm": 0.8747388124465942, "learning_rate": 0.00019963002036331554, "epoch": 0.030549570698138084, "step": 950 }, { "loss": 0.11707538366317749, "grad_norm": 1.0104649066925049, "learning_rate": 0.00019962125891560568, "epoch": 0.03087114512653954, "step": 960 }, { "loss": 0.12445554733276368, "grad_norm": 0.7107091546058655, "learning_rate": 0.00019961239513567312, "epoch": 0.03119271955494099, "step": 970 }, { "loss": 0.10379760265350342, "grad_norm": 0.5619798898696899, "learning_rate": 0.00019960342903262284, "epoch": 0.03151429398334245, "step": 980 }, { "loss": 0.12779237031936647, "grad_norm": 0.5871363878250122, "learning_rate": 0.000199594360615665, "epoch": 0.0318358684117439, "step": 990 }, { "loss": 0.12199541330337524, "grad_norm": 0.6088923215866089, "learning_rate": 0.0001995851898941147, "epoch": 0.03215744284014535, "step": 1000 }, { "loss": 0.13736236095428467, "grad_norm": 0.8911682963371277, "learning_rate": 0.00019957591687739233, "epoch": 0.0324790172685468, "step": 1010 }, { "loss": 0.11516963243484497, "grad_norm": 0.6520726680755615, "learning_rate": 0.00019956654157502314, "epoch": 0.03280059169694826, "step": 1020 }, { "loss": 0.14110876321792604, "grad_norm": 0.6925550103187561, "learning_rate": 0.00019955706399663766, "epoch": 0.03312216612534971, "step": 1030 }, { "loss": 0.14986019134521483, "grad_norm": 0.6888828873634338, "learning_rate": 0.00019954748415197134, "epoch": 0.03344374055375116, "step": 1040 }, { "loss": 0.10206960439682007, "grad_norm": 0.5661484599113464, "learning_rate": 0.00019953780205086464, "epoch": 0.03376531498215262, "step": 1050 }, { "loss": 0.11590293645858765, "grad_norm": 0.6978943943977356, "learning_rate": 0.00019952801770326328, "epoch": 0.03408688941055407, "step": 1060 }, { "loss": 0.10875160694122314, "grad_norm": 0.501489520072937, "learning_rate": 0.00019951813111921776, "epoch": 0.03440846383895552, "step": 1070 }, { "loss": 0.07669517397880554, "grad_norm": 0.5798457264900208, "learning_rate": 0.00019950814230888374, "epoch": 0.03473003826735698, "step": 1080 }, { "loss": 0.12092061042785644, "grad_norm": 0.9621400237083435, "learning_rate": 0.00019949805128252187, "epoch": 0.03505161269575843, "step": 1090 }, { "loss": 0.14484325647354127, "grad_norm": 0.6315482258796692, "learning_rate": 0.00019948785805049774, "epoch": 0.035373187124159884, "step": 1100 }, { "loss": 0.14884073734283448, "grad_norm": 0.905430793762207, "learning_rate": 0.000199477562623282, "epoch": 0.03569476155256134, "step": 1110 }, { "loss": 0.13443020582199097, "grad_norm": 0.9212183356285095, "learning_rate": 0.00019946716501145022, "epoch": 0.036016335980962794, "step": 1120 }, { "loss": 0.12805633544921874, "grad_norm": 0.9336081743240356, "learning_rate": 0.00019945666522568298, "epoch": 0.036337910409364245, "step": 1130 }, { "loss": 0.1238585352897644, "grad_norm": 1.1159182786941528, "learning_rate": 0.00019944606327676579, "epoch": 0.0366594848377657, "step": 1140 }, { "loss": 0.11155736446380615, "grad_norm": 0.7781017422676086, "learning_rate": 0.0001994353591755891, "epoch": 0.036981059266167154, "step": 1150 }, { "loss": 0.12316350936889649, "grad_norm": 0.9370765089988708, "learning_rate": 0.0001994245529331483, "epoch": 0.037302633694568606, "step": 1160 }, { "loss": 0.12969765663146973, "grad_norm": 0.8996332883834839, "learning_rate": 0.00019941364456054367, "epoch": 0.037624208122970064, "step": 1170 }, { "loss": 0.1030587911605835, "grad_norm": 1.21480131149292, "learning_rate": 0.0001994026340689805, "epoch": 0.037945782551371515, "step": 1180 }, { "loss": 0.14748505353927613, "grad_norm": 0.8236110806465149, "learning_rate": 0.00019939152146976886, "epoch": 0.038267356979772967, "step": 1190 }, { "loss": 0.15111169815063477, "grad_norm": 0.8031110763549805, "learning_rate": 0.00019938030677432372, "epoch": 0.038588931408174425, "step": 1200 }, { "loss": 0.12816275358200074, "grad_norm": 0.6093372702598572, "learning_rate": 0.00019936898999416505, "epoch": 0.038910505836575876, "step": 1210 }, { "loss": 0.13696140050888062, "grad_norm": 0.9523636102676392, "learning_rate": 0.00019935757114091751, "epoch": 0.03923208026497733, "step": 1220 }, { "loss": 0.10912299156188965, "grad_norm": 0.7931325435638428, "learning_rate": 0.0001993460502263107, "epoch": 0.039553654693378786, "step": 1230 }, { "loss": 0.1422716498374939, "grad_norm": 0.7136402130126953, "learning_rate": 0.0001993344272621791, "epoch": 0.03987522912178024, "step": 1240 }, { "loss": 0.1305735230445862, "grad_norm": 0.7761098146438599, "learning_rate": 0.00019932270226046193, "epoch": 0.04019680355018169, "step": 1250 }, { "loss": 0.13865208625793457, "grad_norm": 0.8858702182769775, "learning_rate": 0.00019931087523320326, "epoch": 0.040518377978583146, "step": 1260 }, { "loss": 0.12371792793273925, "grad_norm": 0.6040947437286377, "learning_rate": 0.00019929894619255195, "epoch": 0.0408399524069846, "step": 1270 }, { "loss": 0.13337819576263427, "grad_norm": 0.9483299851417542, "learning_rate": 0.0001992869151507617, "epoch": 0.04116152683538605, "step": 1280 }, { "loss": 0.1270521402359009, "grad_norm": 0.6090492010116577, "learning_rate": 0.00019927478212019092, "epoch": 0.0414831012637875, "step": 1290 }, { "loss": 0.12199642658233642, "grad_norm": 1.03255033493042, "learning_rate": 0.00019926254711330281, "epoch": 0.04180467569218896, "step": 1300 }, { "loss": 0.12788064479827882, "grad_norm": 1.19109046459198, "learning_rate": 0.00019925021014266536, "epoch": 0.04212625012059041, "step": 1310 }, { "loss": 0.12751251459121704, "grad_norm": 0.9528569579124451, "learning_rate": 0.00019923777122095123, "epoch": 0.04244782454899186, "step": 1320 }, { "loss": 0.12552311420440673, "grad_norm": 0.6547811031341553, "learning_rate": 0.00019922523036093782, "epoch": 0.04276939897739332, "step": 1330 }, { "loss": 0.11283427476882935, "grad_norm": 0.7006787657737732, "learning_rate": 0.00019921258757550734, "epoch": 0.04309097340579477, "step": 1340 }, { "loss": 0.1332577347755432, "grad_norm": 0.7077374458312988, "learning_rate": 0.00019919984287764653, "epoch": 0.04341254783419622, "step": 1350 }, { "loss": 0.12081760168075562, "grad_norm": 0.5344422459602356, "learning_rate": 0.00019918699628044694, "epoch": 0.04373412226259768, "step": 1360 }, { "loss": 0.11974830627441406, "grad_norm": 0.8550666570663452, "learning_rate": 0.00019917404779710478, "epoch": 0.04405569669099913, "step": 1370 }, { "loss": 0.108591628074646, "grad_norm": 0.583908200263977, "learning_rate": 0.00019916099744092088, "epoch": 0.04437727111940058, "step": 1380 }, { "loss": 0.1248102068901062, "grad_norm": 1.138697862625122, "learning_rate": 0.00019914784522530074, "epoch": 0.04469884554780204, "step": 1390 }, { "loss": 0.11697250604629517, "grad_norm": 0.914976954460144, "learning_rate": 0.00019913459116375446, "epoch": 0.04502041997620349, "step": 1400 }, { "loss": 0.10801193714141846, "grad_norm": 0.618378758430481, "learning_rate": 0.0001991212352698968, "epoch": 0.04534199440460494, "step": 1410 }, { "loss": 0.11103023290634155, "grad_norm": 0.9175752401351929, "learning_rate": 0.00019910777755744708, "epoch": 0.0456635688330064, "step": 1420 }, { "loss": 0.11435778141021728, "grad_norm": 0.7195553779602051, "learning_rate": 0.00019909421804022924, "epoch": 0.04598514326140785, "step": 1430 }, { "loss": 0.1225479245185852, "grad_norm": 0.567287266254425, "learning_rate": 0.0001990805567321718, "epoch": 0.046306717689809304, "step": 1440 }, { "loss": 0.13343338966369628, "grad_norm": 1.069581389427185, "learning_rate": 0.00019906679364730786, "epoch": 0.04662829211821076, "step": 1450 }, { "loss": 0.13520500659942628, "grad_norm": 0.8290631175041199, "learning_rate": 0.00019905292879977498, "epoch": 0.046949866546612214, "step": 1460 }, { "loss": 0.10813941955566406, "grad_norm": 0.8197263479232788, "learning_rate": 0.00019903896220381534, "epoch": 0.047271440975013665, "step": 1470 }, { "loss": 0.10801063776016236, "grad_norm": 0.6460339426994324, "learning_rate": 0.00019902489387377557, "epoch": 0.04759301540341512, "step": 1480 }, { "loss": 0.12939759492874145, "grad_norm": 0.958020031452179, "learning_rate": 0.0001990107238241069, "epoch": 0.047914589831816574, "step": 1490 }, { "loss": 0.12598515748977662, "grad_norm": 0.8492549657821655, "learning_rate": 0.00019899645206936494, "epoch": 0.048236164260218026, "step": 1500 }, { "loss": 0.11651817560195923, "grad_norm": 0.5606909394264221, "learning_rate": 0.0001989820786242098, "epoch": 0.048557738688619484, "step": 1510 }, { "loss": 0.11292610168457032, "grad_norm": 0.7311549782752991, "learning_rate": 0.00019896760350340608, "epoch": 0.048879313117020935, "step": 1520 }, { "loss": 0.09686515927314758, "grad_norm": 0.5886459946632385, "learning_rate": 0.00019895302672182287, "epoch": 0.049200887545422387, "step": 1530 }, { "loss": 0.11024473905563355, "grad_norm": 0.8052958250045776, "learning_rate": 0.00019893834829443354, "epoch": 0.049522461973823845, "step": 1540 }, { "loss": 0.11257057189941407, "grad_norm": 0.8030698299407959, "learning_rate": 0.000198923568236316, "epoch": 0.049844036402225296, "step": 1550 }, { "eval_loss": 0.11128632724285126, "eval_runtime": 46.063, "eval_samples_per_second": 109.111, "eval_steps_per_second": 27.289, "epoch": 0.049972666173585874, "step": 1554 }, { "loss": 0.11879251003265381, "grad_norm": 1.0847232341766357, "learning_rate": 0.00019890868656265251, "epoch": 0.05016561083062675, "step": 1560 }, { "loss": 0.09545501470565795, "grad_norm": 0.6696542501449585, "learning_rate": 0.0001988937032887297, "epoch": 0.050487185259028206, "step": 1570 }, { "loss": 0.1338610291481018, "grad_norm": 0.8771032691001892, "learning_rate": 0.00019887861842993857, "epoch": 0.05080875968742966, "step": 1580 }, { "loss": 0.13209749460220338, "grad_norm": 0.6574100255966187, "learning_rate": 0.00019886343200177455, "epoch": 0.05113033411583111, "step": 1590 }, { "loss": 0.10262678861618042, "grad_norm": 0.6385945081710815, "learning_rate": 0.0001988481440198372, "epoch": 0.05145190854423256, "step": 1600 }, { "loss": 0.13754674196243286, "grad_norm": 0.8028077483177185, "learning_rate": 0.0001988327544998307, "epoch": 0.05177348297263402, "step": 1610 }, { "loss": 0.09865680932998658, "grad_norm": 0.8681406378746033, "learning_rate": 0.00019881726345756322, "epoch": 0.05209505740103547, "step": 1620 }, { "loss": 0.118634033203125, "grad_norm": 0.7602818012237549, "learning_rate": 0.0001988016709089474, "epoch": 0.05241663182943692, "step": 1630 }, { "loss": 0.10665820837020874, "grad_norm": 0.5052031874656677, "learning_rate": 0.00019878597687000014, "epoch": 0.05273820625783838, "step": 1640 }, { "loss": 0.13749959468841552, "grad_norm": 0.5683307647705078, "learning_rate": 0.00019877018135684246, "epoch": 0.05305978068623983, "step": 1650 }, { "loss": 0.12095248699188232, "grad_norm": 0.5208232402801514, "learning_rate": 0.00019875428438569982, "epoch": 0.05338135511464128, "step": 1660 }, { "loss": 0.1353804349899292, "grad_norm": 0.7039898633956909, "learning_rate": 0.00019873828597290175, "epoch": 0.05370292954304274, "step": 1670 }, { "loss": 0.10228586196899414, "grad_norm": 0.3282141089439392, "learning_rate": 0.00019872218613488203, "epoch": 0.05402450397144419, "step": 1680 }, { "loss": 0.1056720495223999, "grad_norm": 0.5899987816810608, "learning_rate": 0.00019870598488817858, "epoch": 0.05434607839984564, "step": 1690 }, { "loss": 0.09971145391464234, "grad_norm": 1.2377015352249146, "learning_rate": 0.00019868968224943356, "epoch": 0.0546676528282471, "step": 1700 }, { "loss": 0.10820199251174926, "grad_norm": 0.5962371230125427, "learning_rate": 0.00019867327823539323, "epoch": 0.05498922725664855, "step": 1710 }, { "loss": 0.12856364250183105, "grad_norm": 1.2653589248657227, "learning_rate": 0.00019865677286290802, "epoch": 0.05531080168505, "step": 1720 }, { "loss": 0.12928249835968017, "grad_norm": 1.5054935216903687, "learning_rate": 0.00019864016614893246, "epoch": 0.05563237611345146, "step": 1730 }, { "loss": 0.1322105646133423, "grad_norm": 0.7001881003379822, "learning_rate": 0.00019862345811052517, "epoch": 0.05595395054185291, "step": 1740 }, { "loss": 0.09415720701217652, "grad_norm": 0.3953130841255188, "learning_rate": 0.00019860664876484885, "epoch": 0.05627552497025436, "step": 1750 }, { "loss": 0.11939146518707275, "grad_norm": 0.9448032379150391, "learning_rate": 0.0001985897381291703, "epoch": 0.05659709939865582, "step": 1760 }, { "loss": 0.1330693244934082, "grad_norm": 0.6536581516265869, "learning_rate": 0.00019857272622086033, "epoch": 0.05691867382705727, "step": 1770 }, { "loss": 0.12102277278900146, "grad_norm": 0.746406078338623, "learning_rate": 0.00019855561305739375, "epoch": 0.057240248255458724, "step": 1780 }, { "loss": 0.11163841485977173, "grad_norm": 0.4073915481567383, "learning_rate": 0.00019853839865634944, "epoch": 0.05756182268386018, "step": 1790 }, { "loss": 0.12480851411819457, "grad_norm": 0.9843113422393799, "learning_rate": 0.00019852108303541029, "epoch": 0.057883397112261634, "step": 1800 }, { "loss": 0.10980266332626343, "grad_norm": 0.41751521825790405, "learning_rate": 0.00019850366621236307, "epoch": 0.058204971540663085, "step": 1810 }, { "loss": 0.10909017324447631, "grad_norm": 0.8801973462104797, "learning_rate": 0.00019848614820509858, "epoch": 0.05852654596906454, "step": 1820 }, { "loss": 0.12614597082138063, "grad_norm": 0.8948915004730225, "learning_rate": 0.0001984685290316116, "epoch": 0.058848120397465994, "step": 1830 }, { "loss": 0.12774603366851806, "grad_norm": 0.9197909832000732, "learning_rate": 0.00019845080871000068, "epoch": 0.059169694825867446, "step": 1840 }, { "loss": 0.10084892511367798, "grad_norm": 0.9967162013053894, "learning_rate": 0.0001984329872584684, "epoch": 0.059491269254268904, "step": 1850 }, { "loss": 0.11144490242004394, "grad_norm": 0.5707647800445557, "learning_rate": 0.00019841506469532117, "epoch": 0.059812843682670355, "step": 1860 }, { "loss": 0.10548099279403686, "grad_norm": 0.528365969657898, "learning_rate": 0.00019839704103896927, "epoch": 0.060134418111071807, "step": 1870 }, { "loss": 0.10982582569122315, "grad_norm": 0.33779260516166687, "learning_rate": 0.00019837891630792686, "epoch": 0.06045599253947326, "step": 1880 }, { "loss": 0.11971958875656127, "grad_norm": 0.7719383239746094, "learning_rate": 0.00019836069052081188, "epoch": 0.060777566967874716, "step": 1890 }, { "loss": 0.11518895626068115, "grad_norm": 0.809697151184082, "learning_rate": 0.00019834236369634608, "epoch": 0.06109914139627617, "step": 1900 }, { "loss": 0.10948337316513061, "grad_norm": 0.8163278102874756, "learning_rate": 0.00019832393585335502, "epoch": 0.06142071582467762, "step": 1910 }, { "loss": 0.118583083152771, "grad_norm": 0.46315309405326843, "learning_rate": 0.000198305407010768, "epoch": 0.06174229025307908, "step": 1920 }, { "loss": 0.10560156106948852, "grad_norm": 0.5412709712982178, "learning_rate": 0.00019828677718761805, "epoch": 0.06206386468148053, "step": 1930 }, { "loss": 0.12434232234954834, "grad_norm": 0.7563145756721497, "learning_rate": 0.00019826804640304206, "epoch": 0.06238543910988198, "step": 1940 }, { "loss": 0.12473642826080322, "grad_norm": 0.8467011451721191, "learning_rate": 0.00019824921467628045, "epoch": 0.06270701353828344, "step": 1950 }, { "loss": 0.10392096042633056, "grad_norm": 0.4988096356391907, "learning_rate": 0.0001982302820266774, "epoch": 0.0630285879666849, "step": 1960 }, { "loss": 0.13397929668426514, "grad_norm": 0.6903615593910217, "learning_rate": 0.00019821124847368082, "epoch": 0.06335016239508634, "step": 1970 }, { "loss": 0.11959564685821533, "grad_norm": 1.0386449098587036, "learning_rate": 0.0001981921140368422, "epoch": 0.0636717368234878, "step": 1980 }, { "loss": 0.1142409086227417, "grad_norm": 0.6791936159133911, "learning_rate": 0.00019817287873581666, "epoch": 0.06399331125188926, "step": 1990 }, { "loss": 0.15232725143432618, "grad_norm": 1.3021385669708252, "learning_rate": 0.00019815354259036295, "epoch": 0.0643148856802907, "step": 2000 }, { "loss": 0.11425412893295288, "grad_norm": 0.5564187169075012, "learning_rate": 0.0001981341056203434, "epoch": 0.06463646010869216, "step": 2010 }, { "loss": 0.1432906746864319, "grad_norm": 0.9113200306892395, "learning_rate": 0.00019811456784572394, "epoch": 0.0649580345370936, "step": 2020 }, { "loss": 0.13707234859466552, "grad_norm": 0.6878952980041504, "learning_rate": 0.000198094929286574, "epoch": 0.06527960896549506, "step": 2030 }, { "loss": 0.14629603624343873, "grad_norm": 0.8060909509658813, "learning_rate": 0.00019807518996306657, "epoch": 0.06560118339389652, "step": 2040 }, { "loss": 0.14162900447845458, "grad_norm": 0.8747666478157043, "learning_rate": 0.0001980553498954781, "epoch": 0.06592275782229796, "step": 2050 }, { "loss": 0.1435432553291321, "grad_norm": 0.9168952703475952, "learning_rate": 0.00019803540910418862, "epoch": 0.06624433225069942, "step": 2060 }, { "loss": 0.13169909715652467, "grad_norm": 0.8191623687744141, "learning_rate": 0.0001980153676096815, "epoch": 0.06656590667910088, "step": 2070 }, { "loss": 0.10620007514953614, "grad_norm": 0.58042311668396, "learning_rate": 0.00019799522543254366, "epoch": 0.06688748110750233, "step": 2080 }, { "loss": 0.11155413389205933, "grad_norm": 0.6616443991661072, "learning_rate": 0.00019797498259346537, "epoch": 0.06720905553590378, "step": 2090 }, { "loss": 0.13231509923934937, "grad_norm": 0.7794826626777649, "learning_rate": 0.00019795463911324035, "epoch": 0.06753062996430524, "step": 2100 }, { "loss": 0.13808358907699586, "grad_norm": 0.9658569097518921, "learning_rate": 0.00019793419501276568, "epoch": 0.06785220439270669, "step": 2110 }, { "loss": 0.132569420337677, "grad_norm": 0.7185949683189392, "learning_rate": 0.0001979136503130418, "epoch": 0.06817377882110814, "step": 2120 }, { "loss": 0.10459121465682983, "grad_norm": 0.6469611525535583, "learning_rate": 0.00019789300503517249, "epoch": 0.0684953532495096, "step": 2130 }, { "loss": 0.12158732414245606, "grad_norm": 1.1708670854568481, "learning_rate": 0.00019787225920036482, "epoch": 0.06881692767791105, "step": 2140 }, { "loss": 0.11037311553955079, "grad_norm": 0.7065096497535706, "learning_rate": 0.00019785141282992923, "epoch": 0.0691385021063125, "step": 2150 }, { "loss": 0.10084078311920167, "grad_norm": 0.7217418551445007, "learning_rate": 0.0001978304659452793, "epoch": 0.06946007653471396, "step": 2160 }, { "loss": 0.10967367887496948, "grad_norm": 0.5506851673126221, "learning_rate": 0.000197809418567932, "epoch": 0.06978165096311541, "step": 2170 }, { "loss": 0.11019937992095948, "grad_norm": 0.6507880091667175, "learning_rate": 0.00019778827071950742, "epoch": 0.07010322539151687, "step": 2180 }, { "loss": 0.11477915048599244, "grad_norm": 0.6157989501953125, "learning_rate": 0.00019776702242172892, "epoch": 0.07042479981991832, "step": 2190 }, { "loss": 0.134388530254364, "grad_norm": 0.8920512199401855, "learning_rate": 0.00019774567369642301, "epoch": 0.07074637424831977, "step": 2200 }, { "loss": 0.093971848487854, "grad_norm": 0.6216977834701538, "learning_rate": 0.0001977242245655194, "epoch": 0.07106794867672123, "step": 2210 }, { "loss": 0.1283738136291504, "grad_norm": 0.7825973033905029, "learning_rate": 0.00019770267505105088, "epoch": 0.07138952310512268, "step": 2220 }, { "loss": 0.1231916069984436, "grad_norm": 0.8116011619567871, "learning_rate": 0.00019768102517515336, "epoch": 0.07171109753352413, "step": 2230 }, { "loss": 0.07891560792922973, "grad_norm": 0.8718013763427734, "learning_rate": 0.00019765927496006596, "epoch": 0.07203267196192559, "step": 2240 }, { "loss": 0.09509350061416626, "grad_norm": 0.6453697085380554, "learning_rate": 0.00019763742442813068, "epoch": 0.07235424639032705, "step": 2250 }, { "loss": 0.11959600448608398, "grad_norm": 0.559689462184906, "learning_rate": 0.00019761547360179268, "epoch": 0.07267582081872849, "step": 2260 }, { "loss": 0.11340382099151611, "grad_norm": 0.42303594946861267, "learning_rate": 0.0001975934225036002, "epoch": 0.07299739524712995, "step": 2270 }, { "loss": 0.10959010124206543, "grad_norm": 0.5502386093139648, "learning_rate": 0.00019757127115620435, "epoch": 0.0733189696755314, "step": 2280 }, { "loss": 0.1179365873336792, "grad_norm": 1.1213687658309937, "learning_rate": 0.00019754901958235927, "epoch": 0.07364054410393285, "step": 2290 }, { "loss": 0.12511249780654907, "grad_norm": 0.611772358417511, "learning_rate": 0.00019752666780492207, "epoch": 0.07396211853233431, "step": 2300 }, { "loss": 0.13076653480529785, "grad_norm": 0.5212668180465698, "learning_rate": 0.0001975042158468528, "epoch": 0.07428369296073577, "step": 2310 }, { "loss": 0.10176039934158325, "grad_norm": 1.010329246520996, "learning_rate": 0.00019748166373121436, "epoch": 0.07460526738913721, "step": 2320 }, { "loss": 0.10914967060089112, "grad_norm": 0.7520009279251099, "learning_rate": 0.0001974590114811726, "epoch": 0.07492684181753867, "step": 2330 }, { "loss": 0.08320418596267701, "grad_norm": 0.6081514358520508, "learning_rate": 0.0001974362591199962, "epoch": 0.07524841624594013, "step": 2340 }, { "loss": 0.11121056079864503, "grad_norm": 0.779585599899292, "learning_rate": 0.00019741340667105665, "epoch": 0.07556999067434157, "step": 2350 }, { "loss": 0.09840821027755738, "grad_norm": 0.7340258359909058, "learning_rate": 0.0001973904541578283, "epoch": 0.07589156510274303, "step": 2360 }, { "loss": 0.13002623319625856, "grad_norm": 0.6933698058128357, "learning_rate": 0.00019736740160388824, "epoch": 0.07621313953114449, "step": 2370 }, { "loss": 0.12301526069641114, "grad_norm": 0.4505113363265991, "learning_rate": 0.00019734424903291635, "epoch": 0.07653471395954593, "step": 2380 }, { "loss": 0.11275660991668701, "grad_norm": 0.9241744875907898, "learning_rate": 0.00019732099646869528, "epoch": 0.07685628838794739, "step": 2390 }, { "loss": 0.13603054285049437, "grad_norm": 0.9413949847221375, "learning_rate": 0.00019729764393511032, "epoch": 0.07717786281634885, "step": 2400 }, { "loss": 0.10451099872589112, "grad_norm": 0.7786242961883545, "learning_rate": 0.0001972741914561495, "epoch": 0.0774994372447503, "step": 2410 }, { "loss": 0.10279628038406372, "grad_norm": 0.8419315218925476, "learning_rate": 0.00019725063905590352, "epoch": 0.07782101167315175, "step": 2420 }, { "loss": 0.12877086400985718, "grad_norm": 0.8151343464851379, "learning_rate": 0.00019722698675856564, "epoch": 0.07814258610155321, "step": 2430 }, { "loss": 0.11589571237564086, "grad_norm": 0.7058280110359192, "learning_rate": 0.0001972032345884319, "epoch": 0.07846416052995465, "step": 2440 }, { "loss": 0.11044619083404542, "grad_norm": 0.7708027958869934, "learning_rate": 0.00019717938256990077, "epoch": 0.07878573495835611, "step": 2450 }, { "loss": 0.13818830251693726, "grad_norm": 0.5580777525901794, "learning_rate": 0.00019715543072747335, "epoch": 0.07910730938675757, "step": 2460 }, { "loss": 0.12281345129013062, "grad_norm": 0.5901768803596497, "learning_rate": 0.0001971313790857533, "epoch": 0.07942888381515902, "step": 2470 }, { "loss": 0.10907325744628907, "grad_norm": 0.8018592596054077, "learning_rate": 0.00019710722766944674, "epoch": 0.07975045824356047, "step": 2480 }, { "loss": 0.12140153646469116, "grad_norm": 0.6740052700042725, "learning_rate": 0.00019708297650336234, "epoch": 0.08007203267196193, "step": 2490 }, { "loss": 0.1088398814201355, "grad_norm": 0.5753097534179688, "learning_rate": 0.00019705862561241117, "epoch": 0.08039360710036338, "step": 2500 }, { "loss": 0.11455504894256592, "grad_norm": 0.49554967880249023, "learning_rate": 0.00019703417502160684, "epoch": 0.08071518152876483, "step": 2510 }, { "loss": 0.12731305360794068, "grad_norm": 0.9352779388427734, "learning_rate": 0.00019700962475606524, "epoch": 0.08103675595716629, "step": 2520 }, { "loss": 0.10423781871795654, "grad_norm": 0.6742168664932251, "learning_rate": 0.00019698497484100472, "epoch": 0.08135833038556774, "step": 2530 }, { "loss": 0.11573268175125122, "grad_norm": 0.5347793102264404, "learning_rate": 0.00019696022530174605, "epoch": 0.0816799048139692, "step": 2540 }, { "loss": 0.11344225406646728, "grad_norm": 1.1676106452941895, "learning_rate": 0.0001969353761637122, "epoch": 0.08200147924237065, "step": 2550 }, { "loss": 0.11661807298660279, "grad_norm": 0.7781999707221985, "learning_rate": 0.00019691042745242858, "epoch": 0.0823230536707721, "step": 2560 }, { "loss": 0.11757798194885254, "grad_norm": 0.6178641319274902, "learning_rate": 0.00019688537919352274, "epoch": 0.08264462809917356, "step": 2570 }, { "loss": 0.11788698434829711, "grad_norm": 0.7667732238769531, "learning_rate": 0.00019686023141272466, "epoch": 0.082966202527575, "step": 2580 }, { "loss": 0.09521175622940063, "grad_norm": 1.0335626602172852, "learning_rate": 0.00019683498413586642, "epoch": 0.08328777695597646, "step": 2590 }, { "loss": 0.11917680501937866, "grad_norm": 0.582943856716156, "learning_rate": 0.00019680963738888233, "epoch": 0.08360935138437792, "step": 2600 }, { "loss": 0.11382901668548584, "grad_norm": 0.7423402070999146, "learning_rate": 0.00019678419119780892, "epoch": 0.08393092581277936, "step": 2610 }, { "loss": 0.12486684322357178, "grad_norm": 0.6837660074234009, "learning_rate": 0.00019675864558878483, "epoch": 0.08425250024118082, "step": 2620 }, { "loss": 0.10534079074859619, "grad_norm": 0.7795819044113159, "learning_rate": 0.0001967330005880508, "epoch": 0.08457407466958228, "step": 2630 }, { "loss": 0.11522390842437744, "grad_norm": 0.5674572587013245, "learning_rate": 0.00019670725622194976, "epoch": 0.08489564909798372, "step": 2640 }, { "loss": 0.11588020324707031, "grad_norm": 0.7193135023117065, "learning_rate": 0.0001966814125169266, "epoch": 0.08521722352638518, "step": 2650 }, { "loss": 0.11439142227172852, "grad_norm": 0.7520074844360352, "learning_rate": 0.00019665546949952835, "epoch": 0.08553879795478664, "step": 2660 }, { "loss": 0.1080745816230774, "grad_norm": 0.7337353825569153, "learning_rate": 0.00019662942719640396, "epoch": 0.08586037238318808, "step": 2670 }, { "loss": 0.09863368272781373, "grad_norm": 0.9156220555305481, "learning_rate": 0.00019660328563430443, "epoch": 0.08618194681158954, "step": 2680 }, { "loss": 0.1464648127555847, "grad_norm": 0.5568152666091919, "learning_rate": 0.00019657704484008274, "epoch": 0.086503521239991, "step": 2690 }, { "loss": 0.12296129465103149, "grad_norm": 1.0155256986618042, "learning_rate": 0.00019655070484069368, "epoch": 0.08682509566839244, "step": 2700 }, { "loss": 0.14590543508529663, "grad_norm": 0.6132897138595581, "learning_rate": 0.00019652426566319414, "epoch": 0.0871466700967939, "step": 2710 }, { "loss": 0.08513785004615784, "grad_norm": 0.6537352800369263, "learning_rate": 0.00019649772733474269, "epoch": 0.08746824452519536, "step": 2720 }, { "loss": 0.12123624086380005, "grad_norm": 0.8966730237007141, "learning_rate": 0.00019647108988259988, "epoch": 0.0877898189535968, "step": 2730 }, { "loss": 0.08953339457511902, "grad_norm": 0.6152775287628174, "learning_rate": 0.00019644435333412806, "epoch": 0.08811139338199826, "step": 2740 }, { "loss": 0.10960085391998291, "grad_norm": 0.48908594250679016, "learning_rate": 0.0001964175177167913, "epoch": 0.08843296781039972, "step": 2750 }, { "loss": 0.11797866821289063, "grad_norm": 0.7124961018562317, "learning_rate": 0.00019639058305815553, "epoch": 0.08875454223880117, "step": 2760 }, { "loss": 0.11341865062713623, "grad_norm": 0.579239547252655, "learning_rate": 0.00019636354938588836, "epoch": 0.08907611666720262, "step": 2770 }, { "loss": 0.14069968461990356, "grad_norm": 0.7737932205200195, "learning_rate": 0.0001963364167277591, "epoch": 0.08939769109560408, "step": 2780 }, { "loss": 0.10434212684631347, "grad_norm": 0.7406035661697388, "learning_rate": 0.0001963091851116388, "epoch": 0.08971926552400553, "step": 2790 }, { "loss": 0.1168674349784851, "grad_norm": 0.9775945544242859, "learning_rate": 0.00019628185456550007, "epoch": 0.09004083995240698, "step": 2800 }, { "loss": 0.12225688695907592, "grad_norm": 0.44528913497924805, "learning_rate": 0.00019625442511741727, "epoch": 0.09036241438080844, "step": 2810 }, { "loss": 0.12359638214111328, "grad_norm": 0.4269179105758667, "learning_rate": 0.00019622689679556622, "epoch": 0.09068398880920989, "step": 2820 }, { "loss": 0.1075709342956543, "grad_norm": 0.6485812067985535, "learning_rate": 0.00019619926962822435, "epoch": 0.09100556323761134, "step": 2830 }, { "loss": 0.11307716369628906, "grad_norm": 1.5778266191482544, "learning_rate": 0.00019617154364377068, "epoch": 0.0913271376660128, "step": 2840 }, { "loss": 0.11073293685913085, "grad_norm": 0.8932989239692688, "learning_rate": 0.0001961437188706857, "epoch": 0.09164871209441425, "step": 2850 }, { "loss": 0.11458207368850708, "grad_norm": 0.7158249616622925, "learning_rate": 0.0001961157953375513, "epoch": 0.0919702865228157, "step": 2860 }, { "loss": 0.10697572231292725, "grad_norm": 0.5875152945518494, "learning_rate": 0.00019608777307305095, "epoch": 0.09229186095121716, "step": 2870 }, { "loss": 0.1203433632850647, "grad_norm": 1.4160408973693848, "learning_rate": 0.00019605965210596948, "epoch": 0.09261343537961861, "step": 2880 }, { "loss": 0.13875064849853516, "grad_norm": 0.6960219740867615, "learning_rate": 0.00019603143246519307, "epoch": 0.09293500980802007, "step": 2890 }, { "loss": 0.1032223105430603, "grad_norm": 0.635543704032898, "learning_rate": 0.00019600311417970932, "epoch": 0.09325658423642152, "step": 2900 }, { "loss": 0.12232608795166015, "grad_norm": 0.7557558417320251, "learning_rate": 0.0001959746972786071, "epoch": 0.09357815866482297, "step": 2910 }, { "loss": 0.11726034879684448, "grad_norm": 0.6953281164169312, "learning_rate": 0.00019594618179107664, "epoch": 0.09389973309322443, "step": 2920 }, { "loss": 0.10258761644363404, "grad_norm": 0.452549546957016, "learning_rate": 0.00019591756774640944, "epoch": 0.09422130752162589, "step": 2930 }, { "loss": 0.12623608112335205, "grad_norm": 0.7142686247825623, "learning_rate": 0.00019588885517399812, "epoch": 0.09454288195002733, "step": 2940 }, { "loss": 0.12421194314956666, "grad_norm": 0.5964231491088867, "learning_rate": 0.0001958600441033367, "epoch": 0.09486445637842879, "step": 2950 }, { "loss": 0.11757348775863648, "grad_norm": 0.6275390386581421, "learning_rate": 0.0001958311345640202, "epoch": 0.09518603080683025, "step": 2960 }, { "loss": 0.1174355149269104, "grad_norm": 0.611051082611084, "learning_rate": 0.00019580212658574492, "epoch": 0.09550760523523169, "step": 2970 }, { "loss": 0.11205054521560669, "grad_norm": 0.7474158406257629, "learning_rate": 0.00019577302019830817, "epoch": 0.09582917966363315, "step": 2980 }, { "loss": 0.12011423110961914, "grad_norm": 1.0890192985534668, "learning_rate": 0.00019574381543160843, "epoch": 0.09615075409203461, "step": 2990 }, { "loss": 0.09580199718475342, "grad_norm": 0.6116976737976074, "learning_rate": 0.00019571451231564525, "epoch": 0.09647232852043605, "step": 3000 }, { "loss": 0.1161598801612854, "grad_norm": 0.6288789510726929, "learning_rate": 0.0001956851108805191, "epoch": 0.09679390294883751, "step": 3010 }, { "loss": 0.10462770462036133, "grad_norm": 0.9161404967308044, "learning_rate": 0.00019565561115643152, "epoch": 0.09711547737723897, "step": 3020 }, { "loss": 0.12679061889648438, "grad_norm": 0.5990720987319946, "learning_rate": 0.00019562601317368503, "epoch": 0.09743705180564041, "step": 3030 }, { "loss": 0.10021860599517822, "grad_norm": 1.0173943042755127, "learning_rate": 0.00019559631696268302, "epoch": 0.09775862623404187, "step": 3040 }, { "loss": 0.09998620748519897, "grad_norm": 0.5726870894432068, "learning_rate": 0.00019556652255392983, "epoch": 0.09808020066244333, "step": 3050 }, { "loss": 0.13044171333312987, "grad_norm": 0.8290376663208008, "learning_rate": 0.00019553662997803066, "epoch": 0.09840177509084477, "step": 3060 }, { "loss": 0.10107682943344116, "grad_norm": 0.6910918354988098, "learning_rate": 0.00019550663926569154, "epoch": 0.09872334951924623, "step": 3070 }, { "loss": 0.12903016805648804, "grad_norm": 1.1203150749206543, "learning_rate": 0.00019547655044771932, "epoch": 0.09904492394764769, "step": 3080 }, { "loss": 0.10066742897033691, "grad_norm": 0.37552332878112793, "learning_rate": 0.00019544636355502156, "epoch": 0.09936649837604913, "step": 3090 }, { "loss": 0.11238259077072144, "grad_norm": 0.7681753039360046, "learning_rate": 0.00019541607861860673, "epoch": 0.09968807280445059, "step": 3100 }, { "eval_loss": 0.10922306776046753, "eval_runtime": 34.3459, "eval_samples_per_second": 146.335, "eval_steps_per_second": 36.598, "epoch": 0.09994533234717175, "step": 3108 }, { "loss": 0.08798834085464477, "grad_norm": 0.6184873580932617, "learning_rate": 0.0001953856956695838, "epoch": 0.10000964723285205, "step": 3110 }, { "loss": 0.13838959932327272, "grad_norm": 0.7986240386962891, "learning_rate": 0.00019535521473916257, "epoch": 0.1003312216612535, "step": 3120 }, { "loss": 0.10403268337249756, "grad_norm": 0.7976729869842529, "learning_rate": 0.0001953246358586534, "epoch": 0.10065279608965495, "step": 3130 }, { "loss": 0.10503251552581787, "grad_norm": 0.5033293962478638, "learning_rate": 0.00019529395905946737, "epoch": 0.10097437051805641, "step": 3140 }, { "loss": 0.13334119319915771, "grad_norm": 0.6307993531227112, "learning_rate": 0.00019526318437311605, "epoch": 0.10129594494645786, "step": 3150 }, { "loss": 0.0990634799003601, "grad_norm": 0.9739433526992798, "learning_rate": 0.0001952323118312115, "epoch": 0.10161751937485931, "step": 3160 }, { "loss": 0.10344570875167847, "grad_norm": 0.8039160966873169, "learning_rate": 0.00019520134146546654, "epoch": 0.10193909380326076, "step": 3170 }, { "loss": 0.1280883550643921, "grad_norm": 0.8540322184562683, "learning_rate": 0.00019517027330769423, "epoch": 0.10226066823166222, "step": 3180 }, { "loss": 0.10271800756454467, "grad_norm": 0.9251324534416199, "learning_rate": 0.0001951391073898082, "epoch": 0.10258224266006367, "step": 3190 }, { "loss": 0.12908483743667604, "grad_norm": 0.8988800048828125, "learning_rate": 0.00019510784374382245, "epoch": 0.10290381708846512, "step": 3200 }, { "loss": 0.11154094934463502, "grad_norm": 0.6570804715156555, "learning_rate": 0.00019507648240185143, "epoch": 0.10322539151686658, "step": 3210 }, { "loss": 0.15660790205001832, "grad_norm": 0.40354835987091064, "learning_rate": 0.0001950450233961099, "epoch": 0.10354696594526804, "step": 3220 }, { "loss": 0.11898343563079834, "grad_norm": 0.5271028876304626, "learning_rate": 0.00019501346675891292, "epoch": 0.10386854037366948, "step": 3230 }, { "loss": 0.08217236399650574, "grad_norm": 0.6451640129089355, "learning_rate": 0.00019498181252267593, "epoch": 0.10419011480207094, "step": 3240 }, { "loss": 0.12326722145080567, "grad_norm": 0.6667381525039673, "learning_rate": 0.0001949500607199145, "epoch": 0.1045116892304724, "step": 3250 }, { "loss": 0.09181814193725586, "grad_norm": 0.7040378451347351, "learning_rate": 0.00019491821138324452, "epoch": 0.10483326365887384, "step": 3260 }, { "loss": 0.1170729398727417, "grad_norm": 0.9717530012130737, "learning_rate": 0.00019488626454538204, "epoch": 0.1051548380872753, "step": 3270 }, { "loss": 0.11985208988189697, "grad_norm": 0.8807070255279541, "learning_rate": 0.00019485422023914322, "epoch": 0.10547641251567676, "step": 3280 }, { "loss": 0.1229805588722229, "grad_norm": 0.8843317627906799, "learning_rate": 0.00019482207849744442, "epoch": 0.1057979869440782, "step": 3290 }, { "loss": 0.11908345222473145, "grad_norm": 0.6614115834236145, "learning_rate": 0.00019478983935330199, "epoch": 0.10611956137247966, "step": 3300 }, { "loss": 0.11808741092681885, "grad_norm": 0.7376132011413574, "learning_rate": 0.00019475750283983248, "epoch": 0.10644113580088112, "step": 3310 }, { "loss": 0.0965549647808075, "grad_norm": 0.6531370282173157, "learning_rate": 0.00019472506899025226, "epoch": 0.10676271022928256, "step": 3320 }, { "loss": 0.10322840213775634, "grad_norm": 0.39049506187438965, "learning_rate": 0.00019469253783787788, "epoch": 0.10708428465768402, "step": 3330 }, { "loss": 0.09751293063163757, "grad_norm": 0.7664731740951538, "learning_rate": 0.00019465990941612573, "epoch": 0.10740585908608548, "step": 3340 }, { "loss": 0.10536651611328125, "grad_norm": 0.8594152331352234, "learning_rate": 0.0001946271837585121, "epoch": 0.10772743351448692, "step": 3350 }, { "loss": 0.13831456899642944, "grad_norm": 0.48862630128860474, "learning_rate": 0.00019459436089865325, "epoch": 0.10804900794288838, "step": 3360 }, { "loss": 0.1286901354789734, "grad_norm": 0.7956599593162537, "learning_rate": 0.0001945614408702652, "epoch": 0.10837058237128984, "step": 3370 }, { "loss": 0.09993658065795899, "grad_norm": 0.6222932934761047, "learning_rate": 0.00019452842370716386, "epoch": 0.10869215679969128, "step": 3380 }, { "loss": 0.11540353298187256, "grad_norm": 0.6490775942802429, "learning_rate": 0.0001944953094432648, "epoch": 0.10901373122809274, "step": 3390 }, { "loss": 0.11278325319290161, "grad_norm": 0.7428007125854492, "learning_rate": 0.00019446209811258352, "epoch": 0.1093353056564942, "step": 3400 }, { "loss": 0.09938254356384277, "grad_norm": 0.5227949023246765, "learning_rate": 0.00019442878974923507, "epoch": 0.10965688008489564, "step": 3410 }, { "loss": 0.13301162719726561, "grad_norm": 0.8114507794380188, "learning_rate": 0.00019439538438743422, "epoch": 0.1099784545132971, "step": 3420 }, { "loss": 0.1044914722442627, "grad_norm": 0.6826546788215637, "learning_rate": 0.0001943618820614954, "epoch": 0.11030002894169856, "step": 3430 }, { "loss": 0.10617796182632447, "grad_norm": 0.5690863132476807, "learning_rate": 0.00019432828280583259, "epoch": 0.1106216033701, "step": 3440 }, { "loss": 0.11815530061721802, "grad_norm": 0.6827008128166199, "learning_rate": 0.00019429458665495941, "epoch": 0.11094317779850146, "step": 3450 }, { "loss": 0.08990739583969116, "grad_norm": 0.815086841583252, "learning_rate": 0.00019426079364348895, "epoch": 0.11126475222690292, "step": 3460 }, { "loss": 0.12010560035705567, "grad_norm": 0.5905897617340088, "learning_rate": 0.0001942269038061338, "epoch": 0.11158632665530437, "step": 3470 }, { "loss": 0.08246596455574036, "grad_norm": 0.7142632007598877, "learning_rate": 0.0001941929171777061, "epoch": 0.11190790108370582, "step": 3480 }, { "loss": 0.12285442352294922, "grad_norm": 0.48332491517066956, "learning_rate": 0.00019415883379311733, "epoch": 0.11222947551210728, "step": 3490 }, { "loss": 0.10786828994750977, "grad_norm": 0.720145046710968, "learning_rate": 0.00019412465368737832, "epoch": 0.11255104994050873, "step": 3500 }, { "loss": 0.118403160572052, "grad_norm": 0.9487232565879822, "learning_rate": 0.00019409037689559932, "epoch": 0.11287262436891018, "step": 3510 }, { "loss": 0.13273926973342895, "grad_norm": 0.5884988903999329, "learning_rate": 0.00019405600345298994, "epoch": 0.11319419879731164, "step": 3520 }, { "loss": 0.09212377071380615, "grad_norm": 0.6843329071998596, "learning_rate": 0.00019402153339485897, "epoch": 0.11351577322571309, "step": 3530 }, { "loss": 0.09920811653137207, "grad_norm": 0.6576499342918396, "learning_rate": 0.0001939869667566145, "epoch": 0.11383734765411455, "step": 3540 }, { "loss": 0.10872898101806641, "grad_norm": 0.7083240747451782, "learning_rate": 0.00019395230357376387, "epoch": 0.114158922082516, "step": 3550 }, { "loss": 0.1173086166381836, "grad_norm": 0.8997201323509216, "learning_rate": 0.00019391754388191346, "epoch": 0.11448049651091745, "step": 3560 }, { "loss": 0.11752113103866577, "grad_norm": 0.5752458572387695, "learning_rate": 0.0001938826877167689, "epoch": 0.1148020709393189, "step": 3570 }, { "loss": 0.09167235493659973, "grad_norm": 0.28855404257774353, "learning_rate": 0.00019384773511413493, "epoch": 0.11512364536772036, "step": 3580 }, { "loss": 0.11027594804763793, "grad_norm": 0.5791109204292297, "learning_rate": 0.00019381268610991521, "epoch": 0.11544521979612181, "step": 3590 }, { "loss": 0.0963161051273346, "grad_norm": 0.9037119150161743, "learning_rate": 0.0001937775407401126, "epoch": 0.11576679422452327, "step": 3600 }, { "loss": 0.10461825132369995, "grad_norm": 0.7854295969009399, "learning_rate": 0.00019374229904082889, "epoch": 0.11608836865292473, "step": 3610 }, { "loss": 0.11341136693954468, "grad_norm": 0.7756221890449524, "learning_rate": 0.00019370696104826474, "epoch": 0.11640994308132617, "step": 3620 }, { "loss": 0.11079100370407105, "grad_norm": 0.8821120858192444, "learning_rate": 0.0001936715267987198, "epoch": 0.11673151750972763, "step": 3630 }, { "loss": 0.10128632783889771, "grad_norm": 1.0691667795181274, "learning_rate": 0.00019363599632859257, "epoch": 0.11705309193812909, "step": 3640 }, { "loss": 0.10989173650741577, "grad_norm": 0.773428738117218, "learning_rate": 0.00019360036967438045, "epoch": 0.11737466636653053, "step": 3650 }, { "loss": 0.12041290998458862, "grad_norm": 0.6354496479034424, "learning_rate": 0.00019356464687267952, "epoch": 0.11769624079493199, "step": 3660 }, { "loss": 0.11748863458633423, "grad_norm": 0.4678747355937958, "learning_rate": 0.00019352882796018475, "epoch": 0.11801781522333345, "step": 3670 }, { "loss": 0.12304327487945557, "grad_norm": 0.934917688369751, "learning_rate": 0.0001934929129736898, "epoch": 0.11833938965173489, "step": 3680 }, { "loss": 0.119256591796875, "grad_norm": 0.6330606341362, "learning_rate": 0.00019345690195008694, "epoch": 0.11866096408013635, "step": 3690 }, { "loss": 0.11701215505599975, "grad_norm": 0.6313028335571289, "learning_rate": 0.0001934207949263672, "epoch": 0.11898253850853781, "step": 3700 }, { "loss": 0.08980629444122315, "grad_norm": 0.5904396772384644, "learning_rate": 0.00019338459193962013, "epoch": 0.11930411293693925, "step": 3710 }, { "loss": 0.10626832246780396, "grad_norm": 0.7841988205909729, "learning_rate": 0.00019334829302703393, "epoch": 0.11962568736534071, "step": 3720 }, { "loss": 0.1016309142112732, "grad_norm": 0.8079530000686646, "learning_rate": 0.0001933118982258953, "epoch": 0.11994726179374215, "step": 3730 }, { "loss": 0.10360891819000244, "grad_norm": 0.6402804851531982, "learning_rate": 0.00019327540757358943, "epoch": 0.12026883622214361, "step": 3740 }, { "loss": 0.11208920478820801, "grad_norm": 0.535378098487854, "learning_rate": 0.00019323882110759997, "epoch": 0.12059041065054507, "step": 3750 }, { "loss": 0.13467209339141845, "grad_norm": 0.6953940391540527, "learning_rate": 0.00019320213886550903, "epoch": 0.12091198507894652, "step": 3760 }, { "loss": 0.0965237021446228, "grad_norm": 0.6152417659759521, "learning_rate": 0.00019316536088499707, "epoch": 0.12123355950734797, "step": 3770 }, { "loss": 0.1212538480758667, "grad_norm": 1.36095130443573, "learning_rate": 0.0001931284872038429, "epoch": 0.12155513393574943, "step": 3780 }, { "loss": 0.09757543206214905, "grad_norm": 0.7375221252441406, "learning_rate": 0.00019309151785992364, "epoch": 0.12187670836415088, "step": 3790 }, { "loss": 0.10885839462280274, "grad_norm": 0.8550742864608765, "learning_rate": 0.00019305445289121464, "epoch": 0.12219828279255233, "step": 3800 }, { "loss": 0.0911238431930542, "grad_norm": 0.7616929411888123, "learning_rate": 0.00019301729233578953, "epoch": 0.12251985722095379, "step": 3810 }, { "loss": 0.0981452465057373, "grad_norm": 0.5779492855072021, "learning_rate": 0.0001929800362318201, "epoch": 0.12284143164935524, "step": 3820 }, { "loss": 0.1250522494316101, "grad_norm": 0.5627166032791138, "learning_rate": 0.00019294268461757626, "epoch": 0.1231630060777567, "step": 3830 }, { "loss": 0.11018778085708618, "grad_norm": 0.7056961059570312, "learning_rate": 0.00019290523753142617, "epoch": 0.12348458050615815, "step": 3840 }, { "loss": 0.10934178829193116, "grad_norm": 0.5087243318557739, "learning_rate": 0.0001928676950118358, "epoch": 0.1238061549345596, "step": 3850 }, { "loss": 0.10876210927963256, "grad_norm": 0.5404261350631714, "learning_rate": 0.00019283005709736943, "epoch": 0.12412772936296106, "step": 3860 }, { "loss": 0.11565039157867432, "grad_norm": 0.6383166909217834, "learning_rate": 0.00019279232382668916, "epoch": 0.12444930379136251, "step": 3870 }, { "loss": 0.12286177873611451, "grad_norm": 0.5657302737236023, "learning_rate": 0.00019275449523855503, "epoch": 0.12477087821976396, "step": 3880 }, { "loss": 0.09651960134506225, "grad_norm": 0.5566875338554382, "learning_rate": 0.00019271657137182513, "epoch": 0.12509245264816543, "step": 3890 }, { "loss": 0.09890485405921937, "grad_norm": 0.4659144878387451, "learning_rate": 0.0001926785522654553, "epoch": 0.12541402707656688, "step": 3900 }, { "loss": 0.0990807831287384, "grad_norm": 0.8052150011062622, "learning_rate": 0.00019264043795849927, "epoch": 0.12573560150496832, "step": 3910 }, { "loss": 0.11594893932342529, "grad_norm": 1.0626065731048584, "learning_rate": 0.00019260222849010848, "epoch": 0.1260571759333698, "step": 3920 }, { "loss": 0.12493889331817627, "grad_norm": 0.8749810457229614, "learning_rate": 0.00019256392389953227, "epoch": 0.12637875036177124, "step": 3930 }, { "loss": 0.1070855975151062, "grad_norm": 0.6140100359916687, "learning_rate": 0.00019252552422611752, "epoch": 0.12670032479017268, "step": 3940 }, { "loss": 0.08729969263076783, "grad_norm": 0.4629083275794983, "learning_rate": 0.0001924870295093089, "epoch": 0.12702189921857415, "step": 3950 }, { "loss": 0.12051993608474731, "grad_norm": 0.8901966214179993, "learning_rate": 0.0001924484397886487, "epoch": 0.1273434736469756, "step": 3960 }, { "loss": 0.10127103328704834, "grad_norm": 0.9668626189231873, "learning_rate": 0.00019240975510377673, "epoch": 0.12766504807537704, "step": 3970 }, { "loss": 0.09287093877792359, "grad_norm": 0.6984792947769165, "learning_rate": 0.00019237097549443038, "epoch": 0.1279866225037785, "step": 3980 }, { "loss": 0.12040114402770996, "grad_norm": 0.9510995149612427, "learning_rate": 0.00019233210100044458, "epoch": 0.12830819693217996, "step": 3990 }, { "loss": 0.12150945663452148, "grad_norm": 0.6838380694389343, "learning_rate": 0.00019229313166175174, "epoch": 0.1286297713605814, "step": 4000 }, { "loss": 0.1207665205001831, "grad_norm": 0.6368831992149353, "learning_rate": 0.0001922540675183816, "epoch": 0.12895134578898285, "step": 4010 }, { "loss": 0.10408351421356202, "grad_norm": 0.921284556388855, "learning_rate": 0.00019221490861046138, "epoch": 0.12927292021738432, "step": 4020 }, { "loss": 0.11057004928588868, "grad_norm": 0.7935436964035034, "learning_rate": 0.00019217565497821557, "epoch": 0.12959449464578576, "step": 4030 }, { "loss": 0.09651212692260742, "grad_norm": 0.5812630653381348, "learning_rate": 0.00019213630666196604, "epoch": 0.1299160690741872, "step": 4040 }, { "loss": 0.09005358219146728, "grad_norm": 0.3681035339832306, "learning_rate": 0.00019209686370213187, "epoch": 0.13023764350258868, "step": 4050 }, { "loss": 0.10046392679214478, "grad_norm": 0.6533730626106262, "learning_rate": 0.00019205732613922937, "epoch": 0.13055921793099012, "step": 4060 }, { "loss": 0.10442250967025757, "grad_norm": 0.43741610646247864, "learning_rate": 0.000192017694013872, "epoch": 0.13088079235939157, "step": 4070 }, { "loss": 0.08786097168922424, "grad_norm": 0.6568700075149536, "learning_rate": 0.0001919779673667704, "epoch": 0.13120236678779304, "step": 4080 }, { "loss": 0.10092254877090454, "grad_norm": 0.9713712334632874, "learning_rate": 0.0001919381462387323, "epoch": 0.13152394121619448, "step": 4090 }, { "loss": 0.1092909812927246, "grad_norm": 0.6388143301010132, "learning_rate": 0.00019189823067066247, "epoch": 0.13184551564459593, "step": 4100 }, { "loss": 0.11206631660461426, "grad_norm": 0.6925147175788879, "learning_rate": 0.00019185822070356268, "epoch": 0.1321670900729974, "step": 4110 }, { "loss": 0.10706722736358643, "grad_norm": 0.7303212285041809, "learning_rate": 0.00019181811637853166, "epoch": 0.13248866450139885, "step": 4120 }, { "loss": 0.10609568357467651, "grad_norm": 0.3527115285396576, "learning_rate": 0.00019177791773676513, "epoch": 0.1328102389298003, "step": 4130 }, { "loss": 0.09472618699073791, "grad_norm": 0.37147092819213867, "learning_rate": 0.00019173762481955556, "epoch": 0.13313181335820176, "step": 4140 }, { "loss": 0.12064085006713868, "grad_norm": 0.7030910849571228, "learning_rate": 0.0001916972376682924, "epoch": 0.1334533877866032, "step": 4150 }, { "loss": 0.11871163845062256, "grad_norm": 1.1606062650680542, "learning_rate": 0.00019165675632446186, "epoch": 0.13377496221500465, "step": 4160 }, { "loss": 0.1308833122253418, "grad_norm": 0.9086573719978333, "learning_rate": 0.00019161618082964691, "epoch": 0.13409653664340612, "step": 4170 }, { "loss": 0.11038376092910766, "grad_norm": 0.7408903241157532, "learning_rate": 0.00019157551122552712, "epoch": 0.13441811107180757, "step": 4180 }, { "loss": 0.08468644618988037, "grad_norm": 0.9437806606292725, "learning_rate": 0.00019153474755387892, "epoch": 0.134739685500209, "step": 4190 }, { "loss": 0.08930802345275879, "grad_norm": 0.7443150877952576, "learning_rate": 0.00019149388985657522, "epoch": 0.13506125992861048, "step": 4200 }, { "loss": 0.08937476873397827, "grad_norm": 0.9245612025260925, "learning_rate": 0.00019145293817558558, "epoch": 0.13538283435701193, "step": 4210 }, { "loss": 0.1095966100692749, "grad_norm": 0.564530074596405, "learning_rate": 0.00019141189255297608, "epoch": 0.13570440878541337, "step": 4220 }, { "loss": 0.09740201234817505, "grad_norm": 0.6564256548881531, "learning_rate": 0.00019137075303090934, "epoch": 0.13602598321381484, "step": 4230 }, { "loss": 0.10432863235473633, "grad_norm": 0.6156473159790039, "learning_rate": 0.0001913295196516443, "epoch": 0.1363475576422163, "step": 4240 }, { "loss": 0.09986402988433837, "grad_norm": 0.9064126014709473, "learning_rate": 0.00019128819245753652, "epoch": 0.13666913207061773, "step": 4250 }, { "loss": 0.09308716058731079, "grad_norm": 0.5122585296630859, "learning_rate": 0.0001912467714910378, "epoch": 0.1369907064990192, "step": 4260 }, { "loss": 0.10415486097335816, "grad_norm": 0.966684103012085, "learning_rate": 0.00019120525679469622, "epoch": 0.13731228092742065, "step": 4270 }, { "loss": 0.09036895036697387, "grad_norm": 0.8586301803588867, "learning_rate": 0.00019116364841115628, "epoch": 0.1376338553558221, "step": 4280 }, { "loss": 0.09274351596832275, "grad_norm": 0.5143318772315979, "learning_rate": 0.00019112194638315862, "epoch": 0.13795542978422357, "step": 4290 }, { "loss": 0.10373387336730958, "grad_norm": 0.765060305595398, "learning_rate": 0.00019108015075354002, "epoch": 0.138277004212625, "step": 4300 }, { "loss": 0.09613911509513855, "grad_norm": 0.6508856415748596, "learning_rate": 0.0001910382615652336, "epoch": 0.13859857864102645, "step": 4310 }, { "loss": 0.11943503618240356, "grad_norm": 0.7866203188896179, "learning_rate": 0.00019099627886126839, "epoch": 0.13892015306942793, "step": 4320 }, { "loss": 0.11114203929901123, "grad_norm": 0.4582284092903137, "learning_rate": 0.00019095420268476956, "epoch": 0.13924172749782937, "step": 4330 }, { "loss": 0.08318037986755371, "grad_norm": 0.7825440168380737, "learning_rate": 0.0001909120330789583, "epoch": 0.13956330192623081, "step": 4340 }, { "loss": 0.10253852605819702, "grad_norm": 0.7011892795562744, "learning_rate": 0.0001908697700871518, "epoch": 0.1398848763546323, "step": 4350 }, { "loss": 0.1049849271774292, "grad_norm": 0.34880417585372925, "learning_rate": 0.00019082741375276308, "epoch": 0.14020645078303373, "step": 4360 }, { "loss": 0.10247675180435181, "grad_norm": 0.5124948620796204, "learning_rate": 0.00019078496411930118, "epoch": 0.14052802521143518, "step": 4370 }, { "loss": 0.11412211656570434, "grad_norm": 0.3058924674987793, "learning_rate": 0.00019074242123037083, "epoch": 0.14084959963983665, "step": 4380 }, { "loss": 0.12336907386779786, "grad_norm": 0.8137784004211426, "learning_rate": 0.00019069978512967266, "epoch": 0.1411711740682381, "step": 4390 }, { "loss": 0.09460805058479309, "grad_norm": 1.2490235567092896, "learning_rate": 0.00019065705586100303, "epoch": 0.14149274849663954, "step": 4400 }, { "loss": 0.11223421096801758, "grad_norm": 0.8012495040893555, "learning_rate": 0.00019061423346825395, "epoch": 0.141814322925041, "step": 4410 }, { "loss": 0.09242308735847474, "grad_norm": 0.43816885352134705, "learning_rate": 0.00019057131799541318, "epoch": 0.14213589735344245, "step": 4420 }, { "loss": 0.10507001876831054, "grad_norm": 0.4903210401535034, "learning_rate": 0.00019052830948656398, "epoch": 0.1424574717818439, "step": 4430 }, { "loss": 0.09061969518661499, "grad_norm": 0.7580713629722595, "learning_rate": 0.00019048520798588525, "epoch": 0.14277904621024537, "step": 4440 }, { "loss": 0.09204312562942504, "grad_norm": 0.6889179348945618, "learning_rate": 0.00019044201353765142, "epoch": 0.1431006206386468, "step": 4450 }, { "loss": 0.1126853346824646, "grad_norm": 0.43935051560401917, "learning_rate": 0.00019039872618623234, "epoch": 0.14342219506704826, "step": 4460 }, { "loss": 0.11694524288177491, "grad_norm": 1.1189888715744019, "learning_rate": 0.00019035534597609339, "epoch": 0.14374376949544973, "step": 4470 }, { "loss": 0.12891401052474977, "grad_norm": 0.94930499792099, "learning_rate": 0.0001903118729517952, "epoch": 0.14406534392385117, "step": 4480 }, { "loss": 0.10222654342651367, "grad_norm": 0.7653531432151794, "learning_rate": 0.00019026830715799387, "epoch": 0.14438691835225262, "step": 4490 }, { "loss": 0.08867776989936829, "grad_norm": 0.6891590356826782, "learning_rate": 0.00019022464863944073, "epoch": 0.1447084927806541, "step": 4500 }, { "loss": 0.10994209051132202, "grad_norm": 0.8964745998382568, "learning_rate": 0.0001901808974409823, "epoch": 0.14503006720905554, "step": 4510 }, { "loss": 0.0991736888885498, "grad_norm": 0.5754009485244751, "learning_rate": 0.0001901370536075605, "epoch": 0.14535164163745698, "step": 4520 }, { "loss": 0.09913696646690369, "grad_norm": 0.744156539440155, "learning_rate": 0.00019009311718421213, "epoch": 0.14567321606585845, "step": 4530 }, { "loss": 0.1233474612236023, "grad_norm": 0.5341463685035706, "learning_rate": 0.0001900490882160693, "epoch": 0.1459947904942599, "step": 4540 }, { "loss": 0.08937983512878418, "grad_norm": 0.7251766324043274, "learning_rate": 0.00019000496674835915, "epoch": 0.14631636492266134, "step": 4550 }, { "loss": 0.09609851837158204, "grad_norm": 0.5500466823577881, "learning_rate": 0.00018996075282640379, "epoch": 0.1466379393510628, "step": 4560 }, { "loss": 0.11425167322158813, "grad_norm": 1.043576717376709, "learning_rate": 0.00018991644649562028, "epoch": 0.14695951377946426, "step": 4570 }, { "loss": 0.11258641481399537, "grad_norm": 0.4216415286064148, "learning_rate": 0.00018987204780152072, "epoch": 0.1472810882078657, "step": 4580 }, { "loss": 0.12220112085342408, "grad_norm": 1.0332443714141846, "learning_rate": 0.000189827556789712, "epoch": 0.14760266263626717, "step": 4590 }, { "loss": 0.11982320547103882, "grad_norm": 1.0259549617767334, "learning_rate": 0.0001897829735058958, "epoch": 0.14792423706466862, "step": 4600 }, { "loss": 0.10923948287963867, "grad_norm": 0.7364688515663147, "learning_rate": 0.00018973829799586868, "epoch": 0.14824581149307006, "step": 4610 }, { "loss": 0.10167646408081055, "grad_norm": 0.8405701518058777, "learning_rate": 0.00018969353030552188, "epoch": 0.14856738592147153, "step": 4620 }, { "loss": 0.09561271667480468, "grad_norm": 0.8111253976821899, "learning_rate": 0.00018964867048084134, "epoch": 0.14888896034987298, "step": 4630 }, { "loss": 0.10397641658782959, "grad_norm": 0.8518447279930115, "learning_rate": 0.00018960371856790764, "epoch": 0.14921053477827442, "step": 4640 }, { "loss": 0.12073606252670288, "grad_norm": 0.766708493232727, "learning_rate": 0.00018955867461289597, "epoch": 0.1495321092066759, "step": 4650 }, { "loss": 0.1148116946220398, "grad_norm": 0.5033470392227173, "learning_rate": 0.00018951353866207603, "epoch": 0.14985368363507734, "step": 4660 }, { "eval_loss": 0.10172691941261292, "eval_runtime": 34.399, "eval_samples_per_second": 146.109, "eval_steps_per_second": 36.542, "epoch": 0.14991799852075763, "step": 4662 }, { "loss": 0.1416254758834839, "grad_norm": 0.5513647198677063, "learning_rate": 0.00018946831076181204, "epoch": 0.15017525806347878, "step": 4670 }, { "loss": 0.1062854290008545, "grad_norm": 0.40923750400543213, "learning_rate": 0.00018942299095856267, "epoch": 0.15049683249188026, "step": 4680 }, { "loss": 0.0943223237991333, "grad_norm": 0.5009618401527405, "learning_rate": 0.00018937757929888104, "epoch": 0.1508184069202817, "step": 4690 }, { "loss": 0.11033749580383301, "grad_norm": 0.3853249251842499, "learning_rate": 0.0001893320758294146, "epoch": 0.15113998134868314, "step": 4700 }, { "loss": 0.0948384165763855, "grad_norm": 0.8394500613212585, "learning_rate": 0.00018928648059690498, "epoch": 0.15146155577708462, "step": 4710 }, { "loss": 0.09225496649742126, "grad_norm": 1.1599781513214111, "learning_rate": 0.00018924079364818826, "epoch": 0.15178313020548606, "step": 4720 }, { "loss": 0.13177945613861083, "grad_norm": 0.8892496228218079, "learning_rate": 0.0001891950150301947, "epoch": 0.1521047046338875, "step": 4730 }, { "loss": 0.09583035111427307, "grad_norm": 0.7567657828330994, "learning_rate": 0.0001891491447899486, "epoch": 0.15242627906228898, "step": 4740 }, { "loss": 0.08644075989723206, "grad_norm": 0.488677442073822, "learning_rate": 0.00018910318297456845, "epoch": 0.15274785349069042, "step": 4750 }, { "loss": 0.09413954019546508, "grad_norm": 0.5898675918579102, "learning_rate": 0.00018905712963126684, "epoch": 0.15306942791909187, "step": 4760 }, { "loss": 0.11096861362457275, "grad_norm": 1.2141278982162476, "learning_rate": 0.00018901098480735037, "epoch": 0.15339100234749334, "step": 4770 }, { "loss": 0.09794067144393921, "grad_norm": 0.6396763920783997, "learning_rate": 0.00018896474855021955, "epoch": 0.15371257677589478, "step": 4780 }, { "loss": 0.11338918209075928, "grad_norm": 0.7813658714294434, "learning_rate": 0.00018891842090736888, "epoch": 0.15403415120429623, "step": 4790 }, { "loss": 0.10229338407516479, "grad_norm": 0.5102593898773193, "learning_rate": 0.00018887200192638668, "epoch": 0.1543557256326977, "step": 4800 }, { "loss": 0.11056987047195435, "grad_norm": 0.933292031288147, "learning_rate": 0.0001888254916549551, "epoch": 0.15467730006109914, "step": 4810 }, { "loss": 0.11467688083648682, "grad_norm": 0.8504388928413391, "learning_rate": 0.00018877889014085015, "epoch": 0.1549988744895006, "step": 4820 }, { "loss": 0.10686609745025635, "grad_norm": 0.7789998054504395, "learning_rate": 0.00018873219743194145, "epoch": 0.15532044891790206, "step": 4830 }, { "loss": 0.08134291768074035, "grad_norm": 0.5356742143630981, "learning_rate": 0.00018868541357619235, "epoch": 0.1556420233463035, "step": 4840 }, { "loss": 0.12447811365127563, "grad_norm": 0.6773043870925903, "learning_rate": 0.0001886385386216598, "epoch": 0.15596359777470495, "step": 4850 }, { "loss": 0.1055222988128662, "grad_norm": 0.5099416375160217, "learning_rate": 0.0001885915726164944, "epoch": 0.15628517220310642, "step": 4860 }, { "loss": 0.11184096336364746, "grad_norm": 1.135096549987793, "learning_rate": 0.00018854451560894016, "epoch": 0.15660674663150786, "step": 4870 }, { "loss": 0.11401809453964233, "grad_norm": 0.5366411805152893, "learning_rate": 0.00018849736764733468, "epoch": 0.1569283210599093, "step": 4880 }, { "loss": 0.09933592081069946, "grad_norm": 0.5675321221351624, "learning_rate": 0.00018845012878010895, "epoch": 0.15724989548831078, "step": 4890 }, { "loss": 0.09386941194534301, "grad_norm": 0.517961323261261, "learning_rate": 0.00018840279905578733, "epoch": 0.15757146991671223, "step": 4900 }, { "loss": 0.11577614545822143, "grad_norm": 0.9143649339675903, "learning_rate": 0.0001883553785229875, "epoch": 0.15789304434511367, "step": 4910 }, { "loss": 0.08844751715660096, "grad_norm": 0.6606913805007935, "learning_rate": 0.00018830786723042042, "epoch": 0.15821461877351514, "step": 4920 }, { "loss": 0.09490571618080139, "grad_norm": 0.6583348512649536, "learning_rate": 0.00018826026522689033, "epoch": 0.1585361932019166, "step": 4930 }, { "loss": 0.09571805000305175, "grad_norm": 0.8969154953956604, "learning_rate": 0.00018821257256129462, "epoch": 0.15885776763031803, "step": 4940 }, { "loss": 0.10312877893447876, "grad_norm": 0.32090887427330017, "learning_rate": 0.00018816478928262377, "epoch": 0.1591793420587195, "step": 4950 }, { "loss": 0.09461336731910705, "grad_norm": 0.5978054404258728, "learning_rate": 0.00018811691543996136, "epoch": 0.15950091648712095, "step": 4960 }, { "loss": 0.09903188943862914, "grad_norm": 0.30837303400039673, "learning_rate": 0.000188068951082484, "epoch": 0.1598224909155224, "step": 4970 }, { "loss": 0.1048235297203064, "grad_norm": 0.667777419090271, "learning_rate": 0.00018802089625946135, "epoch": 0.16014406534392386, "step": 4980 }, { "loss": 0.0945176899433136, "grad_norm": 0.5308945178985596, "learning_rate": 0.0001879727510202559, "epoch": 0.1604656397723253, "step": 4990 }, { "loss": 0.11314963102340699, "grad_norm": 0.8820076584815979, "learning_rate": 0.00018792451541432302, "epoch": 0.16078721420072675, "step": 5000 }, { "loss": 0.09827512502670288, "grad_norm": 0.4974328875541687, "learning_rate": 0.000187876189491211, "epoch": 0.16110878862912822, "step": 5010 }, { "loss": 0.14028635025024414, "grad_norm": 0.7012617588043213, "learning_rate": 0.00018782777330056082, "epoch": 0.16143036305752967, "step": 5020 }, { "loss": 0.09744994044303894, "grad_norm": 0.5276012420654297, "learning_rate": 0.00018777926689210619, "epoch": 0.1617519374859311, "step": 5030 }, { "loss": 0.11486378908157349, "grad_norm": 0.6841879487037659, "learning_rate": 0.0001877306703156735, "epoch": 0.16207351191433259, "step": 5040 }, { "loss": 0.11499395370483398, "grad_norm": 0.5109455585479736, "learning_rate": 0.0001876819836211818, "epoch": 0.16239508634273403, "step": 5050 }, { "loss": 0.11083143949508667, "grad_norm": 0.9456536769866943, "learning_rate": 0.00018763320685864264, "epoch": 0.16271666077113547, "step": 5060 }, { "loss": 0.09588128924369813, "grad_norm": 0.7423805594444275, "learning_rate": 0.00018758434007816018, "epoch": 0.16303823519953695, "step": 5070 }, { "loss": 0.11503241062164307, "grad_norm": 0.44641563296318054, "learning_rate": 0.000187535383329931, "epoch": 0.1633598096279384, "step": 5080 }, { "loss": 0.12453733682632447, "grad_norm": 1.7256242036819458, "learning_rate": 0.00018748633666424404, "epoch": 0.16368138405633983, "step": 5090 }, { "loss": 0.13549509048461914, "grad_norm": 0.8264243006706238, "learning_rate": 0.00018743720013148068, "epoch": 0.1640029584847413, "step": 5100 }, { "loss": 0.13447096347808837, "grad_norm": 0.44616052508354187, "learning_rate": 0.0001873879737821146, "epoch": 0.16432453291314275, "step": 5110 }, { "loss": 0.11622662544250488, "grad_norm": 0.5612630248069763, "learning_rate": 0.00018733865766671175, "epoch": 0.1646461073415442, "step": 5120 }, { "loss": 0.10126912593841553, "grad_norm": 0.6290414333343506, "learning_rate": 0.00018728925183593024, "epoch": 0.16496768176994567, "step": 5130 }, { "loss": 0.11963098049163819, "grad_norm": 0.7760690450668335, "learning_rate": 0.00018723975634052037, "epoch": 0.1652892561983471, "step": 5140 }, { "loss": 0.13792331218719484, "grad_norm": 1.2590798139572144, "learning_rate": 0.00018719017123132456, "epoch": 0.16561083062674856, "step": 5150 }, { "loss": 0.09700539112091064, "grad_norm": 0.6620594263076782, "learning_rate": 0.0001871404965592772, "epoch": 0.16593240505515, "step": 5160 }, { "loss": 0.0915036141872406, "grad_norm": 0.529654324054718, "learning_rate": 0.00018709073237540487, "epoch": 0.16625397948355147, "step": 5170 }, { "loss": 0.11442764997482299, "grad_norm": 0.749620258808136, "learning_rate": 0.00018704087873082584, "epoch": 0.16657555391195292, "step": 5180 }, { "loss": 0.1019809365272522, "grad_norm": 0.9167417883872986, "learning_rate": 0.0001869909356767505, "epoch": 0.16689712834035436, "step": 5190 }, { "loss": 0.08411781787872315, "grad_norm": 0.7064757943153381, "learning_rate": 0.00018694090326448098, "epoch": 0.16721870276875583, "step": 5200 }, { "loss": 0.09732612371444702, "grad_norm": 0.8301167488098145, "learning_rate": 0.00018689078154541112, "epoch": 0.16754027719715728, "step": 5210 }, { "loss": 0.09780767560005188, "grad_norm": 0.8980233073234558, "learning_rate": 0.0001868405705710267, "epoch": 0.16786185162555872, "step": 5220 }, { "loss": 0.10536625385284423, "grad_norm": 0.7485562562942505, "learning_rate": 0.00018679027039290497, "epoch": 0.1681834260539602, "step": 5230 }, { "loss": 0.11484253406524658, "grad_norm": 0.8951029777526855, "learning_rate": 0.000186739881062715, "epoch": 0.16850500048236164, "step": 5240 }, { "loss": 0.12118645906448364, "grad_norm": 0.6397461295127869, "learning_rate": 0.00018668940263221734, "epoch": 0.16882657491076308, "step": 5250 }, { "loss": 0.09094181060791015, "grad_norm": 0.5097070932388306, "learning_rate": 0.000186638835153264, "epoch": 0.16914814933916456, "step": 5260 }, { "loss": 0.1319599747657776, "grad_norm": 0.5678567886352539, "learning_rate": 0.0001865881786777986, "epoch": 0.169469723767566, "step": 5270 }, { "loss": 0.10581289529800415, "grad_norm": 0.7563341856002808, "learning_rate": 0.0001865374332578561, "epoch": 0.16979129819596744, "step": 5280 }, { "loss": 0.10248388051986694, "grad_norm": 0.7660218477249146, "learning_rate": 0.00018648659894556283, "epoch": 0.17011287262436892, "step": 5290 }, { "loss": 0.09813046455383301, "grad_norm": 0.33348825573921204, "learning_rate": 0.00018643567579313647, "epoch": 0.17043444705277036, "step": 5300 }, { "loss": 0.10771477222442627, "grad_norm": 0.7716835737228394, "learning_rate": 0.00018638466385288588, "epoch": 0.1707560214811718, "step": 5310 }, { "loss": 0.08386213183403016, "grad_norm": 0.3895624876022339, "learning_rate": 0.00018633356317721125, "epoch": 0.17107759590957328, "step": 5320 }, { "loss": 0.09969307780265808, "grad_norm": 0.5911557078361511, "learning_rate": 0.00018628237381860377, "epoch": 0.17139917033797472, "step": 5330 }, { "loss": 0.10970526933670044, "grad_norm": 0.7556646466255188, "learning_rate": 0.0001862310958296458, "epoch": 0.17172074476637617, "step": 5340 }, { "loss": 0.10416722297668457, "grad_norm": 0.6202457547187805, "learning_rate": 0.0001861797292630108, "epoch": 0.17204231919477764, "step": 5350 }, { "loss": 0.09194405674934387, "grad_norm": 0.5211172699928284, "learning_rate": 0.00018612827417146315, "epoch": 0.17236389362317908, "step": 5360 }, { "loss": 0.12105774879455566, "grad_norm": 0.9380770921707153, "learning_rate": 0.00018607673060785812, "epoch": 0.17268546805158053, "step": 5370 }, { "loss": 0.08323792219161988, "grad_norm": 0.9509512782096863, "learning_rate": 0.00018602509862514194, "epoch": 0.173007042479982, "step": 5380 }, { "loss": 0.09783884286880493, "grad_norm": 0.532935619354248, "learning_rate": 0.00018597337827635165, "epoch": 0.17332861690838344, "step": 5390 }, { "loss": 0.11766867637634278, "grad_norm": 0.4933967590332031, "learning_rate": 0.00018592156961461508, "epoch": 0.1736501913367849, "step": 5400 }, { "loss": 0.10667554140090943, "grad_norm": 0.463371217250824, "learning_rate": 0.0001858696726931507, "epoch": 0.17397176576518636, "step": 5410 }, { "loss": 0.1050226092338562, "grad_norm": 1.000925064086914, "learning_rate": 0.00018581768756526762, "epoch": 0.1742933401935878, "step": 5420 }, { "loss": 0.0907900094985962, "grad_norm": 0.8843211531639099, "learning_rate": 0.00018576561428436576, "epoch": 0.17461491462198925, "step": 5430 }, { "loss": 0.09724093675613403, "grad_norm": 0.581278383731842, "learning_rate": 0.00018571345290393539, "epoch": 0.17493648905039072, "step": 5440 }, { "loss": 0.10390642881393433, "grad_norm": 0.5287315249443054, "learning_rate": 0.00018566120347755734, "epoch": 0.17525806347879216, "step": 5450 }, { "loss": 0.11524388790130616, "grad_norm": 0.5281156301498413, "learning_rate": 0.0001856088660589029, "epoch": 0.1755796379071936, "step": 5460 }, { "loss": 0.10431855916976929, "grad_norm": 0.7305079698562622, "learning_rate": 0.0001855564407017337, "epoch": 0.17590121233559508, "step": 5470 }, { "loss": 0.10620676279067993, "grad_norm": 0.29204392433166504, "learning_rate": 0.00018550392745990177, "epoch": 0.17622278676399653, "step": 5480 }, { "loss": 0.11840394735336304, "grad_norm": 0.6711931824684143, "learning_rate": 0.00018545132638734935, "epoch": 0.17654436119239797, "step": 5490 }, { "loss": 0.11159731149673462, "grad_norm": 0.7318800687789917, "learning_rate": 0.00018539863753810898, "epoch": 0.17686593562079944, "step": 5500 }, { "loss": 0.09772720336914062, "grad_norm": 0.5916475057601929, "learning_rate": 0.00018534586096630325, "epoch": 0.17718751004920089, "step": 5510 }, { "loss": 0.11480109691619873, "grad_norm": 0.5805314183235168, "learning_rate": 0.00018529299672614497, "epoch": 0.17750908447760233, "step": 5520 }, { "loss": 0.12061715126037598, "grad_norm": 0.8949525952339172, "learning_rate": 0.00018524004487193696, "epoch": 0.1778306589060038, "step": 5530 }, { "loss": 0.12319254875183105, "grad_norm": 0.7165164351463318, "learning_rate": 0.000185187005458072, "epoch": 0.17815223333440525, "step": 5540 }, { "loss": 0.09097777605056763, "grad_norm": 0.3125317692756653, "learning_rate": 0.00018513387853903287, "epoch": 0.1784738077628067, "step": 5550 }, { "loss": 0.12090181112289429, "grad_norm": 0.8263435363769531, "learning_rate": 0.0001850806641693922, "epoch": 0.17879538219120816, "step": 5560 }, { "loss": 0.09867244362831115, "grad_norm": 0.9423380494117737, "learning_rate": 0.00018502736240381251, "epoch": 0.1791169566196096, "step": 5570 }, { "loss": 0.0943190574645996, "grad_norm": 0.5803310871124268, "learning_rate": 0.000184973973297046, "epoch": 0.17943853104801105, "step": 5580 }, { "loss": 0.11495518684387207, "grad_norm": 0.9183040857315063, "learning_rate": 0.0001849204969039347, "epoch": 0.17976010547641252, "step": 5590 }, { "loss": 0.11546754837036133, "grad_norm": 0.6139799952507019, "learning_rate": 0.0001848669332794102, "epoch": 0.18008167990481397, "step": 5600 }, { "loss": 0.10671877861022949, "grad_norm": 0.5461157560348511, "learning_rate": 0.0001848132824784937, "epoch": 0.1804032543332154, "step": 5610 }, { "loss": 0.1120229721069336, "grad_norm": 0.9857230186462402, "learning_rate": 0.00018475954455629605, "epoch": 0.18072482876161688, "step": 5620 }, { "loss": 0.10375741720199586, "grad_norm": 0.7333449721336365, "learning_rate": 0.0001847057195680175, "epoch": 0.18104640319001833, "step": 5630 }, { "loss": 0.14358291625976563, "grad_norm": 0.8412182331085205, "learning_rate": 0.00018465180756894775, "epoch": 0.18136797761841977, "step": 5640 }, { "loss": 0.09533635377883912, "grad_norm": 1.1052337884902954, "learning_rate": 0.0001845978086144659, "epoch": 0.18168955204682125, "step": 5650 }, { "loss": 0.09434363842010499, "grad_norm": 0.8389447331428528, "learning_rate": 0.0001845437227600404, "epoch": 0.1820111264752227, "step": 5660 }, { "loss": 0.11739598512649536, "grad_norm": 0.46825969219207764, "learning_rate": 0.00018448955006122885, "epoch": 0.18233270090362413, "step": 5670 }, { "loss": 0.10412952899932862, "grad_norm": 0.725148618221283, "learning_rate": 0.00018443529057367817, "epoch": 0.1826542753320256, "step": 5680 }, { "loss": 0.11758382320404052, "grad_norm": 0.6468080878257751, "learning_rate": 0.00018438094435312445, "epoch": 0.18297584976042705, "step": 5690 }, { "loss": 0.09282848238945007, "grad_norm": 0.8106268644332886, "learning_rate": 0.00018432651145539272, "epoch": 0.1832974241888285, "step": 5700 }, { "loss": 0.09028403759002686, "grad_norm": 0.5799915790557861, "learning_rate": 0.0001842719919363972, "epoch": 0.18361899861722997, "step": 5710 }, { "loss": 0.09892613887786865, "grad_norm": 0.7924032211303711, "learning_rate": 0.00018421738585214104, "epoch": 0.1839405730456314, "step": 5720 }, { "loss": 0.1065057396888733, "grad_norm": 0.7232165932655334, "learning_rate": 0.00018416269325871628, "epoch": 0.18426214747403286, "step": 5730 }, { "loss": 0.10866960287094116, "grad_norm": 0.753584086894989, "learning_rate": 0.00018410791421230385, "epoch": 0.18458372190243433, "step": 5740 }, { "loss": 0.11250456571578979, "grad_norm": 0.6191846132278442, "learning_rate": 0.00018405304876917348, "epoch": 0.18490529633083577, "step": 5750 }, { "loss": 0.08145703673362732, "grad_norm": 0.817142903804779, "learning_rate": 0.0001839980969856837, "epoch": 0.18522687075923722, "step": 5760 }, { "loss": 0.10531235933303833, "grad_norm": 0.5584696531295776, "learning_rate": 0.0001839430589182816, "epoch": 0.1855484451876387, "step": 5770 }, { "loss": 0.10580016374588012, "grad_norm": 0.3925810754299164, "learning_rate": 0.000183887934623503, "epoch": 0.18587001961604013, "step": 5780 }, { "loss": 0.09895262718200684, "grad_norm": 0.5467655658721924, "learning_rate": 0.00018383272415797234, "epoch": 0.18619159404444158, "step": 5790 }, { "loss": 0.13267139196395875, "grad_norm": 1.0078212022781372, "learning_rate": 0.00018377742757840244, "epoch": 0.18651316847284305, "step": 5800 }, { "loss": 0.11166650056838989, "grad_norm": 0.737362802028656, "learning_rate": 0.00018372204494159465, "epoch": 0.1868347429012445, "step": 5810 }, { "loss": 0.10281800031661988, "grad_norm": 1.0692893266677856, "learning_rate": 0.00018366657630443882, "epoch": 0.18715631732964594, "step": 5820 }, { "loss": 0.0977216124534607, "grad_norm": 0.6687707304954529, "learning_rate": 0.00018361102172391293, "epoch": 0.1874778917580474, "step": 5830 }, { "loss": 0.10584867000579834, "grad_norm": 0.7733855843544006, "learning_rate": 0.00018355538125708334, "epoch": 0.18779946618644885, "step": 5840 }, { "loss": 0.09958687424659729, "grad_norm": 0.7798100113868713, "learning_rate": 0.00018349965496110472, "epoch": 0.1881210406148503, "step": 5850 }, { "loss": 0.10751545429229736, "grad_norm": 0.6445668339729309, "learning_rate": 0.00018344384289321978, "epoch": 0.18844261504325177, "step": 5860 }, { "loss": 0.09561465978622437, "grad_norm": 0.7363864183425903, "learning_rate": 0.00018338794511075943, "epoch": 0.18876418947165322, "step": 5870 }, { "loss": 0.11222798824310302, "grad_norm": 0.9353572726249695, "learning_rate": 0.00018333196167114255, "epoch": 0.18908576390005466, "step": 5880 }, { "loss": 0.09195201396942139, "grad_norm": 0.7091829180717468, "learning_rate": 0.00018327589263187602, "epoch": 0.18940733832845613, "step": 5890 }, { "loss": 0.07509946227073669, "grad_norm": 0.6560493111610413, "learning_rate": 0.00018321973805055474, "epoch": 0.18972891275685758, "step": 5900 }, { "loss": 0.09939819574356079, "grad_norm": 0.44442832469940186, "learning_rate": 0.00018316349798486132, "epoch": 0.19005048718525902, "step": 5910 }, { "loss": 0.09033665060997009, "grad_norm": 0.7245857119560242, "learning_rate": 0.00018310717249256633, "epoch": 0.1903720616136605, "step": 5920 }, { "loss": 0.13576501607894897, "grad_norm": 0.8852182030677795, "learning_rate": 0.00018305076163152798, "epoch": 0.19069363604206194, "step": 5930 }, { "loss": 0.0938784658908844, "grad_norm": 0.6078718900680542, "learning_rate": 0.0001829942654596923, "epoch": 0.19101521047046338, "step": 5940 }, { "loss": 0.11368472576141357, "grad_norm": 0.5211871862411499, "learning_rate": 0.00018293768403509282, "epoch": 0.19133678489886485, "step": 5950 }, { "loss": 0.08751682639122009, "grad_norm": 0.5084543824195862, "learning_rate": 0.00018288101741585072, "epoch": 0.1916583593272663, "step": 5960 }, { "loss": 0.12171189785003662, "grad_norm": 0.9775353074073792, "learning_rate": 0.00018282426566017463, "epoch": 0.19197993375566774, "step": 5970 }, { "loss": 0.09461681842803955, "grad_norm": 0.6826309561729431, "learning_rate": 0.00018276742882636073, "epoch": 0.19230150818406921, "step": 5980 }, { "loss": 0.12977845668792726, "grad_norm": 0.9831529855728149, "learning_rate": 0.0001827105069727925, "epoch": 0.19262308261247066, "step": 5990 }, { "loss": 0.11812064647674561, "grad_norm": 0.4797765910625458, "learning_rate": 0.00018265350015794082, "epoch": 0.1929446570408721, "step": 6000 }, { "loss": 0.08487967252731324, "grad_norm": 0.6206749081611633, "learning_rate": 0.00018259640844036382, "epoch": 0.19326623146927357, "step": 6010 }, { "loss": 0.10489945411682129, "grad_norm": 0.8495069146156311, "learning_rate": 0.00018253923187870678, "epoch": 0.19358780589767502, "step": 6020 }, { "loss": 0.09415996074676514, "grad_norm": 0.512051522731781, "learning_rate": 0.00018248197053170225, "epoch": 0.19390938032607646, "step": 6030 }, { "loss": 0.10890600681304932, "grad_norm": 0.696121096611023, "learning_rate": 0.00018242462445816976, "epoch": 0.19423095475447794, "step": 6040 }, { "loss": 0.12515428066253662, "grad_norm": 0.6889166235923767, "learning_rate": 0.000182367193717016, "epoch": 0.19455252918287938, "step": 6050 }, { "loss": 0.12075965404510498, "grad_norm": 0.6734789609909058, "learning_rate": 0.0001823096783672345, "epoch": 0.19487410361128082, "step": 6060 }, { "loss": 0.10391931533813477, "grad_norm": 1.2357184886932373, "learning_rate": 0.00018225207846790572, "epoch": 0.1951956780396823, "step": 6070 }, { "loss": 0.12936931848526, "grad_norm": 0.5635527968406677, "learning_rate": 0.00018219439407819713, "epoch": 0.19551725246808374, "step": 6080 }, { "loss": 0.10976414680480957, "grad_norm": 0.9435555338859558, "learning_rate": 0.00018213662525736274, "epoch": 0.19583882689648519, "step": 6090 }, { "loss": 0.09746204018592834, "grad_norm": 0.4754216969013214, "learning_rate": 0.0001820787720647435, "epoch": 0.19616040132488666, "step": 6100 }, { "loss": 0.10500679016113282, "grad_norm": 0.4885876476764679, "learning_rate": 0.00018202083455976694, "epoch": 0.1964819757532881, "step": 6110 }, { "loss": 0.09986729621887207, "grad_norm": 0.674632728099823, "learning_rate": 0.00018196281280194714, "epoch": 0.19680355018168955, "step": 6120 }, { "loss": 0.10967297554016113, "grad_norm": 0.794371485710144, "learning_rate": 0.00018190470685088483, "epoch": 0.19712512461009102, "step": 6130 }, { "loss": 0.0897097647190094, "grad_norm": 0.5588971376419067, "learning_rate": 0.0001818465167662672, "epoch": 0.19744669903849246, "step": 6140 }, { "loss": 0.10592899322509766, "grad_norm": 1.0722829103469849, "learning_rate": 0.00018178824260786782, "epoch": 0.1977682734668939, "step": 6150 }, { "loss": 0.10921781063079834, "grad_norm": 0.8634899854660034, "learning_rate": 0.0001817298844355466, "epoch": 0.19808984789529538, "step": 6160 }, { "loss": 0.08706367015838623, "grad_norm": 0.7577278017997742, "learning_rate": 0.0001816714423092499, "epoch": 0.19841142232369682, "step": 6170 }, { "loss": 0.10604641437530518, "grad_norm": 0.6018843054771423, "learning_rate": 0.00018161291628901013, "epoch": 0.19873299675209827, "step": 6180 }, { "loss": 0.1325182557106018, "grad_norm": 0.8310794830322266, "learning_rate": 0.000181554306434946, "epoch": 0.19905457118049974, "step": 6190 }, { "loss": 0.09288270473480224, "grad_norm": 0.479806512594223, "learning_rate": 0.0001814956128072623, "epoch": 0.19937614560890118, "step": 6200 }, { "loss": 0.11677355766296386, "grad_norm": 0.6561117768287659, "learning_rate": 0.0001814368354662498, "epoch": 0.19969772003730263, "step": 6210 }, { "eval_loss": 0.09828303754329681, "eval_runtime": 34.4157, "eval_samples_per_second": 146.038, "eval_steps_per_second": 36.524, "epoch": 0.1998906646943435, "step": 6216 }, { "loss": 0.10979170799255371, "grad_norm": 0.6072927713394165, "learning_rate": 0.00018137797447228545, "epoch": 0.2000192944657041, "step": 6220 }, { "loss": 0.07831120491027832, "grad_norm": 0.46518734097480774, "learning_rate": 0.00018131902988583192, "epoch": 0.20034086889410554, "step": 6230 }, { "loss": 0.0935763657093048, "grad_norm": 0.878280520439148, "learning_rate": 0.00018126000176743785, "epoch": 0.200662443322507, "step": 6240 }, { "loss": 0.08065707087516785, "grad_norm": 1.1618974208831787, "learning_rate": 0.0001812008901777377, "epoch": 0.20098401775090846, "step": 6250 }, { "loss": 0.10877575874328613, "grad_norm": 0.9426342844963074, "learning_rate": 0.00018114169517745163, "epoch": 0.2013055921793099, "step": 6260 }, { "loss": 0.09039924144744874, "grad_norm": 0.5820074677467346, "learning_rate": 0.0001810824168273855, "epoch": 0.20162716660771135, "step": 6270 }, { "loss": 0.10094892978668213, "grad_norm": 0.999288022518158, "learning_rate": 0.0001810230551884308, "epoch": 0.20194874103611282, "step": 6280 }, { "loss": 0.06847935914993286, "grad_norm": 0.4741264283657074, "learning_rate": 0.00018096361032156452, "epoch": 0.20227031546451427, "step": 6290 }, { "loss": 0.08855631351470947, "grad_norm": 0.5086546540260315, "learning_rate": 0.00018090408228784927, "epoch": 0.2025918898929157, "step": 6300 }, { "loss": 0.08249475955963134, "grad_norm": 0.7000566124916077, "learning_rate": 0.00018084447114843287, "epoch": 0.20291346432131716, "step": 6310 }, { "loss": 0.10006040334701538, "grad_norm": 0.7028230428695679, "learning_rate": 0.0001807847769645487, "epoch": 0.20323503874971863, "step": 6320 }, { "loss": 0.10323059558868408, "grad_norm": 0.9211930632591248, "learning_rate": 0.00018072499979751542, "epoch": 0.20355661317812007, "step": 6330 }, { "loss": 0.10563849210739136, "grad_norm": 0.547076940536499, "learning_rate": 0.0001806651397087369, "epoch": 0.20387818760652152, "step": 6340 }, { "loss": 0.10014694929122925, "grad_norm": 0.5817078351974487, "learning_rate": 0.00018060519675970214, "epoch": 0.204199762034923, "step": 6350 }, { "loss": 0.1062752366065979, "grad_norm": 1.991796851158142, "learning_rate": 0.00018054517101198528, "epoch": 0.20452133646332443, "step": 6360 }, { "loss": 0.10059846639633178, "grad_norm": 0.5084323883056641, "learning_rate": 0.00018048506252724557, "epoch": 0.20484291089172588, "step": 6370 }, { "loss": 0.06880200505256653, "grad_norm": 0.3965473771095276, "learning_rate": 0.0001804248713672272, "epoch": 0.20516448532012735, "step": 6380 }, { "loss": 0.0917474627494812, "grad_norm": 0.7427871823310852, "learning_rate": 0.0001803645975937593, "epoch": 0.2054860597485288, "step": 6390 }, { "loss": 0.10400476455688476, "grad_norm": 0.862004280090332, "learning_rate": 0.00018030424126875584, "epoch": 0.20580763417693024, "step": 6400 }, { "loss": 0.11660934686660766, "grad_norm": 0.8293169140815735, "learning_rate": 0.00018024380245421564, "epoch": 0.2061292086053317, "step": 6410 }, { "loss": 0.10720772743225097, "grad_norm": 0.5888752341270447, "learning_rate": 0.00018018328121222217, "epoch": 0.20645078303373315, "step": 6420 }, { "loss": 0.11179831027984619, "grad_norm": 0.6469850540161133, "learning_rate": 0.00018012267760494363, "epoch": 0.2067723574621346, "step": 6430 }, { "loss": 0.09034927487373352, "grad_norm": 0.4641270935535431, "learning_rate": 0.00018006199169463285, "epoch": 0.20709393189053607, "step": 6440 }, { "loss": 0.09115891456604004, "grad_norm": 0.683894693851471, "learning_rate": 0.0001800012235436271, "epoch": 0.20741550631893751, "step": 6450 }, { "loss": 0.10708479881286621, "grad_norm": 0.8873001337051392, "learning_rate": 0.0001799403732143483, "epoch": 0.20773708074733896, "step": 6460 }, { "loss": 0.08460823893547058, "grad_norm": 0.35685697197914124, "learning_rate": 0.0001798794407693026, "epoch": 0.20805865517574043, "step": 6470 }, { "loss": 0.12977834939956664, "grad_norm": 1.287055253982544, "learning_rate": 0.0001798184262710806, "epoch": 0.20838022960414188, "step": 6480 }, { "loss": 0.127907657623291, "grad_norm": 0.3801760673522949, "learning_rate": 0.00017975732978235716, "epoch": 0.20870180403254332, "step": 6490 }, { "loss": 0.11531537771224976, "grad_norm": 1.2351274490356445, "learning_rate": 0.00017969615136589141, "epoch": 0.2090233784609448, "step": 6500 }, { "loss": 0.10003373622894288, "grad_norm": 0.43053725361824036, "learning_rate": 0.00017963489108452656, "epoch": 0.20934495288934624, "step": 6510 }, { "loss": 0.11411433219909668, "grad_norm": 0.7325254678726196, "learning_rate": 0.00017957354900118992, "epoch": 0.20966652731774768, "step": 6520 }, { "loss": 0.10711169242858887, "grad_norm": 1.153060793876648, "learning_rate": 0.00017951212517889294, "epoch": 0.20998810174614915, "step": 6530 }, { "loss": 0.13314961194992064, "grad_norm": 0.879439115524292, "learning_rate": 0.00017945061968073088, "epoch": 0.2103096761745506, "step": 6540 }, { "loss": 0.10021822452545166, "grad_norm": 1.6467922925949097, "learning_rate": 0.000179389032569883, "epoch": 0.21063125060295204, "step": 6550 }, { "loss": 0.10448052883148193, "grad_norm": 0.8538579940795898, "learning_rate": 0.00017932736390961234, "epoch": 0.2109528250313535, "step": 6560 }, { "loss": 0.10208958387374878, "grad_norm": 0.7283279299736023, "learning_rate": 0.00017926561376326578, "epoch": 0.21127439945975496, "step": 6570 }, { "loss": 0.10935897827148437, "grad_norm": 1.3195074796676636, "learning_rate": 0.00017920378219427377, "epoch": 0.2115959738881564, "step": 6580 }, { "loss": 0.12804192304611206, "grad_norm": 1.063864827156067, "learning_rate": 0.00017914186926615058, "epoch": 0.21191754831655787, "step": 6590 }, { "loss": 0.10533335208892822, "grad_norm": 0.8524525165557861, "learning_rate": 0.00017907987504249388, "epoch": 0.21223912274495932, "step": 6600 }, { "loss": 0.07631253004074097, "grad_norm": 0.6262599229812622, "learning_rate": 0.00017901779958698499, "epoch": 0.21256069717336076, "step": 6610 }, { "loss": 0.08477730751037597, "grad_norm": 0.8015865683555603, "learning_rate": 0.00017895564296338857, "epoch": 0.21288227160176224, "step": 6620 }, { "loss": 0.0852261483669281, "grad_norm": 0.7002324461936951, "learning_rate": 0.00017889340523555267, "epoch": 0.21320384603016368, "step": 6630 }, { "loss": 0.12231900691986083, "grad_norm": 0.36369815468788147, "learning_rate": 0.00017883108646740868, "epoch": 0.21352542045856512, "step": 6640 }, { "loss": 0.11484540700912475, "grad_norm": 0.6854612827301025, "learning_rate": 0.0001787686867229713, "epoch": 0.2138469948869666, "step": 6650 }, { "loss": 0.11369551420211792, "grad_norm": 0.6591107845306396, "learning_rate": 0.00017870620606633825, "epoch": 0.21416856931536804, "step": 6660 }, { "loss": 0.12295722961425781, "grad_norm": 0.40087050199508667, "learning_rate": 0.00017864364456169054, "epoch": 0.21449014374376948, "step": 6670 }, { "loss": 0.10497041940689086, "grad_norm": 0.7661556601524353, "learning_rate": 0.00017858100227329207, "epoch": 0.21481171817217096, "step": 6680 }, { "loss": 0.1099507451057434, "grad_norm": 0.7987531423568726, "learning_rate": 0.00017851827926548982, "epoch": 0.2151332926005724, "step": 6690 }, { "loss": 0.10177501440048217, "grad_norm": 0.8020938634872437, "learning_rate": 0.00017845547560271365, "epoch": 0.21545486702897385, "step": 6700 }, { "loss": 0.09196376204490661, "grad_norm": 0.4774525761604309, "learning_rate": 0.0001783925913494763, "epoch": 0.21577644145737532, "step": 6710 }, { "loss": 0.11327618360519409, "grad_norm": 0.4038717746734619, "learning_rate": 0.00017832962657037327, "epoch": 0.21609801588577676, "step": 6720 }, { "loss": 0.10533188581466675, "grad_norm": 1.034292459487915, "learning_rate": 0.0001782665813300828, "epoch": 0.2164195903141782, "step": 6730 }, { "loss": 0.1124314546585083, "grad_norm": 0.9090122580528259, "learning_rate": 0.0001782034556933657, "epoch": 0.21674116474257968, "step": 6740 }, { "loss": 0.13018221855163575, "grad_norm": 1.0018212795257568, "learning_rate": 0.00017814024972506544, "epoch": 0.21706273917098112, "step": 6750 }, { "loss": 0.12027702331542969, "grad_norm": 0.7981282472610474, "learning_rate": 0.00017807696349010808, "epoch": 0.21738431359938257, "step": 6760 }, { "loss": 0.08453170061111451, "grad_norm": 0.5748383402824402, "learning_rate": 0.00017801359705350197, "epoch": 0.21770588802778404, "step": 6770 }, { "loss": 0.09876747727394104, "grad_norm": 0.7399095892906189, "learning_rate": 0.0001779501504803379, "epoch": 0.21802746245618548, "step": 6780 }, { "loss": 0.10025395154953003, "grad_norm": 0.6438655853271484, "learning_rate": 0.00017788662383578905, "epoch": 0.21834903688458693, "step": 6790 }, { "loss": 0.11686220169067382, "grad_norm": 0.4488239884376526, "learning_rate": 0.00017782301718511075, "epoch": 0.2186706113129884, "step": 6800 }, { "loss": 0.11589747667312622, "grad_norm": 1.082696557044983, "learning_rate": 0.00017775933059364061, "epoch": 0.21899218574138984, "step": 6810 }, { "loss": 0.08020769357681275, "grad_norm": 0.6219139099121094, "learning_rate": 0.0001776955641267983, "epoch": 0.2193137601697913, "step": 6820 }, { "loss": 0.1270586371421814, "grad_norm": 0.6280378103256226, "learning_rate": 0.00017763171785008545, "epoch": 0.21963533459819276, "step": 6830 }, { "loss": 0.09877665042877197, "grad_norm": 0.6493573784828186, "learning_rate": 0.0001775677918290859, "epoch": 0.2199569090265942, "step": 6840 }, { "loss": 0.08704429864883423, "grad_norm": 0.637754499912262, "learning_rate": 0.00017750378612946518, "epoch": 0.22027848345499565, "step": 6850 }, { "loss": 0.10899631977081299, "grad_norm": 0.6064416766166687, "learning_rate": 0.0001774397008169708, "epoch": 0.22060005788339712, "step": 6860 }, { "loss": 0.09086751341819763, "grad_norm": 0.6109260320663452, "learning_rate": 0.00017737553595743201, "epoch": 0.22092163231179857, "step": 6870 }, { "loss": 0.11162760257720947, "grad_norm": 0.8633526563644409, "learning_rate": 0.00017731129161675972, "epoch": 0.2212432067402, "step": 6880 }, { "loss": 0.10341193675994872, "grad_norm": 0.6742216944694519, "learning_rate": 0.00017724696786094662, "epoch": 0.22156478116860148, "step": 6890 }, { "loss": 0.08535621166229249, "grad_norm": 0.6198155283927917, "learning_rate": 0.0001771825647560668, "epoch": 0.22188635559700293, "step": 6900 }, { "loss": 0.10448358058929444, "grad_norm": 0.7120717763900757, "learning_rate": 0.00017711808236827597, "epoch": 0.22220793002540437, "step": 6910 }, { "loss": 0.08835526704788207, "grad_norm": 0.6049490571022034, "learning_rate": 0.00017705352076381127, "epoch": 0.22252950445380584, "step": 6920 }, { "loss": 0.0722814679145813, "grad_norm": 0.3987206518650055, "learning_rate": 0.00017698888000899124, "epoch": 0.2228510788822073, "step": 6930 }, { "loss": 0.10768955945968628, "grad_norm": 0.5643488764762878, "learning_rate": 0.00017692416017021562, "epoch": 0.22317265331060873, "step": 6940 }, { "loss": 0.12390257120132446, "grad_norm": 0.39716973900794983, "learning_rate": 0.00017685936131396548, "epoch": 0.2234942277390102, "step": 6950 }, { "loss": 0.08557771444320679, "grad_norm": 0.38591131567955017, "learning_rate": 0.00017679448350680304, "epoch": 0.22381580216741165, "step": 6960 }, { "loss": 0.08185508251190185, "grad_norm": 0.9277451634407043, "learning_rate": 0.00017672952681537155, "epoch": 0.2241373765958131, "step": 6970 }, { "loss": 0.11774744987487792, "grad_norm": 0.5688632726669312, "learning_rate": 0.00017666449130639543, "epoch": 0.22445895102421456, "step": 6980 }, { "loss": 0.10373042821884156, "grad_norm": 0.4881923496723175, "learning_rate": 0.00017659937704667992, "epoch": 0.224780525452616, "step": 6990 }, { "loss": 0.1025202989578247, "grad_norm": 0.761814296245575, "learning_rate": 0.00017653418410311126, "epoch": 0.22510209988101745, "step": 7000 }, { "loss": 0.10153430700302124, "grad_norm": 1.0617280006408691, "learning_rate": 0.00017646891254265645, "epoch": 0.22542367430941893, "step": 7010 }, { "loss": 0.08137912154197693, "grad_norm": 0.5417892932891846, "learning_rate": 0.00017640356243236327, "epoch": 0.22574524873782037, "step": 7020 }, { "loss": 0.11546515226364136, "grad_norm": 0.5282878875732422, "learning_rate": 0.00017633813383936015, "epoch": 0.22606682316622181, "step": 7030 }, { "loss": 0.1052852988243103, "grad_norm": 0.6157312989234924, "learning_rate": 0.00017627262683085618, "epoch": 0.2263883975946233, "step": 7040 }, { "loss": 0.10838897228240967, "grad_norm": 0.6011908650398254, "learning_rate": 0.00017620704147414107, "epoch": 0.22670997202302473, "step": 7050 }, { "loss": 0.0947287917137146, "grad_norm": 0.5182114243507385, "learning_rate": 0.00017614137783658482, "epoch": 0.22703154645142617, "step": 7060 }, { "loss": 0.09444833993911743, "grad_norm": 0.5272310376167297, "learning_rate": 0.000176075635985638, "epoch": 0.22735312087982765, "step": 7070 }, { "loss": 0.09998944401741028, "grad_norm": 0.702633798122406, "learning_rate": 0.00017600981598883152, "epoch": 0.2276746953082291, "step": 7080 }, { "loss": 0.07797504663467407, "grad_norm": 0.9731748700141907, "learning_rate": 0.00017594391791377647, "epoch": 0.22799626973663054, "step": 7090 }, { "loss": 0.09055452346801758, "grad_norm": 0.6219237446784973, "learning_rate": 0.00017587794182816416, "epoch": 0.228317844165032, "step": 7100 }, { "loss": 0.10952922105789184, "grad_norm": 0.6282539963722229, "learning_rate": 0.00017581188779976613, "epoch": 0.22863941859343345, "step": 7110 }, { "loss": 0.10705915689468384, "grad_norm": 1.1422780752182007, "learning_rate": 0.00017574575589643384, "epoch": 0.2289609930218349, "step": 7120 }, { "loss": 0.09305276870727539, "grad_norm": 0.6948840618133545, "learning_rate": 0.00017567954618609894, "epoch": 0.22928256745023637, "step": 7130 }, { "loss": 0.09609219431877136, "grad_norm": 0.5128272175788879, "learning_rate": 0.00017561325873677276, "epoch": 0.2296041418786378, "step": 7140 }, { "loss": 0.10106382369995118, "grad_norm": 0.9541149139404297, "learning_rate": 0.00017554689361654668, "epoch": 0.22992571630703926, "step": 7150 }, { "loss": 0.11649857759475708, "grad_norm": 0.8821794986724854, "learning_rate": 0.00017548045089359177, "epoch": 0.23024729073544073, "step": 7160 }, { "loss": 0.09836030006408691, "grad_norm": 0.839900016784668, "learning_rate": 0.00017541393063615884, "epoch": 0.23056886516384217, "step": 7170 }, { "loss": 0.09720966219902039, "grad_norm": 0.6266375780105591, "learning_rate": 0.00017534733291257838, "epoch": 0.23089043959224362, "step": 7180 }, { "loss": 0.10686802864074707, "grad_norm": 0.9709400534629822, "learning_rate": 0.00017528065779126033, "epoch": 0.2312120140206451, "step": 7190 }, { "loss": 0.0964643657207489, "grad_norm": 0.6434114575386047, "learning_rate": 0.0001752139053406943, "epoch": 0.23153358844904653, "step": 7200 }, { "loss": 0.09592649936676026, "grad_norm": 0.6001937389373779, "learning_rate": 0.00017514707562944927, "epoch": 0.23185516287744798, "step": 7210 }, { "loss": 0.09946632385253906, "grad_norm": 0.5942351222038269, "learning_rate": 0.0001750801687261735, "epoch": 0.23217673730584945, "step": 7220 }, { "loss": 0.11563431024551392, "grad_norm": 0.9182334542274475, "learning_rate": 0.00017501318469959463, "epoch": 0.2324983117342509, "step": 7230 }, { "loss": 0.10812370777130127, "grad_norm": 0.5348995327949524, "learning_rate": 0.00017494612361851957, "epoch": 0.23281988616265234, "step": 7240 }, { "loss": 0.10731531381607055, "grad_norm": 0.696471095085144, "learning_rate": 0.0001748789855518342, "epoch": 0.2331414605910538, "step": 7250 }, { "loss": 0.09992768168449402, "grad_norm": 1.2543549537658691, "learning_rate": 0.00017481177056850368, "epoch": 0.23346303501945526, "step": 7260 }, { "loss": 0.0912170648574829, "grad_norm": 1.0720438957214355, "learning_rate": 0.00017474447873757208, "epoch": 0.2337846094478567, "step": 7270 }, { "loss": 0.10393401384353637, "grad_norm": 0.7423423528671265, "learning_rate": 0.0001746771101281624, "epoch": 0.23410618387625817, "step": 7280 }, { "loss": 0.11489673852920532, "grad_norm": 0.9315942525863647, "learning_rate": 0.00017460966480947656, "epoch": 0.23442775830465962, "step": 7290 }, { "loss": 0.08871862292289734, "grad_norm": 0.6180339455604553, "learning_rate": 0.0001745421428507952, "epoch": 0.23474933273306106, "step": 7300 }, { "loss": 0.08185545206069947, "grad_norm": 0.3975384533405304, "learning_rate": 0.00017447454432147782, "epoch": 0.23507090716146253, "step": 7310 }, { "loss": 0.0861358404159546, "grad_norm": 0.49309736490249634, "learning_rate": 0.00017440686929096242, "epoch": 0.23539248158986398, "step": 7320 }, { "loss": 0.09481869339942932, "grad_norm": 0.8725064992904663, "learning_rate": 0.00017433911782876565, "epoch": 0.23571405601826542, "step": 7330 }, { "loss": 0.09213900566101074, "grad_norm": 0.7655817270278931, "learning_rate": 0.00017427129000448268, "epoch": 0.2360356304466669, "step": 7340 }, { "loss": 0.09312846064567566, "grad_norm": 0.8202439546585083, "learning_rate": 0.00017420338588778714, "epoch": 0.23635720487506834, "step": 7350 }, { "loss": 0.09380109310150146, "grad_norm": 0.8451393842697144, "learning_rate": 0.00017413540554843098, "epoch": 0.23667877930346978, "step": 7360 }, { "loss": 0.10455808639526368, "grad_norm": 0.7913851141929626, "learning_rate": 0.00017406734905624447, "epoch": 0.23700035373187125, "step": 7370 }, { "loss": 0.11372662782669067, "grad_norm": 0.5296722054481506, "learning_rate": 0.0001739992164811361, "epoch": 0.2373219281602727, "step": 7380 }, { "loss": 0.10835660696029663, "grad_norm": 0.6034450531005859, "learning_rate": 0.0001739310078930925, "epoch": 0.23764350258867414, "step": 7390 }, { "loss": 0.10947132110595703, "grad_norm": 1.676663875579834, "learning_rate": 0.00017386272336217845, "epoch": 0.23796507701707562, "step": 7400 }, { "loss": 0.09162078499794006, "grad_norm": 0.5721451044082642, "learning_rate": 0.00017379436295853664, "epoch": 0.23828665144547706, "step": 7410 }, { "loss": 0.1085892677307129, "grad_norm": 0.8040403723716736, "learning_rate": 0.00017372592675238776, "epoch": 0.2386082258738785, "step": 7420 }, { "loss": 0.09048799872398376, "grad_norm": 0.5279026031494141, "learning_rate": 0.00017365741481403037, "epoch": 0.23892980030227998, "step": 7430 }, { "loss": 0.1112370252609253, "grad_norm": 0.596389651298523, "learning_rate": 0.00017358882721384077, "epoch": 0.23925137473068142, "step": 7440 }, { "loss": 0.12354772090911866, "grad_norm": 0.4829653799533844, "learning_rate": 0.00017352016402227304, "epoch": 0.23957294915908287, "step": 7450 }, { "loss": 0.10008889436721802, "grad_norm": 0.8064790964126587, "learning_rate": 0.00017345142530985887, "epoch": 0.2398945235874843, "step": 7460 }, { "loss": 0.08722924590110778, "grad_norm": 0.5152531862258911, "learning_rate": 0.00017338261114720752, "epoch": 0.24021609801588578, "step": 7470 }, { "loss": 0.10029129981994629, "grad_norm": 0.5598156452178955, "learning_rate": 0.00017331372160500584, "epoch": 0.24053767244428723, "step": 7480 }, { "loss": 0.09828886985778809, "grad_norm": 0.30673664808273315, "learning_rate": 0.00017324475675401795, "epoch": 0.24085924687268867, "step": 7490 }, { "loss": 0.09425093531608582, "grad_norm": 2.1410884857177734, "learning_rate": 0.0001731757166650855, "epoch": 0.24118082130109014, "step": 7500 }, { "loss": 0.09232192039489746, "grad_norm": 0.9482953548431396, "learning_rate": 0.0001731066014091273, "epoch": 0.2415023957294916, "step": 7510 }, { "loss": 0.08345150351524352, "grad_norm": 0.6554622054100037, "learning_rate": 0.0001730374110571394, "epoch": 0.24182397015789303, "step": 7520 }, { "loss": 0.08806174993515015, "grad_norm": 0.5079277157783508, "learning_rate": 0.00017296814568019508, "epoch": 0.2421455445862945, "step": 7530 }, { "loss": 0.08180884122848511, "grad_norm": 0.5006304383277893, "learning_rate": 0.00017289880534944455, "epoch": 0.24246711901469595, "step": 7540 }, { "loss": 0.08461836576461793, "grad_norm": 0.6327130794525146, "learning_rate": 0.00017282939013611514, "epoch": 0.2427886934430974, "step": 7550 }, { "loss": 0.11851485967636108, "grad_norm": 0.6704835891723633, "learning_rate": 0.000172759900111511, "epoch": 0.24311026787149886, "step": 7560 }, { "loss": 0.09942713379859924, "grad_norm": 0.5339265465736389, "learning_rate": 0.00017269033534701313, "epoch": 0.2434318422999003, "step": 7570 }, { "loss": 0.12718113660812377, "grad_norm": 0.6755968928337097, "learning_rate": 0.00017262069591407938, "epoch": 0.24375341672830175, "step": 7580 }, { "loss": 0.08766931891441346, "grad_norm": 0.40743404626846313, "learning_rate": 0.0001725509818842443, "epoch": 0.24407499115670322, "step": 7590 }, { "loss": 0.08795983195304871, "grad_norm": 1.024290919303894, "learning_rate": 0.00017248119332911895, "epoch": 0.24439656558510467, "step": 7600 }, { "loss": 0.10301086902618409, "grad_norm": 0.6083598136901855, "learning_rate": 0.0001724113303203911, "epoch": 0.2447181400135061, "step": 7610 }, { "loss": 0.09267066121101379, "grad_norm": 0.47813767194747925, "learning_rate": 0.00017234139292982486, "epoch": 0.24503971444190759, "step": 7620 }, { "loss": 0.09036524295806884, "grad_norm": 1.0438446998596191, "learning_rate": 0.0001722713812292608, "epoch": 0.24536128887030903, "step": 7630 }, { "loss": 0.13027456998825074, "grad_norm": 0.6518002152442932, "learning_rate": 0.00017220129529061585, "epoch": 0.24568286329871047, "step": 7640 }, { "loss": 0.09606041312217713, "grad_norm": 0.9624584913253784, "learning_rate": 0.0001721311351858832, "epoch": 0.24600443772711195, "step": 7650 }, { "loss": 0.10024822950363159, "grad_norm": 0.7557567954063416, "learning_rate": 0.00017206090098713218, "epoch": 0.2463260121555134, "step": 7660 }, { "loss": 0.08903319239616395, "grad_norm": 1.0236475467681885, "learning_rate": 0.00017199059276650825, "epoch": 0.24664758658391484, "step": 7670 }, { "loss": 0.11563560962677003, "grad_norm": 1.1230127811431885, "learning_rate": 0.00017192021059623288, "epoch": 0.2469691610123163, "step": 7680 }, { "loss": 0.08626883029937744, "grad_norm": 0.8625466227531433, "learning_rate": 0.00017184975454860357, "epoch": 0.24729073544071775, "step": 7690 }, { "loss": 0.11050916910171509, "grad_norm": 0.9562323093414307, "learning_rate": 0.00017177922469599363, "epoch": 0.2476123098691192, "step": 7700 }, { "loss": 0.12541766166687013, "grad_norm": 0.9169332981109619, "learning_rate": 0.00017170862111085227, "epoch": 0.24793388429752067, "step": 7710 }, { "loss": 0.08499926924705506, "grad_norm": 0.7092348337173462, "learning_rate": 0.00017163794386570438, "epoch": 0.2482554587259221, "step": 7720 }, { "loss": 0.11418701410293579, "grad_norm": 0.5422489643096924, "learning_rate": 0.0001715671930331505, "epoch": 0.24857703315432356, "step": 7730 }, { "loss": 0.07331929206848145, "grad_norm": 0.717381477355957, "learning_rate": 0.0001714963686858668, "epoch": 0.24889860758272503, "step": 7740 }, { "loss": 0.09337686896324157, "grad_norm": 0.7024790644645691, "learning_rate": 0.0001714254708966049, "epoch": 0.24922018201112647, "step": 7750 }, { "loss": 0.12471973896026611, "grad_norm": 0.9360175132751465, "learning_rate": 0.00017135449973819196, "epoch": 0.24954175643952792, "step": 7760 }, { "loss": 0.0979580819606781, "grad_norm": 0.7486076951026917, "learning_rate": 0.00017128345528353046, "epoch": 0.2498633308679294, "step": 7770 }, { "eval_loss": 0.09749862551689148, "eval_runtime": 34.3673, "eval_samples_per_second": 146.244, "eval_steps_per_second": 36.575, "epoch": 0.2498633308679294, "step": 7770 }, { "loss": 0.12439451217651368, "grad_norm": 1.1453461647033691, "learning_rate": 0.00017121233760559817, "epoch": 0.25018490529633086, "step": 7780 }, { "loss": 0.09368115663528442, "grad_norm": 0.5301191210746765, "learning_rate": 0.000171141146777448, "epoch": 0.2505064797247323, "step": 7790 }, { "loss": 0.09548683166503906, "grad_norm": 1.0390958786010742, "learning_rate": 0.0001710698828722082, "epoch": 0.25082805415313375, "step": 7800 }, { "loss": 0.10714445114135743, "grad_norm": 0.6461021304130554, "learning_rate": 0.00017099854596308185, "epoch": 0.2511496285815352, "step": 7810 }, { "loss": 0.10078861713409423, "grad_norm": 0.9467542171478271, "learning_rate": 0.00017092713612334713, "epoch": 0.25147120300993664, "step": 7820 }, { "loss": 0.10617483854293823, "grad_norm": 0.6734544634819031, "learning_rate": 0.00017085565342635724, "epoch": 0.2517927774383381, "step": 7830 }, { "loss": 0.11229211091995239, "grad_norm": 0.7666560411453247, "learning_rate": 0.00017078409794554, "epoch": 0.2521143518667396, "step": 7840 }, { "loss": 0.08903430700302124, "grad_norm": 1.0733190774917603, "learning_rate": 0.00017071246975439817, "epoch": 0.252435926295141, "step": 7850 }, { "loss": 0.10291492938995361, "grad_norm": 0.6430104970932007, "learning_rate": 0.00017064076892650914, "epoch": 0.25275750072354247, "step": 7860 }, { "loss": 0.0859697163105011, "grad_norm": 0.7043799161911011, "learning_rate": 0.00017056899553552485, "epoch": 0.25307907515194394, "step": 7870 }, { "loss": 0.08448026180267335, "grad_norm": 1.1739349365234375, "learning_rate": 0.00017049714965517189, "epoch": 0.25340064958034536, "step": 7880 }, { "loss": 0.10753465890884399, "grad_norm": 0.6605959534645081, "learning_rate": 0.0001704252313592513, "epoch": 0.25372222400874683, "step": 7890 }, { "loss": 0.09561908841133118, "grad_norm": 0.505441427230835, "learning_rate": 0.00017035324072163833, "epoch": 0.2540437984371483, "step": 7900 }, { "loss": 0.12803629636764527, "grad_norm": 0.6273232698440552, "learning_rate": 0.00017028117781628282, "epoch": 0.2543653728655497, "step": 7910 }, { "loss": 0.08171590566635131, "grad_norm": 0.582779049873352, "learning_rate": 0.00017020904271720867, "epoch": 0.2546869472939512, "step": 7920 }, { "loss": 0.08273233771324158, "grad_norm": 0.4706325829029083, "learning_rate": 0.00017013683549851392, "epoch": 0.25500852172235267, "step": 7930 }, { "loss": 0.10103654861450195, "grad_norm": 0.6312887668609619, "learning_rate": 0.00017006455623437078, "epoch": 0.2553300961507541, "step": 7940 }, { "loss": 0.08203045129776002, "grad_norm": 0.4585205614566803, "learning_rate": 0.00016999220499902543, "epoch": 0.25565167057915555, "step": 7950 }, { "loss": 0.0920904517173767, "grad_norm": 0.48585036396980286, "learning_rate": 0.00016991978186679796, "epoch": 0.255973245007557, "step": 7960 }, { "loss": 0.08931515216827393, "grad_norm": 0.6253461837768555, "learning_rate": 0.00016984728691208232, "epoch": 0.25629481943595844, "step": 7970 }, { "loss": 0.09320071935653687, "grad_norm": 0.49154773354530334, "learning_rate": 0.0001697747202093463, "epoch": 0.2566163938643599, "step": 7980 }, { "loss": 0.10840188264846802, "grad_norm": 0.4476393163204193, "learning_rate": 0.00016970208183313126, "epoch": 0.2569379682927614, "step": 7990 }, { "loss": 0.09145287275314332, "grad_norm": 0.7409923076629639, "learning_rate": 0.00016962937185805235, "epoch": 0.2572595427211628, "step": 8000 }, { "loss": 0.10165148973464966, "grad_norm": 0.8097208738327026, "learning_rate": 0.00016955659035879813, "epoch": 0.2575811171495643, "step": 8010 }, { "loss": 0.11168540716171264, "grad_norm": 0.9544158577919006, "learning_rate": 0.0001694837374101307, "epoch": 0.2579026915779657, "step": 8020 }, { "loss": 0.10602585077285767, "grad_norm": 0.6628233194351196, "learning_rate": 0.00016941081308688555, "epoch": 0.25822426600636716, "step": 8030 }, { "loss": 0.08922889828681946, "grad_norm": 0.8359578847885132, "learning_rate": 0.00016933781746397136, "epoch": 0.25854584043476864, "step": 8040 }, { "loss": 0.0837334394454956, "grad_norm": 0.8613901138305664, "learning_rate": 0.0001692647506163703, "epoch": 0.25886741486317005, "step": 8050 }, { "loss": 0.0981156051158905, "grad_norm": 0.6106037497520447, "learning_rate": 0.0001691916126191375, "epoch": 0.2591889892915715, "step": 8060 }, { "loss": 0.09956081509590149, "grad_norm": 0.8334420323371887, "learning_rate": 0.00016911840354740128, "epoch": 0.259510563719973, "step": 8070 }, { "loss": 0.10414061546325684, "grad_norm": 0.7758110761642456, "learning_rate": 0.0001690451234763629, "epoch": 0.2598321381483744, "step": 8080 }, { "loss": 0.0904352068901062, "grad_norm": 0.4925134479999542, "learning_rate": 0.00016897177248129653, "epoch": 0.2601537125767759, "step": 8090 }, { "loss": 0.08173861503601074, "grad_norm": 0.5777755975723267, "learning_rate": 0.0001688983506375493, "epoch": 0.26047528700517736, "step": 8100 }, { "loss": 0.07746504545211792, "grad_norm": 0.4453226327896118, "learning_rate": 0.0001688248580205411, "epoch": 0.2607968614335788, "step": 8110 }, { "loss": 0.09968498945236207, "grad_norm": 0.5532230734825134, "learning_rate": 0.00016875129470576437, "epoch": 0.26111843586198025, "step": 8120 }, { "loss": 0.09109488129615784, "grad_norm": 0.44665494561195374, "learning_rate": 0.0001686776607687844, "epoch": 0.2614400102903817, "step": 8130 }, { "loss": 0.10301159620285034, "grad_norm": 0.9509112238883972, "learning_rate": 0.00016860395628523885, "epoch": 0.26176158471878314, "step": 8140 }, { "loss": 0.1160635232925415, "grad_norm": 0.46450453996658325, "learning_rate": 0.0001685301813308379, "epoch": 0.2620831591471846, "step": 8150 }, { "loss": 0.10146403312683105, "grad_norm": 0.7742125391960144, "learning_rate": 0.00016845633598136408, "epoch": 0.2624047335755861, "step": 8160 }, { "loss": 0.11969466209411621, "grad_norm": 0.43207746744155884, "learning_rate": 0.00016838242031267235, "epoch": 0.2627263080039875, "step": 8170 }, { "loss": 0.10110535621643066, "grad_norm": 0.6822372078895569, "learning_rate": 0.00016830843440068978, "epoch": 0.26304788243238897, "step": 8180 }, { "loss": 0.08360062837600708, "grad_norm": 0.8064790964126587, "learning_rate": 0.00016823437832141563, "epoch": 0.26336945686079044, "step": 8190 }, { "loss": 0.09945321083068848, "grad_norm": 0.7815895676612854, "learning_rate": 0.00016816025215092128, "epoch": 0.26369103128919186, "step": 8200 }, { "loss": 0.09642932415008545, "grad_norm": 0.7080429196357727, "learning_rate": 0.00016808605596535005, "epoch": 0.26401260571759333, "step": 8210 }, { "loss": 0.08843590021133423, "grad_norm": 0.6929242610931396, "learning_rate": 0.0001680117898409172, "epoch": 0.2643341801459948, "step": 8220 }, { "loss": 0.07515991926193237, "grad_norm": 0.5778882503509521, "learning_rate": 0.00016793745385390986, "epoch": 0.2646557545743962, "step": 8230 }, { "loss": 0.13268963098526002, "grad_norm": 1.0547934770584106, "learning_rate": 0.0001678630480806869, "epoch": 0.2649773290027977, "step": 8240 }, { "loss": 0.08606036305427552, "grad_norm": 0.35389772057533264, "learning_rate": 0.00016778857259767884, "epoch": 0.26529890343119916, "step": 8250 }, { "loss": 0.08920258283615112, "grad_norm": 1.022499680519104, "learning_rate": 0.00016771402748138784, "epoch": 0.2656204778596006, "step": 8260 }, { "loss": 0.09268574118614196, "grad_norm": 0.842609703540802, "learning_rate": 0.00016763941280838765, "epoch": 0.26594205228800205, "step": 8270 }, { "loss": 0.09981530904769897, "grad_norm": 0.9523076415061951, "learning_rate": 0.00016756472865532337, "epoch": 0.2662636267164035, "step": 8280 }, { "loss": 0.1299252986907959, "grad_norm": 0.909056544303894, "learning_rate": 0.00016748997509891152, "epoch": 0.26658520114480494, "step": 8290 }, { "loss": 0.09837405681610108, "grad_norm": 0.9995813965797424, "learning_rate": 0.00016741515221593988, "epoch": 0.2669067755732064, "step": 8300 }, { "loss": 0.11543529033660889, "grad_norm": 0.43808504939079285, "learning_rate": 0.0001673402600832675, "epoch": 0.2672283500016079, "step": 8310 }, { "loss": 0.10366925001144409, "grad_norm": 0.8612508773803711, "learning_rate": 0.00016726529877782452, "epoch": 0.2675499244300093, "step": 8320 }, { "loss": 0.10360457897186279, "grad_norm": 1.0201222896575928, "learning_rate": 0.00016719026837661214, "epoch": 0.2678714988584108, "step": 8330 }, { "loss": 0.0959369421005249, "grad_norm": 0.5185285210609436, "learning_rate": 0.0001671151689567025, "epoch": 0.26819307328681224, "step": 8340 }, { "loss": 0.11198742389678955, "grad_norm": 0.9601693153381348, "learning_rate": 0.00016704000059523876, "epoch": 0.26851464771521366, "step": 8350 }, { "loss": 0.10278092622756958, "grad_norm": 0.7881720662117004, "learning_rate": 0.00016696476336943477, "epoch": 0.26883622214361513, "step": 8360 }, { "loss": 0.10845927000045777, "grad_norm": 0.5752276182174683, "learning_rate": 0.00016688945735657515, "epoch": 0.2691577965720166, "step": 8370 }, { "loss": 0.1280617117881775, "grad_norm": 0.5794875025749207, "learning_rate": 0.00016681408263401524, "epoch": 0.269479371000418, "step": 8380 }, { "loss": 0.0788794457912445, "grad_norm": 0.33127912878990173, "learning_rate": 0.00016673863927918085, "epoch": 0.2698009454288195, "step": 8390 }, { "loss": 0.11556386947631836, "grad_norm": 0.7478880286216736, "learning_rate": 0.00016666312736956842, "epoch": 0.27012251985722097, "step": 8400 }, { "loss": 0.10363576412200928, "grad_norm": 0.6624906063079834, "learning_rate": 0.00016658754698274468, "epoch": 0.2704440942856224, "step": 8410 }, { "loss": 0.10713640451431275, "grad_norm": 0.7080273628234863, "learning_rate": 0.0001665118981963468, "epoch": 0.27076566871402385, "step": 8420 }, { "loss": 0.11110186576843262, "grad_norm": 0.5911359190940857, "learning_rate": 0.00016643618108808217, "epoch": 0.2710872431424253, "step": 8430 }, { "loss": 0.07568821907043458, "grad_norm": 0.6160790324211121, "learning_rate": 0.00016636039573572837, "epoch": 0.27140881757082674, "step": 8440 }, { "loss": 0.0935560405254364, "grad_norm": 0.47352269291877747, "learning_rate": 0.00016628454221713306, "epoch": 0.2717303919992282, "step": 8450 }, { "loss": 0.10653687715530395, "grad_norm": 0.8653551340103149, "learning_rate": 0.00016620862061021393, "epoch": 0.2720519664276297, "step": 8460 }, { "loss": 0.10586884021759033, "grad_norm": 0.7118867039680481, "learning_rate": 0.0001661326309929587, "epoch": 0.2723735408560311, "step": 8470 }, { "loss": 0.10795516967773437, "grad_norm": 0.7776938676834106, "learning_rate": 0.00016605657344342473, "epoch": 0.2726951152844326, "step": 8480 }, { "loss": 0.08810576796531677, "grad_norm": 0.6349517703056335, "learning_rate": 0.00016598044803973943, "epoch": 0.27301668971283405, "step": 8490 }, { "loss": 0.09347708225250244, "grad_norm": 0.2226419895887375, "learning_rate": 0.00016590425486009972, "epoch": 0.27333826414123547, "step": 8500 }, { "loss": 0.08297247290611268, "grad_norm": 0.24822402000427246, "learning_rate": 0.00016582799398277219, "epoch": 0.27365983856963694, "step": 8510 }, { "loss": 0.0796057939529419, "grad_norm": 0.34090086817741394, "learning_rate": 0.00016575166548609302, "epoch": 0.2739814129980384, "step": 8520 }, { "loss": 0.10786666870117187, "grad_norm": 0.7712501287460327, "learning_rate": 0.00016567526944846777, "epoch": 0.2743029874264398, "step": 8530 }, { "loss": 0.10834369659423829, "grad_norm": 0.7854555249214172, "learning_rate": 0.00016559880594837147, "epoch": 0.2746245618548413, "step": 8540 }, { "loss": 0.10700218677520752, "grad_norm": 0.5751836895942688, "learning_rate": 0.00016552227506434837, "epoch": 0.27494613628324277, "step": 8550 }, { "loss": 0.09334764480590821, "grad_norm": 0.5211126804351807, "learning_rate": 0.00016544567687501197, "epoch": 0.2752677107116442, "step": 8560 }, { "loss": 0.1230047106742859, "grad_norm": 0.35446053743362427, "learning_rate": 0.0001653690114590449, "epoch": 0.27558928514004566, "step": 8570 }, { "loss": 0.104975426197052, "grad_norm": 0.4927564561367035, "learning_rate": 0.00016529227889519886, "epoch": 0.27591085956844713, "step": 8580 }, { "loss": 0.11820746660232544, "grad_norm": 0.551878035068512, "learning_rate": 0.0001652154792622945, "epoch": 0.27623243399684855, "step": 8590 }, { "loss": 0.09666290879249573, "grad_norm": 0.8831762671470642, "learning_rate": 0.0001651386126392214, "epoch": 0.27655400842525, "step": 8600 }, { "loss": 0.08981671929359436, "grad_norm": 0.34058356285095215, "learning_rate": 0.00016506167910493788, "epoch": 0.2768755828536515, "step": 8610 }, { "loss": 0.13624900579452515, "grad_norm": 2.133625030517578, "learning_rate": 0.0001649846787384711, "epoch": 0.2771971572820529, "step": 8620 }, { "loss": 0.10119202136993408, "grad_norm": 0.6735418438911438, "learning_rate": 0.0001649076116189168, "epoch": 0.2775187317104544, "step": 8630 }, { "loss": 0.10100538730621338, "grad_norm": 0.5642951130867004, "learning_rate": 0.0001648304778254393, "epoch": 0.27784030613885585, "step": 8640 }, { "loss": 0.0916166365146637, "grad_norm": 0.40952157974243164, "learning_rate": 0.0001647532774372714, "epoch": 0.27816188056725727, "step": 8650 }, { "loss": 0.09529081583023072, "grad_norm": 0.6564289331436157, "learning_rate": 0.00016467601053371436, "epoch": 0.27848345499565874, "step": 8660 }, { "loss": 0.08425755500793457, "grad_norm": 0.46509331464767456, "learning_rate": 0.0001645986771941377, "epoch": 0.2788050294240602, "step": 8670 }, { "loss": 0.10253841876983642, "grad_norm": 0.8329104781150818, "learning_rate": 0.00016452127749797915, "epoch": 0.27912660385246163, "step": 8680 }, { "loss": 0.08099549412727355, "grad_norm": 0.502581000328064, "learning_rate": 0.00016444381152474477, "epoch": 0.2794481782808631, "step": 8690 }, { "loss": 0.11632890701293945, "grad_norm": 0.6065788865089417, "learning_rate": 0.0001643662793540085, "epoch": 0.2797697527092646, "step": 8700 }, { "loss": 0.0974295198917389, "grad_norm": 0.5808644890785217, "learning_rate": 0.0001642886810654124, "epoch": 0.280091327137666, "step": 8710 }, { "loss": 0.0918147623538971, "grad_norm": 0.35146135091781616, "learning_rate": 0.0001642110167386665, "epoch": 0.28041290156606746, "step": 8720 }, { "loss": 0.11327881813049316, "grad_norm": 0.8766664266586304, "learning_rate": 0.00016413328645354844, "epoch": 0.28073447599446893, "step": 8730 }, { "loss": 0.09906228184700012, "grad_norm": 0.639585554599762, "learning_rate": 0.00016405549028990386, "epoch": 0.28105605042287035, "step": 8740 }, { "loss": 0.08947083353996277, "grad_norm": 0.5350400805473328, "learning_rate": 0.00016397762832764593, "epoch": 0.2813776248512718, "step": 8750 }, { "loss": 0.10893043279647827, "grad_norm": 0.7001042366027832, "learning_rate": 0.00016389970064675544, "epoch": 0.2816991992796733, "step": 8760 }, { "loss": 0.10575506687164307, "grad_norm": 0.70212721824646, "learning_rate": 0.0001638217073272807, "epoch": 0.2820207737080747, "step": 8770 }, { "loss": 0.1042213797569275, "grad_norm": 0.9001713395118713, "learning_rate": 0.00016374364844933746, "epoch": 0.2823423481364762, "step": 8780 }, { "loss": 0.08165057897567748, "grad_norm": 0.8453369140625, "learning_rate": 0.00016366552409310874, "epoch": 0.28266392256487766, "step": 8790 }, { "loss": 0.09870893955230713, "grad_norm": 0.24046075344085693, "learning_rate": 0.00016358733433884492, "epoch": 0.2829854969932791, "step": 8800 }, { "loss": 0.09446768164634704, "grad_norm": 1.1681368350982666, "learning_rate": 0.0001635090792668635, "epoch": 0.28330707142168055, "step": 8810 }, { "loss": 0.0984427809715271, "grad_norm": 0.8611108064651489, "learning_rate": 0.0001634307589575491, "epoch": 0.283628645850082, "step": 8820 }, { "loss": 0.14263057708740234, "grad_norm": 0.9652822017669678, "learning_rate": 0.00016335237349135326, "epoch": 0.28395022027848343, "step": 8830 }, { "loss": 0.09047924280166626, "grad_norm": 1.2011903524398804, "learning_rate": 0.0001632739229487946, "epoch": 0.2842717947068849, "step": 8840 }, { "loss": 0.07878822684288025, "grad_norm": 0.4895530045032501, "learning_rate": 0.00016319540741045852, "epoch": 0.2845933691352864, "step": 8850 }, { "loss": 0.10191992521286011, "grad_norm": 0.9160004258155823, "learning_rate": 0.00016311682695699714, "epoch": 0.2849149435636878, "step": 8860 }, { "loss": 0.14895331859588623, "grad_norm": 0.8889757990837097, "learning_rate": 0.00016303818166912932, "epoch": 0.28523651799208927, "step": 8870 }, { "loss": 0.09080453515052796, "grad_norm": 0.9305357933044434, "learning_rate": 0.0001629594716276405, "epoch": 0.28555809242049074, "step": 8880 }, { "loss": 0.11647429466247558, "grad_norm": 0.41978469491004944, "learning_rate": 0.00016288069691338257, "epoch": 0.28587966684889216, "step": 8890 }, { "loss": 0.11093926429748535, "grad_norm": 0.5302769541740417, "learning_rate": 0.00016280185760727403, "epoch": 0.2862012412772936, "step": 8900 }, { "loss": 0.10094484090805053, "grad_norm": 1.0867546796798706, "learning_rate": 0.00016272295379029953, "epoch": 0.2865228157056951, "step": 8910 }, { "loss": 0.10699278116226196, "grad_norm": 0.5287032723426819, "learning_rate": 0.00016264398554351016, "epoch": 0.2868443901340965, "step": 8920 }, { "loss": 0.10200968980789185, "grad_norm": 0.7823114991188049, "learning_rate": 0.00016256495294802298, "epoch": 0.287165964562498, "step": 8930 }, { "loss": 0.07948703169822693, "grad_norm": 0.5769001245498657, "learning_rate": 0.0001624858560850214, "epoch": 0.28748753899089946, "step": 8940 }, { "loss": 0.11868566274642944, "grad_norm": 0.8702729940414429, "learning_rate": 0.00016240669503575462, "epoch": 0.2878091134193009, "step": 8950 }, { "loss": 0.12385741472244263, "grad_norm": 0.432796835899353, "learning_rate": 0.00016232746988153793, "epoch": 0.28813068784770235, "step": 8960 }, { "loss": 0.09350829720497131, "grad_norm": 0.5241557955741882, "learning_rate": 0.00016224818070375245, "epoch": 0.2884522622761038, "step": 8970 }, { "loss": 0.10613198280334472, "grad_norm": 0.6393463015556335, "learning_rate": 0.00016216882758384496, "epoch": 0.28877383670450524, "step": 8980 }, { "loss": 0.09328456521034241, "grad_norm": 0.9340083599090576, "learning_rate": 0.000162089410603328, "epoch": 0.2890954111329067, "step": 8990 }, { "loss": 0.09708858728408813, "grad_norm": 0.40460801124572754, "learning_rate": 0.00016200992984377976, "epoch": 0.2894169855613082, "step": 9000 }, { "loss": 0.11160033941268921, "grad_norm": 0.6721453666687012, "learning_rate": 0.00016193038538684385, "epoch": 0.2897385599897096, "step": 9010 }, { "loss": 0.07697651386260987, "grad_norm": 0.6095776557922363, "learning_rate": 0.00016185077731422932, "epoch": 0.29006013441811107, "step": 9020 }, { "loss": 0.08334786295890809, "grad_norm": 0.4550764858722687, "learning_rate": 0.00016177110570771063, "epoch": 0.29038170884651254, "step": 9030 }, { "loss": 0.09118367433547973, "grad_norm": 0.712659478187561, "learning_rate": 0.00016169137064912746, "epoch": 0.29070328327491396, "step": 9040 }, { "loss": 0.08763358592987061, "grad_norm": 0.4912260174751282, "learning_rate": 0.00016161157222038466, "epoch": 0.29102485770331543, "step": 9050 }, { "loss": 0.11886682510375976, "grad_norm": 0.635463297367096, "learning_rate": 0.0001615317105034522, "epoch": 0.2913464321317169, "step": 9060 }, { "loss": 0.09079389572143555, "grad_norm": 0.48032060265541077, "learning_rate": 0.0001614517855803651, "epoch": 0.2916680065601183, "step": 9070 }, { "loss": 0.09244561195373535, "grad_norm": 0.8139591813087463, "learning_rate": 0.00016137179753322316, "epoch": 0.2919895809885198, "step": 9080 }, { "loss": 0.08526058197021484, "grad_norm": 0.7081871628761292, "learning_rate": 0.00016129174644419116, "epoch": 0.29231115541692126, "step": 9090 }, { "loss": 0.09352965950965882, "grad_norm": 0.47736358642578125, "learning_rate": 0.00016121163239549862, "epoch": 0.2926327298453227, "step": 9100 }, { "loss": 0.11282715797424317, "grad_norm": 0.5685864686965942, "learning_rate": 0.00016113145546943968, "epoch": 0.29295430427372415, "step": 9110 }, { "loss": 0.12271944284439087, "grad_norm": 0.8204271793365479, "learning_rate": 0.0001610512157483731, "epoch": 0.2932758787021256, "step": 9120 }, { "loss": 0.08996185660362244, "grad_norm": 0.8587689995765686, "learning_rate": 0.00016097091331472216, "epoch": 0.29359745313052704, "step": 9130 }, { "loss": 0.09678568243980408, "grad_norm": 0.7826471328735352, "learning_rate": 0.00016089054825097453, "epoch": 0.2939190275589285, "step": 9140 }, { "loss": 0.09184351563453674, "grad_norm": 0.4400404691696167, "learning_rate": 0.0001608101206396822, "epoch": 0.29424060198733, "step": 9150 }, { "loss": 0.11043401956558227, "grad_norm": 0.9030609130859375, "learning_rate": 0.0001607296305634615, "epoch": 0.2945621764157314, "step": 9160 }, { "loss": 0.08099195957183838, "grad_norm": 0.7449926137924194, "learning_rate": 0.00016064907810499276, "epoch": 0.2948837508441329, "step": 9170 }, { "loss": 0.0982122004032135, "grad_norm": 0.8458951711654663, "learning_rate": 0.00016056846334702054, "epoch": 0.29520532527253435, "step": 9180 }, { "loss": 0.1208492398262024, "grad_norm": 0.6890580654144287, "learning_rate": 0.00016048778637235337, "epoch": 0.29552689970093576, "step": 9190 }, { "loss": 0.10449072122573852, "grad_norm": 0.7368547320365906, "learning_rate": 0.00016040704726386366, "epoch": 0.29584847412933724, "step": 9200 }, { "loss": 0.13114418983459472, "grad_norm": 0.5611347556114197, "learning_rate": 0.00016032624610448762, "epoch": 0.2961700485577387, "step": 9210 }, { "loss": 0.1002449631690979, "grad_norm": 0.31099745631217957, "learning_rate": 0.0001602453829772252, "epoch": 0.2964916229861401, "step": 9220 }, { "loss": 0.09459711909294129, "grad_norm": 0.5505207777023315, "learning_rate": 0.00016016445796514011, "epoch": 0.2968131974145416, "step": 9230 }, { "loss": 0.05700767636299133, "grad_norm": 0.2523616850376129, "learning_rate": 0.00016008347115135946, "epoch": 0.29713477184294307, "step": 9240 }, { "loss": 0.10036879777908325, "grad_norm": 0.5966441035270691, "learning_rate": 0.00016000242261907403, "epoch": 0.2974563462713445, "step": 9250 }, { "loss": 0.11783348321914673, "grad_norm": 0.8323610424995422, "learning_rate": 0.00015992131245153783, "epoch": 0.29777792069974596, "step": 9260 }, { "loss": 0.10106947422027587, "grad_norm": 0.7663300633430481, "learning_rate": 0.00015984014073206827, "epoch": 0.29809949512814743, "step": 9270 }, { "loss": 0.10415028333663941, "grad_norm": 0.8918988108634949, "learning_rate": 0.00015975890754404596, "epoch": 0.29842106955654885, "step": 9280 }, { "loss": 0.09814226031303405, "grad_norm": 0.30188941955566406, "learning_rate": 0.00015967761297091472, "epoch": 0.2987426439849503, "step": 9290 }, { "loss": 0.09462435245513916, "grad_norm": 0.6528173089027405, "learning_rate": 0.0001595962570961813, "epoch": 0.2990642184133518, "step": 9300 }, { "loss": 0.10978235006332397, "grad_norm": 0.45726704597473145, "learning_rate": 0.00015951484000341552, "epoch": 0.2993857928417532, "step": 9310 }, { "loss": 0.10773928165435791, "grad_norm": 0.39979204535484314, "learning_rate": 0.00015943336177625007, "epoch": 0.2997073672701547, "step": 9320 }, { "eval_loss": 0.0934017226099968, "eval_runtime": 34.8471, "eval_samples_per_second": 144.23, "eval_steps_per_second": 36.072, "epoch": 0.29983599704151526, "step": 9324 }, { "loss": 0.11000183820724488, "grad_norm": 0.6095293760299683, "learning_rate": 0.0001593518224983804, "epoch": 0.30002894169855615, "step": 9330 }, { "loss": 0.08486317992210388, "grad_norm": 0.4363544285297394, "learning_rate": 0.00015927022225356469, "epoch": 0.30035051612695757, "step": 9340 }, { "loss": 0.09942440390586853, "grad_norm": 0.6024127006530762, "learning_rate": 0.00015918856112562372, "epoch": 0.30067209055535904, "step": 9350 }, { "loss": 0.10907498598098755, "grad_norm": 0.5117012858390808, "learning_rate": 0.00015910683919844089, "epoch": 0.3009936649837605, "step": 9360 }, { "loss": 0.10354846715927124, "grad_norm": 0.48038336634635925, "learning_rate": 0.00015902505655596198, "epoch": 0.30131523941216193, "step": 9370 }, { "loss": 0.09846914410591126, "grad_norm": 0.7857918739318848, "learning_rate": 0.00015894321328219517, "epoch": 0.3016368138405634, "step": 9380 }, { "loss": 0.09118065237998962, "grad_norm": 0.8895276784896851, "learning_rate": 0.00015886130946121088, "epoch": 0.3019583882689649, "step": 9390 }, { "loss": 0.09130282402038574, "grad_norm": 0.5060708522796631, "learning_rate": 0.00015877934517714176, "epoch": 0.3022799626973663, "step": 9400 }, { "loss": 0.08922684788703919, "grad_norm": 0.6186339855194092, "learning_rate": 0.0001586973205141826, "epoch": 0.30260153712576776, "step": 9410 }, { "loss": 0.10332038402557372, "grad_norm": 0.7059121131896973, "learning_rate": 0.00015861523555659016, "epoch": 0.30292311155416923, "step": 9420 }, { "loss": 0.10528864860534667, "grad_norm": 0.4513157606124878, "learning_rate": 0.00015853309038868315, "epoch": 0.30324468598257065, "step": 9430 }, { "loss": 0.09138191342353821, "grad_norm": 0.7483622431755066, "learning_rate": 0.00015845088509484207, "epoch": 0.3035662604109721, "step": 9440 }, { "loss": 0.08996622562408448, "grad_norm": 1.0308533906936646, "learning_rate": 0.00015836861975950935, "epoch": 0.3038878348393736, "step": 9450 }, { "loss": 0.07570862770080566, "grad_norm": 0.5102515816688538, "learning_rate": 0.00015828629446718894, "epoch": 0.304209409267775, "step": 9460 }, { "loss": 0.08456374406814575, "grad_norm": 0.38768988847732544, "learning_rate": 0.00015820390930244637, "epoch": 0.3045309836961765, "step": 9470 }, { "loss": 0.1260838747024536, "grad_norm": 0.943644106388092, "learning_rate": 0.00015812146434990884, "epoch": 0.30485255812457795, "step": 9480 }, { "loss": 0.08907887935638428, "grad_norm": 0.4711124300956726, "learning_rate": 0.00015803895969426478, "epoch": 0.30517413255297937, "step": 9490 }, { "loss": 0.10089514255523682, "grad_norm": 0.7654880881309509, "learning_rate": 0.000157956395420264, "epoch": 0.30549570698138084, "step": 9500 }, { "loss": 0.07927495241165161, "grad_norm": 0.4468529224395752, "learning_rate": 0.00015787377161271762, "epoch": 0.3058172814097823, "step": 9510 }, { "loss": 0.09040161371231079, "grad_norm": 0.46876248717308044, "learning_rate": 0.00015779108835649783, "epoch": 0.30613885583818373, "step": 9520 }, { "loss": 0.09458811283111572, "grad_norm": 0.5352162718772888, "learning_rate": 0.00015770834573653795, "epoch": 0.3064604302665852, "step": 9530 }, { "loss": 0.0799996018409729, "grad_norm": 0.6501600742340088, "learning_rate": 0.00015762554383783224, "epoch": 0.3067820046949867, "step": 9540 }, { "loss": 0.11077144145965576, "grad_norm": 0.5619154572486877, "learning_rate": 0.00015754268274543586, "epoch": 0.3071035791233881, "step": 9550 }, { "loss": 0.10113047361373902, "grad_norm": 0.8997209072113037, "learning_rate": 0.00015745976254446478, "epoch": 0.30742515355178957, "step": 9560 }, { "loss": 0.09810148477554322, "grad_norm": 0.7278547286987305, "learning_rate": 0.0001573767833200957, "epoch": 0.30774672798019104, "step": 9570 }, { "loss": 0.11046998500823975, "grad_norm": 0.3747931122779846, "learning_rate": 0.0001572937451575659, "epoch": 0.30806830240859245, "step": 9580 }, { "loss": 0.12715048789978028, "grad_norm": 1.1526594161987305, "learning_rate": 0.0001572106481421732, "epoch": 0.3083898768369939, "step": 9590 }, { "loss": 0.10706145763397217, "grad_norm": 0.4139595031738281, "learning_rate": 0.00015712749235927603, "epoch": 0.3087114512653954, "step": 9600 }, { "loss": 0.11112467050552369, "grad_norm": 0.6514472961425781, "learning_rate": 0.00015704427789429298, "epoch": 0.3090330256937968, "step": 9610 }, { "loss": 0.08490209579467774, "grad_norm": 0.4603259563446045, "learning_rate": 0.000156961004832703, "epoch": 0.3093546001221983, "step": 9620 }, { "loss": 0.11466584205627442, "grad_norm": 0.8583003282546997, "learning_rate": 0.00015687767326004527, "epoch": 0.30967617455059976, "step": 9630 }, { "loss": 0.10502034425735474, "grad_norm": 0.6610942482948303, "learning_rate": 0.00015679428326191905, "epoch": 0.3099977489790012, "step": 9640 }, { "loss": 0.09413716197013855, "grad_norm": 0.6740992665290833, "learning_rate": 0.00015671083492398356, "epoch": 0.31031932340740265, "step": 9650 }, { "loss": 0.10678738355636597, "grad_norm": 0.4354815185070038, "learning_rate": 0.000156627328331958, "epoch": 0.3106408978358041, "step": 9660 }, { "loss": 0.08357634544372558, "grad_norm": 0.39228543639183044, "learning_rate": 0.00015654376357162147, "epoch": 0.31096247226420554, "step": 9670 }, { "loss": 0.09768832325935364, "grad_norm": 0.7303394675254822, "learning_rate": 0.00015646014072881267, "epoch": 0.311284046692607, "step": 9680 }, { "loss": 0.10822383165359498, "grad_norm": 0.5756319761276245, "learning_rate": 0.0001563764598894301, "epoch": 0.3116056211210085, "step": 9690 }, { "loss": 0.10973855257034301, "grad_norm": 0.553230345249176, "learning_rate": 0.00015629272113943174, "epoch": 0.3119271955494099, "step": 9700 }, { "loss": 0.09531382918357849, "grad_norm": 0.6640179753303528, "learning_rate": 0.00015620892456483514, "epoch": 0.31224876997781137, "step": 9710 }, { "loss": 0.08025224208831787, "grad_norm": 0.864226222038269, "learning_rate": 0.00015612507025171714, "epoch": 0.31257034440621284, "step": 9720 }, { "loss": 0.10060276985168456, "grad_norm": 0.9080818891525269, "learning_rate": 0.00015604115828621402, "epoch": 0.31289191883461426, "step": 9730 }, { "loss": 0.09277968406677246, "grad_norm": 0.4111309349536896, "learning_rate": 0.0001559571887545212, "epoch": 0.31321349326301573, "step": 9740 }, { "loss": 0.0899915099143982, "grad_norm": 0.769721508026123, "learning_rate": 0.00015587316174289325, "epoch": 0.3135350676914172, "step": 9750 }, { "loss": 0.10643835067749023, "grad_norm": 0.720486044883728, "learning_rate": 0.0001557890773376438, "epoch": 0.3138566421198186, "step": 9760 }, { "loss": 0.09196939468383789, "grad_norm": 0.5397993326187134, "learning_rate": 0.0001557049356251454, "epoch": 0.3141782165482201, "step": 9770 }, { "loss": 0.07454143166542053, "grad_norm": 0.6117302775382996, "learning_rate": 0.00015562073669182945, "epoch": 0.31449979097662156, "step": 9780 }, { "loss": 0.10379166603088379, "grad_norm": 0.8357234001159668, "learning_rate": 0.0001555364806241862, "epoch": 0.314821365405023, "step": 9790 }, { "loss": 0.11008110046386718, "grad_norm": 1.1281821727752686, "learning_rate": 0.00015545216750876458, "epoch": 0.31514293983342445, "step": 9800 }, { "loss": 0.09503305554389954, "grad_norm": 0.5694693326950073, "learning_rate": 0.00015536779743217206, "epoch": 0.3154645142618259, "step": 9810 }, { "loss": 0.10269997119903565, "grad_norm": 0.6259468197822571, "learning_rate": 0.00015528337048107465, "epoch": 0.31578608869022734, "step": 9820 }, { "loss": 0.07942078709602356, "grad_norm": 0.5972447395324707, "learning_rate": 0.0001551988867421968, "epoch": 0.3161076631186288, "step": 9830 }, { "loss": 0.09182611703872681, "grad_norm": 0.7243683338165283, "learning_rate": 0.00015511434630232127, "epoch": 0.3164292375470303, "step": 9840 }, { "loss": 0.08888370990753174, "grad_norm": 0.5140639543533325, "learning_rate": 0.00015502974924828906, "epoch": 0.3167508119754317, "step": 9850 }, { "loss": 0.08769227266311645, "grad_norm": 0.8622345924377441, "learning_rate": 0.0001549450956669994, "epoch": 0.3170723864038332, "step": 9860 }, { "loss": 0.09619449377059937, "grad_norm": 0.3897106647491455, "learning_rate": 0.00015486038564540948, "epoch": 0.31739396083223465, "step": 9870 }, { "loss": 0.10312601327896118, "grad_norm": 0.7995700240135193, "learning_rate": 0.00015477561927053446, "epoch": 0.31771553526063606, "step": 9880 }, { "loss": 0.11048386096954346, "grad_norm": 0.8452174663543701, "learning_rate": 0.00015469079662944757, "epoch": 0.31803710968903753, "step": 9890 }, { "loss": 0.06555310487747193, "grad_norm": 0.5714414715766907, "learning_rate": 0.00015460591780927957, "epoch": 0.318358684117439, "step": 9900 }, { "loss": 0.10164577960968017, "grad_norm": 0.5852811336517334, "learning_rate": 0.00015452098289721913, "epoch": 0.3186802585458404, "step": 9910 }, { "loss": 0.1025164246559143, "grad_norm": 0.8045811653137207, "learning_rate": 0.00015443599198051246, "epoch": 0.3190018329742419, "step": 9920 }, { "loss": 0.07474237084388732, "grad_norm": 0.5519168376922607, "learning_rate": 0.0001543509451464633, "epoch": 0.31932340740264337, "step": 9930 }, { "loss": 0.10572299957275391, "grad_norm": 0.666706919670105, "learning_rate": 0.00015426584248243286, "epoch": 0.3196449818310448, "step": 9940 }, { "loss": 0.11387902498245239, "grad_norm": 0.5598200559616089, "learning_rate": 0.00015418068407583967, "epoch": 0.31996655625944626, "step": 9950 }, { "loss": 0.1391116499900818, "grad_norm": 2.396148920059204, "learning_rate": 0.00015409547001415946, "epoch": 0.3202881306878477, "step": 9960 }, { "loss": 0.09753554463386535, "grad_norm": 0.7613327503204346, "learning_rate": 0.0001540102003849253, "epoch": 0.32060970511624914, "step": 9970 }, { "loss": 0.10358134508132935, "grad_norm": 0.5053406357765198, "learning_rate": 0.0001539248752757272, "epoch": 0.3209312795446506, "step": 9980 }, { "loss": 0.14178507328033446, "grad_norm": 0.7225109934806824, "learning_rate": 0.0001538394947742122, "epoch": 0.3212528539730521, "step": 9990 }, { "loss": 0.08253385424613953, "grad_norm": 0.5420551896095276, "learning_rate": 0.00015375405896808422, "epoch": 0.3215744284014535, "step": 10000 }, { "loss": 0.09111645221710205, "grad_norm": 0.6374110579490662, "learning_rate": 0.00015366856794510398, "epoch": 0.321896002829855, "step": 10010 }, { "loss": 0.12315458059310913, "grad_norm": 0.6631460189819336, "learning_rate": 0.00015358302179308898, "epoch": 0.32221757725825645, "step": 10020 }, { "loss": 0.11130249500274658, "grad_norm": 0.5429269075393677, "learning_rate": 0.00015349742059991332, "epoch": 0.32253915168665787, "step": 10030 }, { "loss": 0.06493273973464966, "grad_norm": 0.6440100073814392, "learning_rate": 0.00015341176445350763, "epoch": 0.32286072611505934, "step": 10040 }, { "loss": 0.10156418085098266, "grad_norm": 0.5670591592788696, "learning_rate": 0.00015332605344185897, "epoch": 0.3231823005434608, "step": 10050 }, { "loss": 0.09578874111175537, "grad_norm": 0.8024997711181641, "learning_rate": 0.0001532402876530108, "epoch": 0.3235038749718622, "step": 10060 }, { "loss": 0.0960733413696289, "grad_norm": 0.5642770528793335, "learning_rate": 0.0001531544671750628, "epoch": 0.3238254494002637, "step": 10070 }, { "loss": 0.12410423755645753, "grad_norm": 0.5657545924186707, "learning_rate": 0.0001530685920961709, "epoch": 0.32414702382866517, "step": 10080 }, { "loss": 0.0712913691997528, "grad_norm": 0.47271034121513367, "learning_rate": 0.00015298266250454704, "epoch": 0.3244685982570666, "step": 10090 }, { "loss": 0.094403076171875, "grad_norm": 0.5328975915908813, "learning_rate": 0.00015289667848845923, "epoch": 0.32479017268546806, "step": 10100 }, { "loss": 0.08306188583374023, "grad_norm": 0.6818568110466003, "learning_rate": 0.0001528106401362313, "epoch": 0.32511174711386953, "step": 10110 }, { "loss": 0.09016135931015015, "grad_norm": 0.6705556511878967, "learning_rate": 0.00015272454753624293, "epoch": 0.32543332154227095, "step": 10120 }, { "loss": 0.10912590026855469, "grad_norm": 0.4123816192150116, "learning_rate": 0.00015263840077692954, "epoch": 0.3257548959706724, "step": 10130 }, { "loss": 0.09481947422027588, "grad_norm": 0.4842923581600189, "learning_rate": 0.0001525521999467822, "epoch": 0.3260764703990739, "step": 10140 }, { "loss": 0.11109416484832764, "grad_norm": 0.6905588507652283, "learning_rate": 0.00015246594513434748, "epoch": 0.3263980448274753, "step": 10150 }, { "loss": 0.1307373523712158, "grad_norm": 0.7909269332885742, "learning_rate": 0.00015237963642822746, "epoch": 0.3267196192558768, "step": 10160 }, { "loss": 0.09989275336265564, "grad_norm": 0.6991463899612427, "learning_rate": 0.00015229327391707944, "epoch": 0.32704119368427825, "step": 10170 }, { "loss": 0.08000798225402832, "grad_norm": 0.6140600442886353, "learning_rate": 0.0001522068576896162, "epoch": 0.32736276811267967, "step": 10180 }, { "loss": 0.10280264616012573, "grad_norm": 0.9740838408470154, "learning_rate": 0.0001521203878346055, "epoch": 0.32768434254108114, "step": 10190 }, { "loss": 0.0873986840248108, "grad_norm": 0.2624066174030304, "learning_rate": 0.00015203386444087035, "epoch": 0.3280059169694826, "step": 10200 }, { "loss": 0.09654288291931153, "grad_norm": 0.6746272444725037, "learning_rate": 0.00015194728759728864, "epoch": 0.32832749139788403, "step": 10210 }, { "loss": 0.11106714010238647, "grad_norm": 0.7071442008018494, "learning_rate": 0.00015186065739279317, "epoch": 0.3286490658262855, "step": 10220 }, { "loss": 0.10884366035461426, "grad_norm": 0.6379486322402954, "learning_rate": 0.00015177397391637163, "epoch": 0.328970640254687, "step": 10230 }, { "loss": 0.11274509429931641, "grad_norm": 0.6626328825950623, "learning_rate": 0.00015168723725706638, "epoch": 0.3292922146830884, "step": 10240 }, { "loss": 0.10389626026153564, "grad_norm": 0.5899779796600342, "learning_rate": 0.00015160044750397441, "epoch": 0.32961378911148986, "step": 10250 }, { "loss": 0.11639647483825684, "grad_norm": 0.6206676959991455, "learning_rate": 0.00015151360474624729, "epoch": 0.32993536353989134, "step": 10260 }, { "loss": 0.11473977565765381, "grad_norm": 0.6148909330368042, "learning_rate": 0.00015142670907309098, "epoch": 0.33025693796829275, "step": 10270 }, { "loss": 0.09996345043182372, "grad_norm": 0.44436290860176086, "learning_rate": 0.0001513397605737658, "epoch": 0.3305785123966942, "step": 10280 }, { "loss": 0.09571771621704102, "grad_norm": 0.6439607739448547, "learning_rate": 0.00015125275933758637, "epoch": 0.33090008682509564, "step": 10290 }, { "loss": 0.08798513412475586, "grad_norm": 0.6966301798820496, "learning_rate": 0.00015116570545392147, "epoch": 0.3312216612534971, "step": 10300 }, { "loss": 0.10646851062774658, "grad_norm": 0.459700345993042, "learning_rate": 0.00015107859901219395, "epoch": 0.3315432356818986, "step": 10310 }, { "loss": 0.10774039030075074, "grad_norm": 0.613568902015686, "learning_rate": 0.00015099144010188067, "epoch": 0.3318648101103, "step": 10320 }, { "loss": 0.10567674636840821, "grad_norm": 0.8810312151908875, "learning_rate": 0.0001509042288125123, "epoch": 0.3321863845387015, "step": 10330 }, { "loss": 0.11079902648925781, "grad_norm": 0.727370023727417, "learning_rate": 0.0001508169652336735, "epoch": 0.33250795896710295, "step": 10340 }, { "loss": 0.08177146911621094, "grad_norm": 0.5525153875350952, "learning_rate": 0.00015072964945500242, "epoch": 0.33282953339550436, "step": 10350 }, { "loss": 0.09797362089157105, "grad_norm": 0.5044106841087341, "learning_rate": 0.00015064228156619103, "epoch": 0.33315110782390583, "step": 10360 }, { "loss": 0.10018237829208373, "grad_norm": 0.7975836992263794, "learning_rate": 0.00015055486165698465, "epoch": 0.3334726822523073, "step": 10370 }, { "loss": 0.0940122663974762, "grad_norm": 0.5432740449905396, "learning_rate": 0.0001504673898171822, "epoch": 0.3337942566807087, "step": 10380 }, { "loss": 0.07373791337013244, "grad_norm": 0.6882239580154419, "learning_rate": 0.00015037986613663574, "epoch": 0.3341158311091102, "step": 10390 }, { "loss": 0.09904557466506958, "grad_norm": 0.5263988375663757, "learning_rate": 0.00015029229070525086, "epoch": 0.33443740553751167, "step": 10400 }, { "loss": 0.08657509088516235, "grad_norm": 0.8776784539222717, "learning_rate": 0.0001502046636129861, "epoch": 0.3347589799659131, "step": 10410 }, { "loss": 0.10448260307312011, "grad_norm": 0.8565472364425659, "learning_rate": 0.00015011698494985307, "epoch": 0.33508055439431456, "step": 10420 }, { "loss": 0.10305610895156861, "grad_norm": 0.779041051864624, "learning_rate": 0.00015002925480591642, "epoch": 0.33540212882271603, "step": 10430 }, { "loss": 0.1126700758934021, "grad_norm": 0.8828395009040833, "learning_rate": 0.00014994147327129374, "epoch": 0.33572370325111744, "step": 10440 }, { "loss": 0.08342687487602234, "grad_norm": 0.624516487121582, "learning_rate": 0.00014985364043615528, "epoch": 0.3360452776795189, "step": 10450 }, { "loss": 0.09242086410522461, "grad_norm": 1.018872618675232, "learning_rate": 0.0001497657563907241, "epoch": 0.3363668521079204, "step": 10460 }, { "loss": 0.08708491325378417, "grad_norm": 0.2908959686756134, "learning_rate": 0.00014967782122527566, "epoch": 0.3366884265363218, "step": 10470 }, { "loss": 0.08389623165130615, "grad_norm": 0.43761253356933594, "learning_rate": 0.00014958983503013826, "epoch": 0.3370100009647233, "step": 10480 }, { "loss": 0.10015045404434204, "grad_norm": 0.7681123614311218, "learning_rate": 0.00014950179789569232, "epoch": 0.33733157539312475, "step": 10490 }, { "loss": 0.10144755840301514, "grad_norm": 0.5038902759552002, "learning_rate": 0.00014941370991237077, "epoch": 0.33765314982152617, "step": 10500 }, { "loss": 0.08995380401611328, "grad_norm": 0.3418147563934326, "learning_rate": 0.00014932557117065866, "epoch": 0.33797472424992764, "step": 10510 }, { "loss": 0.10645936727523804, "grad_norm": 0.872319221496582, "learning_rate": 0.00014923738176109323, "epoch": 0.3382962986783291, "step": 10520 }, { "loss": 0.07098982334136963, "grad_norm": 0.42508718371391296, "learning_rate": 0.0001491491417742638, "epoch": 0.3386178731067305, "step": 10530 }, { "loss": 0.09140524864196778, "grad_norm": 0.6166735291481018, "learning_rate": 0.00014906085130081157, "epoch": 0.338939447535132, "step": 10540 }, { "loss": 0.10014245510101319, "grad_norm": 0.1629897654056549, "learning_rate": 0.00014897251043142964, "epoch": 0.33926102196353347, "step": 10550 }, { "loss": 0.12346161603927612, "grad_norm": 0.5388593077659607, "learning_rate": 0.00014888411925686288, "epoch": 0.3395825963919349, "step": 10560 }, { "loss": 0.09298954606056213, "grad_norm": 0.9729686379432678, "learning_rate": 0.00014879567786790784, "epoch": 0.33990417082033636, "step": 10570 }, { "loss": 0.10407984256744385, "grad_norm": 0.6590120196342468, "learning_rate": 0.00014870718635541259, "epoch": 0.34022574524873783, "step": 10580 }, { "loss": 0.1211812973022461, "grad_norm": 0.7960436940193176, "learning_rate": 0.0001486186448102768, "epoch": 0.34054731967713925, "step": 10590 }, { "loss": 0.09517050385475159, "grad_norm": 0.4327518939971924, "learning_rate": 0.00014853005332345144, "epoch": 0.3408688941055407, "step": 10600 }, { "loss": 0.0846939206123352, "grad_norm": 0.7907319664955139, "learning_rate": 0.0001484414119859388, "epoch": 0.3411904685339422, "step": 10610 }, { "loss": 0.08831584453582764, "grad_norm": 0.7718275785446167, "learning_rate": 0.0001483527208887924, "epoch": 0.3415120429623436, "step": 10620 }, { "loss": 0.11044741868972778, "grad_norm": 1.1279953718185425, "learning_rate": 0.00014826398012311688, "epoch": 0.3418336173907451, "step": 10630 }, { "loss": 0.09619516134262085, "grad_norm": 0.40065017342567444, "learning_rate": 0.00014817518978006784, "epoch": 0.34215519181914655, "step": 10640 }, { "loss": 0.1134846568107605, "grad_norm": 0.48741984367370605, "learning_rate": 0.00014808634995085187, "epoch": 0.34247676624754797, "step": 10650 }, { "loss": 0.0754625678062439, "grad_norm": 0.6248498558998108, "learning_rate": 0.00014799746072672638, "epoch": 0.34279834067594944, "step": 10660 }, { "loss": 0.08936263918876648, "grad_norm": 0.5139371156692505, "learning_rate": 0.0001479085221989995, "epoch": 0.3431199151043509, "step": 10670 }, { "loss": 0.09549421072006226, "grad_norm": 0.8832308053970337, "learning_rate": 0.00014781953445903003, "epoch": 0.34344148953275233, "step": 10680 }, { "loss": 0.10033886432647705, "grad_norm": 0.478547066450119, "learning_rate": 0.00014773049759822727, "epoch": 0.3437630639611538, "step": 10690 }, { "loss": 0.09127762913703918, "grad_norm": 0.669744074344635, "learning_rate": 0.00014764141170805107, "epoch": 0.3440846383895553, "step": 10700 }, { "loss": 0.0837335467338562, "grad_norm": 0.4499804675579071, "learning_rate": 0.0001475522768800115, "epoch": 0.3444062128179567, "step": 10710 }, { "loss": 0.0952267050743103, "grad_norm": 0.5402239561080933, "learning_rate": 0.0001474630932056691, "epoch": 0.34472778724635816, "step": 10720 }, { "loss": 0.13682855367660524, "grad_norm": 0.9907369017601013, "learning_rate": 0.0001473738607766344, "epoch": 0.34504936167475964, "step": 10730 }, { "loss": 0.08810251951217651, "grad_norm": 0.4828674793243408, "learning_rate": 0.0001472845796845681, "epoch": 0.34537093610316105, "step": 10740 }, { "loss": 0.11136370897293091, "grad_norm": 0.699180006980896, "learning_rate": 0.00014719525002118087, "epoch": 0.3456925105315625, "step": 10750 }, { "loss": 0.08398210406303405, "grad_norm": 0.6285035014152527, "learning_rate": 0.0001471058718782333, "epoch": 0.346014084959964, "step": 10760 }, { "loss": 0.08259177803993226, "grad_norm": 0.8560423851013184, "learning_rate": 0.00014701644534753568, "epoch": 0.3463356593883654, "step": 10770 }, { "loss": 0.07981761693954467, "grad_norm": 0.3733856976032257, "learning_rate": 0.0001469269705209482, "epoch": 0.3466572338167669, "step": 10780 }, { "loss": 0.08973867297172547, "grad_norm": 0.7273002862930298, "learning_rate": 0.00014683744749038045, "epoch": 0.34697880824516836, "step": 10790 }, { "loss": 0.10675228834152221, "grad_norm": 0.7694258093833923, "learning_rate": 0.0001467478763477916, "epoch": 0.3473003826735698, "step": 10800 }, { "loss": 0.07637117505073547, "grad_norm": 0.9570711851119995, "learning_rate": 0.00014665825718519036, "epoch": 0.34762195710197125, "step": 10810 }, { "loss": 0.12145779132843018, "grad_norm": 0.9601441621780396, "learning_rate": 0.00014656859009463458, "epoch": 0.3479435315303727, "step": 10820 }, { "loss": 0.07213935852050782, "grad_norm": 0.8995713591575623, "learning_rate": 0.00014647887516823152, "epoch": 0.34826510595877413, "step": 10830 }, { "loss": 0.12633739709854125, "grad_norm": 1.2538115978240967, "learning_rate": 0.00014638911249813746, "epoch": 0.3485866803871756, "step": 10840 }, { "loss": 0.09029225111007691, "grad_norm": 0.6341054439544678, "learning_rate": 0.0001462993021765577, "epoch": 0.3489082548155771, "step": 10850 }, { "loss": 0.08201388716697693, "grad_norm": 0.48226431012153625, "learning_rate": 0.00014620944429574658, "epoch": 0.3492298292439785, "step": 10860 }, { "loss": 0.07496067881584167, "grad_norm": 0.7661513090133667, "learning_rate": 0.0001461195389480073, "epoch": 0.34955140367237997, "step": 10870 }, { "eval_loss": 0.09110351651906967, "eval_runtime": 34.3603, "eval_samples_per_second": 146.274, "eval_steps_per_second": 36.583, "epoch": 0.3498086632151011, "step": 10878 }, { "loss": 0.0849322497844696, "grad_norm": 0.9236146211624146, "learning_rate": 0.00014602958622569174, "epoch": 0.34987297810078144, "step": 10880 }, { "loss": 0.09441561698913574, "grad_norm": 0.5579268336296082, "learning_rate": 0.00014593958622120045, "epoch": 0.35019455252918286, "step": 10890 }, { "loss": 0.09172694087028503, "grad_norm": 0.45912206172943115, "learning_rate": 0.00014584953902698261, "epoch": 0.35051612695758433, "step": 10900 }, { "loss": 0.09633524417877197, "grad_norm": 0.5850641131401062, "learning_rate": 0.00014575944473553583, "epoch": 0.3508377013859858, "step": 10910 }, { "loss": 0.09510902166366578, "grad_norm": 0.9028190970420837, "learning_rate": 0.00014566930343940613, "epoch": 0.3511592758143872, "step": 10920 }, { "loss": 0.10485677719116211, "grad_norm": 0.5650879740715027, "learning_rate": 0.0001455791152311878, "epoch": 0.3514808502427887, "step": 10930 }, { "loss": 0.11609166860580444, "grad_norm": 7.561282157897949, "learning_rate": 0.00014548888020352328, "epoch": 0.35180242467119016, "step": 10940 }, { "loss": 0.09210947751998902, "grad_norm": 0.38646945357322693, "learning_rate": 0.00014539859844910319, "epoch": 0.3521239990995916, "step": 10950 }, { "loss": 0.08583818674087525, "grad_norm": 0.38824743032455444, "learning_rate": 0.000145308270060666, "epoch": 0.35244557352799305, "step": 10960 }, { "loss": 0.08737082481384277, "grad_norm": 0.40544432401657104, "learning_rate": 0.00014521789513099827, "epoch": 0.3527671479563945, "step": 10970 }, { "loss": 0.08975743055343628, "grad_norm": 0.8956839442253113, "learning_rate": 0.00014512747375293423, "epoch": 0.35308872238479594, "step": 10980 }, { "loss": 0.10598586797714234, "grad_norm": 0.9445635676383972, "learning_rate": 0.0001450370060193559, "epoch": 0.3534102968131974, "step": 10990 }, { "loss": 0.08878449201583863, "grad_norm": 0.46113863587379456, "learning_rate": 0.00014494649202319282, "epoch": 0.3537318712415989, "step": 11000 }, { "loss": 0.08599361181259155, "grad_norm": 0.3610915243625641, "learning_rate": 0.0001448559318574222, "epoch": 0.3540534456700003, "step": 11010 }, { "loss": 0.08179433941841126, "grad_norm": 0.7075566053390503, "learning_rate": 0.00014476532561506856, "epoch": 0.35437502009840177, "step": 11020 }, { "loss": 0.07348551750183105, "grad_norm": 0.7016006708145142, "learning_rate": 0.0001446746733892037, "epoch": 0.35469659452680324, "step": 11030 }, { "loss": 0.08005476593971253, "grad_norm": 0.4309087097644806, "learning_rate": 0.00014458397527294692, "epoch": 0.35501816895520466, "step": 11040 }, { "loss": 0.11042691469192505, "grad_norm": 0.7881134748458862, "learning_rate": 0.00014449323135946434, "epoch": 0.35533974338360613, "step": 11050 }, { "loss": 0.08432947397232056, "grad_norm": 0.7723735570907593, "learning_rate": 0.00014440244174196925, "epoch": 0.3556613178120076, "step": 11060 }, { "loss": 0.10001605749130249, "grad_norm": 0.8078780174255371, "learning_rate": 0.00014431160651372197, "epoch": 0.355982892240409, "step": 11070 }, { "loss": 0.08675259351730347, "grad_norm": 0.47705569863319397, "learning_rate": 0.00014422072576802958, "epoch": 0.3563044666688105, "step": 11080 }, { "loss": 0.084046471118927, "grad_norm": 0.9069617986679077, "learning_rate": 0.00014412979959824593, "epoch": 0.35662604109721197, "step": 11090 }, { "loss": 0.10433726310729981, "grad_norm": 0.8548468351364136, "learning_rate": 0.00014403882809777154, "epoch": 0.3569476155256134, "step": 11100 }, { "loss": 0.10171997547149658, "grad_norm": 0.6188079714775085, "learning_rate": 0.00014394781136005345, "epoch": 0.35726918995401485, "step": 11110 }, { "loss": 0.09457895755767823, "grad_norm": 0.5230410099029541, "learning_rate": 0.00014385674947858527, "epoch": 0.3575907643824163, "step": 11120 }, { "loss": 0.10628050565719604, "grad_norm": 1.2344039678573608, "learning_rate": 0.0001437656425469069, "epoch": 0.35791233881081774, "step": 11130 }, { "loss": 0.070767343044281, "grad_norm": 0.5525897145271301, "learning_rate": 0.00014367449065860453, "epoch": 0.3582339132392192, "step": 11140 }, { "loss": 0.08599192500114441, "grad_norm": 0.5532211661338806, "learning_rate": 0.00014358329390731057, "epoch": 0.3585554876676207, "step": 11150 }, { "loss": 0.09453628659248352, "grad_norm": 0.57228684425354, "learning_rate": 0.00014349205238670343, "epoch": 0.3588770620960221, "step": 11160 }, { "loss": 0.07618891596794128, "grad_norm": 0.5582618713378906, "learning_rate": 0.00014340076619050754, "epoch": 0.3591986365244236, "step": 11170 }, { "loss": 0.062136721611022946, "grad_norm": 0.43636590242385864, "learning_rate": 0.00014330943541249335, "epoch": 0.35952021095282505, "step": 11180 }, { "loss": 0.09901108741760253, "grad_norm": 0.5479429364204407, "learning_rate": 0.0001432180601464769, "epoch": 0.35984178538122646, "step": 11190 }, { "loss": 0.0857438862323761, "grad_norm": 0.5808840990066528, "learning_rate": 0.00014312664048632008, "epoch": 0.36016335980962794, "step": 11200 }, { "loss": 0.08954665064811707, "grad_norm": 0.6454494595527649, "learning_rate": 0.00014303517652593025, "epoch": 0.3604849342380294, "step": 11210 }, { "loss": 0.09943202137947083, "grad_norm": 0.602885365486145, "learning_rate": 0.0001429436683592604, "epoch": 0.3608065086664308, "step": 11220 }, { "loss": 0.0752259373664856, "grad_norm": 0.4851940870285034, "learning_rate": 0.00014285211608030892, "epoch": 0.3611280830948323, "step": 11230 }, { "loss": 0.0808374285697937, "grad_norm": 0.6741466522216797, "learning_rate": 0.00014276051978311943, "epoch": 0.36144965752323377, "step": 11240 }, { "loss": 0.09836857914924621, "grad_norm": 0.398953378200531, "learning_rate": 0.00014266887956178081, "epoch": 0.3617712319516352, "step": 11250 }, { "loss": 0.10557745695114136, "grad_norm": 0.6230091452598572, "learning_rate": 0.00014257719551042706, "epoch": 0.36209280638003666, "step": 11260 }, { "loss": 0.09444069862365723, "grad_norm": 0.8618478178977966, "learning_rate": 0.0001424854677232372, "epoch": 0.36241438080843813, "step": 11270 }, { "loss": 0.07490069270133973, "grad_norm": 0.6330289840698242, "learning_rate": 0.00014239369629443519, "epoch": 0.36273595523683955, "step": 11280 }, { "loss": 0.10039694309234619, "grad_norm": 0.496137410402298, "learning_rate": 0.0001423018813182898, "epoch": 0.363057529665241, "step": 11290 }, { "loss": 0.09477540254592895, "grad_norm": 0.8526451587677002, "learning_rate": 0.00014221002288911458, "epoch": 0.3633791040936425, "step": 11300 }, { "loss": 0.08862870931625366, "grad_norm": 0.8001587390899658, "learning_rate": 0.0001421181211012676, "epoch": 0.3637006785220439, "step": 11310 }, { "loss": 0.09040194749832153, "grad_norm": 0.46519744396209717, "learning_rate": 0.00014202617604915161, "epoch": 0.3640222529504454, "step": 11320 }, { "loss": 0.10902376174926758, "grad_norm": 0.8219383358955383, "learning_rate": 0.0001419341878272137, "epoch": 0.36434382737884685, "step": 11330 }, { "loss": 0.08118026852607726, "grad_norm": 0.3547578752040863, "learning_rate": 0.00014184215652994532, "epoch": 0.36466540180724827, "step": 11340 }, { "loss": 0.09958731532096862, "grad_norm": 0.46915847063064575, "learning_rate": 0.00014175008225188226, "epoch": 0.36498697623564974, "step": 11350 }, { "loss": 0.11956846714019775, "grad_norm": 1.017486810684204, "learning_rate": 0.0001416579650876043, "epoch": 0.3653085506640512, "step": 11360 }, { "loss": 0.07706149220466614, "grad_norm": 0.2844119071960449, "learning_rate": 0.00014156580513173544, "epoch": 0.36563012509245263, "step": 11370 }, { "loss": 0.086731219291687, "grad_norm": 0.4540342092514038, "learning_rate": 0.00014147360247894348, "epoch": 0.3659516995208541, "step": 11380 }, { "loss": 0.09100791215896606, "grad_norm": 0.6833384037017822, "learning_rate": 0.00014138135722394023, "epoch": 0.3662732739492556, "step": 11390 }, { "loss": 0.1060667872428894, "grad_norm": 0.550255298614502, "learning_rate": 0.00014128906946148114, "epoch": 0.366594848377657, "step": 11400 }, { "loss": 0.08594973087310791, "grad_norm": 0.6789250373840332, "learning_rate": 0.00014119673928636542, "epoch": 0.36691642280605846, "step": 11410 }, { "loss": 0.08912253379821777, "grad_norm": 0.5137705206871033, "learning_rate": 0.00014110436679343576, "epoch": 0.36723799723445993, "step": 11420 }, { "loss": 0.094441157579422, "grad_norm": 0.8425323963165283, "learning_rate": 0.00014101195207757837, "epoch": 0.36755957166286135, "step": 11430 }, { "loss": 0.09220321178436279, "grad_norm": 0.7896093726158142, "learning_rate": 0.00014091949523372277, "epoch": 0.3678811460912628, "step": 11440 }, { "loss": 0.09599274396896362, "grad_norm": 0.8824585676193237, "learning_rate": 0.00014082699635684188, "epoch": 0.3682027205196643, "step": 11450 }, { "loss": 0.10436227321624755, "grad_norm": 1.1513099670410156, "learning_rate": 0.00014073445554195173, "epoch": 0.3685242949480657, "step": 11460 }, { "loss": 0.08480619192123413, "grad_norm": 0.5512106418609619, "learning_rate": 0.00014064187288411132, "epoch": 0.3688458693764672, "step": 11470 }, { "loss": 0.09227036833763122, "grad_norm": 0.5305521488189697, "learning_rate": 0.0001405492484784228, "epoch": 0.36916744380486866, "step": 11480 }, { "loss": 0.07005593776702881, "grad_norm": 0.2772514224052429, "learning_rate": 0.00014045658242003116, "epoch": 0.3694890182332701, "step": 11490 }, { "loss": 0.08746758699417115, "grad_norm": 0.6788966059684753, "learning_rate": 0.00014036387480412412, "epoch": 0.36981059266167154, "step": 11500 }, { "loss": 0.09942635297775268, "grad_norm": 0.2725159525871277, "learning_rate": 0.00014027112572593213, "epoch": 0.370132167090073, "step": 11510 }, { "loss": 0.08165991306304932, "grad_norm": 0.7385566830635071, "learning_rate": 0.0001401783352807282, "epoch": 0.37045374151847443, "step": 11520 }, { "loss": 0.09183178544044494, "grad_norm": 0.42521658539772034, "learning_rate": 0.00014008550356382787, "epoch": 0.3707753159468759, "step": 11530 }, { "loss": 0.09264058470726014, "grad_norm": 0.7075470089912415, "learning_rate": 0.00013999263067058905, "epoch": 0.3710968903752774, "step": 11540 }, { "loss": 0.10476528406143189, "grad_norm": 0.48848652839660645, "learning_rate": 0.00013989971669641199, "epoch": 0.3714184648036788, "step": 11550 }, { "loss": 0.10007588863372803, "grad_norm": 0.7352356314659119, "learning_rate": 0.00013980676173673906, "epoch": 0.37174003923208027, "step": 11560 }, { "loss": 0.09925302863121033, "grad_norm": 0.6485668420791626, "learning_rate": 0.0001397137658870548, "epoch": 0.37206161366048174, "step": 11570 }, { "loss": 0.09216221570968627, "grad_norm": 0.4631863534450531, "learning_rate": 0.00013962072924288572, "epoch": 0.37238318808888315, "step": 11580 }, { "loss": 0.10099647045135499, "grad_norm": 0.7892594933509827, "learning_rate": 0.00013952765189980025, "epoch": 0.3727047625172846, "step": 11590 }, { "loss": 0.08765991926193237, "grad_norm": 0.5219521522521973, "learning_rate": 0.00013943453395340858, "epoch": 0.3730263369456861, "step": 11600 }, { "loss": 0.0861083447933197, "grad_norm": 0.860974907875061, "learning_rate": 0.00013934137549936272, "epoch": 0.3733479113740875, "step": 11610 }, { "loss": 0.0927880048751831, "grad_norm": 0.8678325414657593, "learning_rate": 0.00013924817663335618, "epoch": 0.373669485802489, "step": 11620 }, { "loss": 0.09621718525886536, "grad_norm": 0.794443666934967, "learning_rate": 0.00013915493745112397, "epoch": 0.37399106023089046, "step": 11630 }, { "loss": 0.08052088022232055, "grad_norm": 0.5400531888008118, "learning_rate": 0.00013906165804844267, "epoch": 0.3743126346592919, "step": 11640 }, { "loss": 0.0943139672279358, "grad_norm": 0.9415629506111145, "learning_rate": 0.00013896833852113, "epoch": 0.37463420908769335, "step": 11650 }, { "loss": 0.08249766230583191, "grad_norm": 0.7084611654281616, "learning_rate": 0.00013887497896504495, "epoch": 0.3749557835160948, "step": 11660 }, { "loss": 0.09889563918113708, "grad_norm": 0.5836837291717529, "learning_rate": 0.00013878157947608765, "epoch": 0.37527735794449624, "step": 11670 }, { "loss": 0.08768828511238098, "grad_norm": 0.5825384855270386, "learning_rate": 0.0001386881401501993, "epoch": 0.3755989323728977, "step": 11680 }, { "loss": 0.10164982080459595, "grad_norm": 0.8690288662910461, "learning_rate": 0.0001385946610833619, "epoch": 0.3759205068012992, "step": 11690 }, { "loss": 0.085177081823349, "grad_norm": 0.8311650156974792, "learning_rate": 0.00013850114237159834, "epoch": 0.3762420812297006, "step": 11700 }, { "loss": 0.09417153596878051, "grad_norm": 0.44406217336654663, "learning_rate": 0.00013840758411097224, "epoch": 0.37656365565810207, "step": 11710 }, { "loss": 0.09739122986793518, "grad_norm": 0.690326452255249, "learning_rate": 0.00013831398639758787, "epoch": 0.37688523008650354, "step": 11720 }, { "loss": 0.08809913396835327, "grad_norm": 0.80722975730896, "learning_rate": 0.0001382203493275899, "epoch": 0.37720680451490496, "step": 11730 }, { "loss": 0.10492403507232666, "grad_norm": 0.5662553310394287, "learning_rate": 0.00013812667299716358, "epoch": 0.37752837894330643, "step": 11740 }, { "loss": 0.08379825353622436, "grad_norm": 0.5302266478538513, "learning_rate": 0.00013803295750253444, "epoch": 0.3778499533717079, "step": 11750 }, { "loss": 0.10625137090682983, "grad_norm": 0.8091083765029907, "learning_rate": 0.0001379392029399682, "epoch": 0.3781715278001093, "step": 11760 }, { "loss": 0.06244894862174988, "grad_norm": 0.5015480518341064, "learning_rate": 0.00013784540940577078, "epoch": 0.3784931022285108, "step": 11770 }, { "loss": 0.07549222707748413, "grad_norm": 0.34560057520866394, "learning_rate": 0.00013775157699628802, "epoch": 0.37881467665691226, "step": 11780 }, { "loss": 0.06123560070991516, "grad_norm": 0.4780142903327942, "learning_rate": 0.00013765770580790581, "epoch": 0.3791362510853137, "step": 11790 }, { "loss": 0.09585493803024292, "grad_norm": 0.5067535042762756, "learning_rate": 0.0001375637959370498, "epoch": 0.37945782551371515, "step": 11800 }, { "loss": 0.10015147924423218, "grad_norm": 0.5134025812149048, "learning_rate": 0.00013746984748018543, "epoch": 0.3797793999421166, "step": 11810 }, { "loss": 0.08560925722122192, "grad_norm": 0.7771230936050415, "learning_rate": 0.00013737586053381773, "epoch": 0.38010097437051804, "step": 11820 }, { "loss": 0.08708308935165406, "grad_norm": 0.5460475087165833, "learning_rate": 0.0001372818351944913, "epoch": 0.3804225487989195, "step": 11830 }, { "loss": 0.09841415882110596, "grad_norm": 0.6091669797897339, "learning_rate": 0.00013718777155879012, "epoch": 0.380744123227321, "step": 11840 }, { "loss": 0.1109818696975708, "grad_norm": 0.7367703914642334, "learning_rate": 0.0001370936697233376, "epoch": 0.3810656976557224, "step": 11850 }, { "loss": 0.11050435304641723, "grad_norm": 0.7365938425064087, "learning_rate": 0.00013699952978479634, "epoch": 0.3813872720841239, "step": 11860 }, { "loss": 0.08165845274925232, "grad_norm": 0.8522195816040039, "learning_rate": 0.00013690535183986805, "epoch": 0.38170884651252535, "step": 11870 }, { "loss": 0.09400575757026672, "grad_norm": 0.5556536912918091, "learning_rate": 0.00013681113598529356, "epoch": 0.38203042094092676, "step": 11880 }, { "loss": 0.08722950220108032, "grad_norm": 0.5536159873008728, "learning_rate": 0.0001367168823178525, "epoch": 0.38235199536932823, "step": 11890 }, { "loss": 0.08267619609832763, "grad_norm": 0.701835572719574, "learning_rate": 0.00013662259093436357, "epoch": 0.3826735697977297, "step": 11900 }, { "loss": 0.09268981218338013, "grad_norm": 1.0157867670059204, "learning_rate": 0.00013652826193168393, "epoch": 0.3829951442261311, "step": 11910 }, { "loss": 0.09985225200653076, "grad_norm": 0.48218435049057007, "learning_rate": 0.00013643389540670962, "epoch": 0.3833167186545326, "step": 11920 }, { "loss": 0.08864061832427979, "grad_norm": 0.6998544335365295, "learning_rate": 0.00013633949145637516, "epoch": 0.38363829308293407, "step": 11930 }, { "loss": 0.10292292833328247, "grad_norm": 0.6836029887199402, "learning_rate": 0.0001362450501776534, "epoch": 0.3839598675113355, "step": 11940 }, { "loss": 0.07776576280593872, "grad_norm": 0.4844030439853668, "learning_rate": 0.00013615057166755566, "epoch": 0.38428144193973696, "step": 11950 }, { "loss": 0.08032951354980469, "grad_norm": 0.4513522982597351, "learning_rate": 0.0001360560560231315, "epoch": 0.38460301636813843, "step": 11960 }, { "loss": 0.09304511547088623, "grad_norm": 0.4937937259674072, "learning_rate": 0.00013596150334146853, "epoch": 0.38492459079653984, "step": 11970 }, { "loss": 0.09666025638580322, "grad_norm": 0.716979444026947, "learning_rate": 0.00013586691371969252, "epoch": 0.3852461652249413, "step": 11980 }, { "loss": 0.08163623809814453, "grad_norm": 0.41440537571907043, "learning_rate": 0.0001357722872549671, "epoch": 0.3855677396533428, "step": 11990 }, { "loss": 0.08722010254859924, "grad_norm": 0.5127982497215271, "learning_rate": 0.00013567762404449377, "epoch": 0.3858893140817442, "step": 12000 }, { "loss": 0.08775334358215332, "grad_norm": 0.5351473093032837, "learning_rate": 0.00013558292418551176, "epoch": 0.3862108885101457, "step": 12010 }, { "loss": 0.08145318031311036, "grad_norm": 0.6967948079109192, "learning_rate": 0.0001354881877752981, "epoch": 0.38653246293854715, "step": 12020 }, { "loss": 0.09338672161102295, "grad_norm": 0.4476488530635834, "learning_rate": 0.00013539341491116707, "epoch": 0.38685403736694857, "step": 12030 }, { "loss": 0.09646210074424744, "grad_norm": 0.4933212101459503, "learning_rate": 0.00013529860569047066, "epoch": 0.38717561179535004, "step": 12040 }, { "loss": 0.10104117393493653, "grad_norm": 0.7293670773506165, "learning_rate": 0.00013520376021059803, "epoch": 0.3874971862237515, "step": 12050 }, { "loss": 0.09380577206611633, "grad_norm": 0.5708062648773193, "learning_rate": 0.00013510887856897573, "epoch": 0.3878187606521529, "step": 12060 }, { "loss": 0.08531683683395386, "grad_norm": 0.51509690284729, "learning_rate": 0.00013501396086306738, "epoch": 0.3881403350805544, "step": 12070 }, { "loss": 0.10160613059997559, "grad_norm": 0.4665564000606537, "learning_rate": 0.0001349190071903736, "epoch": 0.38846190950895587, "step": 12080 }, { "loss": 0.09982122182846069, "grad_norm": 0.5586184859275818, "learning_rate": 0.00013482401764843205, "epoch": 0.3887834839373573, "step": 12090 }, { "loss": 0.08948178887367249, "grad_norm": 0.5822875499725342, "learning_rate": 0.0001347289923348172, "epoch": 0.38910505836575876, "step": 12100 }, { "loss": 0.08374988436698913, "grad_norm": 0.44458073377609253, "learning_rate": 0.00013463393134714028, "epoch": 0.38942663279416023, "step": 12110 }, { "loss": 0.11546404361724853, "grad_norm": 0.8156406879425049, "learning_rate": 0.00013453883478304905, "epoch": 0.38974820722256165, "step": 12120 }, { "loss": 0.08677539825439454, "grad_norm": 0.5954530835151672, "learning_rate": 0.00013444370274022803, "epoch": 0.3900697816509631, "step": 12130 }, { "loss": 0.09240028262138367, "grad_norm": 0.4999423623085022, "learning_rate": 0.00013434853531639796, "epoch": 0.3903913560793646, "step": 12140 }, { "loss": 0.1019981861114502, "grad_norm": 0.7203190922737122, "learning_rate": 0.0001342533326093161, "epoch": 0.390712930507766, "step": 12150 }, { "loss": 0.0922100841999054, "grad_norm": 0.5576680898666382, "learning_rate": 0.00013415809471677584, "epoch": 0.3910345049361675, "step": 12160 }, { "loss": 0.09082117080688476, "grad_norm": 0.6921060681343079, "learning_rate": 0.00013406282173660674, "epoch": 0.39135607936456895, "step": 12170 }, { "loss": 0.08153097033500671, "grad_norm": 0.42663201689720154, "learning_rate": 0.00013396751376667447, "epoch": 0.39167765379297037, "step": 12180 }, { "loss": 0.1282093644142151, "grad_norm": 0.7410852909088135, "learning_rate": 0.00013387217090488057, "epoch": 0.39199922822137184, "step": 12190 }, { "loss": 0.08008560538291931, "grad_norm": 0.8880549669265747, "learning_rate": 0.0001337767932491624, "epoch": 0.3923208026497733, "step": 12200 }, { "loss": 0.0876152753829956, "grad_norm": 0.5166379809379578, "learning_rate": 0.00013368138089749312, "epoch": 0.39264237707817473, "step": 12210 }, { "loss": 0.09751138091087341, "grad_norm": 0.7990224361419678, "learning_rate": 0.00013358593394788152, "epoch": 0.3929639515065762, "step": 12220 }, { "loss": 0.07425323724746705, "grad_norm": 0.6802085638046265, "learning_rate": 0.00013349045249837194, "epoch": 0.3932855259349777, "step": 12230 }, { "loss": 0.08448225259780884, "grad_norm": 0.41255390644073486, "learning_rate": 0.00013339493664704408, "epoch": 0.3936071003633791, "step": 12240 }, { "loss": 0.09719268679618835, "grad_norm": 0.6711472272872925, "learning_rate": 0.0001332993864920131, "epoch": 0.39392867479178056, "step": 12250 }, { "loss": 0.06422892212867737, "grad_norm": 0.4163860082626343, "learning_rate": 0.00013320380213142926, "epoch": 0.39425024922018204, "step": 12260 }, { "loss": 0.08449453115463257, "grad_norm": 0.616906464099884, "learning_rate": 0.0001331081836634781, "epoch": 0.39457182364858345, "step": 12270 }, { "loss": 0.09407067894935608, "grad_norm": 0.6051538586616516, "learning_rate": 0.00013301253118638007, "epoch": 0.3948933980769849, "step": 12280 }, { "loss": 0.08197566270828247, "grad_norm": 0.5638774633407593, "learning_rate": 0.00013291684479839065, "epoch": 0.3952149725053864, "step": 12290 }, { "loss": 0.0981809675693512, "grad_norm": 0.822028636932373, "learning_rate": 0.0001328211245978001, "epoch": 0.3955365469337878, "step": 12300 }, { "loss": 0.10272608995437622, "grad_norm": 0.7296173572540283, "learning_rate": 0.00013272537068293342, "epoch": 0.3958581213621893, "step": 12310 }, { "loss": 0.09320492148399354, "grad_norm": 0.6523847579956055, "learning_rate": 0.00013262958315215026, "epoch": 0.39617969579059076, "step": 12320 }, { "loss": 0.08853684663772583, "grad_norm": 0.8760294318199158, "learning_rate": 0.00013253376210384477, "epoch": 0.3965012702189922, "step": 12330 }, { "loss": 0.08399147987365722, "grad_norm": 0.6736103296279907, "learning_rate": 0.00013243790763644555, "epoch": 0.39682284464739365, "step": 12340 }, { "loss": 0.09626861810684204, "grad_norm": 0.668816328048706, "learning_rate": 0.00013234201984841558, "epoch": 0.3971444190757951, "step": 12350 }, { "loss": 0.06886869668960571, "grad_norm": 0.8333956003189087, "learning_rate": 0.0001322460988382519, "epoch": 0.39746599350419654, "step": 12360 }, { "loss": 0.07311125993728637, "grad_norm": 0.7170328497886658, "learning_rate": 0.0001321501447044859, "epoch": 0.397787567932598, "step": 12370 }, { "loss": 0.11943765878677368, "grad_norm": 0.6446574330329895, "learning_rate": 0.00013205415754568285, "epoch": 0.3981091423609995, "step": 12380 }, { "loss": 0.07889516353607177, "grad_norm": 0.37039467692375183, "learning_rate": 0.00013195813746044195, "epoch": 0.3984307167894009, "step": 12390 }, { "loss": 0.09751490354537964, "grad_norm": 0.48914358019828796, "learning_rate": 0.0001318620845473963, "epoch": 0.39875229121780237, "step": 12400 }, { "loss": 0.09465900659561158, "grad_norm": 0.6299532651901245, "learning_rate": 0.00013176599890521264, "epoch": 0.39907386564620384, "step": 12410 }, { "loss": 0.09306957125663758, "grad_norm": 0.7221790552139282, "learning_rate": 0.00013166988063259137, "epoch": 0.39939544007460526, "step": 12420 }, { "loss": 0.09147388339042664, "grad_norm": 0.34951910376548767, "learning_rate": 0.00013157372982826638, "epoch": 0.39971701450300673, "step": 12430 }, { "eval_loss": 0.08492592722177505, "eval_runtime": 34.3707, "eval_samples_per_second": 146.229, "eval_steps_per_second": 36.572, "epoch": 0.399781329388687, "step": 12432 }, { "loss": 0.07842938303947448, "grad_norm": 0.5732839703559875, "learning_rate": 0.00013147754659100503, "epoch": 0.4000385889314082, "step": 12440 }, { "loss": 0.08517730832099915, "grad_norm": 0.6061631441116333, "learning_rate": 0.00013138133101960796, "epoch": 0.4003601633598096, "step": 12450 }, { "loss": 0.11794278621673585, "grad_norm": 0.962023913860321, "learning_rate": 0.00013128508321290903, "epoch": 0.4006817377882111, "step": 12460 }, { "loss": 0.10742299556732178, "grad_norm": 0.4836251139640808, "learning_rate": 0.00013118880326977515, "epoch": 0.40100331221661256, "step": 12470 }, { "loss": 0.09351553916931152, "grad_norm": 1.062370777130127, "learning_rate": 0.0001310924912891064, "epoch": 0.401324886645014, "step": 12480 }, { "loss": 0.07564758062362671, "grad_norm": 0.5744089484214783, "learning_rate": 0.00013099614736983558, "epoch": 0.40164646107341545, "step": 12490 }, { "loss": 0.07484514713287353, "grad_norm": 0.4435352087020874, "learning_rate": 0.00013089977161092847, "epoch": 0.4019680355018169, "step": 12500 }, { "loss": 0.11803302764892579, "grad_norm": 0.6136540174484253, "learning_rate": 0.00013080336411138338, "epoch": 0.40228960993021834, "step": 12510 }, { "loss": 0.08221138119697571, "grad_norm": 0.4812261760234833, "learning_rate": 0.00013070692497023137, "epoch": 0.4026111843586198, "step": 12520 }, { "loss": 0.0985057532787323, "grad_norm": 0.6960001587867737, "learning_rate": 0.00013061045428653599, "epoch": 0.4029327587870213, "step": 12530 }, { "loss": 0.08646383881568909, "grad_norm": 0.7220068573951721, "learning_rate": 0.0001305139521593931, "epoch": 0.4032543332154227, "step": 12540 }, { "loss": 0.09498414993286133, "grad_norm": 0.40549272298812866, "learning_rate": 0.0001304174186879309, "epoch": 0.40357590764382417, "step": 12550 }, { "loss": 0.08594885468482971, "grad_norm": 0.7031739950180054, "learning_rate": 0.00013032085397130983, "epoch": 0.40389748207222564, "step": 12560 }, { "loss": 0.09841355085372924, "grad_norm": 0.8005086779594421, "learning_rate": 0.00013022425810872238, "epoch": 0.40421905650062706, "step": 12570 }, { "loss": 0.08631751537322999, "grad_norm": 0.6979205012321472, "learning_rate": 0.00013012763119939303, "epoch": 0.40454063092902853, "step": 12580 }, { "loss": 0.0870095133781433, "grad_norm": 0.34911099076271057, "learning_rate": 0.00013003097334257822, "epoch": 0.40486220535742995, "step": 12590 }, { "loss": 0.08911849856376648, "grad_norm": 0.7210829257965088, "learning_rate": 0.0001299342846375661, "epoch": 0.4051837797858314, "step": 12600 }, { "loss": 0.07384041547775269, "grad_norm": 0.3820243775844574, "learning_rate": 0.00012983756518367652, "epoch": 0.4055053542142329, "step": 12610 }, { "loss": 0.08409932255744934, "grad_norm": 0.483556866645813, "learning_rate": 0.00012974081508026095, "epoch": 0.4058269286426343, "step": 12620 }, { "loss": 0.0932869553565979, "grad_norm": 0.983331024646759, "learning_rate": 0.00012964403442670231, "epoch": 0.4061485030710358, "step": 12630 }, { "loss": 0.09054937958717346, "grad_norm": 0.399808406829834, "learning_rate": 0.00012954722332241496, "epoch": 0.40647007749943725, "step": 12640 }, { "loss": 0.09392053484916688, "grad_norm": 0.38033151626586914, "learning_rate": 0.0001294503818668444, "epoch": 0.40679165192783867, "step": 12650 }, { "loss": 0.07037632465362549, "grad_norm": 0.3877726197242737, "learning_rate": 0.00012935351015946756, "epoch": 0.40711322635624014, "step": 12660 }, { "loss": 0.08089154958724976, "grad_norm": 0.4302811026573181, "learning_rate": 0.00012925660829979214, "epoch": 0.4074348007846416, "step": 12670 }, { "loss": 0.08582212328910828, "grad_norm": 0.7823520302772522, "learning_rate": 0.000129159676387357, "epoch": 0.40775637521304303, "step": 12680 }, { "loss": 0.0841569185256958, "grad_norm": 0.5166428089141846, "learning_rate": 0.0001290627145217319, "epoch": 0.4080779496414445, "step": 12690 }, { "loss": 0.09074650406837463, "grad_norm": 0.5572818517684937, "learning_rate": 0.00012896572280251723, "epoch": 0.408399524069846, "step": 12700 }, { "loss": 0.09654123783111572, "grad_norm": 0.9365857839584351, "learning_rate": 0.0001288687013293441, "epoch": 0.4087210984982474, "step": 12710 }, { "loss": 0.10211057662963867, "grad_norm": 0.7367502450942993, "learning_rate": 0.00012877165020187427, "epoch": 0.40904267292664886, "step": 12720 }, { "loss": 0.09811394214630127, "grad_norm": 0.9345642924308777, "learning_rate": 0.00012867456951979985, "epoch": 0.40936424735505034, "step": 12730 }, { "loss": 0.08300668001174927, "grad_norm": 0.5749287009239197, "learning_rate": 0.00012857745938284337, "epoch": 0.40968582178345175, "step": 12740 }, { "loss": 0.08812336921691895, "grad_norm": 1.3258109092712402, "learning_rate": 0.00012848031989075754, "epoch": 0.4100073962118532, "step": 12750 }, { "loss": 0.10080107450485229, "grad_norm": 0.4471311867237091, "learning_rate": 0.00012838315114332532, "epoch": 0.4103289706402547, "step": 12760 }, { "loss": 0.09509003758430482, "grad_norm": 0.6037778854370117, "learning_rate": 0.00012828595324035976, "epoch": 0.4106505450686561, "step": 12770 }, { "loss": 0.08809060454368592, "grad_norm": 0.37151727080345154, "learning_rate": 0.00012818872628170361, "epoch": 0.4109721194970576, "step": 12780 }, { "loss": 0.09643104076385497, "grad_norm": 0.492166668176651, "learning_rate": 0.0001280914703672298, "epoch": 0.41129369392545906, "step": 12790 }, { "loss": 0.08205317258834839, "grad_norm": 0.621078372001648, "learning_rate": 0.00012799418559684072, "epoch": 0.4116152683538605, "step": 12800 }, { "loss": 0.09125524759292603, "grad_norm": 0.31062090396881104, "learning_rate": 0.0001278968720704686, "epoch": 0.41193684278226195, "step": 12810 }, { "loss": 0.11376011371612549, "grad_norm": 0.8067668080329895, "learning_rate": 0.00012779952988807512, "epoch": 0.4122584172106634, "step": 12820 }, { "loss": 0.0906079888343811, "grad_norm": 0.9971017241477966, "learning_rate": 0.00012770215914965138, "epoch": 0.41257999163906484, "step": 12830 }, { "loss": 0.097061026096344, "grad_norm": 0.6321334838867188, "learning_rate": 0.00012760475995521783, "epoch": 0.4129015660674663, "step": 12840 }, { "loss": 0.104645037651062, "grad_norm": 0.6004388928413391, "learning_rate": 0.0001275073324048242, "epoch": 0.4132231404958678, "step": 12850 }, { "loss": 0.09864118695259094, "grad_norm": 0.6671878695487976, "learning_rate": 0.0001274098765985493, "epoch": 0.4135447149242692, "step": 12860 }, { "loss": 0.10216143131256103, "grad_norm": 0.3585256040096283, "learning_rate": 0.00012731239263650095, "epoch": 0.41386628935267067, "step": 12870 }, { "loss": 0.08778411149978638, "grad_norm": 0.5079051852226257, "learning_rate": 0.0001272148806188159, "epoch": 0.41418786378107214, "step": 12880 }, { "loss": 0.07836889028549195, "grad_norm": 0.449163556098938, "learning_rate": 0.0001271173406456598, "epoch": 0.41450943820947356, "step": 12890 }, { "loss": 0.07983227968215942, "grad_norm": 0.5763904452323914, "learning_rate": 0.00012701977281722688, "epoch": 0.41483101263787503, "step": 12900 }, { "loss": 0.09611907601356506, "grad_norm": 0.617703914642334, "learning_rate": 0.00012692217723374007, "epoch": 0.4151525870662765, "step": 12910 }, { "loss": 0.09903550148010254, "grad_norm": 0.8212956786155701, "learning_rate": 0.00012682455399545082, "epoch": 0.4154741614946779, "step": 12920 }, { "loss": 0.09544740319252014, "grad_norm": 0.4063360095024109, "learning_rate": 0.00012672690320263895, "epoch": 0.4157957359230794, "step": 12930 }, { "loss": 0.101250159740448, "grad_norm": 0.8265182971954346, "learning_rate": 0.00012662922495561258, "epoch": 0.41611731035148086, "step": 12940 }, { "loss": 0.09107673168182373, "grad_norm": 0.5787447690963745, "learning_rate": 0.00012653151935470804, "epoch": 0.4164388847798823, "step": 12950 }, { "loss": 0.07801653146743774, "grad_norm": 0.5555682182312012, "learning_rate": 0.0001264337865002898, "epoch": 0.41676045920828375, "step": 12960 }, { "loss": 0.10017772912979125, "grad_norm": 0.634788453578949, "learning_rate": 0.00012633602649275027, "epoch": 0.4170820336366852, "step": 12970 }, { "loss": 0.11236907243728637, "grad_norm": 0.6519593000411987, "learning_rate": 0.00012623823943250977, "epoch": 0.41740360806508664, "step": 12980 }, { "loss": 0.10066357851028443, "grad_norm": 1.0568861961364746, "learning_rate": 0.00012614042542001642, "epoch": 0.4177251824934881, "step": 12990 }, { "loss": 0.07760431170463562, "grad_norm": 0.6190695762634277, "learning_rate": 0.00012604258455574597, "epoch": 0.4180467569218896, "step": 13000 }, { "loss": 0.09244945049285888, "grad_norm": 0.5714675784111023, "learning_rate": 0.00012594471694020185, "epoch": 0.418368331350291, "step": 13010 }, { "loss": 0.09857093691825866, "grad_norm": 0.498703271150589, "learning_rate": 0.0001258468226739149, "epoch": 0.4186899057786925, "step": 13020 }, { "loss": 0.0735785722732544, "grad_norm": 0.5419074892997742, "learning_rate": 0.00012574890185744337, "epoch": 0.41901148020709394, "step": 13030 }, { "loss": 0.10949951410293579, "grad_norm": 0.49285179376602173, "learning_rate": 0.00012565095459137274, "epoch": 0.41933305463549536, "step": 13040 }, { "loss": 0.09090704917907715, "grad_norm": 0.5541001558303833, "learning_rate": 0.00012555298097631572, "epoch": 0.41965462906389683, "step": 13050 }, { "loss": 0.0569901168346405, "grad_norm": 0.5824894905090332, "learning_rate": 0.00012545498111291202, "epoch": 0.4199762034922983, "step": 13060 }, { "loss": 0.08063774108886719, "grad_norm": 0.6675398945808411, "learning_rate": 0.00012535695510182838, "epoch": 0.4202977779206997, "step": 13070 }, { "loss": 0.09944126009941101, "grad_norm": 0.7908313870429993, "learning_rate": 0.00012525890304375831, "epoch": 0.4206193523491012, "step": 13080 }, { "loss": 0.07430846691131592, "grad_norm": 0.5902954936027527, "learning_rate": 0.0001251608250394222, "epoch": 0.42094092677750267, "step": 13090 }, { "loss": 0.09310410618782043, "grad_norm": 0.8124235272407532, "learning_rate": 0.00012506272118956692, "epoch": 0.4212625012059041, "step": 13100 }, { "loss": 0.09318177700042725, "grad_norm": 0.7630248069763184, "learning_rate": 0.00012496459159496608, "epoch": 0.42158407563430556, "step": 13110 }, { "loss": 0.07195741534233094, "grad_norm": 0.5920671820640564, "learning_rate": 0.00012486643635641965, "epoch": 0.421905650062707, "step": 13120 }, { "loss": 0.10478745698928833, "grad_norm": 0.9728026986122131, "learning_rate": 0.00012476825557475386, "epoch": 0.42222722449110844, "step": 13130 }, { "loss": 0.09816946983337402, "grad_norm": 0.5509280562400818, "learning_rate": 0.00012467004935082135, "epoch": 0.4225487989195099, "step": 13140 }, { "loss": 0.09047418236732482, "grad_norm": 0.5737391114234924, "learning_rate": 0.00012457181778550072, "epoch": 0.4228703733479114, "step": 13150 }, { "loss": 0.09822179675102234, "grad_norm": 0.8015565872192383, "learning_rate": 0.00012447356097969672, "epoch": 0.4231919477763128, "step": 13160 }, { "loss": 0.11566193103790283, "grad_norm": 0.6863377094268799, "learning_rate": 0.00012437527903433996, "epoch": 0.4235135222047143, "step": 13170 }, { "loss": 0.09870879650115967, "grad_norm": 0.6073296666145325, "learning_rate": 0.00012427697205038697, "epoch": 0.42383509663311575, "step": 13180 }, { "loss": 0.1031153678894043, "grad_norm": 1.089884877204895, "learning_rate": 0.00012417864012881987, "epoch": 0.42415667106151717, "step": 13190 }, { "loss": 0.07383702993392945, "grad_norm": 0.6829261779785156, "learning_rate": 0.0001240802833706465, "epoch": 0.42447824548991864, "step": 13200 }, { "loss": 0.08646566271781922, "grad_norm": 0.5114777684211731, "learning_rate": 0.00012398190187690012, "epoch": 0.4247998199183201, "step": 13210 }, { "loss": 0.09108679294586182, "grad_norm": 0.7739109396934509, "learning_rate": 0.00012388349574863952, "epoch": 0.4251213943467215, "step": 13220 }, { "loss": 0.09738523364067078, "grad_norm": 0.5807838439941406, "learning_rate": 0.00012378506508694864, "epoch": 0.425442968775123, "step": 13230 }, { "loss": 0.09927533268928528, "grad_norm": 0.6175647974014282, "learning_rate": 0.00012368660999293683, "epoch": 0.42576454320352447, "step": 13240 }, { "loss": 0.08608570694923401, "grad_norm": 1.083479881286621, "learning_rate": 0.00012358813056773826, "epoch": 0.4260861176319259, "step": 13250 }, { "loss": 0.07202335000038147, "grad_norm": 0.6211294531822205, "learning_rate": 0.00012348962691251234, "epoch": 0.42640769206032736, "step": 13260 }, { "loss": 0.11884490251541138, "grad_norm": 0.4971469044685364, "learning_rate": 0.00012339109912844326, "epoch": 0.42672926648872883, "step": 13270 }, { "loss": 0.06431439518928528, "grad_norm": 0.6088893413543701, "learning_rate": 0.00012329254731674, "epoch": 0.42705084091713025, "step": 13280 }, { "loss": 0.0804969072341919, "grad_norm": 0.5218471884727478, "learning_rate": 0.00012319397157863627, "epoch": 0.4273724153455317, "step": 13290 }, { "loss": 0.07834722995758056, "grad_norm": 0.6221503615379333, "learning_rate": 0.00012309537201539027, "epoch": 0.4276939897739332, "step": 13300 }, { "loss": 0.09761518239974976, "grad_norm": 0.7556706070899963, "learning_rate": 0.00012299674872828477, "epoch": 0.4280155642023346, "step": 13310 }, { "loss": 0.08869344592094422, "grad_norm": 0.6085411906242371, "learning_rate": 0.0001228981018186268, "epoch": 0.4283371386307361, "step": 13320 }, { "loss": 0.07709804773330689, "grad_norm": 0.9054849147796631, "learning_rate": 0.00012279943138774778, "epoch": 0.42865871305913755, "step": 13330 }, { "loss": 0.08806559443473816, "grad_norm": 0.48757463693618774, "learning_rate": 0.0001227007375370032, "epoch": 0.42898028748753897, "step": 13340 }, { "loss": 0.09110906720161438, "grad_norm": 0.6032870411872864, "learning_rate": 0.00012260202036777268, "epoch": 0.42930186191594044, "step": 13350 }, { "loss": 0.08105543255805969, "grad_norm": 0.6139655709266663, "learning_rate": 0.0001225032799814597, "epoch": 0.4296234363443419, "step": 13360 }, { "loss": 0.10194287300109864, "grad_norm": 0.4390583336353302, "learning_rate": 0.00012240451647949164, "epoch": 0.42994501077274333, "step": 13370 }, { "loss": 0.07526599764823913, "grad_norm": 0.4364306926727295, "learning_rate": 0.00012230572996331965, "epoch": 0.4302665852011448, "step": 13380 }, { "loss": 0.09649503231048584, "grad_norm": 0.5593969821929932, "learning_rate": 0.0001222069205344185, "epoch": 0.4305881596295463, "step": 13390 }, { "loss": 0.06970558166503907, "grad_norm": 0.40434157848358154, "learning_rate": 0.00012210808829428644, "epoch": 0.4309097340579477, "step": 13400 }, { "loss": 0.0974462866783142, "grad_norm": 0.7777182459831238, "learning_rate": 0.00012200923334444522, "epoch": 0.43123130848634916, "step": 13410 }, { "loss": 0.09920287132263184, "grad_norm": 0.644882082939148, "learning_rate": 0.00012191035578643997, "epoch": 0.43155288291475064, "step": 13420 }, { "loss": 0.09246551394462585, "grad_norm": 0.9769138097763062, "learning_rate": 0.00012181145572183889, "epoch": 0.43187445734315205, "step": 13430 }, { "loss": 0.08350251913070679, "grad_norm": 0.3481636047363281, "learning_rate": 0.00012171253325223344, "epoch": 0.4321960317715535, "step": 13440 }, { "loss": 0.06871581673622132, "grad_norm": 0.5685707926750183, "learning_rate": 0.00012161358847923801, "epoch": 0.432517606199955, "step": 13450 }, { "loss": 0.09470840096473694, "grad_norm": 0.5111106038093567, "learning_rate": 0.00012151462150448991, "epoch": 0.4328391806283564, "step": 13460 }, { "loss": 0.08720097541809083, "grad_norm": 0.7694170475006104, "learning_rate": 0.00012141563242964929, "epoch": 0.4331607550567579, "step": 13470 }, { "loss": 0.081611967086792, "grad_norm": 0.5865517854690552, "learning_rate": 0.00012131662135639898, "epoch": 0.43348232948515936, "step": 13480 }, { "loss": 0.09730910658836364, "grad_norm": 0.7233736515045166, "learning_rate": 0.00012121758838644444, "epoch": 0.4338039039135608, "step": 13490 }, { "loss": 0.08976944684982299, "grad_norm": 0.6866528987884521, "learning_rate": 0.00012111853362151357, "epoch": 0.43412547834196225, "step": 13500 }, { "loss": 0.10347449779510498, "grad_norm": 0.5420098900794983, "learning_rate": 0.00012101945716335668, "epoch": 0.4344470527703637, "step": 13510 }, { "loss": 0.08698602318763733, "grad_norm": 0.2645670771598816, "learning_rate": 0.00012092035911374639, "epoch": 0.43476862719876513, "step": 13520 }, { "loss": 0.08184699416160583, "grad_norm": 0.564333975315094, "learning_rate": 0.00012082123957447744, "epoch": 0.4350902016271666, "step": 13530 }, { "loss": 0.09781272411346435, "grad_norm": 0.6272760629653931, "learning_rate": 0.0001207220986473667, "epoch": 0.4354117760555681, "step": 13540 }, { "loss": 0.08091724514961243, "grad_norm": 0.5868409872055054, "learning_rate": 0.00012062293643425301, "epoch": 0.4357333504839695, "step": 13550 }, { "loss": 0.11358673572540283, "grad_norm": 0.9962054491043091, "learning_rate": 0.00012052375303699703, "epoch": 0.43605492491237097, "step": 13560 }, { "loss": 0.0930856466293335, "grad_norm": 0.641973614692688, "learning_rate": 0.00012042454855748118, "epoch": 0.43637649934077244, "step": 13570 }, { "loss": 0.10741612911224366, "grad_norm": 0.7930047512054443, "learning_rate": 0.00012032532309760962, "epoch": 0.43669807376917386, "step": 13580 }, { "loss": 0.08490241765975952, "grad_norm": 0.5818084478378296, "learning_rate": 0.00012022607675930795, "epoch": 0.43701964819757533, "step": 13590 }, { "loss": 0.11442885398864747, "grad_norm": 0.4617845416069031, "learning_rate": 0.00012012680964452332, "epoch": 0.4373412226259768, "step": 13600 }, { "loss": 0.09673363566398621, "grad_norm": 0.7523597478866577, "learning_rate": 0.00012002752185522412, "epoch": 0.4376627970543782, "step": 13610 }, { "loss": 0.08678258061408997, "grad_norm": 0.5370033383369446, "learning_rate": 0.00011992821349340009, "epoch": 0.4379843714827797, "step": 13620 }, { "loss": 0.11078073978424072, "grad_norm": 0.42315545678138733, "learning_rate": 0.00011982888466106197, "epoch": 0.43830594591118116, "step": 13630 }, { "loss": 0.0647172212600708, "grad_norm": 0.5130701661109924, "learning_rate": 0.00011972953546024164, "epoch": 0.4386275203395826, "step": 13640 }, { "loss": 0.0814866840839386, "grad_norm": 0.7799618244171143, "learning_rate": 0.00011963016599299184, "epoch": 0.43894909476798405, "step": 13650 }, { "loss": 0.08262754678726196, "grad_norm": 0.5943609476089478, "learning_rate": 0.0001195307763613862, "epoch": 0.4392706691963855, "step": 13660 }, { "loss": 0.0892327070236206, "grad_norm": 0.5623515844345093, "learning_rate": 0.00011943136666751895, "epoch": 0.43959224362478694, "step": 13670 }, { "loss": 0.0836462676525116, "grad_norm": 0.4980289936065674, "learning_rate": 0.00011933193701350501, "epoch": 0.4399138180531884, "step": 13680 }, { "loss": 0.07117368578910828, "grad_norm": 0.41098299622535706, "learning_rate": 0.00011923248750147979, "epoch": 0.4402353924815899, "step": 13690 }, { "loss": 0.08613250255584717, "grad_norm": 0.5188729166984558, "learning_rate": 0.0001191330182335991, "epoch": 0.4405569669099913, "step": 13700 }, { "loss": 0.10002766847610474, "grad_norm": 0.6116454601287842, "learning_rate": 0.00011903352931203903, "epoch": 0.44087854133839277, "step": 13710 }, { "loss": 0.08848408460617066, "grad_norm": 0.8638411164283752, "learning_rate": 0.00011893402083899588, "epoch": 0.44120011576679424, "step": 13720 }, { "loss": 0.09257006645202637, "grad_norm": 0.5978802442550659, "learning_rate": 0.00011883449291668594, "epoch": 0.44152169019519566, "step": 13730 }, { "loss": 0.09721587300300598, "grad_norm": 0.47306108474731445, "learning_rate": 0.00011873494564734566, "epoch": 0.44184326462359713, "step": 13740 }, { "loss": 0.07312923073768615, "grad_norm": 0.34300699830055237, "learning_rate": 0.00011863537913323119, "epoch": 0.4421648390519986, "step": 13750 }, { "loss": 0.08672202825546264, "grad_norm": 0.8224014639854431, "learning_rate": 0.00011853579347661856, "epoch": 0.4424864134804, "step": 13760 }, { "loss": 0.07279933094978333, "grad_norm": 0.8059155941009521, "learning_rate": 0.0001184361887798034, "epoch": 0.4428079879088015, "step": 13770 }, { "loss": 0.09367861747741699, "grad_norm": 0.7235598564147949, "learning_rate": 0.00011833656514510094, "epoch": 0.44312956233720296, "step": 13780 }, { "loss": 0.08866561055183411, "grad_norm": 0.34332987666130066, "learning_rate": 0.00011823692267484581, "epoch": 0.4434511367656044, "step": 13790 }, { "loss": 0.097174733877182, "grad_norm": 0.6409239768981934, "learning_rate": 0.00011813726147139201, "epoch": 0.44377271119400585, "step": 13800 }, { "loss": 0.06715927720069885, "grad_norm": 0.6418638825416565, "learning_rate": 0.00011803758163711287, "epoch": 0.4440942856224073, "step": 13810 }, { "loss": 0.11879804134368896, "grad_norm": 0.533685564994812, "learning_rate": 0.00011793788327440072, "epoch": 0.44441586005080874, "step": 13820 }, { "loss": 0.07027478814125061, "grad_norm": 0.4666531980037689, "learning_rate": 0.00011783816648566697, "epoch": 0.4447374344792102, "step": 13830 }, { "loss": 0.08207365274429321, "grad_norm": 0.32438936829566956, "learning_rate": 0.00011773843137334201, "epoch": 0.4450590089076117, "step": 13840 }, { "loss": 0.07866615056991577, "grad_norm": 0.566194474697113, "learning_rate": 0.00011763867803987505, "epoch": 0.4453805833360131, "step": 13850 }, { "loss": 0.08325292468070984, "grad_norm": 0.7310763597488403, "learning_rate": 0.0001175389065877339, "epoch": 0.4457021577644146, "step": 13860 }, { "loss": 0.10103902816772461, "grad_norm": 0.4220094382762909, "learning_rate": 0.0001174391171194051, "epoch": 0.44602373219281605, "step": 13870 }, { "loss": 0.07679594159126282, "grad_norm": 0.6618533134460449, "learning_rate": 0.00011733930973739366, "epoch": 0.44634530662121746, "step": 13880 }, { "loss": 0.10045582056045532, "grad_norm": 0.7397884130477905, "learning_rate": 0.00011723948454422298, "epoch": 0.44666688104961894, "step": 13890 }, { "loss": 0.086998850107193, "grad_norm": 0.628219485282898, "learning_rate": 0.0001171396416424348, "epoch": 0.4469884554780204, "step": 13900 }, { "loss": 0.08655040264129639, "grad_norm": 0.8280029296875, "learning_rate": 0.00011703978113458898, "epoch": 0.4473100299064218, "step": 13910 }, { "loss": 0.08575885891914367, "grad_norm": 0.538664698600769, "learning_rate": 0.00011693990312326352, "epoch": 0.4476316043348233, "step": 13920 }, { "loss": 0.07931332588195801, "grad_norm": 0.43020099401474, "learning_rate": 0.00011684000771105438, "epoch": 0.44795317876322477, "step": 13930 }, { "loss": 0.09089007377624511, "grad_norm": 1.0443196296691895, "learning_rate": 0.00011674009500057541, "epoch": 0.4482747531916262, "step": 13940 }, { "loss": 0.09716281890869141, "grad_norm": 0.9259188175201416, "learning_rate": 0.00011664016509445819, "epoch": 0.44859632762002766, "step": 13950 }, { "loss": 0.08684880137443543, "grad_norm": 0.44154876470565796, "learning_rate": 0.00011654021809535196, "epoch": 0.44891790204842913, "step": 13960 }, { "loss": 0.08301849961280823, "grad_norm": 0.7156301736831665, "learning_rate": 0.00011644025410592366, "epoch": 0.44923947647683055, "step": 13970 }, { "loss": 0.07928972244262696, "grad_norm": 0.6469382047653198, "learning_rate": 0.00011634027322885743, "epoch": 0.449561050905232, "step": 13980 }, { "eval_loss": 0.08378300815820694, "eval_runtime": 34.4401, "eval_samples_per_second": 145.935, "eval_steps_per_second": 36.498, "epoch": 0.4497539955622729, "step": 13986 }, { "loss": 0.07586904764175414, "grad_norm": 0.6927252411842346, "learning_rate": 0.00011624027556685498, "epoch": 0.4498826253336335, "step": 13990 }, { "loss": 0.1109076738357544, "grad_norm": 0.6233476996421814, "learning_rate": 0.00011614026122263517, "epoch": 0.4502041997620349, "step": 14000 }, { "loss": 0.08457133173942566, "grad_norm": 0.658160924911499, "learning_rate": 0.00011604023029893398, "epoch": 0.4505257741904364, "step": 14010 }, { "loss": 0.08707954287528992, "grad_norm": 0.7234919667243958, "learning_rate": 0.00011594018289850448, "epoch": 0.45084734861883785, "step": 14020 }, { "loss": 0.07688099145889282, "grad_norm": 0.5437519550323486, "learning_rate": 0.0001158401191241166, "epoch": 0.45116892304723927, "step": 14030 }, { "loss": 0.09311116337776185, "grad_norm": 0.8065460920333862, "learning_rate": 0.00011574003907855715, "epoch": 0.45149049747564074, "step": 14040 }, { "loss": 0.0929793119430542, "grad_norm": 0.8462100028991699, "learning_rate": 0.00011563994286462957, "epoch": 0.4518120719040422, "step": 14050 }, { "loss": 0.0810061514377594, "grad_norm": 0.8836320042610168, "learning_rate": 0.00011553983058515405, "epoch": 0.45213364633244363, "step": 14060 }, { "loss": 0.07508513331413269, "grad_norm": 1.3527172803878784, "learning_rate": 0.0001154397023429671, "epoch": 0.4524552207608451, "step": 14070 }, { "loss": 0.08644511103630066, "grad_norm": 0.47087258100509644, "learning_rate": 0.00011533955824092179, "epoch": 0.4527767951892466, "step": 14080 }, { "loss": 0.10185540914535522, "grad_norm": 0.7062323093414307, "learning_rate": 0.0001152393983818874, "epoch": 0.453098369617648, "step": 14090 }, { "loss": 0.07294328212738037, "grad_norm": 0.5222784280776978, "learning_rate": 0.00011513922286874941, "epoch": 0.45341994404604946, "step": 14100 }, { "loss": 0.10024436712265014, "grad_norm": 0.5364047884941101, "learning_rate": 0.00011503903180440932, "epoch": 0.45374151847445093, "step": 14110 }, { "loss": 0.09642171263694763, "grad_norm": 0.9905641078948975, "learning_rate": 0.00011493882529178477, "epoch": 0.45406309290285235, "step": 14120 }, { "loss": 0.10040140151977539, "grad_norm": 0.5326999425888062, "learning_rate": 0.00011483860343380908, "epoch": 0.4543846673312538, "step": 14130 }, { "loss": 0.09813051223754883, "grad_norm": 0.5978994965553284, "learning_rate": 0.00011473836633343144, "epoch": 0.4547062417596553, "step": 14140 }, { "loss": 0.09117928147315979, "grad_norm": 0.6803304553031921, "learning_rate": 0.00011463811409361667, "epoch": 0.4550278161880567, "step": 14150 }, { "loss": 0.07710156440734864, "grad_norm": 0.6730096340179443, "learning_rate": 0.00011453784681734516, "epoch": 0.4553493906164582, "step": 14160 }, { "loss": 0.10482146739959716, "grad_norm": 0.8572554588317871, "learning_rate": 0.00011443756460761273, "epoch": 0.45567096504485965, "step": 14170 }, { "loss": 0.09611796736717224, "grad_norm": 1.1262205839157104, "learning_rate": 0.0001143372675674305, "epoch": 0.45599253947326107, "step": 14180 }, { "loss": 0.10889110565185547, "grad_norm": 0.39554649591445923, "learning_rate": 0.00011423695579982492, "epoch": 0.45631411390166254, "step": 14190 }, { "loss": 0.09329955577850342, "grad_norm": 0.35157912969589233, "learning_rate": 0.00011413662940783747, "epoch": 0.456635688330064, "step": 14200 }, { "loss": 0.08668434023857116, "grad_norm": 0.5843801498413086, "learning_rate": 0.0001140362884945247, "epoch": 0.45695726275846543, "step": 14210 }, { "loss": 0.08654572367668152, "grad_norm": 0.6317597031593323, "learning_rate": 0.00011393593316295809, "epoch": 0.4572788371868669, "step": 14220 }, { "loss": 0.0918462872505188, "grad_norm": 0.428110271692276, "learning_rate": 0.00011383556351622393, "epoch": 0.4576004116152684, "step": 14230 }, { "loss": 0.10685827732086181, "grad_norm": 0.5201165080070496, "learning_rate": 0.00011373517965742313, "epoch": 0.4579219860436698, "step": 14240 }, { "loss": 0.08834422826766967, "grad_norm": 0.6804199814796448, "learning_rate": 0.00011363478168967132, "epoch": 0.45824356047207127, "step": 14250 }, { "loss": 0.09454160332679748, "grad_norm": 0.3859182894229889, "learning_rate": 0.00011353436971609854, "epoch": 0.45856513490047274, "step": 14260 }, { "loss": 0.08517266511917114, "grad_norm": 0.6662791967391968, "learning_rate": 0.00011343394383984925, "epoch": 0.45888670932887415, "step": 14270 }, { "loss": 0.09720152616500854, "grad_norm": 1.1802897453308105, "learning_rate": 0.00011333350416408222, "epoch": 0.4592082837572756, "step": 14280 }, { "loss": 0.10304139852523804, "grad_norm": 0.4195810854434967, "learning_rate": 0.00011323305079197033, "epoch": 0.4595298581856771, "step": 14290 }, { "loss": 0.10177587270736695, "grad_norm": 0.6575989723205566, "learning_rate": 0.00011313258382670055, "epoch": 0.4598514326140785, "step": 14300 }, { "loss": 0.07869631052017212, "grad_norm": 0.701774537563324, "learning_rate": 0.00011303210337147381, "epoch": 0.46017300704248, "step": 14310 }, { "loss": 0.1009220838546753, "grad_norm": 0.7061283588409424, "learning_rate": 0.00011293160952950496, "epoch": 0.46049458147088146, "step": 14320 }, { "loss": 0.0825681209564209, "grad_norm": 0.5821817517280579, "learning_rate": 0.00011283110240402252, "epoch": 0.4608161558992829, "step": 14330 }, { "loss": 0.07777943611145019, "grad_norm": 0.5349202752113342, "learning_rate": 0.0001127305820982687, "epoch": 0.46113773032768435, "step": 14340 }, { "loss": 0.10388762950897217, "grad_norm": 1.0531601905822754, "learning_rate": 0.00011263004871549923, "epoch": 0.4614593047560858, "step": 14350 }, { "loss": 0.0702982485294342, "grad_norm": 0.5619316697120667, "learning_rate": 0.00011252950235898328, "epoch": 0.46178087918448724, "step": 14360 }, { "loss": 0.07497292160987853, "grad_norm": 0.6734415292739868, "learning_rate": 0.00011242894313200332, "epoch": 0.4621024536128887, "step": 14370 }, { "loss": 0.09227219820022584, "grad_norm": 0.2237335443496704, "learning_rate": 0.00011232837113785508, "epoch": 0.4624240280412902, "step": 14380 }, { "loss": 0.07480285167694092, "grad_norm": 0.7597417235374451, "learning_rate": 0.00011222778647984744, "epoch": 0.4627456024696916, "step": 14390 }, { "loss": 0.09297425150871277, "grad_norm": 0.5696741342544556, "learning_rate": 0.00011212718926130215, "epoch": 0.46306717689809307, "step": 14400 }, { "loss": 0.09020763039588928, "grad_norm": 0.5711169838905334, "learning_rate": 0.000112026579585554, "epoch": 0.46338875132649454, "step": 14410 }, { "loss": 0.09613139629364013, "grad_norm": 0.5713536143302917, "learning_rate": 0.00011192595755595054, "epoch": 0.46371032575489596, "step": 14420 }, { "loss": 0.07961199283599854, "grad_norm": 0.7792538404464722, "learning_rate": 0.00011182532327585194, "epoch": 0.46403190018329743, "step": 14430 }, { "loss": 0.08362306952476502, "grad_norm": 0.4089360237121582, "learning_rate": 0.0001117246768486311, "epoch": 0.4643534746116989, "step": 14440 }, { "loss": 0.08947086334228516, "grad_norm": 0.8442490696907043, "learning_rate": 0.00011162401837767324, "epoch": 0.4646750490401003, "step": 14450 }, { "loss": 0.0991512417793274, "grad_norm": 0.5721593499183655, "learning_rate": 0.00011152334796637602, "epoch": 0.4649966234685018, "step": 14460 }, { "loss": 0.10386481285095214, "grad_norm": 0.9631685018539429, "learning_rate": 0.00011142266571814941, "epoch": 0.46531819789690326, "step": 14470 }, { "loss": 0.11372385025024415, "grad_norm": 0.6378226280212402, "learning_rate": 0.00011132197173641546, "epoch": 0.4656397723253047, "step": 14480 }, { "loss": 0.07930097579956055, "grad_norm": 0.39188480377197266, "learning_rate": 0.00011122126612460831, "epoch": 0.46596134675370615, "step": 14490 }, { "loss": 0.10634902715682984, "grad_norm": 0.7518141865730286, "learning_rate": 0.00011112054898617403, "epoch": 0.4662829211821076, "step": 14500 }, { "loss": 0.06619417667388916, "grad_norm": 0.3403424620628357, "learning_rate": 0.0001110198204245706, "epoch": 0.46660449561050904, "step": 14510 }, { "loss": 0.10738317966461182, "grad_norm": 0.7575116157531738, "learning_rate": 0.00011091908054326763, "epoch": 0.4669260700389105, "step": 14520 }, { "loss": 0.06974626183509827, "grad_norm": 0.22222907841205597, "learning_rate": 0.0001108183294457464, "epoch": 0.467247644467312, "step": 14530 }, { "loss": 0.07733258008956909, "grad_norm": 0.5152522921562195, "learning_rate": 0.00011071756723549973, "epoch": 0.4675692188957134, "step": 14540 }, { "loss": 0.10055925846099853, "grad_norm": 0.5919005274772644, "learning_rate": 0.00011061679401603189, "epoch": 0.4678907933241149, "step": 14550 }, { "loss": 0.10045595169067383, "grad_norm": 0.5742218494415283, "learning_rate": 0.00011051600989085831, "epoch": 0.46821236775251635, "step": 14560 }, { "loss": 0.09662904739379882, "grad_norm": 0.6789755821228027, "learning_rate": 0.00011041521496350582, "epoch": 0.46853394218091776, "step": 14570 }, { "loss": 0.10636229515075683, "grad_norm": 0.6205703020095825, "learning_rate": 0.0001103144093375122, "epoch": 0.46885551660931923, "step": 14580 }, { "loss": 0.09271013140678405, "grad_norm": 0.6065817475318909, "learning_rate": 0.00011021359311642629, "epoch": 0.4691770910377207, "step": 14590 }, { "loss": 0.10116171836853027, "grad_norm": 0.6617358326911926, "learning_rate": 0.00011011276640380776, "epoch": 0.4694986654661221, "step": 14600 }, { "loss": 0.07309399843215943, "grad_norm": 0.4628552496433258, "learning_rate": 0.00011001192930322715, "epoch": 0.4698202398945236, "step": 14610 }, { "loss": 0.09705093502998352, "grad_norm": 0.5517852902412415, "learning_rate": 0.00010991108191826553, "epoch": 0.47014181432292507, "step": 14620 }, { "loss": 0.079444420337677, "grad_norm": 0.5863767266273499, "learning_rate": 0.0001098102243525147, "epoch": 0.4704633887513265, "step": 14630 }, { "loss": 0.08762736916542054, "grad_norm": 0.8725037574768066, "learning_rate": 0.00010970935670957676, "epoch": 0.47078496317972796, "step": 14640 }, { "loss": 0.109092116355896, "grad_norm": 9.460938453674316, "learning_rate": 0.00010960847909306429, "epoch": 0.4711065376081294, "step": 14650 }, { "loss": 0.08770105242729187, "grad_norm": 0.46180495619773865, "learning_rate": 0.00010950759160660002, "epoch": 0.47142811203653084, "step": 14660 }, { "loss": 0.08387916088104248, "grad_norm": 0.7593671083450317, "learning_rate": 0.00010940669435381689, "epoch": 0.4717496864649323, "step": 14670 }, { "loss": 0.092861807346344, "grad_norm": 0.8851556777954102, "learning_rate": 0.00010930578743835782, "epoch": 0.4720712608933338, "step": 14680 }, { "loss": 0.0722186267375946, "grad_norm": 0.3864416480064392, "learning_rate": 0.00010920487096387564, "epoch": 0.4723928353217352, "step": 14690 }, { "loss": 0.0851593255996704, "grad_norm": 0.885635495185852, "learning_rate": 0.00010910394503403314, "epoch": 0.4727144097501367, "step": 14700 }, { "loss": 0.09186208248138428, "grad_norm": 0.5280024409294128, "learning_rate": 0.00010900300975250261, "epoch": 0.47303598417853815, "step": 14710 }, { "loss": 0.09017241597175599, "grad_norm": 0.3785780966281891, "learning_rate": 0.00010890206522296609, "epoch": 0.47335755860693957, "step": 14720 }, { "loss": 0.10533970594406128, "grad_norm": 0.47699448466300964, "learning_rate": 0.0001088011115491151, "epoch": 0.47367913303534104, "step": 14730 }, { "loss": 0.08337773680686951, "grad_norm": 0.5329119563102722, "learning_rate": 0.00010870014883465052, "epoch": 0.4740007074637425, "step": 14740 }, { "loss": 0.06845346093177795, "grad_norm": 0.6583190560340881, "learning_rate": 0.00010859917718328251, "epoch": 0.4743222818921439, "step": 14750 }, { "loss": 0.09752205610275269, "grad_norm": 0.6177473068237305, "learning_rate": 0.0001084981966987305, "epoch": 0.4746438563205454, "step": 14760 }, { "loss": 0.09779452085494995, "grad_norm": 0.5897741913795471, "learning_rate": 0.00010839720748472286, "epoch": 0.47496543074894687, "step": 14770 }, { "loss": 0.09048707485198974, "grad_norm": 1.006715178489685, "learning_rate": 0.00010829620964499702, "epoch": 0.4752870051773483, "step": 14780 }, { "loss": 0.08427447080612183, "grad_norm": 0.42974308133125305, "learning_rate": 0.00010819520328329927, "epoch": 0.47560857960574976, "step": 14790 }, { "loss": 0.08650519847869872, "grad_norm": 0.428631991147995, "learning_rate": 0.00010809418850338457, "epoch": 0.47593015403415123, "step": 14800 }, { "loss": 0.09579732418060302, "grad_norm": 0.6555226445198059, "learning_rate": 0.00010799316540901665, "epoch": 0.47625172846255265, "step": 14810 }, { "loss": 0.08336011171340943, "grad_norm": 0.41812366247177124, "learning_rate": 0.00010789213410396764, "epoch": 0.4765733028909541, "step": 14820 }, { "loss": 0.0817683219909668, "grad_norm": 0.9479015469551086, "learning_rate": 0.00010779109469201825, "epoch": 0.4768948773193556, "step": 14830 }, { "loss": 0.07006314992904664, "grad_norm": 0.6454442143440247, "learning_rate": 0.0001076900472769574, "epoch": 0.477216451747757, "step": 14840 }, { "loss": 0.10103509426116944, "grad_norm": 0.8812865614891052, "learning_rate": 0.00010758899196258228, "epoch": 0.4775380261761585, "step": 14850 }, { "loss": 0.07926849722862243, "grad_norm": 1.1739628314971924, "learning_rate": 0.00010748792885269827, "epoch": 0.47785960060455995, "step": 14860 }, { "loss": 0.10146574974060059, "grad_norm": 0.4796530604362488, "learning_rate": 0.00010738685805111859, "epoch": 0.47818117503296137, "step": 14870 }, { "loss": 0.09071105122566223, "grad_norm": 0.6305550336837769, "learning_rate": 0.00010728577966166447, "epoch": 0.47850274946136284, "step": 14880 }, { "loss": 0.10043426752090454, "grad_norm": 0.6661666035652161, "learning_rate": 0.00010718469378816491, "epoch": 0.47882432388976426, "step": 14890 }, { "loss": 0.09648953080177307, "grad_norm": 0.5236237049102783, "learning_rate": 0.00010708360053445665, "epoch": 0.47914589831816573, "step": 14900 }, { "loss": 0.08368160128593445, "grad_norm": 0.458296537399292, "learning_rate": 0.00010698250000438394, "epoch": 0.4794674727465672, "step": 14910 }, { "loss": 0.09150348901748658, "grad_norm": 0.5067198872566223, "learning_rate": 0.00010688139230179852, "epoch": 0.4797890471749686, "step": 14920 }, { "loss": 0.08629463911056519, "grad_norm": 0.8166804909706116, "learning_rate": 0.00010678027753055952, "epoch": 0.4801106216033701, "step": 14930 }, { "loss": 0.07823537588119507, "grad_norm": 0.5457392930984497, "learning_rate": 0.00010667915579453327, "epoch": 0.48043219603177156, "step": 14940 }, { "loss": 0.10616787672042846, "grad_norm": 0.9504504203796387, "learning_rate": 0.00010657802719759337, "epoch": 0.480753770460173, "step": 14950 }, { "loss": 0.09666532874107361, "grad_norm": 0.8659750819206238, "learning_rate": 0.00010647689184362037, "epoch": 0.48107534488857445, "step": 14960 }, { "loss": 0.0889670729637146, "grad_norm": 0.3620588481426239, "learning_rate": 0.00010637574983650178, "epoch": 0.4813969193169759, "step": 14970 }, { "loss": 0.08167270421981812, "grad_norm": 0.5511088371276855, "learning_rate": 0.00010627460128013199, "epoch": 0.48171849374537734, "step": 14980 }, { "loss": 0.08849920630455017, "grad_norm": 0.5692965388298035, "learning_rate": 0.00010617344627841204, "epoch": 0.4820400681737788, "step": 14990 }, { "loss": 0.0818578541278839, "grad_norm": 0.6101301312446594, "learning_rate": 0.00010607228493524963, "epoch": 0.4823616426021803, "step": 15000 }, { "loss": 0.09417021870613099, "grad_norm": 0.5213767886161804, "learning_rate": 0.00010597111735455902, "epoch": 0.4826832170305817, "step": 15010 }, { "loss": 0.07528434991836548, "grad_norm": 0.5721901655197144, "learning_rate": 0.00010586994364026082, "epoch": 0.4830047914589832, "step": 15020 }, { "loss": 0.093703293800354, "grad_norm": 0.4465864598751068, "learning_rate": 0.00010576876389628192, "epoch": 0.48332636588738465, "step": 15030 }, { "loss": 0.09364048838615417, "grad_norm": 0.6290184855461121, "learning_rate": 0.00010566757822655544, "epoch": 0.48364794031578606, "step": 15040 }, { "loss": 0.0754182755947113, "grad_norm": 0.4668210446834564, "learning_rate": 0.00010556638673502063, "epoch": 0.48396951474418753, "step": 15050 }, { "loss": 0.10114016532897949, "grad_norm": 0.7117626070976257, "learning_rate": 0.00010546518952562267, "epoch": 0.484291089172589, "step": 15060 }, { "loss": 0.0868488073348999, "grad_norm": 0.6074398159980774, "learning_rate": 0.0001053639867023126, "epoch": 0.4846126636009904, "step": 15070 }, { "loss": 0.07774009704589843, "grad_norm": 0.43255162239074707, "learning_rate": 0.00010526277836904726, "epoch": 0.4849342380293919, "step": 15080 }, { "loss": 0.07248947024345398, "grad_norm": 0.28224503993988037, "learning_rate": 0.0001051615646297891, "epoch": 0.48525581245779337, "step": 15090 }, { "loss": 0.09597593545913696, "grad_norm": 0.43093428015708923, "learning_rate": 0.00010506034558850618, "epoch": 0.4855773868861948, "step": 15100 }, { "loss": 0.09701937437057495, "grad_norm": 0.3181447684764862, "learning_rate": 0.00010495912134917199, "epoch": 0.48589896131459626, "step": 15110 }, { "loss": 0.08382670879364014, "grad_norm": 0.8677729964256287, "learning_rate": 0.00010485789201576539, "epoch": 0.48622053574299773, "step": 15120 }, { "loss": 0.08611851334571838, "grad_norm": 0.8189012408256531, "learning_rate": 0.00010475665769227034, "epoch": 0.48654211017139914, "step": 15130 }, { "loss": 0.09586833715438843, "grad_norm": 0.655185878276825, "learning_rate": 0.00010465541848267613, "epoch": 0.4868636845998006, "step": 15140 }, { "loss": 0.08906262516975402, "grad_norm": 0.6886798143386841, "learning_rate": 0.00010455417449097689, "epoch": 0.4871852590282021, "step": 15150 }, { "loss": 0.09160004258155822, "grad_norm": 0.44516444206237793, "learning_rate": 0.00010445292582117177, "epoch": 0.4875068334566035, "step": 15160 }, { "loss": 0.07538365125656128, "grad_norm": 0.743714451789856, "learning_rate": 0.00010435167257726468, "epoch": 0.487828407885005, "step": 15170 }, { "loss": 0.08665584921836852, "grad_norm": 0.6289533376693726, "learning_rate": 0.00010425041486326424, "epoch": 0.48814998231340645, "step": 15180 }, { "loss": 0.07832264304161071, "grad_norm": 0.6426935791969299, "learning_rate": 0.00010414915278318367, "epoch": 0.48847155674180787, "step": 15190 }, { "loss": 0.06755251884460449, "grad_norm": 0.5041143298149109, "learning_rate": 0.0001040478864410406, "epoch": 0.48879313117020934, "step": 15200 }, { "loss": 0.07771340012550354, "grad_norm": 0.42428064346313477, "learning_rate": 0.0001039466159408572, "epoch": 0.4891147055986108, "step": 15210 }, { "loss": 0.09802461862564087, "grad_norm": 0.9530168771743774, "learning_rate": 0.00010384534138665975, "epoch": 0.4894362800270122, "step": 15220 }, { "loss": 0.10731257200241089, "grad_norm": 0.7082659006118774, "learning_rate": 0.0001037440628824788, "epoch": 0.4897578544554137, "step": 15230 }, { "loss": 0.1051175832748413, "grad_norm": 0.7191018462181091, "learning_rate": 0.00010364278053234889, "epoch": 0.49007942888381517, "step": 15240 }, { "loss": 0.0677166998386383, "grad_norm": 0.49812623858451843, "learning_rate": 0.00010354149444030854, "epoch": 0.4904010033122166, "step": 15250 }, { "loss": 0.09486074447631836, "grad_norm": 1.0251941680908203, "learning_rate": 0.0001034402047104001, "epoch": 0.49072257774061806, "step": 15260 }, { "loss": 0.08186472058296204, "grad_norm": 0.5029348731040955, "learning_rate": 0.00010333891144666968, "epoch": 0.49104415216901953, "step": 15270 }, { "loss": 0.08887283802032471, "grad_norm": 0.6932932138442993, "learning_rate": 0.00010323761475316705, "epoch": 0.49136572659742095, "step": 15280 }, { "loss": 0.11510454416275025, "grad_norm": 0.8928279876708984, "learning_rate": 0.00010313631473394536, "epoch": 0.4916873010258224, "step": 15290 }, { "loss": 0.07902852892875671, "grad_norm": 0.6534966230392456, "learning_rate": 0.00010303501149306136, "epoch": 0.4920088754542239, "step": 15300 }, { "loss": 0.08356289267539978, "grad_norm": 0.7262250781059265, "learning_rate": 0.00010293370513457498, "epoch": 0.4923304498826253, "step": 15310 }, { "loss": 0.09315394163131714, "grad_norm": 1.0881065130233765, "learning_rate": 0.00010283239576254944, "epoch": 0.4926520243110268, "step": 15320 }, { "loss": 0.10244693756103515, "grad_norm": 0.7236805558204651, "learning_rate": 0.00010273108348105099, "epoch": 0.49297359873942825, "step": 15330 }, { "loss": 0.07832973003387451, "grad_norm": 0.48782211542129517, "learning_rate": 0.00010262976839414888, "epoch": 0.49329517316782967, "step": 15340 }, { "loss": 0.07148158550262451, "grad_norm": 0.5727787017822266, "learning_rate": 0.0001025284506059153, "epoch": 0.49361674759623114, "step": 15350 }, { "loss": 0.0850343406200409, "grad_norm": 0.7264379858970642, "learning_rate": 0.00010242713022042509, "epoch": 0.4939383220246326, "step": 15360 }, { "loss": 0.08559871315956116, "grad_norm": 0.626274049282074, "learning_rate": 0.00010232580734175591, "epoch": 0.49425989645303403, "step": 15370 }, { "loss": 0.07028576731681824, "grad_norm": 0.6762192845344543, "learning_rate": 0.0001022244820739879, "epoch": 0.4945814708814355, "step": 15380 }, { "loss": 0.09092403054237366, "grad_norm": 0.7768816947937012, "learning_rate": 0.00010212315452120362, "epoch": 0.494903045309837, "step": 15390 }, { "loss": 0.09877740144729615, "grad_norm": 0.8710550665855408, "learning_rate": 0.00010202182478748805, "epoch": 0.4952246197382384, "step": 15400 }, { "loss": 0.09296669363975525, "grad_norm": 0.5330905318260193, "learning_rate": 0.00010192049297692839, "epoch": 0.49554619416663986, "step": 15410 }, { "loss": 0.10949316024780273, "grad_norm": 0.6242778897285461, "learning_rate": 0.00010181915919361391, "epoch": 0.49586776859504134, "step": 15420 }, { "loss": 0.08684821128845215, "grad_norm": 0.5489434003829956, "learning_rate": 0.00010171782354163604, "epoch": 0.49618934302344275, "step": 15430 }, { "loss": 0.07282103300094604, "grad_norm": 0.3458095192909241, "learning_rate": 0.00010161648612508799, "epoch": 0.4965109174518442, "step": 15440 }, { "loss": 0.09404530525207519, "grad_norm": 0.5336048007011414, "learning_rate": 0.00010151514704806484, "epoch": 0.4968324918802457, "step": 15450 }, { "loss": 0.07916721701622009, "grad_norm": 1.0568702220916748, "learning_rate": 0.0001014138064146634, "epoch": 0.4971540663086471, "step": 15460 }, { "loss": 0.09599422812461852, "grad_norm": 0.5730618834495544, "learning_rate": 0.00010131246432898206, "epoch": 0.4974756407370486, "step": 15470 }, { "loss": 0.09638643264770508, "grad_norm": 0.6685811877250671, "learning_rate": 0.0001012111208951207, "epoch": 0.49779721516545006, "step": 15480 }, { "loss": 0.09276297092437744, "grad_norm": 0.2251046597957611, "learning_rate": 0.00010110977621718056, "epoch": 0.4981187895938515, "step": 15490 }, { "loss": 0.10188002586364746, "grad_norm": 0.6626012325286865, "learning_rate": 0.0001010084303992642, "epoch": 0.49844036402225295, "step": 15500 }, { "loss": 0.07401142120361329, "grad_norm": 0.5241210460662842, "learning_rate": 0.00010090708354547531, "epoch": 0.4987619384506544, "step": 15510 }, { "loss": 0.07829184532165527, "grad_norm": 0.4019498825073242, "learning_rate": 0.00010080573575991867, "epoch": 0.49908351287905584, "step": 15520 }, { "loss": 0.07195114493370056, "grad_norm": 0.4469619393348694, "learning_rate": 0.00010070438714670002, "epoch": 0.4994050873074573, "step": 15530 }, { "loss": 0.0959662914276123, "grad_norm": 0.4518377482891083, "learning_rate": 0.00010060303780992599, "epoch": 0.4997266617358588, "step": 15540 }, { "eval_loss": 0.0814763680100441, "eval_runtime": 34.9513, "eval_samples_per_second": 143.8, "eval_steps_per_second": 35.964, "epoch": 0.4997266617358588, "step": 15540 }, { "loss": 0.08837600946426391, "grad_norm": 0.2871614396572113, "learning_rate": 0.00010050168785370382, "epoch": 0.5000482361642602, "step": 15550 }, { "loss": 0.09808437824249268, "grad_norm": 0.546829104423523, "learning_rate": 0.00010040033738214154, "epoch": 0.5003698105926617, "step": 15560 }, { "loss": 0.06960054636001586, "grad_norm": 0.4438028335571289, "learning_rate": 0.00010029898649934759, "epoch": 0.5006913850210631, "step": 15570 }, { "loss": 0.09131770133972168, "grad_norm": 0.7185057401657104, "learning_rate": 0.00010019763530943092, "epoch": 0.5010129594494646, "step": 15580 }, { "loss": 0.0653073251247406, "grad_norm": 0.5891302227973938, "learning_rate": 0.00010009628391650077, "epoch": 0.5013345338778661, "step": 15590 }, { "loss": 0.09859957098960877, "grad_norm": 0.7047641277313232, "learning_rate": 9.999493242466654e-05, "epoch": 0.5016561083062675, "step": 15600 }, { "loss": 0.09193578362464905, "grad_norm": 0.5679323077201843, "learning_rate": 9.989358093803777e-05, "epoch": 0.5019776827346689, "step": 15610 }, { "loss": 0.09180984497070313, "grad_norm": 0.6876428127288818, "learning_rate": 9.979222956072404e-05, "epoch": 0.5022992571630704, "step": 15620 }, { "loss": 0.10084093809127807, "grad_norm": 0.2858849763870239, "learning_rate": 9.969087839683468e-05, "epoch": 0.5026208315914719, "step": 15630 }, { "loss": 0.07768757343292236, "grad_norm": 0.2807064354419708, "learning_rate": 9.958952755047905e-05, "epoch": 0.5029424060198733, "step": 15640 }, { "loss": 0.11399703025817871, "grad_norm": 0.7593263983726501, "learning_rate": 9.94881771257659e-05, "epoch": 0.5032639804482748, "step": 15650 }, { "loss": 0.07852271795272828, "grad_norm": 0.565068244934082, "learning_rate": 9.93868272268037e-05, "epoch": 0.5035855548766762, "step": 15660 }, { "loss": 0.0828331708908081, "grad_norm": 0.6039252877235413, "learning_rate": 9.928547795770036e-05, "epoch": 0.5039071293050776, "step": 15670 }, { "loss": 0.07960823178291321, "grad_norm": 0.39925456047058105, "learning_rate": 9.918412942256315e-05, "epoch": 0.5042287037334792, "step": 15680 }, { "loss": 0.07466946840286255, "grad_norm": 0.564476490020752, "learning_rate": 9.908278172549854e-05, "epoch": 0.5045502781618806, "step": 15690 }, { "loss": 0.08819963335990906, "grad_norm": 0.3453824520111084, "learning_rate": 9.89814349706122e-05, "epoch": 0.504871852590282, "step": 15700 }, { "loss": 0.1143036127090454, "grad_norm": 0.9882622361183167, "learning_rate": 9.888008926200881e-05, "epoch": 0.5051934270186835, "step": 15710 }, { "loss": 0.08639020919799804, "grad_norm": 0.7949790358543396, "learning_rate": 9.877874470379195e-05, "epoch": 0.5055150014470849, "step": 15720 }, { "loss": 0.07429859042167664, "grad_norm": 0.6657055616378784, "learning_rate": 9.867740140006406e-05, "epoch": 0.5058365758754864, "step": 15730 }, { "loss": 0.07545442581176758, "grad_norm": 0.48248526453971863, "learning_rate": 9.857605945492624e-05, "epoch": 0.5061581503038879, "step": 15740 }, { "loss": 0.07939736843109131, "grad_norm": 0.5649476647377014, "learning_rate": 9.847471897247823e-05, "epoch": 0.5064797247322893, "step": 15750 }, { "loss": 0.0747692346572876, "grad_norm": 0.6028984189033508, "learning_rate": 9.837338005681827e-05, "epoch": 0.5068012991606907, "step": 15760 }, { "loss": 0.06601083874702454, "grad_norm": 0.3233937919139862, "learning_rate": 9.827204281204298e-05, "epoch": 0.5071228735890922, "step": 15770 }, { "loss": 0.08770567774772645, "grad_norm": 0.48834502696990967, "learning_rate": 9.817070734224726e-05, "epoch": 0.5074444480174937, "step": 15780 }, { "loss": 0.10232018232345581, "grad_norm": 0.5461081862449646, "learning_rate": 9.806937375152422e-05, "epoch": 0.5077660224458951, "step": 15790 }, { "loss": 0.09111140370368957, "grad_norm": 0.23557019233703613, "learning_rate": 9.796804214396498e-05, "epoch": 0.5080875968742966, "step": 15800 }, { "loss": 0.094402277469635, "grad_norm": 0.8346803188323975, "learning_rate": 9.786671262365868e-05, "epoch": 0.508409171302698, "step": 15810 }, { "loss": 0.10833892822265626, "grad_norm": 0.817200779914856, "learning_rate": 9.77653852946922e-05, "epoch": 0.5087307457310994, "step": 15820 }, { "loss": 0.09572397470474243, "grad_norm": 0.7927505970001221, "learning_rate": 9.766406026115038e-05, "epoch": 0.509052320159501, "step": 15830 }, { "loss": 0.08352859020233154, "grad_norm": 0.6671968102455139, "learning_rate": 9.756273762711554e-05, "epoch": 0.5093738945879024, "step": 15840 }, { "loss": 0.06232442855834961, "grad_norm": 0.5571510791778564, "learning_rate": 9.746141749666759e-05, "epoch": 0.5096954690163038, "step": 15850 }, { "loss": 0.07288626432418824, "grad_norm": 0.49520349502563477, "learning_rate": 9.736009997388382e-05, "epoch": 0.5100170434447053, "step": 15860 }, { "loss": 0.10119060277938843, "grad_norm": 0.6404814124107361, "learning_rate": 9.725878516283889e-05, "epoch": 0.5103386178731067, "step": 15870 }, { "loss": 0.08675326108932495, "grad_norm": 0.7203865647315979, "learning_rate": 9.71574731676046e-05, "epoch": 0.5106601923015082, "step": 15880 }, { "loss": 0.08972042798995972, "grad_norm": 0.6852189302444458, "learning_rate": 9.705616409225004e-05, "epoch": 0.5109817667299097, "step": 15890 }, { "loss": 0.0967637062072754, "grad_norm": 0.5318194031715393, "learning_rate": 9.695485804084114e-05, "epoch": 0.5113033411583111, "step": 15900 }, { "loss": 0.08468080759048462, "grad_norm": 1.649613857269287, "learning_rate": 9.685355511744075e-05, "epoch": 0.5116249155867125, "step": 15910 }, { "loss": 0.08541063070297242, "grad_norm": 0.6673186421394348, "learning_rate": 9.675225542610847e-05, "epoch": 0.511946490015114, "step": 15920 }, { "loss": 0.07971563339233398, "grad_norm": 0.5428572297096252, "learning_rate": 9.66509590709007e-05, "epoch": 0.5122680644435155, "step": 15930 }, { "loss": 0.07468971610069275, "grad_norm": 0.8105533719062805, "learning_rate": 9.654966615587031e-05, "epoch": 0.5125896388719169, "step": 15940 }, { "loss": 0.08704789280891419, "grad_norm": 0.38906678557395935, "learning_rate": 9.644837678506666e-05, "epoch": 0.5129112133003184, "step": 15950 }, { "loss": 0.07773864269256592, "grad_norm": 0.6124730706214905, "learning_rate": 9.63470910625355e-05, "epoch": 0.5132327877287198, "step": 15960 }, { "loss": 0.09297425150871277, "grad_norm": 1.118937611579895, "learning_rate": 9.624580909231882e-05, "epoch": 0.5135543621571212, "step": 15970 }, { "loss": 0.08964041471481324, "grad_norm": 0.8257461190223694, "learning_rate": 9.614453097845473e-05, "epoch": 0.5138759365855228, "step": 15980 }, { "loss": 0.06911572813987732, "grad_norm": 0.3326082229614258, "learning_rate": 9.604325682497738e-05, "epoch": 0.5141975110139242, "step": 15990 }, { "loss": 0.08799180388450623, "grad_norm": 1.2826687097549438, "learning_rate": 9.59419867359169e-05, "epoch": 0.5145190854423256, "step": 16000 }, { "loss": 0.09639973044395447, "grad_norm": 0.8958343863487244, "learning_rate": 9.584072081529916e-05, "epoch": 0.514840659870727, "step": 16010 }, { "loss": 0.10252895355224609, "grad_norm": 0.7098028063774109, "learning_rate": 9.573945916714589e-05, "epoch": 0.5151622342991286, "step": 16020 }, { "loss": 0.09719353914260864, "grad_norm": 1.3733038902282715, "learning_rate": 9.563820189547425e-05, "epoch": 0.51548380872753, "step": 16030 }, { "loss": 0.07945320010185242, "grad_norm": 0.3698774576187134, "learning_rate": 9.553694910429704e-05, "epoch": 0.5158053831559314, "step": 16040 }, { "loss": 0.07129523158073425, "grad_norm": 0.4975164532661438, "learning_rate": 9.543570089762242e-05, "epoch": 0.5161269575843329, "step": 16050 }, { "loss": 0.0827067732810974, "grad_norm": 0.41556140780448914, "learning_rate": 9.533445737945382e-05, "epoch": 0.5164485320127343, "step": 16060 }, { "loss": 0.07715771794319153, "grad_norm": 0.6161404252052307, "learning_rate": 9.523321865378982e-05, "epoch": 0.5167701064411357, "step": 16070 }, { "loss": 0.112063467502594, "grad_norm": 0.3975111246109009, "learning_rate": 9.513198482462422e-05, "epoch": 0.5170916808695373, "step": 16080 }, { "loss": 0.08155578374862671, "grad_norm": 0.4807094633579254, "learning_rate": 9.503075599594564e-05, "epoch": 0.5174132552979387, "step": 16090 }, { "loss": 0.09136075973510742, "grad_norm": 0.8183568716049194, "learning_rate": 9.492953227173763e-05, "epoch": 0.5177348297263401, "step": 16100 }, { "loss": 0.08842723369598389, "grad_norm": 0.7488901615142822, "learning_rate": 9.482831375597848e-05, "epoch": 0.5180564041547416, "step": 16110 }, { "loss": 0.09355822801589966, "grad_norm": 0.6784147024154663, "learning_rate": 9.472710055264115e-05, "epoch": 0.518377978583143, "step": 16120 }, { "loss": 0.09595037698745727, "grad_norm": 0.4905896484851837, "learning_rate": 9.462589276569311e-05, "epoch": 0.5186995530115445, "step": 16130 }, { "loss": 0.10003188848495484, "grad_norm": 0.6823483109474182, "learning_rate": 9.452469049909625e-05, "epoch": 0.519021127439946, "step": 16140 }, { "loss": 0.09580358862876892, "grad_norm": 0.7502665519714355, "learning_rate": 9.442349385680689e-05, "epoch": 0.5193427018683474, "step": 16150 }, { "loss": 0.09321611523628234, "grad_norm": 0.40309739112854004, "learning_rate": 9.43223029427755e-05, "epoch": 0.5196642762967488, "step": 16160 }, { "loss": 0.09390120506286621, "grad_norm": 0.3821161091327667, "learning_rate": 9.422111786094666e-05, "epoch": 0.5199858507251504, "step": 16170 }, { "loss": 0.07894954681396485, "grad_norm": 0.4174655079841614, "learning_rate": 9.411993871525895e-05, "epoch": 0.5203074251535518, "step": 16180 }, { "loss": 0.07645484805107117, "grad_norm": 0.843494176864624, "learning_rate": 9.401876560964488e-05, "epoch": 0.5206289995819532, "step": 16190 }, { "loss": 0.07298891544342041, "grad_norm": 0.6622037291526794, "learning_rate": 9.391759864803073e-05, "epoch": 0.5209505740103547, "step": 16200 }, { "loss": 0.08789741396903991, "grad_norm": 0.7242502570152283, "learning_rate": 9.381643793433657e-05, "epoch": 0.5212721484387561, "step": 16210 }, { "loss": 0.08091871738433838, "grad_norm": 0.6538041830062866, "learning_rate": 9.371528357247589e-05, "epoch": 0.5215937228671575, "step": 16220 }, { "loss": 0.08055868744850159, "grad_norm": 0.7289121747016907, "learning_rate": 9.361413566635576e-05, "epoch": 0.5219152972955591, "step": 16230 }, { "loss": 0.09357218742370606, "grad_norm": 0.6839609146118164, "learning_rate": 9.35129943198766e-05, "epoch": 0.5222368717239605, "step": 16240 }, { "loss": 0.09125481843948365, "grad_norm": 0.654301106929779, "learning_rate": 9.341185963693209e-05, "epoch": 0.5225584461523619, "step": 16250 }, { "loss": 0.08754734992980957, "grad_norm": 0.6056458950042725, "learning_rate": 9.331073172140902e-05, "epoch": 0.5228800205807634, "step": 16260 }, { "loss": 0.07239229083061219, "grad_norm": 0.5807142853736877, "learning_rate": 9.320961067718733e-05, "epoch": 0.5232015950091649, "step": 16270 }, { "loss": 0.09158975481987, "grad_norm": 0.7827548384666443, "learning_rate": 9.31084966081398e-05, "epoch": 0.5235231694375663, "step": 16280 }, { "loss": 0.08536049127578735, "grad_norm": 0.5343862771987915, "learning_rate": 9.300738961813212e-05, "epoch": 0.5238447438659678, "step": 16290 }, { "loss": 0.07365837693214417, "grad_norm": 0.6306098103523254, "learning_rate": 9.290628981102265e-05, "epoch": 0.5241663182943692, "step": 16300 }, { "loss": 0.08824599981307983, "grad_norm": 0.4955608546733856, "learning_rate": 9.28051972906624e-05, "epoch": 0.5244878927227706, "step": 16310 }, { "loss": 0.10049338340759277, "grad_norm": 0.6357713341712952, "learning_rate": 9.270411216089487e-05, "epoch": 0.5248094671511722, "step": 16320 }, { "loss": 0.08429625034332275, "grad_norm": 0.692136824131012, "learning_rate": 9.2603034525556e-05, "epoch": 0.5251310415795736, "step": 16330 }, { "loss": 0.07850704789161682, "grad_norm": 0.9536001086235046, "learning_rate": 9.250196448847407e-05, "epoch": 0.525452616007975, "step": 16340 }, { "loss": 0.07307186126708984, "grad_norm": 0.7166012525558472, "learning_rate": 9.240090215346947e-05, "epoch": 0.5257741904363765, "step": 16350 }, { "loss": 0.09322544336318969, "grad_norm": 0.6299146413803101, "learning_rate": 9.229984762435469e-05, "epoch": 0.5260957648647779, "step": 16360 }, { "loss": 0.09763809442520141, "grad_norm": 0.4739687144756317, "learning_rate": 9.219880100493426e-05, "epoch": 0.5264173392931794, "step": 16370 }, { "loss": 0.0830114483833313, "grad_norm": 0.7146755456924438, "learning_rate": 9.209776239900453e-05, "epoch": 0.5267389137215809, "step": 16380 }, { "loss": 0.09164667129516602, "grad_norm": 0.924308717250824, "learning_rate": 9.199673191035363e-05, "epoch": 0.5270604881499823, "step": 16390 }, { "loss": 0.08107462525367737, "grad_norm": 0.6615635752677917, "learning_rate": 9.189570964276141e-05, "epoch": 0.5273820625783837, "step": 16400 }, { "loss": 0.07831215262413024, "grad_norm": 0.29422977566719055, "learning_rate": 9.179469569999919e-05, "epoch": 0.5277036370067852, "step": 16410 }, { "loss": 0.09040266275405884, "grad_norm": 0.6438028812408447, "learning_rate": 9.169369018582978e-05, "epoch": 0.5280252114351867, "step": 16420 }, { "loss": 0.09664230346679688, "grad_norm": 0.5955362915992737, "learning_rate": 9.159269320400735e-05, "epoch": 0.5283467858635881, "step": 16430 }, { "loss": 0.08227856755256653, "grad_norm": 0.7709397077560425, "learning_rate": 9.149170485827725e-05, "epoch": 0.5286683602919896, "step": 16440 }, { "loss": 0.06169787049293518, "grad_norm": 0.682957112789154, "learning_rate": 9.139072525237595e-05, "epoch": 0.528989934720391, "step": 16450 }, { "loss": 0.07773295640945435, "grad_norm": 0.5797860622406006, "learning_rate": 9.12897544900311e-05, "epoch": 0.5293115091487924, "step": 16460 }, { "loss": 0.0780327320098877, "grad_norm": 0.29846492409706116, "learning_rate": 9.118879267496107e-05, "epoch": 0.529633083577194, "step": 16470 }, { "loss": 0.10499351024627686, "grad_norm": 0.6282970309257507, "learning_rate": 9.108783991087515e-05, "epoch": 0.5299546580055954, "step": 16480 }, { "loss": 0.07932085394859315, "grad_norm": 0.6547428965568542, "learning_rate": 9.098689630147328e-05, "epoch": 0.5302762324339968, "step": 16490 }, { "loss": 0.07153794169425964, "grad_norm": 0.45561927556991577, "learning_rate": 9.088596195044602e-05, "epoch": 0.5305978068623983, "step": 16500 }, { "loss": 0.07359915971755981, "grad_norm": 0.6223723292350769, "learning_rate": 9.078503696147443e-05, "epoch": 0.5309193812907997, "step": 16510 }, { "loss": 0.0706337034702301, "grad_norm": 0.7523877024650574, "learning_rate": 9.06841214382299e-05, "epoch": 0.5312409557192012, "step": 16520 }, { "loss": 0.09963027238845826, "grad_norm": 0.557845413684845, "learning_rate": 9.058321548437422e-05, "epoch": 0.5315625301476027, "step": 16530 }, { "loss": 0.0828576385974884, "grad_norm": 0.7379382252693176, "learning_rate": 9.048231920355919e-05, "epoch": 0.5318841045760041, "step": 16540 }, { "loss": 0.07978498935699463, "grad_norm": 0.6947690844535828, "learning_rate": 9.03814326994268e-05, "epoch": 0.5322056790044055, "step": 16550 }, { "loss": 0.09839178323745727, "grad_norm": 0.790624737739563, "learning_rate": 9.028055607560892e-05, "epoch": 0.532527253432807, "step": 16560 }, { "loss": 0.07617455720901489, "grad_norm": 0.8281884789466858, "learning_rate": 9.017968943572732e-05, "epoch": 0.5328488278612085, "step": 16570 }, { "loss": 0.0846377968788147, "grad_norm": 0.36391204595565796, "learning_rate": 9.007883288339345e-05, "epoch": 0.5331704022896099, "step": 16580 }, { "loss": 0.0801987111568451, "grad_norm": 0.6115204095840454, "learning_rate": 8.99779865222085e-05, "epoch": 0.5334919767180114, "step": 16590 }, { "loss": 0.09012607932090759, "grad_norm": 0.5107051134109497, "learning_rate": 8.98771504557631e-05, "epoch": 0.5338135511464128, "step": 16600 }, { "loss": 0.08063666820526123, "grad_norm": 0.3612475097179413, "learning_rate": 8.977632478763735e-05, "epoch": 0.5341351255748142, "step": 16610 }, { "loss": 0.09662036299705505, "grad_norm": 0.7268196940422058, "learning_rate": 8.967550962140066e-05, "epoch": 0.5344567000032158, "step": 16620 }, { "loss": 0.07537992596626282, "grad_norm": 0.8529553413391113, "learning_rate": 8.957470506061164e-05, "epoch": 0.5347782744316172, "step": 16630 }, { "loss": 0.08451104164123535, "grad_norm": 0.9954885244369507, "learning_rate": 8.947391120881802e-05, "epoch": 0.5350998488600186, "step": 16640 }, { "loss": 0.07832143306732178, "grad_norm": 0.5641348958015442, "learning_rate": 8.93731281695565e-05, "epoch": 0.5354214232884201, "step": 16650 }, { "loss": 0.07633439302444459, "grad_norm": 0.712746798992157, "learning_rate": 8.927235604635274e-05, "epoch": 0.5357429977168215, "step": 16660 }, { "loss": 0.06378179192543029, "grad_norm": 0.5640179514884949, "learning_rate": 8.917159494272112e-05, "epoch": 0.536064572145223, "step": 16670 }, { "loss": 0.08095942735671997, "grad_norm": 0.6787976622581482, "learning_rate": 8.907084496216474e-05, "epoch": 0.5363861465736245, "step": 16680 }, { "loss": 0.07943691611289978, "grad_norm": 0.4951293468475342, "learning_rate": 8.897010620817527e-05, "epoch": 0.5367077210020259, "step": 16690 }, { "loss": 0.08359546661376953, "grad_norm": 0.5979715585708618, "learning_rate": 8.886937878423281e-05, "epoch": 0.5370292954304273, "step": 16700 }, { "loss": 0.05622500777244568, "grad_norm": 0.35826343297958374, "learning_rate": 8.876866279380582e-05, "epoch": 0.5373508698588289, "step": 16710 }, { "loss": 0.0896606206893921, "grad_norm": 0.6729931235313416, "learning_rate": 8.866795834035111e-05, "epoch": 0.5376724442872303, "step": 16720 }, { "loss": 0.08825923204421997, "grad_norm": 0.6600592732429504, "learning_rate": 8.856726552731355e-05, "epoch": 0.5379940187156317, "step": 16730 }, { "loss": 0.07278905510902405, "grad_norm": 0.5084134936332703, "learning_rate": 8.84665844581261e-05, "epoch": 0.5383155931440332, "step": 16740 }, { "loss": 0.07857139706611634, "grad_norm": 0.7476891875267029, "learning_rate": 8.836591523620959e-05, "epoch": 0.5386371675724346, "step": 16750 }, { "loss": 0.0739391565322876, "grad_norm": 1.2707487344741821, "learning_rate": 8.826525796497273e-05, "epoch": 0.538958742000836, "step": 16760 }, { "loss": 0.08058958649635314, "grad_norm": 0.28494247794151306, "learning_rate": 8.816461274781193e-05, "epoch": 0.5392803164292376, "step": 16770 }, { "loss": 0.06720038652420043, "grad_norm": 0.48339444398880005, "learning_rate": 8.80639796881113e-05, "epoch": 0.539601890857639, "step": 16780 }, { "loss": 0.06298373341560363, "grad_norm": 0.4123383164405823, "learning_rate": 8.796335888924239e-05, "epoch": 0.5399234652860404, "step": 16790 }, { "loss": 0.09222426414489746, "grad_norm": 0.8985582590103149, "learning_rate": 8.78627504545641e-05, "epoch": 0.5402450397144419, "step": 16800 }, { "loss": 0.07999668121337891, "grad_norm": 0.57627934217453, "learning_rate": 8.776215448742274e-05, "epoch": 0.5405666141428433, "step": 16810 }, { "loss": 0.09616152048110962, "grad_norm": 0.5940356254577637, "learning_rate": 8.766157109115174e-05, "epoch": 0.5408881885712448, "step": 16820 }, { "loss": 0.0929766058921814, "grad_norm": 1.051363468170166, "learning_rate": 8.756100036907166e-05, "epoch": 0.5412097629996463, "step": 16830 }, { "loss": 0.08186267614364624, "grad_norm": 0.942518949508667, "learning_rate": 8.746044242449001e-05, "epoch": 0.5415313374280477, "step": 16840 }, { "loss": 0.08210628032684326, "grad_norm": 0.37757983803749084, "learning_rate": 8.735989736070121e-05, "epoch": 0.5418529118564491, "step": 16850 }, { "loss": 0.07749378681182861, "grad_norm": 0.6084452867507935, "learning_rate": 8.72593652809864e-05, "epoch": 0.5421744862848507, "step": 16860 }, { "loss": 0.0946018636226654, "grad_norm": 0.681425154209137, "learning_rate": 8.715884628861344e-05, "epoch": 0.5424960607132521, "step": 16870 }, { "loss": 0.09242323637008668, "grad_norm": 0.5492236018180847, "learning_rate": 8.705834048683669e-05, "epoch": 0.5428176351416535, "step": 16880 }, { "loss": 0.08967363834381104, "grad_norm": 0.3688710331916809, "learning_rate": 8.6957847978897e-05, "epoch": 0.543139209570055, "step": 16890 }, { "loss": 0.0760436475276947, "grad_norm": 0.4894743859767914, "learning_rate": 8.685736886802152e-05, "epoch": 0.5434607839984564, "step": 16900 }, { "loss": 0.06940741539001465, "grad_norm": 0.28525763750076294, "learning_rate": 8.675690325742371e-05, "epoch": 0.5437823584268578, "step": 16910 }, { "loss": 0.07846690416336059, "grad_norm": 0.7112274169921875, "learning_rate": 8.665645125030311e-05, "epoch": 0.5441039328552594, "step": 16920 }, { "loss": 0.10269311666488648, "grad_norm": 0.4686936140060425, "learning_rate": 8.65560129498453e-05, "epoch": 0.5444255072836608, "step": 16930 }, { "loss": 0.08768737316131592, "grad_norm": 0.5227848887443542, "learning_rate": 8.645558845922177e-05, "epoch": 0.5447470817120622, "step": 16940 }, { "loss": 0.08848978281021118, "grad_norm": 0.7514484524726868, "learning_rate": 8.635517788158982e-05, "epoch": 0.5450686561404637, "step": 16950 }, { "loss": 0.06979380249977112, "grad_norm": 0.5240973234176636, "learning_rate": 8.625478132009248e-05, "epoch": 0.5453902305688652, "step": 16960 }, { "loss": 0.09580703973770141, "grad_norm": 0.374254435300827, "learning_rate": 8.615439887785838e-05, "epoch": 0.5457118049972666, "step": 16970 }, { "loss": 0.060118526220321655, "grad_norm": 1.8248990774154663, "learning_rate": 8.605403065800168e-05, "epoch": 0.5460333794256681, "step": 16980 }, { "loss": 0.0934792399406433, "grad_norm": 0.6583986282348633, "learning_rate": 8.595367676362181e-05, "epoch": 0.5463549538540695, "step": 16990 }, { "loss": 0.09412904381752014, "grad_norm": 0.46310746669769287, "learning_rate": 8.585333729780361e-05, "epoch": 0.5466765282824709, "step": 17000 }, { "loss": 0.08042793273925782, "grad_norm": 0.7318806052207947, "learning_rate": 8.575301236361708e-05, "epoch": 0.5469981027108725, "step": 17010 }, { "loss": 0.08339504599571228, "grad_norm": 0.817503035068512, "learning_rate": 8.565270206411722e-05, "epoch": 0.5473196771392739, "step": 17020 }, { "loss": 0.08974653482437134, "grad_norm": 0.582861602306366, "learning_rate": 8.555240650234399e-05, "epoch": 0.5476412515676753, "step": 17030 }, { "loss": 0.0690845251083374, "grad_norm": 0.4486590325832367, "learning_rate": 8.54521257813224e-05, "epoch": 0.5479628259960768, "step": 17040 }, { "loss": 0.08085647821426392, "grad_norm": 0.48037925362586975, "learning_rate": 8.535186000406204e-05, "epoch": 0.5482844004244782, "step": 17050 }, { "loss": 0.10313069820404053, "grad_norm": 0.935422420501709, "learning_rate": 8.525160927355712e-05, "epoch": 0.5486059748528797, "step": 17060 }, { "loss": 0.08438910245895385, "grad_norm": 0.7324216365814209, "learning_rate": 8.515137369278651e-05, "epoch": 0.5489275492812812, "step": 17070 }, { "loss": 0.06928063035011292, "grad_norm": 0.3410673439502716, "learning_rate": 8.50511533647135e-05, "epoch": 0.5492491237096826, "step": 17080 }, { "loss": 0.06782970428466797, "grad_norm": 0.6539098024368286, "learning_rate": 8.49509483922856e-05, "epoch": 0.549570698138084, "step": 17090 }, { "eval_loss": 0.07829523831605911, "eval_runtime": 35.216, "eval_samples_per_second": 142.719, "eval_steps_per_second": 35.694, "epoch": 0.5496993279094446, "step": 17094 }, { "loss": 0.08837011456489563, "grad_norm": 0.6394001245498657, "learning_rate": 8.485075887843473e-05, "epoch": 0.5498922725664855, "step": 17100 }, { "loss": 0.06662584543228149, "grad_norm": 0.5925083756446838, "learning_rate": 8.475058492607677e-05, "epoch": 0.550213846994887, "step": 17110 }, { "loss": 0.09058014750480652, "grad_norm": 0.6222550868988037, "learning_rate": 8.465042663811172e-05, "epoch": 0.5505354214232884, "step": 17120 }, { "loss": 0.07723432183265685, "grad_norm": 0.5994893312454224, "learning_rate": 8.455028411742339e-05, "epoch": 0.5508569958516899, "step": 17130 }, { "loss": 0.07140976786613465, "grad_norm": 0.998021125793457, "learning_rate": 8.445015746687948e-05, "epoch": 0.5511785702800913, "step": 17140 }, { "loss": 0.08850755095481873, "grad_norm": 0.33564454317092896, "learning_rate": 8.435004678933134e-05, "epoch": 0.5515001447084927, "step": 17150 }, { "loss": 0.09072214961051941, "grad_norm": 0.5762131214141846, "learning_rate": 8.424995218761397e-05, "epoch": 0.5518217191368943, "step": 17160 }, { "loss": 0.09939899444580078, "grad_norm": 0.6515547037124634, "learning_rate": 8.41498737645458e-05, "epoch": 0.5521432935652957, "step": 17170 }, { "loss": 0.0911516547203064, "grad_norm": 0.4145786166191101, "learning_rate": 8.404981162292862e-05, "epoch": 0.5524648679936971, "step": 17180 }, { "loss": 0.10890164375305175, "grad_norm": 0.7293618321418762, "learning_rate": 8.39497658655476e-05, "epoch": 0.5527864424220986, "step": 17190 }, { "loss": 0.08640298247337341, "grad_norm": 0.7666877508163452, "learning_rate": 8.384973659517094e-05, "epoch": 0.5531080168505, "step": 17200 }, { "loss": 0.0860232651233673, "grad_norm": 0.5176871418952942, "learning_rate": 8.374972391455002e-05, "epoch": 0.5534295912789015, "step": 17210 }, { "loss": 0.07132713198661804, "grad_norm": 0.23162242770195007, "learning_rate": 8.364972792641912e-05, "epoch": 0.553751165707303, "step": 17220 }, { "loss": 0.05892365574836731, "grad_norm": 0.5613875985145569, "learning_rate": 8.354974873349542e-05, "epoch": 0.5540727401357044, "step": 17230 }, { "loss": 0.07454466819763184, "grad_norm": 0.8335756063461304, "learning_rate": 8.344978643847879e-05, "epoch": 0.5543943145641058, "step": 17240 }, { "loss": 0.07813173532485962, "grad_norm": 0.833672046661377, "learning_rate": 8.334984114405177e-05, "epoch": 0.5547158889925073, "step": 17250 }, { "loss": 0.057703042030334474, "grad_norm": 0.38010871410369873, "learning_rate": 8.324991295287943e-05, "epoch": 0.5550374634209088, "step": 17260 }, { "loss": 0.0920906662940979, "grad_norm": 0.5197877287864685, "learning_rate": 8.315000196760933e-05, "epoch": 0.5553590378493102, "step": 17270 }, { "loss": 0.08447323441505432, "grad_norm": 0.7848365902900696, "learning_rate": 8.305010829087118e-05, "epoch": 0.5556806122777117, "step": 17280 }, { "loss": 0.08707159161567687, "grad_norm": 0.5702301859855652, "learning_rate": 8.295023202527718e-05, "epoch": 0.5560021867061131, "step": 17290 }, { "loss": 0.07622004747390747, "grad_norm": 0.6026646494865417, "learning_rate": 8.285037327342142e-05, "epoch": 0.5563237611345145, "step": 17300 }, { "loss": 0.06785175204277039, "grad_norm": 0.8649535179138184, "learning_rate": 8.275053213788007e-05, "epoch": 0.5566453355629161, "step": 17310 }, { "loss": 0.1002342700958252, "grad_norm": 0.7633518576622009, "learning_rate": 8.265070872121129e-05, "epoch": 0.5569669099913175, "step": 17320 }, { "loss": 0.0922668993473053, "grad_norm": 0.5194448828697205, "learning_rate": 8.255090312595487e-05, "epoch": 0.5572884844197189, "step": 17330 }, { "loss": 0.10162649154663086, "grad_norm": 0.6360050439834595, "learning_rate": 8.245111545463243e-05, "epoch": 0.5576100588481204, "step": 17340 }, { "loss": 0.06755626797676087, "grad_norm": 0.5018109083175659, "learning_rate": 8.235134580974706e-05, "epoch": 0.5579316332765218, "step": 17350 }, { "loss": 0.06583888530731201, "grad_norm": 0.6680736541748047, "learning_rate": 8.225159429378353e-05, "epoch": 0.5582532077049233, "step": 17360 }, { "loss": 0.07393178939819336, "grad_norm": 0.823982298374176, "learning_rate": 8.215186100920783e-05, "epoch": 0.5585747821333248, "step": 17370 }, { "loss": 0.08291969895362854, "grad_norm": 0.7191532850265503, "learning_rate": 8.205214605846721e-05, "epoch": 0.5588963565617262, "step": 17380 }, { "loss": 0.08974564075469971, "grad_norm": 0.5096963047981262, "learning_rate": 8.195244954399014e-05, "epoch": 0.5592179309901276, "step": 17390 }, { "loss": 0.09367521405220032, "grad_norm": 0.5104748010635376, "learning_rate": 8.185277156818619e-05, "epoch": 0.5595395054185291, "step": 17400 }, { "loss": 0.08481093645095825, "grad_norm": 0.5822654366493225, "learning_rate": 8.175311223344578e-05, "epoch": 0.5598610798469306, "step": 17410 }, { "loss": 0.06985856294631958, "grad_norm": 0.5147339701652527, "learning_rate": 8.16534716421403e-05, "epoch": 0.560182654275332, "step": 17420 }, { "loss": 0.09017146229743958, "grad_norm": 0.6477312445640564, "learning_rate": 8.155384989662181e-05, "epoch": 0.5605042287037335, "step": 17430 }, { "loss": 0.10362013578414916, "grad_norm": 0.530701756477356, "learning_rate": 8.145424709922305e-05, "epoch": 0.5608258031321349, "step": 17440 }, { "loss": 0.10210756063461304, "grad_norm": 0.5977575182914734, "learning_rate": 8.135466335225726e-05, "epoch": 0.5611473775605363, "step": 17450 }, { "loss": 0.08222898840904236, "grad_norm": 0.5708121657371521, "learning_rate": 8.125509875801813e-05, "epoch": 0.5614689519889379, "step": 17460 }, { "loss": 0.07522875666618348, "grad_norm": 0.5921570062637329, "learning_rate": 8.115555341877965e-05, "epoch": 0.5617905264173393, "step": 17470 }, { "loss": 0.08083952069282532, "grad_norm": 0.9275753498077393, "learning_rate": 8.10560274367961e-05, "epoch": 0.5621121008457407, "step": 17480 }, { "loss": 0.10283910036087036, "grad_norm": 0.4716049134731293, "learning_rate": 8.095652091430184e-05, "epoch": 0.5624336752741422, "step": 17490 }, { "loss": 0.08383597135543823, "grad_norm": 0.839942455291748, "learning_rate": 8.08570339535112e-05, "epoch": 0.5627552497025436, "step": 17500 }, { "loss": 0.08465657234191895, "grad_norm": 0.7459936141967773, "learning_rate": 8.075756665661848e-05, "epoch": 0.5630768241309451, "step": 17510 }, { "loss": 0.08436853289604188, "grad_norm": 0.8659219741821289, "learning_rate": 8.065811912579771e-05, "epoch": 0.5633983985593466, "step": 17520 }, { "loss": 0.07398121356964112, "grad_norm": 0.43199411034584045, "learning_rate": 8.055869146320269e-05, "epoch": 0.563719972987748, "step": 17530 }, { "loss": 0.0951408565044403, "grad_norm": 0.3734264373779297, "learning_rate": 8.045928377096676e-05, "epoch": 0.5640415474161494, "step": 17540 }, { "loss": 0.08055580258369446, "grad_norm": 0.4257162809371948, "learning_rate": 8.035989615120279e-05, "epoch": 0.564363121844551, "step": 17550 }, { "loss": 0.07695200443267822, "grad_norm": 0.49492180347442627, "learning_rate": 8.026052870600298e-05, "epoch": 0.5646846962729524, "step": 17560 }, { "loss": 0.09232667088508606, "grad_norm": 0.6467422246932983, "learning_rate": 8.016118153743886e-05, "epoch": 0.5650062707013538, "step": 17570 }, { "loss": 0.07816627621650696, "grad_norm": 1.0633784532546997, "learning_rate": 8.006185474756106e-05, "epoch": 0.5653278451297553, "step": 17580 }, { "loss": 0.09850267767906189, "grad_norm": 0.46513116359710693, "learning_rate": 7.996254843839938e-05, "epoch": 0.5656494195581567, "step": 17590 }, { "loss": 0.08328604102134704, "grad_norm": 0.6261907815933228, "learning_rate": 7.986326271196243e-05, "epoch": 0.5659709939865581, "step": 17600 }, { "loss": 0.07954009175300598, "grad_norm": 0.5492364168167114, "learning_rate": 7.976399767023786e-05, "epoch": 0.5662925684149597, "step": 17610 }, { "loss": 0.08056249022483826, "grad_norm": 0.6301096081733704, "learning_rate": 7.966475341519194e-05, "epoch": 0.5666141428433611, "step": 17620 }, { "loss": 0.08544570803642274, "grad_norm": 0.5183818936347961, "learning_rate": 7.956553004876967e-05, "epoch": 0.5669357172717625, "step": 17630 }, { "loss": 0.06803440451622009, "grad_norm": 0.4413444697856903, "learning_rate": 7.94663276728945e-05, "epoch": 0.567257291700164, "step": 17640 }, { "loss": 0.09343262910842895, "grad_norm": 0.8371837735176086, "learning_rate": 7.936714638946838e-05, "epoch": 0.5675788661285655, "step": 17650 }, { "loss": 0.08109647035598755, "grad_norm": 0.8725554347038269, "learning_rate": 7.926798630037156e-05, "epoch": 0.5679004405569669, "step": 17660 }, { "loss": 0.09562431573867798, "grad_norm": 0.6995370388031006, "learning_rate": 7.916884750746263e-05, "epoch": 0.5682220149853684, "step": 17670 }, { "loss": 0.06416831612586975, "grad_norm": 0.455130010843277, "learning_rate": 7.90697301125782e-05, "epoch": 0.5685435894137698, "step": 17680 }, { "loss": 0.07064977884292603, "grad_norm": 0.7157491445541382, "learning_rate": 7.897063421753282e-05, "epoch": 0.5688651638421712, "step": 17690 }, { "loss": 0.0642152488231659, "grad_norm": 0.6208144426345825, "learning_rate": 7.887155992411914e-05, "epoch": 0.5691867382705728, "step": 17700 }, { "loss": 0.092697274684906, "grad_norm": 0.5451779961585999, "learning_rate": 7.877250733410752e-05, "epoch": 0.5695083126989742, "step": 17710 }, { "loss": 0.06400271058082581, "grad_norm": 0.5939658880233765, "learning_rate": 7.867347654924601e-05, "epoch": 0.5698298871273756, "step": 17720 }, { "loss": 0.062121731042861936, "grad_norm": 0.31174877285957336, "learning_rate": 7.857446767126029e-05, "epoch": 0.5701514615557771, "step": 17730 }, { "loss": 0.08236455917358398, "grad_norm": 0.9238488674163818, "learning_rate": 7.847548080185357e-05, "epoch": 0.5704730359841785, "step": 17740 }, { "loss": 0.08480204343795776, "grad_norm": 0.625153124332428, "learning_rate": 7.837651604270642e-05, "epoch": 0.57079461041258, "step": 17750 }, { "loss": 0.08454369306564331, "grad_norm": 0.43398886919021606, "learning_rate": 7.827757349547666e-05, "epoch": 0.5711161848409815, "step": 17760 }, { "loss": 0.0682714343070984, "grad_norm": 0.5858274698257446, "learning_rate": 7.817865326179937e-05, "epoch": 0.5714377592693829, "step": 17770 }, { "loss": 0.07586719393730164, "grad_norm": 0.4799453914165497, "learning_rate": 7.807975544328666e-05, "epoch": 0.5717593336977843, "step": 17780 }, { "loss": 0.07879873514175414, "grad_norm": 0.6759800910949707, "learning_rate": 7.798088014152758e-05, "epoch": 0.5720809081261858, "step": 17790 }, { "loss": 0.08980271220207214, "grad_norm": 0.9335778951644897, "learning_rate": 7.788202745808817e-05, "epoch": 0.5724024825545873, "step": 17800 }, { "loss": 0.09147007465362549, "grad_norm": 0.6196408867835999, "learning_rate": 7.778319749451113e-05, "epoch": 0.5727240569829887, "step": 17810 }, { "loss": 0.09004471302032471, "grad_norm": 0.5582491755485535, "learning_rate": 7.768439035231584e-05, "epoch": 0.5730456314113902, "step": 17820 }, { "loss": 0.07299330830574036, "grad_norm": 0.45894795656204224, "learning_rate": 7.758560613299827e-05, "epoch": 0.5733672058397916, "step": 17830 }, { "loss": 0.07036964297294616, "grad_norm": 0.45925047993659973, "learning_rate": 7.748684493803082e-05, "epoch": 0.573688780268193, "step": 17840 }, { "loss": 0.06470816135406494, "grad_norm": 0.4903404116630554, "learning_rate": 7.738810686886221e-05, "epoch": 0.5740103546965946, "step": 17850 }, { "loss": 0.07539921402931213, "grad_norm": 0.9620845317840576, "learning_rate": 7.72893920269175e-05, "epoch": 0.574331929124996, "step": 17860 }, { "loss": 0.06352456212043762, "grad_norm": 0.4858039319515228, "learning_rate": 7.719070051359781e-05, "epoch": 0.5746535035533974, "step": 17870 }, { "loss": 0.08455619215965271, "grad_norm": 0.7511271834373474, "learning_rate": 7.70920324302803e-05, "epoch": 0.5749750779817989, "step": 17880 }, { "loss": 0.08338568210601807, "grad_norm": 0.6056551933288574, "learning_rate": 7.699338787831809e-05, "epoch": 0.5752966524102003, "step": 17890 }, { "loss": 0.0965720534324646, "grad_norm": 0.7961472868919373, "learning_rate": 7.689476695904013e-05, "epoch": 0.5756182268386018, "step": 17900 }, { "loss": 0.07739994525909424, "grad_norm": 0.44330519437789917, "learning_rate": 7.679616977375105e-05, "epoch": 0.5759398012670033, "step": 17910 }, { "loss": 0.06674630641937256, "grad_norm": 0.5733117461204529, "learning_rate": 7.66975964237311e-05, "epoch": 0.5762613756954047, "step": 17920 }, { "loss": 0.0786827027797699, "grad_norm": 0.5420511960983276, "learning_rate": 7.659904701023616e-05, "epoch": 0.5765829501238061, "step": 17930 }, { "loss": 0.09021250009536744, "grad_norm": 1.1385753154754639, "learning_rate": 7.650052163449743e-05, "epoch": 0.5769045245522076, "step": 17940 }, { "loss": 0.09374557733535767, "grad_norm": 0.6973399519920349, "learning_rate": 7.640202039772137e-05, "epoch": 0.5772260989806091, "step": 17950 }, { "loss": 0.10186971426010132, "grad_norm": 0.611345112323761, "learning_rate": 7.630354340108974e-05, "epoch": 0.5775476734090105, "step": 17960 }, { "loss": 0.09474144577980041, "grad_norm": 0.7261148691177368, "learning_rate": 7.620509074575934e-05, "epoch": 0.577869247837412, "step": 17970 }, { "loss": 0.09362642765045166, "grad_norm": 0.8504903316497803, "learning_rate": 7.610666253286197e-05, "epoch": 0.5781908222658134, "step": 17980 }, { "loss": 0.07732483148574829, "grad_norm": 0.6735202074050903, "learning_rate": 7.600825886350444e-05, "epoch": 0.5785123966942148, "step": 17990 }, { "loss": 0.07429722547531128, "grad_norm": 1.00436532497406, "learning_rate": 7.590987983876815e-05, "epoch": 0.5788339711226164, "step": 18000 }, { "loss": 0.09903724193572998, "grad_norm": 0.7012553215026855, "learning_rate": 7.581152555970931e-05, "epoch": 0.5791555455510178, "step": 18010 }, { "loss": 0.08933169841766357, "grad_norm": 0.8168038129806519, "learning_rate": 7.571319612735871e-05, "epoch": 0.5794771199794192, "step": 18020 }, { "loss": 0.06783590316772461, "grad_norm": 0.6012395024299622, "learning_rate": 7.561489164272157e-05, "epoch": 0.5797986944078207, "step": 18030 }, { "loss": 0.07372558116912842, "grad_norm": 0.5882187485694885, "learning_rate": 7.551661220677749e-05, "epoch": 0.5801202688362221, "step": 18040 }, { "loss": 0.0641398310661316, "grad_norm": 0.48085817694664, "learning_rate": 7.541835792048037e-05, "epoch": 0.5804418432646236, "step": 18050 }, { "loss": 0.07381256818771362, "grad_norm": 0.8459762930870056, "learning_rate": 7.532012888475827e-05, "epoch": 0.5807634176930251, "step": 18060 }, { "loss": 0.08163591623306274, "grad_norm": 0.803181529045105, "learning_rate": 7.522192520051329e-05, "epoch": 0.5810849921214265, "step": 18070 }, { "loss": 0.06541717648506165, "grad_norm": 0.44433683156967163, "learning_rate": 7.512374696862154e-05, "epoch": 0.5814065665498279, "step": 18080 }, { "loss": 0.07731590270996094, "grad_norm": 0.9130017161369324, "learning_rate": 7.50255942899329e-05, "epoch": 0.5817281409782294, "step": 18090 }, { "loss": 0.05984402298927307, "grad_norm": 0.3957984447479248, "learning_rate": 7.492746726527107e-05, "epoch": 0.5820497154066309, "step": 18100 }, { "loss": 0.09862715601921082, "grad_norm": 0.398759663105011, "learning_rate": 7.482936599543333e-05, "epoch": 0.5823712898350323, "step": 18110 }, { "loss": 0.08806217908859253, "grad_norm": 0.5182679891586304, "learning_rate": 7.473129058119063e-05, "epoch": 0.5826928642634338, "step": 18120 }, { "loss": 0.09343465566635131, "grad_norm": 0.7294725775718689, "learning_rate": 7.463324112328725e-05, "epoch": 0.5830144386918352, "step": 18130 }, { "loss": 0.07456960678100585, "grad_norm": 0.7115042805671692, "learning_rate": 7.453521772244082e-05, "epoch": 0.5833360131202366, "step": 18140 }, { "loss": 0.08490001559257507, "grad_norm": 0.680609941482544, "learning_rate": 7.443722047934223e-05, "epoch": 0.5836575875486382, "step": 18150 }, { "loss": 0.08044389486312867, "grad_norm": 1.0964044332504272, "learning_rate": 7.433924949465552e-05, "epoch": 0.5839791619770396, "step": 18160 }, { "loss": 0.0721604585647583, "grad_norm": 0.4555046260356903, "learning_rate": 7.424130486901764e-05, "epoch": 0.584300736405441, "step": 18170 }, { "loss": 0.07597453594207763, "grad_norm": 0.55930095911026, "learning_rate": 7.414338670303868e-05, "epoch": 0.5846223108338425, "step": 18180 }, { "loss": 0.08302205204963684, "grad_norm": 0.7470789551734924, "learning_rate": 7.404549509730135e-05, "epoch": 0.584943885262244, "step": 18190 }, { "loss": 0.07486485242843628, "grad_norm": 0.6482924818992615, "learning_rate": 7.394763015236117e-05, "epoch": 0.5852654596906454, "step": 18200 }, { "loss": 0.06772093176841736, "grad_norm": 0.8526209592819214, "learning_rate": 7.384979196874627e-05, "epoch": 0.5855870341190469, "step": 18210 }, { "loss": 0.10163276195526123, "grad_norm": 0.48233070969581604, "learning_rate": 7.375198064695724e-05, "epoch": 0.5859086085474483, "step": 18220 }, { "loss": 0.09970284700393676, "grad_norm": 0.8146294951438904, "learning_rate": 7.365419628746713e-05, "epoch": 0.5862301829758497, "step": 18230 }, { "loss": 0.07390713691711426, "grad_norm": 0.48546773195266724, "learning_rate": 7.355643899072123e-05, "epoch": 0.5865517574042513, "step": 18240 }, { "loss": 0.07537122964859008, "grad_norm": 0.6072354316711426, "learning_rate": 7.345870885713717e-05, "epoch": 0.5868733318326527, "step": 18250 }, { "loss": 0.07224909067153931, "grad_norm": 0.807327389717102, "learning_rate": 7.336100598710452e-05, "epoch": 0.5871949062610541, "step": 18260 }, { "loss": 0.07525655627250671, "grad_norm": 0.7244537472724915, "learning_rate": 7.326333048098494e-05, "epoch": 0.5875164806894556, "step": 18270 }, { "loss": 0.06138322353363037, "grad_norm": 0.3798077702522278, "learning_rate": 7.316568243911193e-05, "epoch": 0.587838055117857, "step": 18280 }, { "loss": 0.0677099347114563, "grad_norm": 0.6162866353988647, "learning_rate": 7.306806196179079e-05, "epoch": 0.5881596295462584, "step": 18290 }, { "loss": 0.09046462774276734, "grad_norm": 0.4453006684780121, "learning_rate": 7.297046914929853e-05, "epoch": 0.58848120397466, "step": 18300 }, { "loss": 0.0929905891418457, "grad_norm": 0.745503842830658, "learning_rate": 7.287290410188373e-05, "epoch": 0.5888027784030614, "step": 18310 }, { "loss": 0.07695024609565734, "grad_norm": 0.38427412509918213, "learning_rate": 7.277536691976646e-05, "epoch": 0.5891243528314628, "step": 18320 }, { "loss": 0.0723729133605957, "grad_norm": 0.40777745842933655, "learning_rate": 7.267785770313811e-05, "epoch": 0.5894459272598643, "step": 18330 }, { "loss": 0.08466371297836303, "grad_norm": 0.6734020709991455, "learning_rate": 7.258037655216143e-05, "epoch": 0.5897675016882657, "step": 18340 }, { "loss": 0.07116898298263549, "grad_norm": 0.6805622577667236, "learning_rate": 7.248292356697027e-05, "epoch": 0.5900890761166672, "step": 18350 }, { "loss": 0.07267593145370484, "grad_norm": 0.6734875440597534, "learning_rate": 7.238549884766956e-05, "epoch": 0.5904106505450687, "step": 18360 }, { "loss": 0.06956661939620971, "grad_norm": 0.8455624580383301, "learning_rate": 7.228810249433527e-05, "epoch": 0.5907322249734701, "step": 18370 }, { "loss": 0.0653856337070465, "grad_norm": 0.5372180342674255, "learning_rate": 7.21907346070141e-05, "epoch": 0.5910537994018715, "step": 18380 }, { "loss": 0.08260570168495178, "grad_norm": 0.6930351257324219, "learning_rate": 7.209339528572363e-05, "epoch": 0.591375373830273, "step": 18390 }, { "loss": 0.07606642842292785, "grad_norm": 0.4146069586277008, "learning_rate": 7.199608463045204e-05, "epoch": 0.5916969482586745, "step": 18400 }, { "loss": 0.07034127712249756, "grad_norm": 0.39609354734420776, "learning_rate": 7.189880274115806e-05, "epoch": 0.5920185226870759, "step": 18410 }, { "loss": 0.08222041130065919, "grad_norm": 0.7135039567947388, "learning_rate": 7.180154971777087e-05, "epoch": 0.5923400971154774, "step": 18420 }, { "loss": 0.06555980443954468, "grad_norm": 0.7420735955238342, "learning_rate": 7.170432566019001e-05, "epoch": 0.5926616715438788, "step": 18430 }, { "loss": 0.09034892320632934, "grad_norm": 0.7950654029846191, "learning_rate": 7.160713066828531e-05, "epoch": 0.5929832459722802, "step": 18440 }, { "loss": 0.07953452467918395, "grad_norm": 0.7548782825469971, "learning_rate": 7.150996484189665e-05, "epoch": 0.5933048204006818, "step": 18450 }, { "loss": 0.06989264488220215, "grad_norm": 0.4734945595264435, "learning_rate": 7.141282828083404e-05, "epoch": 0.5936263948290832, "step": 18460 }, { "loss": 0.07788462042808533, "grad_norm": 0.5163804292678833, "learning_rate": 7.13157210848774e-05, "epoch": 0.5939479692574846, "step": 18470 }, { "loss": 0.08228410482406616, "grad_norm": 0.49889716506004333, "learning_rate": 7.121864335377644e-05, "epoch": 0.5942695436858861, "step": 18480 }, { "loss": 0.09119044542312622, "grad_norm": 0.8352193236351013, "learning_rate": 7.112159518725057e-05, "epoch": 0.5945911181142876, "step": 18490 }, { "loss": 0.09865041375160218, "grad_norm": 0.9083040356636047, "learning_rate": 7.102457668498906e-05, "epoch": 0.594912692542689, "step": 18500 }, { "loss": 0.07140249013900757, "grad_norm": 0.5583008527755737, "learning_rate": 7.092758794665042e-05, "epoch": 0.5952342669710905, "step": 18510 }, { "loss": 0.05078734159469604, "grad_norm": 0.6857233047485352, "learning_rate": 7.083062907186281e-05, "epoch": 0.5955558413994919, "step": 18520 }, { "loss": 0.10113188028335571, "grad_norm": 0.8305119872093201, "learning_rate": 7.073370016022354e-05, "epoch": 0.5958774158278933, "step": 18530 }, { "loss": 0.0677301526069641, "grad_norm": 0.37434622645378113, "learning_rate": 7.063680131129924e-05, "epoch": 0.5961989902562949, "step": 18540 }, { "loss": 0.07215762138366699, "grad_norm": 0.38779565691947937, "learning_rate": 7.053993262462566e-05, "epoch": 0.5965205646846963, "step": 18550 }, { "loss": 0.06991016268730163, "grad_norm": 0.5665784478187561, "learning_rate": 7.044309419970747e-05, "epoch": 0.5968421391130977, "step": 18560 }, { "loss": 0.09297497868537903, "grad_norm": 0.6514776945114136, "learning_rate": 7.034628613601846e-05, "epoch": 0.5971637135414992, "step": 18570 }, { "loss": 0.0842542052268982, "grad_norm": 0.755832850933075, "learning_rate": 7.024950853300101e-05, "epoch": 0.5974852879699006, "step": 18580 }, { "loss": 0.08134500980377198, "grad_norm": 0.5388493537902832, "learning_rate": 7.015276149006633e-05, "epoch": 0.597806862398302, "step": 18590 }, { "loss": 0.06592969298362732, "grad_norm": 0.3698655664920807, "learning_rate": 7.005604510659421e-05, "epoch": 0.5981284368267036, "step": 18600 }, { "loss": 0.09740493297576905, "grad_norm": 0.7075668573379517, "learning_rate": 6.995935948193294e-05, "epoch": 0.598450011255105, "step": 18610 }, { "loss": 0.08867404460906983, "grad_norm": 0.6851850152015686, "learning_rate": 6.986270471539922e-05, "epoch": 0.5987715856835064, "step": 18620 }, { "loss": 0.10935039520263672, "grad_norm": 0.5081934928894043, "learning_rate": 6.976608090627807e-05, "epoch": 0.5990931601119079, "step": 18630 }, { "loss": 0.09188774824142457, "grad_norm": 0.5268943905830383, "learning_rate": 6.966948815382268e-05, "epoch": 0.5994147345403094, "step": 18640 }, { "eval_loss": 0.07528992742300034, "eval_runtime": 34.3822, "eval_samples_per_second": 146.18, "eval_steps_per_second": 36.56, "epoch": 0.5996719940830305, "step": 18648 }, { "loss": 0.07428829073905945, "grad_norm": 0.4399220049381256, "learning_rate": 6.957292655725437e-05, "epoch": 0.5997363089687108, "step": 18650 }, { "loss": 0.09513623714447021, "grad_norm": 0.7418004870414734, "learning_rate": 6.947639621576243e-05, "epoch": 0.6000578833971123, "step": 18660 }, { "loss": 0.09230533838272095, "grad_norm": 0.6606807112693787, "learning_rate": 6.937989722850405e-05, "epoch": 0.6003794578255137, "step": 18670 }, { "loss": 0.10270729064941406, "grad_norm": 0.7914863228797913, "learning_rate": 6.928342969460418e-05, "epoch": 0.6007010322539151, "step": 18680 }, { "loss": 0.07258764505386353, "grad_norm": 0.5238516330718994, "learning_rate": 6.918699371315556e-05, "epoch": 0.6010226066823167, "step": 18690 }, { "loss": 0.08154450654983521, "grad_norm": 0.6537436246871948, "learning_rate": 6.909058938321842e-05, "epoch": 0.6013441811107181, "step": 18700 }, { "loss": 0.065982985496521, "grad_norm": 0.4509539306163788, "learning_rate": 6.899421680382053e-05, "epoch": 0.6016657555391195, "step": 18710 }, { "loss": 0.0736770749092102, "grad_norm": 0.6178690195083618, "learning_rate": 6.889787607395702e-05, "epoch": 0.601987329967521, "step": 18720 }, { "loss": 0.09866932630538941, "grad_norm": 0.47050192952156067, "learning_rate": 6.88015672925903e-05, "epoch": 0.6023089043959224, "step": 18730 }, { "loss": 0.08328339457511902, "grad_norm": 0.5887206792831421, "learning_rate": 6.870529055865e-05, "epoch": 0.6026304788243239, "step": 18740 }, { "loss": 0.06292853951454162, "grad_norm": 0.6852194666862488, "learning_rate": 6.860904597103273e-05, "epoch": 0.6029520532527254, "step": 18750 }, { "loss": 0.0628040075302124, "grad_norm": 0.7905822992324829, "learning_rate": 6.851283362860226e-05, "epoch": 0.6032736276811268, "step": 18760 }, { "loss": 0.07740421295166015, "grad_norm": 0.6432583928108215, "learning_rate": 6.841665363018907e-05, "epoch": 0.6035952021095282, "step": 18770 }, { "loss": 0.0845758557319641, "grad_norm": 0.6782318353652954, "learning_rate": 6.832050607459049e-05, "epoch": 0.6039167765379297, "step": 18780 }, { "loss": 0.07212685942649841, "grad_norm": 0.7194960713386536, "learning_rate": 6.822439106057051e-05, "epoch": 0.6042383509663312, "step": 18790 }, { "loss": 0.08795191645622254, "grad_norm": 0.4270845055580139, "learning_rate": 6.812830868685969e-05, "epoch": 0.6045599253947326, "step": 18800 }, { "loss": 0.07008818984031677, "grad_norm": 0.4772111177444458, "learning_rate": 6.803225905215503e-05, "epoch": 0.6048814998231341, "step": 18810 }, { "loss": 0.0846167504787445, "grad_norm": 0.7406246662139893, "learning_rate": 6.793624225511998e-05, "epoch": 0.6052030742515355, "step": 18820 }, { "loss": 0.0772022008895874, "grad_norm": 0.9471773505210876, "learning_rate": 6.784025839438425e-05, "epoch": 0.6055246486799369, "step": 18830 }, { "loss": 0.08314455151557923, "grad_norm": 1.0049670934677124, "learning_rate": 6.774430756854361e-05, "epoch": 0.6058462231083385, "step": 18840 }, { "loss": 0.09380330443382263, "grad_norm": 0.6991022229194641, "learning_rate": 6.764838987615998e-05, "epoch": 0.6061677975367399, "step": 18850 }, { "loss": 0.09013092517852783, "grad_norm": 0.8749735951423645, "learning_rate": 6.755250541576126e-05, "epoch": 0.6064893719651413, "step": 18860 }, { "loss": 0.08810775876045226, "grad_norm": 0.5447385311126709, "learning_rate": 6.745665428584111e-05, "epoch": 0.6068109463935428, "step": 18870 }, { "loss": 0.10087782144546509, "grad_norm": 0.35421833395957947, "learning_rate": 6.736083658485916e-05, "epoch": 0.6071325208219442, "step": 18880 }, { "loss": 0.06867042183876038, "grad_norm": 0.5186894536018372, "learning_rate": 6.726505241124048e-05, "epoch": 0.6074540952503457, "step": 18890 }, { "loss": 0.07856658101081848, "grad_norm": 0.4858158528804779, "learning_rate": 6.71693018633758e-05, "epoch": 0.6077756696787472, "step": 18900 }, { "loss": 0.07544032335281373, "grad_norm": 0.5628518462181091, "learning_rate": 6.707358503962131e-05, "epoch": 0.6080972441071486, "step": 18910 }, { "loss": 0.0858623206615448, "grad_norm": 0.5891218781471252, "learning_rate": 6.697790203829854e-05, "epoch": 0.60841881853555, "step": 18920 }, { "loss": 0.08889785408973694, "grad_norm": 0.7737987041473389, "learning_rate": 6.68822529576943e-05, "epoch": 0.6087403929639515, "step": 18930 }, { "loss": 0.07854949235916138, "grad_norm": 0.630588173866272, "learning_rate": 6.67866378960605e-05, "epoch": 0.609061967392353, "step": 18940 }, { "loss": 0.1184352993965149, "grad_norm": 0.6254100203514099, "learning_rate": 6.66910569516142e-05, "epoch": 0.6093835418207544, "step": 18950 }, { "loss": 0.08089698553085327, "grad_norm": 0.3980773985385895, "learning_rate": 6.659551022253734e-05, "epoch": 0.6097051162491559, "step": 18960 }, { "loss": 0.08007602095603943, "grad_norm": 0.7637392282485962, "learning_rate": 6.649999780697671e-05, "epoch": 0.6100266906775573, "step": 18970 }, { "loss": 0.0555802583694458, "grad_norm": 0.632079005241394, "learning_rate": 6.640451980304392e-05, "epoch": 0.6103482651059587, "step": 18980 }, { "loss": 0.08289624452590942, "grad_norm": 0.23457559943199158, "learning_rate": 6.630907630881516e-05, "epoch": 0.6106698395343603, "step": 18990 }, { "loss": 0.06061472296714783, "grad_norm": 0.5475881695747375, "learning_rate": 6.621366742233116e-05, "epoch": 0.6109914139627617, "step": 19000 }, { "loss": 0.08997538685798645, "grad_norm": 0.34357795119285583, "learning_rate": 6.611829324159722e-05, "epoch": 0.6113129883911631, "step": 19010 }, { "loss": 0.0812469482421875, "grad_norm": 0.8159956932067871, "learning_rate": 6.602295386458289e-05, "epoch": 0.6116345628195646, "step": 19020 }, { "loss": 0.10333518981933594, "grad_norm": 0.8582615256309509, "learning_rate": 6.592764938922193e-05, "epoch": 0.611956137247966, "step": 19030 }, { "loss": 0.07760070562362671, "grad_norm": 0.3401944637298584, "learning_rate": 6.583237991341239e-05, "epoch": 0.6122777116763675, "step": 19040 }, { "loss": 0.07692185044288635, "grad_norm": 0.4970964193344116, "learning_rate": 6.573714553501626e-05, "epoch": 0.612599286104769, "step": 19050 }, { "loss": 0.07856242656707764, "grad_norm": 0.5750195980072021, "learning_rate": 6.564194635185942e-05, "epoch": 0.6129208605331704, "step": 19060 }, { "loss": 0.08100840449333191, "grad_norm": 0.7286192178726196, "learning_rate": 6.55467824617318e-05, "epoch": 0.6132424349615718, "step": 19070 }, { "loss": 0.08202658891677857, "grad_norm": 0.5252206325531006, "learning_rate": 6.545165396238691e-05, "epoch": 0.6135640093899734, "step": 19080 }, { "loss": 0.08576998710632325, "grad_norm": 0.4926624894142151, "learning_rate": 6.535656095154193e-05, "epoch": 0.6138855838183748, "step": 19090 }, { "loss": 0.08644883036613464, "grad_norm": 0.5414864420890808, "learning_rate": 6.526150352687766e-05, "epoch": 0.6142071582467762, "step": 19100 }, { "loss": 0.08598004579544068, "grad_norm": 0.730283796787262, "learning_rate": 6.516648178603826e-05, "epoch": 0.6145287326751777, "step": 19110 }, { "loss": 0.07616907358169556, "grad_norm": 0.5509003400802612, "learning_rate": 6.507149582663126e-05, "epoch": 0.6148503071035791, "step": 19120 }, { "loss": 0.07464420199394226, "grad_norm": 0.627088725566864, "learning_rate": 6.497654574622741e-05, "epoch": 0.6151718815319805, "step": 19130 }, { "loss": 0.06419848203659058, "grad_norm": 0.7218136191368103, "learning_rate": 6.488163164236075e-05, "epoch": 0.6154934559603821, "step": 19140 }, { "loss": 0.08014584183692933, "grad_norm": 0.8419432044029236, "learning_rate": 6.478675361252818e-05, "epoch": 0.6158150303887835, "step": 19150 }, { "loss": 0.055648809671401976, "grad_norm": 0.5282542109489441, "learning_rate": 6.46919117541896e-05, "epoch": 0.6161366048171849, "step": 19160 }, { "loss": 0.06651024222373962, "grad_norm": 0.4777286648750305, "learning_rate": 6.459710616476782e-05, "epoch": 0.6164581792455864, "step": 19170 }, { "loss": 0.0624203085899353, "grad_norm": 0.6265890598297119, "learning_rate": 6.450233694164832e-05, "epoch": 0.6167797536739879, "step": 19180 }, { "loss": 0.07577801942825317, "grad_norm": 0.5991108417510986, "learning_rate": 6.440760418217923e-05, "epoch": 0.6171013281023893, "step": 19190 }, { "loss": 0.06770421266555786, "grad_norm": 0.5578089952468872, "learning_rate": 6.431290798367129e-05, "epoch": 0.6174229025307908, "step": 19200 }, { "loss": 0.08427031636238098, "grad_norm": 0.7189592719078064, "learning_rate": 6.421824844339761e-05, "epoch": 0.6177444769591922, "step": 19210 }, { "loss": 0.0903325617313385, "grad_norm": 0.4093859791755676, "learning_rate": 6.41236256585937e-05, "epoch": 0.6180660513875936, "step": 19220 }, { "loss": 0.08933579921722412, "grad_norm": 0.7538068890571594, "learning_rate": 6.402903972645722e-05, "epoch": 0.6183876258159952, "step": 19230 }, { "loss": 0.08776495456695557, "grad_norm": 0.6686062812805176, "learning_rate": 6.393449074414812e-05, "epoch": 0.6187092002443966, "step": 19240 }, { "loss": 0.08356345891952514, "grad_norm": 0.9015601873397827, "learning_rate": 6.383997880878828e-05, "epoch": 0.619030774672798, "step": 19250 }, { "loss": 0.08526620864868165, "grad_norm": 0.9928838610649109, "learning_rate": 6.374550401746154e-05, "epoch": 0.6193523491011995, "step": 19260 }, { "loss": 0.07604313492774964, "grad_norm": 0.8028160333633423, "learning_rate": 6.365106646721364e-05, "epoch": 0.6196739235296009, "step": 19270 }, { "loss": 0.07956154346466064, "grad_norm": 0.8392983675003052, "learning_rate": 6.355666625505201e-05, "epoch": 0.6199954979580024, "step": 19280 }, { "loss": 0.08473367691040039, "grad_norm": 0.5166031122207642, "learning_rate": 6.346230347794577e-05, "epoch": 0.6203170723864039, "step": 19290 }, { "loss": 0.09186525344848633, "grad_norm": 0.8711339235305786, "learning_rate": 6.336797823282556e-05, "epoch": 0.6206386468148053, "step": 19300 }, { "loss": 0.07015078067779541, "grad_norm": 0.48920947313308716, "learning_rate": 6.327369061658344e-05, "epoch": 0.6209602212432067, "step": 19310 }, { "loss": 0.06253830194473267, "grad_norm": 1.0902957916259766, "learning_rate": 6.317944072607285e-05, "epoch": 0.6212817956716082, "step": 19320 }, { "loss": 0.07039433717727661, "grad_norm": 0.9401423335075378, "learning_rate": 6.308522865810852e-05, "epoch": 0.6216033701000097, "step": 19330 }, { "loss": 0.07885023355484008, "grad_norm": 1.2769575119018555, "learning_rate": 6.299105450946626e-05, "epoch": 0.6219249445284111, "step": 19340 }, { "loss": 0.07023831605911254, "grad_norm": 0.8095871210098267, "learning_rate": 6.289691837688291e-05, "epoch": 0.6222465189568126, "step": 19350 }, { "loss": 0.06982327699661255, "grad_norm": 0.3558923304080963, "learning_rate": 6.28028203570564e-05, "epoch": 0.622568093385214, "step": 19360 }, { "loss": 0.08719192147254944, "grad_norm": 0.6851743459701538, "learning_rate": 6.270876054664531e-05, "epoch": 0.6228896678136154, "step": 19370 }, { "loss": 0.08308027982711792, "grad_norm": 0.3055587112903595, "learning_rate": 6.261473904226908e-05, "epoch": 0.623211242242017, "step": 19380 }, { "loss": 0.08074118494987488, "grad_norm": 0.8412294983863831, "learning_rate": 6.252075594050785e-05, "epoch": 0.6235328166704184, "step": 19390 }, { "loss": 0.07668668031692505, "grad_norm": 0.8082206845283508, "learning_rate": 6.242681133790225e-05, "epoch": 0.6238543910988198, "step": 19400 }, { "loss": 0.07915959358215333, "grad_norm": 0.6886305212974548, "learning_rate": 6.233290533095335e-05, "epoch": 0.6241759655272213, "step": 19410 }, { "loss": 0.0796347975730896, "grad_norm": 0.6029472947120667, "learning_rate": 6.223903801612258e-05, "epoch": 0.6244975399556227, "step": 19420 }, { "loss": 0.09030648469924926, "grad_norm": 0.8511977791786194, "learning_rate": 6.214520948983166e-05, "epoch": 0.6248191143840242, "step": 19430 }, { "loss": 0.07107316851615905, "grad_norm": 0.7600314617156982, "learning_rate": 6.205141984846244e-05, "epoch": 0.6251406888124257, "step": 19440 }, { "loss": 0.08018980622291565, "grad_norm": 0.592937707901001, "learning_rate": 6.195766918835678e-05, "epoch": 0.6254622632408271, "step": 19450 }, { "loss": 0.08703637719154358, "grad_norm": 0.5489158630371094, "learning_rate": 6.186395760581664e-05, "epoch": 0.6257838376692285, "step": 19460 }, { "loss": 0.07732821106910706, "grad_norm": 0.2843504548072815, "learning_rate": 6.177028519710369e-05, "epoch": 0.62610541209763, "step": 19470 }, { "loss": 0.10473834276199341, "grad_norm": 0.7938198447227478, "learning_rate": 6.167665205843944e-05, "epoch": 0.6264269865260315, "step": 19480 }, { "loss": 0.09615716934204102, "grad_norm": 1.1492993831634521, "learning_rate": 6.158305828600502e-05, "epoch": 0.6267485609544329, "step": 19490 }, { "loss": 0.06346037983894348, "grad_norm": 0.5835652351379395, "learning_rate": 6.148950397594117e-05, "epoch": 0.6270701353828344, "step": 19500 }, { "loss": 0.072739577293396, "grad_norm": 1.014746069908142, "learning_rate": 6.139598922434802e-05, "epoch": 0.6273917098112358, "step": 19510 }, { "loss": 0.09916123151779174, "grad_norm": 0.715579628944397, "learning_rate": 6.130251412728516e-05, "epoch": 0.6277132842396372, "step": 19520 }, { "loss": 0.07230787873268127, "grad_norm": 0.7324387431144714, "learning_rate": 6.120907878077138e-05, "epoch": 0.6280348586680388, "step": 19530 }, { "loss": 0.09201850891113281, "grad_norm": 0.526698648929596, "learning_rate": 6.111568328078465e-05, "epoch": 0.6283564330964402, "step": 19540 }, { "loss": 0.08800469636917115, "grad_norm": 0.989494800567627, "learning_rate": 6.102232772326202e-05, "epoch": 0.6286780075248416, "step": 19550 }, { "loss": 0.08789398670196533, "grad_norm": 0.9045369625091553, "learning_rate": 6.092901220409949e-05, "epoch": 0.6289995819532431, "step": 19560 }, { "loss": 0.06860705614089965, "grad_norm": 0.7513046860694885, "learning_rate": 6.0835736819151946e-05, "epoch": 0.6293211563816445, "step": 19570 }, { "loss": 0.06425130963325501, "grad_norm": 0.6448571085929871, "learning_rate": 6.074250166423308e-05, "epoch": 0.629642730810046, "step": 19580 }, { "loss": 0.08776512742042542, "grad_norm": 0.6769627928733826, "learning_rate": 6.0649306835115206e-05, "epoch": 0.6299643052384475, "step": 19590 }, { "loss": 0.08552072048187256, "grad_norm": 0.45814409852027893, "learning_rate": 6.055615242752923e-05, "epoch": 0.6302858796668489, "step": 19600 }, { "loss": 0.10555309057235718, "grad_norm": 1.470810055732727, "learning_rate": 6.046303853716455e-05, "epoch": 0.6306074540952503, "step": 19610 }, { "loss": 0.07876392602920532, "grad_norm": 0.4727279543876648, "learning_rate": 6.036996525966896e-05, "epoch": 0.6309290285236518, "step": 19620 }, { "loss": 0.06964776515960694, "grad_norm": 0.635856568813324, "learning_rate": 6.027693269064849e-05, "epoch": 0.6312506029520533, "step": 19630 }, { "loss": 0.07364420294761657, "grad_norm": 0.34876182675361633, "learning_rate": 6.0183940925667326e-05, "epoch": 0.6315721773804547, "step": 19640 }, { "loss": 0.08741930723190308, "grad_norm": 0.3650606870651245, "learning_rate": 6.009099006024788e-05, "epoch": 0.6318937518088562, "step": 19650 }, { "loss": 0.07190605998039246, "grad_norm": 0.8150784373283386, "learning_rate": 5.999808018987041e-05, "epoch": 0.6322153262372576, "step": 19660 }, { "loss": 0.08492738008499146, "grad_norm": 0.7493435144424438, "learning_rate": 5.990521140997316e-05, "epoch": 0.632536900665659, "step": 19670 }, { "loss": 0.08940092325210572, "grad_norm": 0.8140876293182373, "learning_rate": 5.9812383815952066e-05, "epoch": 0.6328584750940606, "step": 19680 }, { "loss": 0.09434642791748046, "grad_norm": 0.6462841033935547, "learning_rate": 5.971959750316083e-05, "epoch": 0.633180049522462, "step": 19690 }, { "loss": 0.07376375794410706, "grad_norm": 0.45153164863586426, "learning_rate": 5.962685256691071e-05, "epoch": 0.6335016239508634, "step": 19700 }, { "loss": 0.08791798949241639, "grad_norm": 0.5423551201820374, "learning_rate": 5.953414910247054e-05, "epoch": 0.6338231983792649, "step": 19710 }, { "loss": 0.08712515830993653, "grad_norm": 0.47862866520881653, "learning_rate": 5.944148720506648e-05, "epoch": 0.6341447728076663, "step": 19720 }, { "loss": 0.07166895866394044, "grad_norm": 0.6706460118293762, "learning_rate": 5.9348866969881975e-05, "epoch": 0.6344663472360678, "step": 19730 }, { "loss": 0.07896190881729126, "grad_norm": 0.8685805201530457, "learning_rate": 5.9256288492057734e-05, "epoch": 0.6347879216644693, "step": 19740 }, { "loss": 0.078327876329422, "grad_norm": 0.5587207674980164, "learning_rate": 5.916375186669153e-05, "epoch": 0.6351094960928707, "step": 19750 }, { "loss": 0.06806607842445374, "grad_norm": 0.29360270500183105, "learning_rate": 5.907125718883812e-05, "epoch": 0.6354310705212721, "step": 19760 }, { "loss": 0.09198864698410034, "grad_norm": 1.0249218940734863, "learning_rate": 5.897880455350933e-05, "epoch": 0.6357526449496737, "step": 19770 }, { "loss": 0.07347887754440308, "grad_norm": 0.3725737929344177, "learning_rate": 5.888639405567359e-05, "epoch": 0.6360742193780751, "step": 19780 }, { "loss": 0.07164725065231323, "grad_norm": 0.49716490507125854, "learning_rate": 5.879402579025615e-05, "epoch": 0.6363957938064765, "step": 19790 }, { "loss": 0.08011369109153747, "grad_norm": 0.7196645736694336, "learning_rate": 5.8701699852138867e-05, "epoch": 0.636717368234878, "step": 19800 }, { "loss": 0.07078960537910461, "grad_norm": 0.4283914864063263, "learning_rate": 5.860941633616015e-05, "epoch": 0.6370389426632794, "step": 19810 }, { "loss": 0.09213285446166992, "grad_norm": 0.6686605215072632, "learning_rate": 5.851717533711475e-05, "epoch": 0.6373605170916808, "step": 19820 }, { "loss": 0.07392160296440124, "grad_norm": 0.7527466416358948, "learning_rate": 5.8424976949753796e-05, "epoch": 0.6376820915200824, "step": 19830 }, { "loss": 0.06171455979347229, "grad_norm": 0.5245692729949951, "learning_rate": 5.83328212687847e-05, "epoch": 0.6380036659484838, "step": 19840 }, { "loss": 0.10391281843185425, "grad_norm": 0.3888297379016876, "learning_rate": 5.824070838887091e-05, "epoch": 0.6383252403768852, "step": 19850 }, { "loss": 0.09777231216430664, "grad_norm": 0.5975055694580078, "learning_rate": 5.814863840463192e-05, "epoch": 0.6386468148052867, "step": 19860 }, { "loss": 0.0733383059501648, "grad_norm": 0.4285428524017334, "learning_rate": 5.8056611410643246e-05, "epoch": 0.6389683892336881, "step": 19870 }, { "loss": 0.08428534269332885, "grad_norm": 1.0025172233581543, "learning_rate": 5.7964627501436116e-05, "epoch": 0.6392899636620896, "step": 19880 }, { "loss": 0.08907008171081543, "grad_norm": 0.6572055220603943, "learning_rate": 5.7872686771497606e-05, "epoch": 0.6396115380904911, "step": 19890 }, { "loss": 0.07242008447647094, "grad_norm": 0.5621013045310974, "learning_rate": 5.7780789315270444e-05, "epoch": 0.6399331125188925, "step": 19900 }, { "loss": 0.0710676670074463, "grad_norm": 0.3812589943408966, "learning_rate": 5.768893522715279e-05, "epoch": 0.6402546869472939, "step": 19910 }, { "loss": 0.09399187564849854, "grad_norm": 0.8854357004165649, "learning_rate": 5.75971246014984e-05, "epoch": 0.6405762613756955, "step": 19920 }, { "loss": 0.08624742031097413, "grad_norm": 0.3859586715698242, "learning_rate": 5.750535753261623e-05, "epoch": 0.6408978358040969, "step": 19930 }, { "loss": 0.08188838958740234, "grad_norm": 0.7565677165985107, "learning_rate": 5.7413634114770676e-05, "epoch": 0.6412194102324983, "step": 19940 }, { "loss": 0.08392425179481507, "grad_norm": 0.4465886950492859, "learning_rate": 5.7321954442181114e-05, "epoch": 0.6415409846608998, "step": 19950 }, { "loss": 0.08943371772766114, "grad_norm": 0.901117205619812, "learning_rate": 5.7230318609022106e-05, "epoch": 0.6418625590893012, "step": 19960 }, { "loss": 0.09252094030380249, "grad_norm": 0.49248000979423523, "learning_rate": 5.7138726709423175e-05, "epoch": 0.6421841335177026, "step": 19970 }, { "loss": 0.07759752869606018, "grad_norm": 0.3645312190055847, "learning_rate": 5.704717883746861e-05, "epoch": 0.6425057079461042, "step": 19980 }, { "loss": 0.07568894028663635, "grad_norm": 0.5617930889129639, "learning_rate": 5.695567508719762e-05, "epoch": 0.6428272823745056, "step": 19990 }, { "loss": 0.0660637617111206, "grad_norm": 0.4228644073009491, "learning_rate": 5.6864215552603974e-05, "epoch": 0.643148856802907, "step": 20000 }, { "loss": 0.08293818831443786, "grad_norm": 0.5796824097633362, "learning_rate": 5.67728003276361e-05, "epoch": 0.6434704312313085, "step": 20010 }, { "loss": 0.08752073645591736, "grad_norm": 0.6435496211051941, "learning_rate": 5.6681429506196825e-05, "epoch": 0.64379200565971, "step": 20020 }, { "loss": 0.08041639924049378, "grad_norm": 0.40585389733314514, "learning_rate": 5.659010318214346e-05, "epoch": 0.6441135800881114, "step": 20030 }, { "loss": 0.07967058420181275, "grad_norm": 0.5377001762390137, "learning_rate": 5.64988214492876e-05, "epoch": 0.6444351545165129, "step": 20040 }, { "loss": 0.07384193539619446, "grad_norm": 0.4058140516281128, "learning_rate": 5.640758440139491e-05, "epoch": 0.6447567289449143, "step": 20050 }, { "loss": 0.07030548453330994, "grad_norm": 0.3950488567352295, "learning_rate": 5.631639213218534e-05, "epoch": 0.6450783033733157, "step": 20060 }, { "loss": 0.06710969805717468, "grad_norm": 0.6428706049919128, "learning_rate": 5.622524473533269e-05, "epoch": 0.6453998778017173, "step": 20070 }, { "loss": 0.06697219610214233, "grad_norm": 0.7152665257453918, "learning_rate": 5.613414230446467e-05, "epoch": 0.6457214522301187, "step": 20080 }, { "loss": 0.07348057627677917, "grad_norm": 0.7897191643714905, "learning_rate": 5.604308493316299e-05, "epoch": 0.6460430266585201, "step": 20090 }, { "loss": 0.07560304403305054, "grad_norm": 0.2724365293979645, "learning_rate": 5.595207271496283e-05, "epoch": 0.6463646010869216, "step": 20100 }, { "loss": 0.09230870008468628, "grad_norm": 0.31173989176750183, "learning_rate": 5.5861105743353173e-05, "epoch": 0.646686175515323, "step": 20110 }, { "loss": 0.06364266872406006, "grad_norm": 0.5667968988418579, "learning_rate": 5.577018411177641e-05, "epoch": 0.6470077499437245, "step": 20120 }, { "loss": 0.06696459054946899, "grad_norm": 0.5604732036590576, "learning_rate": 5.5679307913628365e-05, "epoch": 0.647329324372126, "step": 20130 }, { "loss": 0.09952617287635804, "grad_norm": 0.5189045667648315, "learning_rate": 5.558847724225828e-05, "epoch": 0.6476508988005274, "step": 20140 }, { "loss": 0.07203555703163148, "grad_norm": 0.4901675283908844, "learning_rate": 5.549769219096848e-05, "epoch": 0.6479724732289288, "step": 20150 }, { "loss": 0.07998304963111877, "grad_norm": 0.5910018086433411, "learning_rate": 5.540695285301465e-05, "epoch": 0.6482940476573303, "step": 20160 }, { "loss": 0.08121640682220459, "grad_norm": 0.40404200553894043, "learning_rate": 5.531625932160536e-05, "epoch": 0.6486156220857318, "step": 20170 }, { "loss": 0.06217573285102844, "grad_norm": 0.8459587693214417, "learning_rate": 5.522561168990206e-05, "epoch": 0.6489371965141332, "step": 20180 }, { "loss": 0.07579060792922973, "grad_norm": 0.6870545148849487, "learning_rate": 5.5135010051019275e-05, "epoch": 0.6492587709425347, "step": 20190 }, { "loss": 0.0787390947341919, "grad_norm": 0.8223719596862793, "learning_rate": 5.5044454498024065e-05, "epoch": 0.6495803453709361, "step": 20200 }, { "eval_loss": 0.07322059571743011, "eval_runtime": 34.7708, "eval_samples_per_second": 144.547, "eval_steps_per_second": 36.151, "epoch": 0.6496446602566164, "step": 20202 }, { "loss": 0.07401165962219239, "grad_norm": 0.4529065489768982, "learning_rate": 5.495394512393628e-05, "epoch": 0.6499019197993375, "step": 20210 }, { "loss": 0.07655703425407409, "grad_norm": 0.5374553203582764, "learning_rate": 5.486348202172833e-05, "epoch": 0.6502234942277391, "step": 20220 }, { "loss": 0.0764824092388153, "grad_norm": 0.637631356716156, "learning_rate": 5.4773065284324996e-05, "epoch": 0.6505450686561405, "step": 20230 }, { "loss": 0.08825883865356446, "grad_norm": 0.6297155022621155, "learning_rate": 5.468269500460359e-05, "epoch": 0.6508666430845419, "step": 20240 }, { "loss": 0.0745690643787384, "grad_norm": 0.7897636294364929, "learning_rate": 5.4592371275393494e-05, "epoch": 0.6511882175129434, "step": 20250 }, { "loss": 0.07205396890640259, "grad_norm": 0.4687490463256836, "learning_rate": 5.450209418947652e-05, "epoch": 0.6515097919413448, "step": 20260 }, { "loss": 0.07531830668449402, "grad_norm": 0.5674930214881897, "learning_rate": 5.441186383958632e-05, "epoch": 0.6518313663697463, "step": 20270 }, { "loss": 0.07985269427299499, "grad_norm": 0.686039388179779, "learning_rate": 5.4321680318408695e-05, "epoch": 0.6521529407981478, "step": 20280 }, { "loss": 0.07927957773208619, "grad_norm": 0.54157555103302, "learning_rate": 5.423154371858138e-05, "epoch": 0.6524745152265492, "step": 20290 }, { "loss": 0.07954124212265015, "grad_norm": 0.7790160775184631, "learning_rate": 5.414145413269371e-05, "epoch": 0.6527960896549506, "step": 20300 }, { "loss": 0.07763062715530396, "grad_norm": 0.6327701210975647, "learning_rate": 5.405141165328697e-05, "epoch": 0.6531176640833521, "step": 20310 }, { "loss": 0.07775360345840454, "grad_norm": 0.6861497163772583, "learning_rate": 5.396141637285383e-05, "epoch": 0.6534392385117536, "step": 20320 }, { "loss": 0.059467607736587526, "grad_norm": 0.7230491638183594, "learning_rate": 5.387146838383867e-05, "epoch": 0.653760812940155, "step": 20330 }, { "loss": 0.0787361741065979, "grad_norm": 0.4892818033695221, "learning_rate": 5.378156777863717e-05, "epoch": 0.6540823873685565, "step": 20340 }, { "loss": 0.06754761934280396, "grad_norm": 0.4849034547805786, "learning_rate": 5.369171464959635e-05, "epoch": 0.6544039617969579, "step": 20350 }, { "loss": 0.06928637623786926, "grad_norm": 0.8103519678115845, "learning_rate": 5.360190908901455e-05, "epoch": 0.6547255362253593, "step": 20360 }, { "loss": 0.0871894121170044, "grad_norm": 0.7014930248260498, "learning_rate": 5.35121511891411e-05, "epoch": 0.6550471106537609, "step": 20370 }, { "loss": 0.07648140788078309, "grad_norm": 0.5109164714813232, "learning_rate": 5.3422441042176566e-05, "epoch": 0.6553686850821623, "step": 20380 }, { "loss": 0.0827454686164856, "grad_norm": 0.6321818232536316, "learning_rate": 5.333277874027227e-05, "epoch": 0.6556902595105637, "step": 20390 }, { "loss": 0.07089014649391175, "grad_norm": 0.7621293067932129, "learning_rate": 5.324316437553041e-05, "epoch": 0.6560118339389652, "step": 20400 }, { "loss": 0.09134788513183593, "grad_norm": 0.3434469699859619, "learning_rate": 5.3153598040004146e-05, "epoch": 0.6563334083673666, "step": 20410 }, { "loss": 0.06535201668739318, "grad_norm": 0.6273918151855469, "learning_rate": 5.306407982569704e-05, "epoch": 0.6566549827957681, "step": 20420 }, { "loss": 0.08040034770965576, "grad_norm": 0.2526308596134186, "learning_rate": 5.297460982456344e-05, "epoch": 0.6569765572241696, "step": 20430 }, { "loss": 0.0793336808681488, "grad_norm": 0.807705819606781, "learning_rate": 5.288518812850799e-05, "epoch": 0.657298131652571, "step": 20440 }, { "loss": 0.0994856297969818, "grad_norm": 0.4748459756374359, "learning_rate": 5.279581482938578e-05, "epoch": 0.6576197060809724, "step": 20450 }, { "loss": 0.07574291825294495, "grad_norm": 0.42891186475753784, "learning_rate": 5.270649001900226e-05, "epoch": 0.657941280509374, "step": 20460 }, { "loss": 0.07210445404052734, "grad_norm": 0.7261443138122559, "learning_rate": 5.2617213789112906e-05, "epoch": 0.6582628549377754, "step": 20470 }, { "loss": 0.07575239539146424, "grad_norm": 0.4555186927318573, "learning_rate": 5.2527986231423554e-05, "epoch": 0.6585844293661768, "step": 20480 }, { "loss": 0.08972333669662476, "grad_norm": 0.8295562267303467, "learning_rate": 5.24388074375898e-05, "epoch": 0.6589060037945783, "step": 20490 }, { "loss": 0.07050554752349854, "grad_norm": 0.28110215067863464, "learning_rate": 5.2349677499217185e-05, "epoch": 0.6592275782229797, "step": 20500 }, { "loss": 0.0773880124092102, "grad_norm": 0.49791833758354187, "learning_rate": 5.226059650786122e-05, "epoch": 0.6595491526513811, "step": 20510 }, { "loss": 0.0658577024936676, "grad_norm": 0.4790436625480652, "learning_rate": 5.217156455502692e-05, "epoch": 0.6598707270797827, "step": 20520 }, { "loss": 0.08007944226264954, "grad_norm": 0.3390914797782898, "learning_rate": 5.208258173216909e-05, "epoch": 0.6601923015081841, "step": 20530 }, { "loss": 0.08911491632461548, "grad_norm": 0.6809537410736084, "learning_rate": 5.1993648130692055e-05, "epoch": 0.6605138759365855, "step": 20540 }, { "loss": 0.0683016836643219, "grad_norm": 0.3764163851737976, "learning_rate": 5.1904763841949466e-05, "epoch": 0.660835450364987, "step": 20550 }, { "loss": 0.07006845474243165, "grad_norm": 0.2706121802330017, "learning_rate": 5.181592895724447e-05, "epoch": 0.6611570247933884, "step": 20560 }, { "loss": 0.08022654056549072, "grad_norm": 0.3748409152030945, "learning_rate": 5.172714356782932e-05, "epoch": 0.6614785992217899, "step": 20570 }, { "loss": 0.06296700835227967, "grad_norm": 0.5847156643867493, "learning_rate": 5.163840776490555e-05, "epoch": 0.6618001736501913, "step": 20580 }, { "loss": 0.06332173347473144, "grad_norm": 0.44201889634132385, "learning_rate": 5.154972163962365e-05, "epoch": 0.6621217480785928, "step": 20590 }, { "loss": 0.08053407669067383, "grad_norm": 0.4436214864253998, "learning_rate": 5.1461085283083154e-05, "epoch": 0.6624433225069942, "step": 20600 }, { "loss": 0.0634561836719513, "grad_norm": 0.4250709116458893, "learning_rate": 5.137249878633251e-05, "epoch": 0.6627648969353956, "step": 20610 }, { "loss": 0.07566155791282654, "grad_norm": 0.5983794927597046, "learning_rate": 5.128396224036881e-05, "epoch": 0.6630864713637972, "step": 20620 }, { "loss": 0.05465579628944397, "grad_norm": 0.6458154320716858, "learning_rate": 5.119547573613799e-05, "epoch": 0.6634080457921986, "step": 20630 }, { "loss": 0.07174187898635864, "grad_norm": 0.5902681946754456, "learning_rate": 5.1107039364534436e-05, "epoch": 0.6637296202206, "step": 20640 }, { "loss": 0.07929787039756775, "grad_norm": 0.4368651807308197, "learning_rate": 5.101865321640119e-05, "epoch": 0.6640511946490015, "step": 20650 }, { "loss": 0.07327082753181458, "grad_norm": 0.9439780116081238, "learning_rate": 5.0930317382529555e-05, "epoch": 0.664372769077403, "step": 20660 }, { "loss": 0.09078122973442078, "grad_norm": 0.7786108255386353, "learning_rate": 5.084203195365924e-05, "epoch": 0.6646943435058044, "step": 20670 }, { "loss": 0.07400400042533875, "grad_norm": 0.46253257989883423, "learning_rate": 5.0753797020478235e-05, "epoch": 0.6650159179342059, "step": 20680 }, { "loss": 0.07291633486747742, "grad_norm": 0.9724017381668091, "learning_rate": 5.066561267362251e-05, "epoch": 0.6653374923626073, "step": 20690 }, { "loss": 0.10171788930892944, "grad_norm": 0.5624548196792603, "learning_rate": 5.0577479003676134e-05, "epoch": 0.6656590667910087, "step": 20700 }, { "loss": 0.07935294508934021, "grad_norm": 0.7655509114265442, "learning_rate": 5.048939610117118e-05, "epoch": 0.6659806412194103, "step": 20710 }, { "loss": 0.06642114520072936, "grad_norm": 0.802371621131897, "learning_rate": 5.040136405658743e-05, "epoch": 0.6663022156478117, "step": 20720 }, { "loss": 0.0731515884399414, "grad_norm": 0.5871564149856567, "learning_rate": 5.031338296035266e-05, "epoch": 0.6666237900762131, "step": 20730 }, { "loss": 0.09851167798042297, "grad_norm": 0.6241931319236755, "learning_rate": 5.0225452902842116e-05, "epoch": 0.6669453645046146, "step": 20740 }, { "loss": 0.09080070853233338, "grad_norm": 0.5461302995681763, "learning_rate": 5.013757397437862e-05, "epoch": 0.667266938933016, "step": 20750 }, { "loss": 0.07654562592506409, "grad_norm": 0.7314095497131348, "learning_rate": 5.004974626523258e-05, "epoch": 0.6675885133614174, "step": 20760 }, { "loss": 0.08786712288856506, "grad_norm": 0.6281319260597229, "learning_rate": 4.9961969865621684e-05, "epoch": 0.667910087789819, "step": 20770 }, { "loss": 0.07048113346099853, "grad_norm": 0.4600871801376343, "learning_rate": 4.9874244865711e-05, "epoch": 0.6682316622182204, "step": 20780 }, { "loss": 0.08273769021034241, "grad_norm": 0.746705174446106, "learning_rate": 4.9786571355612786e-05, "epoch": 0.6685532366466218, "step": 20790 }, { "loss": 0.09555152654647828, "grad_norm": 0.4937872886657715, "learning_rate": 4.969894942538632e-05, "epoch": 0.6688748110750233, "step": 20800 }, { "loss": 0.063187974691391, "grad_norm": 0.6522345542907715, "learning_rate": 4.961137916503803e-05, "epoch": 0.6691963855034248, "step": 20810 }, { "loss": 0.08229098320007325, "grad_norm": 0.5910975933074951, "learning_rate": 4.952386066452114e-05, "epoch": 0.6695179599318262, "step": 20820 }, { "loss": 0.060083693265914916, "grad_norm": 0.6340411901473999, "learning_rate": 4.943639401373582e-05, "epoch": 0.6698395343602277, "step": 20830 }, { "loss": 0.07824947834014892, "grad_norm": 0.5943097472190857, "learning_rate": 4.934897930252886e-05, "epoch": 0.6701611087886291, "step": 20840 }, { "loss": 0.08365764021873474, "grad_norm": 0.9509609341621399, "learning_rate": 4.9261616620693804e-05, "epoch": 0.6704826832170305, "step": 20850 }, { "loss": 0.08415056467056274, "grad_norm": 0.4306361675262451, "learning_rate": 4.917430605797074e-05, "epoch": 0.6708042576454321, "step": 20860 }, { "loss": 0.08667734265327454, "grad_norm": 0.6566128134727478, "learning_rate": 4.908704770404611e-05, "epoch": 0.6711258320738335, "step": 20870 }, { "loss": 0.0774792730808258, "grad_norm": 0.6821538209915161, "learning_rate": 4.899984164855289e-05, "epoch": 0.6714474065022349, "step": 20880 }, { "loss": 0.06847630739212036, "grad_norm": 0.4911244511604309, "learning_rate": 4.891268798107016e-05, "epoch": 0.6717689809306364, "step": 20890 }, { "loss": 0.08165109753608704, "grad_norm": 0.5228062868118286, "learning_rate": 4.8825586791123356e-05, "epoch": 0.6720905553590378, "step": 20900 }, { "loss": 0.058252638578414916, "grad_norm": 0.3973667621612549, "learning_rate": 4.8738538168183845e-05, "epoch": 0.6724121297874392, "step": 20910 }, { "loss": 0.08311399221420288, "grad_norm": 0.7493540644645691, "learning_rate": 4.8651542201669134e-05, "epoch": 0.6727337042158408, "step": 20920 }, { "loss": 0.07413286566734315, "grad_norm": 0.4782170057296753, "learning_rate": 4.85645989809426e-05, "epoch": 0.6730552786442422, "step": 20930 }, { "loss": 0.0762550950050354, "grad_norm": 0.7324798703193665, "learning_rate": 4.8477708595313356e-05, "epoch": 0.6733768530726436, "step": 20940 }, { "loss": 0.05731356143951416, "grad_norm": 0.5743696093559265, "learning_rate": 4.8390871134036396e-05, "epoch": 0.6736984275010451, "step": 20950 }, { "loss": 0.08263179659843445, "grad_norm": 0.6200225949287415, "learning_rate": 4.830408668631221e-05, "epoch": 0.6740200019294466, "step": 20960 }, { "loss": 0.08547630310058593, "grad_norm": 0.8113134503364563, "learning_rate": 4.821735534128682e-05, "epoch": 0.674341576357848, "step": 20970 }, { "loss": 0.07483196258544922, "grad_norm": 0.8760203719139099, "learning_rate": 4.8130677188051906e-05, "epoch": 0.6746631507862495, "step": 20980 }, { "loss": 0.0705351173877716, "grad_norm": 0.49309197068214417, "learning_rate": 4.804405231564427e-05, "epoch": 0.6749847252146509, "step": 20990 }, { "loss": 0.05743688344955444, "grad_norm": 0.2787437438964844, "learning_rate": 4.795748081304615e-05, "epoch": 0.6753062996430523, "step": 21000 }, { "loss": 0.07269101142883301, "grad_norm": 0.7541826963424683, "learning_rate": 4.787096276918486e-05, "epoch": 0.6756278740714539, "step": 21010 }, { "loss": 0.09004797935485839, "grad_norm": 1.4871023893356323, "learning_rate": 4.7784498272932777e-05, "epoch": 0.6759494484998553, "step": 21020 }, { "loss": 0.07304201126098633, "grad_norm": 0.3444749712944031, "learning_rate": 4.7698087413107416e-05, "epoch": 0.6762710229282567, "step": 21030 }, { "loss": 0.07011285424232483, "grad_norm": 0.7484362721443176, "learning_rate": 4.761173027847101e-05, "epoch": 0.6765925973566582, "step": 21040 }, { "loss": 0.08315953612327576, "grad_norm": 0.6611018180847168, "learning_rate": 4.7525426957730845e-05, "epoch": 0.6769141717850596, "step": 21050 }, { "loss": 0.0854596197605133, "grad_norm": 0.5237786173820496, "learning_rate": 4.743917753953871e-05, "epoch": 0.677235746213461, "step": 21060 }, { "loss": 0.07612773776054382, "grad_norm": 0.8620582818984985, "learning_rate": 4.7352982112491074e-05, "epoch": 0.6775573206418626, "step": 21070 }, { "loss": 0.06872981786727905, "grad_norm": 0.9304988384246826, "learning_rate": 4.726684076512904e-05, "epoch": 0.677878895070264, "step": 21080 }, { "loss": 0.09908568859100342, "grad_norm": 0.7491776943206787, "learning_rate": 4.718075358593802e-05, "epoch": 0.6782004694986654, "step": 21090 }, { "loss": 0.07195566296577453, "grad_norm": 0.506563127040863, "learning_rate": 4.70947206633479e-05, "epoch": 0.6785220439270669, "step": 21100 }, { "loss": 0.0828722059726715, "grad_norm": 0.5606396794319153, "learning_rate": 4.700874208573284e-05, "epoch": 0.6788436183554684, "step": 21110 }, { "loss": 0.08981196284294128, "grad_norm": 0.8274250030517578, "learning_rate": 4.692281794141102e-05, "epoch": 0.6791651927838698, "step": 21120 }, { "loss": 0.07490351200103759, "grad_norm": 0.6135382652282715, "learning_rate": 4.683694831864492e-05, "epoch": 0.6794867672122713, "step": 21130 }, { "loss": 0.08893336653709412, "grad_norm": 0.43025079369544983, "learning_rate": 4.675113330564082e-05, "epoch": 0.6798083416406727, "step": 21140 }, { "loss": 0.06783245205879211, "grad_norm": 0.8443710207939148, "learning_rate": 4.666537299054906e-05, "epoch": 0.6801299160690741, "step": 21150 }, { "loss": 0.08298020958900451, "grad_norm": 0.7228168249130249, "learning_rate": 4.657966746146366e-05, "epoch": 0.6804514904974757, "step": 21160 }, { "loss": 0.07848789095878601, "grad_norm": 0.6177375912666321, "learning_rate": 4.649401680642246e-05, "epoch": 0.6807730649258771, "step": 21170 }, { "loss": 0.07725452184677124, "grad_norm": 0.8629878163337708, "learning_rate": 4.640842111340694e-05, "epoch": 0.6810946393542785, "step": 21180 }, { "loss": 0.08047922849655151, "grad_norm": 0.865945041179657, "learning_rate": 4.632288047034201e-05, "epoch": 0.68141621378268, "step": 21190 }, { "loss": 0.056549960374832155, "grad_norm": 0.5094005465507507, "learning_rate": 4.623739496509618e-05, "epoch": 0.6817377882110814, "step": 21200 }, { "loss": 0.06563356518745422, "grad_norm": 0.5315867066383362, "learning_rate": 4.6151964685481175e-05, "epoch": 0.6820593626394829, "step": 21210 }, { "loss": 0.08418481349945069, "grad_norm": 0.7525455951690674, "learning_rate": 4.606658971925213e-05, "epoch": 0.6823809370678844, "step": 21220 }, { "loss": 0.07750657796859742, "grad_norm": 0.8312000632286072, "learning_rate": 4.598127015410723e-05, "epoch": 0.6827025114962858, "step": 21230 }, { "loss": 0.0824580192565918, "grad_norm": 0.6511006951332092, "learning_rate": 4.5896006077687835e-05, "epoch": 0.6830240859246872, "step": 21240 }, { "loss": 0.10098731517791748, "grad_norm": 0.3902733027935028, "learning_rate": 4.581079757757835e-05, "epoch": 0.6833456603530887, "step": 21250 }, { "loss": 0.07251612544059753, "grad_norm": 0.6815239787101746, "learning_rate": 4.572564474130593e-05, "epoch": 0.6836672347814902, "step": 21260 }, { "loss": 0.07855257987976075, "grad_norm": 0.7673274278640747, "learning_rate": 4.564054765634074e-05, "epoch": 0.6839888092098916, "step": 21270 }, { "loss": 0.07359289526939392, "grad_norm": 0.3823041617870331, "learning_rate": 4.555550641009555e-05, "epoch": 0.6843103836382931, "step": 21280 }, { "loss": 0.07962161302566528, "grad_norm": 0.39447155594825745, "learning_rate": 4.5470521089925736e-05, "epoch": 0.6846319580666945, "step": 21290 }, { "loss": 0.07411338686943054, "grad_norm": 0.42518749833106995, "learning_rate": 4.5385591783129424e-05, "epoch": 0.6849535324950959, "step": 21300 }, { "loss": 0.07336281538009644, "grad_norm": 0.5015262365341187, "learning_rate": 4.5300718576946986e-05, "epoch": 0.6852751069234975, "step": 21310 }, { "loss": 0.07329775094985962, "grad_norm": 0.45550772547721863, "learning_rate": 4.521590155856133e-05, "epoch": 0.6855966813518989, "step": 21320 }, { "loss": 0.07568760514259339, "grad_norm": 0.5514888763427734, "learning_rate": 4.513114081509755e-05, "epoch": 0.6859182557803003, "step": 21330 }, { "loss": 0.0703023374080658, "grad_norm": 0.5648098587989807, "learning_rate": 4.50464364336229e-05, "epoch": 0.6862398302087018, "step": 21340 }, { "loss": 0.07627668976783752, "grad_norm": 0.5147767066955566, "learning_rate": 4.4961788501146875e-05, "epoch": 0.6865614046371032, "step": 21350 }, { "loss": 0.07581337094306946, "grad_norm": 0.5529208183288574, "learning_rate": 4.487719710462083e-05, "epoch": 0.6868829790655047, "step": 21360 }, { "loss": 0.08803019523620606, "grad_norm": 0.47467994689941406, "learning_rate": 4.4792662330938176e-05, "epoch": 0.6872045534939062, "step": 21370 }, { "loss": 0.06819509267807007, "grad_norm": 0.5418660640716553, "learning_rate": 4.470818426693413e-05, "epoch": 0.6875261279223076, "step": 21380 }, { "loss": 0.08300604224205017, "grad_norm": 0.41477343440055847, "learning_rate": 4.462376299938555e-05, "epoch": 0.687847702350709, "step": 21390 }, { "loss": 0.10577502250671386, "grad_norm": 0.9017356634140015, "learning_rate": 4.453939861501113e-05, "epoch": 0.6881692767791106, "step": 21400 }, { "loss": 0.08897504210472107, "grad_norm": 0.7038023471832275, "learning_rate": 4.445509120047094e-05, "epoch": 0.688490851207512, "step": 21410 }, { "loss": 0.06395829319953919, "grad_norm": 0.7179198861122131, "learning_rate": 4.437084084236669e-05, "epoch": 0.6888124256359134, "step": 21420 }, { "loss": 0.08118244409561157, "grad_norm": 0.6487843990325928, "learning_rate": 4.428664762724144e-05, "epoch": 0.6891340000643149, "step": 21430 }, { "loss": 0.07925102710723878, "grad_norm": 0.4036339223384857, "learning_rate": 4.420251164157945e-05, "epoch": 0.6894555744927163, "step": 21440 }, { "loss": 0.08218384385108948, "grad_norm": 0.7086281776428223, "learning_rate": 4.411843297180636e-05, "epoch": 0.6897771489211177, "step": 21450 }, { "loss": 0.07362968921661377, "grad_norm": 0.7492254972457886, "learning_rate": 4.403441170428877e-05, "epoch": 0.6900987233495193, "step": 21460 }, { "loss": 0.07590994834899903, "grad_norm": 0.4599405527114868, "learning_rate": 4.395044792533446e-05, "epoch": 0.6904202977779207, "step": 21470 }, { "loss": 0.07032697200775147, "grad_norm": 0.5740514993667603, "learning_rate": 4.386654172119204e-05, "epoch": 0.6907418722063221, "step": 21480 }, { "loss": 0.08793300986289979, "grad_norm": 0.6255429983139038, "learning_rate": 4.3782693178051026e-05, "epoch": 0.6910634466347236, "step": 21490 }, { "loss": 0.07888756394386291, "grad_norm": 0.4935165345668793, "learning_rate": 4.369890238204177e-05, "epoch": 0.691385021063125, "step": 21500 }, { "loss": 0.07613331079483032, "grad_norm": 0.8250544667243958, "learning_rate": 4.361516941923518e-05, "epoch": 0.6917065954915265, "step": 21510 }, { "loss": 0.06617268323898315, "grad_norm": 0.5559676289558411, "learning_rate": 4.3531494375642854e-05, "epoch": 0.692028169919928, "step": 21520 }, { "loss": 0.08175877332687378, "grad_norm": 0.5441673994064331, "learning_rate": 4.3447877337216825e-05, "epoch": 0.6923497443483294, "step": 21530 }, { "loss": 0.11042051315307617, "grad_norm": 0.8127007484436035, "learning_rate": 4.3364318389849635e-05, "epoch": 0.6926713187767308, "step": 21540 }, { "loss": 0.06440520882606507, "grad_norm": 0.5828354954719543, "learning_rate": 4.328081761937401e-05, "epoch": 0.6929928932051324, "step": 21550 }, { "loss": 0.06377939581871032, "grad_norm": 0.41448041796684265, "learning_rate": 4.319737511156307e-05, "epoch": 0.6933144676335338, "step": 21560 }, { "loss": 0.08158342242240905, "grad_norm": 0.8715920448303223, "learning_rate": 4.3113990952130024e-05, "epoch": 0.6936360420619352, "step": 21570 }, { "loss": 0.07553659677505493, "grad_norm": 0.7690626382827759, "learning_rate": 4.303066522672812e-05, "epoch": 0.6939576164903367, "step": 21580 }, { "loss": 0.06266080141067505, "grad_norm": 1.2075152397155762, "learning_rate": 4.2947398020950556e-05, "epoch": 0.6942791909187381, "step": 21590 }, { "loss": 0.07179356813430786, "grad_norm": 0.7406636476516724, "learning_rate": 4.2864189420330544e-05, "epoch": 0.6946007653471395, "step": 21600 }, { "loss": 0.06792376041412354, "grad_norm": 0.717032790184021, "learning_rate": 4.278103951034092e-05, "epoch": 0.6949223397755411, "step": 21610 }, { "loss": 0.06622045636177062, "grad_norm": 0.5181135535240173, "learning_rate": 4.269794837639444e-05, "epoch": 0.6952439142039425, "step": 21620 }, { "loss": 0.08370713591575622, "grad_norm": 0.9519104361534119, "learning_rate": 4.261491610384331e-05, "epoch": 0.6955654886323439, "step": 21630 }, { "loss": 0.07882735133171082, "grad_norm": 0.6479547619819641, "learning_rate": 4.2531942777979325e-05, "epoch": 0.6958870630607454, "step": 21640 }, { "loss": 0.059241306781768796, "grad_norm": 0.4295456111431122, "learning_rate": 4.244902848403378e-05, "epoch": 0.6962086374891469, "step": 21650 }, { "loss": 0.06421804428100586, "grad_norm": 0.28864648938179016, "learning_rate": 4.236617330717725e-05, "epoch": 0.6965302119175483, "step": 21660 }, { "loss": 0.0704532504081726, "grad_norm": 0.639294445514679, "learning_rate": 4.228337733251962e-05, "epoch": 0.6968517863459498, "step": 21670 }, { "loss": 0.05593020915985107, "grad_norm": 0.6021806001663208, "learning_rate": 4.220064064511004e-05, "epoch": 0.6971733607743512, "step": 21680 }, { "loss": 0.07564791440963745, "grad_norm": 0.6179336309432983, "learning_rate": 4.2117963329936597e-05, "epoch": 0.6974949352027526, "step": 21690 }, { "loss": 0.06977922320365906, "grad_norm": 0.45775020122528076, "learning_rate": 4.2035345471926545e-05, "epoch": 0.6978165096311542, "step": 21700 }, { "loss": 0.07761858105659485, "grad_norm": 0.34109559655189514, "learning_rate": 4.195278715594595e-05, "epoch": 0.6981380840595556, "step": 21710 }, { "loss": 0.05580984354019165, "grad_norm": 0.3880707323551178, "learning_rate": 4.1870288466799815e-05, "epoch": 0.698459658487957, "step": 21720 }, { "loss": 0.0565685510635376, "grad_norm": 0.27682071924209595, "learning_rate": 4.178784948923177e-05, "epoch": 0.6987812329163585, "step": 21730 }, { "loss": 0.07089447379112243, "grad_norm": 0.8211534023284912, "learning_rate": 4.170547030792422e-05, "epoch": 0.6991028073447599, "step": 21740 }, { "loss": 0.07182880640029907, "grad_norm": 0.33115750551223755, "learning_rate": 4.162315100749815e-05, "epoch": 0.6994243817731614, "step": 21750 }, { "eval_loss": 0.07118770480155945, "eval_runtime": 35.4323, "eval_samples_per_second": 141.848, "eval_steps_per_second": 35.476, "epoch": 0.6996173264302022, "step": 21756 }, { "loss": 0.07729780077934265, "grad_norm": 0.9225740432739258, "learning_rate": 4.1540891672512904e-05, "epoch": 0.6997459562015629, "step": 21760 }, { "loss": 0.07148987650871277, "grad_norm": 0.6877495646476746, "learning_rate": 4.14586923874664e-05, "epoch": 0.7000675306299643, "step": 21770 }, { "loss": 0.07752482295036316, "grad_norm": 0.5832339525222778, "learning_rate": 4.137655323679471e-05, "epoch": 0.7003891050583657, "step": 21780 }, { "loss": 0.06601099967956543, "grad_norm": 0.5884093046188354, "learning_rate": 4.129447430487229e-05, "epoch": 0.7007106794867672, "step": 21790 }, { "loss": 0.08792551755905151, "grad_norm": 0.8232205510139465, "learning_rate": 4.1212455676011583e-05, "epoch": 0.7010322539151687, "step": 21800 }, { "loss": 0.056697070598602295, "grad_norm": 0.6312450766563416, "learning_rate": 4.11304974344632e-05, "epoch": 0.7013538283435701, "step": 21810 }, { "loss": 0.06890648007392883, "grad_norm": 0.6079735159873962, "learning_rate": 4.104859966441574e-05, "epoch": 0.7016754027719716, "step": 21820 }, { "loss": 0.08191646933555603, "grad_norm": 0.9938222169876099, "learning_rate": 4.096676244999554e-05, "epoch": 0.701996977200373, "step": 21830 }, { "loss": 0.08274092078208924, "grad_norm": 0.509135365486145, "learning_rate": 4.0884985875266925e-05, "epoch": 0.7023185516287744, "step": 21840 }, { "loss": 0.07548218369483947, "grad_norm": 0.6436309218406677, "learning_rate": 4.0803270024231763e-05, "epoch": 0.702640126057176, "step": 21850 }, { "loss": 0.08233748078346252, "grad_norm": 0.7960435748100281, "learning_rate": 4.0721614980829606e-05, "epoch": 0.7029617004855774, "step": 21860 }, { "loss": 0.08757339119911194, "grad_norm": 0.5708218216896057, "learning_rate": 4.064002082893758e-05, "epoch": 0.7032832749139788, "step": 21870 }, { "loss": 0.08269885182380676, "grad_norm": 0.7067870497703552, "learning_rate": 4.055848765237021e-05, "epoch": 0.7036048493423803, "step": 21880 }, { "loss": 0.08364191055297851, "grad_norm": 0.6864334940910339, "learning_rate": 4.047701553487948e-05, "epoch": 0.7039264237707817, "step": 21890 }, { "loss": 0.06692585349082947, "grad_norm": 0.5575340986251831, "learning_rate": 4.039560456015453e-05, "epoch": 0.7042479981991832, "step": 21900 }, { "loss": 0.07891395688056946, "grad_norm": 0.8325002193450928, "learning_rate": 4.031425481182173e-05, "epoch": 0.7045695726275847, "step": 21910 }, { "loss": 0.07879780530929566, "grad_norm": 0.7286157011985779, "learning_rate": 4.023296637344462e-05, "epoch": 0.7048911470559861, "step": 21920 }, { "loss": 0.07090092897415161, "grad_norm": 0.6933513879776001, "learning_rate": 4.015173932852363e-05, "epoch": 0.7052127214843875, "step": 21930 }, { "loss": 0.08005400896072387, "grad_norm": 0.7384825944900513, "learning_rate": 4.007057376049634e-05, "epoch": 0.705534295912789, "step": 21940 }, { "loss": 0.07087463140487671, "grad_norm": 0.531400740146637, "learning_rate": 3.998946975273699e-05, "epoch": 0.7058558703411905, "step": 21950 }, { "loss": 0.06927424073219299, "grad_norm": 0.5209558010101318, "learning_rate": 3.9908427388556625e-05, "epoch": 0.7061774447695919, "step": 21960 }, { "loss": 0.0795640230178833, "grad_norm": 0.2959861755371094, "learning_rate": 3.982744675120304e-05, "epoch": 0.7064990191979934, "step": 21970 }, { "loss": 0.09337790012359619, "grad_norm": 0.8974547982215881, "learning_rate": 3.97465279238605e-05, "epoch": 0.7068205936263948, "step": 21980 }, { "loss": 0.08562746047973632, "grad_norm": 0.8576725721359253, "learning_rate": 3.9665670989649904e-05, "epoch": 0.7071421680547962, "step": 21990 }, { "loss": 0.097445809841156, "grad_norm": 0.673926830291748, "learning_rate": 3.958487603162856e-05, "epoch": 0.7074637424831978, "step": 22000 }, { "loss": 0.0739307940006256, "grad_norm": 0.6029888987541199, "learning_rate": 3.9504143132789997e-05, "epoch": 0.7077853169115992, "step": 22010 }, { "loss": 0.06431577801704406, "grad_norm": 0.26620832085609436, "learning_rate": 3.942347237606413e-05, "epoch": 0.7081068913400006, "step": 22020 }, { "loss": 0.0735122561454773, "grad_norm": 0.5332812070846558, "learning_rate": 3.9342863844316944e-05, "epoch": 0.7084284657684021, "step": 22030 }, { "loss": 0.06432998776435853, "grad_norm": 0.34079718589782715, "learning_rate": 3.92623176203506e-05, "epoch": 0.7087500401968035, "step": 22040 }, { "loss": 0.05873666405677795, "grad_norm": 0.5827487111091614, "learning_rate": 3.918183378690313e-05, "epoch": 0.709071614625205, "step": 22050 }, { "loss": 0.0894324004650116, "grad_norm": 0.8233846426010132, "learning_rate": 3.9101412426648596e-05, "epoch": 0.7093931890536065, "step": 22060 }, { "loss": 0.06385500431060791, "grad_norm": 0.5678383708000183, "learning_rate": 3.902105362219687e-05, "epoch": 0.7097147634820079, "step": 22070 }, { "loss": 0.06556373834609985, "grad_norm": 0.5707252025604248, "learning_rate": 3.8940757456093456e-05, "epoch": 0.7100363379104093, "step": 22080 }, { "loss": 0.06791006922721862, "grad_norm": 0.5018502473831177, "learning_rate": 3.8860524010819684e-05, "epoch": 0.7103579123388108, "step": 22090 }, { "loss": 0.09631652235984803, "grad_norm": 0.49287688732147217, "learning_rate": 3.878035336879229e-05, "epoch": 0.7106794867672123, "step": 22100 }, { "loss": 0.06533873677253724, "grad_norm": 0.5646507143974304, "learning_rate": 3.870024561236365e-05, "epoch": 0.7110010611956137, "step": 22110 }, { "loss": 0.06178664565086365, "grad_norm": 0.1747838854789734, "learning_rate": 3.86202008238214e-05, "epoch": 0.7113226356240152, "step": 22120 }, { "loss": 0.12747459411621093, "grad_norm": 0.631783127784729, "learning_rate": 3.854021908538857e-05, "epoch": 0.7116442100524166, "step": 22130 }, { "loss": 0.08871740698814393, "grad_norm": 0.5668731331825256, "learning_rate": 3.84603004792235e-05, "epoch": 0.711965784480818, "step": 22140 }, { "loss": 0.08891339898109436, "grad_norm": 0.673675537109375, "learning_rate": 3.8380445087419505e-05, "epoch": 0.7122873589092196, "step": 22150 }, { "loss": 0.07999474406242371, "grad_norm": 0.5109632015228271, "learning_rate": 3.8300652992005116e-05, "epoch": 0.712608933337621, "step": 22160 }, { "loss": 0.07962061762809754, "grad_norm": 0.7328252196311951, "learning_rate": 3.822092427494377e-05, "epoch": 0.7129305077660224, "step": 22170 }, { "loss": 0.07219462394714356, "grad_norm": 0.625394880771637, "learning_rate": 3.814125901813374e-05, "epoch": 0.7132520821944239, "step": 22180 }, { "loss": 0.07665095329284669, "grad_norm": 0.7968199253082275, "learning_rate": 3.8061657303408324e-05, "epoch": 0.7135736566228253, "step": 22190 }, { "loss": 0.06330210566520691, "grad_norm": 0.5527894496917725, "learning_rate": 3.798211921253533e-05, "epoch": 0.7138952310512268, "step": 22200 }, { "loss": 0.08288524746894836, "grad_norm": 0.228219136595726, "learning_rate": 3.790264482721735e-05, "epoch": 0.7142168054796283, "step": 22210 }, { "loss": 0.06985173225402833, "grad_norm": 0.6191315054893494, "learning_rate": 3.7823234229091445e-05, "epoch": 0.7145383799080297, "step": 22220 }, { "loss": 0.077042555809021, "grad_norm": 0.5698592066764832, "learning_rate": 3.774388749972916e-05, "epoch": 0.7148599543364311, "step": 22230 }, { "loss": 0.07586207985877991, "grad_norm": 0.8893696069717407, "learning_rate": 3.766460472063653e-05, "epoch": 0.7151815287648327, "step": 22240 }, { "loss": 0.0670695424079895, "grad_norm": 1.1401852369308472, "learning_rate": 3.758538597325377e-05, "epoch": 0.7155031031932341, "step": 22250 }, { "loss": 0.07725749015808106, "grad_norm": 0.7471514940261841, "learning_rate": 3.7506231338955414e-05, "epoch": 0.7158246776216355, "step": 22260 }, { "loss": 0.07863717675209045, "grad_norm": 0.543266773223877, "learning_rate": 3.742714089905014e-05, "epoch": 0.716146252050037, "step": 22270 }, { "loss": 0.06630213856697083, "grad_norm": 0.24376331269741058, "learning_rate": 3.734811473478059e-05, "epoch": 0.7164678264784384, "step": 22280 }, { "loss": 0.05101625919342041, "grad_norm": 0.8426804542541504, "learning_rate": 3.7269152927323506e-05, "epoch": 0.7167894009068398, "step": 22290 }, { "loss": 0.06777915954589844, "grad_norm": 0.41970622539520264, "learning_rate": 3.71902555577894e-05, "epoch": 0.7171109753352414, "step": 22300 }, { "loss": 0.06783168911933898, "grad_norm": 0.6039134860038757, "learning_rate": 3.711142270722268e-05, "epoch": 0.7174325497636428, "step": 22310 }, { "loss": 0.05689939856529236, "grad_norm": 0.609046220779419, "learning_rate": 3.703265445660148e-05, "epoch": 0.7177541241920442, "step": 22320 }, { "loss": 0.07239497303962708, "grad_norm": 0.654071569442749, "learning_rate": 3.695395088683749e-05, "epoch": 0.7180756986204457, "step": 22330 }, { "loss": 0.07121925354003907, "grad_norm": 0.5291280150413513, "learning_rate": 3.687531207877607e-05, "epoch": 0.7183972730488472, "step": 22340 }, { "loss": 0.07519608736038208, "grad_norm": 0.7853771448135376, "learning_rate": 3.679673811319596e-05, "epoch": 0.7187188474772486, "step": 22350 }, { "loss": 0.08853870034217834, "grad_norm": 0.3993820250034332, "learning_rate": 3.671822907080938e-05, "epoch": 0.7190404219056501, "step": 22360 }, { "loss": 0.07715902328491211, "grad_norm": 0.8283431529998779, "learning_rate": 3.6639785032261764e-05, "epoch": 0.7193619963340515, "step": 22370 }, { "loss": 0.07304844856262208, "grad_norm": 0.7513096332550049, "learning_rate": 3.6561406078131845e-05, "epoch": 0.7196835707624529, "step": 22380 }, { "loss": 0.07067491412162781, "grad_norm": 0.9522845149040222, "learning_rate": 3.648309228893152e-05, "epoch": 0.7200051451908545, "step": 22390 }, { "loss": 0.07188115119934083, "grad_norm": 0.574332058429718, "learning_rate": 3.640484374510564e-05, "epoch": 0.7203267196192559, "step": 22400 }, { "loss": 0.0681536316871643, "grad_norm": 0.29085707664489746, "learning_rate": 3.632666052703218e-05, "epoch": 0.7206482940476573, "step": 22410 }, { "loss": 0.07831199765205384, "grad_norm": 0.7932929396629333, "learning_rate": 3.624854271502186e-05, "epoch": 0.7209698684760588, "step": 22420 }, { "loss": 0.07898212671279907, "grad_norm": 0.5751041769981384, "learning_rate": 3.6170490389318346e-05, "epoch": 0.7212914429044602, "step": 22430 }, { "loss": 0.09148269891738892, "grad_norm": 0.6542364358901978, "learning_rate": 3.609250363009793e-05, "epoch": 0.7216130173328617, "step": 22440 }, { "loss": 0.08846168518066407, "grad_norm": 0.705448567867279, "learning_rate": 3.601458251746962e-05, "epoch": 0.7219345917612632, "step": 22450 }, { "loss": 0.0739761769771576, "grad_norm": 0.47682419419288635, "learning_rate": 3.593672713147501e-05, "epoch": 0.7222561661896646, "step": 22460 }, { "loss": 0.06625583171844482, "grad_norm": 0.896597146987915, "learning_rate": 3.585893755208811e-05, "epoch": 0.722577740618066, "step": 22470 }, { "loss": 0.08493435978889466, "grad_norm": 0.48587241768836975, "learning_rate": 3.578121385921533e-05, "epoch": 0.7228993150464675, "step": 22480 }, { "loss": 0.06791111230850219, "grad_norm": 0.3798055052757263, "learning_rate": 3.570355613269551e-05, "epoch": 0.723220889474869, "step": 22490 }, { "loss": 0.07332050800323486, "grad_norm": 0.6129491329193115, "learning_rate": 3.562596445229954e-05, "epoch": 0.7235424639032704, "step": 22500 }, { "loss": 0.06496210098266601, "grad_norm": 0.512057363986969, "learning_rate": 3.5548438897730726e-05, "epoch": 0.7238640383316719, "step": 22510 }, { "loss": 0.07144524455070496, "grad_norm": 0.854631781578064, "learning_rate": 3.547097954862422e-05, "epoch": 0.7241856127600733, "step": 22520 }, { "loss": 0.06772378087043762, "grad_norm": 0.6502050161361694, "learning_rate": 3.5393586484547225e-05, "epoch": 0.7245071871884747, "step": 22530 }, { "loss": 0.06079759001731873, "grad_norm": 0.721480667591095, "learning_rate": 3.531625978499895e-05, "epoch": 0.7248287616168763, "step": 22540 }, { "loss": 0.11076893806457519, "grad_norm": 0.5731940865516663, "learning_rate": 3.52389995294103e-05, "epoch": 0.7251503360452777, "step": 22550 }, { "loss": 0.07782159447669983, "grad_norm": 0.5176689624786377, "learning_rate": 3.5161805797144035e-05, "epoch": 0.7254719104736791, "step": 22560 }, { "loss": 0.08271768689155579, "grad_norm": 0.8279057145118713, "learning_rate": 3.508467866749449e-05, "epoch": 0.7257934849020806, "step": 22570 }, { "loss": 0.09557575583457947, "grad_norm": 0.969346821308136, "learning_rate": 3.500761821968767e-05, "epoch": 0.726115059330482, "step": 22580 }, { "loss": 0.06104801893234253, "grad_norm": 0.49495741724967957, "learning_rate": 3.4930624532881064e-05, "epoch": 0.7264366337588835, "step": 22590 }, { "loss": 0.08584245443344116, "grad_norm": 0.7284782528877258, "learning_rate": 3.48536976861635e-05, "epoch": 0.726758208187285, "step": 22600 }, { "loss": 0.0828737735748291, "grad_norm": 0.6159257292747498, "learning_rate": 3.4776837758555284e-05, "epoch": 0.7270797826156864, "step": 22610 }, { "loss": 0.0713537335395813, "grad_norm": 0.4589337110519409, "learning_rate": 3.470004482900785e-05, "epoch": 0.7274013570440878, "step": 22620 }, { "loss": 0.06789127588272095, "grad_norm": 0.6434908509254456, "learning_rate": 3.4623318976403895e-05, "epoch": 0.7277229314724893, "step": 22630 }, { "loss": 0.06783631443977356, "grad_norm": 0.7492509484291077, "learning_rate": 3.454666027955722e-05, "epoch": 0.7280445059008908, "step": 22640 }, { "loss": 0.06551461815834045, "grad_norm": 0.4112277925014496, "learning_rate": 3.447006881721256e-05, "epoch": 0.7283660803292922, "step": 22650 }, { "loss": 0.07875913977622986, "grad_norm": 0.5258214473724365, "learning_rate": 3.43935446680457e-05, "epoch": 0.7286876547576937, "step": 22660 }, { "loss": 0.09203838109970093, "grad_norm": 0.7541487812995911, "learning_rate": 3.431708791066313e-05, "epoch": 0.7290092291860951, "step": 22670 }, { "loss": 0.06775216460227966, "grad_norm": 0.5672399401664734, "learning_rate": 3.42406986236023e-05, "epoch": 0.7293308036144965, "step": 22680 }, { "loss": 0.068998521566391, "grad_norm": 0.562434196472168, "learning_rate": 3.4164376885331165e-05, "epoch": 0.7296523780428981, "step": 22690 }, { "loss": 0.08524279594421387, "grad_norm": 0.7267500162124634, "learning_rate": 3.408812277424843e-05, "epoch": 0.7299739524712995, "step": 22700 }, { "loss": 0.07056506872177123, "grad_norm": 0.5633522868156433, "learning_rate": 3.401193636868332e-05, "epoch": 0.7302955268997009, "step": 22710 }, { "loss": 0.07487800121307372, "grad_norm": 0.5191984176635742, "learning_rate": 3.393581774689541e-05, "epoch": 0.7306171013281024, "step": 22720 }, { "loss": 0.05978419780731201, "grad_norm": 0.38137316703796387, "learning_rate": 3.385976698707478e-05, "epoch": 0.7309386757565038, "step": 22730 }, { "loss": 0.08034716248512268, "grad_norm": 0.7938235998153687, "learning_rate": 3.3783784167341716e-05, "epoch": 0.7312602501849053, "step": 22740 }, { "loss": 0.062334203720092775, "grad_norm": 0.49491098523139954, "learning_rate": 3.3707869365746683e-05, "epoch": 0.7315818246133068, "step": 22750 }, { "loss": 0.09403547048568725, "grad_norm": 0.5901187658309937, "learning_rate": 3.363202266027037e-05, "epoch": 0.7319033990417082, "step": 22760 }, { "loss": 0.06843697428703308, "grad_norm": 0.421527236700058, "learning_rate": 3.3556244128823475e-05, "epoch": 0.7322249734701096, "step": 22770 }, { "loss": 0.076862633228302, "grad_norm": 0.558758556842804, "learning_rate": 3.348053384924671e-05, "epoch": 0.7325465478985111, "step": 22780 }, { "loss": 0.060287344455719, "grad_norm": 0.3895277678966522, "learning_rate": 3.3404891899310585e-05, "epoch": 0.7328681223269126, "step": 22790 }, { "loss": 0.07556776404380798, "grad_norm": 0.5408258438110352, "learning_rate": 3.3329318356715466e-05, "epoch": 0.733189696755314, "step": 22800 }, { "loss": 0.07142072319984435, "grad_norm": 0.565141499042511, "learning_rate": 3.325381329909149e-05, "epoch": 0.7335112711837155, "step": 22810 }, { "loss": 0.084370356798172, "grad_norm": 0.7071492075920105, "learning_rate": 3.317837680399834e-05, "epoch": 0.7338328456121169, "step": 22820 }, { "loss": 0.09431569576263428, "grad_norm": 0.657227635383606, "learning_rate": 3.310300894892546e-05, "epoch": 0.7341544200405183, "step": 22830 }, { "loss": 0.08125004172325134, "grad_norm": 0.3469249904155731, "learning_rate": 3.302770981129162e-05, "epoch": 0.7344759944689199, "step": 22840 }, { "loss": 0.056107521057128906, "grad_norm": 0.5997012257575989, "learning_rate": 3.295247946844499e-05, "epoch": 0.7347975688973213, "step": 22850 }, { "loss": 0.07729174494743347, "grad_norm": 0.619046151638031, "learning_rate": 3.2877317997663206e-05, "epoch": 0.7351191433257227, "step": 22860 }, { "loss": 0.09256871342658997, "grad_norm": 0.5356802344322205, "learning_rate": 3.2802225476153036e-05, "epoch": 0.7354407177541242, "step": 22870 }, { "loss": 0.061709392070770266, "grad_norm": 0.34582701325416565, "learning_rate": 3.272720198105049e-05, "epoch": 0.7357622921825256, "step": 22880 }, { "loss": 0.08347746729850769, "grad_norm": 0.36255910992622375, "learning_rate": 3.265224758942069e-05, "epoch": 0.7360838666109271, "step": 22890 }, { "loss": 0.0869390308856964, "grad_norm": 0.4565773606300354, "learning_rate": 3.257736237825768e-05, "epoch": 0.7364054410393286, "step": 22900 }, { "loss": 0.06486717462539673, "grad_norm": 0.4815742075443268, "learning_rate": 3.250254642448457e-05, "epoch": 0.73672701546773, "step": 22910 }, { "loss": 0.05989484190940857, "grad_norm": 0.39747703075408936, "learning_rate": 3.242779980495318e-05, "epoch": 0.7370485898961314, "step": 22920 }, { "loss": 0.07348278164863586, "grad_norm": 0.7308650016784668, "learning_rate": 3.235312259644426e-05, "epoch": 0.737370164324533, "step": 22930 }, { "loss": 0.07345086336135864, "grad_norm": 0.8181717395782471, "learning_rate": 3.2278514875667124e-05, "epoch": 0.7376917387529344, "step": 22940 }, { "loss": 0.08308499455451965, "grad_norm": 0.6992674469947815, "learning_rate": 3.220397671925979e-05, "epoch": 0.7380133131813358, "step": 22950 }, { "loss": 0.08991779088973999, "grad_norm": 0.6428148150444031, "learning_rate": 3.2129508203788836e-05, "epoch": 0.7383348876097373, "step": 22960 }, { "loss": 0.06239253878593445, "grad_norm": 0.564574122428894, "learning_rate": 3.2055109405749214e-05, "epoch": 0.7386564620381387, "step": 22970 }, { "loss": 0.07732295989990234, "grad_norm": 0.6411125659942627, "learning_rate": 3.1980780401564384e-05, "epoch": 0.7389780364665401, "step": 22980 }, { "loss": 0.07028377056121826, "grad_norm": 0.5637052059173584, "learning_rate": 3.1906521267585956e-05, "epoch": 0.7392996108949417, "step": 22990 }, { "loss": 0.07760335206985473, "grad_norm": 0.591373860836029, "learning_rate": 3.183233208009393e-05, "epoch": 0.7396211853233431, "step": 23000 }, { "loss": 0.06789370179176331, "grad_norm": 0.7457368969917297, "learning_rate": 3.175821291529632e-05, "epoch": 0.7399427597517445, "step": 23010 }, { "loss": 0.08317228555679321, "grad_norm": 0.7319291830062866, "learning_rate": 3.16841638493293e-05, "epoch": 0.740264334180146, "step": 23020 }, { "loss": 0.07127461433410645, "grad_norm": 0.7333948612213135, "learning_rate": 3.161018495825705e-05, "epoch": 0.7405859086085474, "step": 23030 }, { "loss": 0.0749741554260254, "grad_norm": 0.6388927102088928, "learning_rate": 3.1536276318071564e-05, "epoch": 0.7409074830369489, "step": 23040 }, { "loss": 0.06397749185562134, "grad_norm": 0.2538682520389557, "learning_rate": 3.1462438004692796e-05, "epoch": 0.7412290574653504, "step": 23050 }, { "loss": 0.06646652817726136, "grad_norm": 0.5968759059906006, "learning_rate": 3.138867009396836e-05, "epoch": 0.7415506318937518, "step": 23060 }, { "loss": 0.05975983738899231, "grad_norm": 0.4150048494338989, "learning_rate": 3.131497266167357e-05, "epoch": 0.7418722063221532, "step": 23070 }, { "loss": 0.06749844551086426, "grad_norm": 0.5127459168434143, "learning_rate": 3.12413457835114e-05, "epoch": 0.7421937807505548, "step": 23080 }, { "loss": 0.06747809648513795, "grad_norm": 1.4364615678787231, "learning_rate": 3.116778953511233e-05, "epoch": 0.7425153551789562, "step": 23090 }, { "loss": 0.06821455359458924, "grad_norm": 0.8889177441596985, "learning_rate": 3.1094303992034234e-05, "epoch": 0.7428369296073576, "step": 23100 }, { "loss": 0.07234838008880615, "grad_norm": 0.5564526319503784, "learning_rate": 3.1020889229762427e-05, "epoch": 0.7431585040357591, "step": 23110 }, { "loss": 0.07779403924942016, "grad_norm": 0.5989982485771179, "learning_rate": 3.094754532370945e-05, "epoch": 0.7434800784641605, "step": 23120 }, { "loss": 0.07060418128967286, "grad_norm": 0.6478323936462402, "learning_rate": 3.087427234921515e-05, "epoch": 0.743801652892562, "step": 23130 }, { "loss": 0.06922822594642639, "grad_norm": 0.33565738797187805, "learning_rate": 3.080107038154638e-05, "epoch": 0.7441232273209635, "step": 23140 }, { "loss": 0.05781306624412537, "grad_norm": 0.6275683641433716, "learning_rate": 3.072793949589718e-05, "epoch": 0.7444448017493649, "step": 23150 }, { "loss": 0.0717390537261963, "grad_norm": 0.9177708029747009, "learning_rate": 3.0654879767388546e-05, "epoch": 0.7447663761777663, "step": 23160 }, { "loss": 0.08312966227531433, "grad_norm": 0.7901967167854309, "learning_rate": 3.0581891271068305e-05, "epoch": 0.7450879506061678, "step": 23170 }, { "loss": 0.07024783492088318, "grad_norm": 0.6758073568344116, "learning_rate": 3.0508974081911224e-05, "epoch": 0.7454095250345693, "step": 23180 }, { "loss": 0.06823955178260803, "grad_norm": 0.5088121891021729, "learning_rate": 3.043612827481871e-05, "epoch": 0.7457310994629707, "step": 23190 }, { "loss": 0.07273564338684083, "grad_norm": 0.5752536654472351, "learning_rate": 3.0363353924618922e-05, "epoch": 0.7460526738913722, "step": 23200 }, { "loss": 0.06960035562515259, "grad_norm": 0.9873530268669128, "learning_rate": 3.0290651106066635e-05, "epoch": 0.7463742483197736, "step": 23210 }, { "loss": 0.06923828125, "grad_norm": 0.23608741164207458, "learning_rate": 3.0218019893843054e-05, "epoch": 0.746695822748175, "step": 23220 }, { "loss": 0.06952532529830932, "grad_norm": 0.5447070002555847, "learning_rate": 3.014546036255592e-05, "epoch": 0.7470173971765766, "step": 23230 }, { "loss": 0.06679846048355102, "grad_norm": 0.5517072081565857, "learning_rate": 3.0072972586739267e-05, "epoch": 0.747338971604978, "step": 23240 }, { "loss": 0.08158867955207824, "grad_norm": 0.46643969416618347, "learning_rate": 3.0000556640853515e-05, "epoch": 0.7476605460333794, "step": 23250 }, { "loss": 0.051595062017440796, "grad_norm": 0.6006515622138977, "learning_rate": 2.9928212599285177e-05, "epoch": 0.7479821204617809, "step": 23260 }, { "loss": 0.06733589768409728, "grad_norm": 0.5819828510284424, "learning_rate": 2.9855940536347004e-05, "epoch": 0.7483036948901823, "step": 23270 }, { "loss": 0.09315139055252075, "grad_norm": 0.5738916993141174, "learning_rate": 2.9783740526277803e-05, "epoch": 0.7486252693185838, "step": 23280 }, { "loss": 0.059750539064407346, "grad_norm": 0.9257296919822693, "learning_rate": 2.9711612643242304e-05, "epoch": 0.7489468437469853, "step": 23290 }, { "loss": 0.07237733006477357, "grad_norm": 0.38398125767707825, "learning_rate": 2.9639556961331217e-05, "epoch": 0.7492684181753867, "step": 23300 }, { "loss": 0.07754396796226501, "grad_norm": 0.7336930632591248, "learning_rate": 2.9567573554561033e-05, "epoch": 0.7495899926037881, "step": 23310 }, { "eval_loss": 0.07043655216693878, "eval_runtime": 34.5596, "eval_samples_per_second": 145.43, "eval_steps_per_second": 36.372, "epoch": 0.7495899926037881, "step": 23310 }, { "loss": 0.06745206713676452, "grad_norm": 0.65149986743927, "learning_rate": 2.9495662496873988e-05, "epoch": 0.7499115670321896, "step": 23320 }, { "loss": 0.06239452362060547, "grad_norm": 0.6391117572784424, "learning_rate": 2.9423823862138076e-05, "epoch": 0.7502331414605911, "step": 23330 }, { "loss": 0.07942205071449279, "grad_norm": 0.4198969602584839, "learning_rate": 2.9352057724146853e-05, "epoch": 0.7505547158889925, "step": 23340 }, { "loss": 0.0804401993751526, "grad_norm": 0.7204815745353699, "learning_rate": 2.928036415661942e-05, "epoch": 0.750876290317394, "step": 23350 }, { "loss": 0.06356263160705566, "grad_norm": 0.49734702706336975, "learning_rate": 2.9208743233200318e-05, "epoch": 0.7511978647457954, "step": 23360 }, { "loss": 0.06504639983177185, "grad_norm": 0.4685254693031311, "learning_rate": 2.9137195027459418e-05, "epoch": 0.7515194391741968, "step": 23370 }, { "loss": 0.06596590876579285, "grad_norm": 0.5883439183235168, "learning_rate": 2.9065719612892018e-05, "epoch": 0.7518410136025984, "step": 23380 }, { "loss": 0.05896996855735779, "grad_norm": 0.4913024604320526, "learning_rate": 2.8994317062918462e-05, "epoch": 0.7521625880309998, "step": 23390 }, { "loss": 0.07391130924224854, "grad_norm": 0.690483033657074, "learning_rate": 2.8922987450884496e-05, "epoch": 0.7524841624594012, "step": 23400 }, { "loss": 0.0676592767238617, "grad_norm": 0.8345645070075989, "learning_rate": 2.8851730850060743e-05, "epoch": 0.7528057368878027, "step": 23410 }, { "loss": 0.0608561635017395, "grad_norm": 0.536762535572052, "learning_rate": 2.878054733364286e-05, "epoch": 0.7531273113162041, "step": 23420 }, { "loss": 0.056133496761322024, "grad_norm": 0.4661767780780792, "learning_rate": 2.87094369747515e-05, "epoch": 0.7534488857446056, "step": 23430 }, { "loss": 0.0817335844039917, "grad_norm": 0.5510740876197815, "learning_rate": 2.8638399846432083e-05, "epoch": 0.7537704601730071, "step": 23440 }, { "loss": 0.06697210073471069, "grad_norm": 0.7082464098930359, "learning_rate": 2.8567436021654914e-05, "epoch": 0.7540920346014085, "step": 23450 }, { "loss": 0.07213478684425353, "grad_norm": 0.3922865688800812, "learning_rate": 2.849654557331486e-05, "epoch": 0.7544136090298099, "step": 23460 }, { "loss": 0.07061719298362731, "grad_norm": 0.5049762725830078, "learning_rate": 2.8425728574231537e-05, "epoch": 0.7547351834582114, "step": 23470 }, { "loss": 0.08335615992546082, "grad_norm": 0.24221433699131012, "learning_rate": 2.835498509714908e-05, "epoch": 0.7550567578866129, "step": 23480 }, { "loss": 0.09131989479064942, "grad_norm": 0.6416606307029724, "learning_rate": 2.8284315214736036e-05, "epoch": 0.7553783323150143, "step": 23490 }, { "loss": 0.07481069564819336, "grad_norm": 0.9378443956375122, "learning_rate": 2.821371899958547e-05, "epoch": 0.7556999067434158, "step": 23500 }, { "loss": 0.09523290395736694, "grad_norm": 0.5338574051856995, "learning_rate": 2.814319652421463e-05, "epoch": 0.7560214811718172, "step": 23510 }, { "loss": 0.08019073605537415, "grad_norm": 0.6044603586196899, "learning_rate": 2.8072747861065164e-05, "epoch": 0.7563430556002186, "step": 23520 }, { "loss": 0.05918534398078919, "grad_norm": 0.5362686514854431, "learning_rate": 2.800237308250283e-05, "epoch": 0.7566646300286202, "step": 23530 }, { "loss": 0.06204081177711487, "grad_norm": 0.5132609605789185, "learning_rate": 2.7932072260817453e-05, "epoch": 0.7569862044570216, "step": 23540 }, { "loss": 0.07708427309989929, "grad_norm": 0.719713568687439, "learning_rate": 2.7861845468222992e-05, "epoch": 0.757307778885423, "step": 23550 }, { "loss": 0.053664219379425046, "grad_norm": 0.40919116139411926, "learning_rate": 2.7791692776857225e-05, "epoch": 0.7576293533138245, "step": 23560 }, { "loss": 0.0843777060508728, "grad_norm": 0.8862894773483276, "learning_rate": 2.772161425878196e-05, "epoch": 0.7579509277422259, "step": 23570 }, { "loss": 0.08774664998054504, "grad_norm": 0.7200607657432556, "learning_rate": 2.7651609985982674e-05, "epoch": 0.7582725021706274, "step": 23580 }, { "loss": 0.07632976174354553, "grad_norm": 0.7512308359146118, "learning_rate": 2.7581680030368685e-05, "epoch": 0.7585940765990289, "step": 23590 }, { "loss": 0.05972144603729248, "grad_norm": 0.4622749090194702, "learning_rate": 2.751182446377295e-05, "epoch": 0.7589156510274303, "step": 23600 }, { "loss": 0.0827726423740387, "grad_norm": 0.8650885224342346, "learning_rate": 2.744204335795194e-05, "epoch": 0.7592372254558317, "step": 23610 }, { "loss": 0.10206010341644287, "grad_norm": 0.9212839007377625, "learning_rate": 2.7372336784585762e-05, "epoch": 0.7595587998842332, "step": 23620 }, { "loss": 0.058333611488342284, "grad_norm": 0.5360256433486938, "learning_rate": 2.7302704815277848e-05, "epoch": 0.7598803743126347, "step": 23630 }, { "loss": 0.08178957104682923, "grad_norm": 0.7339324355125427, "learning_rate": 2.7233147521555025e-05, "epoch": 0.7602019487410361, "step": 23640 }, { "loss": 0.07520390748977661, "grad_norm": 0.4708062410354614, "learning_rate": 2.716366497486744e-05, "epoch": 0.7605235231694376, "step": 23650 }, { "loss": 0.07630211710929871, "grad_norm": 0.7849609851837158, "learning_rate": 2.7094257246588474e-05, "epoch": 0.760845097597839, "step": 23660 }, { "loss": 0.07333303689956665, "grad_norm": 0.6058045625686646, "learning_rate": 2.702492440801464e-05, "epoch": 0.7611666720262404, "step": 23670 }, { "loss": 0.08173723220825195, "grad_norm": 0.5990397334098816, "learning_rate": 2.6955666530365476e-05, "epoch": 0.761488246454642, "step": 23680 }, { "loss": 0.08724742531776428, "grad_norm": 0.5341375470161438, "learning_rate": 2.6886483684783536e-05, "epoch": 0.7618098208830434, "step": 23690 }, { "loss": 0.06513100862503052, "grad_norm": 0.5419607162475586, "learning_rate": 2.6817375942334376e-05, "epoch": 0.7621313953114448, "step": 23700 }, { "loss": 0.0673224687576294, "grad_norm": 0.6282056570053101, "learning_rate": 2.6748343374006256e-05, "epoch": 0.7624529697398463, "step": 23710 }, { "loss": 0.06728132367134095, "grad_norm": 0.7399762272834778, "learning_rate": 2.6679386050710418e-05, "epoch": 0.7627745441682477, "step": 23720 }, { "loss": 0.06391294002532959, "grad_norm": 0.3468599021434784, "learning_rate": 2.661050404328065e-05, "epoch": 0.7630961185966492, "step": 23730 }, { "loss": 0.07310450673103333, "grad_norm": 0.7776421904563904, "learning_rate": 2.65416974224734e-05, "epoch": 0.7634176930250507, "step": 23740 }, { "loss": 0.06410598754882812, "grad_norm": 0.4238409698009491, "learning_rate": 2.6472966258967746e-05, "epoch": 0.7637392674534521, "step": 23750 }, { "loss": 0.0651436448097229, "grad_norm": 0.6460195183753967, "learning_rate": 2.640431062336517e-05, "epoch": 0.7640608418818535, "step": 23760 }, { "loss": 0.08409774303436279, "grad_norm": 0.5648590326309204, "learning_rate": 2.6335730586189654e-05, "epoch": 0.764382416310255, "step": 23770 }, { "loss": 0.07830084562301635, "grad_norm": 1.1920636892318726, "learning_rate": 2.626722621788744e-05, "epoch": 0.7647039907386565, "step": 23780 }, { "loss": 0.052944546937942503, "grad_norm": 0.7563640475273132, "learning_rate": 2.619879758882712e-05, "epoch": 0.7650255651670579, "step": 23790 }, { "loss": 0.0660877764225006, "grad_norm": 0.3552987575531006, "learning_rate": 2.6130444769299456e-05, "epoch": 0.7653471395954594, "step": 23800 }, { "loss": 0.07947134375572204, "grad_norm": 0.5222949385643005, "learning_rate": 2.606216782951729e-05, "epoch": 0.7656687140238608, "step": 23810 }, { "loss": 0.07922396659851075, "grad_norm": 0.9539393782615662, "learning_rate": 2.5993966839615635e-05, "epoch": 0.7659902884522622, "step": 23820 }, { "loss": 0.0755894124507904, "grad_norm": 0.4629724621772766, "learning_rate": 2.5925841869651323e-05, "epoch": 0.7663118628806638, "step": 23830 }, { "loss": 0.05568032264709473, "grad_norm": 0.380663126707077, "learning_rate": 2.5857792989603236e-05, "epoch": 0.7666334373090652, "step": 23840 }, { "loss": 0.07649948596954345, "grad_norm": 0.47023138403892517, "learning_rate": 2.578982026937208e-05, "epoch": 0.7669550117374666, "step": 23850 }, { "loss": 0.0708249032497406, "grad_norm": 0.6598566174507141, "learning_rate": 2.5721923778780233e-05, "epoch": 0.7672765861658681, "step": 23860 }, { "loss": 0.07922458052635192, "grad_norm": 0.7775975465774536, "learning_rate": 2.565410358757189e-05, "epoch": 0.7675981605942696, "step": 23870 }, { "loss": 0.0724449872970581, "grad_norm": 0.3954971432685852, "learning_rate": 2.5586359765412758e-05, "epoch": 0.767919735022671, "step": 23880 }, { "loss": 0.06616113781929016, "grad_norm": 0.603389322757721, "learning_rate": 2.5518692381890207e-05, "epoch": 0.7682413094510725, "step": 23890 }, { "loss": 0.07546033263206482, "grad_norm": 0.6811168789863586, "learning_rate": 2.5451101506512977e-05, "epoch": 0.7685628838794739, "step": 23900 }, { "loss": 0.07617400288581848, "grad_norm": 1.2383785247802734, "learning_rate": 2.53835872087113e-05, "epoch": 0.7688844583078753, "step": 23910 }, { "loss": 0.07465756535530091, "grad_norm": 0.565261960029602, "learning_rate": 2.5316149557836753e-05, "epoch": 0.7692060327362769, "step": 23920 }, { "loss": 0.06164060235023498, "grad_norm": 0.33716362714767456, "learning_rate": 2.524878862316209e-05, "epoch": 0.7695276071646783, "step": 23930 }, { "loss": 0.08892890214920043, "grad_norm": 0.6663774251937866, "learning_rate": 2.5181504473881378e-05, "epoch": 0.7698491815930797, "step": 23940 }, { "loss": 0.08810426592826844, "grad_norm": 0.7194262146949768, "learning_rate": 2.5114297179109714e-05, "epoch": 0.7701707560214812, "step": 23950 }, { "loss": 0.07091661691665649, "grad_norm": 0.4901579022407532, "learning_rate": 2.504716680788325e-05, "epoch": 0.7704923304498826, "step": 23960 }, { "loss": 0.06549544334411621, "grad_norm": 0.6214858889579773, "learning_rate": 2.49801134291592e-05, "epoch": 0.770813904878284, "step": 23970 }, { "loss": 0.06393520832061768, "grad_norm": 0.49949368834495544, "learning_rate": 2.4913137111815676e-05, "epoch": 0.7711354793066856, "step": 23980 }, { "loss": 0.06656065583229065, "grad_norm": 0.7732129096984863, "learning_rate": 2.4846237924651537e-05, "epoch": 0.771457053735087, "step": 23990 }, { "loss": 0.07589731812477112, "grad_norm": 0.3842383921146393, "learning_rate": 2.477941593638654e-05, "epoch": 0.7717786281634884, "step": 24000 }, { "loss": 0.06695120334625244, "grad_norm": 0.6397907137870789, "learning_rate": 2.471267121566101e-05, "epoch": 0.7721002025918899, "step": 24010 }, { "loss": 0.07762157917022705, "grad_norm": 0.8455729484558105, "learning_rate": 2.4646003831036048e-05, "epoch": 0.7724217770202914, "step": 24020 }, { "loss": 0.07489639520645142, "grad_norm": 0.48678871989250183, "learning_rate": 2.457941385099317e-05, "epoch": 0.7727433514486928, "step": 24030 }, { "loss": 0.08373913168907166, "grad_norm": 0.6976548433303833, "learning_rate": 2.451290134393448e-05, "epoch": 0.7730649258770943, "step": 24040 }, { "loss": 0.04959548711776733, "grad_norm": 0.21124552190303802, "learning_rate": 2.44464663781825e-05, "epoch": 0.7733865003054957, "step": 24050 }, { "loss": 0.08356249928474427, "grad_norm": 0.789567232131958, "learning_rate": 2.4380109021980002e-05, "epoch": 0.7737080747338971, "step": 24060 }, { "loss": 0.058235388994216916, "grad_norm": 0.684293806552887, "learning_rate": 2.431382934349018e-05, "epoch": 0.7740296491622987, "step": 24070 }, { "loss": 0.0808126986026764, "grad_norm": 0.6744261980056763, "learning_rate": 2.4247627410796304e-05, "epoch": 0.7743512235907001, "step": 24080 }, { "loss": 0.07473966479301453, "grad_norm": 0.7529351711273193, "learning_rate": 2.4181503291901852e-05, "epoch": 0.7746727980191015, "step": 24090 }, { "loss": 0.07294783592224122, "grad_norm": 0.6820980310440063, "learning_rate": 2.4115457054730406e-05, "epoch": 0.774994372447503, "step": 24100 }, { "loss": 0.05817674994468689, "grad_norm": 0.41013917326927185, "learning_rate": 2.404948876712543e-05, "epoch": 0.7753159468759044, "step": 24110 }, { "loss": 0.0703526794910431, "grad_norm": 0.7143634557723999, "learning_rate": 2.3983598496850445e-05, "epoch": 0.7756375213043059, "step": 24120 }, { "loss": 0.07235896587371826, "grad_norm": 0.7329757213592529, "learning_rate": 2.391778631158873e-05, "epoch": 0.7759590957327074, "step": 24130 }, { "loss": 0.06436983942985534, "grad_norm": 0.4656648337841034, "learning_rate": 2.3852052278943437e-05, "epoch": 0.7762806701611088, "step": 24140 }, { "loss": 0.06897568106651306, "grad_norm": 0.8447166085243225, "learning_rate": 2.3786396466437356e-05, "epoch": 0.7766022445895102, "step": 24150 }, { "loss": 0.066708505153656, "grad_norm": 0.6804686188697815, "learning_rate": 2.372081894151299e-05, "epoch": 0.7769238190179117, "step": 24160 }, { "loss": 0.05716971158981323, "grad_norm": 0.5383962988853455, "learning_rate": 2.3655319771532448e-05, "epoch": 0.7772453934463132, "step": 24170 }, { "loss": 0.0641344964504242, "grad_norm": 0.4108293950557709, "learning_rate": 2.3589899023777228e-05, "epoch": 0.7775669678747146, "step": 24180 }, { "loss": 0.059736895561218264, "grad_norm": 0.5595316886901855, "learning_rate": 2.3524556765448436e-05, "epoch": 0.7778885423031161, "step": 24190 }, { "loss": 0.058888697624206544, "grad_norm": 0.24895723164081573, "learning_rate": 2.3459293063666434e-05, "epoch": 0.7782101167315175, "step": 24200 }, { "loss": 0.08416748046875, "grad_norm": 0.7485470771789551, "learning_rate": 2.339410798547088e-05, "epoch": 0.7785316911599189, "step": 24210 }, { "loss": 0.0762722373008728, "grad_norm": 0.5820397734642029, "learning_rate": 2.3329001597820766e-05, "epoch": 0.7788532655883205, "step": 24220 }, { "loss": 0.09172817468643188, "grad_norm": 0.7628319263458252, "learning_rate": 2.3263973967594188e-05, "epoch": 0.7791748400167219, "step": 24230 }, { "loss": 0.06545842289924622, "grad_norm": 0.53376704454422, "learning_rate": 2.3199025161588385e-05, "epoch": 0.7794964144451233, "step": 24240 }, { "loss": 0.08564090728759766, "grad_norm": 0.6967863440513611, "learning_rate": 2.3134155246519574e-05, "epoch": 0.7798179888735248, "step": 24250 }, { "loss": 0.08012107610702515, "grad_norm": 0.5019391179084778, "learning_rate": 2.306936428902291e-05, "epoch": 0.7801395633019262, "step": 24260 }, { "loss": 0.0676426649093628, "grad_norm": 0.7035797834396362, "learning_rate": 2.300465235565257e-05, "epoch": 0.7804611377303277, "step": 24270 }, { "loss": 0.08119889497756957, "grad_norm": 1.0672072172164917, "learning_rate": 2.2940019512881363e-05, "epoch": 0.7807827121587292, "step": 24280 }, { "loss": 0.06657284498214722, "grad_norm": 0.7331158518791199, "learning_rate": 2.2875465827101082e-05, "epoch": 0.7811042865871306, "step": 24290 }, { "loss": 0.07054007053375244, "grad_norm": 0.5323323607444763, "learning_rate": 2.2810991364622057e-05, "epoch": 0.781425861015532, "step": 24300 }, { "loss": 0.0654269278049469, "grad_norm": 0.8353674411773682, "learning_rate": 2.274659619167324e-05, "epoch": 0.7817474354439335, "step": 24310 }, { "loss": 0.06039928197860718, "grad_norm": 0.8380869626998901, "learning_rate": 2.268228037440222e-05, "epoch": 0.782069009872335, "step": 24320 }, { "loss": 0.08483896255493165, "grad_norm": 1.4444787502288818, "learning_rate": 2.2618043978874957e-05, "epoch": 0.7823905843007364, "step": 24330 }, { "loss": 0.07325981259346008, "grad_norm": 0.6243902444839478, "learning_rate": 2.2553887071075963e-05, "epoch": 0.7827121587291379, "step": 24340 }, { "loss": 0.07584742903709411, "grad_norm": 0.8110746741294861, "learning_rate": 2.2489809716907972e-05, "epoch": 0.7830337331575393, "step": 24350 }, { "loss": 0.0839657187461853, "grad_norm": 0.5827569365501404, "learning_rate": 2.242581198219208e-05, "epoch": 0.7833553075859407, "step": 24360 }, { "loss": 0.06504515409469605, "grad_norm": 0.7146581411361694, "learning_rate": 2.23618939326676e-05, "epoch": 0.7836768820143423, "step": 24370 }, { "loss": 0.08918578624725342, "grad_norm": 0.8743788599967957, "learning_rate": 2.229805563399191e-05, "epoch": 0.7839984564427437, "step": 24380 }, { "loss": 0.061603051424026486, "grad_norm": 0.7088946104049683, "learning_rate": 2.223429715174058e-05, "epoch": 0.7843200308711451, "step": 24390 }, { "loss": 0.07485917806625367, "grad_norm": 0.4197693467140198, "learning_rate": 2.2170618551407075e-05, "epoch": 0.7846416052995466, "step": 24400 }, { "loss": 0.0752731740474701, "grad_norm": 0.7681658267974854, "learning_rate": 2.2107019898402892e-05, "epoch": 0.784963179727948, "step": 24410 }, { "loss": 0.06716175079345703, "grad_norm": 0.5857636332511902, "learning_rate": 2.2043501258057396e-05, "epoch": 0.7852847541563495, "step": 24420 }, { "loss": 0.08920994400978088, "grad_norm": 1.1019350290298462, "learning_rate": 2.1980062695617686e-05, "epoch": 0.785606328584751, "step": 24430 }, { "loss": 0.07643487453460693, "grad_norm": 0.5771538615226746, "learning_rate": 2.1916704276248724e-05, "epoch": 0.7859279030131524, "step": 24440 }, { "loss": 0.06678688526153564, "grad_norm": 0.3816686272621155, "learning_rate": 2.1853426065033022e-05, "epoch": 0.7862494774415538, "step": 24450 }, { "loss": 0.07544881701469422, "grad_norm": 0.3709632158279419, "learning_rate": 2.1790228126970825e-05, "epoch": 0.7865710518699554, "step": 24460 }, { "loss": 0.07349275946617126, "grad_norm": 0.8929899334907532, "learning_rate": 2.172711052697982e-05, "epoch": 0.7868926262983568, "step": 24470 }, { "loss": 0.08367403745651245, "grad_norm": 0.44464945793151855, "learning_rate": 2.1664073329895153e-05, "epoch": 0.7872142007267582, "step": 24480 }, { "loss": 0.07415645718574523, "grad_norm": 0.4882439076900482, "learning_rate": 2.160111660046955e-05, "epoch": 0.7875357751551597, "step": 24490 }, { "loss": 0.07126249670982361, "grad_norm": 0.5081717371940613, "learning_rate": 2.1538240403372877e-05, "epoch": 0.7878573495835611, "step": 24500 }, { "loss": 0.05568731427192688, "grad_norm": 0.4507085084915161, "learning_rate": 2.1475444803192424e-05, "epoch": 0.7881789240119625, "step": 24510 }, { "loss": 0.06841294169425964, "grad_norm": 0.4606808125972748, "learning_rate": 2.141272986443259e-05, "epoch": 0.7885004984403641, "step": 24520 }, { "loss": 0.07865042686462402, "grad_norm": 0.5381951928138733, "learning_rate": 2.1350095651514935e-05, "epoch": 0.7888220728687655, "step": 24530 }, { "loss": 0.06005370020866394, "grad_norm": 0.5262276530265808, "learning_rate": 2.1287542228778145e-05, "epoch": 0.7891436472971669, "step": 24540 }, { "loss": 0.06507790684700013, "grad_norm": 0.741633951663971, "learning_rate": 2.122506966047789e-05, "epoch": 0.7894652217255684, "step": 24550 }, { "loss": 0.07987558841705322, "grad_norm": 0.6250978112220764, "learning_rate": 2.116267801078682e-05, "epoch": 0.7897867961539699, "step": 24560 }, { "loss": 0.06909897923469543, "grad_norm": 0.5917408466339111, "learning_rate": 2.1100367343794392e-05, "epoch": 0.7901083705823713, "step": 24570 }, { "loss": 0.07253650426864625, "grad_norm": 0.8479233384132385, "learning_rate": 2.1038137723506903e-05, "epoch": 0.7904299450107728, "step": 24580 }, { "loss": 0.06407266855239868, "grad_norm": 0.5140347480773926, "learning_rate": 2.0975989213847447e-05, "epoch": 0.7907515194391742, "step": 24590 }, { "loss": 0.08873333334922791, "grad_norm": 0.687673807144165, "learning_rate": 2.0913921878655683e-05, "epoch": 0.7910730938675756, "step": 24600 }, { "loss": 0.08552309274673461, "grad_norm": 0.9812746047973633, "learning_rate": 2.085193578168808e-05, "epoch": 0.7913946682959772, "step": 24610 }, { "loss": 0.082278311252594, "grad_norm": 0.524066686630249, "learning_rate": 2.0790030986617493e-05, "epoch": 0.7917162427243786, "step": 24620 }, { "loss": 0.07650412917137146, "grad_norm": 0.30247119069099426, "learning_rate": 2.0728207557033286e-05, "epoch": 0.79203781715278, "step": 24630 }, { "loss": 0.06983165144920349, "grad_norm": 0.737944483757019, "learning_rate": 2.0666465556441317e-05, "epoch": 0.7923593915811815, "step": 24640 }, { "loss": 0.07627383470535279, "grad_norm": 0.4333503544330597, "learning_rate": 2.060480504826371e-05, "epoch": 0.7926809660095829, "step": 24650 }, { "loss": 0.0832338273525238, "grad_norm": 0.8529727458953857, "learning_rate": 2.0543226095838964e-05, "epoch": 0.7930025404379843, "step": 24660 }, { "loss": 0.0783322811126709, "grad_norm": 0.8573735356330872, "learning_rate": 2.0481728762421726e-05, "epoch": 0.7933241148663859, "step": 24670 }, { "loss": 0.07682687044143677, "grad_norm": 0.5854844450950623, "learning_rate": 2.0420313111182843e-05, "epoch": 0.7936456892947873, "step": 24680 }, { "loss": 0.07580558657646179, "grad_norm": 0.6965992450714111, "learning_rate": 2.0358979205209295e-05, "epoch": 0.7939672637231887, "step": 24690 }, { "loss": 0.07111702561378479, "grad_norm": 0.688252329826355, "learning_rate": 2.0297727107503993e-05, "epoch": 0.7942888381515902, "step": 24700 }, { "loss": 0.07677547931671143, "grad_norm": 0.5384435653686523, "learning_rate": 2.0236556880985912e-05, "epoch": 0.7946104125799917, "step": 24710 }, { "loss": 0.0677608609199524, "grad_norm": 0.5243275165557861, "learning_rate": 2.0175468588489844e-05, "epoch": 0.7949319870083931, "step": 24720 }, { "loss": 0.0720300555229187, "grad_norm": 0.4929002523422241, "learning_rate": 2.011446229276649e-05, "epoch": 0.7952535614367946, "step": 24730 }, { "loss": 0.08252569437026977, "grad_norm": 0.35379138588905334, "learning_rate": 2.0053538056482303e-05, "epoch": 0.795575135865196, "step": 24740 }, { "loss": 0.07043085098266602, "grad_norm": 0.5493075847625732, "learning_rate": 1.9992695942219385e-05, "epoch": 0.7958967102935974, "step": 24750 }, { "loss": 0.09129286408424378, "grad_norm": 0.7228595018386841, "learning_rate": 1.9931936012475592e-05, "epoch": 0.796218284721999, "step": 24760 }, { "loss": 0.06351727247238159, "grad_norm": 1.022997498512268, "learning_rate": 1.9871258329664233e-05, "epoch": 0.7965398591504004, "step": 24770 }, { "loss": 0.07550753355026245, "grad_norm": 0.4523955285549164, "learning_rate": 1.9810662956114246e-05, "epoch": 0.7968614335788018, "step": 24780 }, { "loss": 0.07917873859405518, "grad_norm": 0.42558956146240234, "learning_rate": 1.975014995406993e-05, "epoch": 0.7971830080072033, "step": 24790 }, { "loss": 0.06907861828804016, "grad_norm": 0.5384016633033752, "learning_rate": 1.968971938569101e-05, "epoch": 0.7975045824356047, "step": 24800 }, { "loss": 0.0709206521511078, "grad_norm": 0.7265986800193787, "learning_rate": 1.962937131305258e-05, "epoch": 0.7978261568640062, "step": 24810 }, { "loss": 0.06628232002258301, "grad_norm": 0.4229384660720825, "learning_rate": 1.9569105798144925e-05, "epoch": 0.7981477312924077, "step": 24820 }, { "loss": 0.05692559480667114, "grad_norm": 0.4667295515537262, "learning_rate": 1.9508922902873505e-05, "epoch": 0.7984693057208091, "step": 24830 }, { "loss": 0.06895289421081544, "grad_norm": 0.5784633755683899, "learning_rate": 1.9448822689059008e-05, "epoch": 0.7987908801492105, "step": 24840 }, { "loss": 0.07624900341033936, "grad_norm": 0.3647468090057373, "learning_rate": 1.9388805218437102e-05, "epoch": 0.799112454577612, "step": 24850 }, { "loss": 0.06718692779541016, "grad_norm": 0.5916265845298767, "learning_rate": 1.9328870552658497e-05, "epoch": 0.7994340290060135, "step": 24860 }, { "eval_loss": 0.06779318302869797, "eval_runtime": 34.5798, "eval_samples_per_second": 145.345, "eval_steps_per_second": 36.351, "epoch": 0.799562658777374, "step": 24864 }, { "loss": 0.054065996408462526, "grad_norm": 0.465282142162323, "learning_rate": 1.926901875328889e-05, "epoch": 0.7997556034344149, "step": 24870 }, { "loss": 0.09027044773101807, "grad_norm": 0.6328346133232117, "learning_rate": 1.9209249881808745e-05, "epoch": 0.8000771778628164, "step": 24880 }, { "loss": 0.06480810046195984, "grad_norm": 0.5475115180015564, "learning_rate": 1.9149563999613464e-05, "epoch": 0.8003987522912178, "step": 24890 }, { "loss": 0.09089791178703308, "grad_norm": 0.676400899887085, "learning_rate": 1.9089961168013093e-05, "epoch": 0.8007203267196192, "step": 24900 }, { "loss": 0.08152996897697448, "grad_norm": 0.7551012635231018, "learning_rate": 1.903044144823246e-05, "epoch": 0.8010419011480208, "step": 24910 }, { "loss": 0.07394970655441284, "grad_norm": 0.49486297369003296, "learning_rate": 1.8971004901410937e-05, "epoch": 0.8013634755764222, "step": 24920 }, { "loss": 0.07575193643569947, "grad_norm": 0.1976037621498108, "learning_rate": 1.891165158860252e-05, "epoch": 0.8016850500048236, "step": 24930 }, { "loss": 0.05858004093170166, "grad_norm": 0.4293280243873596, "learning_rate": 1.8852381570775714e-05, "epoch": 0.8020066244332251, "step": 24940 }, { "loss": 0.07835475206375123, "grad_norm": 0.40252768993377686, "learning_rate": 1.879319490881337e-05, "epoch": 0.8023281988616265, "step": 24950 }, { "loss": 0.05356748700141907, "grad_norm": 0.821725606918335, "learning_rate": 1.8734091663512854e-05, "epoch": 0.802649773290028, "step": 24960 }, { "loss": 0.07878881692886353, "grad_norm": 0.8050437569618225, "learning_rate": 1.8675071895585707e-05, "epoch": 0.8029713477184295, "step": 24970 }, { "loss": 0.0742866039276123, "grad_norm": 0.5441704988479614, "learning_rate": 1.861613566565783e-05, "epoch": 0.8032929221468309, "step": 24980 }, { "loss": 0.0770497441291809, "grad_norm": 0.48621344566345215, "learning_rate": 1.8557283034269225e-05, "epoch": 0.8036144965752323, "step": 24990 }, { "loss": 0.05645939111709595, "grad_norm": 0.5151429176330566, "learning_rate": 1.8498514061874084e-05, "epoch": 0.8039360710036338, "step": 25000 }, { "loss": 0.07547401785850524, "grad_norm": 0.6428143978118896, "learning_rate": 1.8439828808840664e-05, "epoch": 0.8042576454320353, "step": 25010 }, { "loss": 0.08763245344161988, "grad_norm": 0.5403868556022644, "learning_rate": 1.8381227335451166e-05, "epoch": 0.8045792198604367, "step": 25020 }, { "loss": 0.059272265434265135, "grad_norm": 0.26042598485946655, "learning_rate": 1.8322709701901798e-05, "epoch": 0.8049007942888382, "step": 25030 }, { "loss": 0.07984400391578675, "grad_norm": 0.47204700112342834, "learning_rate": 1.8264275968302602e-05, "epoch": 0.8052223687172396, "step": 25040 }, { "loss": 0.049272620677948, "grad_norm": 0.38910478353500366, "learning_rate": 1.8205926194677403e-05, "epoch": 0.805543943145641, "step": 25050 }, { "loss": 0.07370315790176392, "grad_norm": 0.5536523461341858, "learning_rate": 1.8147660440963932e-05, "epoch": 0.8058655175740426, "step": 25060 }, { "loss": 0.07952592968940735, "grad_norm": 0.7388674020767212, "learning_rate": 1.8089478767013423e-05, "epoch": 0.806187092002444, "step": 25070 }, { "loss": 0.058814072608947755, "grad_norm": 0.5940684080123901, "learning_rate": 1.8031381232590893e-05, "epoch": 0.8065086664308454, "step": 25080 }, { "loss": 0.06479309201240539, "grad_norm": 0.5064087510108948, "learning_rate": 1.797336789737484e-05, "epoch": 0.8068302408592469, "step": 25090 }, { "loss": 0.06768692135810853, "grad_norm": 0.42703741788864136, "learning_rate": 1.7915438820957277e-05, "epoch": 0.8071518152876483, "step": 25100 }, { "loss": 0.0990111231803894, "grad_norm": 0.7536817789077759, "learning_rate": 1.7857594062843695e-05, "epoch": 0.8074733897160498, "step": 25110 }, { "loss": 0.07579346895217895, "grad_norm": 0.8095626831054688, "learning_rate": 1.7799833682452994e-05, "epoch": 0.8077949641444513, "step": 25120 }, { "loss": 0.07740336656570435, "grad_norm": 0.5823168158531189, "learning_rate": 1.7742157739117372e-05, "epoch": 0.8081165385728527, "step": 25130 }, { "loss": 0.07928024530410767, "grad_norm": 0.6158061623573303, "learning_rate": 1.7684566292082273e-05, "epoch": 0.8084381130012541, "step": 25140 }, { "loss": 0.05865381360054016, "grad_norm": 0.25727465748786926, "learning_rate": 1.762705940050633e-05, "epoch": 0.8087596874296556, "step": 25150 }, { "loss": 0.056818431615829466, "grad_norm": 0.87153559923172, "learning_rate": 1.7569637123461415e-05, "epoch": 0.8090812618580571, "step": 25160 }, { "loss": 0.06351528763771057, "grad_norm": 0.3815484046936035, "learning_rate": 1.7512299519932363e-05, "epoch": 0.8094028362864585, "step": 25170 }, { "loss": 0.07543556094169616, "grad_norm": 0.42525655031204224, "learning_rate": 1.7455046648817118e-05, "epoch": 0.8097244107148599, "step": 25180 }, { "loss": 0.07463630437850952, "grad_norm": 0.7410084009170532, "learning_rate": 1.7397878568926584e-05, "epoch": 0.8100459851432614, "step": 25190 }, { "loss": 0.056423962116241455, "grad_norm": 0.8661516308784485, "learning_rate": 1.7340795338984484e-05, "epoch": 0.8103675595716628, "step": 25200 }, { "loss": 0.06369969248771667, "grad_norm": 0.6466599702835083, "learning_rate": 1.7283797017627478e-05, "epoch": 0.8106891340000643, "step": 25210 }, { "loss": 0.050625795125961305, "grad_norm": 0.3340155780315399, "learning_rate": 1.7226883663404935e-05, "epoch": 0.8110107084284658, "step": 25220 }, { "loss": 0.06831510663032532, "grad_norm": 0.3890591263771057, "learning_rate": 1.7170055334779012e-05, "epoch": 0.8113322828568672, "step": 25230 }, { "loss": 0.07306399941444397, "grad_norm": 0.5340134501457214, "learning_rate": 1.7113312090124445e-05, "epoch": 0.8116538572852686, "step": 25240 }, { "loss": 0.06650834083557129, "grad_norm": 0.37586766481399536, "learning_rate": 1.7056653987728644e-05, "epoch": 0.8119754317136701, "step": 25250 }, { "loss": 0.06394178867340088, "grad_norm": 0.5065869092941284, "learning_rate": 1.700008108579154e-05, "epoch": 0.8122970061420716, "step": 25260 }, { "loss": 0.07049349546432496, "grad_norm": 0.9211828708648682, "learning_rate": 1.694359344242551e-05, "epoch": 0.812618580570473, "step": 25270 }, { "loss": 0.07769249081611633, "grad_norm": 0.5380610227584839, "learning_rate": 1.6887191115655422e-05, "epoch": 0.8129401549988745, "step": 25280 }, { "loss": 0.057320517301559445, "grad_norm": 0.641497015953064, "learning_rate": 1.68308741634184e-05, "epoch": 0.8132617294272759, "step": 25290 }, { "loss": 0.08097407221794128, "grad_norm": 0.4522627294063568, "learning_rate": 1.6774642643563953e-05, "epoch": 0.8135833038556773, "step": 25300 }, { "loss": 0.07374048829078675, "grad_norm": 0.6357822418212891, "learning_rate": 1.6718496613853872e-05, "epoch": 0.8139048782840789, "step": 25310 }, { "loss": 0.06144424080848694, "grad_norm": 0.3151220977306366, "learning_rate": 1.666243613196199e-05, "epoch": 0.8142264527124803, "step": 25320 }, { "loss": 0.08905712366104127, "grad_norm": 0.6542190909385681, "learning_rate": 1.6606461255474404e-05, "epoch": 0.8145480271408817, "step": 25330 }, { "loss": 0.07413828372955322, "grad_norm": 0.5541084408760071, "learning_rate": 1.6550572041889178e-05, "epoch": 0.8148696015692832, "step": 25340 }, { "loss": 0.07905344367027282, "grad_norm": 0.29783719778060913, "learning_rate": 1.6494768548616456e-05, "epoch": 0.8151911759976846, "step": 25350 }, { "loss": 0.08787615299224853, "grad_norm": 0.808488130569458, "learning_rate": 1.6439050832978285e-05, "epoch": 0.8155127504260861, "step": 25360 }, { "loss": 0.06256959438323975, "grad_norm": 0.5121647119522095, "learning_rate": 1.6383418952208553e-05, "epoch": 0.8158343248544876, "step": 25370 }, { "loss": 0.056725984811782836, "grad_norm": 0.7378894090652466, "learning_rate": 1.632787296345314e-05, "epoch": 0.816155899282889, "step": 25380 }, { "loss": 0.06233987808227539, "grad_norm": 0.7205318808555603, "learning_rate": 1.6272412923769507e-05, "epoch": 0.8164774737112904, "step": 25390 }, { "loss": 0.06884159445762635, "grad_norm": 1.139447808265686, "learning_rate": 1.6217038890126966e-05, "epoch": 0.816799048139692, "step": 25400 }, { "loss": 0.07286211252212524, "grad_norm": 0.6898419260978699, "learning_rate": 1.6161750919406405e-05, "epoch": 0.8171206225680934, "step": 25410 }, { "loss": 0.07253461480140685, "grad_norm": 0.6165859699249268, "learning_rate": 1.6106549068400312e-05, "epoch": 0.8174421969964948, "step": 25420 }, { "loss": 0.06999062299728394, "grad_norm": 0.685233473777771, "learning_rate": 1.6051433393812733e-05, "epoch": 0.8177637714248963, "step": 25430 }, { "loss": 0.06932086944580078, "grad_norm": 0.765102744102478, "learning_rate": 1.599640395225921e-05, "epoch": 0.8180853458532977, "step": 25440 }, { "loss": 0.0570664644241333, "grad_norm": 0.4864404797554016, "learning_rate": 1.5941460800266693e-05, "epoch": 0.8184069202816991, "step": 25450 }, { "loss": 0.08184292912483215, "grad_norm": 0.5153435468673706, "learning_rate": 1.5886603994273473e-05, "epoch": 0.8187284947101007, "step": 25460 }, { "loss": 0.0793727159500122, "grad_norm": 0.7540408372879028, "learning_rate": 1.5831833590629132e-05, "epoch": 0.8190500691385021, "step": 25470 }, { "loss": 0.05399051308631897, "grad_norm": 0.45676225423812866, "learning_rate": 1.577714964559457e-05, "epoch": 0.8193716435669035, "step": 25480 }, { "loss": 0.06276117563247681, "grad_norm": 0.5364438891410828, "learning_rate": 1.572255221534178e-05, "epoch": 0.819693217995305, "step": 25490 }, { "loss": 0.07922245264053344, "grad_norm": 1.0108367204666138, "learning_rate": 1.5668041355953956e-05, "epoch": 0.8200147924237065, "step": 25500 }, { "loss": 0.0870794415473938, "grad_norm": 0.7691150903701782, "learning_rate": 1.5613617123425384e-05, "epoch": 0.8203363668521079, "step": 25510 }, { "loss": 0.09856227636337281, "grad_norm": 0.706281840801239, "learning_rate": 1.5559279573661267e-05, "epoch": 0.8206579412805094, "step": 25520 }, { "loss": 0.09251796603202819, "grad_norm": 0.7108443975448608, "learning_rate": 1.550502876247788e-05, "epoch": 0.8209795157089108, "step": 25530 }, { "loss": 0.07778313755989075, "grad_norm": 0.8512086272239685, "learning_rate": 1.54508647456023e-05, "epoch": 0.8213010901373122, "step": 25540 }, { "loss": 0.08167454004287719, "grad_norm": 0.6187142133712769, "learning_rate": 1.5396787578672532e-05, "epoch": 0.8216226645657138, "step": 25550 }, { "loss": 0.06370655298233033, "grad_norm": 0.27371832728385925, "learning_rate": 1.5342797317237288e-05, "epoch": 0.8219442389941152, "step": 25560 }, { "loss": 0.07302612662315369, "grad_norm": 0.5439883470535278, "learning_rate": 1.5288894016756072e-05, "epoch": 0.8222658134225166, "step": 25570 }, { "loss": 0.06198353767395019, "grad_norm": 0.9168033599853516, "learning_rate": 1.5235077732599057e-05, "epoch": 0.8225873878509181, "step": 25580 }, { "loss": 0.08354194164276123, "grad_norm": 0.28686732053756714, "learning_rate": 1.5181348520046979e-05, "epoch": 0.8229089622793195, "step": 25590 }, { "loss": 0.0665959358215332, "grad_norm": 0.4895515441894531, "learning_rate": 1.5127706434291201e-05, "epoch": 0.823230536707721, "step": 25600 }, { "loss": 0.06269041895866394, "grad_norm": 0.15579339861869812, "learning_rate": 1.5074151530433512e-05, "epoch": 0.8235521111361225, "step": 25610 }, { "loss": 0.08237427473068237, "grad_norm": 0.6324445009231567, "learning_rate": 1.5020683863486219e-05, "epoch": 0.8238736855645239, "step": 25620 }, { "loss": 0.0931522786617279, "grad_norm": 0.5454768538475037, "learning_rate": 1.496730348837202e-05, "epoch": 0.8241952599929253, "step": 25630 }, { "loss": 0.06490244269371033, "grad_norm": 0.5473190546035767, "learning_rate": 1.4914010459923844e-05, "epoch": 0.8245168344213268, "step": 25640 }, { "loss": 0.07663293480873108, "grad_norm": 0.9343203902244568, "learning_rate": 1.4860804832885023e-05, "epoch": 0.8248384088497283, "step": 25650 }, { "loss": 0.07581743001937866, "grad_norm": 0.8527815937995911, "learning_rate": 1.4807686661909015e-05, "epoch": 0.8251599832781297, "step": 25660 }, { "loss": 0.0709666907787323, "grad_norm": 0.7864364981651306, "learning_rate": 1.475465600155953e-05, "epoch": 0.8254815577065312, "step": 25670 }, { "loss": 0.0744117796421051, "grad_norm": 0.6601343154907227, "learning_rate": 1.4701712906310295e-05, "epoch": 0.8258031321349326, "step": 25680 }, { "loss": 0.06409841179847717, "grad_norm": 0.33034786581993103, "learning_rate": 1.4648857430545092e-05, "epoch": 0.826124706563334, "step": 25690 }, { "loss": 0.0730972170829773, "grad_norm": 0.4441629648208618, "learning_rate": 1.459608962855783e-05, "epoch": 0.8264462809917356, "step": 25700 }, { "loss": 0.07730489373207092, "grad_norm": 0.6629310846328735, "learning_rate": 1.4543409554552211e-05, "epoch": 0.826767855420137, "step": 25710 }, { "loss": 0.08678494691848755, "grad_norm": 0.5934807062149048, "learning_rate": 1.4490817262641876e-05, "epoch": 0.8270894298485384, "step": 25720 }, { "loss": 0.07854796648025512, "grad_norm": 0.3622133731842041, "learning_rate": 1.4438312806850319e-05, "epoch": 0.8274110042769399, "step": 25730 }, { "loss": 0.06401524543762208, "grad_norm": 0.5912713408470154, "learning_rate": 1.4385896241110752e-05, "epoch": 0.8277325787053413, "step": 25740 }, { "loss": 0.07179030179977416, "grad_norm": 0.6012607216835022, "learning_rate": 1.4333567619266153e-05, "epoch": 0.8280541531337428, "step": 25750 }, { "loss": 0.07031106948852539, "grad_norm": 1.0617568492889404, "learning_rate": 1.4281326995069166e-05, "epoch": 0.8283757275621443, "step": 25760 }, { "loss": 0.05340087413787842, "grad_norm": 0.5312886834144592, "learning_rate": 1.422917442218198e-05, "epoch": 0.8286973019905457, "step": 25770 }, { "loss": 0.07728091478347779, "grad_norm": 1.6353224515914917, "learning_rate": 1.4177109954176437e-05, "epoch": 0.8290188764189471, "step": 25780 }, { "loss": 0.061128294467926024, "grad_norm": 0.2983153760433197, "learning_rate": 1.4125133644533761e-05, "epoch": 0.8293404508473486, "step": 25790 }, { "loss": 0.07462538480758667, "grad_norm": 0.6013174653053284, "learning_rate": 1.4073245546644731e-05, "epoch": 0.8296620252757501, "step": 25800 }, { "loss": 0.05741596817970276, "grad_norm": 0.28202420473098755, "learning_rate": 1.4021445713809422e-05, "epoch": 0.8299835997041515, "step": 25810 }, { "loss": 0.07711042165756225, "grad_norm": 0.7175576090812683, "learning_rate": 1.3969734199237283e-05, "epoch": 0.830305174132553, "step": 25820 }, { "loss": 0.06802661418914795, "grad_norm": 0.5284676551818848, "learning_rate": 1.3918111056047067e-05, "epoch": 0.8306267485609544, "step": 25830 }, { "loss": 0.07056057453155518, "grad_norm": 0.570586621761322, "learning_rate": 1.386657633726669e-05, "epoch": 0.8309483229893558, "step": 25840 }, { "loss": 0.07420239448547364, "grad_norm": 1.6333948373794556, "learning_rate": 1.3815130095833306e-05, "epoch": 0.8312698974177574, "step": 25850 }, { "loss": 0.05796622633934021, "grad_norm": 0.6089661717414856, "learning_rate": 1.376377238459311e-05, "epoch": 0.8315914718461588, "step": 25860 }, { "loss": 0.07319130897521972, "grad_norm": 0.568272590637207, "learning_rate": 1.3712503256301424e-05, "epoch": 0.8319130462745602, "step": 25870 }, { "loss": 0.06356196999549865, "grad_norm": 0.36745578050613403, "learning_rate": 1.3661322763622519e-05, "epoch": 0.8322346207029617, "step": 25880 }, { "loss": 0.07408368587493896, "grad_norm": 1.0645989179611206, "learning_rate": 1.3610230959129654e-05, "epoch": 0.8325561951313631, "step": 25890 }, { "loss": 0.06604211330413819, "grad_norm": 0.7959291338920593, "learning_rate": 1.3559227895305015e-05, "epoch": 0.8328777695597646, "step": 25900 }, { "loss": 0.07473559975624085, "grad_norm": 0.46967774629592896, "learning_rate": 1.3508313624539558e-05, "epoch": 0.8331993439881661, "step": 25910 }, { "loss": 0.0604839563369751, "grad_norm": 0.6299682855606079, "learning_rate": 1.3457488199133096e-05, "epoch": 0.8335209184165675, "step": 25920 }, { "loss": 0.07758623361587524, "grad_norm": 0.60549396276474, "learning_rate": 1.3406751671294149e-05, "epoch": 0.8338424928449689, "step": 25930 }, { "loss": 0.07174047231674194, "grad_norm": 0.376686155796051, "learning_rate": 1.3356104093139855e-05, "epoch": 0.8341640672733704, "step": 25940 }, { "loss": 0.05498989224433899, "grad_norm": 0.33224010467529297, "learning_rate": 1.330554551669615e-05, "epoch": 0.8344856417017719, "step": 25950 }, { "loss": 0.08045591115951538, "grad_norm": 0.4750721752643585, "learning_rate": 1.3255075993897392e-05, "epoch": 0.8348072161301733, "step": 25960 }, { "loss": 0.039634838700294495, "grad_norm": 0.40264174342155457, "learning_rate": 1.3204695576586534e-05, "epoch": 0.8351287905585748, "step": 25970 }, { "loss": 0.08607410788536071, "grad_norm": 0.9619850516319275, "learning_rate": 1.3154404316514979e-05, "epoch": 0.8354503649869762, "step": 25980 }, { "loss": 0.08280614018440247, "grad_norm": 0.46870899200439453, "learning_rate": 1.3104202265342525e-05, "epoch": 0.8357719394153776, "step": 25990 }, { "loss": 0.05811293721199036, "grad_norm": 0.901194155216217, "learning_rate": 1.3054089474637366e-05, "epoch": 0.8360935138437792, "step": 26000 }, { "loss": 0.0757293939590454, "grad_norm": 0.8040971159934998, "learning_rate": 1.3004065995876013e-05, "epoch": 0.8364150882721806, "step": 26010 }, { "loss": 0.08551060557365417, "grad_norm": 0.6271659731864929, "learning_rate": 1.2954131880443222e-05, "epoch": 0.836736662700582, "step": 26020 }, { "loss": 0.07875540852546692, "grad_norm": 0.8364705443382263, "learning_rate": 1.2904287179631946e-05, "epoch": 0.8370582371289835, "step": 26030 }, { "loss": 0.08773240447044373, "grad_norm": 0.49071305990219116, "learning_rate": 1.2854531944643266e-05, "epoch": 0.837379811557385, "step": 26040 }, { "loss": 0.0672372043132782, "grad_norm": 0.6522132754325867, "learning_rate": 1.2804866226586443e-05, "epoch": 0.8377013859857864, "step": 26050 }, { "loss": 0.07467586994171142, "grad_norm": 0.6706381440162659, "learning_rate": 1.2755290076478677e-05, "epoch": 0.8380229604141879, "step": 26060 }, { "loss": 0.08341933488845825, "grad_norm": 0.5691989660263062, "learning_rate": 1.2705803545245243e-05, "epoch": 0.8383445348425893, "step": 26070 }, { "loss": 0.06978496313095092, "grad_norm": 0.717632532119751, "learning_rate": 1.2656406683719357e-05, "epoch": 0.8386661092709907, "step": 26080 }, { "loss": 0.08308391571044922, "grad_norm": 0.7718671560287476, "learning_rate": 1.2607099542642075e-05, "epoch": 0.8389876836993923, "step": 26090 }, { "loss": 0.07776856422424316, "grad_norm": 0.8327365517616272, "learning_rate": 1.2557882172662338e-05, "epoch": 0.8393092581277937, "step": 26100 }, { "loss": 0.06721643805503845, "grad_norm": 0.6579548120498657, "learning_rate": 1.2508754624336827e-05, "epoch": 0.8396308325561951, "step": 26110 }, { "loss": 0.05926239490509033, "grad_norm": 0.40918952226638794, "learning_rate": 1.2459716948130018e-05, "epoch": 0.8399524069845966, "step": 26120 }, { "loss": 0.08606613874435425, "grad_norm": 0.7024818062782288, "learning_rate": 1.2410769194413995e-05, "epoch": 0.840273981412998, "step": 26130 }, { "loss": 0.07684431672096252, "grad_norm": 0.7775634527206421, "learning_rate": 1.2361911413468541e-05, "epoch": 0.8405955558413994, "step": 26140 }, { "loss": 0.06418314576148987, "grad_norm": 0.8045727014541626, "learning_rate": 1.2313143655480997e-05, "epoch": 0.840917130269801, "step": 26150 }, { "loss": 0.08686638474464417, "grad_norm": 0.8366597294807434, "learning_rate": 1.226446597054619e-05, "epoch": 0.8412387046982024, "step": 26160 }, { "loss": 0.06793270111083985, "grad_norm": 0.80096435546875, "learning_rate": 1.2215878408666493e-05, "epoch": 0.8415602791266038, "step": 26170 }, { "loss": 0.0783788800239563, "grad_norm": 0.8146688342094421, "learning_rate": 1.2167381019751634e-05, "epoch": 0.8418818535550053, "step": 26180 }, { "loss": 0.08650415539741516, "grad_norm": 0.7247388958930969, "learning_rate": 1.2118973853618765e-05, "epoch": 0.8422034279834067, "step": 26190 }, { "loss": 0.06807527542114258, "grad_norm": 0.5796247124671936, "learning_rate": 1.2070656959992354e-05, "epoch": 0.8425250024118082, "step": 26200 }, { "loss": 0.06200519800186157, "grad_norm": 0.36104416847229004, "learning_rate": 1.2022430388504091e-05, "epoch": 0.8428465768402097, "step": 26210 }, { "loss": 0.04987512826919556, "grad_norm": 0.5600906014442444, "learning_rate": 1.1974294188692959e-05, "epoch": 0.8431681512686111, "step": 26220 }, { "loss": 0.061176568269729614, "grad_norm": 0.41778096556663513, "learning_rate": 1.1926248410005047e-05, "epoch": 0.8434897256970125, "step": 26230 }, { "loss": 0.06344327926635743, "grad_norm": 0.2527564764022827, "learning_rate": 1.1878293101793625e-05, "epoch": 0.843811300125414, "step": 26240 }, { "loss": 0.06565487980842591, "grad_norm": 0.4772278368473053, "learning_rate": 1.1830428313318976e-05, "epoch": 0.8441328745538155, "step": 26250 }, { "loss": 0.0762079894542694, "grad_norm": 0.583329439163208, "learning_rate": 1.1782654093748357e-05, "epoch": 0.8444544489822169, "step": 26260 }, { "loss": 0.08452026844024658, "grad_norm": 0.4808464050292969, "learning_rate": 1.1734970492156171e-05, "epoch": 0.8447760234106184, "step": 26270 }, { "loss": 0.06882460117340088, "grad_norm": 0.47750189900398254, "learning_rate": 1.1687377557523526e-05, "epoch": 0.8450975978390198, "step": 26280 }, { "loss": 0.05743715763092041, "grad_norm": 0.6582987308502197, "learning_rate": 1.1639875338738549e-05, "epoch": 0.8454191722674212, "step": 26290 }, { "loss": 0.06682893633842468, "grad_norm": 0.45502129197120667, "learning_rate": 1.1592463884596094e-05, "epoch": 0.8457407466958228, "step": 26300 }, { "loss": 0.07056695818901063, "grad_norm": 0.6482895016670227, "learning_rate": 1.1545143243797785e-05, "epoch": 0.8460623211242242, "step": 26310 }, { "loss": 0.06616935729980469, "grad_norm": 0.6247773766517639, "learning_rate": 1.1497913464952015e-05, "epoch": 0.8463838955526256, "step": 26320 }, { "loss": 0.09493278861045837, "grad_norm": 0.5621542930603027, "learning_rate": 1.145077459657381e-05, "epoch": 0.8467054699810271, "step": 26330 }, { "loss": 0.06745593547821045, "grad_norm": 0.7123182415962219, "learning_rate": 1.1403726687084838e-05, "epoch": 0.8470270444094286, "step": 26340 }, { "loss": 0.05028054118156433, "grad_norm": 0.6581669449806213, "learning_rate": 1.1356769784813271e-05, "epoch": 0.84734861883783, "step": 26350 }, { "loss": 0.06702528595924377, "grad_norm": 0.8366682529449463, "learning_rate": 1.1309903937993838e-05, "epoch": 0.8476701932662315, "step": 26360 }, { "loss": 0.06623958349227906, "grad_norm": 0.8056640028953552, "learning_rate": 1.1263129194767764e-05, "epoch": 0.8479917676946329, "step": 26370 }, { "loss": 0.08432796597480774, "grad_norm": 1.285785436630249, "learning_rate": 1.1216445603182612e-05, "epoch": 0.8483133421230343, "step": 26380 }, { "loss": 0.058904987573623654, "grad_norm": 0.43198439478874207, "learning_rate": 1.1169853211192394e-05, "epoch": 0.8486349165514359, "step": 26390 }, { "loss": 0.07768229246139527, "grad_norm": 1.1017895936965942, "learning_rate": 1.1123352066657422e-05, "epoch": 0.8489564909798373, "step": 26400 }, { "loss": 0.04701220989227295, "grad_norm": 0.5291910171508789, "learning_rate": 1.1076942217344221e-05, "epoch": 0.8492780654082387, "step": 26410 }, { "eval_loss": 0.06639111787080765, "eval_runtime": 34.6001, "eval_samples_per_second": 145.26, "eval_steps_per_second": 36.329, "epoch": 0.8495353249509598, "step": 26418 }, { "loss": 0.06604416966438294, "grad_norm": 0.5437458157539368, "learning_rate": 1.1030623710925603e-05, "epoch": 0.8495996398366402, "step": 26420 }, { "loss": 0.05475022792816162, "grad_norm": 0.7437111139297485, "learning_rate": 1.0984396594980516e-05, "epoch": 0.8499212142650416, "step": 26430 }, { "loss": 0.055143731832504275, "grad_norm": 0.7033617496490479, "learning_rate": 1.093826091699406e-05, "epoch": 0.850242788693443, "step": 26440 }, { "loss": 0.05738850831985474, "grad_norm": 1.1026557683944702, "learning_rate": 1.0892216724357351e-05, "epoch": 0.8505643631218446, "step": 26450 }, { "loss": 0.08413679003715516, "grad_norm": 0.5151999592781067, "learning_rate": 1.0846264064367595e-05, "epoch": 0.850885937550246, "step": 26460 }, { "loss": 0.0639430046081543, "grad_norm": 0.6788651943206787, "learning_rate": 1.0800402984227964e-05, "epoch": 0.8512075119786474, "step": 26470 }, { "loss": 0.07391080856323243, "grad_norm": 0.619463324546814, "learning_rate": 1.0754633531047508e-05, "epoch": 0.8515290864070489, "step": 26480 }, { "loss": 0.051558637619018556, "grad_norm": 0.6007417440414429, "learning_rate": 1.070895575184122e-05, "epoch": 0.8518506608354504, "step": 26490 }, { "loss": 0.0619118869304657, "grad_norm": 0.610027015209198, "learning_rate": 1.0663369693529868e-05, "epoch": 0.8521722352638518, "step": 26500 }, { "loss": 0.05492755174636841, "grad_norm": 0.4237189292907715, "learning_rate": 1.0617875402940036e-05, "epoch": 0.8524938096922533, "step": 26510 }, { "loss": 0.07027317881584168, "grad_norm": 0.36540210247039795, "learning_rate": 1.0572472926804067e-05, "epoch": 0.8528153841206547, "step": 26520 }, { "loss": 0.07017855644226074, "grad_norm": 0.4847584068775177, "learning_rate": 1.052716231175992e-05, "epoch": 0.8531369585490561, "step": 26530 }, { "loss": 0.08128488063812256, "grad_norm": 0.8945302963256836, "learning_rate": 1.0481943604351274e-05, "epoch": 0.8534585329774577, "step": 26540 }, { "loss": 0.08098180294036865, "grad_norm": 0.341545045375824, "learning_rate": 1.043681685102732e-05, "epoch": 0.8537801074058591, "step": 26550 }, { "loss": 0.08044508099555969, "grad_norm": 0.3028389811515808, "learning_rate": 1.0391782098142867e-05, "epoch": 0.8541016818342605, "step": 26560 }, { "loss": 0.06877010464668273, "grad_norm": 0.481671541929245, "learning_rate": 1.0346839391958175e-05, "epoch": 0.854423256262662, "step": 26570 }, { "loss": 0.07255155444145203, "grad_norm": 0.5929657220840454, "learning_rate": 1.0301988778638916e-05, "epoch": 0.8547448306910634, "step": 26580 }, { "loss": 0.07767698764801026, "grad_norm": 0.6092710494995117, "learning_rate": 1.0257230304256282e-05, "epoch": 0.8550664051194649, "step": 26590 }, { "loss": 0.08522970676422119, "grad_norm": 0.6273548603057861, "learning_rate": 1.021256401478673e-05, "epoch": 0.8553879795478664, "step": 26600 }, { "loss": 0.07369956970214844, "grad_norm": 0.33282068371772766, "learning_rate": 1.016798995611199e-05, "epoch": 0.8557095539762678, "step": 26610 }, { "loss": 0.08735382556915283, "grad_norm": 0.6816231608390808, "learning_rate": 1.0123508174019169e-05, "epoch": 0.8560311284046692, "step": 26620 }, { "loss": 0.07665892243385315, "grad_norm": 0.4102591574192047, "learning_rate": 1.0079118714200464e-05, "epoch": 0.8563527028330707, "step": 26630 }, { "loss": 0.07389369606971741, "grad_norm": 0.4486946165561676, "learning_rate": 1.003482162225331e-05, "epoch": 0.8566742772614722, "step": 26640 }, { "loss": 0.0670365035533905, "grad_norm": 0.6316031217575073, "learning_rate": 9.990616943680265e-06, "epoch": 0.8569958516898736, "step": 26650 }, { "loss": 0.06709270477294922, "grad_norm": 0.6071740984916687, "learning_rate": 9.946504723888905e-06, "epoch": 0.8573174261182751, "step": 26660 }, { "loss": 0.062121140956878665, "grad_norm": 0.4220135509967804, "learning_rate": 9.902485008191897e-06, "epoch": 0.8576390005466765, "step": 26670 }, { "loss": 0.07908244729042054, "grad_norm": 0.6105177402496338, "learning_rate": 9.858557841806803e-06, "epoch": 0.8579605749750779, "step": 26680 }, { "loss": 0.06170150637626648, "grad_norm": 0.6949994564056396, "learning_rate": 9.814723269856219e-06, "epoch": 0.8582821494034795, "step": 26690 }, { "loss": 0.07273870706558228, "grad_norm": 0.549635648727417, "learning_rate": 9.770981337367524e-06, "epoch": 0.8586037238318809, "step": 26700 }, { "loss": 0.07126254439353943, "grad_norm": 0.8531264066696167, "learning_rate": 9.727332089273022e-06, "epoch": 0.8589252982602823, "step": 26710 }, { "loss": 0.0822081744670868, "grad_norm": 0.5738287568092346, "learning_rate": 9.683775570409781e-06, "epoch": 0.8592468726886838, "step": 26720 }, { "loss": 0.05138362646102905, "grad_norm": 0.35763096809387207, "learning_rate": 9.640311825519577e-06, "epoch": 0.8595684471170852, "step": 26730 }, { "loss": 0.08185197710990906, "grad_norm": 0.8491652011871338, "learning_rate": 9.596940899248951e-06, "epoch": 0.8598900215454867, "step": 26740 }, { "loss": 0.07479264736175537, "grad_norm": 0.4308110773563385, "learning_rate": 9.553662836149047e-06, "epoch": 0.8602115959738882, "step": 26750 }, { "loss": 0.043882808089256285, "grad_norm": 0.49413225054740906, "learning_rate": 9.510477680675656e-06, "epoch": 0.8605331704022896, "step": 26760 }, { "loss": 0.0769636869430542, "grad_norm": 0.8833135962486267, "learning_rate": 9.467385477189105e-06, "epoch": 0.860854744830691, "step": 26770 }, { "loss": 0.05496814846992493, "grad_norm": 0.462801069021225, "learning_rate": 9.424386269954266e-06, "epoch": 0.8611763192590925, "step": 26780 }, { "loss": 0.080049329996109, "grad_norm": 0.6054528951644897, "learning_rate": 9.381480103140483e-06, "epoch": 0.861497893687494, "step": 26790 }, { "loss": 0.08080293536186219, "grad_norm": 0.5494915246963501, "learning_rate": 9.338667020821468e-06, "epoch": 0.8618194681158954, "step": 26800 }, { "loss": 0.08168855309486389, "grad_norm": 0.6569522619247437, "learning_rate": 9.29594706697543e-06, "epoch": 0.8621410425442969, "step": 26810 }, { "loss": 0.07000950574874878, "grad_norm": 0.34261035919189453, "learning_rate": 9.2533202854848e-06, "epoch": 0.8624626169726983, "step": 26820 }, { "loss": 0.07513426542282105, "grad_norm": 0.47495919466018677, "learning_rate": 9.210786720136333e-06, "epoch": 0.8627841914010997, "step": 26830 }, { "loss": 0.08019859790802002, "grad_norm": 0.7717815637588501, "learning_rate": 9.168346414621109e-06, "epoch": 0.8631057658295013, "step": 26840 }, { "loss": 0.06261076331138611, "grad_norm": 0.47469109296798706, "learning_rate": 9.125999412534291e-06, "epoch": 0.8634273402579027, "step": 26850 }, { "loss": 0.07787419557571411, "grad_norm": 0.7436442971229553, "learning_rate": 9.083745757375307e-06, "epoch": 0.8637489146863041, "step": 26860 }, { "loss": 0.05460736751556396, "grad_norm": 0.42278188467025757, "learning_rate": 9.041585492547633e-06, "epoch": 0.8640704891147056, "step": 26870 }, { "loss": 0.08874971866607666, "grad_norm": 0.5448344945907593, "learning_rate": 8.999518661358785e-06, "epoch": 0.864392063543107, "step": 26880 }, { "loss": 0.07377313375473023, "grad_norm": 0.5940506458282471, "learning_rate": 8.957545307020399e-06, "epoch": 0.8647136379715085, "step": 26890 }, { "loss": 0.06170197129249573, "grad_norm": 0.3291904926300049, "learning_rate": 8.915665472648004e-06, "epoch": 0.86503521239991, "step": 26900 }, { "loss": 0.06614161133766175, "grad_norm": 0.5995969772338867, "learning_rate": 8.873879201261138e-06, "epoch": 0.8653567868283114, "step": 26910 }, { "loss": 0.08219808340072632, "grad_norm": 0.5390191078186035, "learning_rate": 8.83218653578317e-06, "epoch": 0.8656783612567128, "step": 26920 }, { "loss": 0.07147032022476196, "grad_norm": 0.7250548005104065, "learning_rate": 8.790587519041305e-06, "epoch": 0.8659999356851144, "step": 26930 }, { "loss": 0.07538982033729554, "grad_norm": 0.3211059272289276, "learning_rate": 8.749082193766633e-06, "epoch": 0.8663215101135158, "step": 26940 }, { "loss": 0.048902788758277894, "grad_norm": 0.6315880417823792, "learning_rate": 8.707670602593909e-06, "epoch": 0.8666430845419172, "step": 26950 }, { "loss": 0.06604506373405457, "grad_norm": 0.5350209474563599, "learning_rate": 8.66635278806166e-06, "epoch": 0.8669646589703187, "step": 26960 }, { "loss": 0.06788616180419922, "grad_norm": 0.7068520784378052, "learning_rate": 8.625128792612081e-06, "epoch": 0.8672862333987201, "step": 26970 }, { "loss": 0.05400567650794983, "grad_norm": 0.781935453414917, "learning_rate": 8.583998658590975e-06, "epoch": 0.8676078078271215, "step": 26980 }, { "loss": 0.07868753671646118, "grad_norm": 0.6845842003822327, "learning_rate": 8.542962428247748e-06, "epoch": 0.8679293822555231, "step": 26990 }, { "loss": 0.07282733917236328, "grad_norm": 0.39165160059928894, "learning_rate": 8.502020143735302e-06, "epoch": 0.8682509566839245, "step": 27000 }, { "loss": 0.06988500952720642, "grad_norm": 0.6668932437896729, "learning_rate": 8.461171847110127e-06, "epoch": 0.8685725311123259, "step": 27010 }, { "loss": 0.06271352767944335, "grad_norm": 0.6527853012084961, "learning_rate": 8.420417580332052e-06, "epoch": 0.8688941055407274, "step": 27020 }, { "loss": 0.059419095516204834, "grad_norm": 0.514185905456543, "learning_rate": 8.37975738526442e-06, "epoch": 0.8692156799691289, "step": 27030 }, { "loss": 0.06655218005180359, "grad_norm": 0.41537120938301086, "learning_rate": 8.339191303673899e-06, "epoch": 0.8695372543975303, "step": 27040 }, { "loss": 0.053801441192626955, "grad_norm": 0.34744489192962646, "learning_rate": 8.298719377230457e-06, "epoch": 0.8698588288259318, "step": 27050 }, { "loss": 0.07902992367744446, "grad_norm": 0.9833202362060547, "learning_rate": 8.258341647507407e-06, "epoch": 0.8701804032543332, "step": 27060 }, { "loss": 0.06786506175994873, "grad_norm": 0.5024024248123169, "learning_rate": 8.218058155981223e-06, "epoch": 0.8705019776827346, "step": 27070 }, { "loss": 0.0703201413154602, "grad_norm": 0.6945049166679382, "learning_rate": 8.177868944031663e-06, "epoch": 0.8708235521111362, "step": 27080 }, { "loss": 0.0715039312839508, "grad_norm": 0.35131388902664185, "learning_rate": 8.13777405294156e-06, "epoch": 0.8711451265395376, "step": 27090 }, { "loss": 0.07369782328605652, "grad_norm": 0.5261259078979492, "learning_rate": 8.097773523896902e-06, "epoch": 0.871466700967939, "step": 27100 }, { "loss": 0.07828069925308227, "grad_norm": 1.102333903312683, "learning_rate": 8.057867397986773e-06, "epoch": 0.8717882753963405, "step": 27110 }, { "loss": 0.06868954300880432, "grad_norm": 0.34812596440315247, "learning_rate": 8.018055716203198e-06, "epoch": 0.8721098498247419, "step": 27120 }, { "loss": 0.07218150496482849, "grad_norm": 0.43147793412208557, "learning_rate": 7.978338519441308e-06, "epoch": 0.8724314242531434, "step": 27130 }, { "loss": 0.05935378670692444, "grad_norm": 0.36542806029319763, "learning_rate": 7.938715848499068e-06, "epoch": 0.8727529986815449, "step": 27140 }, { "loss": 0.08468295931816101, "grad_norm": 0.5510056614875793, "learning_rate": 7.899187744077353e-06, "epoch": 0.8730745731099463, "step": 27150 }, { "loss": 0.09099004268646241, "grad_norm": 1.0482388734817505, "learning_rate": 7.859754246780027e-06, "epoch": 0.8733961475383477, "step": 27160 }, { "loss": 0.06953157782554627, "grad_norm": 0.5658690333366394, "learning_rate": 7.820415397113601e-06, "epoch": 0.8737177219667492, "step": 27170 }, { "loss": 0.08543871641159058, "grad_norm": 0.7472510933876038, "learning_rate": 7.781171235487494e-06, "epoch": 0.8740392963951507, "step": 27180 }, { "loss": 0.04514435529708862, "grad_norm": 0.5680370926856995, "learning_rate": 7.742021802213795e-06, "epoch": 0.8743608708235521, "step": 27190 }, { "loss": 0.07006924748420715, "grad_norm": 0.7031437158584595, "learning_rate": 7.702967137507256e-06, "epoch": 0.8746824452519536, "step": 27200 }, { "loss": 0.0831302523612976, "grad_norm": 0.8786540627479553, "learning_rate": 7.664007281485363e-06, "epoch": 0.875004019680355, "step": 27210 }, { "loss": 0.055636584758758545, "grad_norm": 0.5265671014785767, "learning_rate": 7.625142274168184e-06, "epoch": 0.8753255941087564, "step": 27220 }, { "loss": 0.09252220988273621, "grad_norm": 0.5989711880683899, "learning_rate": 7.586372155478316e-06, "epoch": 0.875647168537158, "step": 27230 }, { "loss": 0.05664872527122498, "grad_norm": 0.47775182127952576, "learning_rate": 7.547696965240958e-06, "epoch": 0.8759687429655594, "step": 27240 }, { "loss": 0.0712408185005188, "grad_norm": 0.8774183392524719, "learning_rate": 7.509116743183708e-06, "epoch": 0.8762903173939608, "step": 27250 }, { "loss": 0.07321785688400269, "grad_norm": 0.8138816356658936, "learning_rate": 7.470631528936711e-06, "epoch": 0.8766118918223623, "step": 27260 }, { "loss": 0.07316104769706726, "grad_norm": 0.537562906742096, "learning_rate": 7.4322413620324215e-06, "epoch": 0.8769334662507637, "step": 27270 }, { "loss": 0.06197282075881958, "grad_norm": 0.5482314825057983, "learning_rate": 7.393946281905728e-06, "epoch": 0.8772550406791652, "step": 27280 }, { "loss": 0.06532438397407532, "grad_norm": 0.8577420115470886, "learning_rate": 7.355746327893831e-06, "epoch": 0.8775766151075667, "step": 27290 }, { "loss": 0.058403903245925905, "grad_norm": 0.7714664340019226, "learning_rate": 7.317641539236198e-06, "epoch": 0.8778981895359681, "step": 27300 }, { "loss": 0.06350155472755432, "grad_norm": 0.6568384170532227, "learning_rate": 7.2796319550745615e-06, "epoch": 0.8782197639643695, "step": 27310 }, { "loss": 0.07977257370948791, "grad_norm": 0.7861108183860779, "learning_rate": 7.241717614452826e-06, "epoch": 0.878541338392771, "step": 27320 }, { "loss": 0.0729379415512085, "grad_norm": 0.3218686878681183, "learning_rate": 7.203898556317123e-06, "epoch": 0.8788629128211725, "step": 27330 }, { "loss": 0.05332390666007995, "grad_norm": 0.3437587022781372, "learning_rate": 7.166174819515626e-06, "epoch": 0.8791844872495739, "step": 27340 }, { "loss": 0.05867266654968262, "grad_norm": 0.6583501100540161, "learning_rate": 7.128546442798656e-06, "epoch": 0.8795060616779754, "step": 27350 }, { "loss": 0.06763720512390137, "grad_norm": 0.8608779907226562, "learning_rate": 7.0910134648185854e-06, "epoch": 0.8798276361063768, "step": 27360 }, { "loss": 0.055006229877471925, "grad_norm": 0.5322985649108887, "learning_rate": 7.053575924129718e-06, "epoch": 0.8801492105347782, "step": 27370 }, { "loss": 0.055677604675292966, "grad_norm": 0.43913590908050537, "learning_rate": 7.016233859188415e-06, "epoch": 0.8804707849631798, "step": 27380 }, { "loss": 0.07852984070777894, "grad_norm": 0.650809109210968, "learning_rate": 6.97898730835288e-06, "epoch": 0.8807923593915812, "step": 27390 }, { "loss": 0.06146419048309326, "grad_norm": 0.5928049683570862, "learning_rate": 6.941836309883254e-06, "epoch": 0.8811139338199826, "step": 27400 }, { "loss": 0.08269037008285522, "grad_norm": 0.49598103761672974, "learning_rate": 6.904780901941555e-06, "epoch": 0.8814355082483841, "step": 27410 }, { "loss": 0.06693826913833618, "grad_norm": 0.33657902479171753, "learning_rate": 6.867821122591511e-06, "epoch": 0.8817570826767855, "step": 27420 }, { "loss": 0.08126822710037232, "grad_norm": 0.8901121020317078, "learning_rate": 6.830957009798722e-06, "epoch": 0.882078657105187, "step": 27430 }, { "loss": 0.07340906858444214, "grad_norm": 0.4934292733669281, "learning_rate": 6.794188601430451e-06, "epoch": 0.8824002315335885, "step": 27440 }, { "loss": 0.08674216270446777, "grad_norm": 0.6380556225776672, "learning_rate": 6.757515935255665e-06, "epoch": 0.8827218059619899, "step": 27450 }, { "loss": 0.07352336049079895, "grad_norm": 0.5957514643669128, "learning_rate": 6.720939048945018e-06, "epoch": 0.8830433803903913, "step": 27460 }, { "loss": 0.06992534399032593, "grad_norm": 0.9841471910476685, "learning_rate": 6.6844579800707016e-06, "epoch": 0.8833649548187928, "step": 27470 }, { "loss": 0.08985934257507325, "grad_norm": 1.4467130899429321, "learning_rate": 6.6480727661066055e-06, "epoch": 0.8836865292471943, "step": 27480 }, { "loss": 0.06457531452178955, "grad_norm": 0.6235713362693787, "learning_rate": 6.6117834444280545e-06, "epoch": 0.8840081036755957, "step": 27490 }, { "loss": 0.06599720120429993, "grad_norm": 0.4847148060798645, "learning_rate": 6.575590052311875e-06, "epoch": 0.8843296781039972, "step": 27500 }, { "loss": 0.08554141521453858, "grad_norm": 0.4832707345485687, "learning_rate": 6.539492626936417e-06, "epoch": 0.8846512525323986, "step": 27510 }, { "loss": 0.0616130530834198, "grad_norm": 0.6885722279548645, "learning_rate": 6.503491205381374e-06, "epoch": 0.8849728269608, "step": 27520 }, { "loss": 0.06359918117523193, "grad_norm": 0.5734153389930725, "learning_rate": 6.467585824627887e-06, "epoch": 0.8852944013892016, "step": 27530 }, { "loss": 0.07236401438713073, "grad_norm": 0.9058264493942261, "learning_rate": 6.431776521558441e-06, "epoch": 0.885615975817603, "step": 27540 }, { "loss": 0.05739564299583435, "grad_norm": 0.626163899898529, "learning_rate": 6.3960633329567585e-06, "epoch": 0.8859375502460044, "step": 27550 }, { "loss": 0.10272804498672486, "grad_norm": 0.348671019077301, "learning_rate": 6.360446295507927e-06, "epoch": 0.8862591246744059, "step": 27560 }, { "loss": 0.0653518259525299, "grad_norm": 0.6510688066482544, "learning_rate": 6.324925445798191e-06, "epoch": 0.8865806991028073, "step": 27570 }, { "loss": 0.0677101731300354, "grad_norm": 0.595810055732727, "learning_rate": 6.289500820315031e-06, "epoch": 0.8869022735312088, "step": 27580 }, { "loss": 0.08281749486923218, "grad_norm": 2.272451400756836, "learning_rate": 6.2541724554470605e-06, "epoch": 0.8872238479596103, "step": 27590 }, { "loss": 0.08681055307388305, "grad_norm": 0.5360921621322632, "learning_rate": 6.218940387484018e-06, "epoch": 0.8875454223880117, "step": 27600 }, { "loss": 0.05985884070396423, "grad_norm": 0.651020348072052, "learning_rate": 6.183804652616776e-06, "epoch": 0.8878669968164131, "step": 27610 }, { "loss": 0.09359393119812012, "grad_norm": 0.6078817844390869, "learning_rate": 6.148765286937153e-06, "epoch": 0.8881885712448147, "step": 27620 }, { "loss": 0.0729813039302826, "grad_norm": 0.8955041766166687, "learning_rate": 6.113822326438079e-06, "epoch": 0.8885101456732161, "step": 27630 }, { "loss": 0.06026132702827454, "grad_norm": 0.3575083911418915, "learning_rate": 6.078975807013354e-06, "epoch": 0.8888317201016175, "step": 27640 }, { "loss": 0.07106353044509887, "grad_norm": 0.6659265160560608, "learning_rate": 6.0442257644578225e-06, "epoch": 0.889153294530019, "step": 27650 }, { "loss": 0.0630476176738739, "grad_norm": 0.6418014764785767, "learning_rate": 6.00957223446712e-06, "epoch": 0.8894748689584204, "step": 27660 }, { "loss": 0.07165031433105469, "grad_norm": 0.5182862281799316, "learning_rate": 5.975015252637806e-06, "epoch": 0.8897964433868218, "step": 27670 }, { "loss": 0.08316783308982849, "grad_norm": 0.7247962951660156, "learning_rate": 5.940554854467284e-06, "epoch": 0.8901180178152234, "step": 27680 }, { "loss": 0.07041310071945191, "grad_norm": 0.5974995493888855, "learning_rate": 5.906191075353673e-06, "epoch": 0.8904395922436248, "step": 27690 }, { "loss": 0.07408487200736999, "grad_norm": 0.5634307861328125, "learning_rate": 5.871923950595903e-06, "epoch": 0.8907611666720262, "step": 27700 }, { "loss": 0.06824617981910705, "grad_norm": 0.7515895962715149, "learning_rate": 5.837753515393607e-06, "epoch": 0.8910827411004277, "step": 27710 }, { "loss": 0.05768151879310608, "grad_norm": 0.399549663066864, "learning_rate": 5.80367980484704e-06, "epoch": 0.8914043155288291, "step": 27720 }, { "loss": 0.07902880907058715, "grad_norm": 0.8869091868400574, "learning_rate": 5.769702853957226e-06, "epoch": 0.8917258899572306, "step": 27730 }, { "loss": 0.06725460886955262, "grad_norm": 0.35859981179237366, "learning_rate": 5.73582269762567e-06, "epoch": 0.8920474643856321, "step": 27740 }, { "loss": 0.06240668296813965, "grad_norm": 0.5596455335617065, "learning_rate": 5.702039370654522e-06, "epoch": 0.8923690388140335, "step": 27750 }, { "loss": 0.06892146468162537, "grad_norm": 0.7789075970649719, "learning_rate": 5.668352907746433e-06, "epoch": 0.8926906132424349, "step": 27760 }, { "loss": 0.06604178547859192, "grad_norm": 0.256012499332428, "learning_rate": 5.634763343504546e-06, "epoch": 0.8930121876708365, "step": 27770 }, { "loss": 0.07076762318611145, "grad_norm": 0.9413619637489319, "learning_rate": 5.601270712432538e-06, "epoch": 0.8933337620992379, "step": 27780 }, { "loss": 0.0627564549446106, "grad_norm": 0.7740989923477173, "learning_rate": 5.567875048934401e-06, "epoch": 0.8936553365276393, "step": 27790 }, { "loss": 0.08443350791931152, "grad_norm": 0.8512759804725647, "learning_rate": 5.534576387314649e-06, "epoch": 0.8939769109560408, "step": 27800 }, { "loss": 0.061937087774276735, "grad_norm": 0.7870593667030334, "learning_rate": 5.5013747617780554e-06, "epoch": 0.8942984853844422, "step": 27810 }, { "loss": 0.05405592918395996, "grad_norm": 0.5975480079650879, "learning_rate": 5.4682702064297485e-06, "epoch": 0.8946200598128436, "step": 27820 }, { "loss": 0.06346514225006103, "grad_norm": 0.6930370330810547, "learning_rate": 5.435262755275172e-06, "epoch": 0.8949416342412452, "step": 27830 }, { "loss": 0.07561540603637695, "grad_norm": 1.0342068672180176, "learning_rate": 5.402352442219971e-06, "epoch": 0.8952632086696466, "step": 27840 }, { "loss": 0.08634264469146728, "grad_norm": 0.8079270720481873, "learning_rate": 5.369539301070059e-06, "epoch": 0.895584783098048, "step": 27850 }, { "loss": 0.07511700391769409, "grad_norm": 0.8758837580680847, "learning_rate": 5.336823365531507e-06, "epoch": 0.8959063575264495, "step": 27860 }, { "loss": 0.06092031598091126, "grad_norm": 0.5970578789710999, "learning_rate": 5.3042046692105214e-06, "epoch": 0.896227931954851, "step": 27870 }, { "loss": 0.08482131361961365, "grad_norm": 1.5843493938446045, "learning_rate": 5.271683245613468e-06, "epoch": 0.8965495063832524, "step": 27880 }, { "loss": 0.06481119394302368, "grad_norm": 0.6575544476509094, "learning_rate": 5.239259128146745e-06, "epoch": 0.8968710808116539, "step": 27890 }, { "loss": 0.07339617013931274, "grad_norm": 0.574725329875946, "learning_rate": 5.206932350116845e-06, "epoch": 0.8971926552400553, "step": 27900 }, { "loss": 0.06229846477508545, "grad_norm": 0.4941605031490326, "learning_rate": 5.174702944730203e-06, "epoch": 0.8975142296684567, "step": 27910 }, { "loss": 0.07401782870292664, "grad_norm": 0.7536153197288513, "learning_rate": 5.142570945093305e-06, "epoch": 0.8978358040968583, "step": 27920 }, { "loss": 0.06073766350746155, "grad_norm": 0.7745930552482605, "learning_rate": 5.11053638421255e-06, "epoch": 0.8981573785252597, "step": 27930 }, { "loss": 0.055574798583984376, "grad_norm": 0.6120657920837402, "learning_rate": 5.078599294994224e-06, "epoch": 0.8984789529536611, "step": 27940 }, { "loss": 0.07941893339157105, "grad_norm": 0.3994508683681488, "learning_rate": 5.046759710244542e-06, "epoch": 0.8988005273820626, "step": 27950 }, { "loss": 0.06143311858177185, "grad_norm": 0.42189642786979675, "learning_rate": 5.015017662669497e-06, "epoch": 0.899122101810464, "step": 27960 }, { "loss": 0.06756633520126343, "grad_norm": 0.48182159662246704, "learning_rate": 4.983373184874951e-06, "epoch": 0.8994436762388655, "step": 27970 }, { "eval_loss": 0.06576023995876312, "eval_runtime": 34.5615, "eval_samples_per_second": 145.422, "eval_steps_per_second": 36.37, "epoch": 0.8995079911245458, "step": 27972 }, { "loss": 0.08180526494979859, "grad_norm": 0.5850648283958435, "learning_rate": 4.951826309366481e-06, "epoch": 0.899765250667267, "step": 27980 }, { "loss": 0.07834064364433288, "grad_norm": 0.5346769690513611, "learning_rate": 4.920377068549475e-06, "epoch": 0.9000868250956684, "step": 27990 }, { "loss": 0.07173922061920165, "grad_norm": 0.5718779563903809, "learning_rate": 4.8890254947289714e-06, "epoch": 0.9004083995240698, "step": 28000 }, { "loss": 0.05986430048942566, "grad_norm": 0.8651319742202759, "learning_rate": 4.857771620109708e-06, "epoch": 0.9007299739524713, "step": 28010 }, { "loss": 0.08395033478736877, "grad_norm": 0.5755200982093811, "learning_rate": 4.826615476796071e-06, "epoch": 0.9010515483808728, "step": 28020 }, { "loss": 0.07701780796051025, "grad_norm": 0.5145189762115479, "learning_rate": 4.7955570967920475e-06, "epoch": 0.9013731228092742, "step": 28030 }, { "loss": 0.07752405405044556, "grad_norm": 0.40484297275543213, "learning_rate": 4.764596512001162e-06, "epoch": 0.9016946972376757, "step": 28040 }, { "loss": 0.07332845330238343, "grad_norm": 0.3679267168045044, "learning_rate": 4.7337337542265656e-06, "epoch": 0.9020162716660771, "step": 28050 }, { "loss": 0.06816344857215881, "grad_norm": 0.4973434805870056, "learning_rate": 4.702968855170864e-06, "epoch": 0.9023378460944785, "step": 28060 }, { "loss": 0.0697591781616211, "grad_norm": 0.9307938814163208, "learning_rate": 4.672301846436155e-06, "epoch": 0.9026594205228801, "step": 28070 }, { "loss": 0.06903193593025207, "grad_norm": 0.707920253276825, "learning_rate": 4.641732759523964e-06, "epoch": 0.9029809949512815, "step": 28080 }, { "loss": 0.08139731884002685, "grad_norm": 0.4234994053840637, "learning_rate": 4.61126162583525e-06, "epoch": 0.9033025693796829, "step": 28090 }, { "loss": 0.09809705018997192, "grad_norm": 0.8795967102050781, "learning_rate": 4.580888476670353e-06, "epoch": 0.9036241438080844, "step": 28100 }, { "loss": 0.05017074942588806, "grad_norm": 0.7633052468299866, "learning_rate": 4.55061334322896e-06, "epoch": 0.9039457182364858, "step": 28110 }, { "loss": 0.06319268345832825, "grad_norm": 0.5459538102149963, "learning_rate": 4.520436256610072e-06, "epoch": 0.9042672926648873, "step": 28120 }, { "loss": 0.06305754780769349, "grad_norm": 0.1714586466550827, "learning_rate": 4.4903572478119826e-06, "epoch": 0.9045888670932888, "step": 28130 }, { "loss": 0.07959019541740417, "grad_norm": 0.6232452392578125, "learning_rate": 4.460376347732198e-06, "epoch": 0.9049104415216902, "step": 28140 }, { "loss": 0.06855315566062928, "grad_norm": 0.5981919169425964, "learning_rate": 4.430493587167528e-06, "epoch": 0.9052320159500916, "step": 28150 }, { "loss": 0.08559853434562684, "grad_norm": 0.8783455491065979, "learning_rate": 4.400708996813884e-06, "epoch": 0.9055535903784931, "step": 28160 }, { "loss": 0.07695115208625794, "grad_norm": 0.3367885947227478, "learning_rate": 4.371022607266395e-06, "epoch": 0.9058751648068946, "step": 28170 }, { "loss": 0.05580394268035889, "grad_norm": 0.482756644487381, "learning_rate": 4.34143444901931e-06, "epoch": 0.906196739235296, "step": 28180 }, { "loss": 0.06382061243057251, "grad_norm": 0.2901076078414917, "learning_rate": 4.31194455246593e-06, "epoch": 0.9065183136636975, "step": 28190 }, { "loss": 0.06710493564605713, "grad_norm": 0.6091313362121582, "learning_rate": 4.282552947898677e-06, "epoch": 0.9068398880920989, "step": 28200 }, { "loss": 0.05544446706771851, "grad_norm": 0.4454199969768524, "learning_rate": 4.253259665508958e-06, "epoch": 0.9071614625205003, "step": 28210 }, { "loss": 0.07407185435295105, "grad_norm": 0.5129989385604858, "learning_rate": 4.224064735387234e-06, "epoch": 0.9074830369489019, "step": 28220 }, { "loss": 0.07657233476638795, "grad_norm": 0.5245542526245117, "learning_rate": 4.194968187522874e-06, "epoch": 0.9078046113773033, "step": 28230 }, { "loss": 0.0796772301197052, "grad_norm": 0.641661524772644, "learning_rate": 4.165970051804235e-06, "epoch": 0.9081261858057047, "step": 28240 }, { "loss": 0.06403705477714539, "grad_norm": 0.3376065790653229, "learning_rate": 4.137070358018602e-06, "epoch": 0.9084477602341062, "step": 28250 }, { "loss": 0.06514493227005005, "grad_norm": 0.3697962760925293, "learning_rate": 4.1082691358520496e-06, "epoch": 0.9087693346625076, "step": 28260 }, { "loss": 0.06303318738937377, "grad_norm": 0.6878823637962341, "learning_rate": 4.079566414889613e-06, "epoch": 0.9090909090909091, "step": 28270 }, { "loss": 0.06593666672706604, "grad_norm": 0.5542412400245667, "learning_rate": 4.050962224615051e-06, "epoch": 0.9094124835193106, "step": 28280 }, { "loss": 0.08710123896598816, "grad_norm": 0.7127916216850281, "learning_rate": 4.022456594410984e-06, "epoch": 0.909734057947712, "step": 28290 }, { "loss": 0.06951413154602051, "grad_norm": 0.7896924018859863, "learning_rate": 3.994049553558732e-06, "epoch": 0.9100556323761134, "step": 28300 }, { "loss": 0.08014414310455323, "grad_norm": 0.8470936417579651, "learning_rate": 3.9657411312383765e-06, "epoch": 0.910377206804515, "step": 28310 }, { "loss": 0.06745404601097107, "grad_norm": 0.4270573556423187, "learning_rate": 3.93753135652869e-06, "epoch": 0.9106987812329164, "step": 28320 }, { "loss": 0.07204972505569458, "grad_norm": 0.8026243448257446, "learning_rate": 3.909420258407126e-06, "epoch": 0.9110203556613178, "step": 28330 }, { "loss": 0.09062637090682983, "grad_norm": 0.9312450289726257, "learning_rate": 3.881407865749731e-06, "epoch": 0.9113419300897193, "step": 28340 }, { "loss": 0.06378825306892395, "grad_norm": 0.7994136214256287, "learning_rate": 3.853494207331221e-06, "epoch": 0.9116635045181207, "step": 28350 }, { "loss": 0.06912797093391418, "grad_norm": 0.7310376167297363, "learning_rate": 3.825679311824815e-06, "epoch": 0.9119850789465221, "step": 28360 }, { "loss": 0.05452196598052979, "grad_norm": 0.4820187985897064, "learning_rate": 3.797963207802391e-06, "epoch": 0.9123066533749237, "step": 28370 }, { "loss": 0.07047023177146912, "grad_norm": 0.451716810464859, "learning_rate": 3.770345923734242e-06, "epoch": 0.9126282278033251, "step": 28380 }, { "loss": 0.07474204897880554, "grad_norm": 0.7766392827033997, "learning_rate": 3.7428274879891865e-06, "epoch": 0.9129498022317265, "step": 28390 }, { "loss": 0.06405274868011475, "grad_norm": 0.5303685665130615, "learning_rate": 3.715407928834547e-06, "epoch": 0.913271376660128, "step": 28400 }, { "loss": 0.07459316849708557, "grad_norm": 0.33874908089637756, "learning_rate": 3.688087274436014e-06, "epoch": 0.9135929510885294, "step": 28410 }, { "loss": 0.09029234647750854, "grad_norm": 0.7368858456611633, "learning_rate": 3.6608655528577064e-06, "epoch": 0.9139145255169309, "step": 28420 }, { "loss": 0.0907670497894287, "grad_norm": 1.2996673583984375, "learning_rate": 3.633742792062156e-06, "epoch": 0.9142360999453324, "step": 28430 }, { "loss": 0.06846472024917602, "grad_norm": 0.47941938042640686, "learning_rate": 3.6067190199101765e-06, "epoch": 0.9145576743737338, "step": 28440 }, { "loss": 0.07348551750183105, "grad_norm": 0.6636233925819397, "learning_rate": 3.5797942641609405e-06, "epoch": 0.9148792488021352, "step": 28450 }, { "loss": 0.08235877752304077, "grad_norm": 0.9041732549667358, "learning_rate": 3.5529685524718913e-06, "epoch": 0.9152008232305368, "step": 28460 }, { "loss": 0.07450997829437256, "grad_norm": 0.520968496799469, "learning_rate": 3.5262419123987643e-06, "epoch": 0.9155223976589382, "step": 28470 }, { "loss": 0.05997384190559387, "grad_norm": 0.3654121458530426, "learning_rate": 3.4996143713954653e-06, "epoch": 0.9158439720873396, "step": 28480 }, { "loss": 0.07119588851928711, "grad_norm": 0.5396535396575928, "learning_rate": 3.4730859568141703e-06, "epoch": 0.9161655465157411, "step": 28490 }, { "loss": 0.061024290323257444, "grad_norm": 0.5486513376235962, "learning_rate": 3.446656695905215e-06, "epoch": 0.9164871209441425, "step": 28500 }, { "loss": 0.06300418376922608, "grad_norm": 0.5239927768707275, "learning_rate": 3.4203266158170376e-06, "epoch": 0.916808695372544, "step": 28510 }, { "loss": 0.07844939231872558, "grad_norm": 0.7887962460517883, "learning_rate": 3.3940957435962597e-06, "epoch": 0.9171302698009455, "step": 28520 }, { "loss": 0.06970057487487794, "grad_norm": 0.6157139539718628, "learning_rate": 3.367964106187549e-06, "epoch": 0.9174518442293469, "step": 28530 }, { "loss": 0.06430141925811768, "grad_norm": 0.40715357661247253, "learning_rate": 3.341931730433667e-06, "epoch": 0.9177734186577483, "step": 28540 }, { "loss": 0.07139861583709717, "grad_norm": 0.35261547565460205, "learning_rate": 3.31599864307538e-06, "epoch": 0.9180949930861498, "step": 28550 }, { "loss": 0.07542607784271241, "grad_norm": 0.9486578106880188, "learning_rate": 3.290164870751511e-06, "epoch": 0.9184165675145513, "step": 28560 }, { "loss": 0.0655196487903595, "grad_norm": 0.69345623254776, "learning_rate": 3.2644304399988225e-06, "epoch": 0.9187381419429527, "step": 28570 }, { "loss": 0.08214229941368104, "grad_norm": 0.9937447905540466, "learning_rate": 3.2387953772520574e-06, "epoch": 0.9190597163713542, "step": 28580 }, { "loss": 0.08389940857887268, "grad_norm": 0.32834765315055847, "learning_rate": 3.2132597088438855e-06, "epoch": 0.9193812907997556, "step": 28590 }, { "loss": 0.0574823260307312, "grad_norm": 0.302408903837204, "learning_rate": 3.1878234610048464e-06, "epoch": 0.919702865228157, "step": 28600 }, { "loss": 0.06297532320022584, "grad_norm": 0.7567213177680969, "learning_rate": 3.162486659863373e-06, "epoch": 0.9200244396565586, "step": 28610 }, { "loss": 0.06948093771934509, "grad_norm": 0.39594024419784546, "learning_rate": 3.1372493314457906e-06, "epoch": 0.92034601408496, "step": 28620 }, { "loss": 0.06656071543693542, "grad_norm": 0.7741539478302002, "learning_rate": 3.1121115016761736e-06, "epoch": 0.9206675885133614, "step": 28630 }, { "loss": 0.0799754023551941, "grad_norm": 0.567747175693512, "learning_rate": 3.087073196376422e-06, "epoch": 0.9209891629417629, "step": 28640 }, { "loss": 0.05742677450180054, "grad_norm": 0.4938858449459076, "learning_rate": 3.062134441266218e-06, "epoch": 0.9213107373701643, "step": 28650 }, { "loss": 0.09684234261512756, "grad_norm": 0.6577500700950623, "learning_rate": 3.0372952619629356e-06, "epoch": 0.9216323117985658, "step": 28660 }, { "loss": 0.08077757954597473, "grad_norm": 0.9018112421035767, "learning_rate": 3.0125556839817214e-06, "epoch": 0.9219538862269673, "step": 28670 }, { "loss": 0.07379125952720642, "grad_norm": 0.6212087273597717, "learning_rate": 2.987915732735358e-06, "epoch": 0.9222754606553687, "step": 28680 }, { "loss": 0.06398572921752929, "grad_norm": 0.5765277147293091, "learning_rate": 2.9633754335343656e-06, "epoch": 0.9225970350837701, "step": 28690 }, { "loss": 0.05859953761100769, "grad_norm": 0.7695963978767395, "learning_rate": 2.938934811586802e-06, "epoch": 0.9229186095121716, "step": 28700 }, { "loss": 0.07643457055091858, "grad_norm": 0.7159215807914734, "learning_rate": 2.9145938919984075e-06, "epoch": 0.9232401839405731, "step": 28710 }, { "loss": 0.0891688585281372, "grad_norm": 0.9027368426322937, "learning_rate": 2.8903526997724804e-06, "epoch": 0.9235617583689745, "step": 28720 }, { "loss": 0.05892614722251892, "grad_norm": 0.478014200925827, "learning_rate": 2.8662112598098813e-06, "epoch": 0.923883332797376, "step": 28730 }, { "loss": 0.0703760802745819, "grad_norm": 0.38507896661758423, "learning_rate": 2.8421695969089946e-06, "epoch": 0.9242049072257774, "step": 28740 }, { "loss": 0.06641483306884766, "grad_norm": 1.0187102556228638, "learning_rate": 2.818227735765733e-06, "epoch": 0.9245264816541788, "step": 28750 }, { "loss": 0.08073602914810181, "grad_norm": 0.4585058093070984, "learning_rate": 2.7943857009734785e-06, "epoch": 0.9248480560825804, "step": 28760 }, { "loss": 0.07679535746574402, "grad_norm": 0.5923634171485901, "learning_rate": 2.770643517023086e-06, "epoch": 0.9251696305109818, "step": 28770 }, { "loss": 0.0702307403087616, "grad_norm": 0.8317908644676208, "learning_rate": 2.747001208302791e-06, "epoch": 0.9254912049393832, "step": 28780 }, { "loss": 0.06400009393692016, "grad_norm": 0.3549131751060486, "learning_rate": 2.723458799098311e-06, "epoch": 0.9258127793677847, "step": 28790 }, { "loss": 0.058918613195419314, "grad_norm": 0.38816264271736145, "learning_rate": 2.7000163135926793e-06, "epoch": 0.9261343537961861, "step": 28800 }, { "loss": 0.07531256675720215, "grad_norm": 0.44148746132850647, "learning_rate": 2.676673775866334e-06, "epoch": 0.9264559282245876, "step": 28810 }, { "loss": 0.07543994188308716, "grad_norm": 0.4504343569278717, "learning_rate": 2.653431209897028e-06, "epoch": 0.9267775026529891, "step": 28820 }, { "loss": 0.06393010020256043, "grad_norm": 0.5456916689872742, "learning_rate": 2.630288639559808e-06, "epoch": 0.9270990770813905, "step": 28830 }, { "loss": 0.06197878122329712, "grad_norm": 0.26009121537208557, "learning_rate": 2.6072460886270243e-06, "epoch": 0.9274206515097919, "step": 28840 }, { "loss": 0.06824758648872375, "grad_norm": 0.45600271224975586, "learning_rate": 2.584303580768266e-06, "epoch": 0.9277422259381934, "step": 28850 }, { "loss": 0.06901856660842895, "grad_norm": 0.6517654657363892, "learning_rate": 2.561461139550392e-06, "epoch": 0.9280638003665949, "step": 28860 }, { "loss": 0.06693161725997925, "grad_norm": 0.7441944479942322, "learning_rate": 2.538718788437422e-06, "epoch": 0.9283853747949963, "step": 28870 }, { "loss": 0.08583144545555114, "grad_norm": 0.8883978724479675, "learning_rate": 2.5160765507905915e-06, "epoch": 0.9287069492233978, "step": 28880 }, { "loss": 0.06471338272094726, "grad_norm": 0.5437493920326233, "learning_rate": 2.493534449868318e-06, "epoch": 0.9290285236517992, "step": 28890 }, { "loss": 0.07316511273384094, "grad_norm": 0.5847581028938293, "learning_rate": 2.4710925088261115e-06, "epoch": 0.9293500980802006, "step": 28900 }, { "loss": 0.053254157304763794, "grad_norm": 0.49556058645248413, "learning_rate": 2.448750750716633e-06, "epoch": 0.9296716725086022, "step": 28910 }, { "loss": 0.05694871544837952, "grad_norm": 0.47599726915359497, "learning_rate": 2.4265091984896237e-06, "epoch": 0.9299932469370036, "step": 28920 }, { "loss": 0.06631427407264709, "grad_norm": 0.9774466753005981, "learning_rate": 2.404367874991842e-06, "epoch": 0.930314821365405, "step": 28930 }, { "loss": 0.05324622988700867, "grad_norm": 0.4933931827545166, "learning_rate": 2.3823268029672054e-06, "epoch": 0.9306363957938065, "step": 28940 }, { "loss": 0.06808727383613586, "grad_norm": 0.4201553165912628, "learning_rate": 2.3603860050565473e-06, "epoch": 0.9309579702222079, "step": 28950 }, { "loss": 0.07126197814941407, "grad_norm": 0.49235799908638, "learning_rate": 2.3385455037977176e-06, "epoch": 0.9312795446506094, "step": 28960 }, { "loss": 0.07900972366333008, "grad_norm": 0.6312693357467651, "learning_rate": 2.3168053216255924e-06, "epoch": 0.9316011190790109, "step": 28970 }, { "loss": 0.06725811958312988, "grad_norm": 0.5075303316116333, "learning_rate": 2.2951654808719414e-06, "epoch": 0.9319226935074123, "step": 28980 }, { "loss": 0.06910203099250793, "grad_norm": 0.4444521367549896, "learning_rate": 2.2736260037654943e-06, "epoch": 0.9322442679358137, "step": 28990 }, { "loss": 0.08351538777351379, "grad_norm": 0.839489758014679, "learning_rate": 2.252186912431864e-06, "epoch": 0.9325658423642152, "step": 29000 }, { "loss": 0.07000740766525268, "grad_norm": 0.7335992455482483, "learning_rate": 2.230848228893567e-06, "epoch": 0.9328874167926167, "step": 29010 }, { "loss": 0.05521320104598999, "grad_norm": 0.4982221722602844, "learning_rate": 2.2096099750699707e-06, "epoch": 0.9332089912210181, "step": 29020 }, { "loss": 0.07880055904388428, "grad_norm": 0.947349488735199, "learning_rate": 2.1884721727772672e-06, "epoch": 0.9335305656494196, "step": 29030 }, { "loss": 0.07375386357307434, "grad_norm": 0.7958569526672363, "learning_rate": 2.1674348437284997e-06, "epoch": 0.933852140077821, "step": 29040 }, { "loss": 0.06184510588645935, "grad_norm": 0.5542522668838501, "learning_rate": 2.1464980095334486e-06, "epoch": 0.9341737145062224, "step": 29050 }, { "loss": 0.07052304148674012, "grad_norm": 0.7163567543029785, "learning_rate": 2.1256616916987216e-06, "epoch": 0.934495288934624, "step": 29060 }, { "loss": 0.06895366907119752, "grad_norm": 0.6882383227348328, "learning_rate": 2.104925911627631e-06, "epoch": 0.9348168633630254, "step": 29070 }, { "loss": 0.07813504934310914, "grad_norm": 0.6083703637123108, "learning_rate": 2.0842906906202496e-06, "epoch": 0.9351384377914268, "step": 29080 }, { "loss": 0.06795021295547485, "grad_norm": 0.7039796710014343, "learning_rate": 2.063756049873333e-06, "epoch": 0.9354600122198283, "step": 29090 }, { "loss": 0.07217402458190918, "grad_norm": 0.7208359837532043, "learning_rate": 2.0433220104803088e-06, "epoch": 0.9357815866482297, "step": 29100 }, { "loss": 0.0501723051071167, "grad_norm": 0.7383726239204407, "learning_rate": 2.0229885934313074e-06, "epoch": 0.9361031610766312, "step": 29110 }, { "loss": 0.07170373201370239, "grad_norm": 0.7307721376419067, "learning_rate": 2.002755819613056e-06, "epoch": 0.9364247355050327, "step": 29120 }, { "loss": 0.07169081568717957, "grad_norm": 0.4697757065296173, "learning_rate": 1.982623709808917e-06, "epoch": 0.9367463099334341, "step": 29130 }, { "loss": 0.05431884527206421, "grad_norm": 0.6256595253944397, "learning_rate": 1.962592284698872e-06, "epoch": 0.9370678843618355, "step": 29140 }, { "loss": 0.06069269776344299, "grad_norm": 0.49124467372894287, "learning_rate": 1.942661564859427e-06, "epoch": 0.937389458790237, "step": 29150 }, { "loss": 0.05852615833282471, "grad_norm": 0.8317437767982483, "learning_rate": 1.9228315707636836e-06, "epoch": 0.9377110332186385, "step": 29160 }, { "loss": 0.0563937246799469, "grad_norm": 0.5100141763687134, "learning_rate": 1.9031023227812695e-06, "epoch": 0.9380326076470399, "step": 29170 }, { "loss": 0.07683940529823304, "grad_norm": 0.6290521621704102, "learning_rate": 1.883473841178296e-06, "epoch": 0.9383541820754414, "step": 29180 }, { "loss": 0.08868919610977173, "grad_norm": 0.6542617082595825, "learning_rate": 1.8639461461174013e-06, "epoch": 0.9386757565038428, "step": 29190 }, { "loss": 0.07720979452133178, "grad_norm": 0.48346221446990967, "learning_rate": 1.8445192576576952e-06, "epoch": 0.9389973309322442, "step": 29200 }, { "loss": 0.08283687829971313, "grad_norm": 0.9130428433418274, "learning_rate": 1.8251931957547154e-06, "epoch": 0.9393189053606458, "step": 29210 }, { "loss": 0.06735263466835022, "grad_norm": 0.24367094039916992, "learning_rate": 1.8059679802604368e-06, "epoch": 0.9396404797890472, "step": 29220 }, { "loss": 0.0744469404220581, "grad_norm": 0.5839953422546387, "learning_rate": 1.7868436309232296e-06, "epoch": 0.9399620542174486, "step": 29230 }, { "loss": 0.06536500453948975, "grad_norm": 0.5514238476753235, "learning_rate": 1.7678201673879013e-06, "epoch": 0.9402836286458501, "step": 29240 }, { "loss": 0.058148926496505736, "grad_norm": 0.4055790603160858, "learning_rate": 1.7488976091955545e-06, "epoch": 0.9406052030742516, "step": 29250 }, { "loss": 0.07389817237854004, "grad_norm": 0.4462706446647644, "learning_rate": 1.7300759757837182e-06, "epoch": 0.940926777502653, "step": 29260 }, { "loss": 0.08689826726913452, "grad_norm": 0.8113035559654236, "learning_rate": 1.7113552864862048e-06, "epoch": 0.9412483519310545, "step": 29270 }, { "loss": 0.049148404598236085, "grad_norm": 0.3998713493347168, "learning_rate": 1.6927355605331318e-06, "epoch": 0.9415699263594559, "step": 29280 }, { "loss": 0.06236416697502136, "grad_norm": 0.6018595099449158, "learning_rate": 1.674216817050933e-06, "epoch": 0.9418915007878573, "step": 29290 }, { "loss": 0.06123303771018982, "grad_norm": 0.4179343283176422, "learning_rate": 1.6557990750622699e-06, "epoch": 0.9422130752162589, "step": 29300 }, { "loss": 0.058761775493621826, "grad_norm": 0.5038360357284546, "learning_rate": 1.6374823534861084e-06, "epoch": 0.9425346496446603, "step": 29310 }, { "loss": 0.06190430521965027, "grad_norm": 0.3785480558872223, "learning_rate": 1.6192666711376204e-06, "epoch": 0.9428562240730617, "step": 29320 }, { "loss": 0.07154954075813294, "grad_norm": 0.6645725965499878, "learning_rate": 1.6011520467281605e-06, "epoch": 0.9431777985014632, "step": 29330 }, { "loss": 0.08225449323654174, "grad_norm": 0.5627620816230774, "learning_rate": 1.5831384988653331e-06, "epoch": 0.9434993729298646, "step": 29340 }, { "loss": 0.07556136846542358, "grad_norm": 0.3942989408969879, "learning_rate": 1.5652260460528478e-06, "epoch": 0.943820947358266, "step": 29350 }, { "loss": 0.06251301765441894, "grad_norm": 0.4809018075466156, "learning_rate": 1.5474147066906308e-06, "epoch": 0.9441425217866676, "step": 29360 }, { "loss": 0.06617502570152282, "grad_norm": 0.40315186977386475, "learning_rate": 1.5297044990747022e-06, "epoch": 0.944464096215069, "step": 29370 }, { "loss": 0.07782045006752014, "grad_norm": 0.8132786154747009, "learning_rate": 1.512095441397221e-06, "epoch": 0.9447856706434704, "step": 29380 }, { "loss": 0.07609405517578124, "grad_norm": 0.42774105072021484, "learning_rate": 1.4945875517464293e-06, "epoch": 0.9451072450718719, "step": 29390 }, { "loss": 0.05179577469825745, "grad_norm": 0.5319843292236328, "learning_rate": 1.4771808481066518e-06, "epoch": 0.9454288195002734, "step": 29400 }, { "loss": 0.07737534046173096, "grad_norm": 0.7647845149040222, "learning_rate": 1.4598753483582972e-06, "epoch": 0.9457503939286748, "step": 29410 }, { "loss": 0.05944972634315491, "grad_norm": 0.4028695523738861, "learning_rate": 1.4426710702777568e-06, "epoch": 0.9460719683570763, "step": 29420 }, { "loss": 0.07734155058860778, "grad_norm": 0.9073440432548523, "learning_rate": 1.4255680315375164e-06, "epoch": 0.9463935427854777, "step": 29430 }, { "loss": 0.06692083477973938, "grad_norm": 0.7518401741981506, "learning_rate": 1.4085662497060115e-06, "epoch": 0.9467151172138791, "step": 29440 }, { "loss": 0.0926957905292511, "grad_norm": 0.5793595910072327, "learning_rate": 1.3916657422476942e-06, "epoch": 0.9470366916422807, "step": 29450 }, { "loss": 0.07510214447975158, "grad_norm": 0.776908278465271, "learning_rate": 1.374866526523e-06, "epoch": 0.9473582660706821, "step": 29460 }, { "loss": 0.07716012001037598, "grad_norm": 0.8807123899459839, "learning_rate": 1.3581686197882694e-06, "epoch": 0.9476798404990835, "step": 29470 }, { "loss": 0.06747672557830811, "grad_norm": 0.4527372419834137, "learning_rate": 1.3415720391958265e-06, "epoch": 0.948001414927485, "step": 29480 }, { "loss": 0.08032204508781433, "grad_norm": 0.7324082851409912, "learning_rate": 1.3250768017938787e-06, "epoch": 0.9483229893558864, "step": 29490 }, { "loss": 0.06460022926330566, "grad_norm": 0.6987818479537964, "learning_rate": 1.3086829245265387e-06, "epoch": 0.9486445637842879, "step": 29500 }, { "loss": 0.06491804122924805, "grad_norm": 0.7714876532554626, "learning_rate": 1.2923904242338136e-06, "epoch": 0.9489661382126894, "step": 29510 }, { "loss": 0.07516576647758484, "grad_norm": 0.6882345080375671, "learning_rate": 1.2761993176515607e-06, "epoch": 0.9492877126410908, "step": 29520 }, { "eval_loss": 0.06555379927158356, "eval_runtime": 34.9932, "eval_samples_per_second": 143.628, "eval_steps_per_second": 35.921, "epoch": 0.9494806572981317, "step": 29526 }, { "loss": 0.08071179986000061, "grad_norm": 1.0186065435409546, "learning_rate": 1.2601096214114982e-06, "epoch": 0.9496092870694922, "step": 29530 }, { "loss": 0.07067824602127075, "grad_norm": 0.6346520781517029, "learning_rate": 1.2441213520411722e-06, "epoch": 0.9499308614978937, "step": 29540 }, { "loss": 0.08398991823196411, "grad_norm": 0.7575256824493408, "learning_rate": 1.2282345259639116e-06, "epoch": 0.9502524359262952, "step": 29550 }, { "loss": 0.06456127166748046, "grad_norm": 0.5739431977272034, "learning_rate": 1.2124491594988852e-06, "epoch": 0.9505740103546966, "step": 29560 }, { "loss": 0.07740659713745117, "grad_norm": 0.42999500036239624, "learning_rate": 1.1967652688610108e-06, "epoch": 0.9508955847830981, "step": 29570 }, { "loss": 0.07819628715515137, "grad_norm": 0.38845735788345337, "learning_rate": 1.1811828701609906e-06, "epoch": 0.9512171592114995, "step": 29580 }, { "loss": 0.0833408534526825, "grad_norm": 1.2384730577468872, "learning_rate": 1.1657019794052648e-06, "epoch": 0.9515387336399009, "step": 29590 }, { "loss": 0.07756343483924866, "grad_norm": 0.5256941914558411, "learning_rate": 1.150322612495991e-06, "epoch": 0.9518603080683025, "step": 29600 }, { "loss": 0.069635409116745, "grad_norm": 0.5696307420730591, "learning_rate": 1.1350447852310541e-06, "epoch": 0.9521818824967039, "step": 29610 }, { "loss": 0.06680102348327636, "grad_norm": 0.6343799829483032, "learning_rate": 1.1198685133040343e-06, "epoch": 0.9525034569251053, "step": 29620 }, { "loss": 0.060926932096481326, "grad_norm": 0.2505302131175995, "learning_rate": 1.1047938123041835e-06, "epoch": 0.9528250313535068, "step": 29630 }, { "loss": 0.05705304145812988, "grad_norm": 0.28949621319770813, "learning_rate": 1.0898206977164371e-06, "epoch": 0.9531466057819082, "step": 29640 }, { "loss": 0.05276017785072327, "grad_norm": 0.5729100704193115, "learning_rate": 1.0749491849213588e-06, "epoch": 0.9534681802103097, "step": 29650 }, { "loss": 0.07277541160583496, "grad_norm": 0.47938433289527893, "learning_rate": 1.0601792891951623e-06, "epoch": 0.9537897546387112, "step": 29660 }, { "loss": 0.0780954122543335, "grad_norm": 0.9656330943107605, "learning_rate": 1.0455110257096667e-06, "epoch": 0.9541113290671126, "step": 29670 }, { "loss": 0.08907517194747924, "grad_norm": 0.9449646472930908, "learning_rate": 1.0309444095323083e-06, "epoch": 0.954432903495514, "step": 29680 }, { "loss": 0.06952834129333496, "grad_norm": 0.4525853991508484, "learning_rate": 1.0164794556260849e-06, "epoch": 0.9547544779239155, "step": 29690 }, { "loss": 0.0694973349571228, "grad_norm": 0.48509544134140015, "learning_rate": 1.0021161788495882e-06, "epoch": 0.955076052352317, "step": 29700 }, { "loss": 0.08033158779144287, "grad_norm": 1.0671344995498657, "learning_rate": 9.878545939569717e-07, "epoch": 0.9553976267807184, "step": 29710 }, { "loss": 0.06861055493354798, "grad_norm": 0.259791761636734, "learning_rate": 9.736947155978948e-07, "epoch": 0.9557192012091199, "step": 29720 }, { "loss": 0.06976649165153503, "grad_norm": 0.44905713200569153, "learning_rate": 9.596365583175782e-07, "epoch": 0.9560407756375213, "step": 29730 }, { "loss": 0.058680230379104616, "grad_norm": 0.6654853224754333, "learning_rate": 9.456801365567258e-07, "epoch": 0.9563623500659227, "step": 29740 }, { "loss": 0.0554412305355072, "grad_norm": 3.4719386100769043, "learning_rate": 9.318254646515811e-07, "epoch": 0.9566839244943242, "step": 29750 }, { "loss": 0.08116706609725952, "grad_norm": 0.8351684212684631, "learning_rate": 9.180725568338044e-07, "epoch": 0.9570054989227257, "step": 29760 }, { "loss": 0.055116087198257446, "grad_norm": 1.078339695930481, "learning_rate": 9.044214272305728e-07, "epoch": 0.9573270733511271, "step": 29770 }, { "loss": 0.06081744432449341, "grad_norm": 0.408652663230896, "learning_rate": 8.908720898645029e-07, "epoch": 0.9576486477795285, "step": 29780 }, { "loss": 0.0641602635383606, "grad_norm": 0.4921199083328247, "learning_rate": 8.774245586536389e-07, "epoch": 0.95797022220793, "step": 29790 }, { "loss": 0.07612771391868592, "grad_norm": 0.7166224122047424, "learning_rate": 8.640788474114647e-07, "epoch": 0.9582917966363315, "step": 29800 }, { "loss": 0.06867678165435791, "grad_norm": 0.6871895790100098, "learning_rate": 8.508349698468588e-07, "epoch": 0.9586133710647329, "step": 29810 }, { "loss": 0.060842263698577884, "grad_norm": 0.6564211845397949, "learning_rate": 8.376929395640831e-07, "epoch": 0.9589349454931344, "step": 29820 }, { "loss": 0.06288072466850281, "grad_norm": 0.863722026348114, "learning_rate": 8.246527700628393e-07, "epoch": 0.9592565199215358, "step": 29830 }, { "loss": 0.0851693332195282, "grad_norm": 0.7785186767578125, "learning_rate": 8.117144747381233e-07, "epoch": 0.9595780943499372, "step": 29840 }, { "loss": 0.04293628633022308, "grad_norm": 0.3203319311141968, "learning_rate": 7.988780668803153e-07, "epoch": 0.9598996687783388, "step": 29850 }, { "loss": 0.07816066145896912, "grad_norm": 0.9088801145553589, "learning_rate": 7.861435596751676e-07, "epoch": 0.9602212432067402, "step": 29860 }, { "loss": 0.06818323731422424, "grad_norm": 0.649931788444519, "learning_rate": 7.735109662036832e-07, "epoch": 0.9605428176351416, "step": 29870 }, { "loss": 0.08106167912483216, "grad_norm": 0.210426926612854, "learning_rate": 7.609802994422599e-07, "epoch": 0.9608643920635431, "step": 29880 }, { "loss": 0.06616997718811035, "grad_norm": 0.4277474880218506, "learning_rate": 7.485515722625347e-07, "epoch": 0.9611859664919445, "step": 29890 }, { "loss": 0.05833558440208435, "grad_norm": 0.4171481430530548, "learning_rate": 7.362247974314617e-07, "epoch": 0.961507540920346, "step": 29900 }, { "loss": 0.06775500178337097, "grad_norm": 0.5039193630218506, "learning_rate": 7.23999987611279e-07, "epoch": 0.9618291153487475, "step": 29910 }, { "loss": 0.06911481022834778, "grad_norm": 0.5150966048240662, "learning_rate": 7.118771553594305e-07, "epoch": 0.9621506897771489, "step": 29920 }, { "loss": 0.05216036438941955, "grad_norm": 0.5649464726448059, "learning_rate": 6.998563131286662e-07, "epoch": 0.9624722642055503, "step": 29930 }, { "loss": 0.0770330011844635, "grad_norm": 0.4386029541492462, "learning_rate": 6.87937473266953e-07, "epoch": 0.9627938386339518, "step": 29940 }, { "loss": 0.06624605655670165, "grad_norm": 0.4605594575405121, "learning_rate": 6.761206480174531e-07, "epoch": 0.9631154130623533, "step": 29950 }, { "loss": 0.06516779661178589, "grad_norm": 0.6219736337661743, "learning_rate": 6.64405849518579e-07, "epoch": 0.9634369874907547, "step": 29960 }, { "loss": 0.06415200829505921, "grad_norm": 0.317819207906723, "learning_rate": 6.527930898039047e-07, "epoch": 0.9637585619191562, "step": 29970 }, { "loss": 0.06802978515625, "grad_norm": 0.6788613200187683, "learning_rate": 6.412823808022328e-07, "epoch": 0.9640801363475576, "step": 29980 }, { "loss": 0.056847357749938966, "grad_norm": 0.5521454215049744, "learning_rate": 6.298737343374717e-07, "epoch": 0.964401710775959, "step": 29990 }, { "loss": 0.06040582060813904, "grad_norm": 0.637925386428833, "learning_rate": 6.185671621287581e-07, "epoch": 0.9647232852043606, "step": 30000 }, { "loss": 0.0742426872253418, "grad_norm": 0.5535704493522644, "learning_rate": 6.073626757903239e-07, "epoch": 0.965044859632762, "step": 30010 }, { "loss": 0.06893228888511657, "grad_norm": 0.6192355751991272, "learning_rate": 5.962602868315737e-07, "epoch": 0.9653664340611634, "step": 30020 }, { "loss": 0.06794821619987487, "grad_norm": 0.6708097457885742, "learning_rate": 5.852600066570291e-07, "epoch": 0.9656880084895649, "step": 30030 }, { "loss": 0.0759381353855133, "grad_norm": 0.4879767596721649, "learning_rate": 5.743618465662959e-07, "epoch": 0.9660095829179663, "step": 30040 }, { "loss": 0.074285888671875, "grad_norm": 0.5703880190849304, "learning_rate": 5.635658177541192e-07, "epoch": 0.9663311573463678, "step": 30050 }, { "loss": 0.06602758765220643, "grad_norm": 0.5126018524169922, "learning_rate": 5.52871931310317e-07, "epoch": 0.9666527317747693, "step": 30060 }, { "loss": 0.04887774586677551, "grad_norm": 0.6106338500976562, "learning_rate": 5.4228019821978e-07, "epoch": 0.9669743062031707, "step": 30070 }, { "loss": 0.08942516446113587, "grad_norm": 0.6533125638961792, "learning_rate": 5.317906293624719e-07, "epoch": 0.9672958806315721, "step": 30080 }, { "loss": 0.059960460662841795, "grad_norm": 0.5349124073982239, "learning_rate": 5.214032355134069e-07, "epoch": 0.9676174550599737, "step": 30090 }, { "loss": 0.06590099334716797, "grad_norm": 0.6322242021560669, "learning_rate": 5.111180273426497e-07, "epoch": 0.9679390294883751, "step": 30100 }, { "loss": 0.08278042078018188, "grad_norm": 0.5157520174980164, "learning_rate": 5.009350154153158e-07, "epoch": 0.9682606039167765, "step": 30110 }, { "loss": 0.08131775259971619, "grad_norm": 0.7335174679756165, "learning_rate": 4.908542101914937e-07, "epoch": 0.968582178345178, "step": 30120 }, { "loss": 0.05691747665405274, "grad_norm": 0.6210181713104248, "learning_rate": 4.808756220263333e-07, "epoch": 0.9689037527735794, "step": 30130 }, { "loss": 0.08510000705718994, "grad_norm": 0.43013817071914673, "learning_rate": 4.709992611699465e-07, "epoch": 0.9692253272019808, "step": 30140 }, { "loss": 0.079147469997406, "grad_norm": 0.47200024127960205, "learning_rate": 4.6122513776749586e-07, "epoch": 0.9695469016303824, "step": 30150 }, { "loss": 0.0846597671508789, "grad_norm": 1.025412678718567, "learning_rate": 4.5155326185905014e-07, "epoch": 0.9698684760587838, "step": 30160 }, { "loss": 0.06067612171173096, "grad_norm": 0.48970702290534973, "learning_rate": 4.4198364337969535e-07, "epoch": 0.9701900504871852, "step": 30170 }, { "loss": 0.05486201643943787, "grad_norm": 0.8192650675773621, "learning_rate": 4.325162921594683e-07, "epoch": 0.9705116249155867, "step": 30180 }, { "loss": 0.0711601734161377, "grad_norm": 0.49607720971107483, "learning_rate": 4.2315121792335654e-07, "epoch": 0.9708331993439882, "step": 30190 }, { "loss": 0.05947260856628418, "grad_norm": 0.5873615741729736, "learning_rate": 4.138884302912649e-07, "epoch": 0.9711547737723896, "step": 30200 }, { "loss": 0.06383942365646363, "grad_norm": 0.4902679920196533, "learning_rate": 4.0472793877806004e-07, "epoch": 0.9714763482007911, "step": 30210 }, { "loss": 0.06064661741256714, "grad_norm": 0.3690512478351593, "learning_rate": 3.9566975279350385e-07, "epoch": 0.9717979226291925, "step": 30220 }, { "loss": 0.05835795998573303, "grad_norm": 0.7086575031280518, "learning_rate": 3.867138816422977e-07, "epoch": 0.9721194970575939, "step": 30230 }, { "loss": 0.07011948823928833, "grad_norm": 0.5723183751106262, "learning_rate": 3.7786033452401616e-07, "epoch": 0.9724410714859955, "step": 30240 }, { "loss": 0.08009362816810608, "grad_norm": 0.8066279292106628, "learning_rate": 3.6910912053313983e-07, "epoch": 0.9727626459143969, "step": 30250 }, { "loss": 0.0474946528673172, "grad_norm": 0.796147346496582, "learning_rate": 3.604602486590114e-07, "epoch": 0.9730842203427983, "step": 30260 }, { "loss": 0.08515186905860901, "grad_norm": 0.6759833097457886, "learning_rate": 3.519137277858797e-07, "epoch": 0.9734057947711998, "step": 30270 }, { "loss": 0.06661058664321899, "grad_norm": 0.7066734433174133, "learning_rate": 3.434695666928445e-07, "epoch": 0.9737273691996012, "step": 30280 }, { "loss": 0.06675366759300232, "grad_norm": 0.6176568269729614, "learning_rate": 3.351277740538339e-07, "epoch": 0.9740489436280027, "step": 30290 }, { "loss": 0.07360825538635254, "grad_norm": 0.7115502953529358, "learning_rate": 3.268883584376603e-07, "epoch": 0.9743705180564042, "step": 30300 }, { "loss": 0.05721208453178406, "grad_norm": 0.8995148539543152, "learning_rate": 3.187513283079424e-07, "epoch": 0.9746920924848056, "step": 30310 }, { "loss": 0.0957691490650177, "grad_norm": 1.0181430578231812, "learning_rate": 3.1071669202314966e-07, "epoch": 0.975013666913207, "step": 30320 }, { "loss": 0.06637667417526245, "grad_norm": 0.7077873349189758, "learning_rate": 3.0278445783655794e-07, "epoch": 0.9753352413416085, "step": 30330 }, { "loss": 0.08069146275520325, "grad_norm": 0.5280202031135559, "learning_rate": 2.9495463389627166e-07, "epoch": 0.97565681577001, "step": 30340 }, { "loss": 0.06518712043762206, "grad_norm": 0.6744681000709534, "learning_rate": 2.8722722824516825e-07, "epoch": 0.9759783901984114, "step": 30350 }, { "loss": 0.07043516635894775, "grad_norm": 0.6811769604682922, "learning_rate": 2.796022488209427e-07, "epoch": 0.9762999646268129, "step": 30360 }, { "loss": 0.08094441890716553, "grad_norm": 0.4031400978565216, "learning_rate": 2.720797034560851e-07, "epoch": 0.9766215390552143, "step": 30370 }, { "loss": 0.07110047340393066, "grad_norm": 0.7690785527229309, "learning_rate": 2.6465959987782563e-07, "epoch": 0.9769431134836157, "step": 30380 }, { "loss": 0.08905300498008728, "grad_norm": 0.5113458633422852, "learning_rate": 2.5734194570820045e-07, "epoch": 0.9772646879120173, "step": 30390 }, { "loss": 0.05021012425422668, "grad_norm": 0.4673030376434326, "learning_rate": 2.5012674846399685e-07, "epoch": 0.9775862623404187, "step": 30400 }, { "loss": 0.0810099720954895, "grad_norm": 0.7750974297523499, "learning_rate": 2.43014015556764e-07, "epoch": 0.9779078367688201, "step": 30410 }, { "loss": 0.06320219039916992, "grad_norm": 0.5255265235900879, "learning_rate": 2.3600375429277954e-07, "epoch": 0.9782294111972216, "step": 30420 }, { "loss": 0.08997372388839722, "grad_norm": 0.6714896559715271, "learning_rate": 2.290959718730834e-07, "epoch": 0.978550985625623, "step": 30430 }, { "loss": 0.06944092512130737, "grad_norm": 0.5551565885543823, "learning_rate": 2.2229067539343286e-07, "epoch": 0.9788725600540245, "step": 30440 }, { "loss": 0.0534528911113739, "grad_norm": 0.5738435983657837, "learning_rate": 2.1558787184430274e-07, "epoch": 0.979194134482426, "step": 30450 }, { "loss": 0.06853411197662354, "grad_norm": 0.3571486473083496, "learning_rate": 2.089875681109188e-07, "epoch": 0.9795157089108274, "step": 30460 }, { "loss": 0.07802314162254334, "grad_norm": 0.6826774477958679, "learning_rate": 2.024897709731799e-07, "epoch": 0.9798372833392288, "step": 30470 }, { "loss": 0.07004804015159607, "grad_norm": 0.29291650652885437, "learning_rate": 1.9609448710572464e-07, "epoch": 0.9801588577676303, "step": 30480 }, { "loss": 0.06426869630813599, "grad_norm": 0.7378115653991699, "learning_rate": 1.898017230778537e-07, "epoch": 0.9804804321960318, "step": 30490 }, { "loss": 0.08708415627479553, "grad_norm": 0.951000452041626, "learning_rate": 1.8361148535357419e-07, "epoch": 0.9808020066244332, "step": 30500 }, { "loss": 0.0709414005279541, "grad_norm": 0.2556989789009094, "learning_rate": 1.7752378029157745e-07, "epoch": 0.9811235810528347, "step": 30510 }, { "loss": 0.07617509365081787, "grad_norm": 0.4664788842201233, "learning_rate": 1.7153861414525018e-07, "epoch": 0.9814451554812361, "step": 30520 }, { "loss": 0.06983622908592224, "grad_norm": 0.6659867167472839, "learning_rate": 1.6565599306260782e-07, "epoch": 0.9817667299096375, "step": 30530 }, { "loss": 0.08357095718383789, "grad_norm": 0.6239607334136963, "learning_rate": 1.5987592308636113e-07, "epoch": 0.9820883043380391, "step": 30540 }, { "loss": 0.06588581800460816, "grad_norm": 0.43989256024360657, "learning_rate": 1.5419841015388293e-07, "epoch": 0.9824098787664405, "step": 30550 }, { "loss": 0.07492035031318664, "grad_norm": 0.6496413350105286, "learning_rate": 1.4862346009716365e-07, "epoch": 0.9827314531948419, "step": 30560 }, { "loss": 0.06939973831176757, "grad_norm": 0.5781214237213135, "learning_rate": 1.4315107864287802e-07, "epoch": 0.9830530276232434, "step": 30570 }, { "loss": 0.07107747197151185, "grad_norm": 0.7525464296340942, "learning_rate": 1.3778127141232943e-07, "epoch": 0.9833746020516448, "step": 30580 }, { "loss": 0.06889268159866332, "grad_norm": 0.5934503674507141, "learning_rate": 1.3251404392143896e-07, "epoch": 0.9836961764800463, "step": 30590 }, { "loss": 0.08742887377738953, "grad_norm": 0.8118358850479126, "learning_rate": 1.2734940158078967e-07, "epoch": 0.9840177509084478, "step": 30600 }, { "loss": 0.058663409948348996, "grad_norm": 0.6564080119132996, "learning_rate": 1.222873496955379e-07, "epoch": 0.9843393253368492, "step": 30610 }, { "loss": 0.06013064980506897, "grad_norm": 0.6733872890472412, "learning_rate": 1.173278934655131e-07, "epoch": 0.9846608997652506, "step": 30620 }, { "loss": 0.07635838985443115, "grad_norm": 0.632681667804718, "learning_rate": 1.1247103798511793e-07, "epoch": 0.9849824741936521, "step": 30630 }, { "loss": 0.08892632722854614, "grad_norm": 0.9751865267753601, "learning_rate": 1.0771678824339493e-07, "epoch": 0.9853040486220536, "step": 30640 }, { "loss": 0.06905696392059327, "grad_norm": 0.4078354835510254, "learning_rate": 1.0306514912393761e-07, "epoch": 0.985625623050455, "step": 30650 }, { "loss": 0.06030969619750977, "grad_norm": 0.5289317965507507, "learning_rate": 9.851612540500155e-08, "epoch": 0.9859471974788565, "step": 30660 }, { "loss": 0.06162986159324646, "grad_norm": 0.6373181343078613, "learning_rate": 9.406972175938222e-08, "epoch": 0.9862687719072579, "step": 30670 }, { "loss": 0.06717531085014343, "grad_norm": 0.5047315359115601, "learning_rate": 8.972594275448165e-08, "epoch": 0.9865903463356593, "step": 30680 }, { "loss": 0.07401962280273437, "grad_norm": 0.7474780082702637, "learning_rate": 8.548479285229727e-08, "epoch": 0.9869119207640609, "step": 30690 }, { "loss": 0.07880670428276063, "grad_norm": 0.7355194687843323, "learning_rate": 8.134627640937754e-08, "epoch": 0.9872334951924623, "step": 30700 }, { "loss": 0.07137477397918701, "grad_norm": 0.6599552631378174, "learning_rate": 7.731039767687743e-08, "epoch": 0.9875550696208637, "step": 30710 }, { "loss": 0.10589978694915772, "grad_norm": 0.5183787941932678, "learning_rate": 7.337716080048074e-08, "epoch": 0.9878766440492652, "step": 30720 }, { "loss": 0.07905386686325074, "grad_norm": 0.7177006602287292, "learning_rate": 6.954656982047781e-08, "epoch": 0.9881982184776666, "step": 30730 }, { "loss": 0.05417203903198242, "grad_norm": 0.43448272347450256, "learning_rate": 6.581862867168776e-08, "epoch": 0.9885197929060681, "step": 30740 }, { "loss": 0.0657284438610077, "grad_norm": 0.6650566458702087, "learning_rate": 6.219334118351405e-08, "epoch": 0.9888413673344696, "step": 30750 }, { "loss": 0.06718289256095886, "grad_norm": 0.6515393257141113, "learning_rate": 5.8670711079877874e-08, "epoch": 0.989162941762871, "step": 30760 }, { "loss": 0.054975324869155885, "grad_norm": 0.3662674129009247, "learning_rate": 5.52507419792736e-08, "epoch": 0.9894845161912724, "step": 30770 }, { "loss": 0.06047446727752685, "grad_norm": 0.5469169616699219, "learning_rate": 5.193343739474665e-08, "epoch": 0.989806090619674, "step": 30780 }, { "loss": 0.05281834602355957, "grad_norm": 0.690144419670105, "learning_rate": 4.871880073388235e-08, "epoch": 0.9901276650480754, "step": 30790 }, { "loss": 0.07430117130279541, "grad_norm": 0.5861929059028625, "learning_rate": 4.560683529877263e-08, "epoch": 0.9904492394764768, "step": 30800 }, { "loss": 0.061764204502105714, "grad_norm": 0.6248287558555603, "learning_rate": 4.259754428608265e-08, "epoch": 0.9907708139048783, "step": 30810 }, { "loss": 0.06891185641288758, "grad_norm": 0.8176858425140381, "learning_rate": 3.9690930786995264e-08, "epoch": 0.9910923883332797, "step": 30820 }, { "loss": 0.06561875939369202, "grad_norm": 0.5709847211837769, "learning_rate": 3.6886997787211054e-08, "epoch": 0.9914139627616811, "step": 30830 }, { "loss": 0.07824975848197938, "grad_norm": 1.156491994857788, "learning_rate": 3.418574816697051e-08, "epoch": 0.9917355371900827, "step": 30840 }, { "loss": 0.08055688142776489, "grad_norm": 0.5808718800544739, "learning_rate": 3.158718470103183e-08, "epoch": 0.9920571116184841, "step": 30850 }, { "loss": 0.07981680631637574, "grad_norm": 0.45614954829216003, "learning_rate": 2.909131005868204e-08, "epoch": 0.9923786860468855, "step": 30860 }, { "loss": 0.06255224347114563, "grad_norm": 0.7705869674682617, "learning_rate": 2.6698126803703648e-08, "epoch": 0.992700260475287, "step": 30870 }, { "loss": 0.05859441757202148, "grad_norm": 0.5446732640266418, "learning_rate": 2.4407637394396886e-08, "epoch": 0.9930218349036884, "step": 30880 }, { "loss": 0.0605139434337616, "grad_norm": 0.8045651912689209, "learning_rate": 2.2219844183601902e-08, "epoch": 0.9933434093320899, "step": 30890 }, { "loss": 0.06261725425720215, "grad_norm": 0.4299694895744324, "learning_rate": 2.013474941864324e-08, "epoch": 0.9936649837604914, "step": 30900 }, { "loss": 0.07130892276763916, "grad_norm": 0.4894041419029236, "learning_rate": 1.815235524134096e-08, "epoch": 0.9939865581888928, "step": 30910 }, { "loss": 0.07848079800605774, "grad_norm": 0.8134866952896118, "learning_rate": 1.627266368804392e-08, "epoch": 0.9943081326172942, "step": 30920 }, { "loss": 0.07628424167633056, "grad_norm": 0.8397912383079529, "learning_rate": 1.4495676689607606e-08, "epoch": 0.9946297070456958, "step": 30930 }, { "loss": 0.07152285575866699, "grad_norm": 0.5438010096549988, "learning_rate": 1.2821396071360792e-08, "epoch": 0.9949512814740972, "step": 30940 }, { "loss": 0.0670372188091278, "grad_norm": 0.6954264640808105, "learning_rate": 1.1249823553149962e-08, "epoch": 0.9952728559024986, "step": 30950 }, { "loss": 0.06887378692626953, "grad_norm": 0.6697443127632141, "learning_rate": 9.780960749317114e-09, "epoch": 0.9955944303309001, "step": 30960 }, { "loss": 0.08694977760314941, "grad_norm": 0.4334903657436371, "learning_rate": 8.414809168688643e-09, "epoch": 0.9959160047593015, "step": 30970 }, { "loss": 0.08538188934326171, "grad_norm": 0.8963655829429626, "learning_rate": 7.151370214608654e-09, "epoch": 0.996237579187703, "step": 30980 }, { "loss": 0.04286316633224487, "grad_norm": 0.40512776374816895, "learning_rate": 5.990645184883459e-09, "epoch": 0.9965591536161045, "step": 30990 }, { "loss": 0.06450687050819397, "grad_norm": 0.2572462260723114, "learning_rate": 4.932635271825969e-09, "epoch": 0.9968807280445059, "step": 31000 }, { "loss": 0.05183506608009338, "grad_norm": 0.4054562747478485, "learning_rate": 3.977341562244608e-09, "epoch": 0.9972023024729073, "step": 31010 }, { "loss": 0.06269176006317138, "grad_norm": 0.5525853633880615, "learning_rate": 3.1247650374210957e-09, "epoch": 0.9975238769013088, "step": 31020 }, { "loss": 0.05774378776550293, "grad_norm": 0.5034267902374268, "learning_rate": 2.3749065731326627e-09, "epoch": 0.9978454513297103, "step": 31030 }, { "loss": 0.08453238010406494, "grad_norm": 0.7438255548477173, "learning_rate": 1.7277669396520425e-09, "epoch": 0.9981670257581117, "step": 31040 }, { "loss": 0.06554477214813233, "grad_norm": 0.40568795800209045, "learning_rate": 1.1833468017252714e-09, "epoch": 0.9984886001865132, "step": 31050 }, { "loss": 0.0662672460079193, "grad_norm": 0.3644300699234009, "learning_rate": 7.416467185827891e-10, "epoch": 0.9988101746149146, "step": 31060 }, { "loss": 0.0686276376247406, "grad_norm": 0.8694908618927002, "learning_rate": 4.026671439616436e-10, "epoch": 0.999131749043316, "step": 31070 }, { "loss": 0.07674849033355713, "grad_norm": 0.7753235697746277, "learning_rate": 1.6640842603887762e-10, "epoch": 0.9994533234717176, "step": 31080 }, { "eval_loss": 0.06542336940765381, "eval_runtime": 34.5075, "eval_samples_per_second": 145.649, "eval_steps_per_second": 36.427, "epoch": 0.9994533234717176, "step": 31080 }, { "loss": 0.08255035877227783, "grad_norm": 0.5661125183105469, "learning_rate": 3.2870807520346546e-11, "epoch": 0.999774897900119, "step": 31090 }, { "eval_loss": 0.06546882539987564, "eval_runtime": 34.5111, "eval_samples_per_second": 145.634, "eval_steps_per_second": 36.423, "epoch": 1.0, "step": 31097 }, { "train_runtime": 4570.1661, "train_samples_per_second": 108.867, "train_steps_per_second": 6.804, "total_flos": 1.6262875457184e+17, "train_loss": 0.09000451330583219, "epoch": 1.0, "step": 31097 } ]